Compare commits

..

1 Commits

Author SHA1 Message Date
YeonGyu-Kim
fd7faff792 fix: expand error classifier patterns and auto-enable model_fallback (#2393) 2026-03-12 01:18:15 +09:00
36 changed files with 375 additions and 461 deletions

View File

@@ -181,7 +181,7 @@ When Sisyphus delegates to a subagent, it doesn't pick a model. It picks a **cat
| `quick` | Single-file changes, typos |
| `ultrabrain` | Hard logic, architecture decisions |
Agent says what kind of work. Harness picks the right model. `ultrabrain` now routes to GPT-5.4 xhigh by default. You touch nothing.
Agent says what kind of work. Harness picks the right model. You touch nothing.
### Claude Code Compatibility

View File

@@ -147,11 +147,11 @@ When agents delegate work, they don't pick a model name — they pick a **catego
| Category | When Used | Fallback Chain |
| -------------------- | -------------------------- | -------------------------------------------- |
| `visual-engineering` | Frontend, UI, CSS, design | Gemini 3.1 Pro → GLM 5 → Claude Opus |
| `ultrabrain` | Maximum reasoning needed | GPT-5.4 → Gemini 3.1 Pro → Claude Opus |
| `ultrabrain` | Maximum reasoning needed | GPT-5.3 Codex → Gemini 3.1 Pro → Claude Opus |
| `deep` | Deep coding, complex logic | GPT-5.3 Codex → Claude Opus → Gemini 3.1 Pro |
| `artistry` | Creative, novel approaches | Gemini 3.1 Pro → Claude Opus → GPT-5.4 |
| `quick` | Simple, fast tasks | Claude Haiku → Gemini Flash → GPT-5-Nano |
| `unspecified-high` | General complex work | Claude Opus → GPT-5.4 (high) → GLM 5 → K2P5 |
| `unspecified-high` | General complex work | GPT-5.4 → Claude Opus → GLM 5 → K2P5 |
| `unspecified-low` | General standard work | Claude Sonnet → GPT-5.3 Codex → Gemini Flash |
| `writing` | Text, docs, prose | Gemini Flash → Claude Sonnet |
@@ -190,7 +190,7 @@ See the [Orchestration System Guide](./orchestration.md) for how agents dispatch
"categories": {
"quick": { "model": "opencode/gpt-5-nano" },
"unspecified-low": { "model": "anthropic/claude-sonnet-4-6" },
"unspecified-high": { "model": "anthropic/claude-opus-4-6", "variant": "max" },
"unspecified-high": { "model": "openai/gpt-5.4-high" },
"visual-engineering": {
"model": "google/gemini-3.1-pro",
"variant": "high",

View File

@@ -296,12 +296,12 @@ task({ category: "quick", prompt: "..." }); // "Just get it done fast"
| Category | Model | When to Use |
| -------------------- | ---------------------- | ----------------------------------------------------------- |
| `visual-engineering` | Gemini 3.1 Pro | Frontend, UI/UX, design, styling, animation |
| `ultrabrain` | GPT-5.4 (xhigh) | Deep logical reasoning, complex architecture decisions |
| `ultrabrain` | GPT-5.3 Codex (xhigh) | Deep logical reasoning, complex architecture decisions |
| `artistry` | Gemini 3.1 Pro (high) | Highly creative or artistic tasks, novel ideas |
| `quick` | Claude Haiku 4.5 | Trivial tasks - single file changes, typo fixes |
| `deep` | GPT-5.3 Codex (medium) | Goal-oriented autonomous problem-solving, thorough research |
| `unspecified-low` | Claude Sonnet 4.6 | Tasks that don't fit other categories, low effort |
| `unspecified-high` | Claude Opus 4.6 (max) | Tasks that don't fit other categories, high effort |
| `unspecified-high` | GPT-5.4 (high) | Tasks that don't fit other categories, high effort |
| `writing` | Gemini 3 Flash | Documentation, prose, technical writing |
### Skills: Domain-Specific Instructions

View File

@@ -101,7 +101,7 @@ Use Hephaestus when you need deep architectural reasoning, complex debugging acr
- **Multi-model orchestration.** Pure Codex is single-model. OmO routes different tasks to different models automatically. GPT for deep reasoning. Gemini for frontend. Haiku for speed. The right brain for the right job.
- **Background agents.** Fire 5+ agents in parallel. Something Codex simply cannot do. While one agent writes code, another researches patterns, another checks documentation. Like a real dev team.
- **Category system.** Tasks are routed by intent, not model name. `visual-engineering` gets Gemini. `ultrabrain` gets GPT-5.4. `quick` gets Haiku. No manual juggling.
- **Category system.** Tasks are routed by intent, not model name. `visual-engineering` gets Gemini. `ultrabrain` gets GPT-5.3 Codex. `quick` gets Haiku. No manual juggling.
- **Accumulated wisdom.** Subagents learn from previous results. Conventions discovered in task 1 are passed to task 5. Mistakes made early aren't repeated. The system gets smarter as it works.
### Prometheus: The Strategic Planner
@@ -193,13 +193,13 @@ You can override specific agents or categories in your config:
},
// General high-effort work
"unspecified-high": { "model": "anthropic/claude-opus-4-6", "variant": "max" },
"unspecified-high": { "model": "openai/gpt-5.4", "variant": "high" },
// Quick tasks: use the cheapest models
"quick": { "model": "anthropic/claude-haiku-4-5" },
// Deep reasoning: GPT-5.4
"ultrabrain": { "model": "openai/gpt-5.4", "variant": "xhigh" },
// Deep reasoning: GPT-5.3-codex
"ultrabrain": { "model": "openai/gpt-5.3-codex", "variant": "xhigh" },
},
}
```

View File

@@ -100,7 +100,7 @@ Here's a practical starting configuration:
"unspecified-low": { "model": "anthropic/claude-sonnet-4-6" },
// unspecified-high — complex work
"unspecified-high": { "model": "anthropic/claude-opus-4-6", "variant": "max" },
"unspecified-high": { "model": "openai/gpt-5.4-high" },
// writing — docs/prose
"writing": { "model": "google/gemini-3-flash" },
@@ -225,12 +225,12 @@ Domain-specific model delegation used by the `task()` tool. When Sisyphus delega
| Category | Default Model | Description |
| -------------------- | ------------------------------- | ---------------------------------------------- |
| `visual-engineering` | `google/gemini-3.1-pro` (high) | Frontend, UI/UX, design, animation |
| `ultrabrain` | `openai/gpt-5.4` (xhigh) | Deep logical reasoning, complex architecture |
| `ultrabrain` | `openai/gpt-5.3-codex` (xhigh) | Deep logical reasoning, complex architecture |
| `deep` | `openai/gpt-5.3-codex` (medium) | Autonomous problem-solving, thorough research |
| `artistry` | `google/gemini-3.1-pro` (high) | Creative/unconventional approaches |
| `quick` | `anthropic/claude-haiku-4-5` | Trivial tasks, typo fixes, single-file changes |
| `unspecified-low` | `anthropic/claude-sonnet-4-6` | General tasks, low effort |
| `unspecified-high` | `anthropic/claude-opus-4-6` (max) | General tasks, high effort |
| `unspecified-high` | `openai/gpt-5.4` (high) | General tasks, high effort |
| `writing` | `google/gemini-3-flash` | Documentation, prose, technical writing |
> **Note**: Built-in defaults only apply if the category is present in your config. Otherwise the system default model is used.
@@ -283,12 +283,12 @@ Disable categories: `{ "disabled_categories": ["ultrabrain"] }`
| Category | Default Model | Provider Priority |
| ---------------------- | ------------------- | -------------------------------------------------------------- |
| **visual-engineering** | `gemini-3.1-pro` | `gemini-3.1-pro` → `glm-5` → `claude-opus-4-6` |
| **ultrabrain** | `gpt-5.4` | `gpt-5.4` → `gemini-3.1-pro` → `claude-opus-4-6` |
| **ultrabrain** | `gpt-5.3-codex` | `gpt-5.3-codex` → `gemini-3.1-pro` → `claude-opus-4-6` |
| **deep** | `gpt-5.3-codex` | `gpt-5.3-codex` → `claude-opus-4-6` → `gemini-3.1-pro` |
| **artistry** | `gemini-3.1-pro` | `gemini-3.1-pro` → `claude-opus-4-6` → `gpt-5.4` |
| **quick** | `claude-haiku-4-5` | `claude-haiku-4-5` → `gemini-3-flash` → `gpt-5-nano` |
| **unspecified-low** | `claude-sonnet-4-6` | `claude-sonnet-4-6` → `gpt-5.3-codex` → `gemini-3-flash` |
| **unspecified-high** | `claude-opus-4-6` | `claude-opus-4-6` → `gpt-5.4 (high)` → `glm-5` → `k2p5` → `kimi-k2.5` |
| **unspecified-high** | `gpt-5.4` | `gpt-5.4` → `claude-opus-4-6` → `glm-5` → `k2p5` → `kimi-k2.5` |
| **writing** | `gemini-3-flash` | `gemini-3-flash` → `claude-sonnet-4-6` |
Run `bunx oh-my-opencode doctor --verbose` to see effective model resolution for your config.

View File

@@ -108,12 +108,12 @@ By combining these two concepts, you can generate optimal agents through `task`.
| Category | Default Model | Use Cases |
| -------------------- | ------------------------------- | --------------------------------------------------------------------------------------------------------------------------- |
| `visual-engineering` | `google/gemini-3.1-pro` | Frontend, UI/UX, design, styling, animation |
| `ultrabrain` | `openai/gpt-5.4` (xhigh) | Deep logical reasoning, complex architecture decisions requiring extensive analysis |
| `ultrabrain` | `openai/gpt-5.3-codex` (xhigh) | Deep logical reasoning, complex architecture decisions requiring extensive analysis |
| `deep` | `openai/gpt-5.3-codex` (medium) | Goal-oriented autonomous problem-solving. Thorough research before action. For hairy problems requiring deep understanding. |
| `artistry` | `google/gemini-3.1-pro` (high) | Highly creative/artistic tasks, novel ideas |
| `quick` | `anthropic/claude-haiku-4-5` | Trivial tasks - single file changes, typo fixes, simple modifications |
| `unspecified-low` | `anthropic/claude-sonnet-4-6` | Tasks that don't fit other categories, low effort required |
| `unspecified-high` | `anthropic/claude-opus-4-6` (max) | Tasks that don't fit other categories, high effort required |
| `unspecified-high` | `openai/gpt-5.4` (high) | Tasks that don't fit other categories, high effort required |
| `writing` | `google/gemini-3-flash` | Documentation, prose, technical writing |
### Usage
@@ -332,7 +332,7 @@ You can create powerful specialized agents by combining Categories and Skills.
- **Category**: `ultrabrain`
- **load_skills**: `[]` (pure reasoning)
- **Effect**: Leverages GPT-5.4 xhigh reasoning for in-depth system architecture analysis.
- **Effect**: Leverages GPT-5.3 Codex's logical reasoning for in-depth system architecture analysis.
#### The Maintainer (Quick Fixes)

View File

@@ -115,7 +115,6 @@ export async function createBuiltinAgents(
browserProvider,
uiSelectedModel,
availableModels,
isFirstRunNoCache,
disabledSkills,
disableOmoEnv,
})

View File

@@ -7,7 +7,7 @@ import { AGENT_MODEL_REQUIREMENTS, isModelAvailable } from "../../shared"
import { buildAgent, isFactory } from "../agent-builder"
import { applyOverrides } from "./agent-overrides"
import { applyEnvironmentContext } from "./environment-context"
import { applyModelResolution, getFirstFallbackModel } from "./model-resolution"
import { applyModelResolution } from "./model-resolution"
export function collectPendingBuiltinAgents(input: {
agentSources: Record<BuiltinAgentName, import("../agent-builder").AgentSource>
@@ -21,7 +21,6 @@ export function collectPendingBuiltinAgents(input: {
browserProvider?: BrowserAutomationProvider
uiSelectedModel?: string
availableModels: Set<string>
isFirstRunNoCache: boolean
disabledSkills?: Set<string>
useTaskSystem?: boolean
disableOmoEnv?: boolean
@@ -38,7 +37,6 @@ export function collectPendingBuiltinAgents(input: {
browserProvider,
uiSelectedModel,
availableModels,
isFirstRunNoCache,
disabledSkills,
disableOmoEnv = false,
} = input
@@ -68,16 +66,13 @@ export function collectPendingBuiltinAgents(input: {
const isPrimaryAgent = isFactory(source) && source.mode === "primary"
let resolution = applyModelResolution({
const resolution = applyModelResolution({
uiSelectedModel: (isPrimaryAgent && !override?.model) ? uiSelectedModel : undefined,
userModel: override?.model,
requirement,
availableModels,
systemDefaultModel,
})
if (!resolution && isFirstRunNoCache && !override?.model) {
resolution = getFirstFallbackModel(requirement)
}
if (!resolution) continue
const { model, variant: resolvedVariant } = resolution

View File

@@ -483,23 +483,17 @@ describe("createBuiltinAgents without systemDefaultModel", () => {
cacheSpy.mockRestore?.()
})
test("oracle is created on first run when no cache and no systemDefaultModel", async () => {
// #given
const cacheSpy = spyOn(connectedProvidersCache, "readConnectedProvidersCache").mockReturnValue(null)
const fetchSpy = spyOn(shared, "fetchAvailableModels").mockResolvedValue(new Set())
test("agents NOT created when no cache and no systemDefaultModel (first run without defaults)", async () => {
// #given
const cacheSpy = spyOn(connectedProvidersCache, "readConnectedProvidersCache").mockReturnValue(null)
try {
// #when
const agents = await createBuiltinAgents([], {}, undefined, undefined)
// #when
const agents = await createBuiltinAgents([], {}, undefined, undefined)
// #then
expect(agents.oracle).toBeDefined()
expect(agents.oracle.model).toBe("openai/gpt-5.4")
} finally {
fetchSpy.mockRestore()
cacheSpy.mockRestore()
}
})
// #then
expect(agents.oracle).toBeUndefined()
cacheSpy.mockRestore?.()
})
test("sisyphus created via connected cache fallback when all providers available", async () => {
// #given

View File

@@ -8,7 +8,7 @@ export const BackgroundTaskConfigSchema = z.object({
maxDescendants: z.number().int().min(1).optional(),
/** Stale timeout in milliseconds - interrupt tasks with no activity for this duration (default: 180000 = 3 minutes, minimum: 60000 = 1 minute) */
staleTimeoutMs: z.number().min(60000).optional(),
/** Timeout for tasks that never received any progress update, falling back to startedAt (default: 1800000 = 30 minutes, minimum: 60000 = 1 minute) */
/** Timeout for tasks that never received any progress update, falling back to startedAt (default: 600000 = 10 minutes, minimum: 60000 = 1 minute) */
messageStalenessTimeoutMs: z.number().min(60000).optional(),
syncPollTimeoutMs: z.number().min(60000).optional(),
})

View File

@@ -4,7 +4,7 @@ import type { BackgroundTask, LaunchInput } from "./types"
export const TASK_TTL_MS = 30 * 60 * 1000
export const MIN_STABILITY_TIME_MS = 10 * 1000
export const DEFAULT_STALE_TIMEOUT_MS = 180_000
export const DEFAULT_MESSAGE_STALENESS_TIMEOUT_MS = 1_800_000
export const DEFAULT_MESSAGE_STALENESS_TIMEOUT_MS = 600_000
export const MIN_RUNTIME_BEFORE_STALE_MS = 30_000
export const MIN_IDLE_TIME_MS = 5000
export const POLLING_INTERVAL_MS = 3000

View File

@@ -1,60 +0,0 @@
declare const require: (name: string) => any
const { describe, expect, test, mock } = require("bun:test")
import { DEFAULT_MESSAGE_STALENESS_TIMEOUT_MS } from "./constants"
import { checkAndInterruptStaleTasks } from "./task-poller"
import type { BackgroundTask } from "./types"
function createRunningTask(startedAt: Date): BackgroundTask {
return {
id: "task-1",
sessionID: "ses-1",
parentSessionID: "parent-ses-1",
parentMessageID: "msg-1",
description: "test",
prompt: "test",
agent: "explore",
status: "running",
startedAt,
progress: undefined,
}
}
describe("DEFAULT_MESSAGE_STALENESS_TIMEOUT_MS", () => {
test("uses a 30 minute default", () => {
// #given
const expectedTimeout = 30 * 60 * 1000
// #when
const timeout = DEFAULT_MESSAGE_STALENESS_TIMEOUT_MS
// #then
expect(timeout).toBe(expectedTimeout)
})
test("does not interrupt a never-updated task after 15 minutes when config is omitted", async () => {
// #given
const task = createRunningTask(new Date(Date.now() - 15 * 60 * 1000))
const client = {
session: {
abort: mock(() => Promise.resolve()),
},
}
const concurrencyManager = {
release: mock(() => {}),
}
const notifyParentSession = mock(() => Promise.resolve())
// #when
await checkAndInterruptStaleTasks({
tasks: [task],
client: client as never,
config: undefined,
concurrencyManager: concurrencyManager as never,
notifyParentSession,
})
// #then
expect(task.status).toBe("running")
})
})

View File

@@ -117,13 +117,13 @@ describe("checkAndInterruptStaleTasks", () => {
})
it("should use DEFAULT_MESSAGE_STALENESS_TIMEOUT_MS when messageStalenessTimeoutMs is not configured", async () => {
//#given — task started 35 minutes ago, no config for messageStalenessTimeoutMs
//#given — task started 15 minutes ago, no config for messageStalenessTimeoutMs
const task = createRunningTask({
startedAt: new Date(Date.now() - 35 * 60 * 1000),
startedAt: new Date(Date.now() - 15 * 60 * 1000),
progress: undefined,
})
//#when — default is 30 minutes (1_800_000ms)
//#when — default is 10 minutes (600_000ms)
await checkAndInterruptStaleTasks({
tasks: [task],
client: mockClient as never,

View File

@@ -1025,6 +1025,7 @@ Original task: Build something`
const transcriptPath = join(TEST_DIR, "transcript.jsonl")
const toolResultEntry = JSON.stringify({
type: "tool_result",
timestamp: new Date().toISOString(),
tool_name: "write",
tool_input: {},
tool_output: { output: "Task complete! <promise>DONE</promise>" },

View File

@@ -11,15 +11,6 @@ import {
migrateConfigFile,
} from "./shared";
const PARTIAL_STRING_ARRAY_KEYS = new Set([
"disabled_mcps",
"disabled_agents",
"disabled_skills",
"disabled_hooks",
"disabled_commands",
"disabled_tools",
]);
export function parseConfigPartially(
rawConfig: Record<string, unknown>
): OhMyOpenCodeConfig | null {
@@ -32,14 +23,6 @@ export function parseConfigPartially(
const invalidSections: string[] = [];
for (const key of Object.keys(rawConfig)) {
if (PARTIAL_STRING_ARRAY_KEYS.has(key)) {
const sectionValue = rawConfig[key];
if (Array.isArray(sectionValue) && sectionValue.every((value) => typeof value === "string")) {
partialConfig[key] = sectionValue;
}
continue;
}
const sectionResult = OhMyOpenCodeConfigSchema.safeParse({ [key]: rawConfig[key] });
if (sectionResult.success) {
const parsed = sectionResult.data as Record<string, unknown>;

View File

@@ -78,22 +78,6 @@ export async function applyAgentConfig(params: {
const useTaskSystem = params.pluginConfig.experimental?.task_system ?? false;
const disableOmoEnv = params.pluginConfig.experimental?.disable_omo_env ?? false;
const includeClaudeAgents = params.pluginConfig.claude_code?.agents ?? true;
const userAgents = includeClaudeAgents ? loadUserAgents() : {};
const projectAgents = includeClaudeAgents ? loadProjectAgents(params.ctx.directory) : {};
const rawPluginAgents = params.pluginComponents.agents;
const customAgentSummaries = [
...Object.entries(userAgents),
...Object.entries(projectAgents),
...Object.entries(rawPluginAgents).filter(([, config]) => config !== undefined),
].map(([name, config]) => ({
name,
description: typeof (config as Record<string, unknown>)?.description === "string"
? (config as Record<string, unknown>).description as string
: "",
}));
const builtinAgents = await createBuiltinAgents(
migratedDisabledAgents,
params.pluginConfig.agents,
@@ -102,7 +86,7 @@ export async function applyAgentConfig(params: {
params.pluginConfig.categories,
params.pluginConfig.git_master,
allDiscoveredSkills,
customAgentSummaries,
params.ctx.client,
browserProvider,
currentModel,
disabledSkills,
@@ -110,6 +94,11 @@ export async function applyAgentConfig(params: {
disableOmoEnv,
);
const includeClaudeAgents = params.pluginConfig.claude_code?.agents ?? true;
const userAgents = includeClaudeAgents ? loadUserAgents() : {};
const projectAgents = includeClaudeAgents ? loadProjectAgents(params.ctx.directory) : {};
const rawPluginAgents = params.pluginComponents.agents;
const pluginAgents = Object.fromEntries(
Object.entries(rawPluginAgents).map(([key, value]) => [
key,

View File

@@ -174,7 +174,6 @@ export function createEventHandler(args: {
await Promise.resolve(hooks.todoContinuationEnforcer?.handler?.(input));
await Promise.resolve(hooks.unstableAgentBabysitter?.event?.(input));
await Promise.resolve(hooks.contextWindowMonitor?.event?.(input));
await Promise.resolve(hooks.preemptiveCompaction?.event?.(input));
await Promise.resolve(hooks.directoryAgentsInjector?.event?.(input));
await Promise.resolve(hooks.directoryReadmeInjector?.event?.(input));
await Promise.resolve(hooks.rulesInjector?.event?.(input));

View File

@@ -0,0 +1,131 @@
import type { OhMyOpenCodeConfig, HookName } from "../../config"
import { createModelFallbackHook } from "../../hooks"
import { normalizeSDKResponse } from "../../shared"
import { resolveModelFallbackEnabled } from "./model-fallback-config"
type SafeHook = <THook>(hookName: HookName, factory: () => THook) => THook | null
type ModelFallbackSessionContext = {
directory: string
client: {
session: {
get: (input: { path: { id: string } }) => Promise<unknown>
update: (input: {
path: { id: string }
body: { title: string }
query: { directory: string }
}) => Promise<unknown>
}
tui: {
showToast: (input: {
body: {
title: string
message: string
variant: "success" | "error" | "info" | "warning"
duration: number
}
}) => Promise<unknown>
}
}
}
/**
 * Builds the optional `onApplied` callback that rewrites a session's title to
 * surface the fallback model currently in use, e.g.
 * `"My task [fallback: openai/gpt-5.2 high]"`.
 *
 * Returns `undefined` when the feature is disabled so callers can pass the
 * result straight through as an optional hook parameter.
 */
function createFallbackTitleUpdater(
  ctx: ModelFallbackSessionContext,
  enabled: boolean,
):
  | ((input: {
      sessionID: string
      providerID: string
      modelID: string
      variant?: string
    }) => Promise<void>)
  | undefined {
  // Feature is opt-in (experimental.model_fallback_title); do no work when off.
  if (!enabled) {
    return undefined
  }
  // Bounded per-session cache: remembers the session's original ("base") title
  // and the last fallback key applied, so repeated events for the same
  // provider/model/variant combination become no-ops.
  const fallbackTitleMaxEntries = 200
  const fallbackTitleState = new Map<string, { baseTitle?: string; lastKey?: string }>()
  return async (input) => {
    // Identity of the applied fallback; variant (when present) is part of the key.
    const key = `${input.providerID}/${input.modelID}${input.variant ? `:${input.variant}` : ""}`
    const existing = fallbackTitleState.get(input.sessionID) ?? {}
    // This exact fallback is already reflected in the title — nothing to do.
    if (existing.lastKey === key) {
      return
    }
    if (!existing.baseTitle) {
      // First touch for this session: fetch the current title and strip any
      // stale "[fallback: …]" suffix so suffixes never stack up across retries.
      const sessionResp = await ctx.client.session.get({ path: { id: input.sessionID } }).catch(() => null)
      const sessionInfo = sessionResp
        ? normalizeSDKResponse(sessionResp, null as { title?: string } | null, {
            preferResponseOnMissingData: true,
          })
        : null
      const rawTitle = sessionInfo?.title
      if (typeof rawTitle === "string" && rawTitle.length > 0) {
        existing.baseTitle = rawTitle.replace(/\s*\[fallback:[^\]]+\]$/i, "").trim()
      } else {
        // No readable title (fetch failed or empty) — use a generic base.
        existing.baseTitle = "Session"
      }
    }
    const variantLabel = input.variant ? ` ${input.variant}` : ""
    const newTitle = `${existing.baseTitle} [fallback: ${input.providerID}/${input.modelID}${variantLabel}]`
    // Best-effort: a failed title update must never break the fallback itself.
    await ctx.client.session
      .update({
        path: { id: input.sessionID },
        body: { title: newTitle },
        query: { directory: ctx.directory },
      })
      .catch(() => {})
    existing.lastKey = key
    fallbackTitleState.set(input.sessionID, existing)
    // Cap memory: evict the oldest-inserted entry once over the limit
    // (Map iteration order — insertion-order eviction, not strict LRU).
    if (fallbackTitleState.size > fallbackTitleMaxEntries) {
      const oldestKey = fallbackTitleState.keys().next().value
      if (oldestKey) {
        fallbackTitleState.delete(oldestKey)
      }
    }
  }
}
export function createConfiguredModelFallbackHook(args: {
ctx: ModelFallbackSessionContext
pluginConfig: OhMyOpenCodeConfig
isHookEnabled: (hookName: HookName) => boolean
safeHook: SafeHook
}): ReturnType<typeof createModelFallbackHook> | null {
const { ctx, pluginConfig, isHookEnabled, safeHook } = args
const isModelFallbackEnabled = resolveModelFallbackEnabled(pluginConfig)
if (!isModelFallbackEnabled || !isHookEnabled("model-fallback")) {
return null
}
const onApplied = createFallbackTitleUpdater(
ctx,
pluginConfig.experimental?.model_fallback_title ?? false,
)
return safeHook("model-fallback", () =>
createModelFallbackHook({
toast: async ({ title, message, variant, duration }) => {
await ctx.client.tui
.showToast({
body: {
title,
message,
variant: variant ?? "warning",
duration: duration ?? 5000,
},
})
.catch(() => {})
},
onApplied,
}),
)
}

View File

@@ -1,13 +1,10 @@
import type { OhMyOpenCodeConfig, HookName } from "../../config"
import type { ModelCacheState } from "../../plugin-state"
import type { PluginContext } from "../types"
import {
createContextWindowMonitorHook,
createSessionRecoveryHook,
createSessionNotification,
createThinkModeHook,
createModelFallbackHook,
createAnthropicContextWindowLimitRecoveryHook,
createAutoUpdateCheckerHook,
createAgentUsageReminderHook,
@@ -31,10 +28,10 @@ import {
detectExternalNotificationPlugin,
getNotificationConflictWarning,
log,
normalizeSDKResponse,
} from "../../shared"
import { safeCreateHook } from "../../shared/safe-create-hook"
import { sessionExists } from "../../tools"
import { createConfiguredModelFallbackHook } from "./create-model-fallback-session-hook"
export type SessionHooks = {
contextWindowMonitor: ReturnType<typeof createContextWindowMonitorHook> | null
@@ -42,7 +39,7 @@ export type SessionHooks = {
sessionRecovery: ReturnType<typeof createSessionRecoveryHook> | null
sessionNotification: ReturnType<typeof createSessionNotification> | null
thinkMode: ReturnType<typeof createThinkModeHook> | null
modelFallback: ReturnType<typeof createModelFallbackHook> | null
modelFallback: ReturnType<typeof createConfiguredModelFallbackHook>
anthropicContextWindowLimitRecovery: ReturnType<typeof createAnthropicContextWindowLimitRecoveryHook> | null
autoUpdateChecker: ReturnType<typeof createAutoUpdateCheckerHook> | null
agentUsageReminder: ReturnType<typeof createAgentUsageReminderHook> | null
@@ -63,7 +60,7 @@ export type SessionHooks = {
}
export function createSessionHooks(args: {
ctx: PluginContext
ctx: Parameters<typeof createContextWindowMonitorHook>[0]
pluginConfig: OhMyOpenCodeConfig
modelCacheState: ModelCacheState
isHookEnabled: (hookName: HookName) => boolean
@@ -105,73 +102,12 @@ export function createSessionHooks(args: {
? safeHook("think-mode", () => createThinkModeHook())
: null
const enableFallbackTitle = pluginConfig.experimental?.model_fallback_title ?? false
const fallbackTitleMaxEntries = 200
const fallbackTitleState = new Map<string, { baseTitle?: string; lastKey?: string }>()
const updateFallbackTitle = async (input: {
sessionID: string
providerID: string
modelID: string
variant?: string
}) => {
if (!enableFallbackTitle) return
const key = `${input.providerID}/${input.modelID}${input.variant ? `:${input.variant}` : ""}`
const existing = fallbackTitleState.get(input.sessionID) ?? {}
if (existing.lastKey === key) return
if (!existing.baseTitle) {
const sessionResp = await ctx.client.session.get({ path: { id: input.sessionID } }).catch(() => null)
const sessionInfo = sessionResp
? normalizeSDKResponse(sessionResp, null as { title?: string } | null, { preferResponseOnMissingData: true })
: null
const rawTitle = sessionInfo?.title
if (typeof rawTitle === "string" && rawTitle.length > 0) {
existing.baseTitle = rawTitle.replace(/\s*\[fallback:[^\]]+\]$/i, "").trim()
} else {
existing.baseTitle = "Session"
}
}
const variantLabel = input.variant ? ` ${input.variant}` : ""
const newTitle = `${existing.baseTitle} [fallback: ${input.providerID}/${input.modelID}${variantLabel}]`
await ctx.client.session
.update({
path: { id: input.sessionID },
body: { title: newTitle },
query: { directory: ctx.directory },
})
.catch(() => {})
existing.lastKey = key
fallbackTitleState.set(input.sessionID, existing)
if (fallbackTitleState.size > fallbackTitleMaxEntries) {
const oldestKey = fallbackTitleState.keys().next().value
if (oldestKey) fallbackTitleState.delete(oldestKey)
}
}
// Model fallback hook (configurable via model_fallback config + disabled_hooks)
// This handles automatic model switching when model errors occur
const isModelFallbackConfigEnabled = pluginConfig.model_fallback ?? true
const modelFallback = isModelFallbackConfigEnabled && isHookEnabled("model-fallback")
? safeHook("model-fallback", () =>
createModelFallbackHook({
toast: async ({ title, message, variant, duration }) => {
await ctx.client.tui
.showToast({
body: {
title,
message,
variant: variant ?? "warning",
duration: duration ?? 5000,
},
})
.catch(() => {})
},
onApplied: enableFallbackTitle ? updateFallbackTitle : undefined,
}))
: null
const modelFallback = createConfiguredModelFallbackHook({
ctx,
pluginConfig,
isHookEnabled,
safeHook,
})
const anthropicContextWindowLimitRecovery = isHookEnabled("anthropic-context-window-limit-recovery")
? safeHook("anthropic-context-window-limit-recovery", () =>

View File

@@ -0,0 +1,63 @@
declare const require: (name: string) => any
const { describe, expect, test } = require("bun:test")
import type { OhMyOpenCodeConfig } from "../../config"
import {
hasConfiguredModelFallbacks,
resolveModelFallbackEnabled,
} from "./model-fallback-config"
describe("model-fallback-config", () => {
  test("detects agent fallback_models configuration", () => {
    //#given — an agent carrying an explicit fallback chain
    const config: OhMyOpenCodeConfig = {
      agents: {
        sisyphus: {
          fallback_models: ["openai/gpt-5.2", "anthropic/claude-opus-4-6"],
        },
      },
    }

    //#when
    const detected = hasConfiguredModelFallbacks(config)

    //#then
    expect(detected).toBe(true)
  })

  test("auto-enables model fallback when category fallback_models are configured", () => {
    //#given — a category-level chain with no explicit model_fallback flag
    const config: OhMyOpenCodeConfig = {
      categories: {
        quick: {
          fallback_models: ["openai/gpt-5.2"],
        },
      },
    }

    //#when
    const enabled = resolveModelFallbackEnabled(config)

    //#then
    expect(enabled).toBe(true)
  })

  test("keeps model fallback disabled when explicitly turned off", () => {
    //#given — explicit opt-out must win over configured fallbacks
    const config: OhMyOpenCodeConfig = {
      model_fallback: false,
      agents: {
        sisyphus: {
          fallback_models: ["openai/gpt-5.2"],
        },
      },
    }

    //#when
    const enabled = resolveModelFallbackEnabled(config)

    //#then
    expect(enabled).toBe(false)
  })
})

View File

@@ -0,0 +1,33 @@
import type { OhMyOpenCodeConfig } from "../../config"
import { log, normalizeFallbackModels } from "../../shared"
type FallbackModelsConfig = {
fallback_models?: string | string[]
}
/**
 * True when the given agent/category config declares at least one usable
 * fallback model after normalization of the string-or-array form.
 */
function hasFallbackModels(config: FallbackModelsConfig | undefined): boolean {
  const models = normalizeFallbackModels(config?.fallback_models)
  return models != null && models.length > 0
}
/**
 * Scans every agent entry, then every category entry, for a non-empty
 * `fallback_models` list. Used to auto-enable the model-fallback hook when
 * the user configured fallback chains but left `model_fallback` unset.
 */
export function hasConfiguredModelFallbacks(pluginConfig: OhMyOpenCodeConfig): boolean {
  const sections = [
    Object.values<FallbackModelsConfig | undefined>(pluginConfig.agents ?? {}),
    Object.values<FallbackModelsConfig | undefined>(pluginConfig.categories ?? {}),
  ]
  for (const entries of sections) {
    for (const entry of entries) {
      if (hasFallbackModels(entry)) {
        return true
      }
    }
  }
  return false
}
export function resolveModelFallbackEnabled(pluginConfig: OhMyOpenCodeConfig): boolean {
const hasConfiguredFallbacks = hasConfiguredModelFallbacks(pluginConfig)
if (pluginConfig.model_fallback === false && hasConfiguredFallbacks) {
log(
"model_fallback is disabled while fallback_models are configured; set model_fallback=true to keep provider fallback retries enabled",
)
}
return pluginConfig.model_fallback ?? hasConfiguredFallbacks
}

View File

@@ -40,6 +40,28 @@ describe("model-error-classifier", () => {
expect(result).toBe(true)
})
test("treats FreeUsageLimitError names as retryable", () => {
//#given
const error = { name: "FreeUsageLimitError" }
//#when
const result = shouldRetryError(error)
//#then
expect(result).toBe(true)
})
test("treats free tier usage limit messages as retryable", () => {
//#given
const error = { message: "Free tier daily limit reached for this provider" }
//#when
const result = shouldRetryError(error)
//#then
expect(result).toBe(true)
})
test("selectFallbackProvider prefers first connected provider in preference order", () => {
//#given
readConnectedProvidersCacheMock.mockReturnValue(["anthropic", "nvidia"])

View File

@@ -6,13 +6,14 @@ import { readConnectedProvidersCache } from "./connected-providers-cache"
* These errors completely halt the action loop and should trigger fallback retry.
*/
const RETRYABLE_ERROR_NAMES = new Set([
"ProviderModelNotFoundError",
"RateLimitError",
"QuotaExceededError",
"InsufficientCreditsError",
"ModelUnavailableError",
"ProviderConnectionError",
"AuthenticationError",
"providermodelnotfounderror",
"ratelimiterror",
"quotaexceedederror",
"insufficientcreditserror",
"modelunavailableerror",
"providerconnectionerror",
"authenticationerror",
"freeusagelimiterror",
])
/**
@@ -20,24 +21,28 @@ const RETRYABLE_ERROR_NAMES = new Set([
* These errors are typically user-induced or fixable without switching models.
*/
const NON_RETRYABLE_ERROR_NAMES = new Set([
"MessageAbortedError",
"PermissionDeniedError",
"ContextLengthError",
"TimeoutError",
"ValidationError",
"SyntaxError",
"UserError",
"messageabortederror",
"permissiondeniederror",
"contextlengtherror",
"timeouterror",
"validationerror",
"syntaxerror",
"usererror",
])
/**
* Message patterns that indicate a retryable error even without a known error name.
*/
const RETRYABLE_MESSAGE_PATTERNS = [
const RETRYABLE_MESSAGE_PATTERNS: Array<string | RegExp> = [
"rate_limit",
"rate limit",
"quota",
"quota will reset after",
"usage limit has been reached",
/free\s+usage/i,
/free\s+tier/i,
/daily\s+limit/i,
/limit\s+reached/i,
"all credentials for model",
"cooling down",
"exhausted your capacity",
@@ -55,17 +60,9 @@ const RETRYABLE_MESSAGE_PATTERNS = [
"timeout",
"service unavailable",
"internal_server_error",
"free usage",
"usage exceeded",
"credit",
"balance",
"temporarily unavailable",
"try again",
"503",
"502",
"504",
"429",
"529",
]
const AUTO_RETRY_GATE_PATTERNS = [
@@ -85,6 +82,11 @@ function hasProviderAutoRetrySignal(message: string): boolean {
return AUTO_RETRY_GATE_PATTERNS.some((pattern) => message.includes(pattern))
}
function matchesRetryableMessagePattern(message: string): boolean {
return RETRYABLE_MESSAGE_PATTERNS.some((pattern) =>
typeof pattern === "string" ? message.includes(pattern) : pattern.test(message))
}
export interface ErrorInfo {
name?: string
message?: string
@@ -97,12 +99,14 @@ export interface ErrorInfo {
export function isRetryableModelError(error: ErrorInfo): boolean {
// If we have an error name, check against known lists
if (error.name) {
const normalizedErrorName = error.name.toLowerCase()
// Explicit non-retryable takes precedence
if (NON_RETRYABLE_ERROR_NAMES.has(error.name)) {
if (NON_RETRYABLE_ERROR_NAMES.has(normalizedErrorName)) {
return false
}
// Check if it's a known retryable error
if (RETRYABLE_ERROR_NAMES.has(error.name)) {
if (RETRYABLE_ERROR_NAMES.has(normalizedErrorName)) {
return true
}
}
@@ -112,7 +116,7 @@ export function isRetryableModelError(error: ErrorInfo): boolean {
if (hasProviderAutoRetrySignal(msg)) {
return true
}
return RETRYABLE_MESSAGE_PATTERNS.some((pattern) => msg.includes(pattern))
return matchesRetryableMessagePattern(msg)
}
/**

View File

@@ -239,19 +239,19 @@ describe("AGENT_MODEL_REQUIREMENTS", () => {
})
describe("CATEGORY_MODEL_REQUIREMENTS", () => {
test("ultrabrain has valid fallbackChain with gpt-5.4 as primary", () => {
test("ultrabrain has valid fallbackChain with gpt-5.3-codex as primary", () => {
// given - ultrabrain category requirement
const ultrabrain = CATEGORY_MODEL_REQUIREMENTS["ultrabrain"]
// when - accessing ultrabrain requirement
// then - fallbackChain exists with gpt-5.4 as first entry
// then - fallbackChain exists with gpt-5.3-codex as first entry
expect(ultrabrain).toBeDefined()
expect(ultrabrain.fallbackChain).toBeArray()
expect(ultrabrain.fallbackChain.length).toBeGreaterThan(0)
const primary = ultrabrain.fallbackChain[0]
expect(primary.variant).toBe("xhigh")
expect(primary.model).toBe("gpt-5.4")
expect(primary.model).toBe("gpt-5.3-codex")
expect(primary.providers[0]).toBe("openai")
})
@@ -326,25 +326,20 @@ describe("CATEGORY_MODEL_REQUIREMENTS", () => {
expect(primary.providers[0]).toBe("anthropic")
})
test("unspecified-high has claude-opus-4-6 as primary and gpt-5.4 as secondary", () => {
test("unspecified-high has gpt-5.4 as primary", () => {
// #given - unspecified-high category requirement
const unspecifiedHigh = CATEGORY_MODEL_REQUIREMENTS["unspecified-high"]
// #when - accessing unspecified-high requirement
// #then - claude-opus-4-6 is first and gpt-5.4 is second
// #then - gpt-5.4 is first
expect(unspecifiedHigh).toBeDefined()
expect(unspecifiedHigh.fallbackChain).toBeArray()
expect(unspecifiedHigh.fallbackChain.length).toBeGreaterThan(1)
const primary = unspecifiedHigh.fallbackChain[0]
expect(primary.model).toBe("claude-opus-4-6")
expect(primary.variant).toBe("max")
expect(primary.providers).toEqual(["anthropic", "github-copilot", "opencode"])
const secondary = unspecifiedHigh.fallbackChain[1]
expect(secondary.model).toBe("gpt-5.4")
expect(secondary.variant).toBe("high")
expect(secondary.providers).toEqual(["openai", "github-copilot", "opencode"])
expect(primary.model).toBe("gpt-5.4")
expect(primary.variant).toBe("high")
expect(primary.providers).toEqual(["openai", "github-copilot", "opencode"])
})
test("artistry has valid fallbackChain with gemini-3.1-pro as primary", () => {

View File

@@ -205,7 +205,7 @@ export const CATEGORY_MODEL_REQUIREMENTS: Record<string, ModelRequirement> = {
fallbackChain: [
{
providers: ["openai", "opencode"],
model: "gpt-5.4",
model: "gpt-5.3-codex",
variant: "xhigh",
},
{
@@ -288,16 +288,16 @@ export const CATEGORY_MODEL_REQUIREMENTS: Record<string, ModelRequirement> = {
},
"unspecified-high": {
fallbackChain: [
{
providers: ["anthropic", "github-copilot", "opencode"],
model: "claude-opus-4-6",
variant: "max",
},
{
providers: ["openai", "github-copilot", "opencode"],
model: "gpt-5.4",
variant: "high",
},
{
providers: ["anthropic", "github-copilot", "opencode"],
model: "claude-opus-4-6",
variant: "max",
},
{ providers: ["zai-coding-plan", "opencode"], model: "glm-5" },
{ providers: ["kimi-for-coding"], model: "k2p5" },
{

View File

@@ -92,12 +92,12 @@
| Category | Model | Domain |
|----------|-------|--------|
| visual-engineering | gemini-3.1-pro high | Frontend, UI/UX |
| ultrabrain | gpt-5.4 xhigh | Hard logic |
| ultrabrain | gpt-5.3-codex xhigh | Hard logic |
| deep | gpt-5.3-codex medium | Autonomous problem-solving |
| artistry | gemini-3.1-pro high | Creative approaches |
| quick | claude-haiku-4-5 | Trivial tasks |
| unspecified-low | claude-sonnet-4-6 | Moderate effort |
| unspecified-high | claude-opus-4-6 max | High effort |
| unspecified-high | gpt-5.4 high | High effort |
| writing | kimi-k2p5 | Documentation |
## HOW TO ADD A TOOL

View File

@@ -1,68 +0,0 @@
/// <reference types="bun-types" />
import type { ToolContext } from "@opencode-ai/plugin/tool"
import { describe, expect, test } from "bun:test"
import type { BackgroundTask } from "../../features/background-agent"
import { clearPendingStore, consumeToolMetadata } from "../../features/tool-metadata-store"
import type { BackgroundOutputClient, BackgroundOutputManager } from "./clients"
import { createBackgroundOutput } from "./create-background-output"
const projectDir = "/Users/yeongyu/local-workspaces/oh-my-opencode"
type ToolContextWithCallID = ToolContext & {
callID: string
}
describe("createBackgroundOutput metadata", () => {
test("omits sessionId metadata when task session is not yet assigned", async () => {
// #given
clearPendingStore()
const task: BackgroundTask = {
id: "task-1",
sessionID: undefined,
parentSessionID: "main-1",
parentMessageID: "msg-1",
description: "background task",
prompt: "do work",
agent: "test-agent",
status: "running",
}
const manager: BackgroundOutputManager = {
getTask: id => (id === task.id ? task : undefined),
}
const client: BackgroundOutputClient = {
session: {
messages: async () => ({ data: [] }),
},
}
const tool = createBackgroundOutput(manager, client)
const context = {
sessionID: "test-session",
messageID: "test-message",
agent: "test-agent",
directory: projectDir,
worktree: projectDir,
abort: new AbortController().signal,
metadata: () => {},
ask: async () => {},
callID: "call-1",
} as ToolContextWithCallID
// #when
await tool.execute({ task_id: task.id }, context)
// #then
expect(consumeToolMetadata("test-session", "call-1")).toEqual({
title: "test-agent - background task",
metadata: {
agent: "test-agent",
category: undefined,
description: "background task",
task_id: "task-1",
},
})
clearPendingStore()
})
})

View File

@@ -75,7 +75,7 @@ export function createBackgroundOutput(manager: BackgroundOutputManager, client:
agent: task.agent,
category: task.category,
description: task.description,
...(task.sessionID ? { sessionId: task.sessionID } : {}),
sessionId: task.sessionID ?? "pending",
} as Record<string, unknown>,
}
ctx.metadata?.(meta)

View File

@@ -1,84 +0,0 @@
/// <reference types="bun-types" />
import type { PluginInput } from "@opencode-ai/plugin"
import type { ToolContext } from "@opencode-ai/plugin/tool"
import { describe, expect, mock, test } from "bun:test"
import type { BackgroundManager } from "../../features/background-agent"
import { clearPendingStore, consumeToolMetadata } from "../../features/tool-metadata-store"
import { createBackgroundTask } from "./create-background-task"
const projectDir = "/Users/yeongyu/local-workspaces/oh-my-opencode"
type ToolContextWithCallID = ToolContext & {
callID: string
}
describe("createBackgroundTask metadata", () => {
test("omits sessionId metadata when session is not yet assigned", async () => {
// #given
clearPendingStore()
const manager = {
launch: mock(() => Promise.resolve({
id: "task-1",
sessionID: null,
description: "Test task",
agent: "test-agent",
status: "pending",
})),
getTask: mock(() => undefined),
} as unknown as BackgroundManager
const client = {
session: {
messages: mock(() => Promise.resolve({ data: [] })),
},
} as unknown as PluginInput["client"]
let capturedMetadata: { title?: string; metadata?: Record<string, unknown> } | undefined
const tool = createBackgroundTask(manager, client)
const originalDateNow = Date.now
let dateNowCallCount = 0
Date.now = () => {
dateNowCallCount += 1
return dateNowCallCount === 1 ? 0 : 30001
}
try {
// #when
const context: ToolContextWithCallID = {
sessionID: "test-session",
messageID: "test-message",
agent: "test-agent",
directory: projectDir,
worktree: projectDir,
abort: new AbortController().signal,
ask: async () => {},
callID: "call-1",
metadata: input => {
capturedMetadata = input
},
}
const output = await tool.execute(
{
description: "Test background task",
prompt: "Test prompt",
agent: "test-agent",
},
context
)
// #then
expect(output).toContain("Session ID: (not yet assigned)")
expect(output).not.toContain('Session ID: pending')
expect(capturedMetadata?.metadata).toEqual({})
expect(consumeToolMetadata("test-session", "call-1")).toEqual({
title: "Test background task",
metadata: {},
})
} finally {
Date.now = originalDateNow
clearPendingStore()
}
})
})

View File

@@ -94,9 +94,7 @@ export function createBackgroundTask(
const bgMeta = {
title: args.description,
metadata: {
...(sessionId ? { sessionId } : {}),
},
metadata: { sessionId: sessionId ?? "pending" },
}
await ctx.metadata?.(bgMeta)
@@ -107,7 +105,7 @@ export function createBackgroundTask(
return `Background task launched successfully.
Task ID: ${task.id}
Session ID: ${sessionId ?? "(not yet assigned)"}
Session ID: ${sessionId ?? "pending"}
Description: ${task.description}
Agent: ${task.agent}
Status: ${task.status}

View File

@@ -284,12 +284,12 @@ You are NOT an interactive assistant. You are an autonomous problem-solver.
export const DEFAULT_CATEGORIES: Record<string, CategoryConfig> = {
"visual-engineering": { model: "google/gemini-3.1-pro", variant: "high" },
ultrabrain: { model: "openai/gpt-5.4", variant: "xhigh" },
ultrabrain: { model: "openai/gpt-5.3-codex", variant: "xhigh" },
deep: { model: "openai/gpt-5.3-codex", variant: "medium" },
artistry: { model: "google/gemini-3.1-pro", variant: "high" },
quick: { model: "anthropic/claude-haiku-4-5" },
"unspecified-low": { model: "anthropic/claude-sonnet-4-6" },
"unspecified-high": { model: "anthropic/claude-opus-4-6", variant: "max" },
"unspecified-high": { model: "openai/gpt-5.4-high" },
writing: { model: "kimi-for-coding/k2p5" },
}

View File

@@ -3,17 +3,6 @@ const { describe, expect, test } = require("bun:test")
import { __resetTimingConfig, __setTimingConfig, getDefaultSyncPollTimeoutMs } from "./timing"
describe("timing sync poll timeout defaults", () => {
test("default sync timeout is 30 minutes", () => {
// #given
__resetTimingConfig()
// #when
const timeout = getDefaultSyncPollTimeoutMs()
// #then
expect(timeout).toBe(30 * 60 * 1000)
})
test("default sync timeout accessor follows MAX_POLL_TIME_MS config", () => {
// #given
__resetTimingConfig()

View File

@@ -3,7 +3,7 @@ let MIN_STABILITY_TIME_MS = 10000
let STABILITY_POLLS_REQUIRED = 3
let WAIT_FOR_SESSION_INTERVAL_MS = 100
let WAIT_FOR_SESSION_TIMEOUT_MS = 30000
const DEFAULT_POLL_TIMEOUT_MS = 30 * 60 * 1000
const DEFAULT_POLL_TIMEOUT_MS = 10 * 60 * 1000
let MAX_POLL_TIME_MS = DEFAULT_POLL_TIMEOUT_MS
let SESSION_CONTINUATION_STABILITY_MS = 5000

View File

@@ -83,7 +83,7 @@ describe("sisyphus-task", () => {
// when / #then
expect(category).toBeDefined()
expect(category.model).toBe("openai/gpt-5.4")
expect(category.model).toBe("openai/gpt-5.3-codex")
expect(category.variant).toBe("xhigh")
})
@@ -97,14 +97,14 @@ describe("sisyphus-task", () => {
expect(category.variant).toBe("medium")
})
test("unspecified-high category uses claude-opus-4-6 max as primary", () => {
test("unspecified-high category uses explicit high model", () => {
// given
const category = DEFAULT_CATEGORIES["unspecified-high"]
// when / #then
expect(category).toBeDefined()
expect(category.model).toBe("anthropic/claude-opus-4-6")
expect(category.variant).toBe("max")
expect(category.model).toBe("openai/gpt-5.4-high")
expect(category.variant).toBeUndefined()
})
})
@@ -1036,7 +1036,7 @@ describe("sisyphus-task", () => {
abort: new AbortController().signal,
}
// when - unspecified-high uses claude-opus-4-6 max in DEFAULT_CATEGORIES
// when - unspecified-high uses the explicit high model in DEFAULT_CATEGORIES
await tool.execute(
{
description: "Test unspecified-high default variant",
@@ -1048,11 +1048,10 @@ describe("sisyphus-task", () => {
toolContext
)
// then - claude-opus-4-6 should be passed with max variant
// then - the explicit high model should be passed without a separate variant
expect(launchInput.model).toEqual({
providerID: "anthropic",
modelID: "claude-opus-4-6",
variant: "max",
providerID: "openai",
modelID: "gpt-5.4-high",
})
}, { timeout: 20000 })
@@ -1097,7 +1096,7 @@ describe("sisyphus-task", () => {
abort: new AbortController().signal,
}
// when - unspecified-high uses claude-opus-4-6 max in DEFAULT_CATEGORIES
// when - unspecified-high uses the explicit high model in DEFAULT_CATEGORIES
await tool.execute(
{
description: "Test unspecified-high sync variant",
@@ -1109,12 +1108,12 @@ describe("sisyphus-task", () => {
toolContext
)
// then - claude-opus-4-6 should be passed with max variant
// then - the explicit high model should be passed without a separate variant
expect(promptBody.model).toEqual({
providerID: "anthropic",
modelID: "claude-opus-4-6",
providerID: "openai",
modelID: "gpt-5.4-high",
})
expect(promptBody.variant).toBe("max")
expect(promptBody.variant).toBeUndefined()
}, { timeout: 20000 })
})
@@ -2403,7 +2402,7 @@ describe("sisyphus-task", () => {
abort: new AbortController().signal,
}
// when - using ultrabrain category (default model is openai/gpt-5.4)
// when - using ultrabrain category (default model is openai/gpt-5.3-codex)
await tool.execute(
{
description: "Override precedence test",
@@ -2455,7 +2454,7 @@ describe("sisyphus-task", () => {
client: mockClient,
sisyphusJuniorModel: "anthropic/claude-sonnet-4-6",
userCategories: {
ultrabrain: { model: "openai/gpt-5.4" },
ultrabrain: { model: "openai/gpt-5.3-codex" },
},
connectedProvidersOverride: TEST_CONNECTED_PROVIDERS,
availableModelsOverride: createTestAvailableModels(),
@@ -2482,7 +2481,7 @@ describe("sisyphus-task", () => {
// then - explicit category model should win
expect(launchInput.model.providerID).toBe("openai")
expect(launchInput.model.modelID).toBe("gpt-5.4")
expect(launchInput.model.modelID).toBe("gpt-5.3-codex")
})
test("sisyphus-junior model override works with quick category (#1295)", async () => {
@@ -2947,7 +2946,7 @@ describe("sisyphus-task", () => {
// then - catalog model is used
expect(resolved).not.toBeNull()
expect(resolved!.config.model).toBe("openai/gpt-5.4")
expect(resolved!.config.model).toBe("openai/gpt-5.3-codex")
expect(resolved!.config.variant).toBe("xhigh")
})
@@ -2971,10 +2970,10 @@ describe("sisyphus-task", () => {
// when
const resolved = resolveCategoryConfig(categoryName, { inheritedModel, systemDefaultModel: SYSTEM_DEFAULT_MODEL })
// then - category's built-in model wins (ultrabrain uses gpt-5.4)
// then - category's built-in model wins (ultrabrain uses gpt-5.3-codex)
expect(resolved).not.toBeNull()
const actualModel = resolved!.config.model
expect(actualModel).toBe("openai/gpt-5.4")
expect(actualModel).toBe("openai/gpt-5.3-codex")
})
test("when user defines model - modelInfo should report user-defined regardless of inheritedModel", () => {
@@ -3028,12 +3027,12 @@ describe("sisyphus-task", () => {
const categoryName = "ultrabrain"
const inheritedModel = "anthropic/claude-opus-4-6"
// when category has a built-in model (gpt-5.4 for ultrabrain)
// when category has a built-in model (gpt-5.3-codex for ultrabrain)
const resolved = resolveCategoryConfig(categoryName, { inheritedModel, systemDefaultModel: SYSTEM_DEFAULT_MODEL })
// then category's built-in model should be used, NOT inheritedModel
expect(resolved).not.toBeNull()
expect(resolved!.model).toBe("openai/gpt-5.4")
expect(resolved!.model).toBe("openai/gpt-5.3-codex")
})
test("FIXED: systemDefaultModel is used when no userConfig.model and no inheritedModel", () => {

View File

@@ -123,11 +123,7 @@ export function createDelegateTask(options: DelegateTaskToolOptions): ToolDefini
})
if (args.run_in_background === undefined) {
if (args.category || args.subagent_type || args.session_id) {
args.run_in_background = false
} else {
throw new Error(`Invalid arguments: 'run_in_background' parameter is REQUIRED. Use run_in_background=false for task delegation, run_in_background=true only for parallel exploration.`)
}
throw new Error(`Invalid arguments: 'run_in_background' parameter is REQUIRED. Use run_in_background=false for task delegation, run_in_background=true only for parallel exploration.`)
}
if (typeof args.load_skills === "string") {
try {

View File

@@ -30,8 +30,8 @@ export function createHashlineEditTool(): ToolDefinition {
pos: tool.schema.string().optional().describe("Primary anchor in LINE#ID format"),
end: tool.schema.string().optional().describe("Range end anchor in LINE#ID format"),
lines: tool.schema
.union([tool.schema.string(), tool.schema.null()])
.describe("Replacement or inserted lines as newline-delimited string. null deletes with replace"),
.union([tool.schema.string(), tool.schema.array(tool.schema.string()), tool.schema.null()])
.describe("Replacement or inserted lines. null/[] deletes with replace"),
})
)
.describe("Array of edit operations to apply (empty when delete=true)"),