diff --git a/assets/oh-my-opencode.schema.json b/assets/oh-my-opencode.schema.json index ea92a49e5..b9fbe2404 100644 --- a/assets/oh-my-opencode.schema.json +++ b/assets/oh-my-opencode.schema.json @@ -102,6 +102,7 @@ "task-resume-info", "stop-continuation-guard", "tasks-todowrite-disabler", + "runtime-fallback", "write-existing-file-guard", "anthropic-effort", "hashline-read-enhancer", @@ -142,6 +143,19 @@ "model": { "type": "string" }, + "fallback_models": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, "variant": { "type": "string" }, @@ -327,6 +341,19 @@ "model": { "type": "string" }, + "fallback_models": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, "variant": { "type": "string" }, @@ -512,6 +539,19 @@ "model": { "type": "string" }, + "fallback_models": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, "variant": { "type": "string" }, @@ -697,6 +737,19 @@ "model": { "type": "string" }, + "fallback_models": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, "variant": { "type": "string" }, @@ -882,6 +935,19 @@ "model": { "type": "string" }, + "fallback_models": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, "variant": { "type": "string" }, @@ -1067,6 +1133,19 @@ "model": { "type": "string" }, + "fallback_models": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, "variant": { "type": "string" }, @@ -1252,6 +1331,19 @@ "model": { "type": "string" }, + "fallback_models": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, "variant": { "type": "string" }, @@ -1437,6 +1529,19 @@ "model": { "type": "string" }, + 
"fallback_models": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, "variant": { "type": "string" }, @@ -1622,6 +1727,19 @@ "model": { "type": "string" }, + "fallback_models": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, "variant": { "type": "string" }, @@ -1807,6 +1925,19 @@ "model": { "type": "string" }, + "fallback_models": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, "variant": { "type": "string" }, @@ -1992,6 +2123,19 @@ "model": { "type": "string" }, + "fallback_models": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, "variant": { "type": "string" }, @@ -2177,6 +2321,19 @@ "model": { "type": "string" }, + "fallback_models": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, "variant": { "type": "string" }, @@ -2362,6 +2519,19 @@ "model": { "type": "string" }, + "fallback_models": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, "variant": { "type": "string" }, @@ -2547,6 +2717,19 @@ "model": { "type": "string" }, + "fallback_models": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, "variant": { "type": "string" }, @@ -2740,6 +2923,19 @@ "model": { "type": "string" }, + "fallback_models": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, "variant": { "type": "string" }, @@ -3152,6 +3348,37 @@ ], "additionalProperties": false }, + "runtime_fallback": { + "type": "object", + "properties": { + "enabled": { + "type": "boolean" + }, + "retry_on_errors": { + "type": "array", + "items": { + "type": "number" + } + }, + "max_fallback_attempts": 
{ + "type": "number", + "minimum": 1, + "maximum": 20 + }, + "cooldown_seconds": { + "type": "number", + "minimum": 0 + }, + "timeout_seconds": { + "type": "number", + "minimum": 0 + }, + "notify_on_fallback": { + "type": "boolean" + } + }, + "additionalProperties": false + }, "background_task": { "type": "object", "properties": { diff --git a/docs/configurations.md b/docs/configurations.md index 1c7f82127..51cd5af50 100644 --- a/docs/configurations.md +++ b/docs/configurations.md @@ -163,19 +163,20 @@ Override built-in agent settings: } ``` -Each agent supports: `model`, `temperature`, `top_p`, `prompt`, `prompt_append`, `tools`, `disable`, `description`, `mode`, `color`, `permission`, `category`, `variant`, `maxTokens`, `thinking`, `reasoningEffort`, `textVerbosity`, `providerOptions`. +Each agent supports: `model`, `fallback_models`, `temperature`, `top_p`, `prompt`, `prompt_append`, `tools`, `disable`, `description`, `mode`, `color`, `permission`, `category`, `variant`, `maxTokens`, `thinking`, `reasoningEffort`, `textVerbosity`, `providerOptions`. ### Additional Agent Options -| Option | Type | Description | -| ------------------- | ------- | ----------------------------------------------------------------------------------------------- | -| `category` | string | Category name to inherit model and other settings from category defaults | -| `variant` | string | Model variant (e.g., `max`, `high`, `medium`, `low`, `xhigh`) | -| `maxTokens` | number | Maximum tokens for response. Passed directly to OpenCode SDK. | -| `thinking` | object | Extended thinking configuration for Anthropic models. See [Thinking Options](#thinking-options) below. | -| `reasoningEffort` | string | OpenAI reasoning effort level. Values: `low`, `medium`, `high`, `xhigh`. | -| `textVerbosity` | string | Text verbosity level. Values: `low`, `medium`, `high`. | -| `providerOptions` | object | Provider-specific options passed directly to OpenCode SDK. 
| +| Option | Type | Description | +| ------------------- | -------------- | ----------------------------------------------------------------------------------------------- | +| `fallback_models` | string/array | Fallback models for runtime switching on API errors. Single string or array of model strings. | +| `category` | string | Category name to inherit model and other settings from category defaults | +| `variant` | string | Model variant (e.g., `max`, `high`, `medium`, `low`, `xhigh`) | +| `maxTokens` | number | Maximum tokens for response. Passed directly to OpenCode SDK. | +| `thinking` | object | Extended thinking configuration for Anthropic models. See [Thinking Options](#thinking-options) below. | +| `reasoningEffort` | string | OpenAI reasoning effort level. Values: `low`, `medium`, `high`, `xhigh`. | +| `textVerbosity` | string | Text verbosity level. Values: `low`, `medium`, `high`. | +| `providerOptions` | object | Provider-specific options passed directly to OpenCode SDK. | #### Thinking Options (Anthropic) @@ -714,6 +715,84 @@ Configure concurrency limits for background agent tasks. This controls how many - Allow more concurrent tasks for fast/cheap models (e.g., Gemini Flash) - Respect provider rate limits by setting provider-level caps +## Runtime Fallback + +Automatically switch to backup models when the primary model encounters retryable API errors (rate limits, overload, etc.) or provider key misconfiguration errors (for example, missing API key). This keeps conversations running without manual intervention. 
+ +```json +{ + "runtime_fallback": { + "enabled": true, + "retry_on_errors": [400, 429, 503, 529], + "max_fallback_attempts": 3, + "cooldown_seconds": 60, + "timeout_seconds": 30, + "notify_on_fallback": true + } +} +``` + +| Option | Default | Description | +| ----------------------- | ---------------------- | --------------------------------------------------------------------------- | +| `enabled` | `true` | Enable runtime fallback | +| `retry_on_errors` | `[400, 429, 503, 529]` | HTTP status codes that trigger fallback (rate limit, service unavailable). Also supports certain classified provider errors (for example, missing API key) that do not expose HTTP status codes. | +| `max_fallback_attempts` | `3` | Maximum fallback attempts per session (1-20) | +| `cooldown_seconds` | `60` | Cooldown in seconds before retrying a failed model | +| `timeout_seconds` | `30` | Timeout in seconds for an in-flight fallback request before forcing the next fallback model. **⚠️ Set to `0` to disable auto-retry signal detection** (see below). | +| `notify_on_fallback` | `true` | Show toast notification when switching to a fallback model | + +### timeout_seconds: Understanding the 0 Value + +**⚠️ IMPORTANT**: Setting `timeout_seconds: 0` **disables auto-retry signal detection**. This is a critical behavior change: + +| Setting | Behavior | +|---------|----------| +| `timeout_seconds: 30` (default) | ✅ **Full fallback coverage**: Error-based fallback (429, 503, etc.) + auto-retry signal detection (provider messages like "retrying in 8h") | +| `timeout_seconds: 0` | ⚠️ **Limited fallback**: Only error-based fallback works. Provider retry messages are **completely ignored**. Timeout-based escalation is **disabled**. 
| + +**When `timeout_seconds: 0`:** +- ✅ HTTP errors (429, 503, 529) still trigger fallback +- ✅ Provider key errors (missing API key) still trigger fallback +- ❌ Provider retry messages ("retrying in Xh") are **ignored** +- ❌ Timeout-based escalation is **disabled** +- ❌ Hanging requests do **not** advance to the next fallback model + +**Recommendation**: Use a non-zero value (e.g., `30` seconds) to enable full fallback coverage. Only set to `0` if you explicitly want to disable auto-retry signal detection. + +### How It Works + +1. When an API error matching `retry_on_errors` occurs (or a classified provider key error such as missing API key), the hook intercepts it +2. The next request automatically uses the next available model from `fallback_models` +3. Failed models enter a cooldown period before being retried +4. If `timeout_seconds > 0` and a fallback provider hangs, timeout advances to the next fallback model +5. Toast notification (optional) informs you of the model switch + +### Configuring Fallback Models + +Define `fallback_models` at the agent or category level: + +```json +{ + "agents": { + "sisyphus": { + "model": "anthropic/claude-opus-4-5", + "fallback_models": ["openai/gpt-5.2", "google/gemini-3-pro"] + } + }, + "categories": { + "ultrabrain": { + "model": "openai/gpt-5.2-codex", + "fallback_models": ["anthropic/claude-opus-4-5", "google/gemini-3-pro"] + } + } +} +``` + +When the primary model fails: +1. First fallback: `openai/gpt-5.2` +2. Second fallback: `google/gemini-3-pro` +3. After `max_fallback_attempts`, returns to primary model + ## Categories Categories enable domain-specific task delegation via the `task` tool. Each category applies runtime presets (model, temperature, prompt additions) when calling the `Sisyphus-Junior` agent. 
@@ -830,15 +909,75 @@ Add your own categories or override built-in ones: } ``` -Each category supports: `model`, `temperature`, `top_p`, `maxTokens`, `thinking`, `reasoningEffort`, `textVerbosity`, `tools`, `prompt_append`, `variant`, `description`, `is_unstable_agent`. +Each category supports: `model`, `fallback_models`, `temperature`, `top_p`, `maxTokens`, `thinking`, `reasoningEffort`, `textVerbosity`, `tools`, `prompt_append`, `variant`, `description`, `is_unstable_agent`. ### Additional Category Options -| Option | Type | Default | Description | -| ------------------ | ------- | ------- | --------------------------------------------------------------------------------------------------- | -| `description` | string | - | Human-readable description of the category's purpose. Shown in task prompt. | -| `is_unstable_agent`| boolean | `false` | Mark agent as unstable - forces background mode for monitoring. Auto-enabled for gemini models. | +| Option | Type | Default | Description | +| ------------------- | ------------ | ------- | --------------------------------------------------------------------------------------------------- | +| `fallback_models` | string/array | - | Fallback models for runtime switching on API errors. Single string or array of model strings. | +| `description` | string | - | Human-readable description of the category's purpose. Shown in task prompt. | +| `is_unstable_agent` | boolean | `false` | Mark agent as unstable - forces background mode for monitoring. Auto-enabled for gemini models. | +## Runtime Fallback + +Automatically switch to backup models when the primary model encounters retryable API errors (rate limits, overload, etc.) or provider key misconfiguration errors (for example, missing API key). This keeps conversations running without manual intervention. 
+ +```json +{ + "runtime_fallback": { + "enabled": true, + "retry_on_errors": [400, 429, 503, 529], + "max_fallback_attempts": 3, + "cooldown_seconds": 60, + "timeout_seconds": 30, + "notify_on_fallback": true + } +} +``` + +| Option | Default | Description | +| ----------------------- | ---------------------- | --------------------------------------------------------------------------- | +| `enabled` | `true` | Enable runtime fallback | +| `retry_on_errors` | `[400, 429, 503, 529]` | HTTP status codes that trigger fallback (rate limit, service unavailable). Also supports certain classified provider errors (for example, missing API key) that do not expose HTTP status codes. | +| `max_fallback_attempts` | `3` | Maximum fallback attempts per session (1-20) | +| `cooldown_seconds` | `60` | Cooldown in seconds before retrying a failed model | +| `timeout_seconds` | `30` | Timeout in seconds for an in-flight fallback request before forcing the next fallback model. Set to `0` to disable timeout-based fallback and provider quota retry signal detection. | +| `notify_on_fallback` | `true` | Show toast notification when switching to a fallback model | + +### How It Works + +1. When an API error matching `retry_on_errors` occurs (or a classified provider key error such as missing API key), the hook intercepts it +2. The next request automatically uses the next available model from `fallback_models` +3. Failed models enter a cooldown period before being retried +4. If a fallback provider hangs, timeout advances to the next fallback model +5. 
Toast notification (optional) informs you of the model switch + +### Configuring Fallback Models + +Define `fallback_models` at the agent or category level: + +```json +{ + "agents": { + "sisyphus": { + "model": "anthropic/claude-opus-4-5", + "fallback_models": ["openai/gpt-5.2", "google/gemini-3-pro"] + } + }, + "categories": { + "ultrabrain": { + "model": "openai/gpt-5.2-codex", + "fallback_models": ["anthropic/claude-opus-4-5", "google/gemini-3-pro"] + } + } +} +``` + +When the primary model fails: +1. First fallback: `openai/gpt-5.2` +2. Second fallback: `google/gemini-3-pro` +3. After `max_fallback_attempts`, returns to primary model ## Model Resolution System At runtime, Oh My OpenCode uses a 3-step resolution process to determine which model to use for each agent and category. This happens dynamically based on your configuration and available models. @@ -973,7 +1112,7 @@ Disable specific built-in hooks via `disabled_hooks` in `~/.config/opencode/oh-m } ``` -Available hooks: `todo-continuation-enforcer`, `context-window-monitor`, `session-recovery`, `session-notification`, `comment-checker`, `grep-output-truncator`, `tool-output-truncator`, `directory-agents-injector`, `directory-readme-injector`, `empty-task-response-detector`, `think-mode`, `anthropic-context-window-limit-recovery`, `rules-injector`, `background-notification`, `auto-update-checker`, `startup-toast`, `keyword-detector`, `agent-usage-reminder`, `non-interactive-env`, `interactive-bash-session`, `compaction-context-injector`, `thinking-block-validator`, `claude-code-hooks`, `ralph-loop`, `preemptive-compaction`, `auto-slash-command`, `sisyphus-junior-notepad`, `no-sisyphus-gpt`, `start-work` +Available hooks: `todo-continuation-enforcer`, `context-window-monitor`, `session-recovery`, `session-notification`, `comment-checker`, `grep-output-truncator`, `tool-output-truncator`, `directory-agents-injector`, `directory-readme-injector`, `empty-task-response-detector`, `think-mode`, 
`anthropic-context-window-limit-recovery`, `rules-injector`, `background-notification`, `auto-update-checker`, `startup-toast`, `keyword-detector`, `agent-usage-reminder`, `non-interactive-env`, `interactive-bash-session`, `compaction-context-injector`, `thinking-block-validator`, `claude-code-hooks`, `ralph-loop`, `preemptive-compaction`, `auto-slash-command`, `sisyphus-junior-notepad`, `no-sisyphus-gpt`, `start-work`, `runtime-fallback` **Note on `directory-agents-injector`**: This hook is **automatically disabled** when running on OpenCode 1.1.37+ because OpenCode now has native support for dynamically resolving AGENTS.md files from subdirectories (PR #10678). This prevents duplicate AGENTS.md injection. For older OpenCode versions, the hook remains active to provide the same functionality. diff --git a/docs/features.md b/docs/features.md index 37a90a971..60c59e009 100644 --- a/docs/features.md +++ b/docs/features.md @@ -352,6 +352,7 @@ Hooks intercept and modify behavior at key points in the agent lifecycle. | **session-recovery** | Stop | Recovers from session errors - missing tool results, thinking block issues, empty messages. | | **anthropic-context-window-limit-recovery** | Stop | Handles Claude context window limits gracefully. | | **background-compaction** | Stop | Auto-compacts sessions hitting token limits. | +| **runtime-fallback** | Event | Automatically switches to backup models on retryable API errors (e.g., 429, 503, 529), provider key misconfiguration errors (e.g., missing API key), and auto-retry signals (when `timeout_seconds > 0`). Configurable retry logic with per-model cooldown. See [Runtime Fallback Configuration](configurations.md#runtime-fallback) for details on `timeout_seconds` behavior. 
| #### Truncation & Context Management diff --git a/src/agents/types.ts b/src/agents/types.ts index 92834883f..4ee1b3a6f 100644 --- a/src/agents/types.ts +++ b/src/agents/types.ts @@ -100,6 +100,7 @@ export type AgentName = BuiltinAgentName export type AgentOverrideConfig = Partial & { prompt_append?: string variant?: string + fallback_models?: string | string[] } export type AgentOverrides = Partial> diff --git a/src/agents/utils.test.ts b/src/agents/utils.test.ts index b179a47a9..2feb71216 100644 --- a/src/agents/utils.test.ts +++ b/src/agents/utils.test.ts @@ -51,7 +51,7 @@ describe("createBuiltinAgents with model overrides", () => { expect(agents.sisyphus.thinking).toBeUndefined() }) - test("Atlas uses uiSelectedModel when provided", async () => { + test("Atlas uses uiSelectedModel", async () => { // #given const fetchSpy = spyOn(shared, "fetchAvailableModels").mockResolvedValue( new Set(["openai/gpt-5.2", "anthropic/claude-sonnet-4-6"]) diff --git a/src/config/index.ts b/src/config/index.ts index 5f881831b..a561a2e66 100644 --- a/src/config/index.ts +++ b/src/config/index.ts @@ -11,6 +11,8 @@ export { RalphLoopConfigSchema, TmuxConfigSchema, TmuxLayoutSchema, + RuntimeFallbackConfigSchema, + FallbackModelsSchema, } from "./schema" export type { @@ -29,4 +31,6 @@ export type { TmuxLayout, SisyphusConfig, SisyphusTasksConfig, + RuntimeFallbackConfig, + FallbackModels, } from "./schema" diff --git a/src/config/schema.ts b/src/config/schema.ts index e4c55c6ff..0d2c590ba 100644 --- a/src/config/schema.ts +++ b/src/config/schema.ts @@ -9,11 +9,13 @@ export * from "./schema/comment-checker" export * from "./schema/commands" export * from "./schema/dynamic-context-pruning" export * from "./schema/experimental" +export * from "./schema/fallback-models" export * from "./schema/git-master" export * from "./schema/hooks" export * from "./schema/notification" export * from "./schema/oh-my-opencode-config" export * from "./schema/ralph-loop" +export * from 
"./schema/runtime-fallback" export * from "./schema/skills" export * from "./schema/sisyphus" export * from "./schema/sisyphus-agent" diff --git a/src/config/schema/agent-overrides.ts b/src/config/schema/agent-overrides.ts index 59bb360ee..0b142cb34 100644 --- a/src/config/schema/agent-overrides.ts +++ b/src/config/schema/agent-overrides.ts @@ -1,9 +1,11 @@ import { z } from "zod" +import { FallbackModelsSchema } from "./fallback-models" import { AgentPermissionSchema } from "./internal/permission" export const AgentOverrideConfigSchema = z.object({ /** @deprecated Use `category` instead. Model is inherited from category defaults. */ model: z.string().optional(), + fallback_models: FallbackModelsSchema.optional(), variant: z.string().optional(), /** Category name to inherit model and other settings from CategoryConfig */ category: z.string().optional(), diff --git a/src/config/schema/categories.ts b/src/config/schema/categories.ts index 980b3728d..b12005931 100644 --- a/src/config/schema/categories.ts +++ b/src/config/schema/categories.ts @@ -1,9 +1,11 @@ import { z } from "zod" +import { FallbackModelsSchema } from "./fallback-models" export const CategoryConfigSchema = z.object({ /** Human-readable description of the category's purpose. Shown in task prompt. 
*/ description: z.string().optional(), model: z.string().optional(), + fallback_models: FallbackModelsSchema.optional(), variant: z.string().optional(), temperature: z.number().min(0).max(2).optional(), top_p: z.number().min(0).max(1).optional(), diff --git a/src/config/schema/fallback-models.ts b/src/config/schema/fallback-models.ts new file mode 100644 index 000000000..f9c28f437 --- /dev/null +++ b/src/config/schema/fallback-models.ts @@ -0,0 +1,5 @@ +import { z } from "zod" + +export const FallbackModelsSchema = z.union([z.string(), z.array(z.string())]) + +export type FallbackModels = z.infer<typeof FallbackModelsSchema> diff --git a/src/config/schema/hooks.ts b/src/config/schema/hooks.ts index d82389516..d7229b36d 100644 --- a/src/config/schema/hooks.ts +++ b/src/config/schema/hooks.ts @@ -48,6 +48,7 @@ export const HookNameSchema = z.enum([ "task-resume-info", "stop-continuation-guard", "tasks-todowrite-disabler", + "runtime-fallback", "write-existing-file-guard", "anthropic-effort", "hashline-read-enhancer", diff --git a/src/config/schema/oh-my-opencode-config.ts b/src/config/schema/oh-my-opencode-config.ts index dbeedc379..283d71a3f 100644 --- a/src/config/schema/oh-my-opencode-config.ts +++ b/src/config/schema/oh-my-opencode-config.ts @@ -14,6 +14,7 @@ import { GitMasterConfigSchema } from "./git-master" import { HookNameSchema } from "./hooks" import { NotificationConfigSchema } from "./notification" import { RalphLoopConfigSchema } from "./ralph-loop" +import { RuntimeFallbackConfigSchema } from "./runtime-fallback" import { SkillsConfigSchema } from "./skills" import { SisyphusConfigSchema } from "./sisyphus" import { SisyphusAgentConfigSchema } from "./sisyphus-agent" @@ -44,6 +45,7 @@ export const OhMyOpenCodeConfigSchema = z.object({ auto_update: z.boolean().optional(), skills: SkillsConfigSchema.optional(), ralph_loop: RalphLoopConfigSchema.optional(), + runtime_fallback: RuntimeFallbackConfigSchema.optional(), background_task: BackgroundTaskConfigSchema.optional(), 
notification: NotificationConfigSchema.optional(), babysitting: BabysittingConfigSchema.optional(), diff --git a/src/config/schema/runtime-fallback.ts b/src/config/schema/runtime-fallback.ts new file mode 100644 index 000000000..53219611b --- /dev/null +++ b/src/config/schema/runtime-fallback.ts @@ -0,0 +1,18 @@ +import { z } from "zod" + +export const RuntimeFallbackConfigSchema = z.object({ + /** Enable runtime fallback (default: true) */ + enabled: z.boolean().optional(), + /** HTTP status codes that trigger fallback (default: [400, 429, 503, 529]) */ + retry_on_errors: z.array(z.number()).optional(), + /** Maximum fallback attempts per session (default: 3) */ + max_fallback_attempts: z.number().min(1).max(20).optional(), + /** Cooldown in seconds before retrying a failed model (default: 60) */ + cooldown_seconds: z.number().min(0).optional(), + /** Session-level timeout in seconds to advance fallback when provider hangs (default: 30). Set to 0 to disable auto-retry signal detection (only error-based fallback remains active). 
*/ + timeout_seconds: z.number().min(0).optional(), + /** Show toast notification when switching to fallback model (default: true) */ + notify_on_fallback: z.boolean().optional(), +}) + +export type RuntimeFallbackConfig = z.infer diff --git a/src/features/background-agent/manager.ts b/src/features/background-agent/manager.ts index 649460222..42d80d140 100644 --- a/src/features/background-agent/manager.ts +++ b/src/features/background-agent/manager.ts @@ -19,6 +19,7 @@ import { createInternalAgentTextPart, } from "../../shared" import { setSessionTools } from "../../shared/session-tools-store" +import { SessionCategoryRegistry } from "../../shared/session-category-registry" import { ConcurrencyManager } from "./concurrency" import type { BackgroundTaskConfig, TmuxConfig } from "../../config/schema" import { isInsideTmux } from "../../shared/tmux" @@ -910,6 +911,7 @@ export class BackgroundManager { subagentSessions.delete(task.sessionID) } } + SessionCategoryRegistry.remove(sessionID) } if (event.type === "session.status") { @@ -1196,6 +1198,8 @@ export class BackgroundManager { this.client.session.abort({ path: { id: task.sessionID }, }).catch(() => {}) + + SessionCategoryRegistry.remove(task.sessionID) } if (options?.skipNotification) { @@ -1343,6 +1347,8 @@ export class BackgroundManager { this.client.session.abort({ path: { id: task.sessionID }, }).catch(() => {}) + + SessionCategoryRegistry.remove(task.sessionID) } try { @@ -1688,6 +1694,7 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea this.tasks.delete(taskId) if (task.sessionID) { subagentSessions.delete(task.sessionID) + SessionCategoryRegistry.remove(task.sessionID) } } } diff --git a/src/hooks/AGENTS.md b/src/hooks/AGENTS.md index defe5890c..6658ac58d 100644 --- a/src/hooks/AGENTS.md +++ b/src/hooks/AGENTS.md @@ -9,6 +9,45 @@ ## HOOK TIERS ### Tier 1: Session Hooks (22) — `create-session-hooks.ts` +## STRUCTURE +``` +hooks/ +├── atlas/ # Main orchestration (757 lines) 
+├── anthropic-context-window-limit-recovery/ # Auto-summarize +├── todo-continuation-enforcer.ts # Force TODO completion +├── ralph-loop/ # Self-referential dev loop +├── claude-code-hooks/ # settings.json compat layer - see AGENTS.md +├── comment-checker/ # Prevents AI slop +├── auto-slash-command/ # Detects /command patterns +├── rules-injector/ # Conditional rules +├── directory-agents-injector/ # Auto-injects AGENTS.md +├── directory-readme-injector/ # Auto-injects README.md +├── edit-error-recovery/ # Recovers from failures +├── thinking-block-validator/ # Ensures valid +├── context-window-monitor.ts # Reminds of headroom +├── session-recovery/ # Auto-recovers from crashes +├── think-mode/ # Dynamic thinking budget +├── keyword-detector/ # ultrawork/search/analyze modes +├── background-notification/ # OS notification +├── prometheus-md-only/ # Planner read-only mode +├── agent-usage-reminder/ # Specialized agent hints +├── auto-update-checker/ # Plugin update check +├── tool-output-truncator.ts # Prevents context bloat +├── compaction-context-injector/ # Injects context on compaction +├── delegate-task-retry/ # Retries failed delegations +├── interactive-bash-session/ # Tmux session management +├── non-interactive-env/ # Non-TTY environment handling +├── start-work/ # Sisyphus work session starter +├── task-resume-info/ # Resume info for cancelled tasks +├── question-label-truncator/ # Auto-truncates question labels +├── category-skill-reminder/ # Reminds of category skills +├── empty-task-response-detector.ts # Detects empty responses +├── sisyphus-junior-notepad/ # Sisyphus Junior notepad +├── stop-continuation-guard/ # Guards stop continuation +├── subagent-question-blocker/ # Blocks subagent questions +├── runtime-fallback/ # Auto-switch models on API errors +└── index.ts # Hook aggregation + registration +``` | Hook | Event | Purpose | |------|-------|---------| diff --git a/src/hooks/index.ts b/src/hooks/index.ts index e82fb4ea5..fe2983038 100644 --- 
a/src/hooks/index.ts +++ b/src/hooks/index.ts @@ -45,6 +45,8 @@ export { createCompactionTodoPreserverHook } from "./compaction-todo-preserver"; export { createUnstableAgentBabysitterHook } from "./unstable-agent-babysitter"; export { createPreemptiveCompactionHook } from "./preemptive-compaction"; export { createTasksTodowriteDisablerHook } from "./tasks-todowrite-disabler"; +export { createRuntimeFallbackHook, type RuntimeFallbackHook, type RuntimeFallbackOptions } from "./runtime-fallback"; export { createWriteExistingFileGuardHook } from "./write-existing-file-guard"; export { createHashlineReadEnhancerHook } from "./hashline-read-enhancer"; export { createBeastModeSystemHook, BEAST_MODE_SYSTEM_PROMPT } from "./beast-mode-system"; +export { createHashlineEditDiffEnhancerHook } from "./hashline-edit-diff-enhancer"; diff --git a/src/hooks/runtime-fallback/agent-resolver.ts b/src/hooks/runtime-fallback/agent-resolver.ts new file mode 100644 index 000000000..1310a95bb --- /dev/null +++ b/src/hooks/runtime-fallback/agent-resolver.ts @@ -0,0 +1,54 @@ +import { getSessionAgent } from "../../features/claude-code-session-state" + +export const AGENT_NAMES = [ + "sisyphus", + "oracle", + "librarian", + "explore", + "prometheus", + "atlas", + "metis", + "momus", + "hephaestus", + "sisyphus-junior", + "build", + "plan", + "multimodal-looker", +] + +export const agentPattern = new RegExp( + `\\b(${[...AGENT_NAMES] + .sort((a, b) => b.length - a.length) + .map((a) => a.replace(/-/g, "\\-")) + .join("|")})\\b`, + "i", +) + +export function detectAgentFromSession(sessionID: string): string | undefined { + const match = sessionID.match(agentPattern) + if (match) { + return match[1].toLowerCase() + } + return undefined +} + +export function normalizeAgentName(agent: string | undefined): string | undefined { + if (!agent) return undefined + const normalized = agent.toLowerCase().trim() + if (AGENT_NAMES.includes(normalized)) { + return normalized + } + const match = 
normalized.match(agentPattern) + if (match) { + return match[1].toLowerCase() + } + return undefined +} + +export function resolveAgentForSession(sessionID: string, eventAgent?: string): string | undefined { + return ( + normalizeAgentName(eventAgent) ?? + normalizeAgentName(getSessionAgent(sessionID)) ?? + detectAgentFromSession(sessionID) + ) +} diff --git a/src/hooks/runtime-fallback/auto-retry.ts b/src/hooks/runtime-fallback/auto-retry.ts new file mode 100644 index 000000000..bcb611ac7 --- /dev/null +++ b/src/hooks/runtime-fallback/auto-retry.ts @@ -0,0 +1,213 @@ +import type { HookDeps } from "./types" +import { HOOK_NAME } from "./constants" +import { log } from "../../shared/logger" +import { normalizeAgentName, resolveAgentForSession } from "./agent-resolver" +import { getSessionAgent } from "../../features/claude-code-session-state" +import { getFallbackModelsForSession } from "./fallback-models" +import { prepareFallback } from "./fallback-state" +import { SessionCategoryRegistry } from "../../shared/session-category-registry" + +const SESSION_TTL_MS = 30 * 60 * 1000 + +export function createAutoRetryHelpers(deps: HookDeps) { + const { ctx, config, options, sessionStates, sessionLastAccess, sessionRetryInFlight, sessionAwaitingFallbackResult, sessionFallbackTimeouts, pluginConfig } = deps + + const abortSessionRequest = async (sessionID: string, source: string): Promise => { + try { + await ctx.client.session.abort({ path: { id: sessionID } }) + log(`[${HOOK_NAME}] Aborted in-flight session request (${source})`, { sessionID }) + } catch (error) { + log(`[${HOOK_NAME}] Failed to abort in-flight session request (${source})`, { + sessionID, + error: String(error), + }) + } + } + + const clearSessionFallbackTimeout = (sessionID: string) => { + const timer = sessionFallbackTimeouts.get(sessionID) + if (timer) { + clearTimeout(timer) + sessionFallbackTimeouts.delete(sessionID) + } + } + + const scheduleSessionFallbackTimeout = (sessionID: string, 
/**
 * Re-sends the session's most recent user prompt using `newModel`.
 *
 * Flow:
 * - bail out if a retry is already in flight for this session;
 * - split `newModel` at the first "/" into provider ID and model ID;
 * - fetch the session messages and take the last message whose `info.role` is "user";
 * - keep only its non-empty text parts;
 * - mark the session as awaiting a fallback result, arm the fallback timeout,
 *   and dispatch the parts through `promptAsync`.
 *
 * Errors are logged and never rethrown. The `finally` block always removes the
 * in-flight marker and clears `pendingFallbackModel` when it still equals `newModel`.
 *
 * @param sessionID - Session to retry.
 * @param newModel - Fallback model in "provider/model" form.
 * @param resolvedAgent - Agent to send with the prompt; when undefined,
 *   `getSessionAgent(sessionID)` is used instead.
 * @param source - Label for the caller, interpolated into log messages.
 */
const autoRetryWithFallback = async (
  sessionID: string,
  newModel: string,
  resolvedAgent: string | undefined,
  source: string,
): Promise<void> => {
  if (sessionRetryInFlight.has(sessionID)) {
    log(`[${HOOK_NAME}] Retry already in flight, skipping (${source})`, { sessionID })
    return
  }

  const modelParts = newModel.split("/")
  if (modelParts.length < 2) {
    log(`[${HOOK_NAME}] Invalid model format (missing provider prefix): ${newModel}`)
    return
  }

  const fallbackModelObj = {
    providerID: modelParts[0],
    // Model IDs may themselves contain "/", so re-join everything after the provider.
    modelID: modelParts.slice(1).join("/"),
  }

  sessionRetryInFlight.add(sessionID)
  try {
    const messagesResp = await ctx.client.session.messages({
      path: { id: sessionID },
      query: { directory: ctx.directory },
    })
    const msgs = (messagesResp as {
      data?: Array<{
        info?: Record<string, unknown>
        parts?: Array<{ type?: string; text?: string }>
      }>
    }).data
    const lastUserMsg = msgs?.filter((m) => m.info?.role === "user").pop()
    // Parts are read from the message itself, falling back to `info.parts`.
    const lastUserPartsRaw =
      lastUserMsg?.parts ??
      (lastUserMsg?.info?.parts as Array<{ type?: string; text?: string }> | undefined)

    if (!lastUserPartsRaw || lastUserPartsRaw.length === 0) {
      log(`[${HOOK_NAME}] No user message found for auto-retry (${source})`, { sessionID })
      return
    }

    // flatMap narrows `text` to string without a non-null assertion.
    const retryParts = lastUserPartsRaw.flatMap((p) =>
      p.type === "text" && typeof p.text === "string" && p.text.length > 0
        ? [{ type: "text" as const, text: p.text }]
        : [],
    )

    if (retryParts.length === 0) {
      // Previously this case logged "Auto-retrying" and then silently sent nothing.
      log(`[${HOOK_NAME}] Last user message has no text parts, skipping auto-retry (${source})`, {
        sessionID,
      })
      return
    }

    log(`[${HOOK_NAME}] Auto-retrying with fallback model (${source})`, {
      sessionID,
      model: newModel,
    })

    const retryAgent = resolvedAgent ?? getSessionAgent(sessionID)
    // Arm the awaiting flag and the timeout before dispatching the prompt.
    sessionAwaitingFallbackResult.add(sessionID)
    scheduleSessionFallbackTimeout(sessionID, retryAgent)

    await ctx.client.session.promptAsync({
      path: { id: sessionID },
      body: {
        ...(retryAgent ? { agent: retryAgent } : {}),
        model: fallbackModelObj,
        parts: retryParts,
      },
      query: { directory: ctx.directory },
    })
  } catch (retryError) {
    log(`[${HOOK_NAME}] Auto-retry failed (${source})`, { sessionID, error: String(retryError) })
  } finally {
    const state = sessionStates.get(sessionID)
    if (state?.pendingFallbackModel === newModel) {
      state.pendingFallbackModel = undefined
    }
    sessionRetryInFlight.delete(sessionID)
  }
}
/**
 * Builds the `chat.message` handler for the runtime-fallback hook.
 *
 * For a session that already has fallback state, the returned handler:
 * - does nothing when the hook is disabled or the session has no state;
 * - refreshes the session's last-access timestamp;
 * - when the request names a model different from the tracked current model,
 *   either clears `pendingFallbackModel` (the request is the pending fallback)
 *   or replaces the state with a fresh one seeded from the requested model;
 * - otherwise, if the session is currently on a fallback model, rewrites
 *   `output.message.model` to that fallback.
 *
 * @param deps - Shared hook dependencies; only `config`, `sessionStates` and
 *   `sessionLastAccess` are read here.
 * @returns The async handler taking the chat `input` and mutable `output`.
 */
export function createChatMessageHandler(deps: HookDeps) {
  const { config, sessionStates, sessionLastAccess } = deps

  return async (
    input: { sessionID: string; agent?: string; model?: { providerID: string; modelID: string } },
    output: { message: { model?: { providerID: string; modelID: string } }; parts?: Array<{ type: string; text?: string }> }
  ) => {
    if (!config.enabled) return

    const sessionID = input.sessionID
    const tracked = sessionStates.get(sessionID)
    if (!tracked) return

    sessionLastAccess.set(sessionID, Date.now())

    const requestedModel = input.model
      ? `${input.model.providerID}/${input.model.modelID}`
      : undefined

    if (requestedModel && requestedModel !== tracked.currentModel) {
      // `requestedModel` is non-empty here, so equality alone implies a pending fallback exists.
      if (tracked.pendingFallbackModel === requestedModel) {
        tracked.pendingFallbackModel = undefined
        return
      }

      log(`[${HOOK_NAME}] Detected manual model change, resetting fallback state`, {
        sessionID,
        from: tracked.currentModel,
        to: requestedModel,
      })
      sessionStates.set(sessionID, createFallbackState(requestedModel))
      return
    }

    const activeModel = tracked.currentModel
    // Still on the original model: nothing to override.
    if (activeModel === tracked.originalModel) return

    log(`[${HOOK_NAME}] Applying fallback model override`, {
      sessionID,
      from: input.model,
      to: activeModel,
    })

    if (!output.message || !activeModel) return

    // Split at the first "/" only; the model ID may contain further slashes.
    const slashAt = activeModel.indexOf("/")
    if (slashAt === -1) return

    output.message.model = {
      providerID: activeModel.slice(0, slashAt),
      modelID: activeModel.slice(slashAt + 1),
    }
  }
}
/**
 * Extracts an HTTP-style status code from an error value.
 *
 * Lookup order:
 * 1. the first of `error.statusCode`, `error.status`, `error.data.statusCode`
 *    that is an actual (non-NaN) number;
 * 2. otherwise, the first whole-word occurrence in the error message of one of
 *    the codes in `retryOnErrors` (or `DEFAULT_CONFIG.retry_on_errors` when omitted).
 *
 * Each structured field is checked individually, so a non-numeric value in an
 * earlier field (for example a string `status`) no longer hides a numeric
 * `data.statusCode`.
 *
 * @param error - Arbitrary error value; falsy values yield `undefined`.
 * @param retryOnErrors - Codes to look for in the message text.
 * @returns The status code, or `undefined` when none can be determined.
 */
export function extractStatusCode(error: unknown, retryOnErrors?: number[]): number | undefined {
  if (!error) return undefined

  const errorObj = error as Record<string, unknown>
  const data = errorObj.data as Record<string, unknown> | null | undefined

  const candidates = [errorObj.statusCode, errorObj.status, data?.statusCode]
  for (const candidate of candidates) {
    if (typeof candidate === "number" && !Number.isNaN(candidate)) {
      return candidate
    }
  }

  const codes = retryOnErrors ?? DEFAULT_CONFIG.retry_on_errors
  // An empty list would build /\b()\b/, which matches an empty string and parses to NaN.
  if (codes.length === 0) return undefined

  const pattern = new RegExp(`\\b(${codes.join("|")})\\b`)
  const statusMatch = getErrorMessage(error).match(pattern)
  return statusMatch ? parseInt(statusMatch[1], 10) : undefined
}
/**
 * Reports whether a list of message parts contains any part of type "error".
 *
 * @param parts - Message parts to inspect; `undefined` or an empty list means no error.
 * @returns `hasError` is true when at least one part has `type === "error"`.
 *   `errorMessage` is the newline-joined text of those parts, or `undefined`
 *   when none of them carries non-empty text. Empty strings are ignored so the
 *   result is never an empty or newline-only message.
 */
export function containsErrorContent(
  parts: Array<{ type?: string; text?: string }> | undefined
): { hasError: boolean; errorMessage?: string } {
  if (!parts || parts.length === 0) return { hasError: false }

  const errorParts = parts.filter((part) => part.type === "error")
  if (errorParts.length === 0) return { hasError: false }

  const errorTexts: string[] = []
  for (const part of errorParts) {
    if (typeof part.text === "string" && part.text.length > 0) {
      errorTexts.push(part.text)
    }
  }

  return {
    hasError: true,
    errorMessage: errorTexts.length > 0 ? errorTexts.join("\n") : undefined,
  }
}
/**
 * Handles a `session.error` event.
 *
 * Sequence:
 * - resolve the agent for the session, then drop the "awaiting fallback result"
 *   marker and any armed fallback timeout;
 * - log the error's status code, name and classification;
 * - stop if the error is not retryable or no fallback models are configured;
 * - make sure the session has fallback state, seeding it from `props.model` or,
 *   failing that, from the resolved agent's configured model;
 * - call `prepareFallback`; on success optionally show a toast and re-send the
 *   prompt via `autoRetryWithFallback`, on failure log the reason.
 *
 * @param props - Event properties; reads `sessionID`, `error`, `agent` and `model`.
 */
const handleSessionError = async (props: Record<string, unknown> | undefined) => {
  const sessionID = props?.sessionID as string | undefined
  if (!sessionID) {
    log(`[${HOOK_NAME}] session.error without sessionID, skipping`)
    return
  }

  const error = props?.error
  const agent = props?.agent as string | undefined

  // Shared diagnostic fields for the two log entries below.
  const describeError = () => ({
    statusCode: extractStatusCode(error, config.retry_on_errors),
    errorName: extractErrorName(error),
    errorType: classifyErrorType(error),
  })

  const resolvedAgent = await helpers.resolveAgentForSessionFromContext(sessionID, agent)
  sessionAwaitingFallbackResult.delete(sessionID)
  helpers.clearSessionFallbackTimeout(sessionID)

  log(`[${HOOK_NAME}] session.error received`, {
    sessionID,
    agent,
    resolvedAgent,
    ...describeError(),
  })

  if (!isRetryableError(error, config.retry_on_errors)) {
    log(`[${HOOK_NAME}] Error not retryable, skipping fallback`, {
      sessionID,
      retryable: false,
      ...describeError(),
    })
    return
  }

  let state = sessionStates.get(sessionID)
  const fallbackModels = getFallbackModelsForSession(sessionID, resolvedAgent, pluginConfig)

  if (fallbackModels.length === 0) {
    log(`[${HOOK_NAME}] No fallback models configured`, { sessionID, agent })
    return
  }

  if (!state) {
    // No tracked state yet: seed it from the event's model, else from the agent's configured model.
    let seedModel = props?.model as string | undefined
    if (!seedModel) {
      const agentConfig = resolvedAgent
        ? pluginConfig?.agents?.[resolvedAgent as keyof typeof pluginConfig.agents]
        : undefined
      seedModel = agentConfig?.model as string | undefined
      if (!seedModel) {
        log(`[${HOOK_NAME}] No model info available, cannot fallback`, { sessionID })
        return
      }
      log(`[${HOOK_NAME}] Derived model from agent config`, { sessionID, agent: resolvedAgent, model: seedModel })
    }
    state = createFallbackState(seedModel)
    sessionStates.set(sessionID, state)
  }
  sessionLastAccess.set(sessionID, Date.now())

  const result = prepareFallback(sessionID, state, fallbackModels, config)

  if (!result.success) {
    log(`[${HOOK_NAME}] Fallback preparation failed`, { sessionID, error: result.error })
    return
  }

  if (config.notify_on_fallback) {
    // Toast failures are deliberately ignored; the notification is best-effort.
    await deps.ctx.client.tui
      .showToast({
        body: {
          title: "Model Fallback",
          message: `Switching to ${result.newModel?.split("/").pop() || result.newModel} for next request`,
          variant: "warning",
          duration: 5000,
        },
      })
      .catch(() => {})
  }

  if (result.newModel) {
    await helpers.autoRetryWithFallback(sessionID, result.newModel, resolvedAgent, "session.error")
  }
}
b/src/hooks/runtime-fallback/fallback-models.ts @@ -0,0 +1,69 @@ +import type { OhMyOpenCodeConfig } from "../../config" +import { AGENT_NAMES, agentPattern } from "./agent-resolver" +import { HOOK_NAME } from "./constants" +import { log } from "../../shared/logger" +import { SessionCategoryRegistry } from "../../shared/session-category-registry" +import { normalizeFallbackModels } from "../../shared/model-resolver" + +export function getFallbackModelsForSession( + sessionID: string, + agent: string | undefined, + pluginConfig: OhMyOpenCodeConfig | undefined +): string[] { + if (!pluginConfig) return [] + + const sessionCategory = SessionCategoryRegistry.get(sessionID) + if (sessionCategory && pluginConfig.categories?.[sessionCategory]) { + const categoryConfig = pluginConfig.categories[sessionCategory] + if (categoryConfig?.fallback_models) { + return normalizeFallbackModels(categoryConfig.fallback_models) ?? [] + } + } + + const tryGetFallbackFromAgent = (agentName: string): string[] | undefined => { + const agentConfig = pluginConfig.agents?.[agentName as keyof typeof pluginConfig.agents] + if (!agentConfig) return undefined + + if (agentConfig?.fallback_models) { + return normalizeFallbackModels(agentConfig.fallback_models) + } + + const agentCategory = agentConfig?.category + if (agentCategory && pluginConfig.categories?.[agentCategory]) { + const categoryConfig = pluginConfig.categories[agentCategory] + if (categoryConfig?.fallback_models) { + return normalizeFallbackModels(categoryConfig.fallback_models) + } + } + + return undefined + } + + if (agent) { + const result = tryGetFallbackFromAgent(agent) + if (result) return result + } + + const sessionAgentMatch = sessionID.match(agentPattern) + if (sessionAgentMatch) { + const detectedAgent = sessionAgentMatch[1].toLowerCase() + const result = tryGetFallbackFromAgent(detectedAgent) + if (result) return result + } + + const sisyphusFallback = tryGetFallbackFromAgent("sisyphus") + if (sisyphusFallback) { + 
/**
 * Advances a session's fallback state to the next usable fallback model.
 *
 * Fails without touching `state` when the attempt budget
 * (`config.max_fallback_attempts`) is used up, or when
 * `findNextAvailableFallback` finds no candidate after the current index.
 * On success it mutates `state`: records the current model as failed at the
 * present time, increments `attemptCount`, and sets `currentModel`,
 * `pendingFallbackModel` and `fallbackIndex` to the chosen model.
 *
 * @param sessionID - Session ID, used only for logging.
 * @param state - Mutable fallback state for the session.
 * @param fallbackModels - Ordered list of fallback models for the session.
 * @param config - Fully resolved runtime-fallback configuration.
 * @returns `{ success: true, newModel }` or `{ success: false, error, ... }`.
 */
export function prepareFallback(
  sessionID: string,
  state: FallbackState,
  fallbackModels: string[],
  config: Required<RuntimeFallbackConfig>
): FallbackResult {
  if (state.attemptCount >= config.max_fallback_attempts) {
    log(`[${HOOK_NAME}] Max fallback attempts reached`, { sessionID, attempts: state.attemptCount })
    return { success: false, error: "Max fallback attempts reached", maxAttemptsReached: true }
  }

  const nextModel = findNextAvailableFallback(state, fallbackModels, config.cooldown_seconds)

  if (!nextModel) {
    log(`[${HOOK_NAME}] No available fallback models`, { sessionID })
    return { success: false, error: "No available fallback models (all in cooldown or exhausted)" }
  }

  log(`[${HOOK_NAME}] Preparing fallback`, {
    sessionID,
    from: state.currentModel,
    to: nextModel,
    attempt: state.attemptCount + 1,
  })

  const failedModel = state.currentModel
  const now = Date.now()

  // Search from the position after the current index, matching the scan in
  // findNextAvailableFallback. A plain indexOf() returns the first occurrence,
  // which moves the index backwards when the list repeats a model.
  state.fallbackIndex = fallbackModels.indexOf(nextModel, state.fallbackIndex + 1)
  state.failedModels.set(failedModel, now)
  state.attemptCount++
  state.currentModel = nextModel
  state.pendingFallbackModel = nextModel

  return { success: true, newModel: nextModel }
}
DEFAULT_CONFIG.retry_on_errors, + max_fallback_attempts: options?.config?.max_fallback_attempts ?? DEFAULT_CONFIG.max_fallback_attempts, + cooldown_seconds: options?.config?.cooldown_seconds ?? DEFAULT_CONFIG.cooldown_seconds, + timeout_seconds: options?.config?.timeout_seconds ?? DEFAULT_CONFIG.timeout_seconds, + notify_on_fallback: options?.config?.notify_on_fallback ?? DEFAULT_CONFIG.notify_on_fallback, + } + + let pluginConfig = options?.pluginConfig + if (!pluginConfig) { + try { + pluginConfig = loadPluginConfig(ctx.directory, ctx) + } catch { + log(`[${HOOK_NAME}] Plugin config not available`) + } + } + + const deps: HookDeps = { + ctx, + config, + options, + pluginConfig, + sessionStates: new Map(), + sessionLastAccess: new Map(), + sessionRetryInFlight: new Set(), + sessionAwaitingFallbackResult: new Set(), + sessionFallbackTimeouts: new Map(), + } + + const helpers = createAutoRetryHelpers(deps) + const baseEventHandler = createEventHandler(deps, helpers) + const messageUpdateHandler = createMessageUpdateHandler(deps, helpers) + const chatMessageHandler = createChatMessageHandler(deps) + + const cleanupInterval = setInterval(helpers.cleanupStaleSessions, 5 * 60 * 1000) + cleanupInterval.unref() + + const eventHandler = async ({ event }: { event: { type: string; properties?: unknown } }) => { + if (event.type === "message.updated") { + if (!config.enabled) return + const props = event.properties as Record | undefined + await messageUpdateHandler(props) + return + } + await baseEventHandler({ event }) + } + + return { + event: eventHandler, + "chat.message": chatMessageHandler, + } as RuntimeFallbackHook +} diff --git a/src/hooks/runtime-fallback/index.test.ts b/src/hooks/runtime-fallback/index.test.ts new file mode 100644 index 000000000..2e394db6d --- /dev/null +++ b/src/hooks/runtime-fallback/index.test.ts @@ -0,0 +1,2086 @@ +import { describe, expect, test, beforeEach, afterEach, spyOn } from "bun:test" +import { createRuntimeFallbackHook, type 
RuntimeFallbackHook } from "./index" +import type { RuntimeFallbackConfig, OhMyOpenCodeConfig } from "../../config" +import * as sharedModule from "../../shared" +import { SessionCategoryRegistry } from "../../shared/session-category-registry" + +describe("runtime-fallback", () => { + let logCalls: Array<{ msg: string; data?: unknown }> + let logSpy: ReturnType + let toastCalls: Array<{ title: string; message: string; variant: string }> + + beforeEach(() => { + logCalls = [] + toastCalls = [] + SessionCategoryRegistry.clear() + logSpy = spyOn(sharedModule, "log").mockImplementation((msg: string, data?: unknown) => { + logCalls.push({ msg, data }) + }) + }) + + afterEach(() => { + SessionCategoryRegistry.clear() + logSpy?.mockRestore() + }) + + function createMockPluginInput(overrides?: { + session?: { + messages?: (args: unknown) => Promise + promptAsync?: (args: unknown) => Promise + abort?: (args: unknown) => Promise + } + }) { + return { + client: { + tui: { + showToast: async (opts: { body: { title: string; message: string; variant: string; duration: number } }) => { + toastCalls.push({ + title: opts.body.title, + message: opts.body.message, + variant: opts.body.variant, + }) + }, + }, + session: { + messages: overrides?.session?.messages ?? (async () => ({ data: [] })), + promptAsync: overrides?.session?.promptAsync ?? (async () => ({})), + abort: overrides?.session?.abort ?? 
(async () => ({})), + }, + }, + directory: "/test/dir", + } as any + } + + function createMockConfig(overrides?: Partial): RuntimeFallbackConfig { + return { + enabled: true, + retry_on_errors: [429, 503, 529], + max_fallback_attempts: 3, + cooldown_seconds: 60, + notify_on_fallback: true, + ...overrides, + } + } + + function createMockPluginConfigWithCategoryFallback(fallbackModels: string[]): OhMyOpenCodeConfig { + return { + categories: { + test: { + fallback_models: fallbackModels, + }, + }, + } + } + + describe("session.error handling", () => { + test("should detect retryable error with status code 429", async () => { + const hook = createRuntimeFallbackHook(createMockPluginInput(), { config: createMockConfig() }) + const sessionID = "test-session-123" + + await hook.event({ + event: { + type: "session.created", + properties: { info: { id: sessionID, model: "anthropic/claude-opus-4-5" } }, + }, + }) + + await hook.event({ + event: { + type: "session.error", + properties: { sessionID, error: { statusCode: 429, message: "Rate limit exceeded" } }, + }, + }) + + const fallbackLog = logCalls.find((c) => c.msg.includes("session.error received")) + expect(fallbackLog).toBeDefined() + expect(fallbackLog?.data).toMatchObject({ sessionID, statusCode: 429 }) + }) + + test("should detect retryable error with status code 503", async () => { + const hook = createRuntimeFallbackHook(createMockPluginInput(), { config: createMockConfig() }) + const sessionID = "test-session-503" + + await hook.event({ + event: { + type: "session.created", + properties: { info: { id: sessionID, model: "openai/gpt-5.2" } }, + }, + }) + + await hook.event({ + event: { + type: "session.error", + properties: { sessionID, error: { statusCode: 503, message: "Service unavailable" } }, + }, + }) + + const errorLog = logCalls.find((c) => c.msg.includes("session.error received")) + expect(errorLog).toBeDefined() + }) + + test("should detect retryable error with status code 529", async () => { + const 
hook = createRuntimeFallbackHook(createMockPluginInput(), { config: createMockConfig() }) + const sessionID = "test-session-529" + + await hook.event({ + event: { + type: "session.created", + properties: { info: { id: sessionID, model: "google/gemini-3-pro" } }, + }, + }) + + await hook.event({ + event: { + type: "session.error", + properties: { sessionID, error: { statusCode: 529, message: "Overloaded" } }, + }, + }) + + const errorLog = logCalls.find((c) => c.msg.includes("session.error received")) + expect(errorLog).toBeDefined() + }) + + test("should skip non-retryable errors", async () => { + const hook = createRuntimeFallbackHook(createMockPluginInput(), { config: createMockConfig() }) + const sessionID = "test-session-400" + + await hook.event({ + event: { + type: "session.created", + properties: { info: { id: sessionID, model: "anthropic/claude-opus-4-5" } }, + }, + }) + + await hook.event({ + event: { + type: "session.error", + properties: { sessionID, error: { statusCode: 400, message: "Bad request" } }, + }, + }) + + const skipLog = logCalls.find((c) => c.msg.includes("Error not retryable")) + expect(skipLog).toBeDefined() + }) + + test("should log missing API key errors with classification details", async () => { + const hook = createRuntimeFallbackHook(createMockPluginInput(), { config: createMockConfig() }) + const sessionID = "test-session-missing-api-key" + + await hook.event({ + event: { + type: "session.created", + properties: { info: { id: sessionID, model: "google/gemini-2.5-pro" } }, + }, + }) + + await hook.event({ + event: { + type: "session.error", + properties: { + sessionID, + error: { + name: "AI_LoadAPIKeyError", + message: + "Google Generative AI API key is missing. 
Pass it using the 'apiKey' parameter or the GOOGLE_GENERATIVE_AI_API_KEY environment variable.", + }, + }, + }, + }) + + const sessionErrorLog = logCalls.find((c) => c.msg.includes("session.error received")) + expect(sessionErrorLog).toBeDefined() + expect(sessionErrorLog?.data).toMatchObject({ + sessionID, + errorName: "AI_LoadAPIKeyError", + errorType: "missing_api_key", + }) + + const skipLog = logCalls.find((c) => c.msg.includes("Error not retryable")) + expect(skipLog).toBeUndefined() + }) + + test("should trigger fallback for missing API key errors when fallback models are configured", async () => { + const hook = createRuntimeFallbackHook(createMockPluginInput(), { + config: createMockConfig({ notify_on_fallback: false }), + pluginConfig: createMockPluginConfigWithCategoryFallback(["openai/gpt-5.2"]), + }) + const sessionID = "test-session-missing-api-key-fallback" + SessionCategoryRegistry.register(sessionID, "test") + + await hook.event({ + event: { + type: "session.created", + properties: { info: { id: sessionID, model: "google/gemini-2.5-pro" } }, + }, + }) + + await hook.event({ + event: { + type: "session.error", + properties: { + sessionID, + error: { + name: "AI_LoadAPIKeyError", + message: + "Google Generative AI API key is missing. 
Pass it using the 'apiKey' parameter or the GOOGLE_GENERATIVE_AI_API_KEY environment variable.", + }, + }, + }, + }) + + const fallbackLog = logCalls.find((c) => c.msg.includes("Preparing fallback")) + expect(fallbackLog).toBeDefined() + expect(fallbackLog?.data).toMatchObject({ from: "google/gemini-2.5-pro", to: "openai/gpt-5.2" }) + }) + + test("should detect retryable error from message pattern 'rate limit'", async () => { + const hook = createRuntimeFallbackHook(createMockPluginInput(), { config: createMockConfig() }) + const sessionID = "test-session-pattern" + + await hook.event({ + event: { + type: "session.created", + properties: { info: { id: sessionID, model: "anthropic/claude-opus-4-5" } }, + }, + }) + + await hook.event({ + event: { + type: "session.error", + properties: { sessionID, error: { message: "You have hit the rate limit" } }, + }, + }) + + const errorLog = logCalls.find((c) => c.msg.includes("session.error received")) + expect(errorLog).toBeDefined() + }) + + test("should continue fallback chain when fallback model is not found", async () => { + const hook = createRuntimeFallbackHook(createMockPluginInput(), { + config: createMockConfig({ notify_on_fallback: false }), + pluginConfig: createMockPluginConfigWithCategoryFallback([ + "anthropic/claude-opus-4.6", + "openai/gpt-5.2", + ]), + }) + const sessionID = "test-session-model-not-found" + SessionCategoryRegistry.register(sessionID, "test") + + await hook.event({ + event: { + type: "session.created", + properties: { info: { id: sessionID, model: "google/gemini-2.5-pro" } }, + }, + }) + + await hook.event({ + event: { + type: "session.error", + properties: { + sessionID, + error: { + name: "ProviderAuthError", + data: { + providerID: "google", + message: + "Google Generative AI API key is missing. 
Pass it using the 'apiKey' parameter or the GOOGLE_GENERATIVE_AI_API_KEY environment variable.", + }, + }, + }, + }, + }) + + await hook.event({ + event: { + type: "session.error", + properties: { + sessionID, + error: { name: "UnknownError", data: { message: "Model not found: anthropic/claude-opus-4.6." } }, + }, + }, + }) + + const fallbackLogs = logCalls.filter((c) => c.msg.includes("Preparing fallback")) + expect(fallbackLogs.length).toBeGreaterThanOrEqual(2) + expect(fallbackLogs[1]?.data).toMatchObject({ from: "anthropic/claude-opus-4.6", to: "openai/gpt-5.2" }) + + const nonRetryLog = logCalls.find( + (c) => c.msg.includes("Error not retryable") && (c.data as { sessionID?: string } | undefined)?.sessionID === sessionID + ) + expect(nonRetryLog).toBeUndefined() + }) + + test("should trigger fallback on Copilot auto-retry signal in message.updated", async () => { + const hook = createRuntimeFallbackHook(createMockPluginInput(), { + config: createMockConfig({ notify_on_fallback: false }), + pluginConfig: createMockPluginConfigWithCategoryFallback(["openai/gpt-5.2"]), + }) + + const sessionID = "test-session-copilot-auto-retry" + SessionCategoryRegistry.register(sessionID, "test") + + await hook.event({ + event: { + type: "session.created", + properties: { info: { id: sessionID, model: "github-copilot/claude-opus-4.6" } }, + }, + }) + + await hook.event({ + event: { + type: "message.updated", + properties: { + info: { + sessionID, + role: "assistant", + model: "github-copilot/claude-opus-4.6", + status: + "Too Many Requests: quota exceeded [retrying in ~2 weeks attempt #1]", + }, + }, + }, + }) + + const signalLog = logCalls.find((c) => c.msg.includes("Detected provider auto-retry signal")) + expect(signalLog).toBeDefined() + + const fallbackLog = logCalls.find((c) => c.msg.includes("Preparing fallback")) + expect(fallbackLog).toBeDefined() + expect(fallbackLog?.data).toMatchObject({ from: "github-copilot/claude-opus-4.6", to: "openai/gpt-5.2" }) + }) + + 
test("should trigger fallback on OpenAI auto-retry signal in message.updated", async () => { + const hook = createRuntimeFallbackHook(createMockPluginInput(), { + config: createMockConfig({ notify_on_fallback: false }), + pluginConfig: createMockPluginConfigWithCategoryFallback(["anthropic/claude-opus-4-6"]), + }) + + const sessionID = "test-session-openai-auto-retry" + SessionCategoryRegistry.register(sessionID, "test") + + await hook.event({ + event: { + type: "session.created", + properties: { info: { id: sessionID, model: "openai/gpt-5.3-codex" } }, + }, + }) + + await hook.event({ + event: { + type: "message.updated", + properties: { + info: { + sessionID, + role: "assistant", + model: "openai/gpt-5.3-codex", + status: "The usage limit has been reached [retrying in 27s attempt #6]", + }, + }, + }, + }) + + const signalLog = logCalls.find((c) => c.msg.includes("Detected provider auto-retry signal")) + expect(signalLog).toBeDefined() + + const fallbackLog = logCalls.find((c) => c.msg.includes("Preparing fallback")) + expect(fallbackLog).toBeDefined() + expect(fallbackLog?.data).toMatchObject({ from: "openai/gpt-5.3-codex", to: "anthropic/claude-opus-4-6" }) + }) + + test("should NOT trigger fallback on auto-retry signal when timeout_seconds is 0", async () => { + const hook = createRuntimeFallbackHook(createMockPluginInput(), { + config: createMockConfig({ notify_on_fallback: false, timeout_seconds: 0 }), + pluginConfig: createMockPluginConfigWithCategoryFallback(["anthropic/claude-opus-4-6"]), + }) + + const sessionID = "test-session-auto-retry-timeout-disabled" + SessionCategoryRegistry.register(sessionID, "test") + + await hook.event({ + event: { + type: "session.created", + properties: { info: { id: sessionID, model: "openai/gpt-5.3-codex" } }, + }, + }) + + await hook.event({ + event: { + type: "message.updated", + properties: { + info: { + sessionID, + role: "assistant", + model: "openai/gpt-5.3-codex", + status: "The usage limit has been reached [retrying 
in 27s attempt #6]", + }, + }, + }, + }) + + // Should NOT detect provider auto-retry signal when timeout is disabled + const signalLog = logCalls.find((c) => c.msg.includes("Detected provider auto-retry signal")) + expect(signalLog).toBeUndefined() + + // Should NOT trigger fallback + const fallbackLog = logCalls.find((c) => c.msg.includes("Preparing fallback")) + expect(fallbackLog).toBeUndefined() + }) + + test("should log when no fallback models configured", async () => { + const hook = createRuntimeFallbackHook(createMockPluginInput(), { + config: createMockConfig(), + pluginConfig: {}, + }) + const sessionID = "test-session-no-fallbacks" + + await hook.event({ + event: { + type: "session.created", + properties: { info: { id: sessionID, model: "anthropic/claude-opus-4-5" } }, + }, + }) + + await hook.event({ + event: { + type: "session.error", + properties: { sessionID, error: { statusCode: 429, message: "Rate limit" } }, + }, + }) + + const noFallbackLog = logCalls.find((c) => c.msg.includes("No fallback models configured")) + expect(noFallbackLog).toBeDefined() + }) + }) + + describe("disabled hook", () => { + test("should not process events when disabled", async () => { + const hook = createRuntimeFallbackHook(createMockPluginInput(), { + config: createMockConfig({ enabled: false }), + }) + const sessionID = "test-session-disabled" + + await hook.event({ + event: { + type: "session.error", + properties: { sessionID, error: { statusCode: 429 } }, + }, + }) + + const sessionErrorLog = logCalls.find((c) => c.msg.includes("session.error received")) + expect(sessionErrorLog).toBeUndefined() + }) + }) + + describe("session lifecycle", () => { + test("should create state on session.created", async () => { + const hook = createRuntimeFallbackHook(createMockPluginInput(), { config: createMockConfig() }) + const sessionID = "test-session-create" + const model = "anthropic/claude-opus-4-5" + + await hook.event({ + event: { + type: "session.created", + properties: { 
info: { id: sessionID, model } }, + }, + }) + + const createLog = logCalls.find((c) => c.msg.includes("Session created with model")) + expect(createLog).toBeDefined() + expect(createLog?.data).toMatchObject({ sessionID, model }) + }) + + test("should cleanup state on session.deleted", async () => { + const hook = createRuntimeFallbackHook(createMockPluginInput(), { config: createMockConfig() }) + const sessionID = "test-session-delete" + + await hook.event({ + event: { + type: "session.created", + properties: { info: { id: sessionID, model: "anthropic/claude-opus-4-5" } }, + }, + }) + + await hook.event({ + event: { + type: "session.deleted", + properties: { info: { id: sessionID } }, + }, + }) + + const deleteLog = logCalls.find((c) => c.msg.includes("Cleaning up session state")) + expect(deleteLog).toBeDefined() + expect(deleteLog?.data).toMatchObject({ sessionID }) + }) + + test("should handle session.error without prior session.created", async () => { + const hook = createRuntimeFallbackHook(createMockPluginInput(), { config: createMockConfig() }) + const sessionID = "test-session-no-create" + + await hook.event({ + event: { + type: "session.error", + properties: { + sessionID, + error: { statusCode: 429 }, + model: "anthropic/claude-opus-4-5", + }, + }, + }) + + const errorLog = logCalls.find((c) => c.msg.includes("session.error received")) + expect(errorLog).toBeDefined() + }) + }) + + describe("error code extraction", () => { + test("should extract status code from error object", async () => { + const hook = createRuntimeFallbackHook(createMockPluginInput(), { config: createMockConfig() }) + const sessionID = "test-extract-status" + + await hook.event({ + event: { + type: "session.created", + properties: { info: { id: sessionID, model: "test-model" } }, + }, + }) + + await hook.event({ + event: { + type: "session.error", + properties: { + sessionID, + error: { statusCode: 429, message: "Rate limit" }, + }, + }, + }) + + const statusLog = logCalls.find((c) => 
c.data && typeof c.data === "object" && "statusCode" in c.data) + expect(statusLog?.data).toMatchObject({ statusCode: 429 }) + }) + + test("should extract status code from nested error.data", async () => { + const hook = createRuntimeFallbackHook(createMockPluginInput(), { config: createMockConfig() }) + const sessionID = "test-nested-status" + + await hook.event({ + event: { + type: "session.created", + properties: { info: { id: sessionID, model: "test-model" } }, + }, + }) + + await hook.event({ + event: { + type: "session.error", + properties: { + sessionID, + error: { data: { statusCode: 503, message: "Service unavailable" } }, + }, + }, + }) + + const errorLog = logCalls.find((c) => c.msg.includes("session.error received")) + expect(errorLog).toBeDefined() + }) + }) + + describe("custom error codes", () => { + test("should support custom retry_on_errors configuration", async () => { + const hook = createRuntimeFallbackHook(createMockPluginInput(), { + config: createMockConfig({ retry_on_errors: [500, 502] }), + }) + const sessionID = "test-session-custom" + + await hook.event({ + event: { + type: "session.created", + properties: { info: { id: sessionID, model: "test-model" } }, + }, + }) + + await hook.event({ + event: { + type: "session.error", + properties: { sessionID, error: { statusCode: 500 } }, + }, + }) + + const errorLog = logCalls.find((c) => c.msg.includes("session.error received")) + expect(errorLog).toBeDefined() + }) + }) + + describe("message.updated handling", () => { + test("should handle assistant message errors", async () => { + const hook = createRuntimeFallbackHook(createMockPluginInput(), { config: createMockConfig() }) + const sessionID = "test-message-updated" + + await hook.event({ + event: { + type: "message.updated", + properties: { + info: { + sessionID, + role: "assistant", + error: { statusCode: 429, message: "Rate limit" }, + model: "anthropic/claude-opus-4-5", + }, + }, + }, + }) + + const errorLog = logCalls.find((c) => 
c.msg.includes("message.updated with assistant error")) + expect(errorLog).toBeDefined() + }) + + test("should skip non-assistant message errors", async () => { + const hook = createRuntimeFallbackHook(createMockPluginInput(), { config: createMockConfig() }) + const sessionID = "test-message-user" + + await hook.event({ + event: { + type: "message.updated", + properties: { + info: { + sessionID, + role: "user", + error: { statusCode: 429 }, + model: "anthropic/claude-opus-4-5", + }, + }, + }, + }) + + const errorLog = logCalls.find((c) => c.msg.includes("message.updated with assistant error")) + expect(errorLog).toBeUndefined() + }) + + test("should trigger fallback when message.updated has missing API key error without model", async () => { + const hook = createRuntimeFallbackHook(createMockPluginInput(), { + config: createMockConfig({ notify_on_fallback: false }), + pluginConfig: createMockPluginConfigWithCategoryFallback(["openai/gpt-5.2"]), + }) + const sessionID = "test-message-updated-missing-model" + SessionCategoryRegistry.register(sessionID, "test") + + await hook.event({ + event: { + type: "session.created", + properties: { info: { id: sessionID, model: "google/gemini-2.5-pro" } }, + }, + }) + + await hook.event({ + event: { + type: "message.updated", + properties: { + info: { + sessionID, + role: "assistant", + error: { + name: "AI_LoadAPIKeyError", + message: + "Google Generative AI API key is missing. 
Pass it using the 'apiKey' parameter or the GOOGLE_GENERATIVE_AI_API_KEY environment variable.", + }, + }, + }, + }, + }) + + const fallbackLog = logCalls.find((c) => c.msg.includes("Preparing fallback")) + expect(fallbackLog).toBeDefined() + expect(fallbackLog?.data).toMatchObject({ from: "google/gemini-2.5-pro", to: "openai/gpt-5.2" }) + }) + + test("should not advance fallback state from message.updated while retry is already in flight", async () => { + const pending = new Promise(() => {}) + + const hook = createRuntimeFallbackHook( + createMockPluginInput({ + session: { + messages: async () => ({ + data: [{ info: { role: "user" }, parts: [{ type: "text", text: "hello" }] }], + }), + promptAsync: async () => pending, + }, + }), + { + config: createMockConfig({ notify_on_fallback: false }), + pluginConfig: createMockPluginConfigWithCategoryFallback([ + "github-copilot/claude-opus-4.6", + "anthropic/claude-opus-4-6", + "openai/gpt-5.2", + ]), + } + ) + + const sessionID = "test-message-updated-inflight-race" + SessionCategoryRegistry.register(sessionID, "test") + + await hook.event({ + event: { + type: "session.created", + properties: { info: { id: sessionID, model: "google/gemini-2.5-pro" } }, + }, + }) + + const sessionErrorPromise = hook.event({ + event: { + type: "session.error", + properties: { + sessionID, + error: { + name: "ProviderAuthError", + data: { + providerID: "google", + message: + "Google Generative AI API key is missing. Pass it using the 'apiKey' parameter or the GOOGLE_GENERATIVE_AI_API_KEY environment variable.", + }, + }, + }, + }, + }) + + await new Promise((resolve) => setTimeout(resolve, 0)) + + await hook.event({ + event: { + type: "message.updated", + properties: { + info: { + sessionID, + role: "assistant", + error: { + name: "ProviderAuthError", + data: { + providerID: "google", + message: + "Google Generative AI API key is missing. 
Pass it using the 'apiKey' parameter or the GOOGLE_GENERATIVE_AI_API_KEY environment variable.", + }, + }, + model: "github-copilot/claude-opus-4.6", + }, + }, + }, + }) + + const fallbackLogs = logCalls.filter((c) => c.msg.includes("Preparing fallback")) + expect(fallbackLogs).toHaveLength(1) + + void sessionErrorPromise + }) + + test("should force advance fallback from message.updated when Copilot auto-retry signal appears during in-flight retry", async () => { + const retriedModels: string[] = [] + const pending = new Promise(() => {}) + + const hook = createRuntimeFallbackHook( + createMockPluginInput({ + session: { + messages: async () => ({ + data: [{ info: { role: "user" }, parts: [{ type: "text", text: "hello" }] }], + }), + promptAsync: async (args: unknown) => { + const model = (args as { body?: { model?: { providerID?: string; modelID?: string } } })?.body?.model + if (model?.providerID && model?.modelID) { + retriedModels.push(`${model.providerID}/${model.modelID}`) + } + + if (retriedModels.length === 1) { + await pending + } + + return {} + }, + }, + }), + { + config: createMockConfig({ notify_on_fallback: false }), + pluginConfig: createMockPluginConfigWithCategoryFallback([ + "github-copilot/claude-opus-4.6", + "anthropic/claude-opus-4-6", + "openai/gpt-5.2", + ]), + } + ) + + const sessionID = "test-message-updated-inflight-retry-signal" + SessionCategoryRegistry.register(sessionID, "test") + + await hook.event({ + event: { + type: "session.created", + properties: { info: { id: sessionID, model: "google/gemini-2.5-pro" } }, + }, + }) + + const sessionErrorPromise = hook.event({ + event: { + type: "session.error", + properties: { + sessionID, + error: { + name: "ProviderAuthError", + data: { + providerID: "google", + message: + "Google Generative AI API key is missing. 
Pass it using the 'apiKey' parameter or the GOOGLE_GENERATIVE_AI_API_KEY environment variable.", + }, + }, + }, + }, + }) + + await new Promise((resolve) => setTimeout(resolve, 0)) + + await hook.event({ + event: { + type: "message.updated", + properties: { + info: { + sessionID, + role: "assistant", + model: "github-copilot/claude-opus-4.6", + status: + "Too Many Requests: quota exceeded [retrying in ~2 weeks attempt #1]", + }, + }, + }, + }) + + expect(retriedModels.length).toBeGreaterThanOrEqual(2) + expect(retriedModels[0]).toBe("github-copilot/claude-opus-4.6") + expect(retriedModels[1]).toBe("anthropic/claude-opus-4-6") + + void sessionErrorPromise + }) + + test("should advance fallback after session timeout when Copilot retry emits no retryable events", async () => { + const retriedModels: string[] = [] + const abortCalls: Array<{ path?: { id?: string } }> = [] + + const hook = createRuntimeFallbackHook( + createMockPluginInput({ + session: { + messages: async () => ({ + data: [{ info: { role: "user" }, parts: [{ type: "text", text: "hello" }] }], + }), + promptAsync: async (args: unknown) => { + const model = (args as { body?: { model?: { providerID?: string; modelID?: string } } })?.body?.model + if (model?.providerID && model?.modelID) { + retriedModels.push(`${model.providerID}/${model.modelID}`) + } + return {} + }, + abort: async (args: unknown) => { + abortCalls.push(args as { path?: { id?: string } }) + return {} + }, + }, + }), + { + config: createMockConfig({ notify_on_fallback: false, timeout_seconds: 30 }), + pluginConfig: createMockPluginConfigWithCategoryFallback([ + "github-copilot/claude-opus-4.6", + "anthropic/claude-opus-4-6", + "openai/gpt-5.2", + ]), + session_timeout_ms: 20, + } + ) + + const sessionID = "test-session-timeout-watchdog" + SessionCategoryRegistry.register(sessionID, "test") + + await hook.event({ + event: { + type: "session.created", + properties: { info: { id: sessionID, model: "google/gemini-2.5-pro" } }, + }, + }) + + 
await hook.event({ + event: { + type: "session.error", + properties: { + sessionID, + error: { + name: "ProviderAuthError", + data: { + providerID: "google", + message: + "Google Generative AI API key is missing. Pass it using the 'apiKey' parameter or the GOOGLE_GENERATIVE_AI_API_KEY environment variable.", + }, + }, + }, + }, + }) + + await new Promise((resolve) => setTimeout(resolve, 50)) + + expect(retriedModels).toContain("github-copilot/claude-opus-4.6") + expect(retriedModels).toContain("anthropic/claude-opus-4-6") + expect(abortCalls.some((call) => call.path?.id === sessionID)).toBe(true) + + const timeoutLog = logCalls.find((c) => c.msg.includes("Session fallback timeout reached")) + expect(timeoutLog).toBeDefined() + }) + + test("should keep session timeout active after chat.message model override", async () => { + const retriedModels: string[] = [] + + const hook = createRuntimeFallbackHook( + createMockPluginInput({ + session: { + messages: async () => ({ + data: [{ info: { role: "user" }, parts: [{ type: "text", text: "hello" }] }], + }), + promptAsync: async (args: unknown) => { + const model = (args as { body?: { model?: { providerID?: string; modelID?: string } } })?.body?.model + if (model?.providerID && model?.modelID) { + retriedModels.push(`${model.providerID}/${model.modelID}`) + } + return {} + }, + }, + }), + { + config: createMockConfig({ notify_on_fallback: false, timeout_seconds: 30 }), + pluginConfig: createMockPluginConfigWithCategoryFallback([ + "github-copilot/claude-opus-4.6", + "anthropic/claude-opus-4-6", + "openai/gpt-5.2", + ]), + session_timeout_ms: 20, + } + ) + + const sessionID = "test-session-timeout-after-chat-message" + SessionCategoryRegistry.register(sessionID, "test") + + await hook.event({ + event: { + type: "session.created", + properties: { info: { id: sessionID, model: "google/gemini-2.5-pro" } }, + }, + }) + + await hook.event({ + event: { + type: "session.error", + properties: { + sessionID, + error: { + name: 
"ProviderAuthError", + data: { + providerID: "google", + message: + "Google Generative AI API key is missing. Pass it using the 'apiKey' parameter or the GOOGLE_GENERATIVE_AI_API_KEY environment variable.", + }, + }, + }, + }, + }) + + const output: { message: { model?: { providerID: string; modelID: string } }; parts: Array<{ type: string; text?: string }> } = { + message: {}, + parts: [], + } + await hook["chat.message"]?.( + { + sessionID, + model: { providerID: "github-copilot", modelID: "claude-opus-4.6" }, + }, + output + ) + + await new Promise((resolve) => setTimeout(resolve, 50)) + + expect(retriedModels).toContain("github-copilot/claude-opus-4.6") + expect(retriedModels).toContain("anthropic/claude-opus-4-6") + }) + + test("should abort in-flight fallback request before advancing on timeout", async () => { + const retriedModels: string[] = [] + const abortCalls: Array<{ path?: { id?: string } }> = [] + const never = new Promise(() => {}) + + const hook = createRuntimeFallbackHook( + createMockPluginInput({ + session: { + messages: async () => ({ + data: [{ info: { role: "user" }, parts: [{ type: "text", text: "hello" }] }], + }), + promptAsync: async (args: unknown) => { + const model = (args as { body?: { model?: { providerID?: string; modelID?: string } } })?.body?.model + if (model?.providerID && model?.modelID) { + retriedModels.push(`${model.providerID}/${model.modelID}`) + } + + if (retriedModels.length === 1) { + await never + } + + return {} + }, + abort: async (args: unknown) => { + abortCalls.push(args as { path?: { id?: string } }) + return {} + }, + }, + }), + { + config: createMockConfig({ notify_on_fallback: false, timeout_seconds: 30 }), + pluginConfig: createMockPluginConfigWithCategoryFallback([ + "github-copilot/claude-opus-4.6", + "anthropic/claude-opus-4-6", + "openai/gpt-5.2", + ]), + session_timeout_ms: 20, + } + ) + + const sessionID = "test-session-timeout-abort-inflight" + SessionCategoryRegistry.register(sessionID, "test") + + 
await hook.event({ + event: { + type: "session.created", + properties: { info: { id: sessionID, model: "google/gemini-2.5-pro" } }, + }, + }) + + const sessionErrorPromise = hook.event({ + event: { + type: "session.error", + properties: { + sessionID, + error: { + name: "ProviderAuthError", + data: { + providerID: "google", + message: + "Google Generative AI API key is missing. Pass it using the 'apiKey' parameter or the GOOGLE_GENERATIVE_AI_API_KEY environment variable.", + }, + }, + }, + }, + }) + + await new Promise((resolve) => setTimeout(resolve, 50)) + + expect(abortCalls.some((call) => call.path?.id === sessionID)).toBe(true) + expect(retriedModels).toContain("github-copilot/claude-opus-4.6") + expect(retriedModels).toContain("anthropic/claude-opus-4-6") + + void sessionErrorPromise + }) + + test("should not advance fallback after session.stop cancels timeout-driven retry", async () => { + const retriedModels: string[] = [] + + const hook = createRuntimeFallbackHook( + createMockPluginInput({ + session: { + messages: async () => ({ + data: [{ info: { role: "user" }, parts: [{ type: "text", text: "hello" }] }], + }), + promptAsync: async (args: unknown) => { + const model = (args as { body?: { model?: { providerID?: string; modelID?: string } } })?.body?.model + if (model?.providerID && model?.modelID) { + retriedModels.push(`${model.providerID}/${model.modelID}`) + } + return {} + }, + }, + }), + { + config: createMockConfig({ notify_on_fallback: false, timeout_seconds: 30 }), + pluginConfig: createMockPluginConfigWithCategoryFallback([ + "github-copilot/claude-opus-4.6", + "anthropic/claude-opus-4-6", + "openai/gpt-5.2", + ]), + session_timeout_ms: 20, + } + ) + + const sessionID = "test-session-stop-cancels-timeout-fallback" + SessionCategoryRegistry.register(sessionID, "test") + + await hook.event({ + event: { + type: "session.created", + properties: { info: { id: sessionID, model: "google/gemini-2.5-pro" } }, + }, + }) + + await hook.event({ + event: { + 
type: "session.error", + properties: { + sessionID, + error: { + name: "ProviderAuthError", + data: { + providerID: "google", + message: + "Google Generative AI API key is missing. Pass it using the 'apiKey' parameter or the GOOGLE_GENERATIVE_AI_API_KEY environment variable.", + }, + }, + }, + }, + }) + + expect(retriedModels).toContain("github-copilot/claude-opus-4.6") + + await hook.event({ + event: { + type: "session.stop", + properties: { sessionID }, + }, + }) + + await new Promise((resolve) => setTimeout(resolve, 50)) + + expect(retriedModels).toHaveLength(1) + }) + + test("should not trigger second fallback after successful assistant reply", async () => { + const retriedModels: string[] = [] + const mockMessages = [ + { info: { role: "user" }, parts: [{ type: "text", text: "test" }] }, + ] + + const hook = createRuntimeFallbackHook( + createMockPluginInput({ + session: { + messages: async () => ({ + data: mockMessages, + }), + promptAsync: async (args: unknown) => { + const model = (args as { body?: { model?: { providerID?: string; modelID?: string } } })?.body?.model + if (model?.providerID && model?.modelID) { + retriedModels.push(`${model.providerID}/${model.modelID}`) + } + return {} + }, + }, + }), + { + config: createMockConfig({ notify_on_fallback: false, timeout_seconds: 30 }), + pluginConfig: createMockPluginConfigWithCategoryFallback([ + "github-copilot/claude-opus-4.6", + "openai/gpt-5.3-codex", + "anthropic/claude-opus-4-6", + ]), + session_timeout_ms: 20, + } + ) + + const sessionID = "test-session-success-clears-timeout" + SessionCategoryRegistry.register(sessionID, "test") + + await hook.event({ + event: { + type: "session.created", + properties: { info: { id: sessionID, model: "google/gemini-2.5-pro" } }, + }, + }) + + await hook.event({ + event: { + type: "session.error", + properties: { + sessionID, + error: { + name: "ProviderAuthError", + data: { + providerID: "google", + message: + "Google Generative AI API key is missing. 
Pass it using the 'apiKey' parameter or the GOOGLE_GENERATIVE_AI_API_KEY environment variable.", + }, + }, + }, + }, + }) + + expect(retriedModels).toEqual(["github-copilot/claude-opus-4.6"]) + + await hook.event({ + event: { + type: "message.updated", + properties: { + info: { + sessionID, + role: "assistant", + model: "openai/gpt-5.3-codex", + }, + }, + }, + }) + + mockMessages.push({ + info: { role: "assistant" }, + parts: [{ type: "text", text: "Got it - I'm here." }], + }) + + await hook.event({ + event: { + type: "message.updated", + properties: { + info: { + sessionID, + role: "assistant", + model: "openai/gpt-5.3-codex", + message: "Got it - I'm here.", + }, + }, + }, + }) + + await new Promise((resolve) => setTimeout(resolve, 50)) + + expect(retriedModels).toEqual(["github-copilot/claude-opus-4.6"]) + }) + + test("should not clear fallback timeout on assistant non-error update with Copilot retry signal", async () => { + const retriedModels: string[] = [] + + const hook = createRuntimeFallbackHook( + createMockPluginInput({ + session: { + messages: async () => ({ + data: [{ info: { role: "user" }, parts: [{ type: "text", text: "test" }] }], + }), + promptAsync: async (args: unknown) => { + const model = (args as { body?: { model?: { providerID?: string; modelID?: string } } })?.body?.model + if (model?.providerID && model?.modelID) { + retriedModels.push(`${model.providerID}/${model.modelID}`) + } + return {} + }, + }, + }), + { + config: createMockConfig({ notify_on_fallback: false, timeout_seconds: 30 }), + pluginConfig: createMockPluginConfigWithCategoryFallback([ + "github-copilot/claude-opus-4.6", + "openai/gpt-5.3-codex", + "anthropic/claude-opus-4-6", + ]), + session_timeout_ms: 20, + } + ) + + const sessionID = "test-session-copilot-retry-signal-no-error" + SessionCategoryRegistry.register(sessionID, "test") + + await hook.event({ + event: { + type: "session.created", + properties: { info: { id: sessionID, model: "google/gemini-2.5-pro" } }, + }, + 
}) + + await hook.event({ + event: { + type: "session.error", + properties: { + sessionID, + error: { + name: "ProviderAuthError", + data: { + providerID: "google", + message: + "Google Generative AI API key is missing. Pass it using the 'apiKey' parameter or the GOOGLE_GENERATIVE_AI_API_KEY environment variable.", + }, + }, + }, + }, + }) + + expect(retriedModels).toEqual(["github-copilot/claude-opus-4.6"]) + + await hook.event({ + event: { + type: "message.updated", + properties: { + info: { + sessionID, + role: "assistant", + status: "Too Many Requests: quota exceeded [retrying in ~2 weeks attempt #1]", + }, + }, + }, + }) + + await new Promise((resolve) => setTimeout(resolve, 60)) + + expect(retriedModels).toContain("openai/gpt-5.3-codex") + }) + + test("should not clear fallback timeout on assistant non-error update with OpenAI retry signal", async () => { + const retriedModels: string[] = [] + + const hook = createRuntimeFallbackHook( + createMockPluginInput({ + session: { + messages: async () => ({ + data: [{ info: { role: "user" }, parts: [{ type: "text", text: "test" }] }], + }), + promptAsync: async (args: unknown) => { + const model = (args as { body?: { model?: { providerID?: string; modelID?: string } } })?.body?.model + if (model?.providerID && model?.modelID) { + retriedModels.push(`${model.providerID}/${model.modelID}`) + } + return {} + }, + }, + }), + { + config: createMockConfig({ notify_on_fallback: false, timeout_seconds: 30 }), + pluginConfig: createMockPluginConfigWithCategoryFallback([ + "openai/gpt-5.3-codex", + "anthropic/claude-opus-4-6", + ]), + session_timeout_ms: 20, + } + ) + + const sessionID = "test-session-openai-retry-signal-no-error" + SessionCategoryRegistry.register(sessionID, "test") + + await hook.event({ + event: { + type: "session.created", + properties: { info: { id: sessionID, model: "google/gemini-2.5-pro" } }, + }, + }) + + await hook.event({ + event: { + type: "session.error", + properties: { + sessionID, + error: { + 
name: "ProviderAuthError", + data: { + providerID: "google", + message: + "Google Generative AI API key is missing. Pass it using the 'apiKey' parameter or the GOOGLE_GENERATIVE_AI_API_KEY environment variable.", + }, + }, + }, + }, + }) + + expect(retriedModels).toEqual(["openai/gpt-5.3-codex"]) + + await hook.event({ + event: { + type: "message.updated", + properties: { + info: { + sessionID, + role: "assistant", + status: "The usage limit has been reached [retrying in 27s attempt #6]", + }, + }, + }, + }) + + await new Promise((resolve) => setTimeout(resolve, 60)) + + expect(retriedModels).toContain("anthropic/claude-opus-4-6") + }) + + test("should not clear fallback timeout on assistant non-error update without user-visible content", async () => { + const retriedModels: string[] = [] + + const hook = createRuntimeFallbackHook( + createMockPluginInput({ + session: { + messages: async () => ({ + data: [{ info: { role: "user" }, parts: [{ type: "text", text: "test" }] }], + }), + promptAsync: async (args: unknown) => { + const model = (args as { body?: { model?: { providerID?: string; modelID?: string } } })?.body?.model + if (model?.providerID && model?.modelID) { + retriedModels.push(`${model.providerID}/${model.modelID}`) + } + return {} + }, + }, + }), + { + config: createMockConfig({ notify_on_fallback: false, timeout_seconds: 30 }), + pluginConfig: createMockPluginConfigWithCategoryFallback([ + "github-copilot/claude-opus-4.6", + "openai/gpt-5.3-codex", + "anthropic/claude-opus-4-6", + ]), + session_timeout_ms: 20, + } + ) + + const sessionID = "test-session-no-content-non-error-update" + SessionCategoryRegistry.register(sessionID, "test") + + await hook.event({ + event: { + type: "session.created", + properties: { info: { id: sessionID, model: "google/gemini-2.5-pro" } }, + }, + }) + + await hook.event({ + event: { + type: "session.error", + properties: { + sessionID, + error: { + name: "ProviderAuthError", + data: { + providerID: "google", + message: + 
"Google Generative AI API key is missing. Pass it using the 'apiKey' parameter or the GOOGLE_GENERATIVE_AI_API_KEY environment variable.", + }, + }, + }, + }, + }) + + expect(retriedModels).toEqual(["github-copilot/claude-opus-4.6"]) + + await hook.event({ + event: { + type: "message.updated", + properties: { + info: { + sessionID, + role: "assistant", + model: "github-copilot/claude-opus-4.6", + }, + }, + }, + }) + + await new Promise((resolve) => setTimeout(resolve, 60)) + + expect(retriedModels).toContain("openai/gpt-5.3-codex") + }) + + test("should not clear fallback timeout from info.message alone without persisted assistant text", async () => { + const retriedModels: string[] = [] + + const hook = createRuntimeFallbackHook( + createMockPluginInput({ + session: { + messages: async () => ({ + data: [{ info: { role: "user" }, parts: [{ type: "text", text: "test" }] }], + }), + promptAsync: async (args: unknown) => { + const model = (args as { body?: { model?: { providerID?: string; modelID?: string } } })?.body?.model + if (model?.providerID && model?.modelID) { + retriedModels.push(`${model.providerID}/${model.modelID}`) + } + return {} + }, + }, + }), + { + config: createMockConfig({ notify_on_fallback: false, timeout_seconds: 30 }), + pluginConfig: createMockPluginConfigWithCategoryFallback([ + "github-copilot/claude-opus-4.6", + "openai/gpt-5.3-codex", + "anthropic/claude-opus-4-6", + ]), + session_timeout_ms: 20, + } + ) + + const sessionID = "test-session-info-message-without-persisted-text" + SessionCategoryRegistry.register(sessionID, "test") + + await hook.event({ + event: { + type: "session.created", + properties: { info: { id: sessionID, model: "google/gemini-2.5-pro" } }, + }, + }) + + await hook.event({ + event: { + type: "session.error", + properties: { + sessionID, + error: { + name: "ProviderAuthError", + data: { + providerID: "google", + message: + "Google Generative AI API key is missing. 
Pass it using the 'apiKey' parameter or the GOOGLE_GENERATIVE_AI_API_KEY environment variable.", + }, + }, + }, + }, + }) + + expect(retriedModels).toEqual(["github-copilot/claude-opus-4.6"]) + + await hook.event({ + event: { + type: "message.updated", + properties: { + info: { + sessionID, + role: "assistant", + message: "Thinking: retrying provider request...", + }, + }, + }, + }) + + await new Promise((resolve) => setTimeout(resolve, 60)) + + expect(retriedModels).toContain("openai/gpt-5.3-codex") + }) + + test("should keep timeout armed when session.idle fires before fallback result", async () => { + const retriedModels: string[] = [] + + const hook = createRuntimeFallbackHook( + createMockPluginInput({ + session: { + messages: async () => ({ + data: [{ info: { role: "user" }, parts: [{ type: "text", text: "test" }] }], + }), + promptAsync: async (args: unknown) => { + const model = (args as { body?: { model?: { providerID?: string; modelID?: string } } })?.body?.model + if (model?.providerID && model?.modelID) { + retriedModels.push(`${model.providerID}/${model.modelID}`) + } + return {} + }, + }, + }), + { + config: createMockConfig({ notify_on_fallback: false, timeout_seconds: 30 }), + pluginConfig: createMockPluginConfigWithCategoryFallback([ + "github-copilot/claude-opus-4.6", + "openai/gpt-5.3-codex", + "anthropic/claude-opus-4-6", + ]), + session_timeout_ms: 20, + } + ) + + const sessionID = "test-session-idle-before-fallback-result" + SessionCategoryRegistry.register(sessionID, "test") + + await hook.event({ + event: { + type: "session.created", + properties: { info: { id: sessionID, model: "google/gemini-2.5-pro" } }, + }, + }) + + await hook.event({ + event: { + type: "session.error", + properties: { + sessionID, + error: { + name: "ProviderAuthError", + data: { + providerID: "google", + message: + "Google Generative AI API key is missing. 
Pass it using the 'apiKey' parameter or the GOOGLE_GENERATIVE_AI_API_KEY environment variable.", + }, + }, + }, + }, + }) + + expect(retriedModels).toEqual(["github-copilot/claude-opus-4.6"]) + + await hook.event({ + event: { + type: "session.idle", + properties: { sessionID }, + }, + }) + + await new Promise((resolve) => setTimeout(resolve, 60)) + + expect(retriedModels).toContain("openai/gpt-5.3-codex") + }) + + test("triggers fallback when message contains type:error parts (e.g. Minimax insufficient balance)", async () => { + const retriedModels: string[] = [] + + const hook = createRuntimeFallbackHook( + createMockPluginInput({ + session: { + messages: async () => ({ + data: [{ info: { role: "user" }, parts: [{ type: "text", text: "test" }] }], + }), + promptAsync: async (args: unknown) => { + const model = (args as { body?: { model?: { providerID?: string; modelID?: string } } })?.body?.model + if (model?.providerID && model?.modelID) { + retriedModels.push(`${model.providerID}/${model.modelID}`) + } + return {} + }, + }, + }), + { + config: createMockConfig({ notify_on_fallback: false }), + pluginConfig: createMockPluginConfigWithCategoryFallback(["openai/gpt-5.2"]), + } + ) + + const sessionID = "test-session-error-content" + SessionCategoryRegistry.register(sessionID, "test") + + await hook.event({ + event: { + type: "session.created", + properties: { info: { id: sessionID, model: "minimax/minimax-text-01" } }, + }, + }) + + await hook.event({ + event: { + type: "message.updated", + properties: { + info: { + sessionID, + role: "assistant", + model: "minimax/minimax-text-01", + }, + parts: [{ type: "error", text: "Upstream error from Minimax: insufficient balance (1008)" }], + }, + }, + }) + + expect(retriedModels).toContain("openai/gpt-5.2") + }) + + test("triggers fallback when message has mixed text and error parts", async () => { + const retriedModels: string[] = [] + + const hook = createRuntimeFallbackHook( + createMockPluginInput({ + session: { + 
messages: async () => ({ + data: [{ info: { role: "user" }, parts: [{ type: "text", text: "test" }] }], + }), + promptAsync: async (args: unknown) => { + const model = (args as { body?: { model?: { providerID?: string; modelID?: string } } })?.body?.model + if (model?.providerID && model?.modelID) { + retriedModels.push(`${model.providerID}/${model.modelID}`) + } + return {} + }, + }, + }), + { + config: createMockConfig({ notify_on_fallback: false }), + pluginConfig: createMockPluginConfigWithCategoryFallback(["anthropic/claude-opus-4-6"]), + } + ) + + const sessionID = "test-session-mixed-content" + SessionCategoryRegistry.register(sessionID, "test") + + await hook.event({ + event: { + type: "session.created", + properties: { info: { id: sessionID, model: "google/gemini-2.5-pro" } }, + }, + }) + + await hook.event({ + event: { + type: "message.updated", + properties: { + info: { + sessionID, + role: "assistant", + model: "google/gemini-2.5-pro", + }, + parts: [ + { type: "text", text: "Hello" }, + { type: "error", text: "Rate limit exceeded" }, + ], + }, + }, + }) + + expect(retriedModels).toContain("anthropic/claude-opus-4-6") + }) + + test("does NOT trigger fallback for normal type:error-free messages", async () => { + const retriedModels: string[] = [] + + const hook = createRuntimeFallbackHook( + createMockPluginInput({ + session: { + messages: async () => ({ + data: [ + { info: { role: "user" }, parts: [{ type: "text", text: "test" }] }, + { info: { role: "assistant" }, parts: [{ type: "text", text: "Normal response" }] }, + ], + }), + promptAsync: async (args: unknown) => { + const model = (args as { body?: { model?: { providerID?: string; modelID?: string } } })?.body?.model + if (model?.providerID && model?.modelID) { + retriedModels.push(`${model.providerID}/${model.modelID}`) + } + return {} + }, + }, + }), + { + config: createMockConfig({ notify_on_fallback: false }), + pluginConfig: createMockPluginConfigWithCategoryFallback(["openai/gpt-5.2"]), + } + 
) + + const sessionID = "test-session-normal-content" + SessionCategoryRegistry.register(sessionID, "test") + + await hook.event({ + event: { + type: "session.created", + properties: { info: { id: sessionID, model: "anthropic/claude-opus-4-5" } }, + }, + }) + + await hook.event({ + event: { + type: "message.updated", + properties: { + info: { + sessionID, + role: "assistant", + model: "anthropic/claude-opus-4-5", + }, + parts: [{ type: "text", text: "Normal response" }], + }, + }, + }) + + expect(retriedModels).toHaveLength(0) + }) + }) + + describe("edge cases", () => { + test("should handle session.error without sessionID", async () => { + const hook = createRuntimeFallbackHook(createMockPluginInput(), { config: createMockConfig() }) + + await hook.event({ + event: { + type: "session.error", + properties: { error: { statusCode: 429 } }, + }, + }) + + const skipLog = logCalls.find((c) => c.msg.includes("session.error without sessionID")) + expect(skipLog).toBeDefined() + }) + + test("should handle error as string", async () => { + const hook = createRuntimeFallbackHook(createMockPluginInput(), { config: createMockConfig() }) + const sessionID = "test-error-string" + + await hook.event({ + event: { + type: "session.created", + properties: { info: { id: sessionID, model: "test-model" } }, + }, + }) + + await hook.event({ + event: { + type: "session.error", + properties: { sessionID, error: "rate limit exceeded" }, + }, + }) + + const errorLog = logCalls.find((c) => c.msg.includes("session.error received")) + expect(errorLog).toBeDefined() + }) + + test("should handle null error", async () => { + const hook = createRuntimeFallbackHook(createMockPluginInput(), { config: createMockConfig() }) + const sessionID = "test-error-null" + + await hook.event({ + event: { + type: "session.created", + properties: { info: { id: sessionID, model: "test-model" } }, + }, + }) + + await hook.event({ + event: { + type: "session.error", + properties: { sessionID, error: null }, + }, + 
}) + + const skipLog = logCalls.find((c) => c.msg.includes("Error not retryable")) + expect(skipLog).toBeDefined() + }) + }) + + describe("model switching via chat.message", () => { + test("should apply fallback model on next chat.message after error", async () => { + const hook = createRuntimeFallbackHook(createMockPluginInput(), { + config: createMockConfig({ notify_on_fallback: false }), + pluginConfig: createMockPluginConfigWithCategoryFallback(["openai/gpt-5.2", "google/gemini-3-pro"]), + }) + const sessionID = "test-session-switch" + SessionCategoryRegistry.register(sessionID, "test") + + //#given + await hook.event({ + event: { + type: "session.created", + properties: { info: { id: sessionID, model: "anthropic/claude-opus-4-5" } }, + }, + }) + + //#when + await hook.event({ + event: { + type: "session.error", + properties: { sessionID, error: { statusCode: 429, message: "Rate limit" } }, + }, + }) + + const output: { message: { model?: { providerID: string; modelID: string } }; parts: Array<{ type: string; text?: string }> } = { + message: {}, + parts: [], + } + await hook["chat.message"]?.( + { sessionID }, + output + ) + + expect(output.message.model).toEqual({ providerID: "openai", modelID: "gpt-5.2" }) + }) + + test("should notify when fallback occurs", async () => { + const hook = createRuntimeFallbackHook(createMockPluginInput(), { + config: createMockConfig({ notify_on_fallback: true }), + pluginConfig: createMockPluginConfigWithCategoryFallback(["openai/gpt-5.2"]), + }) + const sessionID = "test-session-notify" + SessionCategoryRegistry.register(sessionID, "test") + + await hook.event({ + event: { + type: "session.created", + properties: { info: { id: sessionID, model: "anthropic/claude-opus-4-5" } }, + }, + }) + + await hook.event({ + event: { + type: "session.error", + properties: { sessionID, error: { statusCode: 429 } }, + }, + }) + + expect(toastCalls.length).toBe(1) + expect(toastCalls[0]?.message.includes("gpt-5.2")).toBe(true) + }) + }) + + 
describe("fallback models configuration", () => { + function createMockPluginConfigWithAgentFallback(agentName: string, fallbackModels: string[]): OhMyOpenCodeConfig { + return { + agents: { + [agentName]: { + fallback_models: fallbackModels, + }, + }, + } + } + + test("should use agent-level fallback_models", async () => { + const input = createMockPluginInput() + const hook = createRuntimeFallbackHook(input, { + config: createMockConfig({ notify_on_fallback: false }), + pluginConfig: createMockPluginConfigWithAgentFallback("oracle", ["openai/gpt-5.2", "google/gemini-3-pro"]), + }) + const sessionID = "test-agent-fallback" + + //#given - agent with custom fallback models + await hook.event({ + event: { + type: "session.created", + properties: { info: { id: sessionID, model: "anthropic/claude-opus-4-5", agent: "oracle" } }, + }, + }) + + //#when - error occurs + await hook.event({ + event: { + type: "session.error", + properties: { sessionID, error: { statusCode: 503 }, agent: "oracle" }, + }, + }) + + //#then - should prepare fallback to openai/gpt-5.2 + const fallbackLog = logCalls.find((c) => c.msg.includes("Preparing fallback")) + expect(fallbackLog).toBeDefined() + expect(fallbackLog?.data).toMatchObject({ from: "anthropic/claude-opus-4-5", to: "openai/gpt-5.2" }) + }) + + test("should detect agent from sessionID pattern", async () => { + const hook = createRuntimeFallbackHook(createMockPluginInput(), { + config: createMockConfig({ notify_on_fallback: false }), + pluginConfig: createMockPluginConfigWithAgentFallback("sisyphus", ["openai/gpt-5.2"]), + }) + const sessionID = "sisyphus-session-123" + + await hook.event({ + event: { + type: "session.created", + properties: { info: { id: sessionID, model: "anthropic/claude-opus-4-5" } }, + }, + }) + + await hook.event({ + event: { + type: "session.error", + properties: { sessionID, error: { statusCode: 429 } }, + }, + }) + + //#then - should detect sisyphus from sessionID and use its fallback + const fallbackLog = 
logCalls.find((c) => c.msg.includes("Preparing fallback")) + expect(fallbackLog).toBeDefined() + expect(fallbackLog?.data).toMatchObject({ to: "openai/gpt-5.2" }) + }) + + test("should preserve resolved agent during auto-retry", async () => { + const promptCalls: Array> = [] + const hook = createRuntimeFallbackHook( + createMockPluginInput({ + session: { + messages: async () => ({ + data: [ + { + info: { role: "user" }, + parts: [{ type: "text", text: "test" }], + }, + ], + }), + promptAsync: async (args: unknown) => { + promptCalls.push(args as Record) + return {} + }, + }, + }), + { + config: createMockConfig({ notify_on_fallback: false }), + pluginConfig: createMockPluginConfigWithAgentFallback("prometheus", ["github-copilot/claude-opus-4.6"]), + }, + ) + const sessionID = "test-preserve-agent-on-retry" + + await hook.event({ + event: { + type: "session.error", + properties: { + sessionID, + model: "anthropic/claude-opus-4-6", + error: { statusCode: 503, message: "Service unavailable" }, + agent: "prometheus", + }, + }, + }) + + expect(promptCalls.length).toBe(1) + const callBody = promptCalls[0]?.body as Record + expect(callBody?.agent).toBe("prometheus") + expect(callBody?.model).toEqual({ providerID: "github-copilot", modelID: "claude-opus-4.6" }) + }) + }) + + describe("cooldown mechanism", () => { + test("should respect cooldown period before retrying failed model", async () => { + const hook = createRuntimeFallbackHook(createMockPluginInput(), { + config: createMockConfig({ cooldown_seconds: 60, notify_on_fallback: false }), + pluginConfig: createMockPluginConfigWithCategoryFallback([ + "openai/gpt-5.2", + "anthropic/claude-opus-4-5", + ]), + }) + const sessionID = "test-session-cooldown" + SessionCategoryRegistry.register(sessionID, "test") + + await hook.event({ + event: { + type: "session.created", + properties: { info: { id: sessionID, model: "anthropic/claude-opus-4-5" } }, + }, + }) + + //#when - first error occurs, switches to openai + await 
hook.event({ + event: { + type: "session.error", + properties: { sessionID, error: { statusCode: 429 } }, + }, + }) + + //#when - second error occurs immediately; tries to switch back to original model but should be in cooldown + await hook.event({ + event: { + type: "session.error", + properties: { sessionID, error: { statusCode: 429 } }, + }, + }) + + const cooldownSkipLog = logCalls.find((c) => c.msg.includes("Skipping fallback model in cooldown")) + expect(cooldownSkipLog).toBeDefined() + }) + }) + + describe("max attempts limit", () => { + test("should stop after max_fallback_attempts", async () => { + const hook = createRuntimeFallbackHook(createMockPluginInput(), { + config: createMockConfig({ max_fallback_attempts: 2 }), + }) + const sessionID = "test-session-max" + + await hook.event({ + event: { + type: "session.created", + properties: { info: { id: sessionID, model: "anthropic/claude-opus-4-5" } }, + }, + }) + + //#when - multiple errors occur exceeding max attempts + for (let i = 0; i < 5; i++) { + await hook.event({ + event: { + type: "session.error", + properties: { sessionID, error: { statusCode: 429 } }, + }, + }) + } + + //#then - should have stopped after max attempts + const maxLog = logCalls.find((c) => c.msg.includes("Max fallback attempts reached") || c.msg.includes("No fallback models")) + expect(maxLog).toBeDefined() + }) + }) +}) diff --git a/src/hooks/runtime-fallback/index.ts b/src/hooks/runtime-fallback/index.ts new file mode 100644 index 000000000..b801ef227 --- /dev/null +++ b/src/hooks/runtime-fallback/index.ts @@ -0,0 +1,2 @@ +export { createRuntimeFallbackHook } from "./hook" +export type { RuntimeFallbackHook, RuntimeFallbackOptions } from "./types" diff --git a/src/hooks/runtime-fallback/message-update-handler.ts b/src/hooks/runtime-fallback/message-update-handler.ts new file mode 100644 index 000000000..7e6130955 --- /dev/null +++ b/src/hooks/runtime-fallback/message-update-handler.ts @@ -0,0 +1,216 @@ +import type { HookDeps } 
from "./types" +import type { AutoRetryHelpers } from "./auto-retry" +import { HOOK_NAME } from "./constants" +import { log } from "../../shared/logger" +import { extractStatusCode, extractErrorName, classifyErrorType, isRetryableError, extractAutoRetrySignal, containsErrorContent } from "./error-classifier" +import { createFallbackState, prepareFallback } from "./fallback-state" +import { getFallbackModelsForSession } from "./fallback-models" + +export function hasVisibleAssistantResponse(extractAutoRetrySignalFn: typeof extractAutoRetrySignal) { + return async ( + ctx: HookDeps["ctx"], + sessionID: string, + _info: Record | undefined, + ): Promise => { + try { + const messagesResp = await ctx.client.session.messages({ + path: { id: sessionID }, + query: { directory: ctx.directory }, + }) + + const msgs = (messagesResp as { + data?: Array<{ + info?: Record + parts?: Array<{ type?: string; text?: string }> + }> + }).data + + if (!msgs || msgs.length === 0) return false + + const lastAssistant = [...msgs].reverse().find((m) => m.info?.role === "assistant") + if (!lastAssistant) return false + if (lastAssistant.info?.error) return false + + const parts = lastAssistant.parts ?? + (lastAssistant.info?.parts as Array<{ type?: string; text?: string }> | undefined) + + const textFromParts = (parts ?? 
[]) + .filter((p) => p.type === "text" && typeof p.text === "string") + .map((p) => p.text!.trim()) + .filter((text) => text.length > 0) + .join("\n") + + if (!textFromParts) return false + if (extractAutoRetrySignalFn({ message: textFromParts })) return false + + return true + } catch { + return false + } + } +} + +export function createMessageUpdateHandler(deps: HookDeps, helpers: AutoRetryHelpers) { + const { ctx, config, pluginConfig, sessionStates, sessionLastAccess, sessionRetryInFlight, sessionAwaitingFallbackResult } = deps + const checkVisibleResponse = hasVisibleAssistantResponse(extractAutoRetrySignal) + + return async (props: Record | undefined) => { + const info = props?.info as Record | undefined + const sessionID = info?.sessionID as string | undefined + const retrySignalResult = extractAutoRetrySignal(info) + const retrySignal = retrySignalResult?.signal + const timeoutEnabled = config.timeout_seconds > 0 + const parts = props?.parts as Array<{ type?: string; text?: string }> | undefined + const errorContentResult = containsErrorContent(parts) + const error = info?.error ?? + (retrySignal && timeoutEnabled ? { name: "ProviderRateLimitError", message: retrySignal } : undefined) ?? + (errorContentResult.hasError ? 
{ name: "MessageContentError", message: errorContentResult.errorMessage || "Message contains error content" } : undefined) + const role = info?.role as string | undefined + const model = info?.model as string | undefined + + if (sessionID && role === "assistant" && !error) { + if (!sessionAwaitingFallbackResult.has(sessionID)) { + return + } + + const hasVisible = await checkVisibleResponse(ctx, sessionID, info) + if (!hasVisible) { + log(`[${HOOK_NAME}] Assistant update observed without visible final response; keeping fallback timeout`, { + sessionID, + model, + }) + return + } + + sessionAwaitingFallbackResult.delete(sessionID) + helpers.clearSessionFallbackTimeout(sessionID) + const state = sessionStates.get(sessionID) + if (state?.pendingFallbackModel) { + state.pendingFallbackModel = undefined + } + log(`[${HOOK_NAME}] Assistant response observed; cleared fallback timeout`, { sessionID, model }) + return + } + + if (sessionID && role === "assistant" && error) { + sessionAwaitingFallbackResult.delete(sessionID) + if (sessionRetryInFlight.has(sessionID) && !retrySignal) { + log(`[${HOOK_NAME}] message.updated fallback skipped (retry in flight)`, { sessionID }) + return + } + + if (retrySignal && sessionRetryInFlight.has(sessionID) && timeoutEnabled) { + log(`[${HOOK_NAME}] Overriding in-flight retry due to provider auto-retry signal`, { + sessionID, + model, + }) + await helpers.abortSessionRequest(sessionID, "message.updated.retry-signal") + sessionRetryInFlight.delete(sessionID) + } + + if (retrySignal && timeoutEnabled) { + log(`[${HOOK_NAME}] Detected provider auto-retry signal`, { sessionID, model }) + } + + if (!retrySignal) { + helpers.clearSessionFallbackTimeout(sessionID) + } + + log(`[${HOOK_NAME}] message.updated with assistant error`, { + sessionID, + model, + statusCode: extractStatusCode(error, config.retry_on_errors), + errorName: extractErrorName(error), + errorType: classifyErrorType(error), + }) + + if (!isRetryableError(error, 
config.retry_on_errors)) { + log(`[${HOOK_NAME}] message.updated error not retryable, skipping fallback`, { + sessionID, + statusCode: extractStatusCode(error, config.retry_on_errors), + errorName: extractErrorName(error), + errorType: classifyErrorType(error), + }) + return + } + + let state = sessionStates.get(sessionID) + const agent = info?.agent as string | undefined + const resolvedAgent = await helpers.resolveAgentForSessionFromContext(sessionID, agent) + const fallbackModels = getFallbackModelsForSession(sessionID, resolvedAgent, pluginConfig) + + if (fallbackModels.length === 0) { + return + } + + if (!state) { + let initialModel = model + if (!initialModel) { + const detectedAgent = resolvedAgent + const agentConfig = detectedAgent + ? pluginConfig?.agents?.[detectedAgent as keyof typeof pluginConfig.agents] + : undefined + const agentModel = agentConfig?.model as string | undefined + if (agentModel) { + log(`[${HOOK_NAME}] Derived model from agent config for message.updated`, { + sessionID, + agent: detectedAgent, + model: agentModel, + }) + initialModel = agentModel + } + } + + if (!initialModel) { + log(`[${HOOK_NAME}] message.updated missing model info, cannot fallback`, { + sessionID, + errorName: extractErrorName(error), + errorType: classifyErrorType(error), + }) + return + } + + state = createFallbackState(initialModel) + sessionStates.set(sessionID, state) + sessionLastAccess.set(sessionID, Date.now()) + } else { + sessionLastAccess.set(sessionID, Date.now()) + + if (state.pendingFallbackModel) { + if (retrySignal && timeoutEnabled) { + log(`[${HOOK_NAME}] Clearing pending fallback due to provider auto-retry signal`, { + sessionID, + pendingFallbackModel: state.pendingFallbackModel, + }) + state.pendingFallbackModel = undefined + } else { + log(`[${HOOK_NAME}] message.updated fallback skipped (pending fallback in progress)`, { + sessionID, + pendingFallbackModel: state.pendingFallbackModel, + }) + return + } + } + } + + const result = 
prepareFallback(sessionID, state, fallbackModels, config) + + if (result.success && config.notify_on_fallback) { + await deps.ctx.client.tui + .showToast({ + body: { + title: "Model Fallback", + message: `Switching to ${result.newModel?.split("/").pop() || result.newModel} for next request`, + variant: "warning", + duration: 5000, + }, + }) + .catch(() => {}) + } + + if (result.success && result.newModel) { + await helpers.autoRetryWithFallback(sessionID, result.newModel, resolvedAgent, "message.updated") + } + } + } +} diff --git a/src/hooks/runtime-fallback/types.ts b/src/hooks/runtime-fallback/types.ts new file mode 100644 index 000000000..500715b9e --- /dev/null +++ b/src/hooks/runtime-fallback/types.ts @@ -0,0 +1,41 @@ +import type { PluginInput } from "@opencode-ai/plugin" +import type { RuntimeFallbackConfig, OhMyOpenCodeConfig } from "../../config" + +export interface FallbackState { + originalModel: string + currentModel: string + fallbackIndex: number + failedModels: Map + attemptCount: number + pendingFallbackModel?: string +} + +export interface FallbackResult { + success: boolean + newModel?: string + error?: string + maxAttemptsReached?: boolean +} + +export interface RuntimeFallbackOptions { + config?: RuntimeFallbackConfig + pluginConfig?: OhMyOpenCodeConfig + session_timeout_ms?: number +} + +export interface RuntimeFallbackHook { + event: (input: { event: { type: string; properties?: unknown } }) => Promise + "chat.message"?: (input: { sessionID: string; agent?: string; model?: { providerID: string; modelID: string } }, output: { message: { model?: { providerID: string; modelID: string } }; parts?: Array<{ type: string; text?: string }> }) => Promise +} + +export interface HookDeps { + ctx: PluginInput + config: Required + options: RuntimeFallbackOptions | undefined + pluginConfig: OhMyOpenCodeConfig | undefined + sessionStates: Map + sessionLastAccess: Map + sessionRetryInFlight: Set + sessionAwaitingFallbackResult: Set + sessionFallbackTimeouts: 
Map> +} diff --git a/src/plugin/chat-message.ts b/src/plugin/chat-message.ts index 027bc8d2f..faeb65c10 100644 --- a/src/plugin/chat-message.ts +++ b/src/plugin/chat-message.ts @@ -76,6 +76,7 @@ export function createChatMessageHandler(args: { setSessionModel(input.sessionID, input.model) } await hooks.stopContinuationGuard?.["chat.message"]?.(input) + await hooks.runtimeFallback?.["chat.message"]?.(input, output) await hooks.keywordDetector?.["chat.message"]?.(input, output) await hooks.claudeCodeHooks?.["chat.message"]?.(input, output) await hooks.autoSlashCommand?.["chat.message"]?.(input, output) diff --git a/src/plugin/event.ts b/src/plugin/event.ts index 24f6650e5..20657ea95 100644 --- a/src/plugin/event.ts +++ b/src/plugin/event.ts @@ -93,14 +93,16 @@ function extractProviderModelFromErrorMessage( return {} } - +type EventInput = Parameters< + NonNullable["event"]> +>[0] export function createEventHandler(args: { ctx: PluginContext pluginConfig: OhMyOpenCodeConfig firstMessageVariantGate: FirstMessageVariantGate managers: Managers hooks: CreatedHooks -}): (input: { event: { type: string; properties?: Record } }) => Promise { +}): (input: EventInput) => Promise { const { ctx, firstMessageVariantGate, managers, hooks } = args // Avoid triggering multiple abort+continue cycles for the same failing assistant message. 
@@ -109,6 +111,8 @@ export function createEventHandler(args: { const lastKnownModelBySession = new Map() const dispatchToHooks = async (input: { event: { type: string; properties?: Record } }): Promise => { + +const dispatchToHooks = async (input: EventInput): Promise => { await Promise.resolve(hooks.autoUpdateChecker?.event?.(input)) await Promise.resolve(hooks.claudeCodeHooks?.event?.(input)) await Promise.resolve(hooks.backgroundNotificationHook?.event?.(input)) @@ -121,9 +125,10 @@ export function createEventHandler(args: { await Promise.resolve(hooks.rulesInjector?.event?.(input)) await Promise.resolve(hooks.thinkMode?.event?.(input)) await Promise.resolve(hooks.anthropicContextWindowLimitRecovery?.event?.(input)) + await Promise.resolve(hooks.runtimeFallback?.event?.(input)) await Promise.resolve(hooks.agentUsageReminder?.event?.(input)) await Promise.resolve(hooks.categorySkillReminder?.event?.(input)) - await Promise.resolve(hooks.interactiveBashSession?.event?.(input)) + await Promise.resolve(hooks.interactiveBashSession?.event?.(input as EventInput)) await Promise.resolve(hooks.ralphLoop?.event?.(input)) await Promise.resolve(hooks.stopContinuationGuard?.event?.(input)) await Promise.resolve(hooks.compactionTodoPreserver?.event?.(input)) @@ -175,7 +180,7 @@ export function createEventHandler(args: { return } recentSyntheticIdles.set(sessionID, Date.now()) - await dispatchToHooks(syntheticIdle) + await dispatchToHooks(syntheticIdle as EventInput) } const { event } = input diff --git a/src/plugin/hooks/create-session-hooks.ts b/src/plugin/hooks/create-session-hooks.ts index 169231e34..0ae0bca1d 100644 --- a/src/plugin/hooks/create-session-hooks.ts +++ b/src/plugin/hooks/create-session-hooks.ts @@ -25,6 +25,7 @@ import { createNoHephaestusNonGptHook, createQuestionLabelTruncatorHook, createPreemptiveCompactionHook, + createRuntimeFallbackHook, } from "../../hooks" import { createAnthropicEffortHook } from "../../hooks/anthropic-effort" import { @@ -60,6 
+61,7 @@ export type SessionHooks = { questionLabelTruncator: ReturnType taskResumeInfo: ReturnType anthropicEffort: ReturnType | null + runtimeFallback: ReturnType | null } export function createSessionHooks(args: { @@ -245,6 +247,13 @@ export function createSessionHooks(args: { ? safeHook("anthropic-effort", () => createAnthropicEffortHook()) : null + const runtimeFallback = isHookEnabled("runtime-fallback") + ? safeHook("runtime-fallback", () => + createRuntimeFallbackHook(ctx, { + config: pluginConfig.runtime_fallback, + pluginConfig, + })) + : null return { contextWindowMonitor, preemptiveCompaction, @@ -269,5 +278,6 @@ export function createSessionHooks(args: { questionLabelTruncator, taskResumeInfo, anthropicEffort, + runtimeFallback, } } diff --git a/src/shared/index.ts b/src/shared/index.ts index ce8e69be9..09187602f 100644 --- a/src/shared/index.ts +++ b/src/shared/index.ts @@ -34,6 +34,7 @@ export * from "./system-directive" export * from "./agent-tool-restrictions" export * from "./model-requirements" export * from "./model-resolver" +export { normalizeFallbackModels } from "./model-resolver" export { resolveModelPipeline } from "./model-resolution-pipeline" export type { ModelResolutionRequest, @@ -58,3 +59,4 @@ export * from "./normalize-sdk-response" export * from "./session-directory-resolver" export * from "./prompt-tools" export * from "./internal-initiator-marker" +export { SessionCategoryRegistry } from "./session-category-registry" diff --git a/src/shared/model-resolution-pipeline.ts b/src/shared/model-resolution-pipeline.ts index 6b39f9d51..d6c6e63d4 100644 --- a/src/shared/model-resolution-pipeline.ts +++ b/src/shared/model-resolution-pipeline.ts @@ -8,6 +8,7 @@ export type ModelResolutionRequest = { intent?: { uiSelectedModel?: string userModel?: string + userFallbackModels?: string[] categoryDefaultModel?: string } constraints: { @@ -101,6 +102,42 @@ export function resolveModelPipeline( }) } + //#when - user configured fallback_models, try 
them before hardcoded fallback chain + const userFallbackModels = intent?.userFallbackModels + if (userFallbackModels && userFallbackModels.length > 0) { + if (availableModels.size === 0) { + const connectedProviders = constraints.connectedProviders ?? connectedProvidersCache.readConnectedProvidersCache() + const connectedSet = connectedProviders ? new Set(connectedProviders) : null + + if (connectedSet !== null) { + for (const model of userFallbackModels) { + attempted.push(model) + const parts = model.split("/") + if (parts.length >= 2) { + const provider = parts[0] + if (connectedSet.has(provider)) { + log("Model resolved via user fallback_models (connected provider)", { model }) + return { model, provenance: "provider-fallback", attempted } + } + } + } + log("No connected provider found in user fallback_models, falling through to hardcoded chain") + } + } else { + for (const model of userFallbackModels) { + attempted.push(model) + const parts = model.split("/") + const providerHint = parts.length >= 2 ? [parts[0]] : undefined + const match = fuzzyMatchModel(model, availableModels, providerHint) + if (match) { + log("Model resolved via user fallback_models (availability confirmed)", { model: model, match }) + return { model: match, provenance: "provider-fallback", attempted } + } + } + log("No available model found in user fallback_models, falling through to hardcoded chain") + } + } + if (fallbackChain && fallbackChain.length > 0) { if (availableModels.size === 0) { const connectedProviders = constraints.connectedProviders ?? 
connectedProvidersCache.readConnectedProvidersCache() diff --git a/src/shared/model-resolver.ts b/src/shared/model-resolver.ts index cbaa8c486..e2e02fce3 100644 --- a/src/shared/model-resolver.ts +++ b/src/shared/model-resolver.ts @@ -22,6 +22,7 @@ export type ModelResolutionResult = { export type ExtendedModelResolutionInput = { uiSelectedModel?: string userModel?: string + userFallbackModels?: string[] categoryDefaultModel?: string fallbackChain?: FallbackEntry[] availableModels: Set @@ -44,9 +45,9 @@ export function resolveModel(input: ModelResolutionInput): string | undefined { export function resolveModelWithFallback( input: ExtendedModelResolutionInput, ): ModelResolutionResult | undefined { - const { uiSelectedModel, userModel, categoryDefaultModel, fallbackChain, availableModels, systemDefaultModel } = input + const { uiSelectedModel, userModel, userFallbackModels, categoryDefaultModel, fallbackChain, availableModels, systemDefaultModel } = input const resolved = resolveModelPipeline({ - intent: { uiSelectedModel, userModel, categoryDefaultModel }, + intent: { uiSelectedModel, userModel, userFallbackModels, categoryDefaultModel }, constraints: { availableModels }, policy: { fallbackChain, systemDefaultModel }, }) @@ -61,3 +62,13 @@ export function resolveModelWithFallback( variant: resolved.variant, } } + +/** + * Normalizes fallback_models config (which can be string or string[]) to string[] + * Centralized helper to avoid duplicated normalization logic + */ +export function normalizeFallbackModels(models: string | string[] | undefined): string[] | undefined { + if (!models) return undefined + if (typeof models === "string") return [models] + return models +} diff --git a/src/shared/session-category-registry.ts b/src/shared/session-category-registry.ts new file mode 100644 index 000000000..ce19e1c04 --- /dev/null +++ b/src/shared/session-category-registry.ts @@ -0,0 +1,53 @@ +/** + * Session Category Registry + * + * Maintains a mapping of session IDs to 
their assigned categories. + * Used by runtime-fallback hook to look up category-specific fallback_models. + */ + +// Map of sessionID -> category name +const sessionCategoryMap = new Map() + +export const SessionCategoryRegistry = { + /** + * Register a session with its category + */ + register: (sessionID: string, category: string): void => { + sessionCategoryMap.set(sessionID, category) + }, + + /** + * Get the category for a session + */ + get: (sessionID: string): string | undefined => { + return sessionCategoryMap.get(sessionID) + }, + + /** + * Remove a session from the registry (cleanup) + */ + remove: (sessionID: string): void => { + sessionCategoryMap.delete(sessionID) + }, + + /** + * Check if a session is registered + */ + has: (sessionID: string): boolean => { + return sessionCategoryMap.has(sessionID) + }, + + /** + * Get the size of the registry (for debugging) + */ + size: (): number => { + return sessionCategoryMap.size + }, + + /** + * Clear all entries (use with caution, mainly for testing) + */ + clear: (): void => { + sessionCategoryMap.clear() + }, +} diff --git a/src/tools/delegate-task/background-task.ts b/src/tools/delegate-task/background-task.ts index 957200f75..8e49daff5 100644 --- a/src/tools/delegate-task/background-task.ts +++ b/src/tools/delegate-task/background-task.ts @@ -5,6 +5,7 @@ import { getTimingConfig } from "./timing" import { storeToolMetadata } from "../../features/tool-metadata-store" import { formatDetailedError } from "./error-formatting" import { getSessionTools } from "../../shared/session-tools-store" +import { SessionCategoryRegistry } from "../../shared/session-category-registry" export async function executeBackgroundTask( args: DelegateTaskArgs, @@ -51,6 +52,10 @@ export async function executeBackgroundTask( sessionId = updated?.sessionID } + if (args.category && sessionId) { + SessionCategoryRegistry.register(sessionId, args.category) + } + const unstableMeta = { title: args.description, metadata: { diff --git
a/src/tools/delegate-task/sync-task.ts b/src/tools/delegate-task/sync-task.ts index 77ed2a4b5..f384b370f 100644 --- a/src/tools/delegate-task/sync-task.ts +++ b/src/tools/delegate-task/sync-task.ts @@ -5,6 +5,7 @@ import { getTaskToastManager } from "../../features/task-toast-manager" import { storeToolMetadata } from "../../features/tool-metadata-store" import { subagentSessions, syncSubagentSessions, setSessionAgent } from "../../features/claude-code-session-state" import { log } from "../../shared/logger" +import { SessionCategoryRegistry } from "../../shared/session-category-registry" import { formatDuration } from "./time-formatter" import { formatDetailedError } from "./error-formatting" import { syncTaskDeps, type SyncTaskDeps } from "./sync-task-deps" @@ -46,6 +47,10 @@ export async function executeSyncTask( setSessionAgent(sessionID, agentToUse) setSessionFallbackChain(sessionID, fallbackChain) + if (args.category) { + SessionCategoryRegistry.register(sessionID, args.category) + } + if (onSyncSessionCreated) { log("[task] Invoking onSyncSessionCreated callback", { sessionID, parentID: parentContext.sessionID }) await onSyncSessionCreated({ @@ -153,6 +158,7 @@ session_id: ${sessionID} subagentSessions.delete(syncSessionID) syncSubagentSessions.delete(syncSessionID) clearSessionFallbackChain(syncSessionID) + SessionCategoryRegistry.remove(syncSessionID) } } } diff --git a/src/tools/delegate-task/tools.test.ts b/src/tools/delegate-task/tools.test.ts index 867650aae..8ac91db68 100644 --- a/src/tools/delegate-task/tools.test.ts +++ b/src/tools/delegate-task/tools.test.ts @@ -1044,7 +1044,7 @@ describe("sisyphus-task", () => { modelID: "claude-opus-4-6", variant: "max", }) - }) + }, { timeout: 20000 }) test("DEFAULT_CATEGORIES variant passes to sync session.prompt WITHOUT userCategories", async () => { // given - NO userCategories, testing DEFAULT_CATEGORIES for sync mode @@ -2624,31 +2624,35 @@ describe("sisyphus-task", () => { toolContext ) - // then - 
agent-browser skill should be resolved (not in notFound) + // then - agent-browser skill should be resolved expect(promptBody).toBeDefined() expect(promptBody.system).toBeDefined() - expect(promptBody.system).toContain("agent-browser") + expect(promptBody.system).toContain("") + expect(String(promptBody.system).startsWith("")).toBe(false) }, { timeout: 20000 }) - test("should NOT resolve agent-browser skill when browserProvider is not set", async () => { - // given - task without browserProvider (defaults to playwright) + test("should resolve agent-browser skill even when browserProvider is not set", async () => { + // given - delegate_task without browserProvider const { createDelegateTask } = require("./tools") + let promptBody: any const mockManager = { launch: async () => ({}) } const mockClient = { app: { agents: async () => ({ data: [] }) }, - config: { get: async () => ({ data: { model: SYSTEM_DEFAULT_MODEL } }) }, - session: { - get: async () => ({ data: { directory: "/project" } }), - create: async () => ({ data: { id: "ses_no_browser_provider" } }), - prompt: async () => ({ data: {} }), - promptAsync: async () => ({ data: {} }), - messages: async () => ({ - data: [{ info: { role: "assistant" }, parts: [{ type: "text", text: "Done" }] }] - }), - status: async () => ({ data: {} }), - }, - } + config: { get: async () => ({ data: { model: SYSTEM_DEFAULT_MODEL } }) }, + session: { + get: async () => ({ data: { directory: "/project" } }), + create: async () => ({ data: { id: "ses_no_browser_provider" } }), + prompt: async (input: any) => { + promptBody = input.body + return { data: {} } + }, + messages: async () => ({ + data: [{ info: { role: "assistant" }, parts: [{ type: "text", text: "Done" }] }] + }), + status: async () => ({ data: {} }), + }, + } // No browserProvider passed const tool = createDelegateTask({ @@ -2675,7 +2679,7 @@ describe("sisyphus-task", () => { toolContext ) - // then - should return skill not found error + // then - agent-browser skill 
should NOT resolve without browserProvider expect(result).toContain("Skills not found") expect(result).toContain("agent-browser") })