Compare commits

...

2 Commits

Author SHA1 Message Date
zengxiaolou
b8c16ac070 Document retry_on_message_patterns option 2026-03-10 20:25:01 +08:00
zengxiaolou
4abbd1fc14 Add configurable runtime fallback message patterns 2026-03-10 20:24:54 +08:00
9 changed files with 141 additions and 16 deletions

View File

@@ -566,6 +566,7 @@ Auto-switches to backup models on API errors.
"runtime_fallback": { "runtime_fallback": {
"enabled": true, "enabled": true,
"retry_on_errors": [400, 429, 503, 529], "retry_on_errors": [400, 429, 503, 529],
"retry_on_message_patterns": ["no\\s+available\\s+accounts?"],
"max_fallback_attempts": 3, "max_fallback_attempts": 3,
"cooldown_seconds": 60, "cooldown_seconds": 60,
"timeout_seconds": 30, "timeout_seconds": 30,
@@ -578,6 +579,7 @@ Auto-switches to backup models on API errors.
| ----------------------- | ------------------- | ------------------------------------------------------------------------------------------------------------------------------ | | ----------------------- | ------------------- | ------------------------------------------------------------------------------------------------------------------------------ |
| `enabled` | `false` | Enable runtime fallback | | `enabled` | `false` | Enable runtime fallback |
| `retry_on_errors` | `[400,429,503,529]` | HTTP codes that trigger fallback. Also handles classified provider key errors. | | `retry_on_errors` | `[400,429,503,529]` | HTTP codes that trigger fallback. Also handles classified provider key errors. |
| `retry_on_message_patterns` | `[]` | Regex pattern strings matched against provider messages. Use this for provider-specific retry text that does not include status codes. |
| `max_fallback_attempts` | `3` | Max fallback attempts per session (1–20) | | `max_fallback_attempts` | `3` | Max fallback attempts per session (1–20) |
| `cooldown_seconds` | `60` | Seconds before retrying a failed model | | `cooldown_seconds` | `60` | Seconds before retrying a failed model |
| `timeout_seconds` | `30` | Seconds before forcing next fallback. **Set to `0` to disable timeout-based escalation and provider retry message detection.** | | `timeout_seconds` | `30` | Seconds before forcing next fallback. **Set to `0` to disable timeout-based escalation and provider retry message detection.** |

View File

@@ -5,6 +5,7 @@ export const RuntimeFallbackConfigSchema = z.object({
enabled: z.boolean().optional(), enabled: z.boolean().optional(),
/** HTTP status codes that trigger fallback (default: [400, 429, 503, 529]) */ /** HTTP status codes that trigger fallback (default: [400, 429, 503, 529]) */
retry_on_errors: z.array(z.number()).optional(), retry_on_errors: z.array(z.number()).optional(),
/** Regex pattern strings matched against provider error/retry messages to trigger fallback (default: []) */
retry_on_message_patterns: z.array(z.string()).optional(),
/** Maximum fallback attempts per session (default: 3) */ /** Maximum fallback attempts per session (default: 3) */
max_fallback_attempts: z.number().min(1).max(20).optional(), max_fallback_attempts: z.number().min(1).max(20).optional(),
/** Cooldown in seconds before retrying a failed model (default: 60) */ /** Cooldown in seconds before retrying a failed model (default: 60) */

View File

@@ -12,12 +12,25 @@ import type { RuntimeFallbackConfig } from "../../config"
export const DEFAULT_CONFIG: Required<RuntimeFallbackConfig> = { export const DEFAULT_CONFIG: Required<RuntimeFallbackConfig> = {
enabled: false, enabled: false,
retry_on_errors: [429, 500, 502, 503, 504], retry_on_errors: [429, 500, 502, 503, 504],
retry_on_message_patterns: [],
max_fallback_attempts: 3, max_fallback_attempts: 3,
cooldown_seconds: 60, cooldown_seconds: 60,
timeout_seconds: 30, timeout_seconds: 30,
notify_on_fallback: true, notify_on_fallback: true,
} }
/**
 * Built-in regex sources (plain strings, not RegExp objects) describing provider
 * messages about rate limits, quota exhaustion, credential exhaustion, or cooldown.
 *
 * The strings are compiled case-insensitively together with any user-supplied
 * `retry_on_message_patterns`. When extracting an auto-retry signal, a keyword
 * match only counts if the same text also contains a "retrying in" marker.
 *
 * Backslashes are doubled because these are string literals passed to `new RegExp`.
 */
export const AUTO_RETRY_SIGNAL_KEYWORD_PATTERNS = [
"too\\s+many\\s+requests",
"quota\\s*exceeded",
"quota\\s+will\\s+reset\\s+after",
"usage\\s+limit",
"rate\\s+limit",
"limit\\s+reached",
"all\\s+credentials\\s+for\\s+model",
"cool(?:ing)?\\s*down",
"exhausted\\s+your\\s+capacity",
]
/** /**
* Error patterns that indicate rate limiting or temporary failures * Error patterns that indicate rate limiting or temporary failures
* These are checked in addition to HTTP status codes * These are checked in addition to HTTP status codes

View File

@@ -57,4 +57,43 @@ describe("runtime-fallback error classifier", () => {
//#then //#then
expect(signal).toBeUndefined() expect(signal).toBeUndefined()
}) })
test("does not classify no-available-accounts without configured message pattern", () => {
//#given
const info = {
status: "No available accounts: no available accounts [retrying in 25s attempt #5]",
}
//#when
const signal = extractAutoRetrySignal(info)
//#then
expect(signal).toBeUndefined()
})
test("classifies no-available-accounts when configured message pattern is provided", () => {
//#given
const info = {
status: "No available accounts: no available accounts [retrying in 25s attempt #5]",
}
//#when
const signal = extractAutoRetrySignal(info, ["no\\s+available\\s+accounts?"])
//#then
expect(signal).toBeDefined()
})
test("treats configured message pattern matches as retryable errors", () => {
//#given
const error = {
message: "No available accounts for provider anthropic",
}
//#when
const retryable = isRetryableError(error, [429, 503, 529], ["no\\s+available\\s+accounts?"])
//#then
expect(retryable).toBe(true)
})
}) })

View File

@@ -1,4 +1,4 @@
import { DEFAULT_CONFIG, RETRYABLE_ERROR_PATTERNS } from "./constants" import { AUTO_RETRY_SIGNAL_KEYWORD_PATTERNS, DEFAULT_CONFIG, RETRYABLE_ERROR_PATTERNS } from "./constants"
export function getErrorMessage(error: unknown): string { export function getErrorMessage(error: unknown): string {
if (!error) return "" if (!error) return ""
@@ -99,13 +99,30 @@ export interface AutoRetrySignal {
signal: string signal: string
} }
export const AUTO_RETRY_PATTERNS: Array<(combined: string) => boolean> = [ function compilePatterns(patterns: string[]): RegExp[] {
(combined) => /retrying\s+in/i.test(combined), const compiled: RegExp[] = []
(combined) => for (const pattern of patterns) {
/(?:too\s+many\s+requests|quota\s*exceeded|quota\s+will\s+reset\s+after|usage\s+limit|rate\s+limit|limit\s+reached|all\s+credentials\s+for\s+model|cool(?:ing)?\s*down|exhausted\s+your\s+capacity)/i.test(combined), try {
] compiled.push(new RegExp(pattern, "i"))
} catch {
continue
}
}
return compiled
}
export function extractAutoRetrySignal(info: Record<string, unknown> | undefined): AutoRetrySignal | undefined { function resolveAutoRetryKeywordPatterns(retryOnMessagePatterns: string[] = []): RegExp[] {
return compilePatterns([...AUTO_RETRY_SIGNAL_KEYWORD_PATTERNS, ...retryOnMessagePatterns])
}
/**
 * Builds the full list of message regexes used to decide whether an error is retryable.
 *
 * @param retryOnMessagePatterns - User-configured regex source strings; defaults to none.
 * @returns The built-in `RETRYABLE_ERROR_PATTERNS` followed by the compiled user patterns.
 */
function resolveRetryableMessagePatterns(retryOnMessagePatterns: string[] = []): RegExp[] {
  // Built-ins stay first so their ordering is unaffected by user configuration.
  const customPatterns = compilePatterns(retryOnMessagePatterns)
  return [...RETRYABLE_ERROR_PATTERNS, ...customPatterns]
}
export function extractAutoRetrySignal(
info: Record<string, unknown> | undefined,
retryOnMessagePatterns: string[] = []
): AutoRetrySignal | undefined {
if (!info) return undefined if (!info) return undefined
const candidates: string[] = [] const candidates: string[] = []
@@ -125,7 +142,12 @@ export function extractAutoRetrySignal(info: Record<string, unknown> | undefined
const combined = candidates.join("\n") const combined = candidates.join("\n")
if (!combined) return undefined if (!combined) return undefined
const isAutoRetry = AUTO_RETRY_PATTERNS.every((test) => test(combined)) const autoRetryPatterns: Array<(combined: string) => boolean> = [
(text) => /retrying\s+in/i.test(text),
(text) => resolveAutoRetryKeywordPatterns(retryOnMessagePatterns).some((pattern) => pattern.test(text)),
]
const isAutoRetry = autoRetryPatterns.every((test) => test(combined))
if (isAutoRetry) { if (isAutoRetry) {
return { signal: combined } return { signal: combined }
} }
@@ -148,7 +170,11 @@ export function containsErrorContent(
return { hasError: false } return { hasError: false }
} }
export function isRetryableError(error: unknown, retryOnErrors: number[]): boolean { export function isRetryableError(
error: unknown,
retryOnErrors: number[],
retryOnMessagePatterns: string[] = []
): boolean {
const statusCode = extractStatusCode(error, retryOnErrors) const statusCode = extractStatusCode(error, retryOnErrors)
const message = getErrorMessage(error) const message = getErrorMessage(error)
const errorType = classifyErrorType(error) const errorType = classifyErrorType(error)
@@ -165,5 +191,5 @@ export function isRetryableError(error: unknown, retryOnErrors: number[]): boole
return true return true
} }
return RETRYABLE_ERROR_PATTERNS.some((pattern) => pattern.test(message)) return resolveRetryableMessagePatterns(retryOnMessagePatterns).some((pattern) => pattern.test(message))
} }

View File

@@ -116,7 +116,7 @@ export function createEventHandler(deps: HookDeps, helpers: AutoRetryHelpers) {
errorType: classifyErrorType(error), errorType: classifyErrorType(error),
}) })
if (!isRetryableError(error, config.retry_on_errors)) { if (!isRetryableError(error, config.retry_on_errors, config.retry_on_message_patterns)) {
log(`[${HOOK_NAME}] Error not retryable, skipping fallback`, { log(`[${HOOK_NAME}] Error not retryable, skipping fallback`, {
sessionID, sessionID,
retryable: false, retryable: false,
@@ -194,7 +194,10 @@ export function createEventHandler(deps: HookDeps, helpers: AutoRetryHelpers) {
if (!sessionID || status?.type !== "retry") return if (!sessionID || status?.type !== "retry") return
const retryMessage = typeof status.message === "string" ? status.message : "" const retryMessage = typeof status.message === "string" ? status.message : ""
const retrySignal = extractAutoRetrySignal({ status: retryMessage, message: retryMessage }) const retrySignal = extractAutoRetrySignal(
{ status: retryMessage, message: retryMessage },
config.retry_on_message_patterns
)
if (!retrySignal) return if (!retrySignal) return
const retryKey = `${extractRetryAttempt(status.attempt, retryMessage)}:${normalizeRetryStatusMessage(retryMessage)}` const retryKey = `${extractRetryAttempt(status.attempt, retryMessage)}:${normalizeRetryStatusMessage(retryMessage)}`

View File

@@ -15,6 +15,7 @@ export function createRuntimeFallbackHook(
const config = { const config = {
enabled: options?.config?.enabled ?? DEFAULT_CONFIG.enabled, enabled: options?.config?.enabled ?? DEFAULT_CONFIG.enabled,
retry_on_errors: options?.config?.retry_on_errors ?? DEFAULT_CONFIG.retry_on_errors, retry_on_errors: options?.config?.retry_on_errors ?? DEFAULT_CONFIG.retry_on_errors,
retry_on_message_patterns: options?.config?.retry_on_message_patterns ?? DEFAULT_CONFIG.retry_on_message_patterns,
max_fallback_attempts: options?.config?.max_fallback_attempts ?? DEFAULT_CONFIG.max_fallback_attempts, max_fallback_attempts: options?.config?.max_fallback_attempts ?? DEFAULT_CONFIG.max_fallback_attempts,
cooldown_seconds: options?.config?.cooldown_seconds ?? DEFAULT_CONFIG.cooldown_seconds, cooldown_seconds: options?.config?.cooldown_seconds ?? DEFAULT_CONFIG.cooldown_seconds,
timeout_seconds: options?.config?.timeout_seconds ?? DEFAULT_CONFIG.timeout_seconds, timeout_seconds: options?.config?.timeout_seconds ?? DEFAULT_CONFIG.timeout_seconds,

View File

@@ -255,6 +255,41 @@ describe("runtime-fallback", () => {
expect(errorLog).toBeDefined() expect(errorLog).toBeDefined()
}) })
test("should trigger fallback when custom retry_on_message_patterns matches", async () => {
const hook = createRuntimeFallbackHook(createMockPluginInput(), {
config: createMockConfig({
notify_on_fallback: false,
retry_on_message_patterns: ["no\\s+available\\s+accounts?"],
}),
pluginConfig: createMockPluginConfigWithCategoryFallback(["openai/gpt-5.4"]),
})
const sessionID = "test-session-custom-message-pattern"
SessionCategoryRegistry.register(sessionID, "test")
await hook.event({
event: {
type: "session.created",
properties: { info: { id: sessionID, model: "anthropic/claude-opus-4-6" } },
},
})
await hook.event({
event: {
type: "session.error",
properties: {
sessionID,
error: {
message: "No available accounts: no available accounts [retrying in 25s attempt #5]",
},
},
},
})
const fallbackLog = logCalls.find((c) => c.msg.includes("Preparing fallback"))
expect(fallbackLog).toBeDefined()
expect(fallbackLog?.data).toMatchObject({ from: "anthropic/claude-opus-4-6", to: "openai/gpt-5.4" })
})
test("should continue fallback chain when fallback model is not found", async () => { test("should continue fallback chain when fallback model is not found", async () => {
const hook = createRuntimeFallbackHook(createMockPluginInput(), { const hook = createRuntimeFallbackHook(createMockPluginInput(), {
config: createMockConfig({ notify_on_fallback: false }), config: createMockConfig({ notify_on_fallback: false }),

View File

@@ -52,7 +52,9 @@ export function hasVisibleAssistantResponse(extractAutoRetrySignalFn: typeof ext
export function createMessageUpdateHandler(deps: HookDeps, helpers: AutoRetryHelpers) { export function createMessageUpdateHandler(deps: HookDeps, helpers: AutoRetryHelpers) {
const { ctx, config, pluginConfig, sessionStates, sessionLastAccess, sessionRetryInFlight, sessionAwaitingFallbackResult } = deps const { ctx, config, pluginConfig, sessionStates, sessionLastAccess, sessionRetryInFlight, sessionAwaitingFallbackResult } = deps
const checkVisibleResponse = hasVisibleAssistantResponse(extractAutoRetrySignal) const checkVisibleResponse = hasVisibleAssistantResponse((info) =>
extractAutoRetrySignal(info, config.retry_on_message_patterns)
)
return async (props: Record<string, unknown> | undefined) => { return async (props: Record<string, unknown> | undefined) => {
const info = props?.info as Record<string, unknown> | undefined const info = props?.info as Record<string, unknown> | undefined
@@ -61,14 +63,17 @@ export function createMessageUpdateHandler(deps: HookDeps, helpers: AutoRetryHel
const eventParts = props?.parts as Array<{ type?: string; text?: string }> | undefined const eventParts = props?.parts as Array<{ type?: string; text?: string }> | undefined
const infoParts = info?.parts as Array<{ type?: string; text?: string }> | undefined const infoParts = info?.parts as Array<{ type?: string; text?: string }> | undefined
const parts = eventParts && eventParts.length > 0 ? eventParts : infoParts const parts = eventParts && eventParts.length > 0 ? eventParts : infoParts
const retrySignalResult = extractAutoRetrySignal(info) const retrySignalResult = extractAutoRetrySignal(info, config.retry_on_message_patterns)
const partsText = (parts ?? []) const partsText = (parts ?? [])
.filter((p) => typeof p?.text === "string") .filter((p) => typeof p?.text === "string")
.map((p) => (p.text ?? "").trim()) .map((p) => (p.text ?? "").trim())
.filter((text) => text.length > 0) .filter((text) => text.length > 0)
.join("\n") .join("\n")
const retrySignalFromParts = partsText const retrySignalFromParts = partsText
? extractAutoRetrySignal({ message: partsText, status: partsText, summary: partsText })?.signal ? extractAutoRetrySignal(
{ message: partsText, status: partsText, summary: partsText },
config.retry_on_message_patterns
)?.signal
: undefined : undefined
const retrySignal = retrySignalResult?.signal ?? retrySignalFromParts const retrySignal = retrySignalResult?.signal ?? retrySignalFromParts
const errorContentResult = containsErrorContent(parts) const errorContentResult = containsErrorContent(parts)
@@ -134,7 +139,7 @@ export function createMessageUpdateHandler(deps: HookDeps, helpers: AutoRetryHel
errorType: classifyErrorType(error), errorType: classifyErrorType(error),
}) })
if (!isRetryableError(error, config.retry_on_errors)) { if (!isRetryableError(error, config.retry_on_errors, config.retry_on_message_patterns)) {
log(`[${HOOK_NAME}] message.updated error not retryable, skipping fallback`, { log(`[${HOOK_NAME}] message.updated error not retryable, skipping fallback`, {
sessionID, sessionID,
statusCode: extractStatusCode(error, config.retry_on_errors), statusCode: extractStatusCode(error, config.retry_on_errors),