Compare commits
2 Commits
fix/runtim
...
feat/runti
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b8c16ac070 | ||
|
|
4abbd1fc14 |
@@ -566,6 +566,7 @@ Auto-switches to backup models on API errors.
|
|||||||
"runtime_fallback": {
|
"runtime_fallback": {
|
||||||
"enabled": true,
|
"enabled": true,
|
||||||
"retry_on_errors": [400, 429, 503, 529],
|
"retry_on_errors": [400, 429, 503, 529],
|
||||||
|
"retry_on_message_patterns": ["no\\s+available\\s+accounts?"],
|
||||||
"max_fallback_attempts": 3,
|
"max_fallback_attempts": 3,
|
||||||
"cooldown_seconds": 60,
|
"cooldown_seconds": 60,
|
||||||
"timeout_seconds": 30,
|
"timeout_seconds": 30,
|
||||||
@@ -578,6 +579,7 @@ Auto-switches to backup models on API errors.
|
|||||||
| ----------------------- | ------------------- | ------------------------------------------------------------------------------------------------------------------------------ |
|
| ----------------------- | ------------------- | ------------------------------------------------------------------------------------------------------------------------------ |
|
||||||
| `enabled` | `false` | Enable runtime fallback |
|
| `enabled` | `false` | Enable runtime fallback |
|
||||||
| `retry_on_errors` | `[429,500,502,503,504]` | HTTP codes that trigger fallback. Also handles classified provider key errors. |
|
| `retry_on_errors` | `[429,500,502,503,504]` | HTTP codes that trigger fallback. Also handles classified provider key errors. |
|
||||||
|
| `retry_on_message_patterns` | `[]` | Regex pattern strings matched case-insensitively against provider messages; patterns that fail to compile are ignored. Use this for provider-specific retry text that does not include status codes. |
|
||||||
| `max_fallback_attempts` | `3` | Max fallback attempts per session (1–20) |
|
| `max_fallback_attempts` | `3` | Max fallback attempts per session (1–20) |
|
||||||
| `cooldown_seconds` | `60` | Seconds before retrying a failed model |
|
| `cooldown_seconds` | `60` | Seconds before retrying a failed model |
|
||||||
| `timeout_seconds` | `30` | Seconds before forcing next fallback. **Set to `0` to disable timeout-based escalation and provider retry message detection.** |
|
| `timeout_seconds` | `30` | Seconds before forcing next fallback. **Set to `0` to disable timeout-based escalation and provider retry message detection.** |
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ export const RuntimeFallbackConfigSchema = z.object({
|
|||||||
enabled: z.boolean().optional(),
|
enabled: z.boolean().optional(),
|
||||||
/** HTTP status codes that trigger fallback (default: [429, 500, 502, 503, 504]) */
|
/** HTTP status codes that trigger fallback (default: [429, 500, 502, 503, 504]) */
|
||||||
retry_on_errors: z.array(z.number()).optional(),
|
retry_on_errors: z.array(z.number()).optional(),
|
||||||
|
retry_on_message_patterns: z.array(z.string()).optional(),
|
||||||
/** Maximum fallback attempts per session (default: 3) */
|
/** Maximum fallback attempts per session (default: 3) */
|
||||||
max_fallback_attempts: z.number().min(1).max(20).optional(),
|
max_fallback_attempts: z.number().min(1).max(20).optional(),
|
||||||
/** Cooldown in seconds before retrying a failed model (default: 60) */
|
/** Cooldown in seconds before retrying a failed model (default: 60) */
|
||||||
|
|||||||
@@ -12,12 +12,25 @@ import type { RuntimeFallbackConfig } from "../../config"
|
|||||||
export const DEFAULT_CONFIG: Required<RuntimeFallbackConfig> = {
|
export const DEFAULT_CONFIG: Required<RuntimeFallbackConfig> = {
|
||||||
enabled: false,
|
enabled: false,
|
||||||
retry_on_errors: [429, 500, 502, 503, 504],
|
retry_on_errors: [429, 500, 502, 503, 504],
|
||||||
|
retry_on_message_patterns: [],
|
||||||
max_fallback_attempts: 3,
|
max_fallback_attempts: 3,
|
||||||
cooldown_seconds: 60,
|
cooldown_seconds: 60,
|
||||||
timeout_seconds: 30,
|
timeout_seconds: 30,
|
||||||
notify_on_fallback: true,
|
notify_on_fallback: true,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export const AUTO_RETRY_SIGNAL_KEYWORD_PATTERNS = [
|
||||||
|
"too\\s+many\\s+requests",
|
||||||
|
"quota\\s*exceeded",
|
||||||
|
"quota\\s+will\\s+reset\\s+after",
|
||||||
|
"usage\\s+limit",
|
||||||
|
"rate\\s+limit",
|
||||||
|
"limit\\s+reached",
|
||||||
|
"all\\s+credentials\\s+for\\s+model",
|
||||||
|
"cool(?:ing)?\\s*down",
|
||||||
|
"exhausted\\s+your\\s+capacity",
|
||||||
|
]
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Error patterns that indicate rate limiting or temporary failures
|
* Error patterns that indicate rate limiting or temporary failures
|
||||||
* These are checked in addition to HTTP status codes
|
* These are checked in addition to HTTP status codes
|
||||||
|
|||||||
@@ -57,4 +57,43 @@ describe("runtime-fallback error classifier", () => {
|
|||||||
//#then
|
//#then
|
||||||
expect(signal).toBeUndefined()
|
expect(signal).toBeUndefined()
|
||||||
})
|
})
|
||||||
|
|
||||||
|
test("does not classify no-available-accounts without configured message pattern", () => {
|
||||||
|
//#given
|
||||||
|
const info = {
|
||||||
|
status: "No available accounts: no available accounts [retrying in 25s attempt #5]",
|
||||||
|
}
|
||||||
|
|
||||||
|
//#when
|
||||||
|
const signal = extractAutoRetrySignal(info)
|
||||||
|
|
||||||
|
//#then
|
||||||
|
expect(signal).toBeUndefined()
|
||||||
|
})
|
||||||
|
|
||||||
|
test("classifies no-available-accounts when configured message pattern is provided", () => {
|
||||||
|
//#given
|
||||||
|
const info = {
|
||||||
|
status: "No available accounts: no available accounts [retrying in 25s attempt #5]",
|
||||||
|
}
|
||||||
|
|
||||||
|
//#when
|
||||||
|
const signal = extractAutoRetrySignal(info, ["no\\s+available\\s+accounts?"])
|
||||||
|
|
||||||
|
//#then
|
||||||
|
expect(signal).toBeDefined()
|
||||||
|
})
|
||||||
|
|
||||||
|
test("treats configured message pattern matches as retryable errors", () => {
|
||||||
|
//#given
|
||||||
|
const error = {
|
||||||
|
message: "No available accounts for provider anthropic",
|
||||||
|
}
|
||||||
|
|
||||||
|
//#when
|
||||||
|
const retryable = isRetryableError(error, [429, 503, 529], ["no\\s+available\\s+accounts?"])
|
||||||
|
|
||||||
|
//#then
|
||||||
|
expect(retryable).toBe(true)
|
||||||
|
})
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
import { DEFAULT_CONFIG, RETRYABLE_ERROR_PATTERNS } from "./constants"
|
import { AUTO_RETRY_SIGNAL_KEYWORD_PATTERNS, DEFAULT_CONFIG, RETRYABLE_ERROR_PATTERNS } from "./constants"
|
||||||
|
|
||||||
export function getErrorMessage(error: unknown): string {
|
export function getErrorMessage(error: unknown): string {
|
||||||
if (!error) return ""
|
if (!error) return ""
|
||||||
@@ -99,13 +99,30 @@ export interface AutoRetrySignal {
|
|||||||
signal: string
|
signal: string
|
||||||
}
|
}
|
||||||
|
|
||||||
export const AUTO_RETRY_PATTERNS: Array<(combined: string) => boolean> = [
|
function compilePatterns(patterns: string[]): RegExp[] {
|
||||||
(combined) => /retrying\s+in/i.test(combined),
|
const compiled: RegExp[] = []
|
||||||
(combined) =>
|
for (const pattern of patterns) {
|
||||||
/(?:too\s+many\s+requests|quota\s*exceeded|quota\s+will\s+reset\s+after|usage\s+limit|rate\s+limit|limit\s+reached|all\s+credentials\s+for\s+model|cool(?:ing)?\s*down|exhausted\s+your\s+capacity)/i.test(combined),
|
try {
|
||||||
]
|
compiled.push(new RegExp(pattern, "i"))
|
||||||
|
} catch {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return compiled
|
||||||
|
}
|
||||||
|
|
||||||
export function extractAutoRetrySignal(info: Record<string, unknown> | undefined): AutoRetrySignal | undefined {
|
function resolveAutoRetryKeywordPatterns(retryOnMessagePatterns: string[] = []): RegExp[] {
|
||||||
|
return compilePatterns([...AUTO_RETRY_SIGNAL_KEYWORD_PATTERNS, ...retryOnMessagePatterns])
|
||||||
|
}
|
||||||
|
|
||||||
|
function resolveRetryableMessagePatterns(retryOnMessagePatterns: string[] = []): RegExp[] {
|
||||||
|
return [...RETRYABLE_ERROR_PATTERNS, ...compilePatterns(retryOnMessagePatterns)]
|
||||||
|
}
|
||||||
|
|
||||||
|
export function extractAutoRetrySignal(
|
||||||
|
info: Record<string, unknown> | undefined,
|
||||||
|
retryOnMessagePatterns: string[] = []
|
||||||
|
): AutoRetrySignal | undefined {
|
||||||
if (!info) return undefined
|
if (!info) return undefined
|
||||||
|
|
||||||
const candidates: string[] = []
|
const candidates: string[] = []
|
||||||
@@ -125,7 +142,12 @@ export function extractAutoRetrySignal(info: Record<string, unknown> | undefined
|
|||||||
const combined = candidates.join("\n")
|
const combined = candidates.join("\n")
|
||||||
if (!combined) return undefined
|
if (!combined) return undefined
|
||||||
|
|
||||||
const isAutoRetry = AUTO_RETRY_PATTERNS.every((test) => test(combined))
|
const autoRetryPatterns: Array<(combined: string) => boolean> = [
|
||||||
|
(text) => /retrying\s+in/i.test(text),
|
||||||
|
(text) => resolveAutoRetryKeywordPatterns(retryOnMessagePatterns).some((pattern) => pattern.test(text)),
|
||||||
|
]
|
||||||
|
|
||||||
|
const isAutoRetry = autoRetryPatterns.every((test) => test(combined))
|
||||||
if (isAutoRetry) {
|
if (isAutoRetry) {
|
||||||
return { signal: combined }
|
return { signal: combined }
|
||||||
}
|
}
|
||||||
@@ -148,7 +170,11 @@ export function containsErrorContent(
|
|||||||
return { hasError: false }
|
return { hasError: false }
|
||||||
}
|
}
|
||||||
|
|
||||||
export function isRetryableError(error: unknown, retryOnErrors: number[]): boolean {
|
export function isRetryableError(
|
||||||
|
error: unknown,
|
||||||
|
retryOnErrors: number[],
|
||||||
|
retryOnMessagePatterns: string[] = []
|
||||||
|
): boolean {
|
||||||
const statusCode = extractStatusCode(error, retryOnErrors)
|
const statusCode = extractStatusCode(error, retryOnErrors)
|
||||||
const message = getErrorMessage(error)
|
const message = getErrorMessage(error)
|
||||||
const errorType = classifyErrorType(error)
|
const errorType = classifyErrorType(error)
|
||||||
@@ -165,5 +191,5 @@ export function isRetryableError(error: unknown, retryOnErrors: number[]): boole
|
|||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
return RETRYABLE_ERROR_PATTERNS.some((pattern) => pattern.test(message))
|
return resolveRetryableMessagePatterns(retryOnMessagePatterns).some((pattern) => pattern.test(message))
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -116,7 +116,7 @@ export function createEventHandler(deps: HookDeps, helpers: AutoRetryHelpers) {
|
|||||||
errorType: classifyErrorType(error),
|
errorType: classifyErrorType(error),
|
||||||
})
|
})
|
||||||
|
|
||||||
if (!isRetryableError(error, config.retry_on_errors)) {
|
if (!isRetryableError(error, config.retry_on_errors, config.retry_on_message_patterns)) {
|
||||||
log(`[${HOOK_NAME}] Error not retryable, skipping fallback`, {
|
log(`[${HOOK_NAME}] Error not retryable, skipping fallback`, {
|
||||||
sessionID,
|
sessionID,
|
||||||
retryable: false,
|
retryable: false,
|
||||||
@@ -194,7 +194,10 @@ export function createEventHandler(deps: HookDeps, helpers: AutoRetryHelpers) {
|
|||||||
if (!sessionID || status?.type !== "retry") return
|
if (!sessionID || status?.type !== "retry") return
|
||||||
|
|
||||||
const retryMessage = typeof status.message === "string" ? status.message : ""
|
const retryMessage = typeof status.message === "string" ? status.message : ""
|
||||||
const retrySignal = extractAutoRetrySignal({ status: retryMessage, message: retryMessage })
|
const retrySignal = extractAutoRetrySignal(
|
||||||
|
{ status: retryMessage, message: retryMessage },
|
||||||
|
config.retry_on_message_patterns
|
||||||
|
)
|
||||||
if (!retrySignal) return
|
if (!retrySignal) return
|
||||||
|
|
||||||
const retryKey = `${extractRetryAttempt(status.attempt, retryMessage)}:${normalizeRetryStatusMessage(retryMessage)}`
|
const retryKey = `${extractRetryAttempt(status.attempt, retryMessage)}:${normalizeRetryStatusMessage(retryMessage)}`
|
||||||
|
|||||||
@@ -15,6 +15,7 @@ export function createRuntimeFallbackHook(
|
|||||||
const config = {
|
const config = {
|
||||||
enabled: options?.config?.enabled ?? DEFAULT_CONFIG.enabled,
|
enabled: options?.config?.enabled ?? DEFAULT_CONFIG.enabled,
|
||||||
retry_on_errors: options?.config?.retry_on_errors ?? DEFAULT_CONFIG.retry_on_errors,
|
retry_on_errors: options?.config?.retry_on_errors ?? DEFAULT_CONFIG.retry_on_errors,
|
||||||
|
retry_on_message_patterns: options?.config?.retry_on_message_patterns ?? DEFAULT_CONFIG.retry_on_message_patterns,
|
||||||
max_fallback_attempts: options?.config?.max_fallback_attempts ?? DEFAULT_CONFIG.max_fallback_attempts,
|
max_fallback_attempts: options?.config?.max_fallback_attempts ?? DEFAULT_CONFIG.max_fallback_attempts,
|
||||||
cooldown_seconds: options?.config?.cooldown_seconds ?? DEFAULT_CONFIG.cooldown_seconds,
|
cooldown_seconds: options?.config?.cooldown_seconds ?? DEFAULT_CONFIG.cooldown_seconds,
|
||||||
timeout_seconds: options?.config?.timeout_seconds ?? DEFAULT_CONFIG.timeout_seconds,
|
timeout_seconds: options?.config?.timeout_seconds ?? DEFAULT_CONFIG.timeout_seconds,
|
||||||
|
|||||||
@@ -255,6 +255,41 @@ describe("runtime-fallback", () => {
|
|||||||
expect(errorLog).toBeDefined()
|
expect(errorLog).toBeDefined()
|
||||||
})
|
})
|
||||||
|
|
||||||
|
test("should trigger fallback when custom retry_on_message_patterns matches", async () => {
|
||||||
|
const hook = createRuntimeFallbackHook(createMockPluginInput(), {
|
||||||
|
config: createMockConfig({
|
||||||
|
notify_on_fallback: false,
|
||||||
|
retry_on_message_patterns: ["no\\s+available\\s+accounts?"],
|
||||||
|
}),
|
||||||
|
pluginConfig: createMockPluginConfigWithCategoryFallback(["openai/gpt-5.4"]),
|
||||||
|
})
|
||||||
|
const sessionID = "test-session-custom-message-pattern"
|
||||||
|
SessionCategoryRegistry.register(sessionID, "test")
|
||||||
|
|
||||||
|
await hook.event({
|
||||||
|
event: {
|
||||||
|
type: "session.created",
|
||||||
|
properties: { info: { id: sessionID, model: "anthropic/claude-opus-4-6" } },
|
||||||
|
},
|
||||||
|
})
|
||||||
|
|
||||||
|
await hook.event({
|
||||||
|
event: {
|
||||||
|
type: "session.error",
|
||||||
|
properties: {
|
||||||
|
sessionID,
|
||||||
|
error: {
|
||||||
|
message: "No available accounts: no available accounts [retrying in 25s attempt #5]",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
})
|
||||||
|
|
||||||
|
const fallbackLog = logCalls.find((c) => c.msg.includes("Preparing fallback"))
|
||||||
|
expect(fallbackLog).toBeDefined()
|
||||||
|
expect(fallbackLog?.data).toMatchObject({ from: "anthropic/claude-opus-4-6", to: "openai/gpt-5.4" })
|
||||||
|
})
|
||||||
|
|
||||||
test("should continue fallback chain when fallback model is not found", async () => {
|
test("should continue fallback chain when fallback model is not found", async () => {
|
||||||
const hook = createRuntimeFallbackHook(createMockPluginInput(), {
|
const hook = createRuntimeFallbackHook(createMockPluginInput(), {
|
||||||
config: createMockConfig({ notify_on_fallback: false }),
|
config: createMockConfig({ notify_on_fallback: false }),
|
||||||
|
|||||||
@@ -52,7 +52,9 @@ export function hasVisibleAssistantResponse(extractAutoRetrySignalFn: typeof ext
|
|||||||
|
|
||||||
export function createMessageUpdateHandler(deps: HookDeps, helpers: AutoRetryHelpers) {
|
export function createMessageUpdateHandler(deps: HookDeps, helpers: AutoRetryHelpers) {
|
||||||
const { ctx, config, pluginConfig, sessionStates, sessionLastAccess, sessionRetryInFlight, sessionAwaitingFallbackResult } = deps
|
const { ctx, config, pluginConfig, sessionStates, sessionLastAccess, sessionRetryInFlight, sessionAwaitingFallbackResult } = deps
|
||||||
const checkVisibleResponse = hasVisibleAssistantResponse(extractAutoRetrySignal)
|
const checkVisibleResponse = hasVisibleAssistantResponse((info) =>
|
||||||
|
extractAutoRetrySignal(info, config.retry_on_message_patterns)
|
||||||
|
)
|
||||||
|
|
||||||
return async (props: Record<string, unknown> | undefined) => {
|
return async (props: Record<string, unknown> | undefined) => {
|
||||||
const info = props?.info as Record<string, unknown> | undefined
|
const info = props?.info as Record<string, unknown> | undefined
|
||||||
@@ -61,14 +63,17 @@ export function createMessageUpdateHandler(deps: HookDeps, helpers: AutoRetryHel
|
|||||||
const eventParts = props?.parts as Array<{ type?: string; text?: string }> | undefined
|
const eventParts = props?.parts as Array<{ type?: string; text?: string }> | undefined
|
||||||
const infoParts = info?.parts as Array<{ type?: string; text?: string }> | undefined
|
const infoParts = info?.parts as Array<{ type?: string; text?: string }> | undefined
|
||||||
const parts = eventParts && eventParts.length > 0 ? eventParts : infoParts
|
const parts = eventParts && eventParts.length > 0 ? eventParts : infoParts
|
||||||
const retrySignalResult = extractAutoRetrySignal(info)
|
const retrySignalResult = extractAutoRetrySignal(info, config.retry_on_message_patterns)
|
||||||
const partsText = (parts ?? [])
|
const partsText = (parts ?? [])
|
||||||
.filter((p) => typeof p?.text === "string")
|
.filter((p) => typeof p?.text === "string")
|
||||||
.map((p) => (p.text ?? "").trim())
|
.map((p) => (p.text ?? "").trim())
|
||||||
.filter((text) => text.length > 0)
|
.filter((text) => text.length > 0)
|
||||||
.join("\n")
|
.join("\n")
|
||||||
const retrySignalFromParts = partsText
|
const retrySignalFromParts = partsText
|
||||||
? extractAutoRetrySignal({ message: partsText, status: partsText, summary: partsText })?.signal
|
? extractAutoRetrySignal(
|
||||||
|
{ message: partsText, status: partsText, summary: partsText },
|
||||||
|
config.retry_on_message_patterns
|
||||||
|
)?.signal
|
||||||
: undefined
|
: undefined
|
||||||
const retrySignal = retrySignalResult?.signal ?? retrySignalFromParts
|
const retrySignal = retrySignalResult?.signal ?? retrySignalFromParts
|
||||||
const errorContentResult = containsErrorContent(parts)
|
const errorContentResult = containsErrorContent(parts)
|
||||||
@@ -134,7 +139,7 @@ export function createMessageUpdateHandler(deps: HookDeps, helpers: AutoRetryHel
|
|||||||
errorType: classifyErrorType(error),
|
errorType: classifyErrorType(error),
|
||||||
})
|
})
|
||||||
|
|
||||||
if (!isRetryableError(error, config.retry_on_errors)) {
|
if (!isRetryableError(error, config.retry_on_errors, config.retry_on_message_patterns)) {
|
||||||
log(`[${HOOK_NAME}] message.updated error not retryable, skipping fallback`, {
|
log(`[${HOOK_NAME}] message.updated error not retryable, skipping fallback`, {
|
||||||
sessionID,
|
sessionID,
|
||||||
statusCode: extractStatusCode(error, config.retry_on_errors),
|
statusCode: extractStatusCode(error, config.retry_on_errors),
|
||||||
|
|||||||
Reference in New Issue
Block a user