Merge pull request #2710 from MoerAI/fix/rate-limit-hang
fix(runtime-fallback): detect bare 429 rate-limit signals (fixes #2677)
This commit is contained in:
@@ -31,6 +31,20 @@ describe("runtime-fallback error classifier", () => {
|
||||
expect(signal).toBeDefined()
|
||||
})
|
||||
|
||||
test("detects too-many-requests auto-retry status signals without countdown text", () => {
  //#given a bare rate-limit status message that carries no retry countdown
  const rateLimitStatus =
    "Too Many Requests: Sorry, you've exhausted this model's rate limit. Please try a different model."

  //#when the status is passed through the auto-retry extractor
  const detected = extractAutoRetrySignal({ status: rateLimitStatus })

  //#then a signal is reported
  expect(detected).toBeDefined()
})
|
||||
|
||||
test("treats cooling-down retry messages as retryable", () => {
|
||||
//#given
|
||||
const error = {
|
||||
|
||||
@@ -167,7 +167,7 @@ export function extractAutoRetrySignal(info: Record<string, unknown> | undefined
|
||||
const combined = candidates.join("\n")
|
||||
if (!combined) return undefined
|
||||
|
||||
const isAutoRetry = AUTO_RETRY_PATTERNS.every((test) => test(combined))
|
||||
const isAutoRetry = AUTO_RETRY_PATTERNS.some((test) => test(combined))
|
||||
if (isAutoRetry) {
|
||||
return { signal: combined }
|
||||
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import { describe, expect, it } from "bun:test"
|
||||
import type { RuntimeFallbackPluginInput } from "./types"
|
||||
import { hasVisibleAssistantResponse } from "./visible-assistant-response"
|
||||
import { extractAutoRetrySignal } from "./error-classifier"
|
||||
|
||||
function createContext(messagesResponse: unknown): RuntimeFallbackPluginInput {
|
||||
return {
|
||||
@@ -53,4 +54,29 @@ describe("hasVisibleAssistantResponse", () => {
|
||||
// then
|
||||
expect(result).toBe(true)
|
||||
})
|
||||
|
||||
it("#given a too-many-requests assistant reply #when visibility is checked #then it is treated as an auto-retry signal", async () => {
  // given: a conversation whose assistant turn contains only the rate-limit notice
  const isVisible = hasVisibleAssistantResponse(extractAutoRetrySignal)
  const rateLimitNotice =
    "Too Many Requests: Sorry, you've exhausted this model's rate limit. Please try a different model."
  const pluginInput = createContext({
    data: [
      { info: { role: "user" }, parts: [{ type: "text", text: "latest question" }] },
      { info: { role: "assistant" }, parts: [{ type: "text", text: rateLimitNotice }] },
    ],
  })

  // when
  const outcome = await isVisible(pluginInput, "session-rate-limit", undefined)

  // then: the check reports no visible assistant response
  expect(outcome).toBe(false)
})
|
||||
})
|
||||
|
||||
Reference in New Issue
Block a user