fix(background-agent): prevent circuit breaker false positives on flat-format events
This commit is contained in:
@@ -1,3 +1,5 @@
|
||||
/// <reference types="bun-types" />
|
||||
|
||||
import { describe, expect, test } from "bun:test"
|
||||
import {
|
||||
createToolCallSignature,
|
||||
@@ -19,7 +21,7 @@ function buildWindow(
|
||||
}
|
||||
|
||||
function buildWindowWithInputs(
|
||||
calls: Array<{ tool: string; input?: Record<string, unknown> }>,
|
||||
calls: Array<{ tool: string; input?: Record<string, unknown> | null }>,
|
||||
override?: Parameters<typeof resolveCircuitBreakerSettings>[0]
|
||||
) {
|
||||
const settings = resolveCircuitBreakerSettings(override)
|
||||
@@ -148,7 +150,12 @@ describe("loop-detector", () => {
|
||||
|
||||
describe("#given the same tool is called consecutively", () => {
|
||||
test("#when evaluated #then it triggers", () => {
|
||||
const window = buildWindow(Array.from({ length: 20 }, () => "read"))
|
||||
const window = buildWindowWithInputs(
|
||||
Array.from({ length: 20 }, () => ({
|
||||
tool: "read",
|
||||
input: { filePath: "/src/same.ts" },
|
||||
}))
|
||||
)
|
||||
|
||||
const result = detectRepetitiveToolUse(window)
|
||||
|
||||
@@ -176,7 +183,12 @@ describe("loop-detector", () => {
|
||||
|
||||
describe("#given threshold boundary", () => {
|
||||
test("#when below threshold #then it does not trigger", () => {
|
||||
const belowThresholdWindow = buildWindow(Array.from({ length: 19 }, () => "read"))
|
||||
const belowThresholdWindow = buildWindowWithInputs(
|
||||
Array.from({ length: 19 }, () => ({
|
||||
tool: "read",
|
||||
input: { filePath: "/src/same.ts" },
|
||||
}))
|
||||
)
|
||||
|
||||
const result = detectRepetitiveToolUse(belowThresholdWindow)
|
||||
|
||||
@@ -184,7 +196,12 @@ describe("loop-detector", () => {
|
||||
})
|
||||
|
||||
test("#when equal to threshold #then it triggers", () => {
|
||||
const atThresholdWindow = buildWindow(Array.from({ length: 20 }, () => "read"))
|
||||
const atThresholdWindow = buildWindowWithInputs(
|
||||
Array.from({ length: 20 }, () => ({
|
||||
tool: "read",
|
||||
input: { filePath: "/src/same.ts" },
|
||||
}))
|
||||
)
|
||||
|
||||
const result = detectRepetitiveToolUse(atThresholdWindow)
|
||||
|
||||
@@ -224,16 +241,22 @@ describe("loop-detector", () => {
|
||||
})
|
||||
})
|
||||
|
||||
describe("#given tool calls with no input", () => {
|
||||
test("#when evaluated #then it triggers", () => {
|
||||
describe("#given tool calls with undefined input", () => {
|
||||
test("#when evaluated #then it does not trigger", () => {
|
||||
const calls = Array.from({ length: 20 }, () => ({ tool: "read" }))
|
||||
const window = buildWindowWithInputs(calls)
|
||||
const result = detectRepetitiveToolUse(window)
|
||||
expect(result).toEqual({
|
||||
triggered: true,
|
||||
toolName: "read",
|
||||
repeatedCount: 20,
|
||||
})
|
||||
expect(result).toEqual({ triggered: false })
|
||||
})
|
||||
})
|
||||
|
||||
describe("#given tool calls with null input", () => {
|
||||
test("#when evaluated #then it does not trigger", () => {
|
||||
const calls = Array.from({ length: 20 }, () => ({ tool: "read", input: null }))
|
||||
const window = buildWindowWithInputs(calls)
|
||||
const result = detectRepetitiveToolUse(window)
|
||||
|
||||
expect(result).toEqual({ triggered: false })
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
@@ -36,6 +36,14 @@ export function recordToolCall(
|
||||
settings: CircuitBreakerSettings,
|
||||
toolInput?: Record<string, unknown> | null
|
||||
): ToolCallWindow {
|
||||
if (toolInput === undefined || toolInput === null) {
|
||||
return {
|
||||
lastSignature: `${toolName}::__unknown-input__`,
|
||||
consecutiveCount: 1,
|
||||
threshold: settings.consecutiveThreshold,
|
||||
}
|
||||
}
|
||||
|
||||
const signature = createToolCallSignature(toolName, toolInput)
|
||||
|
||||
if (window && window.lastSignature === signature) {
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
/// <reference types="bun-types" />
|
||||
|
||||
import { describe, expect, test } from "bun:test"
|
||||
import type { PluginInput } from "@opencode-ai/plugin"
|
||||
import { tmpdir } from "node:os"
|
||||
@@ -38,8 +40,8 @@ async function flushAsyncWork() {
|
||||
}
|
||||
|
||||
describe("BackgroundManager circuit breaker", () => {
|
||||
describe("#given the same tool is called consecutively", () => {
|
||||
test("#when consecutive tool events arrive #then the task is cancelled", async () => {
|
||||
describe("#given flat-format tool events have no state.input", () => {
|
||||
test("#when 20 consecutive read events arrive #then the task keeps running", async () => {
|
||||
const manager = createManager({
|
||||
circuitBreaker: {
|
||||
consecutiveThreshold: 20,
|
||||
@@ -71,8 +73,8 @@ describe("BackgroundManager circuit breaker", () => {
|
||||
|
||||
await flushAsyncWork()
|
||||
|
||||
expect(task.status).toBe("cancelled")
|
||||
expect(task.error).toContain("read 20 consecutive times")
|
||||
expect(task.status).toBe("running")
|
||||
expect(task.progress?.toolCalls).toBe(20)
|
||||
})
|
||||
})
|
||||
|
||||
@@ -126,7 +128,7 @@ describe("BackgroundManager circuit breaker", () => {
|
||||
})
|
||||
|
||||
describe("#given the absolute cap is configured lower than the repetition detector needs", () => {
|
||||
test("#when the raw tool-call cap is reached #then the backstop still cancels the task", async () => {
|
||||
test("#when repeated flat-format tool events reach maxToolCalls #then the backstop still cancels the task", async () => {
|
||||
const manager = createManager({
|
||||
maxToolCalls: 3,
|
||||
circuitBreaker: {
|
||||
@@ -150,10 +152,10 @@ describe("BackgroundManager circuit breaker", () => {
|
||||
}
|
||||
getTaskMap(manager).set(task.id, task)
|
||||
|
||||
for (const toolName of ["read", "grep", "edit"]) {
|
||||
for (let i = 0; i < 3; i++) {
|
||||
manager.handleEvent({
|
||||
type: "message.part.updated",
|
||||
properties: { sessionID: task.sessionID, type: "tool", tool: toolName },
|
||||
properties: { sessionID: task.sessionID, type: "tool", tool: "read" },
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user