Merge pull request #1553 from code-yeongyu/fix/1355-atlas-continuation-guard
fix(atlas): stop continuation retry loop on repeated prompt failures
This commit is contained in:
@@ -624,6 +624,11 @@ describe("atlas hook", () => {
|
||||
describe("session.idle handler (boulder continuation)", () => {
|
||||
const MAIN_SESSION_ID = "main-session-123"
|
||||
|
||||
async function flushMicrotasks(): Promise<void> {
|
||||
await Promise.resolve()
|
||||
await Promise.resolve()
|
||||
}
|
||||
|
||||
beforeEach(() => {
|
||||
mock.module("../../features/claude-code-session-state", () => ({
|
||||
getMainSessionID: () => MAIN_SESSION_ID,
|
||||
@@ -965,6 +970,135 @@ describe("atlas hook", () => {
|
||||
expect(mockInput._promptMock).toHaveBeenCalledTimes(1)
|
||||
})
|
||||
|
||||
test("should stop continuation after 2 consecutive prompt failures (issue #1355)", async () => {
|
||||
//#given - boulder state with incomplete plan and prompt always fails
|
||||
const planPath = join(TEST_DIR, "test-plan.md")
|
||||
writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2")
|
||||
|
||||
const state: BoulderState = {
|
||||
active_plan: planPath,
|
||||
started_at: "2026-01-02T10:00:00Z",
|
||||
session_ids: [MAIN_SESSION_ID],
|
||||
plan_name: "test-plan",
|
||||
}
|
||||
writeBoulderState(TEST_DIR, state)
|
||||
|
||||
const promptMock = mock(() => Promise.reject(new Error("Bad Request")))
|
||||
const mockInput = createMockPluginInput({ promptMock })
|
||||
const hook = createAtlasHook(mockInput)
|
||||
|
||||
const originalDateNow = Date.now
|
||||
let now = 0
|
||||
Date.now = () => now
|
||||
|
||||
try {
|
||||
//#when - idle fires repeatedly, past cooldown each time
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
|
||||
await flushMicrotasks()
|
||||
now += 6000
|
||||
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
|
||||
await flushMicrotasks()
|
||||
now += 6000
|
||||
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
|
||||
await flushMicrotasks()
|
||||
|
||||
//#then - should attempt only twice, then disable continuation
|
||||
expect(promptMock).toHaveBeenCalledTimes(2)
|
||||
} finally {
|
||||
Date.now = originalDateNow
|
||||
}
|
||||
})
|
||||
|
||||
test("should reset prompt failure counter on success and only stop after 2 consecutive failures", async () => {
|
||||
//#given - boulder state with incomplete plan
|
||||
const planPath = join(TEST_DIR, "test-plan.md")
|
||||
writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2")
|
||||
|
||||
const state: BoulderState = {
|
||||
active_plan: planPath,
|
||||
started_at: "2026-01-02T10:00:00Z",
|
||||
session_ids: [MAIN_SESSION_ID],
|
||||
plan_name: "test-plan",
|
||||
}
|
||||
writeBoulderState(TEST_DIR, state)
|
||||
|
||||
const promptMock = mock(() => Promise.resolve())
|
||||
promptMock.mockImplementationOnce(() => Promise.reject(new Error("Bad Request")))
|
||||
promptMock.mockImplementationOnce(() => Promise.resolve())
|
||||
promptMock.mockImplementationOnce(() => Promise.reject(new Error("Bad Request")))
|
||||
promptMock.mockImplementationOnce(() => Promise.reject(new Error("Bad Request")))
|
||||
|
||||
const mockInput = createMockPluginInput({ promptMock })
|
||||
const hook = createAtlasHook(mockInput)
|
||||
|
||||
const originalDateNow = Date.now
|
||||
let now = 0
|
||||
Date.now = () => now
|
||||
|
||||
try {
|
||||
//#when - fail, succeed (reset), then fail twice (disable), then attempt again
|
||||
for (let i = 0; i < 5; i++) {
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
|
||||
await flushMicrotasks()
|
||||
now += 6000
|
||||
}
|
||||
|
||||
//#then - 4 prompt attempts; 5th idle is skipped after 2 consecutive failures
|
||||
expect(promptMock).toHaveBeenCalledTimes(4)
|
||||
} finally {
|
||||
Date.now = originalDateNow
|
||||
}
|
||||
})
|
||||
|
||||
test("should reset continuation failure state on session.compacted event", async () => {
|
||||
//#given - boulder state with incomplete plan and prompt always fails
|
||||
const planPath = join(TEST_DIR, "test-plan.md")
|
||||
writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2")
|
||||
|
||||
const state: BoulderState = {
|
||||
active_plan: planPath,
|
||||
started_at: "2026-01-02T10:00:00Z",
|
||||
session_ids: [MAIN_SESSION_ID],
|
||||
plan_name: "test-plan",
|
||||
}
|
||||
writeBoulderState(TEST_DIR, state)
|
||||
|
||||
const promptMock = mock(() => Promise.reject(new Error("Bad Request")))
|
||||
const mockInput = createMockPluginInput({ promptMock })
|
||||
const hook = createAtlasHook(mockInput)
|
||||
|
||||
const originalDateNow = Date.now
|
||||
let now = 0
|
||||
Date.now = () => now
|
||||
|
||||
try {
|
||||
//#when - two failures disables continuation, then compaction resets it
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
|
||||
await flushMicrotasks()
|
||||
now += 6000
|
||||
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
|
||||
await flushMicrotasks()
|
||||
now += 6000
|
||||
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
|
||||
await flushMicrotasks()
|
||||
|
||||
await hook.handler({ event: { type: "session.compacted", properties: { sessionID: MAIN_SESSION_ID } } })
|
||||
now += 6000
|
||||
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
|
||||
await flushMicrotasks()
|
||||
|
||||
//#then - 2 attempts + 1 after compaction (3 total)
|
||||
expect(promptMock).toHaveBeenCalledTimes(3)
|
||||
} finally {
|
||||
Date.now = originalDateNow
|
||||
}
|
||||
})
|
||||
|
||||
test("should cleanup on session.deleted", async () => {
|
||||
// given - boulder state
|
||||
const planPath = join(TEST_DIR, "test-plan.md")
|
||||
|
||||
@@ -391,6 +391,7 @@ interface ToolExecuteAfterOutput {
|
||||
interface SessionState {
|
||||
lastEventWasAbortError?: boolean
|
||||
lastContinuationInjectedAt?: number
|
||||
promptFailureCount: number
|
||||
}
|
||||
|
||||
const CONTINUATION_COOLDOWN_MS = 5000
|
||||
@@ -432,13 +433,14 @@ export function createAtlasHook(
|
||||
function getState(sessionID: string): SessionState {
|
||||
let state = sessions.get(sessionID)
|
||||
if (!state) {
|
||||
state = {}
|
||||
state = { promptFailureCount: 0 }
|
||||
sessions.set(sessionID, state)
|
||||
}
|
||||
return state
|
||||
}
|
||||
|
||||
async function injectContinuation(sessionID: string, planName: string, remaining: number, total: number, agent?: string): Promise<void> {
|
||||
const state = getState(sessionID)
|
||||
const hasRunningBgTasks = backgroundManager
|
||||
? backgroundManager.getTasksByParentSession(sessionID).some(t => t.status === "running")
|
||||
: false
|
||||
@@ -481,21 +483,28 @@ export function createAtlasHook(
|
||||
: undefined
|
||||
}
|
||||
|
||||
await ctx.client.session.prompt({
|
||||
path: { id: sessionID },
|
||||
body: {
|
||||
agent: agent ?? "atlas",
|
||||
...(model !== undefined ? { model } : {}),
|
||||
parts: [{ type: "text", text: prompt }],
|
||||
},
|
||||
query: { directory: ctx.directory },
|
||||
})
|
||||
await ctx.client.session.prompt({
|
||||
path: { id: sessionID },
|
||||
body: {
|
||||
agent: agent ?? "atlas",
|
||||
...(model !== undefined ? { model } : {}),
|
||||
parts: [{ type: "text", text: prompt }],
|
||||
},
|
||||
query: { directory: ctx.directory },
|
||||
})
|
||||
|
||||
log(`[${HOOK_NAME}] Boulder continuation injected`, { sessionID })
|
||||
} catch (err) {
|
||||
log(`[${HOOK_NAME}] Boulder continuation failed`, { sessionID, error: String(err) })
|
||||
}
|
||||
}
|
||||
state.promptFailureCount = 0
|
||||
|
||||
log(`[${HOOK_NAME}] Boulder continuation injected`, { sessionID })
|
||||
} catch (err) {
|
||||
state.promptFailureCount += 1
|
||||
log(`[${HOOK_NAME}] Boulder continuation failed`, {
|
||||
sessionID,
|
||||
error: String(err),
|
||||
promptFailureCount: state.promptFailureCount,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
handler: async ({ event }: { event: { type: string; properties?: unknown } }): Promise<void> => {
|
||||
@@ -541,6 +550,14 @@ export function createAtlasHook(
|
||||
return
|
||||
}
|
||||
|
||||
if (state.promptFailureCount >= 2) {
|
||||
log(`[${HOOK_NAME}] Skipped: continuation disabled after repeated prompt failures`, {
|
||||
sessionID,
|
||||
promptFailureCount: state.promptFailureCount,
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
const hasRunningBgTasks = backgroundManager
|
||||
? backgroundManager.getTasksByParentSession(sessionID).some(t => t.status === "running")
|
||||
: false
|
||||
@@ -631,6 +648,17 @@ export function createAtlasHook(
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
if (event.type === "session.compacted") {
|
||||
const sessionID = (props?.sessionID ?? (props?.info as { id?: string } | undefined)?.id) as
|
||||
| string
|
||||
| undefined
|
||||
if (sessionID) {
|
||||
sessions.delete(sessionID)
|
||||
log(`[${HOOK_NAME}] Session compacted: cleaned up`, { sessionID })
|
||||
}
|
||||
return
|
||||
}
|
||||
},
|
||||
|
||||
"tool.execute.before": async (
|
||||
|
||||
Reference in New Issue
Block a user