Merge pull request #1553 from code-yeongyu/fix/1355-atlas-continuation-guard

fix(atlas): stop continuation retry loop on repeated prompt failures
This commit is contained in:
YeonGyu-Kim
2026-02-06 18:57:32 +09:00
committed by GitHub
2 changed files with 177 additions and 15 deletions

View File

@@ -624,6 +624,11 @@ describe("atlas hook", () => {
describe("session.idle handler (boulder continuation)", () => {
const MAIN_SESSION_ID = "main-session-123"
/**
 * Yields to the microtask queue twice so that promise callbacks scheduled by
 * the code under test get a chance to run before assertions are made.
 */
async function flushMicrotasks(): Promise<void> {
  for (let tick = 0; tick < 2; tick += 1) {
    await Promise.resolve()
  }
}
beforeEach(() => {
mock.module("../../features/claude-code-session-state", () => ({
getMainSessionID: () => MAIN_SESSION_ID,
@@ -965,6 +970,135 @@ describe("atlas hook", () => {
expect(mockInput._promptMock).toHaveBeenCalledTimes(1)
})
test("should stop continuation after 2 consecutive prompt failures (issue #1355)", async () => {
  //#given - an active boulder with unfinished tasks, and a prompt call that always rejects
  const planPath = join(TEST_DIR, "test-plan.md")
  writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2")
  const boulderState: BoulderState = {
    active_plan: planPath,
    started_at: "2026-01-02T10:00:00Z",
    session_ids: [MAIN_SESSION_ID],
    plan_name: "test-plan",
  }
  writeBoulderState(TEST_DIR, boulderState)

  const promptMock = mock(() => Promise.reject(new Error("Bad Request")))
  const hook = createAtlasHook(createMockPluginInput({ promptMock }))

  // Drive the clock by hand so each idle event can be placed past the cooldown.
  const realDateNow = Date.now
  let fakeNow = 0
  Date.now = () => fakeNow

  // Sends one session.idle event for the main session and lets pending promise callbacks settle.
  const fireIdle = async (): Promise<void> => {
    await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
    await flushMicrotasks()
  }

  try {
    //#when - three idle events arrive, each one 6000 ms after the previous
    for (let round = 0; round < 3; round += 1) {
      if (round > 0) {
        fakeNow += 6000
      }
      await fireIdle()
    }

    //#then - only the first two idles reach the prompt; the third is skipped
    expect(promptMock).toHaveBeenCalledTimes(2)
  } finally {
    Date.now = realDateNow
  }
})
test("should reset prompt failure counter on success and only stop after 2 consecutive failures", async () => {
  //#given - an active boulder with unfinished tasks
  const planPath = join(TEST_DIR, "test-plan.md")
  writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2")
  const boulderState: BoulderState = {
    active_plan: planPath,
    started_at: "2026-01-02T10:00:00Z",
    session_ids: [MAIN_SESSION_ID],
    plan_name: "test-plan",
  }
  writeBoulderState(TEST_DIR, boulderState)

  // Scripted outcomes for the first four prompt calls: fail, succeed, fail, fail.
  // Any later call would fall through to the resolving default implementation.
  const rejectOnce = () => Promise.reject(new Error("Bad Request"))
  const resolveOnce = () => Promise.resolve()
  const promptMock = mock(() => Promise.resolve())
  for (const outcome of [rejectOnce, resolveOnce, rejectOnce, rejectOnce]) {
    promptMock.mockImplementationOnce(outcome)
  }

  const hook = createAtlasHook(createMockPluginInput({ promptMock }))

  // Drive the clock by hand so each idle event lands past the cooldown.
  const realDateNow = Date.now
  let fakeNow = 0
  Date.now = () => fakeNow

  try {
    //#when - fail, succeed (reset), then fail twice (disable), then attempt again
    let round = 0
    while (round < 5) {
      await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
      await flushMicrotasks()
      fakeNow += 6000
      round += 1
    }

    //#then - 4 prompt attempts; 5th idle is skipped after 2 consecutive failures
    expect(promptMock).toHaveBeenCalledTimes(4)
  } finally {
    Date.now = realDateNow
  }
})
test("should reset continuation failure state on session.compacted event", async () => {
  //#given - an active boulder with unfinished tasks, and a prompt call that always rejects
  const planPath = join(TEST_DIR, "test-plan.md")
  writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2")
  const boulderState: BoulderState = {
    active_plan: planPath,
    started_at: "2026-01-02T10:00:00Z",
    session_ids: [MAIN_SESSION_ID],
    plan_name: "test-plan",
  }
  writeBoulderState(TEST_DIR, boulderState)

  const promptMock = mock(() => Promise.reject(new Error("Bad Request")))
  const hook = createAtlasHook(createMockPluginInput({ promptMock }))

  // Drive the clock by hand so each idle event lands past the cooldown.
  const realDateNow = Date.now
  let fakeNow = 0
  Date.now = () => fakeNow

  // Sends one session.idle event for the main session and lets pending promise callbacks settle.
  const fireIdle = async (): Promise<void> => {
    await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
    await flushMicrotasks()
  }

  try {
    //#when - two failures disable continuation (third idle is skipped), then compaction resets it
    for (let round = 0; round < 3; round += 1) {
      if (round > 0) {
        fakeNow += 6000
      }
      await fireIdle()
    }

    await hook.handler({ event: { type: "session.compacted", properties: { sessionID: MAIN_SESSION_ID } } })
    fakeNow += 6000
    await fireIdle()

    //#then - 2 attempts + 1 after compaction (3 total)
    expect(promptMock).toHaveBeenCalledTimes(3)
  } finally {
    Date.now = realDateNow
  }
})
test("should cleanup on session.deleted", async () => {
// given - boulder state
const planPath = join(TEST_DIR, "test-plan.md")

View File

@@ -391,6 +391,7 @@ interface ToolExecuteAfterOutput {
// State the atlas hook tracks per session ID (see getState, which creates entries on first access).
interface SessionState {
// NOTE(review): presumably set when the most recent event for the session was an abort error;
// the code that reads/writes it is outside this view — confirm.
lastEventWasAbortError?: boolean
// NOTE(review): presumably the Date.now() timestamp of the last continuation injection, compared
// against CONTINUATION_COOLDOWN_MS; the check is outside this view — confirm.
lastContinuationInjectedAt?: number
// Number of consecutive failed continuation prompts: incremented when the prompt call throws,
// reset to 0 when it succeeds. The session.idle handler skips continuation once this reaches 2.
promptFailureCount: number
}
// Cooldown window in milliseconds.
// NOTE(review): presumably the minimum gap between two continuation injections for a session
// (the tests advance the clock by 6000 ms to get "past cooldown"); the check itself is not visible here — confirm.
const CONTINUATION_COOLDOWN_MS = 5000
@@ -432,13 +433,14 @@ export function createAtlasHook(
/**
 * Returns the tracked state for `sessionID`.
 *
 * On first access a fresh entry is created with `promptFailureCount` set to 0
 * and registered in `sessions`, so later calls for the same ID return the
 * same object.
 *
 * @param sessionID - ID of the session whose state is requested.
 * @returns The existing or newly created state object for that session.
 */
function getState(sessionID: string): SessionState {
  const existing = sessions.get(sessionID)
  if (existing) {
    return existing
  }
  // Initialise the required counter up front; an empty object literal would
  // not satisfy SessionState and would leave the counter undefined.
  const created: SessionState = { promptFailureCount: 0 }
  sessions.set(sessionID, created)
  return created
}
async function injectContinuation(sessionID: string, planName: string, remaining: number, total: number, agent?: string): Promise<void> {
const state = getState(sessionID)
const hasRunningBgTasks = backgroundManager
? backgroundManager.getTasksByParentSession(sessionID).some(t => t.status === "running")
: false
@@ -481,21 +483,28 @@ export function createAtlasHook(
: undefined
}
await ctx.client.session.prompt({
path: { id: sessionID },
body: {
agent: agent ?? "atlas",
...(model !== undefined ? { model } : {}),
parts: [{ type: "text", text: prompt }],
},
query: { directory: ctx.directory },
})
await ctx.client.session.prompt({
path: { id: sessionID },
body: {
agent: agent ?? "atlas",
...(model !== undefined ? { model } : {}),
parts: [{ type: "text", text: prompt }],
},
query: { directory: ctx.directory },
})
log(`[${HOOK_NAME}] Boulder continuation injected`, { sessionID })
} catch (err) {
log(`[${HOOK_NAME}] Boulder continuation failed`, { sessionID, error: String(err) })
}
}
state.promptFailureCount = 0
log(`[${HOOK_NAME}] Boulder continuation injected`, { sessionID })
} catch (err) {
state.promptFailureCount += 1
log(`[${HOOK_NAME}] Boulder continuation failed`, {
sessionID,
error: String(err),
promptFailureCount: state.promptFailureCount,
})
}
}
return {
handler: async ({ event }: { event: { type: string; properties?: unknown } }): Promise<void> => {
@@ -541,6 +550,14 @@ export function createAtlasHook(
return
}
// Stop the retry loop: once the continuation prompt has failed twice in a row, do not inject again.
// The counter returns to 0 after a successful prompt, and the whole entry is dropped on
// session.compacted, either of which re-enables continuation.
if (state.promptFailureCount >= 2) {
log(`[${HOOK_NAME}] Skipped: continuation disabled after repeated prompt failures`, {
sessionID,
promptFailureCount: state.promptFailureCount,
})
return
}
const hasRunningBgTasks = backgroundManager
? backgroundManager.getTasksByParentSession(sessionID).some(t => t.status === "running")
: false
@@ -631,6 +648,17 @@ export function createAtlasHook(
}
return
}
// On compaction, drop the session's tracked state. The next getState() call recreates it with
// promptFailureCount = 0, so continuation that was disabled by repeated failures can resume.
if (event.type === "session.compacted") {
// The session ID is read from props.sessionID, falling back to props.info.id.
const sessionID = (props?.sessionID ?? (props?.info as { id?: string } | undefined)?.id) as
| string
| undefined
if (sessionID) {
sessions.delete(sessionID)
log(`[${HOOK_NAME}] Session compacted: cleaned up`, { sessionID })
}
return
}
},
"tool.execute.before": async (