Merge pull request #1553 from code-yeongyu/fix/1355-atlas-continuation-guard

fix(atlas): stop continuation retry loop on repeated prompt failures
2026-02-06 18:57:32 +09:00
parent c6c149ebb8 aec5624122
commit e9a3d579b3
2 changed files with 177 additions and 15 deletions
--- a/src/hooks/atlas/index.test.ts
+++ b/src/hooks/atlas/index.test.ts
@@ -624,6 +624,11 @@ describe("atlas hook", () => {
  describe("session.idle handler (boulder continuation)", () => {
    const MAIN_SESSION_ID = "main-session-123"

+    async function flushMicrotasks(): Promise<void> {
+      await Promise.resolve()
+      await Promise.resolve()
+    }
+
     beforeEach(() => {
       mock.module("../../features/claude-code-session-state", () => ({
         getMainSessionID: () => MAIN_SESSION_ID,
@@ -965,6 +970,135 @@ describe("atlas hook", () => {
      expect(mockInput._promptMock).toHaveBeenCalledTimes(1)
    })

+    test("should stop continuation after 2 consecutive prompt failures (issue #1355)", async () => {
+      //#given - boulder state with incomplete plan and prompt always fails
+      const planPath = join(TEST_DIR, "test-plan.md")
+      writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2")
+
+      const state: BoulderState = {
+        active_plan: planPath,
+        started_at: "2026-01-02T10:00:00Z",
+        session_ids: [MAIN_SESSION_ID],
+        plan_name: "test-plan",
+      }
+      writeBoulderState(TEST_DIR, state)
+
+      const promptMock = mock(() => Promise.reject(new Error("Bad Request")))
+      const mockInput = createMockPluginInput({ promptMock })
+      const hook = createAtlasHook(mockInput)
+
+      const originalDateNow = Date.now
+      let now = 0
+      Date.now = () => now
+
+      try {
+        //#when - idle fires repeatedly, past cooldown each time
+        await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
+        await flushMicrotasks()
+        now += 6000
+
+        await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
+        await flushMicrotasks()
+        now += 6000
+
+        await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
+        await flushMicrotasks()
+
+        //#then - should attempt only twice, then disable continuation
+        expect(promptMock).toHaveBeenCalledTimes(2)
+      } finally {
+        Date.now = originalDateNow
+      }
+    })
+
+    test("should reset prompt failure counter on success and only stop after 2 consecutive failures", async () => {
+      //#given - boulder state with incomplete plan
+      const planPath = join(TEST_DIR, "test-plan.md")
+      writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2")
+
+      const state: BoulderState = {
+        active_plan: planPath,
+        started_at: "2026-01-02T10:00:00Z",
+        session_ids: [MAIN_SESSION_ID],
+        plan_name: "test-plan",
+      }
+      writeBoulderState(TEST_DIR, state)
+
+      const promptMock = mock(() => Promise.resolve())
+      promptMock.mockImplementationOnce(() => Promise.reject(new Error("Bad Request")))
+      promptMock.mockImplementationOnce(() => Promise.resolve())
+      promptMock.mockImplementationOnce(() => Promise.reject(new Error("Bad Request")))
+      promptMock.mockImplementationOnce(() => Promise.reject(new Error("Bad Request")))
+
+      const mockInput = createMockPluginInput({ promptMock })
+      const hook = createAtlasHook(mockInput)
+
+      const originalDateNow = Date.now
+      let now = 0
+      Date.now = () => now
+
+      try {
+        //#when - fail, succeed (reset), then fail twice (disable), then attempt again
+        for (let i = 0; i < 5; i++) {
+          await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
+          await flushMicrotasks()
+          now += 6000
+        }
+
+        //#then - 4 prompt attempts; 5th idle is skipped after 2 consecutive failures
+        expect(promptMock).toHaveBeenCalledTimes(4)
+      } finally {
+        Date.now = originalDateNow
+      }
+    })
+
+    test("should reset continuation failure state on session.compacted event", async () => {
+      //#given - boulder state with incomplete plan and prompt always fails
+      const planPath = join(TEST_DIR, "test-plan.md")
+      writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2")
+
+      const state: BoulderState = {
+        active_plan: planPath,
+        started_at: "2026-01-02T10:00:00Z",
+        session_ids: [MAIN_SESSION_ID],
+        plan_name: "test-plan",
+      }
+      writeBoulderState(TEST_DIR, state)
+
+      const promptMock = mock(() => Promise.reject(new Error("Bad Request")))
+      const mockInput = createMockPluginInput({ promptMock })
+      const hook = createAtlasHook(mockInput)
+
+      const originalDateNow = Date.now
+      let now = 0
+      Date.now = () => now
+
+      try {
+        //#when - two failures disable continuation, then compaction resets it
+        await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
+        await flushMicrotasks()
+        now += 6000
+
+        await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
+        await flushMicrotasks()
+        now += 6000
+
+        await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
+        await flushMicrotasks()
+
+        await hook.handler({ event: { type: "session.compacted", properties: { sessionID: MAIN_SESSION_ID } } })
+        now += 6000
+
+        await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
+        await flushMicrotasks()
+
+        //#then - 2 attempts + 1 after compaction (3 total)
+        expect(promptMock).toHaveBeenCalledTimes(3)
+      } finally {
+        Date.now = originalDateNow
+      }
+    })
+
    test("should cleanup on session.deleted", async () => {
      // given - boulder state
      const planPath = join(TEST_DIR, "test-plan.md")
--- a/src/hooks/atlas/index.ts
+++ b/src/hooks/atlas/index.ts
@@ -391,6 +391,7 @@ interface ToolExecuteAfterOutput {
 interface SessionState {
  lastEventWasAbortError?: boolean
  lastContinuationInjectedAt?: number
+  promptFailureCount: number
 }

 const CONTINUATION_COOLDOWN_MS = 5000
@@ -432,13 +433,14 @@ export function createAtlasHook(
  function getState(sessionID: string): SessionState {
    let state = sessions.get(sessionID)
    if (!state) {
-      state = {}
+      state = { promptFailureCount: 0 }
      sessions.set(sessionID, state)
    }
    return state
  }

  async function injectContinuation(sessionID: string, planName: string, remaining: number, total: number, agent?: string): Promise<void> {
+    const state = getState(sessionID)
    const hasRunningBgTasks = backgroundManager
      ? backgroundManager.getTasksByParentSession(sessionID).some(t => t.status === "running")
      : false
@@ -481,21 +483,28 @@ export function createAtlasHook(
          : undefined
      }

-       await ctx.client.session.prompt({
-         path: { id: sessionID },
-         body: {
-            agent: agent ?? "atlas",
-           ...(model !== undefined ? { model } : {}),
-           parts: [{ type: "text", text: prompt }],
-         },
-         query: { directory: ctx.directory },
-       })
+        await ctx.client.session.prompt({
+          path: { id: sessionID },
+          body: {
+             agent: agent ?? "atlas",
+            ...(model !== undefined ? { model } : {}),
+            parts: [{ type: "text", text: prompt }],
+          },
+          query: { directory: ctx.directory },
+        })

-      log(`[${HOOK_NAME}] Boulder continuation injected`, { sessionID })
-    } catch (err) {
-      log(`[${HOOK_NAME}] Boulder continuation failed`, { sessionID, error: String(err) })
-    }
-  }
+       state.promptFailureCount = 0
+
+       log(`[${HOOK_NAME}] Boulder continuation injected`, { sessionID })
+     } catch (err) {
+      state.promptFailureCount += 1
+      log(`[${HOOK_NAME}] Boulder continuation failed`, {
+        sessionID,
+        error: String(err),
+        promptFailureCount: state.promptFailureCount,
+      })
+     }
+   }

  return {
    handler: async ({ event }: { event: { type: string; properties?: unknown } }): Promise<void> => {
@@ -541,6 +550,14 @@ export function createAtlasHook(
          return
        }

+        if (state.promptFailureCount >= 2) {
+          log(`[${HOOK_NAME}] Skipped: continuation disabled after repeated prompt failures`, {
+            sessionID,
+            promptFailureCount: state.promptFailureCount,
+          })
+          return
+        }
+
        const hasRunningBgTasks = backgroundManager
          ? backgroundManager.getTasksByParentSession(sessionID).some(t => t.status === "running")
          : false
@@ -631,6 +648,17 @@ export function createAtlasHook(
        }
        return
      }
+
+      if (event.type === "session.compacted") {
+        const sessionID = (props?.sessionID ?? (props?.info as { id?: string } | undefined)?.id) as
+          | string
+          | undefined
+        if (sessionID) {
+          sessions.delete(sessionID)
+          log(`[${HOOK_NAME}] Session compacted: cleaned up`, { sessionID })
+        }
+        return
+      }
    },

    "tool.execute.before": async (