fix(todo-continuation-enforcer): require boulder session for continuation

The todo-continuation-enforcer was firing boulder continuation in ALL main sessions with incomplete todos, regardless of whether /start-work was ever executed. This caused unwanted BOULDER CONTINUATION directives in sessions that never invoked /start-work. Changes: - Add readBoulderState check in idle-event.ts to verify session is registered in boulder.json's session_ids array - Change filter condition from main session check to boulder session check - Add 4 new test cases for boulder session gate behavior - Update all existing 41 tests to set up boulder state appropriately Now boulder continuation only fires when: 1. Session is in boulder.json's session_ids (/start-work was executed), OR 2. Session is a background task session (subagent) TDD cycle: - RED: 2 new tests failed as expected (no boulder check in implementation) - GREEN: Implementation added, all 41 tests pass - REFACTOR: Full test suite 2513 pass, typecheck & build clean
2026-02-10 15:39:55 +09:00
parent 563da9470d
commit f84ef532c1
2 changed files with 164 additions and 40 deletions
--- a/src/hooks/todo-continuation-enforcer/idle-event.ts
+++ b/src/hooks/todo-continuation-enforcer/idle-event.ts
@@ -1,7 +1,8 @@
 import type { PluginInput } from "@opencode-ai/plugin"

 import type { BackgroundManager } from "../../features/background-agent"
-import { getMainSessionID, subagentSessions } from "../../features/claude-code-session-state"
+import { readBoulderState } from "../../features/boulder-state"
+import { subagentSessions } from "../../features/claude-code-session-state"
 import type { ToolPermission } from "../../features/hook-message-injector"
 import { log } from "../../shared/logger"

@@ -35,12 +36,12 @@ export async function handleSessionIdle(args: {

  log(`[${HOOK_NAME}] session.idle`, { sessionID })

-  const mainSessionID = getMainSessionID()
-  const isMainSession = sessionID === mainSessionID
  const isBackgroundTaskSession = subagentSessions.has(sessionID)
+  const boulderState = readBoulderState(ctx.directory)
+  const isBoulderSession = boulderState?.session_ids.includes(sessionID) ?? false

-  if (mainSessionID && !isMainSession && !isBackgroundTaskSession) {
-    log(`[${HOOK_NAME}] Skipped: not main or background task session`, { sessionID })
+  if (!isBackgroundTaskSession && !isBoulderSession) {
+    log(`[${HOOK_NAME}] Skipped: not boulder or background task session`, { sessionID })
    return
  }

--- a/src/hooks/todo-continuation-enforcer/todo-continuation-enforcer.test.ts
+++ b/src/hooks/todo-continuation-enforcer/todo-continuation-enforcer.test.ts
@@ -1,3 +1,5 @@
+import { existsSync, mkdirSync, rmSync, writeFileSync } from "node:fs"
+import { join } from "node:path"
 import { afterEach, beforeEach, describe, expect, test } from "bun:test"

 import type { BackgroundManager } from "../../features/background-agent"
@@ -119,6 +121,32 @@ function createFakeTimers(): FakeTimers {

 const wait = (ms: number) => new Promise<void>((resolve) => setTimeout(resolve, ms))

+const TEST_BOULDER_DIR = join("/tmp/test", ".sisyphus")
+const TEST_BOULDER_FILE = join(TEST_BOULDER_DIR, "boulder.json")
+
+function writeBoulderJsonForSession(sessionIds: string[]): void {
+  if (!existsSync(TEST_BOULDER_DIR)) {
+    mkdirSync(TEST_BOULDER_DIR, { recursive: true })
+  }
+  writeFileSync(TEST_BOULDER_FILE, JSON.stringify({
+    active_plan: "/tmp/test/.sisyphus/plans/test-plan.md",
+    started_at: new Date().toISOString(),
+    session_ids: sessionIds,
+    plan_name: "test-plan",
+  }), "utf-8")
+}
+
+function cleanupBoulderFile(): void {
+  if (existsSync(TEST_BOULDER_FILE)) {
+    rmSync(TEST_BOULDER_FILE)
+  }
+}
+
+function setupMainSessionWithBoulder(sessionID: string): void {
+  setMainSession(sessionID)
+  writeBoulderJsonForSession([sessionID])
+}
+
 describe("todo-continuation-enforcer", () => {
  let promptCalls: Array<{ sessionID: string; agent?: string; model?: { providerID?: string; modelID?: string }; text: string }>
  let toastCalls: Array<{ title: string; message: string }>
@@ -195,13 +223,14 @@ describe("todo-continuation-enforcer", () => {
  afterEach(() => {
    fakeTimers.restore()
    _resetForTesting()
+    cleanupBoulderFile()
  })

  test("should inject continuation when idle with incomplete todos", async () => {
    fakeTimers.restore()
    // given - main session with incomplete todos
    const sessionID = "main-123"
-    setMainSession(sessionID)
+    setupMainSessionWithBoulder(sessionID)

    const hook = createTodoContinuationEnforcer(createMockPluginInput(), {
      backgroundManager: createMockBackgroundManager(false),
@@ -226,7 +255,7 @@ describe("todo-continuation-enforcer", () => {
  test("should not inject when all todos are complete", async () => {
    // given - session with all todos complete
    const sessionID = "main-456"
-    setMainSession(sessionID)
+    setupMainSessionWithBoulder(sessionID)

    const mockInput = createMockPluginInput()
    mockInput.client.session.todo = async () => ({ data: [
@@ -249,7 +278,7 @@ describe("todo-continuation-enforcer", () => {
  test("should not inject when background tasks are running", async () => {
    // given - session with running background tasks
    const sessionID = "main-789"
-    setMainSession(sessionID)
+    setupMainSessionWithBoulder(sessionID)

    const hook = createTodoContinuationEnforcer(createMockPluginInput(), {
      backgroundManager: createMockBackgroundManager(true),
@@ -309,7 +338,7 @@ describe("todo-continuation-enforcer", () => {
  test("should cancel countdown on user message after grace period", async () => {
    // given - session starting countdown
    const sessionID = "main-cancel"
-    setMainSession(sessionID)
+    setupMainSessionWithBoulder(sessionID)

    const hook = createTodoContinuationEnforcer(createMockPluginInput(), {})

@@ -336,7 +365,7 @@ describe("todo-continuation-enforcer", () => {
    fakeTimers.restore()
    // given - session starting countdown
    const sessionID = "main-grace"
-    setMainSession(sessionID)
+    setupMainSessionWithBoulder(sessionID)

    const hook = createTodoContinuationEnforcer(createMockPluginInput(), {})

@@ -362,7 +391,7 @@ describe("todo-continuation-enforcer", () => {
  test("should cancel countdown on assistant activity", async () => {
    // given - session starting countdown
    const sessionID = "main-assistant"
-    setMainSession(sessionID)
+    setupMainSessionWithBoulder(sessionID)

    const hook = createTodoContinuationEnforcer(createMockPluginInput(), {})

@@ -389,7 +418,7 @@ describe("todo-continuation-enforcer", () => {
  test("should cancel countdown on tool execution", async () => {
    // given - session starting countdown
    const sessionID = "main-tool"
-    setMainSession(sessionID)
+    setupMainSessionWithBoulder(sessionID)

    const hook = createTodoContinuationEnforcer(createMockPluginInput(), {})

@@ -413,7 +442,7 @@ describe("todo-continuation-enforcer", () => {
  test("should skip injection during recovery mode", async () => {
    // given - session in recovery mode
    const sessionID = "main-recovery"
-    setMainSession(sessionID)
+    setupMainSessionWithBoulder(sessionID)

    const hook = createTodoContinuationEnforcer(createMockPluginInput(), {})

@@ -435,7 +464,7 @@ describe("todo-continuation-enforcer", () => {
    fakeTimers.restore()
    // given - session was in recovery, now complete
    const sessionID = "main-recovery-done"
-    setMainSession(sessionID)
+    setupMainSessionWithBoulder(sessionID)

    const hook = createTodoContinuationEnforcer(createMockPluginInput(), {})

@@ -457,7 +486,7 @@ describe("todo-continuation-enforcer", () => {
  test("should cleanup on session deleted", async () => {
    // given - session starting countdown
    const sessionID = "main-delete"
-    setMainSession(sessionID)
+    setupMainSessionWithBoulder(sessionID)

    const hook = createTodoContinuationEnforcer(createMockPluginInput(), {})

@@ -481,7 +510,7 @@ describe("todo-continuation-enforcer", () => {
  test("should accept skipAgents option without error", async () => {
    // given - session with skipAgents configured for Prometheus
    const sessionID = "main-prometheus-option"
-    setMainSession(sessionID)
+    setupMainSessionWithBoulder(sessionID)

    // when - create hook with skipAgents option (should not throw)
    const hook = createTodoContinuationEnforcer(createMockPluginInput(), {
@@ -501,7 +530,7 @@ describe("todo-continuation-enforcer", () => {
    fakeTimers.restore()
    // given - session with incomplete todos
    const sessionID = "main-toast"
-    setMainSession(sessionID)
+    setupMainSessionWithBoulder(sessionID)

    const hook = createTodoContinuationEnforcer(createMockPluginInput(), {})

@@ -519,7 +548,7 @@ describe("todo-continuation-enforcer", () => {
  test("should not have 10s throttle between injections", async () => {
    // given - new hook instance (no prior state)
    const sessionID = "main-no-throttle"
-    setMainSession(sessionID)
+    setupMainSessionWithBoulder(sessionID)

    const hook = createTodoContinuationEnforcer(createMockPluginInput(), {})

@@ -552,7 +581,7 @@ describe("todo-continuation-enforcer", () => {
    fakeTimers.restore()
    // given - session with incomplete todos
    const sessionID = "main-noabort-error"
-    setMainSession(sessionID)
+    setupMainSessionWithBoulder(sessionID)

    const hook = createTodoContinuationEnforcer(createMockPluginInput(), {})

@@ -591,7 +620,7 @@ describe("todo-continuation-enforcer", () => {
  test("should skip injection when last assistant message has MessageAbortedError", async () => {
    // given - session where last assistant message was aborted
    const sessionID = "main-api-abort"
-    setMainSession(sessionID)
+    setupMainSessionWithBoulder(sessionID)

    mockMessages = [
      { info: { id: "msg-1", role: "user" } },
@@ -615,7 +644,7 @@ describe("todo-continuation-enforcer", () => {
    fakeTimers.restore()
    // given - session where last assistant message completed normally
    const sessionID = "main-api-no-error"
-    setMainSession(sessionID)
+    setupMainSessionWithBoulder(sessionID)

    mockMessages = [
      { info: { id: "msg-1", role: "user" } },
@@ -639,7 +668,7 @@ describe("todo-continuation-enforcer", () => {
    fakeTimers.restore()
    // given - session where last message is from user
    const sessionID = "main-api-user-last"
-    setMainSession(sessionID)
+    setupMainSessionWithBoulder(sessionID)

    mockMessages = [
      { info: { id: "msg-1", role: "assistant" } },
@@ -662,7 +691,7 @@ describe("todo-continuation-enforcer", () => {
  test("should skip when last assistant message has any abort-like error", async () => {
    // given - session where last assistant message has AbortError (DOMException style)
    const sessionID = "main-api-abort-dom"
-    setMainSession(sessionID)
+    setupMainSessionWithBoulder(sessionID)

    mockMessages = [
      { info: { id: "msg-1", role: "user" } },
@@ -685,7 +714,7 @@ describe("todo-continuation-enforcer", () => {
  test("should skip injection when abort detected via session.error event (event-based, primary)", async () => {
    // given - session with incomplete todos
    const sessionID = "main-event-abort"
-    setMainSession(sessionID)
+    setupMainSessionWithBoulder(sessionID)
    mockMessages = [
      { info: { id: "msg-1", role: "user" } },
      { info: { id: "msg-2", role: "assistant" } },
@@ -715,7 +744,7 @@ describe("todo-continuation-enforcer", () => {
  test("should skip injection when AbortError detected via session.error event", async () => {
    // given - session with incomplete todos
    const sessionID = "main-event-abort-dom"
-    setMainSession(sessionID)
+    setupMainSessionWithBoulder(sessionID)
    mockMessages = [
      { info: { id: "msg-1", role: "user" } },
      { info: { id: "msg-2", role: "assistant" } },
@@ -746,7 +775,7 @@ describe("todo-continuation-enforcer", () => {
    fakeTimers.restore()
    // given - session with incomplete todos and old abort timestamp
    const sessionID = "main-stale-abort"
-    setMainSession(sessionID)
+    setupMainSessionWithBoulder(sessionID)
    mockMessages = [
      { info: { id: "msg-1", role: "user" } },
      { info: { id: "msg-2", role: "assistant" } },
@@ -779,7 +808,7 @@ describe("todo-continuation-enforcer", () => {
    fakeTimers.restore()
    // given - session with abort detected
    const sessionID = "main-clear-on-user"
-    setMainSession(sessionID)
+    setupMainSessionWithBoulder(sessionID)
    mockMessages = [
      { info: { id: "msg-1", role: "user" } },
      { info: { id: "msg-2", role: "assistant" } },
@@ -819,7 +848,7 @@ describe("todo-continuation-enforcer", () => {
    fakeTimers.restore()
    // given - session with abort detected
    const sessionID = "main-clear-on-assistant"
-    setMainSession(sessionID)
+    setupMainSessionWithBoulder(sessionID)
    mockMessages = [
      { info: { id: "msg-1", role: "user" } },
      { info: { id: "msg-2", role: "assistant" } },
@@ -858,7 +887,7 @@ describe("todo-continuation-enforcer", () => {
    fakeTimers.restore()
    // given - session with abort detected
    const sessionID = "main-clear-on-tool"
-    setMainSession(sessionID)
+    setupMainSessionWithBoulder(sessionID)
    mockMessages = [
      { info: { id: "msg-1", role: "user" } },
      { info: { id: "msg-2", role: "assistant" } },
@@ -896,7 +925,7 @@ describe("todo-continuation-enforcer", () => {
  test("should use event-based detection even when API indicates no abort (event wins)", async () => {
    // given - session with abort event but API shows no error
    const sessionID = "main-event-wins"
-    setMainSession(sessionID)
+    setupMainSessionWithBoulder(sessionID)
    mockMessages = [
      { info: { id: "msg-1", role: "user" } },
      { info: { id: "msg-2", role: "assistant" } },
@@ -926,7 +955,7 @@ describe("todo-continuation-enforcer", () => {
  test("should use API fallback when event is missed but API shows abort", async () => {
    // given - session where event was missed but API shows abort
    const sessionID = "main-api-fallback"
-    setMainSession(sessionID)
+    setupMainSessionWithBoulder(sessionID)
    mockMessages = [
      { info: { id: "msg-1", role: "user" } },
      { info: { id: "msg-2", role: "assistant", error: { name: "MessageAbortedError" } } },
@@ -949,7 +978,7 @@ describe("todo-continuation-enforcer", () => {
    fakeTimers.restore()
    // given - session with incomplete todos, no prior message context available
    const sessionID = "main-model-preserve"
-    setMainSession(sessionID)
+    setupMainSessionWithBoulder(sessionID)

    const hook = createTodoContinuationEnforcer(createMockPluginInput(), {
      backgroundManager: createMockBackgroundManager(false),
@@ -971,7 +1000,7 @@ describe("todo-continuation-enforcer", () => {
  test("should extract model from assistant message with flat modelID/providerID", async () => {
    // given - session with assistant message that has flat modelID/providerID (OpenCode API format)
    const sessionID = "main-assistant-model"
-    setMainSession(sessionID)
+    setupMainSessionWithBoulder(sessionID)

    // OpenCode returns assistant messages with flat modelID/providerID, not nested model object
    const mockMessagesWithAssistant = [
@@ -1032,7 +1061,7 @@ describe("todo-continuation-enforcer", () => {
  test("should skip compaction agent messages when resolving agent info", async () => {
    // given - session where last message is from compaction agent but previous was Sisyphus
    const sessionID = "main-compaction-filter"
-    setMainSession(sessionID)
+    setupMainSessionWithBoulder(sessionID)

    const mockMessagesWithCompaction = [
      { info: { id: "msg-1", role: "user", agent: "sisyphus", model: { providerID: "anthropic", modelID: "claude-sonnet-4-5" } } },
@@ -1087,7 +1116,7 @@ describe("todo-continuation-enforcer", () => {
  test("should skip injection when only compaction agent messages exist", async () => {
    // given - session with only compaction agent (post-compaction, no prior agent info)
    const sessionID = "main-only-compaction"
-    setMainSession(sessionID)
+    setupMainSessionWithBoulder(sessionID)

    const mockMessagesOnlyCompaction = [
      { info: { id: "msg-1", role: "assistant", agent: "compaction" } },
@@ -1140,7 +1169,7 @@ describe("todo-continuation-enforcer", () => {
  test("should skip injection when prometheus agent is after compaction", async () => {
    // given - prometheus session that was compacted
    const sessionID = "main-prometheus-compacted"
-    setMainSession(sessionID)
+    setupMainSessionWithBoulder(sessionID)

    const mockMessagesPrometheusCompacted = [
      { info: { id: "msg-1", role: "user", agent: "prometheus" } },
@@ -1196,7 +1225,7 @@ describe("todo-continuation-enforcer", () => {
    fakeTimers.restore()
    // given - session with no agent info but skipAgents is empty
    const sessionID = "main-no-agent-no-skip"
-    setMainSession(sessionID)
+    setupMainSessionWithBoulder(sessionID)

    const mockMessagesNoAgent = [
      { info: { id: "msg-1", role: "user" } },
@@ -1252,7 +1281,7 @@ describe("todo-continuation-enforcer", () => {
  test("should not inject when isContinuationStopped returns true", async () => {
    // given - session with continuation stopped
    const sessionID = "main-stopped"
-    setMainSession(sessionID)
+    setupMainSessionWithBoulder(sessionID)

    const hook = createTodoContinuationEnforcer(createMockPluginInput(), {
      isContinuationStopped: (id) => id === sessionID,
@@ -1273,7 +1302,7 @@ describe("todo-continuation-enforcer", () => {
    fakeTimers.restore()
    // given - session with continuation not stopped
    const sessionID = "main-not-stopped"
-    setMainSession(sessionID)
+    setupMainSessionWithBoulder(sessionID)

    const hook = createTodoContinuationEnforcer(createMockPluginInput(), {
      isContinuationStopped: () => false,
@@ -1294,7 +1323,7 @@ describe("todo-continuation-enforcer", () => {
    // given - multiple sessions with running countdowns
    const session1 = "main-cancel-all-1"
    const session2 = "main-cancel-all-2"
-    setMainSession(session1)
+    setupMainSessionWithBoulder(session1)

    const hook = createTodoContinuationEnforcer(createMockPluginInput(), {})

@@ -1313,4 +1342,98 @@ describe("todo-continuation-enforcer", () => {
    // then - no continuation injected (all countdowns cancelled)
    expect(promptCalls).toHaveLength(0)
  })
+
+  // ============================================================
+  // BOULDER SESSION GATE TESTS
+  // These tests verify that todo-continuation-enforcer only fires
+  // when the session is registered in boulder.json's session_ids
+  // (i.e., /start-work was executed in the session)
+  // ============================================================
+
+  test("should NOT inject for main session when session is NOT in boulder.json session_ids", async () => {
+    // given - main session that is NOT registered in boulder.json
+    const sessionID = "main-no-boulder-entry"
+    setMainSession(sessionID)
+    writeBoulderJsonForSession(["some-other-session"])
+
+    const hook = createTodoContinuationEnforcer(createMockPluginInput(), {
+      backgroundManager: createMockBackgroundManager(false),
+    })
+
+    // when - session goes idle
+    await hook.handler({
+      event: { type: "session.idle", properties: { sessionID } },
+    })
+
+    await fakeTimers.advanceBy(3000)
+
+    // then - no continuation injected (session not in boulder)
+    expect(promptCalls).toHaveLength(0)
+  })
+
+  test("should inject for main session when session IS in boulder.json session_ids", async () => {
+    fakeTimers.restore()
+    // given - main session that IS registered in boulder.json
+    const sessionID = "main-in-boulder"
+    setMainSession(sessionID)
+    writeBoulderJsonForSession([sessionID])
+
+    const hook = createTodoContinuationEnforcer(createMockPluginInput(), {
+      backgroundManager: createMockBackgroundManager(false),
+    })
+
+    // when - session goes idle
+    await hook.handler({
+      event: { type: "session.idle", properties: { sessionID } },
+    })
+
+    await wait(2500)
+
+    // then - continuation injected (session is in boulder)
+    expect(promptCalls.length).toBe(1)
+    expect(promptCalls[0].text).toContain("TODO CONTINUATION")
+  }, { timeout: 15000 })
+
+  test("should NOT inject for main session when no boulder.json exists", async () => {
+    // given - main session with no boulder.json at all
+    const sessionID = "main-no-boulder-file"
+    setMainSession(sessionID)
+    cleanupBoulderFile()
+
+    const hook = createTodoContinuationEnforcer(createMockPluginInput(), {
+      backgroundManager: createMockBackgroundManager(false),
+    })
+
+    // when - session goes idle
+    await hook.handler({
+      event: { type: "session.idle", properties: { sessionID } },
+    })
+
+    await fakeTimers.advanceBy(3000)
+
+    // then - no continuation injected (no boulder state)
+    expect(promptCalls).toHaveLength(0)
+  })
+
+  test("should still inject for background task session regardless of boulder state", async () => {
+    fakeTimers.restore()
+    // given - background task session with no boulder entry
+    setMainSession("main-session")
+    const bgTaskSession = "bg-task-boulder-test"
+    subagentSessions.add(bgTaskSession)
+    cleanupBoulderFile()
+
+    const hook = createTodoContinuationEnforcer(createMockPluginInput(), {})
+
+    // when - background task session goes idle
+    await hook.handler({
+      event: { type: "session.idle", properties: { sessionID: bgTaskSession } },
+    })
+
+    await wait(2500)
+
+    // then - continuation still injected (background tasks bypass boulder check)
+    expect(promptCalls.length).toBe(1)
+    expect(promptCalls[0].sessionID).toBe(bgTaskSession)
+  }, { timeout: 15000 })
 })