From f84ef532c157691c034e7773845a5c53ef73dc6a Mon Sep 17 00:00:00 2001 From: YeonGyu-Kim Date: Tue, 10 Feb 2026 15:39:55 +0900 Subject: [PATCH] fix(todo-continuation-enforcer): require boulder session for continuation The todo-continuation-enforcer was firing boulder continuation in ALL main sessions with incomplete todos, regardless of whether /start-work was ever executed. This caused unwanted BOULDER CONTINUATION directives in sessions that never invoked /start-work. Changes: - Add readBoulderState check in idle-event.ts to verify session is registered in boulder.json's session_ids array - Change filter condition from main session check to boulder session check - Add 4 new test cases for boulder session gate behavior - Update the 35 existing tests that call setMainSession to also register the session in boulder.json (via setupMainSessionWithBoulder) Now boulder continuation only fires when: 1. Session is in boulder.json's session_ids (/start-work was executed), OR 2. Session is a background task session (subagent) TDD cycle: - RED: 2 new tests failed as expected (no boulder check in implementation) - GREEN: Implementation added, all 41 tests pass - REFACTOR: Full test suite 2513 pass, typecheck & build clean --- .../todo-continuation-enforcer/idle-event.ts | 11 +- .../todo-continuation-enforcer.test.ts | 193 ++++++++++++++---- 2 files changed, 164 insertions(+), 40 deletions(-) diff --git a/src/hooks/todo-continuation-enforcer/idle-event.ts b/src/hooks/todo-continuation-enforcer/idle-event.ts index 62a945872..ff8ab1e57 100644 --- a/src/hooks/todo-continuation-enforcer/idle-event.ts +++ b/src/hooks/todo-continuation-enforcer/idle-event.ts @@ -1,7 +1,8 @@ import type { PluginInput } from "@opencode-ai/plugin" import type { BackgroundManager } from "../../features/background-agent" -import { getMainSessionID, subagentSessions } from "../../features/claude-code-session-state" +import { readBoulderState } from "../../features/boulder-state" +import { subagentSessions } from "../../features/claude-code-session-state" import type { 
ToolPermission } from "../../features/hook-message-injector" import { log } from "../../shared/logger" @@ -35,12 +36,12 @@ export async function handleSessionIdle(args: { log(`[${HOOK_NAME}] session.idle`, { sessionID }) - const mainSessionID = getMainSessionID() - const isMainSession = sessionID === mainSessionID const isBackgroundTaskSession = subagentSessions.has(sessionID) + const boulderState = readBoulderState(ctx.directory) + const isBoulderSession = boulderState?.session_ids.includes(sessionID) ?? false - if (mainSessionID && !isMainSession && !isBackgroundTaskSession) { - log(`[${HOOK_NAME}] Skipped: not main or background task session`, { sessionID }) + if (!isBackgroundTaskSession && !isBoulderSession) { + log(`[${HOOK_NAME}] Skipped: not boulder or background task session`, { sessionID }) return } diff --git a/src/hooks/todo-continuation-enforcer/todo-continuation-enforcer.test.ts b/src/hooks/todo-continuation-enforcer/todo-continuation-enforcer.test.ts index ba8ba212a..760165dd7 100644 --- a/src/hooks/todo-continuation-enforcer/todo-continuation-enforcer.test.ts +++ b/src/hooks/todo-continuation-enforcer/todo-continuation-enforcer.test.ts @@ -1,3 +1,5 @@ +import { existsSync, mkdirSync, rmSync, writeFileSync } from "node:fs" +import { join } from "node:path" import { afterEach, beforeEach, describe, expect, test } from "bun:test" import type { BackgroundManager } from "../../features/background-agent" @@ -119,6 +121,32 @@ function createFakeTimers(): FakeTimers { const wait = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms)) +const TEST_BOULDER_DIR = join("/tmp/test", ".sisyphus") +const TEST_BOULDER_FILE = join(TEST_BOULDER_DIR, "boulder.json") + +function writeBoulderJsonForSession(sessionIds: string[]): void { + if (!existsSync(TEST_BOULDER_DIR)) { + mkdirSync(TEST_BOULDER_DIR, { recursive: true }) + } + writeFileSync(TEST_BOULDER_FILE, JSON.stringify({ + active_plan: "/tmp/test/.sisyphus/plans/test-plan.md", + started_at: new 
Date().toISOString(), + session_ids: sessionIds, + plan_name: "test-plan", + }), "utf-8") +} + +function cleanupBoulderFile(): void { + if (existsSync(TEST_BOULDER_FILE)) { + rmSync(TEST_BOULDER_FILE) + } +} + +function setupMainSessionWithBoulder(sessionID: string): void { + setMainSession(sessionID) + writeBoulderJsonForSession([sessionID]) +} + describe("todo-continuation-enforcer", () => { let promptCalls: Array<{ sessionID: string; agent?: string; model?: { providerID?: string; modelID?: string }; text: string }> let toastCalls: Array<{ title: string; message: string }> @@ -195,13 +223,14 @@ describe("todo-continuation-enforcer", () => { afterEach(() => { fakeTimers.restore() _resetForTesting() + cleanupBoulderFile() }) test("should inject continuation when idle with incomplete todos", async () => { fakeTimers.restore() // given - main session with incomplete todos const sessionID = "main-123" - setMainSession(sessionID) + setupMainSessionWithBoulder(sessionID) const hook = createTodoContinuationEnforcer(createMockPluginInput(), { backgroundManager: createMockBackgroundManager(false), @@ -226,7 +255,7 @@ describe("todo-continuation-enforcer", () => { test("should not inject when all todos are complete", async () => { // given - session with all todos complete const sessionID = "main-456" - setMainSession(sessionID) + setupMainSessionWithBoulder(sessionID) const mockInput = createMockPluginInput() mockInput.client.session.todo = async () => ({ data: [ @@ -249,7 +278,7 @@ describe("todo-continuation-enforcer", () => { test("should not inject when background tasks are running", async () => { // given - session with running background tasks const sessionID = "main-789" - setMainSession(sessionID) + setupMainSessionWithBoulder(sessionID) const hook = createTodoContinuationEnforcer(createMockPluginInput(), { backgroundManager: createMockBackgroundManager(true), @@ -309,7 +338,7 @@ describe("todo-continuation-enforcer", () => { test("should cancel countdown on user 
message after grace period", async () => { // given - session starting countdown const sessionID = "main-cancel" - setMainSession(sessionID) + setupMainSessionWithBoulder(sessionID) const hook = createTodoContinuationEnforcer(createMockPluginInput(), {}) @@ -336,7 +365,7 @@ describe("todo-continuation-enforcer", () => { fakeTimers.restore() // given - session starting countdown const sessionID = "main-grace" - setMainSession(sessionID) + setupMainSessionWithBoulder(sessionID) const hook = createTodoContinuationEnforcer(createMockPluginInput(), {}) @@ -362,7 +391,7 @@ describe("todo-continuation-enforcer", () => { test("should cancel countdown on assistant activity", async () => { // given - session starting countdown const sessionID = "main-assistant" - setMainSession(sessionID) + setupMainSessionWithBoulder(sessionID) const hook = createTodoContinuationEnforcer(createMockPluginInput(), {}) @@ -389,7 +418,7 @@ describe("todo-continuation-enforcer", () => { test("should cancel countdown on tool execution", async () => { // given - session starting countdown const sessionID = "main-tool" - setMainSession(sessionID) + setupMainSessionWithBoulder(sessionID) const hook = createTodoContinuationEnforcer(createMockPluginInput(), {}) @@ -413,7 +442,7 @@ describe("todo-continuation-enforcer", () => { test("should skip injection during recovery mode", async () => { // given - session in recovery mode const sessionID = "main-recovery" - setMainSession(sessionID) + setupMainSessionWithBoulder(sessionID) const hook = createTodoContinuationEnforcer(createMockPluginInput(), {}) @@ -435,7 +464,7 @@ describe("todo-continuation-enforcer", () => { fakeTimers.restore() // given - session was in recovery, now complete const sessionID = "main-recovery-done" - setMainSession(sessionID) + setupMainSessionWithBoulder(sessionID) const hook = createTodoContinuationEnforcer(createMockPluginInput(), {}) @@ -457,7 +486,7 @@ describe("todo-continuation-enforcer", () => { test("should cleanup on 
session deleted", async () => { // given - session starting countdown const sessionID = "main-delete" - setMainSession(sessionID) + setupMainSessionWithBoulder(sessionID) const hook = createTodoContinuationEnforcer(createMockPluginInput(), {}) @@ -481,7 +510,7 @@ describe("todo-continuation-enforcer", () => { test("should accept skipAgents option without error", async () => { // given - session with skipAgents configured for Prometheus const sessionID = "main-prometheus-option" - setMainSession(sessionID) + setupMainSessionWithBoulder(sessionID) // when - create hook with skipAgents option (should not throw) const hook = createTodoContinuationEnforcer(createMockPluginInput(), { @@ -501,7 +530,7 @@ describe("todo-continuation-enforcer", () => { fakeTimers.restore() // given - session with incomplete todos const sessionID = "main-toast" - setMainSession(sessionID) + setupMainSessionWithBoulder(sessionID) const hook = createTodoContinuationEnforcer(createMockPluginInput(), {}) @@ -519,7 +548,7 @@ describe("todo-continuation-enforcer", () => { test("should not have 10s throttle between injections", async () => { // given - new hook instance (no prior state) const sessionID = "main-no-throttle" - setMainSession(sessionID) + setupMainSessionWithBoulder(sessionID) const hook = createTodoContinuationEnforcer(createMockPluginInput(), {}) @@ -552,7 +581,7 @@ describe("todo-continuation-enforcer", () => { fakeTimers.restore() // given - session with incomplete todos const sessionID = "main-noabort-error" - setMainSession(sessionID) + setupMainSessionWithBoulder(sessionID) const hook = createTodoContinuationEnforcer(createMockPluginInput(), {}) @@ -591,7 +620,7 @@ describe("todo-continuation-enforcer", () => { test("should skip injection when last assistant message has MessageAbortedError", async () => { // given - session where last assistant message was aborted const sessionID = "main-api-abort" - setMainSession(sessionID) + setupMainSessionWithBoulder(sessionID) 
mockMessages = [ { info: { id: "msg-1", role: "user" } }, @@ -615,7 +644,7 @@ describe("todo-continuation-enforcer", () => { fakeTimers.restore() // given - session where last assistant message completed normally const sessionID = "main-api-no-error" - setMainSession(sessionID) + setupMainSessionWithBoulder(sessionID) mockMessages = [ { info: { id: "msg-1", role: "user" } }, @@ -639,7 +668,7 @@ describe("todo-continuation-enforcer", () => { fakeTimers.restore() // given - session where last message is from user const sessionID = "main-api-user-last" - setMainSession(sessionID) + setupMainSessionWithBoulder(sessionID) mockMessages = [ { info: { id: "msg-1", role: "assistant" } }, @@ -662,7 +691,7 @@ describe("todo-continuation-enforcer", () => { test("should skip when last assistant message has any abort-like error", async () => { // given - session where last assistant message has AbortError (DOMException style) const sessionID = "main-api-abort-dom" - setMainSession(sessionID) + setupMainSessionWithBoulder(sessionID) mockMessages = [ { info: { id: "msg-1", role: "user" } }, @@ -685,7 +714,7 @@ describe("todo-continuation-enforcer", () => { test("should skip injection when abort detected via session.error event (event-based, primary)", async () => { // given - session with incomplete todos const sessionID = "main-event-abort" - setMainSession(sessionID) + setupMainSessionWithBoulder(sessionID) mockMessages = [ { info: { id: "msg-1", role: "user" } }, { info: { id: "msg-2", role: "assistant" } }, @@ -715,7 +744,7 @@ describe("todo-continuation-enforcer", () => { test("should skip injection when AbortError detected via session.error event", async () => { // given - session with incomplete todos const sessionID = "main-event-abort-dom" - setMainSession(sessionID) + setupMainSessionWithBoulder(sessionID) mockMessages = [ { info: { id: "msg-1", role: "user" } }, { info: { id: "msg-2", role: "assistant" } }, @@ -746,7 +775,7 @@ describe("todo-continuation-enforcer", () 
=> { fakeTimers.restore() // given - session with incomplete todos and old abort timestamp const sessionID = "main-stale-abort" - setMainSession(sessionID) + setupMainSessionWithBoulder(sessionID) mockMessages = [ { info: { id: "msg-1", role: "user" } }, { info: { id: "msg-2", role: "assistant" } }, @@ -779,7 +808,7 @@ describe("todo-continuation-enforcer", () => { fakeTimers.restore() // given - session with abort detected const sessionID = "main-clear-on-user" - setMainSession(sessionID) + setupMainSessionWithBoulder(sessionID) mockMessages = [ { info: { id: "msg-1", role: "user" } }, { info: { id: "msg-2", role: "assistant" } }, @@ -819,7 +848,7 @@ describe("todo-continuation-enforcer", () => { fakeTimers.restore() // given - session with abort detected const sessionID = "main-clear-on-assistant" - setMainSession(sessionID) + setupMainSessionWithBoulder(sessionID) mockMessages = [ { info: { id: "msg-1", role: "user" } }, { info: { id: "msg-2", role: "assistant" } }, @@ -858,7 +887,7 @@ describe("todo-continuation-enforcer", () => { fakeTimers.restore() // given - session with abort detected const sessionID = "main-clear-on-tool" - setMainSession(sessionID) + setupMainSessionWithBoulder(sessionID) mockMessages = [ { info: { id: "msg-1", role: "user" } }, { info: { id: "msg-2", role: "assistant" } }, @@ -896,7 +925,7 @@ describe("todo-continuation-enforcer", () => { test("should use event-based detection even when API indicates no abort (event wins)", async () => { // given - session with abort event but API shows no error const sessionID = "main-event-wins" - setMainSession(sessionID) + setupMainSessionWithBoulder(sessionID) mockMessages = [ { info: { id: "msg-1", role: "user" } }, { info: { id: "msg-2", role: "assistant" } }, @@ -926,7 +955,7 @@ describe("todo-continuation-enforcer", () => { test("should use API fallback when event is missed but API shows abort", async () => { // given - session where event was missed but API shows abort const sessionID = 
"main-api-fallback" - setMainSession(sessionID) + setupMainSessionWithBoulder(sessionID) mockMessages = [ { info: { id: "msg-1", role: "user" } }, { info: { id: "msg-2", role: "assistant", error: { name: "MessageAbortedError" } } }, @@ -949,7 +978,7 @@ describe("todo-continuation-enforcer", () => { fakeTimers.restore() // given - session with incomplete todos, no prior message context available const sessionID = "main-model-preserve" - setMainSession(sessionID) + setupMainSessionWithBoulder(sessionID) const hook = createTodoContinuationEnforcer(createMockPluginInput(), { backgroundManager: createMockBackgroundManager(false), @@ -971,7 +1000,7 @@ describe("todo-continuation-enforcer", () => { test("should extract model from assistant message with flat modelID/providerID", async () => { // given - session with assistant message that has flat modelID/providerID (OpenCode API format) const sessionID = "main-assistant-model" - setMainSession(sessionID) + setupMainSessionWithBoulder(sessionID) // OpenCode returns assistant messages with flat modelID/providerID, not nested model object const mockMessagesWithAssistant = [ @@ -1032,7 +1061,7 @@ describe("todo-continuation-enforcer", () => { test("should skip compaction agent messages when resolving agent info", async () => { // given - session where last message is from compaction agent but previous was Sisyphus const sessionID = "main-compaction-filter" - setMainSession(sessionID) + setupMainSessionWithBoulder(sessionID) const mockMessagesWithCompaction = [ { info: { id: "msg-1", role: "user", agent: "sisyphus", model: { providerID: "anthropic", modelID: "claude-sonnet-4-5" } } }, @@ -1087,7 +1116,7 @@ describe("todo-continuation-enforcer", () => { test("should skip injection when only compaction agent messages exist", async () => { // given - session with only compaction agent (post-compaction, no prior agent info) const sessionID = "main-only-compaction" - setMainSession(sessionID) + 
setupMainSessionWithBoulder(sessionID) const mockMessagesOnlyCompaction = [ { info: { id: "msg-1", role: "assistant", agent: "compaction" } }, @@ -1140,7 +1169,7 @@ describe("todo-continuation-enforcer", () => { test("should skip injection when prometheus agent is after compaction", async () => { // given - prometheus session that was compacted const sessionID = "main-prometheus-compacted" - setMainSession(sessionID) + setupMainSessionWithBoulder(sessionID) const mockMessagesPrometheusCompacted = [ { info: { id: "msg-1", role: "user", agent: "prometheus" } }, @@ -1196,7 +1225,7 @@ describe("todo-continuation-enforcer", () => { fakeTimers.restore() // given - session with no agent info but skipAgents is empty const sessionID = "main-no-agent-no-skip" - setMainSession(sessionID) + setupMainSessionWithBoulder(sessionID) const mockMessagesNoAgent = [ { info: { id: "msg-1", role: "user" } }, @@ -1252,7 +1281,7 @@ describe("todo-continuation-enforcer", () => { test("should not inject when isContinuationStopped returns true", async () => { // given - session with continuation stopped const sessionID = "main-stopped" - setMainSession(sessionID) + setupMainSessionWithBoulder(sessionID) const hook = createTodoContinuationEnforcer(createMockPluginInput(), { isContinuationStopped: (id) => id === sessionID, @@ -1273,7 +1302,7 @@ describe("todo-continuation-enforcer", () => { fakeTimers.restore() // given - session with continuation not stopped const sessionID = "main-not-stopped" - setMainSession(sessionID) + setupMainSessionWithBoulder(sessionID) const hook = createTodoContinuationEnforcer(createMockPluginInput(), { isContinuationStopped: () => false, @@ -1294,7 +1323,7 @@ describe("todo-continuation-enforcer", () => { // given - multiple sessions with running countdowns const session1 = "main-cancel-all-1" const session2 = "main-cancel-all-2" - setMainSession(session1) + setupMainSessionWithBoulder(session1) const hook = createTodoContinuationEnforcer(createMockPluginInput(), 
{}) @@ -1313,4 +1342,98 @@ describe("todo-continuation-enforcer", () => { // then - no continuation injected (all countdowns cancelled) expect(promptCalls).toHaveLength(0) }) + + // ============================================================ + // BOULDER SESSION GATE TESTS + // These tests verify that todo-continuation-enforcer only fires + // when the session is registered in boulder.json's session_ids + // (i.e., /start-work was executed in the session) + // ============================================================ + + test("should NOT inject for main session when session is NOT in boulder.json session_ids", async () => { + // given - main session that is NOT registered in boulder.json + const sessionID = "main-no-boulder-entry" + setMainSession(sessionID) + writeBoulderJsonForSession(["some-other-session"]) + + const hook = createTodoContinuationEnforcer(createMockPluginInput(), { + backgroundManager: createMockBackgroundManager(false), + }) + + // when - session goes idle + await hook.handler({ + event: { type: "session.idle", properties: { sessionID } }, + }) + + await fakeTimers.advanceBy(3000) + + // then - no continuation injected (session not in boulder) + expect(promptCalls).toHaveLength(0) + }) + + test("should inject for main session when session IS in boulder.json session_ids", async () => { + fakeTimers.restore() + // given - main session that IS registered in boulder.json + const sessionID = "main-in-boulder" + setMainSession(sessionID) + writeBoulderJsonForSession([sessionID]) + + const hook = createTodoContinuationEnforcer(createMockPluginInput(), { + backgroundManager: createMockBackgroundManager(false), + }) + + // when - session goes idle + await hook.handler({ + event: { type: "session.idle", properties: { sessionID } }, + }) + + await wait(2500) + + // then - continuation injected (session is in boulder) + expect(promptCalls.length).toBe(1) + expect(promptCalls[0].text).toContain("TODO CONTINUATION") + }, { timeout: 15000 }) + + 
test("should NOT inject for main session when no boulder.json exists", async () => { + // given - main session with no boulder.json at all + const sessionID = "main-no-boulder-file" + setMainSession(sessionID) + cleanupBoulderFile() + + const hook = createTodoContinuationEnforcer(createMockPluginInput(), { + backgroundManager: createMockBackgroundManager(false), + }) + + // when - session goes idle + await hook.handler({ + event: { type: "session.idle", properties: { sessionID } }, + }) + + await fakeTimers.advanceBy(3000) + + // then - no continuation injected (no boulder state) + expect(promptCalls).toHaveLength(0) + }) + + test("should still inject for background task session regardless of boulder state", async () => { + fakeTimers.restore() + // given - background task session with no boulder entry + setMainSession("main-session") + const bgTaskSession = "bg-task-boulder-test" + subagentSessions.add(bgTaskSession) + cleanupBoulderFile() + + const hook = createTodoContinuationEnforcer(createMockPluginInput(), {}) + + // when - background task session goes idle + await hook.handler({ + event: { type: "session.idle", properties: { sessionID: bgTaskSession } }, + }) + + await wait(2500) + + // then - continuation still injected (background tasks bypass boulder check) + expect(promptCalls.length).toBe(1) + expect(promptCalls[0].sessionID).toBe(bgTaskSession) + }, { timeout: 15000 }) })