From 5c6194372ef4aa1e6dd3c945f331f541b0d06d88 Mon Sep 17 00:00:00 2001 From: HaD0Yun <102889891+HaD0Yun@users.noreply.github.com> Date: Tue, 17 Mar 2026 17:25:46 +0900 Subject: [PATCH 1/3] feat(atlas): persist preferred task session reuse --- src/features/boulder-state/index.ts | 1 + src/features/boulder-state/storage.test.ts | 130 +++++++++++ src/features/boulder-state/storage.ts | 49 ++++- src/features/boulder-state/top-level-task.ts | 77 +++++++ src/features/boulder-state/types.ts | 30 +++ src/hooks/atlas/atlas-hook.ts | 5 +- .../atlas/boulder-continuation-injector.ts | 8 + src/hooks/atlas/idle-event.ts | 17 +- src/hooks/atlas/index.test.ts | 208 +++++++++++++++++- src/hooks/atlas/subagent-session-id.test.ts | 53 +++++ src/hooks/atlas/subagent-session-id.ts | 12 +- src/hooks/atlas/tool-execute-after.ts | 54 ++++- src/hooks/atlas/tool-execute-before.ts | 23 +- src/hooks/atlas/verification-reminders.ts | 16 +- 14 files changed, 665 insertions(+), 18 deletions(-) create mode 100644 src/features/boulder-state/top-level-task.ts create mode 100644 src/hooks/atlas/subagent-session-id.test.ts diff --git a/src/features/boulder-state/index.ts b/src/features/boulder-state/index.ts index f404e4e0e..17618996b 100644 --- a/src/features/boulder-state/index.ts +++ b/src/features/boulder-state/index.ts @@ -1,3 +1,4 @@ export * from "./types" export * from "./constants" export * from "./storage" +export * from "./top-level-task" diff --git a/src/features/boulder-state/storage.test.ts b/src/features/boulder-state/storage.test.ts index e52174cef..a8740662d 100644 --- a/src/features/boulder-state/storage.test.ts +++ b/src/features/boulder-state/storage.test.ts @@ -11,8 +11,11 @@ import { getPlanName, createBoulderState, findPrometheusPlans, + getTaskSessionState, + upsertTaskSessionState, } from "./storage" import type { BoulderState } from "./types" +import { readCurrentTopLevelTask } from "./top-level-task" describe("boulder-state", () => { const TEST_DIR = join(tmpdir(), "boulder-state-test-" + Date.now()) @@ -134,6 +137,24 @@ describe("boulder-state", () => { expect(result?.session_ids).toEqual(["session-1", "session-2"]) expect(result?.plan_name).toBe("my-plan") }) + + test("should default task_sessions to empty object when missing from JSON", () => { + // given - boulder.json without task_sessions field + const boulderFile = join(SISYPHUS_DIR, "boulder.json") + writeFileSync(boulderFile, JSON.stringify({ + active_plan: "/path/to/plan.md", + started_at: "2026-01-01T00:00:00Z", + session_ids: ["session-1"], + plan_name: "plan", + })) + + // when + const result = readBoulderState(TEST_DIR) + + // then + expect(result).not.toBeNull() + expect(result!.task_sessions).toEqual({}) + }) }) describe("writeBoulderState", () => { @@ -249,6 +270,115 @@ describe("boulder-state", () => { }) }) + describe("task session state", () => { + test("should persist and read preferred session for a top-level plan task", () => { + // given - existing boulder state + const state: BoulderState = { + active_plan: "/plan.md", + started_at: "2026-01-02T10:00:00Z", + session_ids: ["session-1"], + plan_name: "plan", + } + writeBoulderState(TEST_DIR, state) + + // when + upsertTaskSessionState(TEST_DIR, { + taskKey: "todo:1", + taskLabel: "1", + taskTitle: "Implement auth flow", + sessionId: "ses_task_123", + agent: "sisyphus-junior", + category: "deep", + }) + const result = getTaskSessionState(TEST_DIR, "todo:1") + + // then + expect(result).not.toBeNull() + expect(result?.session_id).toBe("ses_task_123") + expect(result?.task_title).toBe("Implement auth flow") + expect(result?.agent).toBe("sisyphus-junior") + expect(result?.category).toBe("deep") + }) + + test("should overwrite preferred session for the same top-level plan task", () => { + // given - existing boulder state with prior preferred session + const state: BoulderState = { + active_plan: "/plan.md", + started_at: "2026-01-02T10:00:00Z", + session_ids: ["session-1"], + plan_name: "plan", + task_sessions: { + "todo:1": { + task_key: "todo:1", + task_label: "1", + task_title: "Implement auth flow", + session_id: "ses_old", + updated_at: "2026-01-02T10:00:00Z", + }, + }, + } + writeBoulderState(TEST_DIR, state) + + // when + upsertTaskSessionState(TEST_DIR, { + taskKey: "todo:1", + taskLabel: "1", + taskTitle: "Implement auth flow", + sessionId: "ses_new", + }) + const result = getTaskSessionState(TEST_DIR, "todo:1") + + // then + expect(result?.session_id).toBe("ses_new") + }) + }) + + describe("readCurrentTopLevelTask", () => { + test("should return the first unchecked top-level task in TODOs", () => { + // given - plan with nested and top-level unchecked tasks + const planPath = join(TEST_DIR, "current-task-plan.md") + writeFileSync(planPath, `# Plan + +## TODOs +- [x] 1. Finished task + - [ ] nested acceptance checkbox +- [ ] 2. Current task + +## Final Verification Wave +- [ ] F1. Final review +`) + + // when + const result = readCurrentTopLevelTask(planPath) + + // then + expect(result).not.toBeNull() + expect(result?.key).toBe("todo:2") + expect(result?.title).toBe("Current task") + }) + + test("should fall back to final-wave task when implementation tasks are complete", () => { + // given - plan with only final-wave work remaining + const planPath = join(TEST_DIR, "final-wave-current-task-plan.md") + writeFileSync(planPath, `# Plan + +## TODOs +- [x] 1. Finished task + +## Final Verification Wave +- [ ] F1. Final review +`) + + // when + const result = readCurrentTopLevelTask(planPath) + + // then + expect(result).not.toBeNull() + expect(result?.key).toBe("final-wave:f1") + expect(result?.title).toBe("Final review") + }) + }) + describe("getPlanProgress", () => { test("should count completed and uncompleted checkboxes", () => { // given - plan file with checkboxes diff --git a/src/features/boulder-state/storage.ts b/src/features/boulder-state/storage.ts index ab84368b7..36286a745 100644 --- a/src/features/boulder-state/storage.ts +++ b/src/features/boulder-state/storage.ts @@ -6,7 +6,7 @@ import { existsSync, readFileSync, writeFileSync, mkdirSync, readdirSync } from "node:fs" import { dirname, join, basename } from "node:path" -import type { BoulderState, PlanProgress } from "./types" +import type { BoulderState, PlanProgress, TaskSessionState } from "./types" import { BOULDER_DIR, BOULDER_FILE, PROMETHEUS_PLANS_DIR } from "./constants" export function getBoulderFilePath(directory: string): string { @@ -29,6 +29,9 @@ export function readBoulderState(directory: string): BoulderState | null { if (!Array.isArray(parsed.session_ids)) { parsed.session_ids = [] } + if (!parsed.task_sessions || typeof parsed.task_sessions !== "object" || Array.isArray(parsed.task_sessions)) { + parsed.task_sessions = {} + } return parsed as BoulderState } catch { return null @@ -82,6 +85,50 @@ export function clearBoulderState(directory: string): boolean { } } +export function getTaskSessionState(directory: string, taskKey: string): TaskSessionState | null { + const state = readBoulderState(directory) + if (!state?.task_sessions) { + return null + } + + return state.task_sessions[taskKey] ?? null +} + +export function upsertTaskSessionState( + directory: string, + input: { + taskKey: string + taskLabel: string + taskTitle: string + sessionId: string + agent?: string + category?: string + }, +): BoulderState | null { + const state = readBoulderState(directory) + if (!state) { + return null + } + + const taskSessions = state.task_sessions ?? {} + taskSessions[input.taskKey] = { + task_key: input.taskKey, + task_label: input.taskLabel, + task_title: input.taskTitle, + session_id: input.sessionId, + ...(input.agent !== undefined ? { agent: input.agent } : {}), + ...(input.category !== undefined ? { category: input.category } : {}), + updated_at: new Date().toISOString(), + } + + state.task_sessions = taskSessions + if (writeBoulderState(directory, state)) { + return state + } + + return null +} + /** * Find Prometheus plan files for this project. * Prometheus stores plans at: {project}/.sisyphus/plans/{name}.md diff --git a/src/features/boulder-state/top-level-task.ts b/src/features/boulder-state/top-level-task.ts new file mode 100644 index 000000000..d92970b56 --- /dev/null +++ b/src/features/boulder-state/top-level-task.ts @@ -0,0 +1,77 @@ +import { existsSync, readFileSync } from "node:fs" + +import type { TopLevelTaskRef } from "./types" + +const TODO_HEADING_PATTERN = /^##\s+TODOs\b/i +const FINAL_VERIFICATION_HEADING_PATTERN = /^##\s+Final Verification Wave\b/i +const SECOND_LEVEL_HEADING_PATTERN = /^##\s+/ +const UNCHECKED_CHECKBOX_PATTERN = /^(\s*)[-*]\s*\[\s*\]\s*(.+)$/ +const TODO_TASK_PATTERN = /^(\d+)\.\s+(.+)$/ +const FINAL_WAVE_TASK_PATTERN = /^(F\d+)\.\s+(.+)$/i + +type PlanSection = "todo" | "final-wave" | "other" + +function buildTaskRef( + section: "todo" | "final-wave", + taskLabel: string, +): TopLevelTaskRef | null { + const pattern = section === "todo" ? TODO_TASK_PATTERN : FINAL_WAVE_TASK_PATTERN + const match = taskLabel.match(pattern) + if (!match) { + return null + } + + const rawLabel = match[1] + const title = match[2].trim() + + return { + key: `${section}:${rawLabel.toLowerCase()}`, + section, + label: rawLabel, + title, + } +} + +export function readCurrentTopLevelTask(planPath: string): TopLevelTaskRef | null { + if (!existsSync(planPath)) { + return null + } + + try { + const content = readFileSync(planPath, "utf-8") + const lines = content.split(/\r?\n/) + let section: PlanSection = "other" + + for (const line of lines) { + if (SECOND_LEVEL_HEADING_PATTERN.test(line)) { + section = TODO_HEADING_PATTERN.test(line) + ? "todo" + : FINAL_VERIFICATION_HEADING_PATTERN.test(line) + ? "final-wave" + : "other" + } + + const uncheckedTaskMatch = line.match(UNCHECKED_CHECKBOX_PATTERN) + if (!uncheckedTaskMatch) { + continue + } + + if (uncheckedTaskMatch[1].length > 0) { + continue + } + + if (section !== "todo" && section !== "final-wave") { + continue + } + + const taskRef = buildTaskRef(section, uncheckedTaskMatch[2].trim()) + if (taskRef) { + return taskRef + } + } + + return null + } catch { + return null + } +} diff --git a/src/features/boulder-state/types.ts b/src/features/boulder-state/types.ts index b1a225380..ba488f381 100644 --- a/src/features/boulder-state/types.ts +++ b/src/features/boulder-state/types.ts @@ -18,6 +18,8 @@ export interface BoulderState { agent?: string /** Absolute path to the git worktree root where work happens */ worktree_path?: string + /** Preferred reusable subagent sessions keyed by current top-level plan task */ + task_sessions?: Record } export interface PlanProgress { @@ -28,3 +30,31 @@ export interface PlanProgress { /** Whether all tasks are done */ isComplete: boolean } + +export interface TaskSessionState { + /** Stable identifier for the current top-level plan task (e.g. todo:1 / final-wave:F1) */ + task_key: string + /** Original task label from the plan file */ + task_label: string + /** Full task title from the plan file */ + task_title: string + /** Preferred reusable subagent session */ + session_id: string + /** Agent associated with the task session, when known */ + agent?: string + /** Category associated with the task session, when known */ + category?: string + /** Last update timestamp */ + updated_at: string +} + +export interface TopLevelTaskRef { + /** Stable identifier for the current top-level plan task */ + key: string + /** Task section in the Prometheus plan */ + section: "todo" | "final-wave" + /** Original label token (e.g. 1 / F1) */ + label: string + /** Full task title extracted from the checkbox line */ + title: string +} diff --git a/src/hooks/atlas/atlas-hook.ts b/src/hooks/atlas/atlas-hook.ts index 855cdacb6..a814cfe2c 100644 --- a/src/hooks/atlas/atlas-hook.ts +++ b/src/hooks/atlas/atlas-hook.ts @@ -7,6 +7,7 @@ import type { AtlasHookOptions, SessionState } from "./types" export function createAtlasHook(ctx: PluginInput, options?: AtlasHookOptions) { const sessions = new Map() const pendingFilePaths = new Map() + const pendingTaskRefs = new Map() const autoCommit = options?.autoCommit ?? true function getState(sessionID: string): SessionState { @@ -20,7 +21,7 @@ export function createAtlasHook(ctx: PluginInput, options?: AtlasHookOptions) { return { handler: createAtlasEventHandler({ ctx, options, sessions, getState }), - "tool.execute.before": createToolExecuteBeforeHandler({ ctx, pendingFilePaths }), - "tool.execute.after": createToolExecuteAfterHandler({ ctx, pendingFilePaths, autoCommit, getState }), + "tool.execute.before": createToolExecuteBeforeHandler({ ctx, pendingFilePaths, pendingTaskRefs }), + "tool.execute.after": createToolExecuteAfterHandler({ ctx, pendingFilePaths, pendingTaskRefs, autoCommit, getState }), } } diff --git a/src/hooks/atlas/boulder-continuation-injector.ts b/src/hooks/atlas/boulder-continuation-injector.ts index 4f8e35802..2e01e9ab7 100644 --- a/src/hooks/atlas/boulder-continuation-injector.ts +++ b/src/hooks/atlas/boulder-continuation-injector.ts @@ -15,6 +15,8 @@ export async function injectBoulderContinuation(input: { total: number agent?: string worktreePath?: string + preferredTaskSessionId?: string + preferredTaskTitle?: string backgroundManager?: BackgroundManager sessionState: SessionState }): Promise { @@ -26,6 +28,8 @@ export async function injectBoulderContinuation(input: { total, agent, worktreePath, + preferredTaskSessionId, + preferredTaskTitle, backgroundManager, sessionState, } = input @@ -40,9 +44,13 @@ export async function injectBoulderContinuation(input: { } const worktreeContext = worktreePath ? `\n\n[Worktree: ${worktreePath}]` : "" + const preferredSessionContext = preferredTaskSessionId + ? `\n\n[Preferred reuse session for current top-level plan task${preferredTaskTitle ? `: ${preferredTaskTitle}` : ""}: ${preferredTaskSessionId}]` + : "" const prompt = BOULDER_CONTINUATION_PROMPT.replace(/{PLAN_NAME}/g, planName) + `\n\n[Status: ${total - remaining}/${total} completed, ${remaining} remaining]` + + preferredSessionContext + worktreeContext try { diff --git a/src/hooks/atlas/idle-event.ts b/src/hooks/atlas/idle-event.ts index 1f5cfeb2c..2d64c8eed 100644 --- a/src/hooks/atlas/idle-event.ts +++ b/src/hooks/atlas/idle-event.ts @@ -1,5 +1,10 @@ import type { PluginInput } from "@opencode-ai/plugin" -import { getPlanProgress, readBoulderState } from "../../features/boulder-state" +import { + getPlanProgress, + getTaskSessionState, + readBoulderState, + readCurrentTopLevelTask, +} from "../../features/boulder-state" import { log } from "../../shared/logger" import { injectBoulderContinuation } from "./boulder-continuation-injector" import { HOOK_NAME } from "./hook-name" @@ -31,6 +36,14 @@ async function injectContinuation(input: { input.sessionState.lastContinuationInjectedAt = Date.now() try { + const currentBoulder = readBoulderState(input.ctx.directory) + const currentTask = currentBoulder + ? readCurrentTopLevelTask(currentBoulder.active_plan) + : null + const preferredTaskSession = currentTask + ? getTaskSessionState(input.ctx.directory, currentTask.key) + : null + await injectBoulderContinuation({ ctx: input.ctx, sessionID: input.sessionID, @@ -39,6 +52,8 @@ async function injectContinuation(input: { total: input.progress.total, agent: input.agent, worktreePath: input.worktreePath, + preferredTaskSessionId: preferredTaskSession?.session_id, + preferredTaskTitle: preferredTaskSession?.task_title, backgroundManager: input.options?.backgroundManager, sessionState: input.sessionState, }) diff --git a/src/hooks/atlas/index.test.ts b/src/hooks/atlas/index.test.ts index 269d7928b..8ec696558 100644 --- a/src/hooks/atlas/index.test.ts +++ b/src/hooks/atlas/index.test.ts @@ -404,12 +404,174 @@ describe("atlas hook", () => { // then - should include verification instructions expect(output.output).toContain("LYING") - expect(output.output).toContain("PHASE 1") - expect(output.output).toContain("PHASE 2") + expect(output.output).toContain("PHASE 1") + expect(output.output).toContain("PHASE 2") cleanupMessageStorage(sessionID) }) + test("should persist preferred subagent session for the current top-level task", async () => { + // given - boulder state with a current top-level task, Atlas caller + const sessionID = "session-task-session-track-test" + setupMessageStorage(sessionID, "atlas") + + const planPath = join(TEST_DIR, "task-session-plan.md") + writeFileSync(planPath, `# Plan + +## TODOs +- [ ] 1. Implement auth flow + - [ ] nested acceptance checkbox +`) + + const state: BoulderState = { + active_plan: planPath, + started_at: "2026-01-02T10:00:00Z", + session_ids: ["session-1"], + plan_name: "task-session-plan", + } + writeBoulderState(TEST_DIR, state) + + const hook = createAtlasHook(createMockPluginInput()) + const output = { + title: "Sisyphus Task", + output: `Task completed successfully + + +session_id: ses_auth_flow_123 +`, + metadata: { + agent: "sisyphus-junior", + category: "deep", + }, + } + + // when + await hook["tool.execute.after"]( + { tool: "task", sessionID }, + output + ) + + // then + const updatedState = readBoulderState(TEST_DIR) + expect(updatedState?.task_sessions?.["todo:1"]?.session_id).toBe("ses_auth_flow_123") + expect(updatedState?.task_sessions?.["todo:1"]?.task_title).toBe("Implement auth flow") + expect(updatedState?.task_sessions?.["todo:1"]?.agent).toBe("sisyphus-junior") + expect(updatedState?.task_sessions?.["todo:1"]?.category).toBe("deep") + + cleanupMessageStorage(sessionID) + }) + + test("should preserve the delegated task key even after the plan advances to the next task", async () => { + // given - Atlas caller starts task 1, then the plan advances before task output is processed + const sessionID = "session-stable-task-key-test" + setupMessageStorage(sessionID, "atlas") + + const planPath = join(TEST_DIR, "stable-task-key-plan.md") + writeFileSync(planPath, `# Plan + +## TODOs +- [ ] 1. Implement auth flow +- [ ] 2. Add API validation +`) + + writeBoulderState(TEST_DIR, { + active_plan: planPath, + started_at: "2026-01-02T10:00:00Z", + session_ids: ["session-1"], + plan_name: "stable-task-key-plan", + }) + + const hook = createAtlasHook(createMockPluginInput()) + + // when - Atlas delegates task 1 + await hook["tool.execute.before"]( + { tool: "task", sessionID, callID: "call-task-1" }, + { args: { prompt: "Implement auth flow" } } + ) + + // and the plan is advanced before the task output is processed + writeFileSync(planPath, `# Plan + +## TODOs +- [x] 1. Implement auth flow +- [ ] 2. Add API validation +`) + + await hook["tool.execute.after"]( + { tool: "task", sessionID, callID: "call-task-1" }, + { + title: "Sisyphus Task", + output: `Task completed successfully + + +session_id: ses_auth_flow_123 +`, + metadata: { + agent: "sisyphus-junior", + category: "deep", + }, + } + ) + + // then - the completed task session is still recorded against task 1, not task 2 + const updatedState = readBoulderState(TEST_DIR) + expect(updatedState?.task_sessions?.["todo:1"]?.session_id).toBe("ses_auth_flow_123") + expect(updatedState?.task_sessions?.["todo:2"]).toBeUndefined() + + cleanupMessageStorage(sessionID) + }) + + test("should not overwrite the current task mapping when task() explicitly resumes an older session", async () => { + // given - current plan is on task 2, but Atlas explicitly resumes an older session for a previous task + const sessionID = "session-cross-task-resume-test" + setupMessageStorage(sessionID, "atlas") + + const planPath = join(TEST_DIR, "cross-task-resume-plan.md") + writeFileSync(planPath, `# Plan + +## TODOs +- [x] 1. Implement auth flow +- [ ] 2. Add API validation +`) + + writeBoulderState(TEST_DIR, { + active_plan: planPath, + started_at: "2026-01-02T10:00:00Z", + session_ids: ["session-1"], + plan_name: "cross-task-resume-plan", + }) + + const hook = createAtlasHook(createMockPluginInput()) + + // when - Atlas resumes an explicit prior session + await hook["tool.execute.before"]( + { tool: "task", sessionID, callID: "call-resume-old-task" }, + { args: { prompt: "Follow up on previous task", session_id: "ses_old_task_111" } } + ) + + await hook["tool.execute.after"]( + { tool: "task", sessionID, callID: "call-resume-old-task" }, + { + title: "Sisyphus Task", + output: `Task continued successfully + + +session_id: ses_old_task_111 +`, + metadata: { + agent: "sisyphus-junior", + category: "deep", + }, + } + ) + + // then - Atlas does not poison task 2's preferred session mapping + const updatedState = readBoulderState(TEST_DIR) + expect(updatedState?.task_sessions?.["todo:2"]).toBeUndefined() + + cleanupMessageStorage(sessionID) + }) + describe("completion gate output ordering", () => { const COMPLETION_GATE_SESSION = "completion-gate-order-test" @@ -1147,6 +1309,48 @@ describe("atlas hook", () => { expect(callArgs.body.parts[0].text).toContain("2 remaining") }) + test("should include preferred reuse session in continuation prompt for current top-level task", async () => { + // given - boulder state with tracked preferred session + const planPath = join(TEST_DIR, "preferred-session-plan.md") + writeFileSync(planPath, `# Plan + +## TODOs +- [ ] 1. Implement auth flow +`) + + writeBoulderState(TEST_DIR, { + active_plan: planPath, + started_at: "2026-01-02T10:00:00Z", + session_ids: [MAIN_SESSION_ID], + plan_name: "preferred-session-plan", + task_sessions: { + "todo:1": { + task_key: "todo:1", + task_label: "1", + task_title: "Implement auth flow", + session_id: "ses_auth_flow_123", + updated_at: "2026-01-02T10:00:00Z", + }, + }, + }) + + const mockInput = createMockPluginInput() + const hook = createAtlasHook(mockInput) + + // when + await hook.handler({ + event: { + type: "session.idle", + properties: { sessionID: MAIN_SESSION_ID }, + }, + }) + + // then + const callArgs = mockInput._promptMock.mock.calls[0][0] + expect(callArgs.body.parts[0].text).toContain("Preferred reuse session for current top-level plan task") + expect(callArgs.body.parts[0].text).toContain("ses_auth_flow_123") + }) + test("should inject when last agent is sisyphus and boulder targets atlas explicitly", async () => { // given - boulder explicitly set to atlas, but last agent is sisyphus (initial state after /start-work) const planPath = join(TEST_DIR, "test-plan.md") diff --git a/src/hooks/atlas/subagent-session-id.test.ts b/src/hooks/atlas/subagent-session-id.test.ts new file mode 100644 index 000000000..8d4416c9d --- /dev/null +++ b/src/hooks/atlas/subagent-session-id.test.ts @@ -0,0 +1,53 @@ +import { describe, expect, test } from "bun:test" + +import { extractSessionIdFromOutput } from "./subagent-session-id" + +describe("extractSessionIdFromOutput", () => { + test("extracts Session ID blocks from background output", () => { + // given + const output = `Background task launched.\n\nSession ID: ses_bg_12345` + + // when + const result = extractSessionIdFromOutput(output) + + // then + expect(result).toBe("ses_bg_12345") + }) + + test("extracts session_id from task metadata blocks", () => { + // given + const output = `Task completed.\n\n\nsession_id: ses_sync_12345\n` + + // when + const result = extractSessionIdFromOutput(output) + + // then + expect(result).toBe("ses_sync_12345") + }) + + test("returns undefined when no session id is present", () => { + // given + const output = "Task completed without metadata" + + // when + const result = extractSessionIdFromOutput(output) + + // then + expect(result).toBeUndefined() + }) + + test("prefers the session id inside the trailing task_metadata block", () => { + // given + const output = `The previous attempt mentioned session_id: ses_wrong_body_123 but that was only context. + + +session_id: ses_real_metadata_456 +` + + // when + const result = extractSessionIdFromOutput(output) + + // then + expect(result).toBe("ses_real_metadata_456") + }) +}) diff --git a/src/hooks/atlas/subagent-session-id.ts b/src/hooks/atlas/subagent-session-id.ts index 12cf619b1..9494d5a33 100644 --- a/src/hooks/atlas/subagent-session-id.ts +++ b/src/hooks/atlas/subagent-session-id.ts @@ -1,4 +1,10 @@ -export function extractSessionIdFromOutput(output: string): string { - const match = output.match(/Session ID:\s*(ses_[a-zA-Z0-9]+)/) - return match?.[1] ?? "" +export function extractSessionIdFromOutput(output: string): string | undefined { + const taskMetadataMatches = [...output.matchAll(/[\s\S]*?session_id:\s*(ses_[a-zA-Z0-9_]+)[\s\S]*?<\/task_metadata>/gi)] + const lastTaskMetadataMatch = taskMetadataMatches.at(-1) + if (lastTaskMetadataMatch) { + return lastTaskMetadataMatch[1] + } + + const explicitSessionMatches = [...output.matchAll(/Session ID:\s*(ses_[a-zA-Z0-9_]+)/g)] + return explicitSessionMatches.at(-1)?.[1] } diff --git a/src/hooks/atlas/tool-execute-after.ts b/src/hooks/atlas/tool-execute-after.ts index 55fb8ddd6..ed23ce011 100644 --- a/src/hooks/atlas/tool-execute-after.ts +++ b/src/hooks/atlas/tool-execute-after.ts @@ -1,5 +1,12 @@ import type { PluginInput } from "@opencode-ai/plugin" -import { appendSessionId, getPlanProgress, readBoulderState } from "../../features/boulder-state" +import { + appendSessionId, + getPlanProgress, + getTaskSessionState, + readBoulderState, + readCurrentTopLevelTask, + upsertTaskSessionState, +} from "../../features/boulder-state" import { log } from "../../shared/logger" import { isCallerOrchestrator } from "../../shared/session-utils" import { collectGitDiffStats, formatFileChanges } from "../../shared/git-worktree" @@ -18,13 +25,18 @@ import { isWriteOrEditToolName } from "./write-edit-tool-policy" import type { SessionState } from "./types" import type { ToolExecuteAfterInput, ToolExecuteAfterOutput } from "./types" +function resolvePreferredSessionId(currentSessionId?: string, trackedSessionId?: string): string { + return currentSessionId ?? trackedSessionId ?? "" +} + export function createToolExecuteAfterHandler(input: { ctx: PluginInput pendingFilePaths: Map + pendingTaskRefs: Map autoCommit: boolean getState: (sessionID: string) => SessionState }): (toolInput: ToolExecuteAfterInput, toolOutput: ToolExecuteAfterOutput) => Promise { - const { ctx, pendingFilePaths, autoCommit, getState } = input + const { ctx, pendingFilePaths, pendingTaskRefs, autoCommit, getState } = input return async (toolInput, toolOutput): Promise => { // Guard against undefined output (e.g., from /review command - see issue #1035) if (!toolOutput) { @@ -68,10 +80,21 @@ export function createToolExecuteAfterHandler(input: { const gitStats = collectGitDiffStats(ctx.directory) const fileChanges = formatFileChanges(gitStats) const subagentSessionId = extractSessionIdFromOutput(toolOutput.output) + const pendingTaskRef = toolInput.callID ? pendingTaskRefs.get(toolInput.callID) : undefined + if (toolInput.callID) { + pendingTaskRefs.delete(toolInput.callID) + } const boulderState = readBoulderState(ctx.directory) if (boulderState) { const progress = getPlanProgress(boulderState.active_plan) + const shouldSkipTaskSessionUpdate = pendingTaskRef === null + const currentTask = shouldSkipTaskSessionUpdate + ? null + : pendingTaskRef ?? readCurrentTopLevelTask(boulderState.active_plan) + const trackedTaskSession = currentTask + ? getTaskSessionState(ctx.directory, currentTask.key) + : null const sessionState = toolInput.sessionID ? getState(toolInput.sessionID) : undefined if (toolInput.sessionID && !boulderState.session_ids?.includes(toolInput.sessionID)) { @@ -82,6 +105,22 @@ export function createToolExecuteAfterHandler(input: { }) } + if (currentTask && subagentSessionId) { + upsertTaskSessionState(ctx.directory, { + taskKey: currentTask.key, + taskLabel: currentTask.label, + taskTitle: currentTask.title, + sessionId: subagentSessionId, + agent: toolOutput.metadata?.agent as string | undefined, + category: toolOutput.metadata?.category as string | undefined, + }) + } + + const preferredSessionId = resolvePreferredSessionId( + subagentSessionId, + trackedTaskSession?.session_id, + ) + // Preserve original subagent response - critical for debugging failed tasks const originalResponse = toolOutput.output const shouldPauseForApproval = sessionState @@ -102,11 +141,11 @@ export function createToolExecuteAfterHandler(input: { } const leadReminder = shouldPauseForApproval - ? buildFinalWaveApprovalReminder(boulderState.plan_name, progress, subagentSessionId) - : buildCompletionGate(boulderState.plan_name, subagentSessionId) + ? buildFinalWaveApprovalReminder(boulderState.plan_name, progress, preferredSessionId) + : buildCompletionGate(boulderState.plan_name, preferredSessionId) const followupReminder = shouldPauseForApproval ? null - : buildOrchestratorReminder(boulderState.plan_name, progress, subagentSessionId, autoCommit, false) + : buildOrchestratorReminder(boulderState.plan_name, progress, preferredSessionId, autoCommit, false) toolOutput.output = ` @@ -132,10 +171,13 @@ ${ plan: boulderState.plan_name, progress: `${progress.completed}/${progress.total}`, fileCount: gitStats.length, + preferredSessionId, waitingForFinalWaveApproval: shouldPauseForApproval, }) } else { - toolOutput.output += `\n\n${buildStandaloneVerificationReminder(subagentSessionId)}\n` + toolOutput.output += `\n\n${buildStandaloneVerificationReminder( + resolvePreferredSessionId(subagentSessionId), + )}\n` log(`[${HOOK_NAME}] Verification reminder appended for orchestrator`, { sessionID: toolInput.sessionID, diff --git a/src/hooks/atlas/tool-execute-before.ts b/src/hooks/atlas/tool-execute-before.ts index 51f670000..d020adaca 100644 --- a/src/hooks/atlas/tool-execute-before.ts +++ b/src/hooks/atlas/tool-execute-before.ts @@ -2,6 +2,7 @@ import { log } from "../../shared/logger" import { SYSTEM_DIRECTIVE_PREFIX } from "../../shared/system-directive" import { isCallerOrchestrator } from "../../shared/session-utils" import type { PluginInput } from "@opencode-ai/plugin" +import { readBoulderState, readCurrentTopLevelTask } from "../../features/boulder-state" import { HOOK_NAME } from "./hook-name" import { ORCHESTRATOR_DELEGATION_REQUIRED, SINGLE_TASK_DIRECTIVE } from "./system-reminder-templates" import { isSisyphusPath } from "./sisyphus-path" @@ -10,11 +11,12 @@ import { isWriteOrEditToolName } from "./write-edit-tool-policy" export function createToolExecuteBeforeHandler(input: { ctx: PluginInput pendingFilePaths: Map + pendingTaskRefs: Map }): ( toolInput: { tool: string; sessionID?: string; callID?: string }, toolOutput: { args: Record; message?: string } ) => Promise { - const { ctx, pendingFilePaths } = input + const { ctx, pendingFilePaths, pendingTaskRefs } = input return async (toolInput, toolOutput): Promise => { if (!(await isCallerOrchestrator(toolInput.sessionID, ctx.client))) { @@ -43,6 +45,25 @@ export function createToolExecuteBeforeHandler(input: { // Check task - inject single-task directive if (toolInput.tool === "task") { + if (toolInput.callID) { + const requestedSessionId = toolOutput.args.session_id as string | undefined + if (requestedSessionId) { + pendingTaskRefs.set(toolInput.callID, null) + } else { + const boulderState = readBoulderState(ctx.directory) + const currentTask = boulderState + ? readCurrentTopLevelTask(boulderState.active_plan) + : null + if (currentTask) { + pendingTaskRefs.set(toolInput.callID, { + key: currentTask.key, + label: currentTask.label, + title: currentTask.title, + }) + } + } + } + const prompt = toolOutput.args.prompt as string | undefined if (prompt && !prompt.includes(SYSTEM_DIRECTIVE_PREFIX)) { toolOutput.args.prompt = `${SINGLE_TASK_DIRECTIVE}\n` + prompt diff --git a/src/hooks/atlas/verification-reminders.ts b/src/hooks/atlas/verification-reminders.ts index cb988984e..80217798d 100644 --- a/src/hooks/atlas/verification-reminders.ts +++ b/src/hooks/atlas/verification-reminders.ts @@ -1,5 +1,14 @@ import { VERIFICATION_REMINDER } from "./system-reminder-templates" +function buildReuseHint(sessionId: string): string { + return ` +**PREFERRED REUSE SESSION FOR THE CURRENT TOP-LEVEL PLAN TASK** + +- Reuse \`${sessionId}\` first if verification fails or the result needs follow-up. +- Start a fresh subagent session only when reuse is unavailable or would cross task boundaries. +` +} + export function buildCompletionGate(planName: string, sessionId: string): string { return ` **COMPLETION GATE — DO NOT PROCEED UNTIL THIS IS DONE** @@ -25,7 +34,8 @@ task(session_id="${sessionId}", prompt="fix: checkbox not recorded correctly") **Your completion is NOT tracked until the checkbox is marked in the plan file.** -**VERIFICATION_REMINDER**` +**VERIFICATION_REMINDER** +${buildReuseHint(sessionId)}` } function buildVerificationReminder(sessionId: string): string { @@ -38,7 +48,9 @@ ${VERIFICATION_REMINDER} **If ANY verification fails, use this immediately:** \`\`\` task(session_id="${sessionId}", prompt="fix: [describe the specific failure]") -\`\`\`` +\`\`\` + +${buildReuseHint(sessionId)}` } export function buildOrchestratorReminder( From 8adf6a2c478eab6ee854bdef067fc8d1bd9583bf Mon Sep 17 00:00:00 2001 From: HaD0Yun <102889891+HaD0Yun@users.noreply.github.com> Date: Tue, 17 Mar 2026 18:14:17 +0900 Subject: [PATCH 2/3] fix(atlas): tighten session reuse metadata parsing --- src/hooks/atlas/index.test.ts | 58 +++++++++++++++++++++ src/hooks/atlas/subagent-session-id.test.ts | 15 ++++++ src/hooks/atlas/subagent-session-id.ts | 11 ++-- src/hooks/atlas/tool-execute-after.ts | 8 +-- 4 files changed, 84 insertions(+), 8 deletions(-) diff --git a/src/hooks/atlas/index.test.ts b/src/hooks/atlas/index.test.ts index 8ec696558..917d0079e 100644 --- a/src/hooks/atlas/index.test.ts +++ b/src/hooks/atlas/index.test.ts @@ -33,6 +33,8 @@ mock.module("../../shared/opencode-storage-detection", () => ({ })) const { createAtlasHook } = await import("./index") +const { createToolExecuteAfterHandler } = await import("./tool-execute-after") +const { createToolExecuteBeforeHandler } = await import("./tool-execute-before") const { MESSAGE_STORAGE } = await import("../../features/hook-message-injector") describe("atlas hook", () => { @@ -410,6 +412,62 @@ describe("atlas hook", () => { cleanupMessageStorage(sessionID) }) + test("should clean pending task refs when a task returns background launch output", async () => { + // given - direct handlers with shared pending maps + const sessionID = "session-bg-launch-cleanup-test" + setupMessageStorage(sessionID, "atlas") + + const planPath = join(TEST_DIR, "background-cleanup-plan.md") + writeFileSync(planPath, `# Plan + +## TODOs +- [ ] 1. Implement auth flow +`) + writeBoulderState(TEST_DIR, { + active_plan: planPath, + started_at: "2026-01-02T10:00:00Z", + session_ids: ["session-1"], + plan_name: "background-cleanup-plan", + }) + + const pendingFilePaths = new Map() + const pendingTaskRefs = new Map() + const beforeHandler = createToolExecuteBeforeHandler({ + ctx: createMockPluginInput(), + pendingFilePaths, + pendingTaskRefs, + }) + const afterHandler = createToolExecuteAfterHandler({ + ctx: createMockPluginInput(), + pendingFilePaths, + pendingTaskRefs, + autoCommit: true, + getState: () => ({ promptFailureCount: 0 }), + }) + + // when - the task is captured before execution + await beforeHandler( + { tool: "task", sessionID, callID: "call-bg-launch" }, + { args: { prompt: "Implement auth flow" } } + ) + expect(pendingTaskRefs.size).toBe(1) + + // and the task returns a background launch result + await afterHandler( + { tool: "task", sessionID, callID: "call-bg-launch" }, + { + title: "Sisyphus Task", + output: "Background task launched.\n\nSession ID: ses_bg_12345", + metadata: {}, + } + ) + + // then - the pending task ref is still cleaned up + expect(pendingTaskRefs.size).toBe(0) + + cleanupMessageStorage(sessionID) + }) + test("should persist preferred subagent session for the current top-level task", async () => { // given - boulder state with a current top-level task, Atlas caller const sessionID = "session-task-session-track-test" diff --git a/src/hooks/atlas/subagent-session-id.test.ts b/src/hooks/atlas/subagent-session-id.test.ts index 8d4416c9d..5973784c9 100644 --- a/src/hooks/atlas/subagent-session-id.test.ts +++ b/src/hooks/atlas/subagent-session-id.test.ts @@ -50,4 +50,19 @@ session_id: ses_real_metadata_456 // then expect(result).toBe("ses_real_metadata_456") }) + + test("does not let task_metadata parsing bleed into incidental body text after the closing tag", () => { + // given + const output = ` +session_id: ses_real_metadata_456 + + +debug log: session_id: ses_wrong_body_789` + + // when + const result = extractSessionIdFromOutput(output) + + // then + expect(result).toBe("ses_real_metadata_456") + }) }) diff --git a/src/hooks/atlas/subagent-session-id.ts b/src/hooks/atlas/subagent-session-id.ts index 9494d5a33..d4c5d8709 100644 --- a/src/hooks/atlas/subagent-session-id.ts +++ b/src/hooks/atlas/subagent-session-id.ts @@ -1,8 +1,11 @@ export function extractSessionIdFromOutput(output: string): string | undefined { - const taskMetadataMatches = [...output.matchAll(/[\s\S]*?session_id:\s*(ses_[a-zA-Z0-9_]+)[\s\S]*?<\/task_metadata>/gi)] - const lastTaskMetadataMatch = taskMetadataMatches.at(-1) - if (lastTaskMetadataMatch) { - return lastTaskMetadataMatch[1] + const taskMetadataBlocks = [...output.matchAll(/([\s\S]*?)<\/task_metadata>/gi)] + const lastTaskMetadataBlock = taskMetadataBlocks.at(-1)?.[1] + if (lastTaskMetadataBlock) { + const taskMetadataSessionMatch = lastTaskMetadataBlock.match(/session_id:\s*(ses_[a-zA-Z0-9_]+)/i) + if (taskMetadataSessionMatch) { + return taskMetadataSessionMatch[1] + } } const explicitSessionMatches = [...output.matchAll(/Session ID:\s*(ses_[a-zA-Z0-9_]+)/g)] diff --git a/src/hooks/atlas/tool-execute-after.ts b/src/hooks/atlas/tool-execute-after.ts index ed23ce011..a598e92d3 100644 --- a/src/hooks/atlas/tool-execute-after.ts +++ b/src/hooks/atlas/tool-execute-after.ts @@ -71,6 +71,10 @@ export function createToolExecuteAfterHandler(input: { } const outputStr = toolOutput.output && typeof toolOutput.output === "string" ? toolOutput.output : "" + const pendingTaskRef = toolInput.callID ? pendingTaskRefs.get(toolInput.callID) : undefined + if (toolInput.callID) { + pendingTaskRefs.delete(toolInput.callID) + } const isBackgroundLaunch = outputStr.includes("Background task launched") || outputStr.includes("Background task continued") if (isBackgroundLaunch) { return @@ -80,10 +84,6 @@ export function createToolExecuteAfterHandler(input: { const gitStats = collectGitDiffStats(ctx.directory) const fileChanges = formatFileChanges(gitStats) const subagentSessionId = extractSessionIdFromOutput(toolOutput.output) - const pendingTaskRef = toolInput.callID ? pendingTaskRefs.get(toolInput.callID) : undefined - if (toolInput.callID) { - pendingTaskRefs.delete(toolInput.callID) - } const boulderState = readBoulderState(ctx.directory) if (boulderState) { From 8859da5fef0a5c2a3eea69dfd7c3fb33e7736f60 Mon Sep 17 00:00:00 2001 From: HaD0Yun <102889891+HaD0Yun@users.noreply.github.com> Date: Wed, 18 Mar 2026 17:31:27 +0900 Subject: [PATCH 3/3] fix(atlas): harden task session reuse --- src/hooks/atlas/atlas-hook.ts | 4 +- ...inal-wave-approval-gate-regression.test.ts | 19 +- .../atlas/final-wave-approval-gate.test.ts | 14 +- src/hooks/atlas/index.test.ts | 229 ++++++++++++++++-- src/hooks/atlas/subagent-session-id.ts | 31 +++ src/hooks/atlas/tool-execute-after.ts | 82 ++++++- src/hooks/atlas/tool-execute-before.ts | 34 ++- src/hooks/atlas/types.ts | 8 + 8 files changed, 385 insertions(+), 36 deletions(-) diff --git a/src/hooks/atlas/atlas-hook.ts b/src/hooks/atlas/atlas-hook.ts index a814cfe2c..ca71bb8d9 100644 --- a/src/hooks/atlas/atlas-hook.ts +++ b/src/hooks/atlas/atlas-hook.ts @@ -2,12 +2,12 @@ import type { PluginInput } from "@opencode-ai/plugin" import { createAtlasEventHandler } from "./event-handler" import { createToolExecuteAfterHandler } from "./tool-execute-after" import { createToolExecuteBeforeHandler } from "./tool-execute-before" -import type { AtlasHookOptions, SessionState } from "./types" +import type { AtlasHookOptions, PendingTaskRef, SessionState } from "./types" export function createAtlasHook(ctx: PluginInput, options?: AtlasHookOptions) { const sessions = new Map() const pendingFilePaths = new Map() - const pendingTaskRefs = new Map() + const pendingTaskRefs = new Map() const autoCommit = options?.autoCommit ?? true function getState(sessionID: string): SessionState { diff --git a/src/hooks/atlas/final-wave-approval-gate-regression.test.ts b/src/hooks/atlas/final-wave-approval-gate-regression.test.ts index b653c66d4..ab509d828 100644 --- a/src/hooks/atlas/final-wave-approval-gate-regression.test.ts +++ b/src/hooks/atlas/final-wave-approval-gate-regression.test.ts @@ -4,7 +4,7 @@ import { existsSync, mkdirSync, rmSync, writeFileSync } from "node:fs" import { tmpdir } from "node:os" import { join } from "node:path" import { createOpencodeClient } from "@opencode-ai/sdk" -import type { AssistantMessage } from "@opencode-ai/sdk" +import type { AssistantMessage, Session } from "@opencode-ai/sdk" import type { BoulderState } from "../../features/boulder-state" import { clearBoulderState, writeBoulderState } from "../../features/boulder-state" @@ -52,6 +52,23 @@ describe("Atlas final-wave approval gate regressions", () => { response: new Response(), })) + Reflect.set(client.session, "get", async ({ path }: { path: { id: string } }) => { + const parentID = path.id === "ses_nested_scope_review" + ? "atlas-nested-final-wave-session" + : path.id.startsWith("ses_parallel_review_") + ? "atlas-parallel-final-wave-session" + : "main-session-123" + + return { + data: { + id: path.id, + parentID, + } as Session, + request: new Request(`http://localhost/session/${path.id}`), + response: new Response(), + } + }) + return { directory: testDirectory, project: {} as AtlasHookContext["project"], diff --git a/src/hooks/atlas/final-wave-approval-gate.test.ts b/src/hooks/atlas/final-wave-approval-gate.test.ts index 5812c4ba1..5c0e44492 100644 --- a/src/hooks/atlas/final-wave-approval-gate.test.ts +++ b/src/hooks/atlas/final-wave-approval-gate.test.ts @@ -60,10 +60,18 @@ describe("Atlas final verification approval gate", () => { } }) - Reflect.set(client.session, "get", async () => { + Reflect.set(client.session, "get", async ({ path }: { path: { id: string } }) => { + const parentID = path.id === "ses_final_wave_review" + ? "atlas-final-wave-session" + : path.id === "ses_feature_task" + ? "atlas-non-final-session" + : "main-session-123" return { - data: { parentID: "main-session-123" } as Session, - request: new Request("http://localhost/session/main-session-123"), + data: { + id: path.id, + parentID, + } as Session, + request: new Request(`http://localhost/session/${path.id}`), response: new Response(), } }) diff --git a/src/hooks/atlas/index.test.ts b/src/hooks/atlas/index.test.ts index 917d0079e..9cc785791 100644 --- a/src/hooks/atlas/index.test.ts +++ b/src/hooks/atlas/index.test.ts @@ -10,6 +10,7 @@ import { } from "../../features/boulder-state" import type { BoulderState } from "../../features/boulder-state" import { _resetForTesting, subagentSessions, updateSessionAgent } from "../../features/claude-code-session-state" +import type { PendingTaskRef } from "./types" const TEST_STORAGE_ROOT = join(tmpdir(), `atlas-message-storage-${randomUUID()}`) const TEST_MESSAGE_STORAGE = join(TEST_STORAGE_ROOT, "message") @@ -41,19 +42,32 @@ describe("atlas hook", () => { let TEST_DIR: string let SISYPHUS_DIR: string - function createMockPluginInput(overrides?: { promptMock?: ReturnType }) { + function createMockPluginInput(overrides?: { + promptMock?: ReturnType + sessionGetMock?: ReturnType + }) { const promptMock = overrides?.promptMock ?? mock(() => Promise.resolve()) + const sessionGetMock = overrides?.sessionGetMock ?? mock(async ({ path }: { path: { id: string } }) => ({ + data: { + id: path.id, + parentID: path.id.startsWith("ses_") ? "session-1" : "main-session-123", + }, + })) return { directory: TEST_DIR, client: { session: { - get: async () => ({ data: { parentID: "main-session-123" } }), + get: sessionGetMock, prompt: promptMock, promptAsync: promptMock, }, }, _promptMock: promptMock, - } as unknown as Parameters[0] & { _promptMock: ReturnType } + _sessionGetMock: sessionGetMock, + } as unknown as Parameters[0] & { + _promptMock: ReturnType + _sessionGetMock: ReturnType + } } function setupMessageStorage(sessionID: string, agent: string): void { @@ -431,7 +445,7 @@ describe("atlas hook", () => { }) const pendingFilePaths = new Map() - const pendingTaskRefs = new Map() + const pendingTaskRefs = new Map() const beforeHandler = createToolExecuteBeforeHandler({ ctx: createMockPluginInput(), pendingFilePaths, @@ -607,25 +621,212 @@ session_id: ses_auth_flow_123 { args: { prompt: "Follow up on previous task", session_id: "ses_old_task_111" } } ) - await hook["tool.execute.after"]( - { tool: "task", sessionID, callID: "call-resume-old-task" }, - { - title: "Sisyphus Task", - output: `Task continued successfully + const output = { + title: "Sisyphus Task", + output: `Task continued successfully session_id: ses_old_task_111 `, - metadata: { - agent: "sisyphus-junior", - category: "deep", - }, - } + metadata: { + agent: "sisyphus-junior", + category: "deep", + }, + } + await hook["tool.execute.after"]( + { tool: "task", sessionID, callID: "call-resume-old-task" }, + output ) // then - Atlas does not poison task 2's preferred session mapping const updatedState = readBoulderState(TEST_DIR) expect(updatedState?.task_sessions?.["todo:2"]).toBeUndefined() + expect(output.output).not.toContain('task(session_id="ses_old_task_111"') + + cleanupMessageStorage(sessionID) + }) + + test("should not reuse an explicitly resumed session id in completion reminders", async () => { + // given - current plan is on task 2 with an existing tracked session + const sessionID = "session-explicit-resume-reminder-test" + setupMessageStorage(sessionID, "atlas") + + const planPath = join(TEST_DIR, "explicit-resume-reminder-plan.md") + writeFileSync(planPath, `# Plan + +## TODOs +- [x] 1. Implement auth flow +- [ ] 2. Add API validation +`) + + writeBoulderState(TEST_DIR, { + active_plan: planPath, + started_at: "2026-01-02T10:00:00Z", + session_ids: ["session-1"], + plan_name: "explicit-resume-reminder-plan", + task_sessions: { + "todo:2": { + task_key: "todo:2", + task_label: "2", + task_title: "Add API validation", + session_id: "ses_tracked_current_task", + updated_at: "2026-01-02T10:00:00Z", + }, + }, + }) + + const hook = createAtlasHook(createMockPluginInput()) + const output = { + title: "Sisyphus Task", + output: `Task continued successfully + + +session_id: ses_old_task_111 +`, + metadata: {}, + } + + // when + await hook["tool.execute.before"]( + { tool: "task", sessionID, callID: "call-explicit-resume-reminder" }, + { args: { prompt: "Follow up on previous task", session_id: "ses_old_task_111" } } + ) + await hook["tool.execute.after"]( + { tool: "task", sessionID, callID: "call-explicit-resume-reminder" }, + output + ) + + // then + expect(output.output).not.toContain('task(session_id="ses_old_task_111"') + expect(output.output).toContain("ses_tracked_current_task") + + cleanupMessageStorage(sessionID) + }) + + test("should skip persistence when multiple in-flight task calls claim the same top-level task", async () => { + // given + const sessionID = "session-parallel-task-collision-test" + setupMessageStorage(sessionID, "atlas") + + const planPath = join(TEST_DIR, "parallel-task-collision-plan.md") + writeFileSync(planPath, `# Plan + +## TODOs +- [ ] 1. Implement auth flow +- [ ] 2. Add API validation +`) + + writeBoulderState(TEST_DIR, { + active_plan: planPath, + started_at: "2026-01-02T10:00:00Z", + session_ids: ["session-1"], + plan_name: "parallel-task-collision-plan", + }) + + const pendingFilePaths = new Map() + const pendingTaskRefs = new Map() + const beforeHandler = createToolExecuteBeforeHandler({ + ctx: createMockPluginInput(), + pendingFilePaths, + pendingTaskRefs, + }) + const afterHandler = createToolExecuteAfterHandler({ + ctx: createMockPluginInput(), + pendingFilePaths, + pendingTaskRefs, + autoCommit: true, + getState: () => ({ promptFailureCount: 0 }), + }) + + // when - two task() calls start before either one completes + await beforeHandler( + { tool: "task", sessionID, callID: "call-task-first" }, + { args: { prompt: "Implement auth flow part 1" } } + ) + await beforeHandler( + { tool: "task", sessionID, callID: "call-task-second" }, + { args: { prompt: "Implement auth flow part 2" } } + ) + + const secondPendingTaskRef = pendingTaskRefs.get("call-task-second") + + await afterHandler( + { tool: "task", sessionID, callID: "call-task-second" }, + { + title: "Sisyphus Task", + output: `Task completed successfully + + +session_id: ses_parallel_collision_222 +`, + metadata: {}, + } + ) + + // then + expect(secondPendingTaskRef).toEqual({ + kind: "skip", + reason: "ambiguous_task_key", + task: { + key: "todo:1", + label: "1", + title: "Implement auth flow", + }, + }) + const updatedState = readBoulderState(TEST_DIR) + expect(updatedState?.task_sessions?.["todo:1"]).toBeUndefined() + + cleanupMessageStorage(sessionID) + }) + + test("should ignore extracted session ids that are outside the active boulder lineage", async () => { + // given + const sessionID = "session-untrusted-session-id-test" + setupMessageStorage(sessionID, "atlas") + + const planPath = join(TEST_DIR, "untrusted-session-id-plan.md") + writeFileSync(planPath, `# Plan + +## TODOs +- [ ] 1. Implement auth flow +`) + + writeBoulderState(TEST_DIR, { + active_plan: planPath, + started_at: "2026-01-02T10:00:00Z", + session_ids: ["session-1"], + plan_name: "untrusted-session-id-plan", + }) + + const hook = createAtlasHook(createMockPluginInput({ + sessionGetMock: mock(async ({ path }: { path: { id: string } }) => ({ + data: { + id: path.id, + parentID: path.id === "ses_untrusted_999" ? "session-outside-lineage" : "main-session-123", + }, + })), + })) + const output = { + title: "Sisyphus Task", + output: `Task completed successfully + + +session_id: ses_untrusted_999 +`, + metadata: {}, + } + + // when + await hook["tool.execute.after"]( + { tool: "task", sessionID }, + output + ) + + // then + const updatedState = readBoulderState(TEST_DIR) + expect(updatedState?.task_sessions?.["todo:1"]).toBeUndefined() + expect(output.output).not.toContain('task(session_id="ses_untrusted_999"') + expect(output.output).toContain('task(session_id=""') cleanupMessageStorage(sessionID) }) diff --git a/src/hooks/atlas/subagent-session-id.ts b/src/hooks/atlas/subagent-session-id.ts index d4c5d8709..4ce8886f3 100644 --- a/src/hooks/atlas/subagent-session-id.ts +++ b/src/hooks/atlas/subagent-session-id.ts @@ -1,3 +1,8 @@ +import type { PluginInput } from "@opencode-ai/plugin" +import { log } from "../../shared/logger" +import { isSessionInBoulderLineage } from "./boulder-session-lineage" +import { HOOK_NAME } from "./hook-name" + export function extractSessionIdFromOutput(output: string): string | undefined { const taskMetadataBlocks = [...output.matchAll(/([\s\S]*?)<\/task_metadata>/gi)] const lastTaskMetadataBlock = taskMetadataBlocks.at(-1)?.[1] @@ -11,3 +16,29 @@ export function extractSessionIdFromOutput(output: string): string | undefined { const explicitSessionMatches = [...output.matchAll(/Session ID:\s*(ses_[a-zA-Z0-9_]+)/g)] return explicitSessionMatches.at(-1)?.[1] } + +export async function validateSubagentSessionId(input: { + client: PluginInput["client"] + sessionID?: string + lineageSessionIDs: string[] +}): Promise { + if (!input.sessionID || input.lineageSessionIDs.length === 0) { + return undefined + } + + const belongsToLineage = await isSessionInBoulderLineage({ + client: input.client, + sessionID: input.sessionID, + boulderSessionIDs: input.lineageSessionIDs, + }) + + if (!belongsToLineage) { + log(`[${HOOK_NAME}] Ignoring extracted session id outside active lineage`, { + sessionID: input.sessionID, + lineageSessionIDs: input.lineageSessionIDs, + }) + return undefined + } + + return input.sessionID +} diff --git a/src/hooks/atlas/tool-execute-after.ts b/src/hooks/atlas/tool-execute-after.ts index a598e92d3..7be1b14df 100644 --- a/src/hooks/atlas/tool-execute-after.ts +++ b/src/hooks/atlas/tool-execute-after.ts @@ -14,7 +14,7 @@ import { shouldPauseForFinalWaveApproval } from "./final-wave-approval-gate" import { HOOK_NAME } from "./hook-name" import { DIRECT_WORK_REMINDER } from "./system-reminder-templates" import { isSisyphusPath } from "./sisyphus-path" -import { extractSessionIdFromOutput } from "./subagent-session-id" +import { extractSessionIdFromOutput, validateSubagentSessionId } from "./subagent-session-id" import { buildCompletionGate, buildFinalWaveApprovalReminder, @@ -22,17 +22,56 @@ import { buildStandaloneVerificationReminder, } from "./verification-reminders" import { isWriteOrEditToolName } from "./write-edit-tool-policy" -import type { SessionState } from "./types" -import type { ToolExecuteAfterInput, ToolExecuteAfterOutput } from "./types" +import type { PendingTaskRef, SessionState } from "./types" +import type { ToolExecuteAfterInput, ToolExecuteAfterOutput, TrackedTopLevelTaskRef } from "./types" function resolvePreferredSessionId(currentSessionId?: string, trackedSessionId?: string): string { return currentSessionId ?? trackedSessionId ?? "" } +function resolveTaskContext( + pendingTaskRef: PendingTaskRef | undefined, + planPath: string, +): { + currentTask: TrackedTopLevelTaskRef | null + shouldSkipTaskSessionUpdate: boolean + shouldIgnoreCurrentSessionId: boolean +} { + if (!pendingTaskRef) { + return { + currentTask: readCurrentTopLevelTask(planPath), + shouldSkipTaskSessionUpdate: false, + shouldIgnoreCurrentSessionId: false, + } + } + + if (pendingTaskRef.kind === "track") { + return { + currentTask: pendingTaskRef.task, + shouldSkipTaskSessionUpdate: false, + shouldIgnoreCurrentSessionId: false, + } + } + + if (pendingTaskRef.reason === "explicit_resume") { + return { + currentTask: readCurrentTopLevelTask(planPath), + shouldSkipTaskSessionUpdate: true, + shouldIgnoreCurrentSessionId: true, + } + } + + return { + currentTask: pendingTaskRef.task, + shouldSkipTaskSessionUpdate: true, + shouldIgnoreCurrentSessionId: true, + } +} + export function createToolExecuteAfterHandler(input: { ctx: PluginInput pendingFilePaths: Map - pendingTaskRefs: Map + pendingTaskRefs: Map autoCommit: boolean getState: (sessionID: string) => SessionState }): (toolInput: ToolExecuteAfterInput, toolOutput: ToolExecuteAfterOutput) => Promise { @@ -83,15 +122,16 @@ export function createToolExecuteAfterHandler(input: { if (toolOutput.output && typeof toolOutput.output === "string") { const gitStats = collectGitDiffStats(ctx.directory) const fileChanges = formatFileChanges(gitStats) - const subagentSessionId = extractSessionIdFromOutput(toolOutput.output) + const extractedSessionId = extractSessionIdFromOutput(toolOutput.output) const boulderState = readBoulderState(ctx.directory) if (boulderState) { const progress = getPlanProgress(boulderState.active_plan) - const shouldSkipTaskSessionUpdate = pendingTaskRef === null - const currentTask = shouldSkipTaskSessionUpdate - ? null - : pendingTaskRef ?? readCurrentTopLevelTask(boulderState.active_plan) + const { + currentTask, + shouldSkipTaskSessionUpdate, + shouldIgnoreCurrentSessionId, + } = resolveTaskContext(pendingTaskRef, boulderState.active_plan) const trackedTaskSession = currentTask ? getTaskSessionState(ctx.directory, currentTask.key) : null @@ -105,7 +145,16 @@ export function createToolExecuteAfterHandler(input: { }) } - if (currentTask && subagentSessionId) { + const lineageSessionIDs = toolInput.sessionID && !boulderState.session_ids.includes(toolInput.sessionID) + ? [...boulderState.session_ids, toolInput.sessionID] + : boulderState.session_ids + const subagentSessionId = await validateSubagentSessionId({ + client: ctx.client, + sessionID: extractedSessionId, + lineageSessionIDs, + }) + + if (currentTask && subagentSessionId && !shouldSkipTaskSessionUpdate) { upsertTaskSessionState(ctx.directory, { taskKey: currentTask.key, taskLabel: currentTask.label, @@ -117,7 +166,7 @@ export function createToolExecuteAfterHandler(input: { } const preferredSessionId = resolvePreferredSessionId( - subagentSessionId, + shouldIgnoreCurrentSessionId ? undefined : subagentSessionId, trackedTaskSession?.session_id, ) @@ -175,8 +224,17 @@ ${ waitingForFinalWaveApproval: shouldPauseForApproval, }) } else { + const lineageSessionIDs = toolInput.sessionID ? [toolInput.sessionID] : [] + const subagentSessionId = await validateSubagentSessionId({ + client: ctx.client, + sessionID: extractedSessionId, + lineageSessionIDs, + }) + const preferredSessionId = pendingTaskRef?.kind === "skip" + ? undefined + : subagentSessionId toolOutput.output += `\n\n${buildStandaloneVerificationReminder( - resolvePreferredSessionId(subagentSessionId), + resolvePreferredSessionId(preferredSessionId), )}\n` log(`[${HOOK_NAME}] Verification reminder appended for orchestrator`, { diff --git a/src/hooks/atlas/tool-execute-before.ts b/src/hooks/atlas/tool-execute-before.ts index d020adaca..e00224d84 100644 --- a/src/hooks/atlas/tool-execute-before.ts +++ b/src/hooks/atlas/tool-execute-before.ts @@ -6,18 +6,23 @@ import { readBoulderState, readCurrentTopLevelTask } from "../../features/boulde import { HOOK_NAME } from "./hook-name" import { ORCHESTRATOR_DELEGATION_REQUIRED, SINGLE_TASK_DIRECTIVE } from "./system-reminder-templates" import { isSisyphusPath } from "./sisyphus-path" +import type { PendingTaskRef, TrackedTopLevelTaskRef } from "./types" import { isWriteOrEditToolName } from "./write-edit-tool-policy" export function createToolExecuteBeforeHandler(input: { ctx: PluginInput pendingFilePaths: Map - pendingTaskRefs: Map + pendingTaskRefs: Map }): ( toolInput: { tool: string; sessionID?: string; callID?: string }, toolOutput: { args: Record; message?: string } ) => Promise { const { ctx, pendingFilePaths, pendingTaskRefs } = input + function trackTask(callID: string, task: TrackedTopLevelTaskRef): void { + pendingTaskRefs.set(callID, { kind: "track", task }) + } + return async (toolInput, toolOutput): Promise => { if (!(await isCallerOrchestrator(toolInput.sessionID, ctx.client))) { return @@ -48,18 +53,39 @@ export function createToolExecuteBeforeHandler(input: { if (toolInput.callID) { const requestedSessionId = toolOutput.args.session_id as string | undefined if (requestedSessionId) { - pendingTaskRefs.set(toolInput.callID, null) + pendingTaskRefs.set(toolInput.callID, { + kind: "skip", + reason: "explicit_resume", + }) } else { const boulderState = readBoulderState(ctx.directory) const currentTask = boulderState ? readCurrentTopLevelTask(boulderState.active_plan) : null if (currentTask) { - pendingTaskRefs.set(toolInput.callID, { + const task = { key: currentTask.key, label: currentTask.label, title: currentTask.title, - }) + } + const hasExistingClaim = [...pendingTaskRefs.values()].some((pendingTaskRef) => ( + pendingTaskRef.kind === "track" && pendingTaskRef.task.key === task.key + )) + + if (hasExistingClaim) { + pendingTaskRefs.set(toolInput.callID, { + kind: "skip", + reason: "ambiguous_task_key", + task, + }) + log(`[${HOOK_NAME}] Skipping task session persistence for ambiguous task key`, { + sessionID: toolInput.sessionID, + callID: toolInput.callID, + taskKey: task.key, + }) + } else { + trackTask(toolInput.callID, task) + } } } } diff --git a/src/hooks/atlas/types.ts b/src/hooks/atlas/types.ts index c3aa9bbc7..bd5402e9c 100644 --- a/src/hooks/atlas/types.ts +++ b/src/hooks/atlas/types.ts @@ -1,5 +1,6 @@ import type { AgentOverrides } from "../../config" import type { BackgroundManager } from "../../features/background-agent" +import type { TopLevelTaskRef } from "../../features/boulder-state" export type ModelInfo = { providerID: string; modelID: string } @@ -25,6 +26,13 @@ export interface ToolExecuteAfterOutput { metadata: Record } +export type TrackedTopLevelTaskRef = Pick + +export type PendingTaskRef = + | { kind: "track"; task: TrackedTopLevelTaskRef } + | { kind: "skip"; reason: "explicit_resume" } + | { kind: "skip"; reason: "ambiguous_task_key"; task: TrackedTopLevelTaskRef } + export interface SessionState { lastEventWasAbortError?: boolean lastContinuationInjectedAt?: number