Merge pull request #2676 from code-yeongyu/fix/atlas-task-session-review-followup

fix(atlas): address review findings for task session reuse
2026-03-18 18:50:45 +09:00
parent 33ef4db502 3c7e6a3940
commit 826284f3d9
5 changed files with 289 additions and 4 deletions
--- a/src/features/boulder-state/storage.ts
+++ b/src/features/boulder-state/storage.ts
@@ -9,6 +9,8 @@ import { dirname, join, basename } from "node:path"
 import type { BoulderState, PlanProgress, TaskSessionState } from "./types"
 import { BOULDER_DIR, BOULDER_FILE, PROMETHEUS_PLANS_DIR } from "./constants"

+const RESERVED_KEYS = new Set(["__proto__", "prototype", "constructor"])
+
 export function getBoulderFilePath(directory: string): string {
  return join(directory, BOULDER_DIR, BOULDER_FILE)
 }
@@ -113,6 +115,10 @@ export function upsertTaskSessionState(
    return null
  }

+  if (RESERVED_KEYS.has(input.taskKey)) {
+    return null
+  }
+
  const taskSessions = state.task_sessions ?? {}
  taskSessions[input.taskKey] = {
    task_key: input.taskKey,
--- a/src/features/boulder-state/top-level-task.test.ts
+++ b/src/features/boulder-state/top-level-task.test.ts
@@ -0,0 +1,268 @@
+import { describe, expect, test } from "bun:test"
+import { writeFileSync } from "node:fs"
+import { join } from "node:path"
+import { tmpdir } from "node:os"
+
+import { readCurrentTopLevelTask } from "./top-level-task"
+
+function writePlanFile(fileName: string, content: string): string {
+  const planPath = join(tmpdir(), fileName)
+  writeFileSync(planPath, content, "utf-8")
+  return planPath
+}
+
+describe("readCurrentTopLevelTask", () => {
+  test("returns first unchecked top-level task in TODOs", () => {
+    // given
+    const planPath = writePlanFile(
+      `top-level-task-happy-${Date.now()}.md`,
+      `# Plan
+
+## TODOs
+- [x] 1. Done task
+- [ ] 2. Current task
+
+## Final Verification Wave
+- [ ] F1. Final review
+`,
+    )
+
+    // when
+    const result = readCurrentTopLevelTask(planPath)
+
+    // then
+    expect(result).toEqual({
+      key: "todo:2",
+      section: "todo",
+      label: "2",
+      title: "Current task",
+    })
+  })
+
+  test("returns null when all tasks are checked", () => {
+    // given
+    const planPath = writePlanFile(
+      `top-level-task-all-checked-${Date.now()}.md`,
+      `# Plan
+
+## TODOs
+- [x] 1. Done task
+- [x] 2. Another done task
+
+## Final Verification Wave
+- [x] F1. Final done review
+`,
+    )
+
+    // when
+    const result = readCurrentTopLevelTask(planPath)
+
+    // then
+    expect(result).toBeNull()
+  })
+
+  test("returns null for empty plan file", () => {
+    // given
+    const planPath = writePlanFile(`top-level-task-empty-${Date.now()}.md`, "")
+
+    // when
+    const result = readCurrentTopLevelTask(planPath)
+
+    // then
+    expect(result).toBeNull()
+  })
+
+  test("returns null when plan file does not exist", () => {
+    // given
+    const planPath = join(tmpdir(), `top-level-task-missing-${Date.now()}.md`)
+
+    // when
+    const result = readCurrentTopLevelTask(planPath)
+
+    // then
+    expect(result).toBeNull()
+  })
+
+  test("skips nested or indented checkboxes", () => {
+    // given
+    const planPath = writePlanFile(
+      `top-level-task-nested-${Date.now()}.md`,
+      `# Plan
+
+## TODOs
+- [x] 1. Done task
+  - [ ] nested should be ignored
+- [ ] 2. Top-level pending
+`,
+    )
+
+    // when
+    const result = readCurrentTopLevelTask(planPath)
+
+    // then
+    expect(result?.key).toBe("todo:2")
+  })
+
+  test("falls back to Final Verification Wave when TODOs are all checked", () => {
+    // given
+    const planPath = writePlanFile(
+      `top-level-task-fallback-${Date.now()}.md`,
+      `# Plan
+
+## TODOs
+- [x] 1. Done task
+- [x] 2. Done task
+
+## Final Verification Wave
+- [ ] F1. Final review pending
+`,
+    )
+
+    // when
+    const result = readCurrentTopLevelTask(planPath)
+
+    // then
+    expect(result).toEqual({
+      key: "final-wave:f1",
+      section: "final-wave",
+      label: "F1",
+      title: "Final review pending",
+    })
+  })
+
+  test("selects the first unchecked task among mixed checked and unchecked TODOs", () => {
+    // given
+    const planPath = writePlanFile(
+      `top-level-task-mixed-${Date.now()}.md`,
+      `# Plan
+
+## TODOs
+- [x] 1. Done task
+- [ ] 2. First unchecked
+- [ ] 3. Second unchecked
+`,
+    )
+
+    // when
+    const result = readCurrentTopLevelTask(planPath)
+
+    // then
+    expect(result?.key).toBe("todo:2")
+    expect(result?.title).toBe("First unchecked")
+  })
+
+  test("ignores malformed labels and continues to next unchecked task", () => {
+    // given
+    const planPath = writePlanFile(
+      `top-level-task-malformed-${Date.now()}.md`,
+      `# Plan
+
+## TODOs
+- [ ] no number prefix
+- [ ] 2. Valid task after malformed label
+`,
+    )
+
+    // when
+    const result = readCurrentTopLevelTask(planPath)
+
+    // then
+    expect(result).toEqual({
+      key: "todo:2",
+      section: "todo",
+      label: "2",
+      title: "Valid task after malformed label",
+    })
+  })
+
+  test("supports unchecked tasks with asterisk bullets", () => {
+    // given
+    const planPath = writePlanFile(
+      `top-level-task-asterisk-${Date.now()}.md`,
+      `# Plan
+
+## TODOs
+* [ ] 1. Task using asterisk bullet
+`,
+    )
+
+    // when
+    const result = readCurrentTopLevelTask(planPath)
+
+    // then
+    expect(result?.key).toBe("todo:1")
+    expect(result?.title).toBe("Task using asterisk bullet")
+  })
+
+  test("returns final-wave task when plan has only Final Verification Wave section", () => {
+    // given
+    const planPath = writePlanFile(
+      `top-level-task-final-only-${Date.now()}.md`,
+      `# Plan
+
+## Final Verification Wave
+- [ ] F2. Final-only task
+`,
+    )
+
+    // when
+    const result = readCurrentTopLevelTask(planPath)
+
+    // then
+    expect(result).toEqual({
+      key: "final-wave:f2",
+      section: "final-wave",
+      label: "F2",
+      title: "Final-only task",
+    })
+  })
+
+  test("returns the first unchecked task when multiple unchecked tasks exist", () => {
+    // given
+    const planPath = writePlanFile(
+      `top-level-task-multiple-${Date.now()}.md`,
+      `# Plan
+
+## TODOs
+- [ ] 1. First unchecked task
+- [ ] 2. Second unchecked task
+- [ ] 3. Third unchecked task
+`,
+    )
+
+    // when
+    const result = readCurrentTopLevelTask(planPath)
+
+    // then
+    expect(result?.label).toBe("1")
+    expect(result?.title).toBe("First unchecked task")
+  })
+
+  test("ignores unchecked content in non-target sections during section transitions", () => {
+    // given
+    const planPath = writePlanFile(
+      `top-level-task-sections-${Date.now()}.md`,
+      `# Plan
+
+## Notes
+- [ ] 99. Should be ignored because section is not tracked
+
+## TODOs
+- [x] 1. Done implementation task
+
+## Decisions
+- [ ] 100. Should also be ignored
+
+## Final Verification Wave
+- [ ] F3. Final verification task
+`,
+    )
+
+    // when
+    const result = readCurrentTopLevelTask(planPath)
+
+    // then
+    expect(result?.key).toBe("final-wave:f3")
+    expect(result?.section).toBe("final-wave")
+  })
+})
--- a/src/hooks/atlas/subagent-session-id.test.ts
+++ b/src/hooks/atlas/subagent-session-id.test.ts
@@ -25,6 +25,17 @@ describe("extractSessionIdFromOutput", () => {
    expect(result).toBe("ses_sync_12345")
  })

+  test("extracts hyphenated session IDs from task metadata blocks", () => {
+    // given
+    const output = `Task completed.\n\n<task_metadata>\nsession_id: ses_auth-flow-123\n</task_metadata>`
+
+    // when
+    const result = extractSessionIdFromOutput(output)
+
+    // then
+    expect(result).toBe("ses_auth-flow-123")
+  })
+
  test("returns undefined when no session id is present", () => {
    // given
    const output = "Task completed without metadata"
--- a/src/hooks/atlas/subagent-session-id.ts
+++ b/src/hooks/atlas/subagent-session-id.ts
@@ -7,13 +7,13 @@ export function extractSessionIdFromOutput(output: string): string | undefined {
  const taskMetadataBlocks = [...output.matchAll(/<task_metadata>([\s\S]*?)<\/task_metadata>/gi)]
  const lastTaskMetadataBlock = taskMetadataBlocks.at(-1)?.[1]
  if (lastTaskMetadataBlock) {
-    const taskMetadataSessionMatch = lastTaskMetadataBlock.match(/session_id:\s*(ses_[a-zA-Z0-9_]+)/i)
+    const taskMetadataSessionMatch = lastTaskMetadataBlock.match(/session_id:\s*(ses_[a-zA-Z0-9_-]+)/i)
    if (taskMetadataSessionMatch) {
      return taskMetadataSessionMatch[1]
    }
  }

-  const explicitSessionMatches = [...output.matchAll(/Session ID:\s*(ses_[a-zA-Z0-9_]+)/g)]
+  const explicitSessionMatches = [...output.matchAll(/Session ID:\s*(ses_[a-zA-Z0-9_-]+)/g)]
  return explicitSessionMatches.at(-1)?.[1]
 }

--- a/src/hooks/atlas/tool-execute-after.ts
+++ b/src/hooks/atlas/tool-execute-after.ts
@@ -160,8 +160,8 @@ export function createToolExecuteAfterHandler(input: {
            taskLabel: currentTask.label,
            taskTitle: currentTask.title,
            sessionId: subagentSessionId,
-            agent: toolOutput.metadata?.agent as string | undefined,
-            category: toolOutput.metadata?.category as string | undefined,
+            agent: typeof toolOutput.metadata?.agent === "string" ? toolOutput.metadata.agent : undefined,
+            category: typeof toolOutput.metadata?.category === "string" ? toolOutput.metadata.category : undefined,
          })
        }