Merge pull request #2676 from code-yeongyu/fix/atlas-task-session-review-followup

fix(atlas): address review findings for task session reuse
This commit is contained in:
YeonGyu-Kim
2026-03-18 18:50:45 +09:00
committed by GitHub
5 changed files with 289 additions and 4 deletions

View File

@@ -9,6 +9,8 @@ import { dirname, join, basename } from "node:path"
import type { BoulderState, PlanProgress, TaskSessionState } from "./types"
import { BOULDER_DIR, BOULDER_FILE, PROMETHEUS_PLANS_DIR } from "./constants"
const RESERVED_KEYS = new Set(["__proto__", "prototype", "constructor"])
export function getBoulderFilePath(directory: string): string {
return join(directory, BOULDER_DIR, BOULDER_FILE)
}
@@ -113,6 +115,10 @@ export function upsertTaskSessionState(
return null
}
if (RESERVED_KEYS.has(input.taskKey)) {
return null
}
const taskSessions = state.task_sessions ?? {}
taskSessions[input.taskKey] = {
task_key: input.taskKey,

View File

@@ -0,0 +1,268 @@
import { describe, expect, test } from "bun:test"
import { writeFileSync } from "node:fs"
import { join } from "node:path"
import { tmpdir } from "node:os"
import { readCurrentTopLevelTask } from "./top-level-task"
function writePlanFile(fileName: string, content: string): string {
const planPath = join(tmpdir(), fileName)
writeFileSync(planPath, content, "utf-8")
return planPath
}
describe("readCurrentTopLevelTask", () => {
test("returns first unchecked top-level task in TODOs", () => {
// given
const planPath = writePlanFile(
`top-level-task-happy-${Date.now()}.md`,
`# Plan
## TODOs
- [x] 1. Done task
- [ ] 2. Current task
## Final Verification Wave
- [ ] F1. Final review
`,
)
// when
const result = readCurrentTopLevelTask(planPath)
// then
expect(result).toEqual({
key: "todo:2",
section: "todo",
label: "2",
title: "Current task",
})
})
test("returns null when all tasks are checked", () => {
// given
const planPath = writePlanFile(
`top-level-task-all-checked-${Date.now()}.md`,
`# Plan
## TODOs
- [x] 1. Done task
- [x] 2. Another done task
## Final Verification Wave
- [x] F1. Final done review
`,
)
// when
const result = readCurrentTopLevelTask(planPath)
// then
expect(result).toBeNull()
})
test("returns null for empty plan file", () => {
// given
const planPath = writePlanFile(`top-level-task-empty-${Date.now()}.md`, "")
// when
const result = readCurrentTopLevelTask(planPath)
// then
expect(result).toBeNull()
})
test("returns null when plan file does not exist", () => {
// given
const planPath = join(tmpdir(), `top-level-task-missing-${Date.now()}.md`)
// when
const result = readCurrentTopLevelTask(planPath)
// then
expect(result).toBeNull()
})
test("skips nested or indented checkboxes", () => {
// given
const planPath = writePlanFile(
`top-level-task-nested-${Date.now()}.md`,
`# Plan
## TODOs
- [x] 1. Done task
- [ ] nested should be ignored
- [ ] 2. Top-level pending
`,
)
// when
const result = readCurrentTopLevelTask(planPath)
// then
expect(result?.key).toBe("todo:2")
})
test("falls back to Final Verification Wave when TODOs are all checked", () => {
// given
const planPath = writePlanFile(
`top-level-task-fallback-${Date.now()}.md`,
`# Plan
## TODOs
- [x] 1. Done task
- [x] 2. Done task
## Final Verification Wave
- [ ] F1. Final review pending
`,
)
// when
const result = readCurrentTopLevelTask(planPath)
// then
expect(result).toEqual({
key: "final-wave:f1",
section: "final-wave",
label: "F1",
title: "Final review pending",
})
})
test("selects the first unchecked task among mixed checked and unchecked TODOs", () => {
// given
const planPath = writePlanFile(
`top-level-task-mixed-${Date.now()}.md`,
`# Plan
## TODOs
- [x] 1. Done task
- [ ] 2. First unchecked
- [ ] 3. Second unchecked
`,
)
// when
const result = readCurrentTopLevelTask(planPath)
// then
expect(result?.key).toBe("todo:2")
expect(result?.title).toBe("First unchecked")
})
test("ignores malformed labels and continues to next unchecked task", () => {
// given
const planPath = writePlanFile(
`top-level-task-malformed-${Date.now()}.md`,
`# Plan
## TODOs
- [ ] no number prefix
- [ ] 2. Valid task after malformed label
`,
)
// when
const result = readCurrentTopLevelTask(planPath)
// then
expect(result).toEqual({
key: "todo:2",
section: "todo",
label: "2",
title: "Valid task after malformed label",
})
})
test("supports unchecked tasks with asterisk bullets", () => {
// given
const planPath = writePlanFile(
`top-level-task-asterisk-${Date.now()}.md`,
`# Plan
## TODOs
* [ ] 1. Task using asterisk bullet
`,
)
// when
const result = readCurrentTopLevelTask(planPath)
// then
expect(result?.key).toBe("todo:1")
expect(result?.title).toBe("Task using asterisk bullet")
})
test("returns final-wave task when plan has only Final Verification Wave section", () => {
// given
const planPath = writePlanFile(
`top-level-task-final-only-${Date.now()}.md`,
`# Plan
## Final Verification Wave
- [ ] F2. Final-only task
`,
)
// when
const result = readCurrentTopLevelTask(planPath)
// then
expect(result).toEqual({
key: "final-wave:f2",
section: "final-wave",
label: "F2",
title: "Final-only task",
})
})
test("returns the first unchecked task when multiple unchecked tasks exist", () => {
// given
const planPath = writePlanFile(
`top-level-task-multiple-${Date.now()}.md`,
`# Plan
## TODOs
- [ ] 1. First unchecked task
- [ ] 2. Second unchecked task
- [ ] 3. Third unchecked task
`,
)
// when
const result = readCurrentTopLevelTask(planPath)
// then
expect(result?.label).toBe("1")
expect(result?.title).toBe("First unchecked task")
})
test("ignores unchecked content in non-target sections during section transitions", () => {
// given
const planPath = writePlanFile(
`top-level-task-sections-${Date.now()}.md`,
`# Plan
## Notes
- [ ] 99. Should be ignored because section is not tracked
## TODOs
- [x] 1. Done implementation task
## Decisions
- [ ] 100. Should also be ignored
## Final Verification Wave
- [ ] F3. Final verification task
`,
)
// when
const result = readCurrentTopLevelTask(planPath)
// then
expect(result?.key).toBe("final-wave:f3")
expect(result?.section).toBe("final-wave")
})
})

View File

@@ -25,6 +25,17 @@ describe("extractSessionIdFromOutput", () => {
expect(result).toBe("ses_sync_12345")
})
test("extracts hyphenated session IDs from task metadata blocks", () => {
// given
const output = `Task completed.\n\n<task_metadata>\nsession_id: ses_auth-flow-123\n</task_metadata>`
// when
const result = extractSessionIdFromOutput(output)
// then
expect(result).toBe("ses_auth-flow-123")
})
test("returns undefined when no session id is present", () => {
// given
const output = "Task completed without metadata"

View File

@@ -7,13 +7,13 @@ export function extractSessionIdFromOutput(output: string): string | undefined {
const taskMetadataBlocks = [...output.matchAll(/<task_metadata>([\s\S]*?)<\/task_metadata>/gi)]
const lastTaskMetadataBlock = taskMetadataBlocks.at(-1)?.[1]
if (lastTaskMetadataBlock) {
const taskMetadataSessionMatch = lastTaskMetadataBlock.match(/session_id:\s*(ses_[a-zA-Z0-9_]+)/i)
const taskMetadataSessionMatch = lastTaskMetadataBlock.match(/session_id:\s*(ses_[a-zA-Z0-9_-]+)/i)
if (taskMetadataSessionMatch) {
return taskMetadataSessionMatch[1]
}
}
const explicitSessionMatches = [...output.matchAll(/Session ID:\s*(ses_[a-zA-Z0-9_]+)/g)]
const explicitSessionMatches = [...output.matchAll(/Session ID:\s*(ses_[a-zA-Z0-9_-]+)/g)]
return explicitSessionMatches.at(-1)?.[1]
}

View File

@@ -160,8 +160,8 @@ export function createToolExecuteAfterHandler(input: {
taskLabel: currentTask.label,
taskTitle: currentTask.title,
sessionId: subagentSessionId,
agent: toolOutput.metadata?.agent as string | undefined,
category: toolOutput.metadata?.category as string | undefined,
agent: typeof toolOutput.metadata?.agent === "string" ? toolOutput.metadata.agent : undefined,
category: typeof toolOutput.metadata?.category === "string" ? toolOutput.metadata.category : undefined,
})
}