fix(atlas): enforce checkbox completion before next task
🤖 GENERATED WITH ASSISTANCE OF [OhMyOpenCode](https://github.com/code-yeongyu/oh-my-opencode)
This commit is contained in:
@@ -213,7 +213,7 @@ After EVERY delegation, complete ALL of these steps — no shortcuts:
|
||||
|
||||
After verification, READ the plan file directly — every time, no exceptions:
|
||||
\`\`\`
|
||||
Read(".sisyphus/tasks/{plan-name}.yaml")
|
||||
Read(".sisyphus/plans/{plan-name}.md")
|
||||
\`\`\`
|
||||
Count remaining \`- [ ]\` tasks. This is your ground truth for what comes next.
|
||||
|
||||
@@ -335,7 +335,7 @@ task(category="quick", load_skills=[], run_in_background=false, prompt="Task 4..
|
||||
\`\`\`
|
||||
|
||||
**Path convention**:
|
||||
- Plan: \`.sisyphus/plans/{name}.md\` (READ ONLY)
|
||||
- Plan: \`.sisyphus/plans/{name}.md\` (you may EDIT to mark checkboxes)
|
||||
- Notepad: \`.sisyphus/notepads/{name}/\` (READ/APPEND)
|
||||
</notepad_protocol>
|
||||
|
||||
@@ -372,6 +372,7 @@ You are the QA gate. Subagents lie. Verify EVERYTHING.
|
||||
- Use lsp_diagnostics, grep, glob
|
||||
- Manage todos
|
||||
- Coordinate and verify
|
||||
- **EDIT \`.sisyphus\/plans\/*.md\` to change \`- [ ]\` to \`- [x]\` after verified task completion**
|
||||
|
||||
**YOU DELEGATE**:
|
||||
- All code writing/editing
|
||||
@@ -403,6 +404,20 @@ You are the QA gate. Subagents lie. Verify EVERYTHING.
|
||||
- **Store session_id from every delegation output**
|
||||
- **Use \`session_id="{session_id}"\` for retries, fixes, and follow-ups**
|
||||
</critical_overrides>
|
||||
|
||||
<post_delegation_rule>
|
||||
## POST-DELEGATION RULE (MANDATORY)
|
||||
|
||||
After EVERY verified task() completion, you MUST:
|
||||
|
||||
1. **EDIT the plan checkbox**: Change \`- [ ]\` to \`- [x]\` for the completed task in \`.sisyphus/plans/{plan-name}.md\`
|
||||
|
||||
2. **READ the plan to confirm**: Read \`.sisyphus/plans/{plan-name}.md\` and verify the checkbox count changed (fewer \`- [ ]\` remaining)
|
||||
|
||||
3. **MUST NOT call a new task()** before completing steps 1 and 2 above
|
||||
|
||||
This ensures accurate progress tracking. Skip this and you lose visibility into what remains.
|
||||
</post_delegation_rule>
|
||||
`
|
||||
|
||||
export function getDefaultAtlasPrompt(): string {
|
||||
|
||||
@@ -309,7 +309,7 @@ task(category="quick", load_skills=[], run_in_background=false, prompt="Task 3..
|
||||
- Instruct subagent to append findings (never overwrite)
|
||||
|
||||
**Paths**:
|
||||
- Plan: \`.sisyphus/plans/{name}.md\` (READ ONLY)
|
||||
- Plan: \`.sisyphus\/plans\/{name}.md\` (you may EDIT to mark checkboxes)
|
||||
- Notepad: \`.sisyphus/notepads/{name}/\` (READ/APPEND)
|
||||
</notepad_protocol>
|
||||
|
||||
@@ -343,6 +343,7 @@ Subagents CLAIM "done" when:
|
||||
- Use lsp_diagnostics, grep, glob
|
||||
- Manage todos
|
||||
- Coordinate and verify
|
||||
- **EDIT \`.sisyphus\/plans\/*.md\` to change \`- [ ]\` to \`- [x]\` after verified task completion**
|
||||
|
||||
**YOU DELEGATE (NO EXCEPTIONS):**
|
||||
- All code writing/editing
|
||||
@@ -373,6 +374,20 @@ Subagents CLAIM "done" when:
|
||||
- Store and reuse session_id for retries
|
||||
- **USE TOOL CALLS for verification — not internal reasoning**
|
||||
</critical_rules>
|
||||
|
||||
<post_delegation_rule>
|
||||
## POST-DELEGATION RULE (MANDATORY)
|
||||
|
||||
After EVERY verified task() completion, you MUST:
|
||||
|
||||
1. **EDIT the plan checkbox**: Change \`- [ ]\` to \`- [x]\` for the completed task in \`.sisyphus/plans/{plan-name}.md\`
|
||||
|
||||
2. **READ the plan to confirm**: Read \`.sisyphus/plans/{plan-name}.md\` and verify the checkbox count changed (fewer \`- [ ]\` remaining)
|
||||
|
||||
3. **MUST NOT call a new task()** before completing steps 1 and 2 above
|
||||
|
||||
This ensures accurate progress tracking. Skip this and you lose visibility into what remains.
|
||||
</post_delegation_rule>
|
||||
`
|
||||
|
||||
export function getGeminiAtlasPrompt(): string {
|
||||
|
||||
@@ -313,7 +313,7 @@ task(category="quick", load_skills=[], run_in_background=false, prompt="Task 3..
|
||||
- Instruct subagent to append findings (never overwrite)
|
||||
|
||||
**Paths**:
|
||||
- Plan: \`.sisyphus/plans/{name}.md\` (READ ONLY)
|
||||
- Plan: \`.sisyphus/plans/{name}.md\` (you may EDIT to mark checkboxes)
|
||||
- Notepad: \`.sisyphus/notepads/{name}/\` (READ/APPEND)
|
||||
</notepad_protocol>
|
||||
|
||||
@@ -348,6 +348,7 @@ Your job is to CATCH THEM. Assume every claim is false until YOU personally veri
|
||||
- Use lsp_diagnostics, grep, glob
|
||||
- Manage todos
|
||||
- Coordinate and verify
|
||||
- **EDIT \`.sisyphus\/plans\/*.md\` to change \`- [ ]\` to \`- [x]\` after verified task completion**
|
||||
|
||||
**YOU DELEGATE**:
|
||||
- All code writing/editing
|
||||
@@ -376,15 +377,19 @@ Your job is to CATCH THEM. Assume every claim is false until YOU personally veri
|
||||
- Store and reuse session_id for retries
|
||||
</critical_rules>
|
||||
|
||||
<user_updates_spec>
|
||||
- Send brief updates (1-2 sentences) only when:
|
||||
- Starting a new major phase
|
||||
- Discovering something that changes the plan
|
||||
- Avoid narrating routine tool calls
|
||||
- Each update must include a concrete outcome ("Found X", "Verified Y", "Delegated Z")
|
||||
- Keep updates varied in structure — don't start each the same way
|
||||
- Do NOT expand task scope; if you notice new work, call it out as optional
|
||||
</user_updates_spec>
|
||||
<post_delegation_rule>
|
||||
## POST-DELEGATION RULE (MANDATORY)
|
||||
|
||||
After EVERY verified task() completion, you MUST:
|
||||
|
||||
1. **EDIT the plan checkbox**: Change \`- [ ]\` to \`- [x]\` for the completed task in \`.sisyphus/plans/{plan-name}.md\`
|
||||
|
||||
2. **READ the plan to confirm**: Read \`.sisyphus/plans/{plan-name}.md\` and verify the checkbox count changed (fewer \`- [ ]\` remaining)
|
||||
|
||||
3. **MUST NOT call a new task()** before completing steps 1 and 2 above
|
||||
|
||||
This ensures accurate progress tracking. Skip this and you lose visibility into what remains.
|
||||
</post_delegation_rule>
|
||||
`;
|
||||
|
||||
export function getGptAtlasPrompt(): string {
|
||||
|
||||
155
src/agents/atlas/prompt-checkbox-enforcement.test.ts
Normal file
155
src/agents/atlas/prompt-checkbox-enforcement.test.ts
Normal file
@@ -0,0 +1,155 @@
|
||||
import { describe, test, expect } from "bun:test"
|
||||
import { ATLAS_SYSTEM_PROMPT } from "./default"
|
||||
import { ATLAS_GPT_SYSTEM_PROMPT } from "./gpt"
|
||||
import { ATLAS_GEMINI_SYSTEM_PROMPT } from "./gemini"
|
||||
|
||||
/**
 * Prompt variants that must all carry the same checkbox-enforcement wording.
 *
 * The shared expectations are declared once and run against every variant so
 * the three suites cannot drift apart. `rejectsLegacyTasksPath` enables the
 * extra guard against the `.sisyphus/tasks/` path, which this file only
 * asserts for the default prompt.
 */
const PROMPT_VARIANTS: ReadonlyArray<{
  label: string
  prompt: string
  rejectsLegacyTasksPath?: boolean
}> = [
  { label: "default prompt", prompt: ATLAS_SYSTEM_PROMPT, rejectsLegacyTasksPath: true },
  { label: "GPT prompt", prompt: ATLAS_GPT_SYSTEM_PROMPT },
  { label: "Gemini prompt", prompt: ATLAS_GEMINI_SYSTEM_PROMPT },
]

describe("ATLAS prompt checkbox enforcement", () => {
  for (const { label, prompt, rejectsLegacyTasksPath } of PROMPT_VARIANTS) {
    describe(label, () => {
      // Most checks are case-insensitive, so lower-case the prompt once per variant.
      const lowerPrompt = prompt.toLowerCase()

      test("plan should NOT be marked (READ ONLY)", () => {
        // when / then
        expect(prompt).not.toMatch(/\(READ ONLY\)/)
      })

      test("plan description should include EDIT for checkboxes", () => {
        // when / then - `.` does not cross newlines, so both words must share a line
        expect(lowerPrompt).toMatch(/edit.*checkbox|checkbox.*edit/)
      })

      test("boundaries should include exception for editing .sisyphus/plans/*.md checkboxes", () => {
        // when / then
        expect(lowerPrompt).toMatch(/\.sisyphus\/plans\/\*\.md/)
        expect(lowerPrompt).toMatch(/checkbox/)
      })

      test("prompt should include POST-DELEGATION RULE", () => {
        // when / then
        expect(lowerPrompt).toMatch(/post-delegation/)
      })

      test("prompt should include MUST NOT call a new task() before", () => {
        // when / then
        expect(lowerPrompt).toMatch(/must not.*call.*new.*task/)
      })

      if (rejectsLegacyTasksPath) {
        test("default prompt should NOT reference .sisyphus/tasks/", () => {
          // when / then
          expect(prompt).not.toMatch(/\.sisyphus\/tasks\//)
        })
      }
    })
  }
})
|
||||
@@ -409,6 +409,123 @@ describe("atlas hook", () => {
|
||||
cleanupMessageStorage(sessionID)
|
||||
})
|
||||
|
||||
describe("completion gate output ordering", () => {
  const COMPLETION_GATE_SESSION = "completion-gate-order-test"

  beforeEach(() => {
    setupMessageStorage(COMPLETION_GATE_SESSION, "atlas")
  })

  afterEach(() => {
    cleanupMessageStorage(COMPLETION_GATE_SESSION)
  })

  /**
   * Shared arrange/act step for every test in this suite.
   *
   * Writes a two-task plan (one unchecked, one checked) under TEST_DIR,
   * registers it as the active boulder state, runs the `tool.execute.after`
   * hook for a `task` tool call in the completion-gate session, and returns
   * the transformed `output.output` text for assertions.
   */
  async function runTaskHookWithActivePlan(): Promise<string> {
    // given - Atlas caller with boulder state
    const planPath = join(TEST_DIR, "test-plan.md")
    writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [x] Task 2")

    const state: BoulderState = {
      active_plan: planPath,
      started_at: "2026-01-02T10:00:00Z",
      session_ids: ["session-1"],
      plan_name: "test-plan",
    }
    writeBoulderState(TEST_DIR, state)

    const hook = createAtlasHook(createMockPluginInput())
    const output = {
      title: "Sisyphus Task",
      output: "Task completed successfully",
      metadata: {},
    }

    // when
    await hook["tool.execute.after"](
      { tool: "task", sessionID: COMPLETION_GATE_SESSION },
      output
    )

    return output.output
  }

  test("should include completion gate before Subagent Response in transformed boulder output", async () => {
    const transformed = await runTaskHookWithActivePlan()

    // then - completion gate should appear BEFORE Subagent Response
    const subagentResponseIndex = transformed.indexOf("**Subagent Response:**")
    const completionGateIndex = transformed.indexOf("COMPLETION GATE")

    expect(completionGateIndex).toBeGreaterThanOrEqual(0)
    expect(subagentResponseIndex).toBeGreaterThanOrEqual(0)
    expect(completionGateIndex).toBeLessThan(subagentResponseIndex)
  })

  test("should include completion gate before verification phase text", async () => {
    const transformed = await runTaskHookWithActivePlan()

    // then - completion gate should appear BEFORE verification phase text
    const completionGateIndex = transformed.indexOf("COMPLETION GATE")
    const lyingIndex = transformed.indexOf("LYING")
    const phase1Index = transformed.indexOf("PHASE 1")

    expect(completionGateIndex).toBeGreaterThanOrEqual(0)
    expect(lyingIndex).toBeGreaterThanOrEqual(0)
    expect(completionGateIndex).toBeLessThan(lyingIndex)
    // "PHASE 1" is treated as optional: ordering is only checked when it is present.
    if (phase1Index !== -1) {
      expect(completionGateIndex).toBeLessThan(phase1Index)
    }
  })

  test("should not contain old STEP 7 MARK COMPLETION IN PLAN FILE text", async () => {
    const transformed = await runTaskHookWithActivePlan()

    // then - old STEP 7 MARK COMPLETION IN PLAN FILE should be absent
    expect(transformed).not.toContain("STEP 7: MARK COMPLETION IN PLAN FILE")
    expect(transformed).not.toContain("MARK COMPLETION IN PLAN FILE")
  })
})
|
||||
|
||||
describe("Write/Edit tool direct work reminder", () => {
|
||||
const ORCHESTRATOR_SESSION = "orchestrator-write-test"
|
||||
|
||||
|
||||
37
src/hooks/atlas/system-reminder-templates.test.ts
Normal file
37
src/hooks/atlas/system-reminder-templates.test.ts
Normal file
@@ -0,0 +1,37 @@
|
||||
import { describe, it, expect } from "bun:test"
|
||||
import { BOULDER_CONTINUATION_PROMPT } from "./system-reminder-templates"
|
||||
|
||||
describe("BOULDER_CONTINUATION_PROMPT", () => {
  const RULES_MARKER = "RULES:"

  /**
   * Returns the prompt text that follows the "RULES:" marker.
   * Fails with an assertion (rather than a TypeError) if the marker is missing.
   */
  function getRulesSection(): string {
    const markerIndex = BOULDER_CONTINUATION_PROMPT.indexOf(RULES_MARKER)
    expect(markerIndex).toBeGreaterThanOrEqual(0)
    return BOULDER_CONTINUATION_PROMPT.slice(markerIndex + RULES_MARKER.length)
  }

  /**
   * Returns the first non-empty line after the "RULES:" line, trimmed.
   * The remainder of the marker's own line is skipped, and blank lines between
   * the marker and the first rule are tolerated.
   */
  function getFirstRule(): string {
    const linesAfterMarker = getRulesSection().split("\n").slice(1)
    return linesAfterMarker.map((line) => line.trim()).find((line) => line.length > 0) ?? ""
  }

  describe("checkbox-first priority rules", () => {
    it("first rule after RULES: mentions both reading the plan AND marking a still-unchecked completed task", () => {
      const firstRule = getFirstRule()

      expect(firstRule).toContain("Read the plan")
      expect(firstRule).toContain("mark")
      expect(firstRule).toContain("completed")
    })

    it("first rule includes IMMEDIATELY keyword", () => {
      const firstRule = getFirstRule()

      expect(firstRule).toContain("IMMEDIATELY")
    })

    it("checkbox-marking guidance appears BEFORE Proceed without asking for permission", () => {
      const rulesSection = getRulesSection()

      // search()/indexOf() return -1 when absent, so positions are always numbers.
      const checkboxPosition = rulesSection.search(/- \[x\]/i)
      const proceedPosition = rulesSection.indexOf("Proceed without asking for permission")

      expect(checkboxPosition).toBeGreaterThanOrEqual(0)
      expect(proceedPosition).toBeGreaterThanOrEqual(0)
      expect(checkboxPosition).toBeLessThan(proceedPosition)
    })
  })
})
|
||||
@@ -33,9 +33,8 @@ export const BOULDER_CONTINUATION_PROMPT = `${createSystemDirective(SystemDirect
|
||||
You have an active work plan with incomplete tasks. Continue working.
|
||||
|
||||
RULES:
|
||||
- **FIRST**: Read the plan file NOW to check exact current progress — count remaining \`- [ ]\` tasks
|
||||
- **FIRST**: Read the plan file NOW. If the last completed task is still unchecked, mark it \`- [x]\` IMMEDIATELY before anything else
|
||||
- Proceed without asking for permission
|
||||
- Change \`- [ ]\` to \`- [x]\` in the plan file when done
|
||||
- Use the notepad at .sisyphus/notepads/{PLAN_NAME}/ to record learnings
|
||||
- Do not stop until all tasks are complete
|
||||
- If blocked, document the blocker and move to the next task`
|
||||
|
||||
@@ -7,7 +7,7 @@ import { HOOK_NAME } from "./hook-name"
|
||||
import { DIRECT_WORK_REMINDER } from "./system-reminder-templates"
|
||||
import { isSisyphusPath } from "./sisyphus-path"
|
||||
import { extractSessionIdFromOutput } from "./subagent-session-id"
|
||||
import { buildOrchestratorReminder, buildStandaloneVerificationReminder } from "./verification-reminders"
|
||||
import { buildCompletionGate, buildOrchestratorReminder, buildStandaloneVerificationReminder } from "./verification-reminders"
|
||||
import { isWriteOrEditToolName } from "./write-edit-tool-policy"
|
||||
import type { ToolExecuteAfterInput, ToolExecuteAfterOutput } from "./types"
|
||||
|
||||
@@ -76,7 +76,11 @@ export function createToolExecuteAfterHandler(input: {
|
||||
// Preserve original subagent response - critical for debugging failed tasks
|
||||
const originalResponse = toolOutput.output
|
||||
|
||||
toolOutput.output = `
|
||||
toolOutput.output = `
|
||||
<system-reminder>
|
||||
${buildCompletionGate(boulderState.plan_name, subagentSessionId)}
|
||||
</system-reminder>
|
||||
|
||||
## SUBAGENT WORK COMPLETED
|
||||
|
||||
${fileChanges}
|
||||
@@ -88,7 +92,7 @@ ${fileChanges}
|
||||
${originalResponse}
|
||||
|
||||
<system-reminder>
|
||||
${buildOrchestratorReminder(boulderState.plan_name, progress, subagentSessionId, autoCommit)}
|
||||
${buildOrchestratorReminder(boulderState.plan_name, progress, subagentSessionId, autoCommit, false)}
|
||||
</system-reminder>`
|
||||
log(`[${HOOK_NAME}] Output transformed for orchestrator mode (boulder)`, {
|
||||
plan: boulderState.plan_name,
|
||||
|
||||
94
src/hooks/atlas/verification-reminders.test.ts
Normal file
94
src/hooks/atlas/verification-reminders.test.ts
Normal file
@@ -0,0 +1,94 @@
|
||||
import { describe, expect, it } from "bun:test"
|
||||
import { buildOrchestratorReminder, buildCompletionGate } from "./verification-reminders"
|
||||
|
||||
// BDD-style aliases so suites read as given / when / then
const given = describe
const when = describe
const then = it

// Marker strings the ordering assertions look for in the generated text.
const GATE_MARKER = "COMPLETION GATE"
const VERIFICATION_MARKER = "VERIFICATION_REMINDER"

/**
 * Extracts the numbers of all "STEP <n>:" headings in order of appearance.
 * An explicit radix avoids any ambiguity in how the digits are parsed.
 */
function extractStepNumbers(text: string): number[] {
  return Array.from(text.matchAll(/STEP (\d+):/g), (match) => Number.parseInt(match[1] ?? "0", 10))
}

describe("buildCompletionGate", () => {
  given("a plan name and session id", () => {
    const planName = "test-plan"
    const sessionId = "test-session-123"

    when("buildCompletionGate is called", () => {
      const gate = buildCompletionGate(planName, sessionId)

      then("completion gate text is present", () => {
        expect(gate).toContain(GATE_MARKER)
      })

      then("gate appears before verification phase text", () => {
        const gateIndex = gate.indexOf(GATE_MARKER)
        const verificationIndex = gate.indexOf(VERIFICATION_MARKER)

        // Both markers must exist; otherwise the ordering comparison could pass on a -1.
        expect(gateIndex).toBeGreaterThanOrEqual(0)
        expect(verificationIndex).toBeGreaterThanOrEqual(0)
        expect(gateIndex).toBeLessThan(verificationIndex)
      })

      then("gate interpolates the plan name path", () => {
        expect(gate).toContain(planName)
        expect(gate).toContain(`.sisyphus/plans/${planName}.md`)
      })

      then("gate includes Edit instructions", () => {
        expect(gate.toLowerCase()).toContain("edit")
      })

      then("gate includes Read instructions", () => {
        expect(gate.toLowerCase()).toContain("read")
      })

      then("old STEP 7 MARK COMPLETION text is absent", () => {
        expect(gate).not.toContain("STEP 7")
        expect(gate).not.toContain("MARK COMPLETION IN PLAN FILE")
      })

      then("step numbering remains consecutive after removal", () => {
        // The gate text uses a plain numbered list, so this normally finds no
        // "STEP n:" headings; it guards against non-consecutive ones being added.
        const stepNumbers = extractStepNumbers(gate)
        for (let i = 1; i < stepNumbers.length; i++) {
          const previous = stepNumbers[i - 1] ?? Number.NaN
          expect(stepNumbers[i]).toBe(previous + 1)
        }
      })
    })
  })
})

describe("buildOrchestratorReminder", () => {
  given("progress with completed tasks", () => {
    const planName = "my-test-plan"
    const sessionId = "session-abc"
    const progress = { total: 10, completed: 3 }

    // Same expectations for both auto-commit modes.
    for (const autoCommit of [true, false]) {
      when(`buildOrchestratorReminder is called with autoCommit ${autoCommit}`, () => {
        const reminder = buildOrchestratorReminder(planName, progress, sessionId, autoCommit)

        then("old STEP 7 MARK COMPLETION IN PLAN FILE text is absent", () => {
          expect(reminder).not.toContain("STEP 7: MARK COMPLETION IN PLAN FILE")
        })

        then("completion gate appears before verification reminder", () => {
          const gateIndex = reminder.indexOf(GATE_MARKER)
          const verificationIndex = reminder.indexOf(VERIFICATION_MARKER)

          expect(gateIndex).toBeGreaterThanOrEqual(0)
          expect(verificationIndex).toBeGreaterThanOrEqual(0)
          expect(gateIndex).toBeLessThan(verificationIndex)
        })
      })
    }
  })
})
|
||||
@@ -1,7 +1,37 @@
|
||||
import { VERIFICATION_REMINDER } from "./system-reminder-templates"
|
||||
|
||||
/**
 * Builds the "COMPLETION GATE" instruction text shown to the orchestrator.
 *
 * The returned text tells the reader to (1) edit the checkbox for the
 * completed task in `.sisyphus/plans/<planName>.md`, (2) read that plan file
 * again to confirm the change, and (3) not call `task()` again until both
 * steps are done. It also embeds a `task(session_id=...)` snippet for resuming
 * the same session, and ends with a literal `**VERIFICATION_REMINDER**`
 * marker line.
 *
 * @param planName - Plan name interpolated into the `.sisyphus/plans/<planName>.md` path.
 * @param sessionId - Session id interpolated into the resume snippet.
 * @returns The gate text, starting with a newline and with no trailing newline.
 */
export function buildCompletionGate(planName: string, sessionId: string): string {
|
||||
return `
|
||||
**COMPLETION GATE — DO NOT PROCEED UNTIL THIS IS DONE**
|
||||
|
||||
Your completion will NOT be recorded until you complete ALL of the following:
|
||||
|
||||
1. **Edit** the plan file \`.sisyphus/plans/${planName}.md\`:
|
||||
- Change \`- [ ]\` to \`- [x]\` for the completed task
|
||||
- Use \`Edit\` tool to modify the checkbox
|
||||
|
||||
2. **Read** the plan file AGAIN:
|
||||
\`\`\`
|
||||
Read(".sisyphus/plans/${planName}.md")
|
||||
\`\`\`
|
||||
- Verify the checkbox count changed (more \`- [x]\` than before)
|
||||
|
||||
3. **DO NOT call \`task()\` again** until you have completed steps 1 and 2 above.
|
||||
|
||||
If anything fails while closing this out, resume the same session immediately:
|
||||
\`\`\`typescript
|
||||
task(session_id="${sessionId}", prompt="fix: checkbox not recorded correctly")
|
||||
\`\`\`
|
||||
|
||||
**Your completion is NOT tracked until the checkbox is marked in the plan file.**
|
||||
|
||||
**VERIFICATION_REMINDER**`
|
||||
}
|
||||
|
||||
function buildVerificationReminder(sessionId: string): string {
|
||||
return `${VERIFICATION_REMINDER}
|
||||
return `**VERIFICATION_REMINDER**
|
||||
|
||||
${VERIFICATION_REMINDER}
|
||||
|
||||
---
|
||||
|
||||
@@ -15,20 +45,21 @@ export function buildOrchestratorReminder(
|
||||
planName: string,
|
||||
progress: { total: number; completed: number },
|
||||
sessionId: string,
|
||||
autoCommit: boolean = true
|
||||
autoCommit: boolean = true,
|
||||
includeCompletionGate: boolean = true
|
||||
): string {
|
||||
const remaining = progress.total - progress.completed
|
||||
|
||||
|
||||
const commitStep = autoCommit
|
||||
? `
|
||||
**STEP 8: COMMIT ATOMIC UNIT**
|
||||
**STEP 7: COMMIT ATOMIC UNIT**
|
||||
|
||||
- Stage ONLY the verified changes
|
||||
- Commit with clear message describing what was done
|
||||
`
|
||||
: ""
|
||||
|
||||
const nextStepNumber = autoCommit ? 9 : 8
|
||||
const nextStepNumber = autoCommit ? 8 : 7
|
||||
|
||||
return `
|
||||
---
|
||||
@@ -37,7 +68,9 @@ export function buildOrchestratorReminder(
|
||||
|
||||
---
|
||||
|
||||
${buildVerificationReminder(sessionId)}
|
||||
${includeCompletionGate ? `${buildCompletionGate(planName, sessionId)}
|
||||
|
||||
` : ""}${buildVerificationReminder(sessionId)}
|
||||
|
||||
**STEP 5: READ SUBAGENT NOTEPAD (LEARNINGS, ISSUES, PROBLEMS)**
|
||||
|
||||
@@ -64,22 +97,13 @@ Read(".sisyphus/plans/${planName}.md")
|
||||
Count exactly: how many \`- [ ]\` remain? How many \`- [x]\` completed?
|
||||
This is YOUR ground truth. Use it to decide what comes next.
|
||||
|
||||
**STEP 7: MARK COMPLETION IN PLAN FILE (IMMEDIATELY)**
|
||||
|
||||
RIGHT NOW - Do not delay. Verification passed → Mark IMMEDIATELY.
|
||||
|
||||
Update the plan file \`.sisyphus/plans/${planName}.md\`:
|
||||
- Change \`- [ ]\` to \`- [x]\` for the completed task
|
||||
- Use \`Edit\` tool to modify the checkbox
|
||||
|
||||
**DO THIS BEFORE ANYTHING ELSE. Unmarked = Untracked = Lost progress.**
|
||||
${commitStep}
|
||||
**STEP ${nextStepNumber}: PROCEED TO NEXT TASK**
|
||||
|
||||
- Read the plan file AGAIN to identify the next \`- [ ]\` task
|
||||
- Start immediately - DO NOT STOP
|
||||
|
||||
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
||||
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
||||
|
||||
**${remaining} tasks remain. Keep bouldering.**`
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user