fix(atlas): enforce checkbox completion before next task

🤖 GENERATED WITH ASSISTANCE OF [OhMyOpenCode](https://github.com/code-yeongyu/oh-my-opencode)
2026-03-07 18:56:07 +09:00
parent 532143c5f4
commit bf8d0ffcc0
10 changed files with 499 additions and 34 deletions
--- a/src/agents/atlas/default.ts
+++ b/src/agents/atlas/default.ts
@@ -213,7 +213,7 @@ After EVERY delegation, complete ALL of these steps — no shortcuts:

 After verification, READ the plan file directly — every time, no exceptions:
 \`\`\`
-Read(".sisyphus/tasks/{plan-name}.yaml")
+Read(".sisyphus/plans/{plan-name}.md")
 \`\`\`
 Count remaining \`- [ ]\` tasks. This is your ground truth for what comes next.

@@ -335,7 +335,7 @@ task(category="quick", load_skills=[], run_in_background=false, prompt="Task 4..
 \`\`\`

 **Path convention**:
- Plan: \`.sisyphus/plans/{name}.md\` (READ ONLY)
+- Plan: \`.sisyphus/plans/{name}.md\` (you may EDIT to mark checkboxes)
 - Notepad: \`.sisyphus/notepads/{name}/\` (READ/APPEND)
 </notepad_protocol>

@@ -372,6 +372,7 @@ You are the QA gate. Subagents lie. Verify EVERYTHING.
 - Use lsp_diagnostics, grep, glob
 - Manage todos
 - Coordinate and verify
+- **EDIT \`.sisyphus/plans/*.md\` to change \`- [ ]\` to \`- [x]\` after verified task completion**

 **YOU DELEGATE**:
 - All code writing/editing
@@ -403,6 +404,20 @@ You are the QA gate. Subagents lie. Verify EVERYTHING.
 - **Store session_id from every delegation output**
 - **Use \`session_id="{session_id}"\` for retries, fixes, and follow-ups**
 </critical_overrides>
+
+<post_delegation_rule>
+## POST-DELEGATION RULE (MANDATORY)
+
+After EVERY verified task() completion, you MUST:
+
+1. **EDIT the plan checkbox**: Change \`- [ ]\` to \`- [x]\` for the completed task in \`.sisyphus/plans/{plan-name}.md\`
+
+2. **READ the plan to confirm**: Read \`.sisyphus/plans/{plan-name}.md\` and verify the checkbox count changed (fewer \`- [ ]\` remaining)
+
+3. **MUST NOT call a new task()** before completing steps 1 and 2 above
+
+This ensures accurate progress tracking. Skip this and you lose visibility into what remains.
+</post_delegation_rule>
 `

 export function getDefaultAtlasPrompt(): string {
--- a/src/agents/atlas/gemini.ts
+++ b/src/agents/atlas/gemini.ts
@@ -309,7 +309,7 @@ task(category="quick", load_skills=[], run_in_background=false, prompt="Task 3..
 - Instruct subagent to append findings (never overwrite)

 **Paths**:
- Plan: \`.sisyphus/plans/{name}.md\` (READ ONLY)
+- Plan: \`.sisyphus/plans/{name}.md\` (you may EDIT to mark checkboxes)
 - Notepad: \`.sisyphus/notepads/{name}/\` (READ/APPEND)
 </notepad_protocol>

@@ -343,6 +343,7 @@ Subagents CLAIM "done" when:
 - Use lsp_diagnostics, grep, glob
 - Manage todos
 - Coordinate and verify
+- **EDIT \`.sisyphus/plans/*.md\` to change \`- [ ]\` to \`- [x]\` after verified task completion**

 **YOU DELEGATE (NO EXCEPTIONS):**
 - All code writing/editing
@@ -373,6 +374,20 @@ Subagents CLAIM "done" when:
 - Store and reuse session_id for retries
 - **USE TOOL CALLS for verification — not internal reasoning**
 </critical_rules>
+
+<post_delegation_rule>
+## POST-DELEGATION RULE (MANDATORY)
+
+After EVERY verified task() completion, you MUST:
+
+1. **EDIT the plan checkbox**: Change \`- [ ]\` to \`- [x]\` for the completed task in \`.sisyphus/plans/{plan-name}.md\`
+
+2. **READ the plan to confirm**: Read \`.sisyphus/plans/{plan-name}.md\` and verify the checkbox count changed (fewer \`- [ ]\` remaining)
+
+3. **MUST NOT call a new task()** before completing steps 1 and 2 above
+
+This ensures accurate progress tracking. Skip this and you lose visibility into what remains.
+</post_delegation_rule>
 `

 export function getGeminiAtlasPrompt(): string {
--- a/src/agents/atlas/gpt.ts
+++ b/src/agents/atlas/gpt.ts
@@ -313,7 +313,7 @@ task(category="quick", load_skills=[], run_in_background=false, prompt="Task 3..
 - Instruct subagent to append findings (never overwrite)

 **Paths**:
- Plan: \`.sisyphus/plans/{name}.md\` (READ ONLY)
+- Plan: \`.sisyphus/plans/{name}.md\` (you may EDIT to mark checkboxes)
 - Notepad: \`.sisyphus/notepads/{name}/\` (READ/APPEND)
 </notepad_protocol>

@@ -348,6 +348,7 @@ Your job is to CATCH THEM. Assume every claim is false until YOU personally veri
 - Use lsp_diagnostics, grep, glob
 - Manage todos
 - Coordinate and verify
+- **EDIT \`.sisyphus/plans/*.md\` to change \`- [ ]\` to \`- [x]\` after verified task completion**

 **YOU DELEGATE**:
 - All code writing/editing
@@ -376,15 +377,19 @@ Your job is to CATCH THEM. Assume every claim is false until YOU personally veri
 - Store and reuse session_id for retries
 </critical_rules>

-<user_updates_spec>
- Send brief updates (1-2 sentences) only when:
-  - Starting a new major phase
-  - Discovering something that changes the plan
- Avoid narrating routine tool calls
- Each update must include a concrete outcome ("Found X", "Verified Y", "Delegated Z")
- Keep updates varied in structure — don't start each the same way
- Do NOT expand task scope; if you notice new work, call it out as optional
-</user_updates_spec>
+<post_delegation_rule>
+## POST-DELEGATION RULE (MANDATORY)
+
+After EVERY verified task() completion, you MUST:
+
+1. **EDIT the plan checkbox**: Change \`- [ ]\` to \`- [x]\` for the completed task in \`.sisyphus/plans/{plan-name}.md\`
+
+2. **READ the plan to confirm**: Read \`.sisyphus/plans/{plan-name}.md\` and verify the checkbox count changed (fewer \`- [ ]\` remaining)
+
+3. **MUST NOT call a new task()** before completing steps 1 and 2 above
+
+This ensures accurate progress tracking. Skip this and you lose visibility into what remains.
+</post_delegation_rule>
 `;

 export function getGptAtlasPrompt(): string {
--- a/src/agents/atlas/prompt-checkbox-enforcement.test.ts
+++ b/src/agents/atlas/prompt-checkbox-enforcement.test.ts
@@ -0,0 +1,155 @@
+import { describe, test, expect } from "bun:test"
+import { ATLAS_SYSTEM_PROMPT } from "./default"
+import { ATLAS_GPT_SYSTEM_PROMPT } from "./gpt"
+import { ATLAS_GEMINI_SYSTEM_PROMPT } from "./gemini"
+
+describe("ATLAS prompt checkbox enforcement", () => {
+  describe("default prompt", () => {
+    test("plan should NOT be marked (READ ONLY)", () => {
+      // given
+      const prompt = ATLAS_SYSTEM_PROMPT
+
+      // when / then
+      expect(prompt).not.toMatch(/\(READ ONLY\)/)
+    })
+
+    test("plan description should include EDIT for checkboxes", () => {
+      // given
+      const prompt = ATLAS_SYSTEM_PROMPT
+      const lowerPrompt = prompt.toLowerCase()
+
+      // when / then
+      expect(lowerPrompt).toMatch(/edit.*checkbox|checkbox.*edit/)
+    })
+
+    test("boundaries should include exception for editing .sisyphus/plans/*.md checkboxes", () => {
+      // given
+      const prompt = ATLAS_SYSTEM_PROMPT
+      const lowerPrompt = prompt.toLowerCase()
+
+      // when / then
+      expect(lowerPrompt).toMatch(/\.sisyphus\/plans\/\*\.md/)
+      expect(lowerPrompt).toMatch(/checkbox/)
+    })
+
+    test("prompt should include POST-DELEGATION RULE", () => {
+      // given
+      const prompt = ATLAS_SYSTEM_PROMPT
+      const lowerPrompt = prompt.toLowerCase()
+
+      // when / then
+      expect(lowerPrompt).toMatch(/post-delegation/)
+    })
+
+    test("prompt should include MUST NOT call a new task() before", () => {
+      // given
+      const prompt = ATLAS_SYSTEM_PROMPT
+      const lowerPrompt = prompt.toLowerCase()
+
+      // when / then
+      expect(lowerPrompt).toMatch(/must not.*call.*new.*task/)
+    })
+
+    test("default prompt should NOT reference .sisyphus/tasks/", () => {
+      // given
+      const prompt = ATLAS_SYSTEM_PROMPT
+
+      // when / then
+      expect(prompt).not.toMatch(/\.sisyphus\/tasks\//)
+    })
+  })
+
+  describe("GPT prompt", () => {
+    test("plan should NOT be marked (READ ONLY)", () => {
+      // given
+      const prompt = ATLAS_GPT_SYSTEM_PROMPT
+
+      // when / then
+      expect(prompt).not.toMatch(/\(READ ONLY\)/)
+    })
+
+    test("plan description should include EDIT for checkboxes", () => {
+      // given
+      const prompt = ATLAS_GPT_SYSTEM_PROMPT
+      const lowerPrompt = prompt.toLowerCase()
+
+      // when / then
+      expect(lowerPrompt).toMatch(/edit.*checkbox|checkbox.*edit/)
+    })
+
+    test("boundaries should include exception for editing .sisyphus/plans/*.md checkboxes", () => {
+      // given
+      const prompt = ATLAS_GPT_SYSTEM_PROMPT
+      const lowerPrompt = prompt.toLowerCase()
+
+      // when / then
+      expect(lowerPrompt).toMatch(/\.sisyphus\/plans\/\*\.md/)
+      expect(lowerPrompt).toMatch(/checkbox/)
+    })
+
+    test("prompt should include POST-DELEGATION RULE", () => {
+      // given
+      const prompt = ATLAS_GPT_SYSTEM_PROMPT
+      const lowerPrompt = prompt.toLowerCase()
+
+      // when / then
+      expect(lowerPrompt).toMatch(/post-delegation/)
+    })
+
+    test("prompt should include MUST NOT call a new task() before", () => {
+      // given
+      const prompt = ATLAS_GPT_SYSTEM_PROMPT
+      const lowerPrompt = prompt.toLowerCase()
+
+      // when / then
+      expect(lowerPrompt).toMatch(/must not.*call.*new.*task/)
+    })
+  })
+
+  describe("Gemini prompt", () => {
+    test("plan should NOT be marked (READ ONLY)", () => {
+      // given
+      const prompt = ATLAS_GEMINI_SYSTEM_PROMPT
+
+      // when / then
+      expect(prompt).not.toMatch(/\(READ ONLY\)/)
+    })
+
+    test("plan description should include EDIT for checkboxes", () => {
+      // given
+      const prompt = ATLAS_GEMINI_SYSTEM_PROMPT
+      const lowerPrompt = prompt.toLowerCase()
+
+      // when / then
+      expect(lowerPrompt).toMatch(/edit.*checkbox|checkbox.*edit/)
+    })
+
+    test("boundaries should include exception for editing .sisyphus/plans/*.md checkboxes", () => {
+      // given
+      const prompt = ATLAS_GEMINI_SYSTEM_PROMPT
+      const lowerPrompt = prompt.toLowerCase()
+
+      // when / then
+      expect(lowerPrompt).toMatch(/\.sisyphus\/plans\/\*\.md/)
+      expect(lowerPrompt).toMatch(/checkbox/)
+    })
+
+    test("prompt should include POST-DELEGATION RULE", () => {
+      // given
+      const prompt = ATLAS_GEMINI_SYSTEM_PROMPT
+      const lowerPrompt = prompt.toLowerCase()
+
+      // when / then
+      expect(lowerPrompt).toMatch(/post-delegation/)
+    })
+
+    test("prompt should include MUST NOT call a new task() before", () => {
+      // given
+      const prompt = ATLAS_GEMINI_SYSTEM_PROMPT
+      const lowerPrompt = prompt.toLowerCase()
+
+      // when / then
+      expect(lowerPrompt).toMatch(/must not.*call.*new.*task/)
+    })
+  })
+})
--- a/src/hooks/atlas/index.test.ts
+++ b/src/hooks/atlas/index.test.ts
@@ -409,6 +409,123 @@ describe("atlas hook", () => {
      cleanupMessageStorage(sessionID)
    })

+    describe("completion gate output ordering", () => {
+      const COMPLETION_GATE_SESSION = "completion-gate-order-test"
+
+      beforeEach(() => {
+        setupMessageStorage(COMPLETION_GATE_SESSION, "atlas")
+      })
+
+      afterEach(() => {
+        cleanupMessageStorage(COMPLETION_GATE_SESSION)
+      })
+
+      test("should include completion gate before Subagent Response in transformed boulder output", async () => {
+        // given - Atlas caller with boulder state
+        const planPath = join(TEST_DIR, "test-plan.md")
+        writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [x] Task 2")
+
+        const state: BoulderState = {
+          active_plan: planPath,
+          started_at: "2026-01-02T10:00:00Z",
+          session_ids: ["session-1"],
+          plan_name: "test-plan",
+        }
+        writeBoulderState(TEST_DIR, state)
+
+        const hook = createAtlasHook(createMockPluginInput())
+        const output = {
+          title: "Sisyphus Task",
+          output: "Task completed successfully",
+          metadata: {},
+        }
+
+        // when
+        await hook["tool.execute.after"](
+          { tool: "task", sessionID: COMPLETION_GATE_SESSION },
+          output
+        )
+
+        // then - completion gate should appear BEFORE Subagent Response
+        const subagentResponseIndex = output.output.indexOf("**Subagent Response:**")
+        const completionGateIndex = output.output.indexOf("COMPLETION GATE")
+
+        expect(completionGateIndex).toBeGreaterThanOrEqual(0)
+        expect(subagentResponseIndex).toBeGreaterThanOrEqual(0)
+        expect(completionGateIndex).toBeLessThan(subagentResponseIndex)
+      })
+
+      test("should include completion gate before verification phase text", async () => {
+        // given - Atlas caller with boulder state
+        const planPath = join(TEST_DIR, "test-plan.md")
+        writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [x] Task 2")
+
+        const state: BoulderState = {
+          active_plan: planPath,
+          started_at: "2026-01-02T10:00:00Z",
+          session_ids: ["session-1"],
+          plan_name: "test-plan",
+        }
+        writeBoulderState(TEST_DIR, state)
+
+        const hook = createAtlasHook(createMockPluginInput())
+        const output = {
+          title: "Sisyphus Task",
+          output: "Task completed successfully",
+          metadata: {},
+        }
+
+        // when
+        await hook["tool.execute.after"](
+          { tool: "task", sessionID: COMPLETION_GATE_SESSION },
+          output
+        )
+
+        // then - completion gate should appear BEFORE verification phase text
+        const completionGateIndex = output.output.indexOf("COMPLETION GATE")
+        const lyingIndex = output.output.indexOf("LYING")
+        const phase1Index = output.output.indexOf("PHASE 1")
+
+        expect(completionGateIndex).toBeGreaterThanOrEqual(0)
+        expect(lyingIndex).toBeGreaterThanOrEqual(0)
+        expect(completionGateIndex).toBeLessThan(lyingIndex)
+        if (phase1Index !== -1) {
+          expect(completionGateIndex).toBeLessThan(phase1Index)
+        }
+      })
+
+      test("should not contain old STEP 7 MARK COMPLETION IN PLAN FILE text", async () => {
+        // given - Atlas caller with boulder state
+        const planPath = join(TEST_DIR, "test-plan.md")
+        writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [x] Task 2")
+
+        const state: BoulderState = {
+          active_plan: planPath,
+          started_at: "2026-01-02T10:00:00Z",
+          session_ids: ["session-1"],
+          plan_name: "test-plan",
+        }
+        writeBoulderState(TEST_DIR, state)
+
+        const hook = createAtlasHook(createMockPluginInput())
+        const output = {
+          title: "Sisyphus Task",
+          output: "Task completed successfully",
+          metadata: {},
+        }
+
+        // when
+        await hook["tool.execute.after"](
+          { tool: "task", sessionID: COMPLETION_GATE_SESSION },
+          output
+        )
+
+        // then - old STEP 7 MARK COMPLETION IN PLAN FILE should be absent
+        expect(output.output).not.toContain("STEP 7: MARK COMPLETION IN PLAN FILE")
+        expect(output.output).not.toContain("MARK COMPLETION IN PLAN FILE")
+      })
+    })
+
    describe("Write/Edit tool direct work reminder", () => {
      const ORCHESTRATOR_SESSION = "orchestrator-write-test"

--- a/src/hooks/atlas/system-reminder-templates.test.ts
+++ b/src/hooks/atlas/system-reminder-templates.test.ts
@@ -0,0 +1,37 @@
+import { describe, it, expect } from "bun:test"
+import { BOULDER_CONTINUATION_PROMPT } from "./system-reminder-templates"
+
+describe("BOULDER_CONTINUATION_PROMPT", () => {
+  describe("checkbox-first priority rules", () => {
+    it("first rule after RULES: mentions both reading the plan AND marking a still-unchecked completed task", () => {
+      const rulesSection = BOULDER_CONTINUATION_PROMPT.split("RULES:")[1]!
+      const firstRule = rulesSection.split("\n")[1]!.trim()
+
+      expect(firstRule).toContain("Read the plan")
+      expect(firstRule).toContain("mark")
+      expect(firstRule).toContain("completed")
+    })
+
+    it("first rule includes IMMEDIATELY keyword", () => {
+      const rulesSection = BOULDER_CONTINUATION_PROMPT.split("RULES:")[1]!
+      const firstRule = rulesSection.split("\n")[1]!.trim()
+
+      expect(firstRule).toContain("IMMEDIATELY")
+    })
+
+    it("checkbox-marking guidance appears BEFORE Proceed without asking for permission", () => {
+      const rulesSection = BOULDER_CONTINUATION_PROMPT.split("RULES:")[1]!
+
+      const checkboxMarkingMatch = rulesSection.match(/- \[x\]/i)
+      const proceedMatch = rulesSection.match(/Proceed without asking for permission/)
+
+      expect(checkboxMarkingMatch).not.toBeNull()
+      expect(proceedMatch).not.toBeNull()
+
+      const checkboxPosition = checkboxMarkingMatch!.index
+      const proceedPosition = proceedMatch!.index
+
+      expect(checkboxPosition).toBeLessThan(proceedPosition)
+    })
+  })
+})
--- a/src/hooks/atlas/system-reminder-templates.ts
+++ b/src/hooks/atlas/system-reminder-templates.ts
@@ -33,9 +33,8 @@ export const BOULDER_CONTINUATION_PROMPT = `${createSystemDirective(SystemDirect
 You have an active work plan with incomplete tasks. Continue working.

 RULES:
- **FIRST**: Read the plan file NOW to check exact current progress — count remaining \`- [ ]\` tasks
+- **FIRST**: Read the plan file NOW. If the last completed task is still unchecked, mark it \`- [x]\` IMMEDIATELY before anything else
 - Proceed without asking for permission
- Change \`- [ ]\` to \`- [x]\` in the plan file when done
 - Use the notepad at .sisyphus/notepads/{PLAN_NAME}/ to record learnings
 - Do not stop until all tasks are complete
 - If blocked, document the blocker and move to the next task`
--- a/src/hooks/atlas/tool-execute-after.ts
+++ b/src/hooks/atlas/tool-execute-after.ts
@@ -7,7 +7,7 @@ import { HOOK_NAME } from "./hook-name"
 import { DIRECT_WORK_REMINDER } from "./system-reminder-templates"
 import { isSisyphusPath } from "./sisyphus-path"
 import { extractSessionIdFromOutput } from "./subagent-session-id"
-import { buildOrchestratorReminder, buildStandaloneVerificationReminder } from "./verification-reminders"
+import { buildCompletionGate, buildOrchestratorReminder, buildStandaloneVerificationReminder } from "./verification-reminders"
 import { isWriteOrEditToolName } from "./write-edit-tool-policy"
 import type { ToolExecuteAfterInput, ToolExecuteAfterOutput } from "./types"

@@ -76,7 +76,11 @@ export function createToolExecuteAfterHandler(input: {
        // Preserve original subagent response - critical for debugging failed tasks
        const originalResponse = toolOutput.output

-toolOutput.output = `
+        toolOutput.output = `
+<system-reminder>
+${buildCompletionGate(boulderState.plan_name, subagentSessionId)}
+</system-reminder>
+
 ## SUBAGENT WORK COMPLETED

 ${fileChanges}
@@ -88,7 +92,7 @@ ${fileChanges}
 ${originalResponse}

 <system-reminder>
-${buildOrchestratorReminder(boulderState.plan_name, progress, subagentSessionId, autoCommit)}
+${buildOrchestratorReminder(boulderState.plan_name, progress, subagentSessionId, autoCommit, false)}
 </system-reminder>`
        log(`[${HOOK_NAME}] Output transformed for orchestrator mode (boulder)`, {
          plan: boulderState.plan_name,
--- a/src/hooks/atlas/verification-reminders.test.ts
+++ b/src/hooks/atlas/verification-reminders.test.ts
@@ -0,0 +1,94 @@
+import { describe, expect, it } from "bun:test"
+import { buildOrchestratorReminder, buildCompletionGate } from "./verification-reminders"
+
+// Test helpers for given/when/then pattern
+const given = describe
+const when = describe
+const then = it
+
+describe("buildCompletionGate", () => {
+  given("a plan name and session id", () => {
+    const planName = "test-plan"
+    const sessionId = "test-session-123"
+
+    when("buildCompletionGate is called", () => {
+      const gate = buildCompletionGate(planName, sessionId)
+
+      then("completion gate text is present", () => {
+        expect(gate).toContain("COMPLETION GATE")
+      })
+
+      then("gate appears before verification phase text", () => {
+        const gateIndex = gate.indexOf("COMPLETION GATE")
+        const verificationIndex = gate.indexOf("VERIFICATION_REMINDER")
+        expect(gateIndex).toBeLessThan(verificationIndex)
+      })
+
+      then("gate interpolates the plan name path", () => {
+        expect(gate).toContain(planName)
+        expect(gate).toContain(`.sisyphus/plans/${planName}.md`)
+      })
+
+      then("gate includes Edit instructions", () => {
+        expect(gate.toLowerCase()).toContain("edit")
+      })
+
+      then("gate includes Read instructions", () => {
+        expect(gate.toLowerCase()).toContain("read")
+      })
+
+      then("old STEP 7 MARK COMPLETION text is absent", () => {
+        expect(gate).not.toContain("STEP 7")
+        expect(gate).not.toContain("MARK COMPLETION IN PLAN FILE")
+      })
+
+      then("step numbering remains consecutive after removal", () => {
+        const stepMatches = gate.match(/STEP \d+:/g) ?? []
+        if (stepMatches.length > 1) {
+          const numbers = stepMatches.map((s: string) => parseInt(s.match(/\d+/)?.[0] ?? "0"))
+          for (let i = 1; i < numbers.length; i++) {
+            expect(numbers[i]).toBe(numbers[i - 1] + 1)
+          }
+        }
+      })
+    })
+  })
+})
+
+describe("buildOrchestratorReminder", () => {
+  given("progress with completed tasks", () => {
+    const planName = "my-test-plan"
+    const sessionId = "session-abc"
+    const progress = { total: 10, completed: 3 }
+
+    when("buildOrchestratorReminder is called with autoCommit true", () => {
+      const reminder = buildOrchestratorReminder(planName, progress, sessionId, true)
+
+      then("old STEP 7 MARK COMPLETION IN PLAN FILE text is absent", () => {
+        expect(reminder).not.toContain("STEP 7: MARK COMPLETION IN PLAN FILE")
+      })
+
+      then("completion gate appears before verification reminder", () => {
+        const gateIndex = reminder.indexOf("COMPLETION GATE")
+        const verificationIndex = reminder.indexOf("VERIFICATION_REMINDER")
+        expect(gateIndex).toBeGreaterThanOrEqual(0)
+        expect(gateIndex).toBeLessThan(verificationIndex)
+      })
+    })
+
+    when("buildOrchestratorReminder is called with autoCommit false", () => {
+      const reminder = buildOrchestratorReminder(planName, progress, sessionId, false)
+
+      then("old STEP 7 MARK COMPLETION IN PLAN FILE text is absent", () => {
+        expect(reminder).not.toContain("STEP 7: MARK COMPLETION IN PLAN FILE")
+      })
+
+      then("completion gate appears before verification reminder", () => {
+        const gateIndex = reminder.indexOf("COMPLETION GATE")
+        const verificationIndex = reminder.indexOf("VERIFICATION_REMINDER")
+        expect(gateIndex).toBeGreaterThanOrEqual(0)
+        expect(gateIndex).toBeLessThan(verificationIndex)
+      })
+    })
+  })
+})
--- a/src/hooks/atlas/verification-reminders.ts
+++ b/src/hooks/atlas/verification-reminders.ts
@@ -1,7 +1,37 @@
 import { VERIFICATION_REMINDER } from "./system-reminder-templates"

+export function buildCompletionGate(planName: string, sessionId: string): string {
+  return `
+**COMPLETION GATE — DO NOT PROCEED UNTIL THIS IS DONE**
+
+Your completion will NOT be recorded until you complete ALL of the following:
+
+1. **Edit** the plan file \`.sisyphus/plans/${planName}.md\`:
+   - Change \`- [ ]\` to \`- [x]\` for the completed task
+   - Use \`Edit\` tool to modify the checkbox
+
+2. **Read** the plan file AGAIN:
+   \`\`\`
+   Read(".sisyphus/plans/${planName}.md")
+   \`\`\`
+   - Verify the checkbox count changed (more \`- [x]\` than before)
+
+3. **DO NOT call \`task()\` again** until you have completed steps 1 and 2 above.
+
+If anything fails while closing this out, resume the same session immediately:
+\`\`\`typescript
+task(session_id="${sessionId}", prompt="fix: checkbox not recorded correctly")
+\`\`\`
+
+**Your completion is NOT tracked until the checkbox is marked in the plan file.**
+
+**VERIFICATION_REMINDER**`
+}
+
 function buildVerificationReminder(sessionId: string): string {
-  return `${VERIFICATION_REMINDER}
+  return `**VERIFICATION_REMINDER**
+
+${VERIFICATION_REMINDER}

 ---

@@ -15,20 +45,21 @@ export function buildOrchestratorReminder(
  planName: string,
  progress: { total: number; completed: number },
  sessionId: string,
-  autoCommit: boolean = true
+  autoCommit: boolean = true,
+  includeCompletionGate: boolean = true
 ): string {
  const remaining = progress.total - progress.completed
-  
+
  const commitStep = autoCommit
    ? `
-**STEP 8: COMMIT ATOMIC UNIT**
+**STEP 7: COMMIT ATOMIC UNIT**

 - Stage ONLY the verified changes
 - Commit with clear message describing what was done
 `
    : ""

-  const nextStepNumber = autoCommit ? 9 : 8
+  const nextStepNumber = autoCommit ? 8 : 7

  return `
 ---
@@ -37,7 +68,9 @@ export function buildOrchestratorReminder(

 ---

-${buildVerificationReminder(sessionId)}
+${includeCompletionGate ? `${buildCompletionGate(planName, sessionId)}
+
+` : ""}${buildVerificationReminder(sessionId)}

 **STEP 5: READ SUBAGENT NOTEPAD (LEARNINGS, ISSUES, PROBLEMS)**

@@ -64,22 +97,13 @@ Read(".sisyphus/plans/${planName}.md")
 Count exactly: how many \`- [ ]\` remain? How many \`- [x]\` completed?
 This is YOUR ground truth. Use it to decide what comes next.

-**STEP 7: MARK COMPLETION IN PLAN FILE (IMMEDIATELY)**
-
-RIGHT NOW - Do not delay. Verification passed → Mark IMMEDIATELY.
-
-Update the plan file \`.sisyphus/plans/${planName}.md\`:
- Change \`- [ ]\` to \`- [x]\` for the completed task
- Use \`Edit\` tool to modify the checkbox
-
-**DO THIS BEFORE ANYTHING ELSE. Unmarked = Untracked = Lost progress.**
 ${commitStep}
 **STEP ${nextStepNumber}: PROCEED TO NEXT TASK**

 - Read the plan file AGAIN to identify the next \`- [ ]\` task
 - Start immediately - DO NOT STOP

-━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

 **${remaining} tasks remain. Keep bouldering.**`
 }