From 9bed597e464e2ff921b1aad1364d78365933b0f1 Mon Sep 17 00:00:00 2001 From: justsisyphus Date: Thu, 15 Jan 2026 19:40:50 +0900 Subject: [PATCH] feat(prompts): strengthen post-task reminders with actionable guidance MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Rewrite VERIFICATION_REMINDER with 3-step action flow (verify → determine QA → add to todo) - Add explicit BLOCKING directive to prevent premature task progression - Enhance buildOrchestratorReminder with clear post-verification actions - Improve capture-pane block message with concrete Bash examples --- src/hooks/sisyphus-orchestrator/index.ts | 77 ++++++++++++++++-------- src/tools/interactive-bash/tools.ts | 24 +++++++- 2 files changed, 76 insertions(+), 25 deletions(-) diff --git a/src/hooks/sisyphus-orchestrator/index.ts b/src/hooks/sisyphus-orchestrator/index.ts index 7e3be54aa..e79bf271b 100644 --- a/src/hooks/sisyphus-orchestrator/index.ts +++ b/src/hooks/sisyphus-orchestrator/index.ts @@ -63,34 +63,45 @@ RULES: - Do not stop until all tasks are complete - If blocked, document the blocker and move to the next task` -const VERIFICATION_REMINDER = `**MANDATORY VERIFICATION - SUBAGENTS LIE** +const VERIFICATION_REMINDER = `**MANDATORY: WHAT YOU MUST DO RIGHT NOW** -Subagents FREQUENTLY claim completion when: -- Tests are actually FAILING -- Code has type/lint ERRORS -- Implementation is INCOMPLETE -- Patterns were NOT followed +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ -**YOU MUST VERIFY EVERYTHING YOURSELF:** +⚠️ CRITICAL: Subagents FREQUENTLY LIE about completion. +Tests FAILING, code has ERRORS, implementation INCOMPLETE - but they say "done". -1. Run \`lsp_diagnostics\` on changed files - Must be CLEAN -2. Run tests yourself - Must PASS (not "agent said it passed") -3. Read the actual code - Must match requirements -4. Check build/typecheck - Must succeed +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ -DO NOT TRUST THE AGENT'S SELF-REPORT. -VERIFY EACH CLAIM WITH YOUR OWN TOOL CALLS. +**STEP 1: VERIFY WITH YOUR OWN TOOL CALLS (DO THIS NOW)** -**HANDS-ON QA REQUIRED (after ALL tasks complete):** +Run these commands YOURSELF - do NOT trust agent's claims: +1. \`lsp_diagnostics\` on changed files → Must be CLEAN +2. \`bash\` to run tests → Must PASS +3. \`bash\` to run build/typecheck → Must succeed +4. \`Read\` the actual code → Must match requirements -| Deliverable Type | Verification Tool | Action | -|------------------|-------------------|--------| -| **Frontend/UI** | \`/playwright\` skill | Navigate, interact, screenshot evidence | -| **TUI/CLI** | \`interactive_bash\` (tmux) | Run interactively, verify output | -| **API/Backend** | \`bash\` with curl | Send requests, verify responses | +**STEP 2: DETERMINE IF HANDS-ON QA IS NEEDED** -Static analysis CANNOT catch: visual bugs, animation issues, user flow breakages, integration problems. -**FAILURE TO DO HANDS-ON QA = INCOMPLETE WORK.**` +| Deliverable Type | QA Method | Tool | +|------------------|-----------|------| +| **Frontend/UI** | Browser interaction | \`/playwright\` skill | +| **TUI/CLI** | Run interactively | \`interactive_bash\` (tmux) | +| **API/Backend** | Send real requests | \`bash\` with curl | + +Static analysis CANNOT catch: visual bugs, animation issues, user flow breakages. + +**STEP 3: IF QA IS NEEDED - ADD TO TODO IMMEDIATELY** + +\`\`\` +todowrite([ + { id: "qa-X", content: "HANDS-ON QA: [specific verification action]", status: "pending", priority: "high" } +]) +\`\`\` + +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + +**BLOCKING: DO NOT proceed to next task until Steps 1-3 are complete.** +**FAILURE TO DO QA = INCOMPLETE WORK = USER WILL REJECT.**` const ORCHESTRATOR_DELEGATION_REQUIRED = ` @@ -183,20 +194,38 @@ function buildOrchestratorReminder(planName: string, progress: { total: number; return ` --- -**State:** Plan: ${planName} | ${progress.completed}/${progress.total} done, ${remaining} left +**BOULDER STATE:** Plan: \`${planName}\` | ✅ ${progress.completed}/${progress.total} done | ⏳ ${remaining} remaining --- ${buildVerificationReminder(sessionId)} -ALL pass? → commit atomic unit, mark \`[x]\`, next task.` +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + +**AFTER VERIFICATION PASSES - YOUR NEXT ACTIONS (IN ORDER):** + +1. **COMMIT** atomic unit (only verified changes) +2. **MARK** \`[x]\` in plan file for completed task +3. **PROCEED** to next task immediately + +**DO NOT STOP. ${remaining} tasks remain. Keep bouldering.**` } function buildStandaloneVerificationReminder(sessionId: string): string { return ` --- -${buildVerificationReminder(sessionId)}` +${buildVerificationReminder(sessionId)} + +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + +**AFTER VERIFICATION - CHECK YOUR TODO LIST:** + +1. Run \`todoread\` to see remaining tasks +2. If QA tasks exist → execute them BEFORE marking complete +3. Mark completed tasks → proceed to next pending task + +**NO TODO = NO TRACKING = INCOMPLETE WORK. Use todowrite aggressively.**` } function extractSessionIdFromOutput(output: string): string { diff --git a/src/tools/interactive-bash/tools.ts b/src/tools/interactive-bash/tools.ts index 1628d6d12..5a1e2d531 100644 --- a/src/tools/interactive-bash/tools.ts +++ b/src/tools/interactive-bash/tools.ts @@ -64,7 +64,29 @@ export const interactive_bash: ToolDefinition = tool({ const subcommand = parts[0].toLowerCase() if (BLOCKED_TMUX_SUBCOMMANDS.includes(subcommand)) { - return `Error: '${parts[0]}' is blocked. Use bash tool instead for capturing/printing terminal output.` + const sessionIdx = parts.findIndex(p => p === "-t" || p.startsWith("-t")) + let sessionName = "omo-session" + if (sessionIdx !== -1) { + if (parts[sessionIdx] === "-t" && parts[sessionIdx + 1]) { + sessionName = parts[sessionIdx + 1] + } else if (parts[sessionIdx].startsWith("-t")) { + sessionName = parts[sessionIdx].slice(2) + } + } + + return `Error: '${parts[0]}' is blocked in interactive_bash. + +**USE BASH TOOL INSTEAD:** + +\`\`\`bash +# Capture terminal output +tmux capture-pane -p -t ${sessionName} + +# Or capture with history (last 1000 lines) +tmux capture-pane -p -t ${sessionName} -S -1000 +\`\`\` + +The Bash tool can execute these commands directly. Do NOT retry with interactive_bash.` } const proc = Bun.spawn([tmuxPath, ...parts], {