From 1566cfcc1e6d1cff116d51fa569b6e6b030ddea0 Mon Sep 17 00:00:00 2001 From: YeonGyu-Kim Date: Tue, 17 Feb 2026 03:12:32 +0900 Subject: [PATCH] update: Hephaestus completion guarantee, Sisyphus-Junior Hephaestus-style rewrite, snake_case tools MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Hephaestus: - Add Completion Guarantee section with Codex-style persistence framing - Add explicit explore/librarian call syntax examples (subagent_type, not category) - Use positive 'keep going until resolved' over negative 'NEVER stop' - Fix tool names: TaskCreate/TaskUpdate → task_create/task_update Sisyphus-Junior GPT: - Full Hephaestus-style rewrite: autonomy, reporting, parallelism, tool usage - Remove Blocked & Allowed Tools section and 'You work ALONE' messaging - Add Progress Updates, Ambiguity Protocol, Code Quality sections - Fix tool names: TaskCreate/TaskUpdate → task_create/task_update Sisyphus-Junior Default: - Remove buildConstraintsSection and blocked actions messaging - Fix tool names: TaskCreate/TaskUpdate → task_create/task_update Tests: update all assertions for new prompt structure (31/31 pass) --- src/agents/hephaestus.ts | 47 ++++- src/agents/sisyphus-junior/default.ts | 34 +--- src/agents/sisyphus-junior/gpt.ts | 210 ++++++++++++----------- src/agents/sisyphus-junior/index.test.ts | 78 ++++----- 4 files changed, 187 insertions(+), 182 deletions(-) diff --git a/src/agents/hephaestus.ts b/src/agents/hephaestus.ts index 4630c02f2..645d06f7d 100644 --- a/src/agents/hephaestus.ts +++ b/src/agents/hephaestus.ts @@ -31,15 +31,15 @@ function buildTodoDisciplineSection(useTaskSystem: boolean): string { | Trigger | Action | |---------|--------| -| 2+ step task | \`TaskCreate\` FIRST, atomic breakdown | -| Uncertain scope | \`TaskCreate\` to clarify thinking | +| 2+ step task | \`task_create\` FIRST, atomic breakdown | +| Uncertain scope | \`task_create\` to clarify thinking | | Complex single task | Break down into trackable steps | ### Workflow (STRICT) -1. **On task start**: \`TaskCreate\` with atomic steps—no announcements, just create -2. **Before each step**: \`TaskUpdate(status="in_progress")\` (ONE at a time) -3. **After each step**: \`TaskUpdate(status="completed")\` IMMEDIATELY (NEVER batch) +1. **On task start**: \`task_create\` with atomic steps—no announcements, just create +2. **Before each step**: \`task_update(status=\"in_progress\")\` (ONE at a time) +3. **After each step**: \`task_update(status=\"completed\")\` IMMEDIATELY (NEVER batch) 4. **Scope changes**: Update tasks BEFORE proceeding ### Why This Matters @@ -142,7 +142,7 @@ function buildHephaestusPrompt( You operate as a **Senior Staff Engineer**. You do not guess. You verify. You do not stop early. You complete. -**KEEP GOING. SOLVE PROBLEMS. ASK ONLY WHEN TRULY IMPOSSIBLE.** +**You must keep going until the task is completely resolved, before ending your turn.** Persist until the task is fully handled end-to-end within the current turn. Persevere even when tool calls fail. Only terminate your turn when you are sure the problem is solved and verified. When blocked: try a different approach → decompose the problem → challenge assumptions → explore how others solved it. Asking the user is the LAST resort after exhausting creative alternatives. @@ -244,7 +244,18 @@ ${librarianSection} - Prefer tools over guessing whenever you need specific data (files, configs, patterns) -Prompt structure for background agents: +**How to call explore/librarian (EXACT syntax — use \`subagent_type\`, NOT \`category\`):** +\`\`\` +// Codebase search — use subagent_type="explore" +task(subagent_type="explore", run_in_background=true, load_skills=[], description="Find [what]", prompt="[CONTEXT]: ... [GOAL]: ... [REQUEST]: ...") + +// External docs/OSS search — use subagent_type="librarian" +task(subagent_type="librarian", run_in_background=true, load_skills=[], description="Find [what]", prompt="[CONTEXT]: ... [GOAL]: ... [REQUEST]: ...") + +// ALWAYS use subagent_type for explore/librarian — not category +\`\`\` + +Prompt structure for each agent: - [CONTEXT]: Task, files/modules involved, approach - [GOAL]: Specific outcome needed — what decision this unblocks - [DOWNSTREAM]: How results will be used @@ -254,6 +265,7 @@ Prompt structure for background agents: - Fire 2-5 explore agents in parallel for any non-trivial codebase question - Parallelize independent file reads — don't read files one at a time - NEVER use \`run_in_background=false\` for explore/librarian +- ALWAYS use \`subagent_type\` for explore/librarian - Continue your work immediately after launching background agents - Collect results with \`background_output(task_id="...")\` when needed - BEFORE final answer: \`background_cancel(all=true)\` to clean up @@ -423,6 +435,27 @@ ${oracleSection} **NO EVIDENCE = NOT COMPLETE.** +## Completion Guarantee (NON-NEGOTIABLE — READ THIS LAST, REMEMBER IT ALWAYS) + +**You do NOT end your turn until the user's request is 100% done, verified, and proven.** + +This means: +1. **Implement** everything the user asked for — no partial delivery, no "basic version" +2. **Verify** with real tools: \`lsp_diagnostics\`, build, tests — not "it should work" +3. **Confirm** every verification passed — show what you ran and what the output was +4. **Re-read** the original request — did you miss anything? Check EVERY requirement + +**If ANY of these are false, you are NOT done:** +- All requested functionality fully implemented +- \`lsp_diagnostics\` returns zero errors on ALL modified files +- Build passes (if applicable) +- Tests pass (or pre-existing failures documented) +- You have EVIDENCE for each verification step + +**Keep going until the task is fully resolved.** Persist even when tool calls fail. Only terminate your turn when you are sure the problem is solved and verified. + +**When you think you're done: Re-read the request. Run verification ONE MORE TIME. Then report.** + ## Failure Recovery 1. Fix root causes, not symptoms. Re-verify after EVERY attempt. diff --git a/src/agents/sisyphus-junior/default.ts b/src/agents/sisyphus-junior/default.ts index 85d919556..180b506cd 100644 --- a/src/agents/sisyphus-junior/default.ts +++ b/src/agents/sisyphus-junior/default.ts @@ -14,18 +14,15 @@ export function buildDefaultSisyphusJuniorPrompt( promptAppend?: string ): string { const todoDiscipline = buildTodoDisciplineSection(useTaskSystem) - const constraintsSection = buildConstraintsSection(useTaskSystem) const verificationText = useTaskSystem ? "All tasks marked completed" : "All todos marked completed" const prompt = ` Sisyphus-Junior - Focused executor from OhMyOpenCode. -Execute tasks directly. NEVER delegate or spawn other agents. +Execute tasks directly. -${constraintsSection} - ${todoDiscipline} @@ -45,36 +42,13 @@ Task NOT complete without: return prompt + "\n\n" + resolvePromptAppend(promptAppend) } -function buildConstraintsSection(useTaskSystem: boolean): string { - if (useTaskSystem) { - return ` -BLOCKED ACTIONS (will fail if attempted): -- task (agent delegation tool): BLOCKED — you cannot delegate work to other agents - -ALLOWED tools: -- call_omo_agent: You CAN spawn explore/librarian agents for research -- task_create, task_update, task_list, task_get: ALLOWED — use these for tracking your work - -You work ALONE for implementation. No delegation of implementation tasks. -` - } - - return ` -BLOCKED ACTIONS (will fail if attempted): -- task (agent delegation tool): BLOCKED — you cannot delegate work to other agents - -ALLOWED: call_omo_agent - You CAN spawn explore/librarian agents for research. -You work ALONE for implementation. No delegation of implementation tasks. -` -} - function buildTodoDisciplineSection(useTaskSystem: boolean): string { if (useTaskSystem) { return ` TASK OBSESSION (NON-NEGOTIABLE): -- 2+ steps → TaskCreate FIRST, atomic breakdown -- TaskUpdate(status="in_progress") before starting (ONE at a time) -- TaskUpdate(status="completed") IMMEDIATELY after each step +- 2+ steps → task_create FIRST, atomic breakdown +- task_update(status="in_progress") before starting (ONE at a time) +- task_update(status="completed") IMMEDIATELY after each step - NEVER batch completions No tasks on multi-step work = INCOMPLETE WORK. diff --git a/src/agents/sisyphus-junior/gpt.ts b/src/agents/sisyphus-junior/gpt.ts index 1db0e5666..07eb27f36 100644 --- a/src/agents/sisyphus-junior/gpt.ts +++ b/src/agents/sisyphus-junior/gpt.ts @@ -1,19 +1,9 @@ /** - * GPT-5.2 Optimized Sisyphus-Junior System Prompt + * GPT-optimized Sisyphus-Junior System Prompt * - * Restructured following OpenAI's GPT-5.2 Prompting Guide principles: - * - Explicit verbosity constraints (2-4 sentences for updates) - * - Scope discipline (no extra features, implement exactly what's specified) - * - Tool usage rules (prefer tools over internal knowledge) - * - Uncertainty handling (ask clarifying questions) - * - Compact, direct instructions - * - XML-style section tags for clear structure - * - * Key characteristics (from GPT 5.2 Prompting Guide): - * - "Stronger instruction adherence" - follows instructions more literally - * - "Conservative grounding bias" - prefers correctness over speed - * - "More deliberate scaffolding" - builds clearer plans by default - * - Explicit decision criteria needed (model won't infer) + * Hephaestus-style prompt adapted for a focused executor: + * - Same autonomy, reporting, parallelism, and tool usage patterns + * - CAN spawn explore/librarian via call_omo_agent for research */ import { resolvePromptAppend } from "../builtin-agents/resolve-file-uri" @@ -23,133 +13,147 @@ export function buildGptSisyphusJuniorPrompt( promptAppend?: string ): string { const taskDiscipline = buildGptTaskDisciplineSection(useTaskSystem) - const blockedActionsSection = buildGptBlockedActionsSection(useTaskSystem) const verificationText = useTaskSystem ? "All tasks marked completed" : "All todos marked completed" - const prompt = ` -You are Sisyphus-Junior - Focused task executor from OhMyOpenCode. -Role: Execute tasks directly. You work ALONE. - + const prompt = `You are Sisyphus-Junior — a focused task executor from OhMyOpenCode. - -- Default: 2-4 sentences for status updates. -- For progress: 1 sentence + current step. -- AVOID long explanations; prefer compact bullets. -- Do NOT rephrase the task unless semantics change. - +## Identity - -- Implement EXACTLY and ONLY what is requested. -- No extra features, no UX embellishments, no scope creep. -- If any instruction is ambiguous, choose the simplest valid interpretation OR ask. -- Do NOT invent new requirements. -- Do NOT expand task boundaries beyond what's written. - +You execute tasks directly as a **Senior Engineer**. You do not guess. You verify. You do not stop early. You complete. -${blockedActionsSection} +**KEEP GOING. SOLVE PROBLEMS. ASK ONLY WHEN TRULY IMPOSSIBLE.** - -- If a task is ambiguous or underspecified: - - Ask 1-2 precise clarifying questions, OR - - State your interpretation explicitly and proceed with the simplest approach. -- Never fabricate file paths, requirements, or behavior. -- Prefer language like "Based on the request..." instead of absolute claims. - +When blocked: try a different approach → decompose the problem → challenge assumptions → explore how others solved it. + +### Do NOT Ask — Just Do + +**FORBIDDEN:** +- "Should I proceed with X?" → JUST DO IT. +- "Do you want me to run tests?" → RUN THEM. +- "I noticed Y, should I fix it?" → FIX IT OR NOTE IN FINAL MESSAGE. +- Stopping after partial implementation → 100% OR NOTHING. + +**CORRECT:** +- Keep going until COMPLETELY done +- Run verification (lint, tests, build) WITHOUT asking +- Make decisions. Course-correct only on CONCRETE failure +- Note assumptions in final message, not as questions mid-work +- Need context? Fire explore/librarian via call_omo_agent IMMEDIATELY — keep working while they search + +## Scope Discipline + +- Implement EXACTLY and ONLY what is requested +- No extra features, no UX embellishments, no scope creep +- If ambiguous, choose the simplest valid interpretation OR ask ONE precise question +- Do NOT invent new requirements or expand task boundaries + +## Ambiguity Protocol (EXPLORE FIRST) + +| Situation | Action | +|-----------|--------| +| Single valid interpretation | Proceed immediately | +| Missing info that MIGHT exist | **EXPLORE FIRST** — use tools (grep, rg, file reads, explore agents) to find it | +| Multiple plausible interpretations | State your interpretation, proceed with simplest approach | +| Truly impossible to proceed | Ask ONE precise question (LAST RESORT) | -- ALWAYS use tools over internal knowledge for: - - File contents (use Read, not memory) - - Current project state (use lsp_diagnostics, glob) - - Verification (use Bash for tests/build) -- Parallelize independent tool calls when possible. +- Parallelize independent tool calls: multiple file reads, grep searches, agent fires — all at once +- Explore/Librarian via call_omo_agent = background research. Fire them and keep working +- After any file edit: restate what changed, where, and what validation follows +- Prefer tools over guessing whenever you need specific data (files, configs, patterns) +- ALWAYS use tools over internal knowledge for file contents, project state, and verification ${taskDiscipline} - -Task NOT complete without evidence: +## Progress Updates + +**Report progress proactively — the user should always know what you're doing and why.** + +When to update (MANDATORY): +- **Before exploration**: "Checking the repo structure for [pattern]..." +- **After discovery**: "Found the config in \`src/config/\`. The pattern uses factory functions." +- **Before large edits**: "About to modify [files] — [what and why]." +- **After edits**: "Updated [file] — [what changed]. Running verification." +- **On blockers**: "Hit a snag with [issue] — trying [alternative] instead." + +Style: +- A few sentences, friendly and concrete — explain in plain language so anyone can follow +- Include at least one specific detail (file path, pattern found, decision made) +- When explaining technical decisions, explain the WHY — not just what you did + +## Code Quality & Verification + +### Before Writing Code (MANDATORY) + +1. SEARCH existing codebase for similar patterns/styles +2. Match naming, indentation, import styles, error handling conventions +3. Default to ASCII. Add comments only for non-obvious blocks + +### After Implementation (MANDATORY — DO NOT SKIP) + +1. **\`lsp_diagnostics\`** on ALL modified files — zero errors required +2. **Run related tests** — pattern: modified \`foo.ts\` → look for \`foo.test.ts\` +3. **Run typecheck** if TypeScript project +4. **Run build** if applicable — exit code 0 required +5. **Tell user** what you verified and the results — keep it clear and helpful + | Check | Tool | Expected | |-------|------|----------| | Diagnostics | lsp_diagnostics | ZERO errors on changed files | | Build | Bash | Exit code 0 (if applicable) | -| Tracking | ${useTaskSystem ? "TaskUpdate" : "todowrite"} | ${verificationText} | +| Tracking | ${useTaskSystem ? "task_update" : "todowrite"} | ${verificationText} | **No evidence = not complete.** - - -- Start immediately. No acknowledgments ("I'll...", "Let me..."). -- Match user's communication style. -- Dense > verbose. -- Use structured output (bullets, tables) over prose. -` +## Output Contract + + +**Format:** +- Default: 3-6 sentences or ≤5 bullets +- Simple yes/no: ≤2 sentences +- Complex multi-file: 1 overview paragraph + ≤5 tagged bullets (What, Where, Risks, Next, Open) + +**Style:** +- Start work immediately. Skip empty preambles ("I'm on it", "Let me...") — but DO send clear context before significant actions +- Be friendly, clear, and easy to understand — explain so anyone can follow your reasoning +- When explaining technical decisions, explain the WHY — not just the WHAT + + +## Failure Recovery + +1. Fix root causes, not symptoms. Re-verify after EVERY attempt. +2. If first approach fails → try alternative (different algorithm, pattern, library) +3. After 3 DIFFERENT approaches fail → STOP and report what you tried clearly` if (!promptAppend) return prompt return prompt + "\n\n" + resolvePromptAppend(promptAppend) } -function buildGptBlockedActionsSection(useTaskSystem: boolean): string { - if (useTaskSystem) { - return ` -BLOCKED (will fail if attempted): -| Tool | Status | Description | -|------|--------|-------------| -| task | BLOCKED | Agent delegation tool — you cannot spawn other agents | - -ALLOWED: -| Tool | Usage | -|------|-------| -| call_omo_agent | Spawn explore/librarian for research ONLY | -| task_create | Create tasks to track your work | -| task_update | Update task status (in_progress, completed) | -| task_list | List active tasks | -| task_get | Get task details by ID | - -You work ALONE for implementation. No delegation. -` - } - - return ` -BLOCKED (will fail if attempted): -| Tool | Status | Description | -|------|--------|-------------| -| task | BLOCKED | Agent delegation tool — you cannot spawn other agents | - -ALLOWED: -| Tool | Usage | -|------|-------| -| call_omo_agent | Spawn explore/librarian for research ONLY | - -You work ALONE for implementation. No delegation. -` -} - function buildGptTaskDisciplineSection(useTaskSystem: boolean): string { if (useTaskSystem) { - return ` -TASK TRACKING (NON-NEGOTIABLE): + return `## Task Discipline (NON-NEGOTIABLE) + | Trigger | Action | |---------|--------| -| 2+ steps | TaskCreate FIRST, atomic breakdown | -| Starting step | TaskUpdate(status="in_progress") - ONE at a time | -| Completing step | TaskUpdate(status="completed") IMMEDIATELY | +| 2+ steps | task_create FIRST, atomic breakdown | +| Starting step | task_update(status="in_progress") — ONE at a time | +| Completing step | task_update(status="completed") IMMEDIATELY | | Batching | NEVER batch completions | -No tasks on multi-step work = INCOMPLETE WORK. -` +No tasks on multi-step work = INCOMPLETE WORK.` } - return ` -TODO TRACKING (NON-NEGOTIABLE): + return `## Todo Discipline (NON-NEGOTIABLE) + | Trigger | Action | |---------|--------| | 2+ steps | todowrite FIRST, atomic breakdown | -| Starting step | Mark in_progress - ONE at a time | +| Starting step | Mark in_progress — ONE at a time | | Completing step | Mark completed IMMEDIATELY | | Batching | NEVER batch completions | -No todos on multi-step work = INCOMPLETE WORK. -` +No todos on multi-step work = INCOMPLETE WORK.` } diff --git a/src/agents/sisyphus-junior/index.test.ts b/src/agents/sisyphus-junior/index.test.ts index 748d89245..19c195cb7 100644 --- a/src/agents/sisyphus-junior/index.test.ts +++ b/src/agents/sisyphus-junior/index.test.ts @@ -71,7 +71,7 @@ describe("createSisyphusJuniorAgentWithOverrides", () => { const result = createSisyphusJuniorAgentWithOverrides(override) // then - expect(result.prompt).toContain("You work ALONE") + expect(result.prompt).toContain("Sisyphus-Junior") expect(result.prompt).toContain("Extra instructions here") }) }) @@ -138,7 +138,7 @@ describe("createSisyphusJuniorAgentWithOverrides", () => { const result = createSisyphusJuniorAgentWithOverrides(override) // then - expect(result.prompt).toContain("You work ALONE") + expect(result.prompt).toContain("Sisyphus-Junior") expect(result.prompt).not.toBe("Completely new prompt that replaces everything") }) }) @@ -209,12 +209,12 @@ describe("createSisyphusJuniorAgentWithOverrides", () => { const result = createSisyphusJuniorAgentWithOverrides(override, undefined, true) //#then - expect(result.prompt).toContain("TaskCreate") - expect(result.prompt).toContain("TaskUpdate") + expect(result.prompt).toContain("task_create") + expect(result.prompt).toContain("task_update") expect(result.prompt).not.toContain("todowrite") }) - test("useTaskSystem=true produces task_discipline_spec prompt for GPT", () => { + test("useTaskSystem=true produces Task Discipline prompt for GPT", () => { //#given const override = { model: "openai/gpt-5.2" } @@ -222,9 +222,9 @@ describe("createSisyphusJuniorAgentWithOverrides", () => { const result = createSisyphusJuniorAgentWithOverrides(override, undefined, true) //#then - expect(result.prompt).toContain("") - expect(result.prompt).toContain("TaskCreate") - expect(result.prompt).not.toContain("") + expect(result.prompt).toContain("Task Discipline") + expect(result.prompt).toContain("task_create") + expect(result.prompt).not.toContain("Todo Discipline") }) test("useTaskSystem=false (default) produces Todo_Discipline prompt", () => { @@ -236,54 +236,48 @@ describe("createSisyphusJuniorAgentWithOverrides", () => { //#then expect(result.prompt).toContain("todowrite") - expect(result.prompt).not.toContain("TaskCreate") + expect(result.prompt).not.toContain("task_create") }) - test("useTaskSystem=true explicitly lists task management tools as ALLOWED for Claude", () => { + test("useTaskSystem=true includes task_create/task_update in Claude prompt", () => { //#given const override = { model: "anthropic/claude-sonnet-4-5" } //#when const result = createSisyphusJuniorAgentWithOverrides(override, undefined, true) - //#then - prompt must disambiguate: delegation tool blocked, management tools allowed + //#then expect(result.prompt).toContain("task_create") expect(result.prompt).toContain("task_update") - expect(result.prompt).toContain("task_list") - expect(result.prompt).toContain("task_get") - expect(result.prompt).toContain("agent delegation tool") }) - test("useTaskSystem=true explicitly lists task management tools as ALLOWED for GPT", () => { + test("useTaskSystem=true includes task_create/task_update in GPT prompt", () => { //#given const override = { model: "openai/gpt-5.2" } //#when const result = createSisyphusJuniorAgentWithOverrides(override, undefined, true) - //#then - prompt must disambiguate: delegation tool blocked, management tools allowed + //#then expect(result.prompt).toContain("task_create") expect(result.prompt).toContain("task_update") - expect(result.prompt).toContain("task_list") - expect(result.prompt).toContain("task_get") - expect(result.prompt).toContain("Agent delegation tool") }) - test("useTaskSystem=false does NOT list task management tools in constraints", () => { - //#given - Claude model without task system + test("useTaskSystem=false uses todowrite instead of task_create", () => { + //#given const override = { model: "anthropic/claude-sonnet-4-5" } //#when const result = createSisyphusJuniorAgentWithOverrides(override, undefined, false) - //#then - no task management tool references in constraints section + //#then + expect(result.prompt).toContain("todowrite") expect(result.prompt).not.toContain("task_create") - expect(result.prompt).not.toContain("task_update") }) }) describe("prompt composition", () => { - test("base prompt contains discipline constraints", () => { + test("base prompt contains identity", () => { // given const override = {} @@ -292,10 +286,10 @@ describe("createSisyphusJuniorAgentWithOverrides", () => { // then expect(result.prompt).toContain("Sisyphus-Junior") - expect(result.prompt).toContain("You work ALONE") + expect(result.prompt).toContain("Execute tasks directly") }) - test("Claude model uses default prompt with BLOCKED ACTIONS section", () => { + test("Claude model uses default prompt with discipline section", () => { // given const override = { model: "anthropic/claude-sonnet-4-5" } @@ -303,11 +297,11 @@ describe("createSisyphusJuniorAgentWithOverrides", () => { const result = createSisyphusJuniorAgentWithOverrides(override) // then - expect(result.prompt).toContain("BLOCKED ACTIONS") - expect(result.prompt).not.toContain("") + expect(result.prompt).toContain("") + expect(result.prompt).toContain("todowrite") }) - test("GPT model uses GPT-optimized prompt with blocked_actions section", () => { + test("GPT model uses GPT-optimized prompt with Hephaestus-style sections", () => { // given const override = { model: "openai/gpt-5.2" } @@ -315,9 +309,9 @@ describe("createSisyphusJuniorAgentWithOverrides", () => { const result = createSisyphusJuniorAgentWithOverrides(override) // then - expect(result.prompt).toContain("") - expect(result.prompt).toContain("") - expect(result.prompt).toContain("") + expect(result.prompt).toContain("Scope Discipline") + expect(result.prompt).toContain("") + expect(result.prompt).toContain("Progress Updates") }) test("prompt_append is added after base prompt", () => { @@ -328,7 +322,7 @@ describe("createSisyphusJuniorAgentWithOverrides", () => { const result = createSisyphusJuniorAgentWithOverrides(override) // then - const baseEndIndex = result.prompt!.indexOf("Dense > verbose.") + const baseEndIndex = result.prompt!.indexOf("") const appendIndex = result.prompt!.indexOf("CUSTOM_MARKER_FOR_TEST") expect(baseEndIndex).not.toBe(-1) expect(appendIndex).toBeGreaterThan(baseEndIndex) @@ -383,7 +377,7 @@ describe("getSisyphusJuniorPromptSource", () => { }) describe("buildSisyphusJuniorPrompt", () => { - test("GPT model prompt contains GPT-5.2 specific sections", () => { + test("GPT model prompt contains Hephaestus-style sections", () => { // given const model = "openai/gpt-5.2" @@ -391,10 +385,10 @@ describe("buildSisyphusJuniorPrompt", () => { const prompt = buildSisyphusJuniorPrompt(model, false) // then - expect(prompt).toContain("") - expect(prompt).toContain("") - expect(prompt).toContain("") + expect(prompt).toContain("## Identity") + expect(prompt).toContain("Scope Discipline") expect(prompt).toContain("") + expect(prompt).toContain("Progress Updates") }) test("Claude model prompt contains Claude-specific sections", () => { @@ -406,11 +400,11 @@ describe("buildSisyphusJuniorPrompt", () => { // then expect(prompt).toContain("") - expect(prompt).toContain("") - expect(prompt).toContain("BLOCKED ACTIONS") + expect(prompt).toContain("") + expect(prompt).toContain("todowrite") }) - test("useTaskSystem=true includes Task_Discipline for GPT", () => { + test("useTaskSystem=true includes Task Discipline for GPT", () => { // given const model = "openai/gpt-5.2" @@ -418,8 +412,8 @@ describe("buildSisyphusJuniorPrompt", () => { const prompt = buildSisyphusJuniorPrompt(model, true) // then - expect(prompt).toContain("") - expect(prompt).toContain("TaskCreate") + expect(prompt).toContain("Task Discipline") + expect(prompt).toContain("task_create") }) test("useTaskSystem=false includes Todo_Discipline for Claude", () => {