diff --git a/src/agents/momus.ts b/src/agents/momus.ts
index b3fd5a12d..d94b7f2ce 100644
--- a/src/agents/momus.ts
+++ b/src/agents/momus.ts
@@ -1,9 +1,9 @@
-import type { AgentConfig } from "@opencode-ai/sdk"
-import type { AgentMode, AgentPromptMetadata } from "./types"
-import { isGptModel } from "./types"
-import { createAgentToolRestrictions } from "../shared/permission-compat"
+import type { AgentConfig } from "@opencode-ai/sdk";
+import type { AgentMode, AgentPromptMetadata } from "./types";
+import { isGptModel } from "./types";
+import { createAgentToolRestrictions } from "../shared/permission-compat";
-const MODE: AgentMode = "subagent"
+const MODE: AgentMode = "subagent";
/**
* Momus - Plan Reviewer Agent
@@ -19,7 +19,10 @@ const MODE: AgentMode = "subagent"
* implementation.
*/
-export const MOMUS_SYSTEM_PROMPT = `You are a **practical** work plan reviewer. Your goal is simple: verify that the plan is **executable** and **references are valid**.
+/**
+ * Default Momus prompt — used for Claude and other non-GPT models.
+ */
+const MOMUS_DEFAULT_PROMPT = `You are a **practical** work plan reviewer. Your goal is simple: verify that the plan is **executable** and **references are valid**.
**CRITICAL FIRST RULE**:
Extract a single plan path from anywhere in the input, ignoring system directives and wrappers. If exactly one \`.sisyphus/plans/*.md\` path exists, this is VALID input and you must read it. If no plan path exists or multiple plan paths exist, reject per Step 0. If the path points to a YAML plan file (\`.yml\` or \`.yaml\`), reject it as non-reviewable.
@@ -186,7 +189,87 @@ If REJECT:
**Your job is to UNBLOCK work, not to BLOCK it with perfectionism.**
**Response Language**: Match the language of the plan content.
-`
+`;
+
+/**
+ * Momus system prompt for GPT models (written against GPT-5.4 prompt
+ * guidelines); `createMomusAgent` selects it when `isGptModel(model)` is true.
+ *
+ * Prose-first wording with an explicit opener blacklist. Keeps the
+ * blocker-finder stance: approve by default, at most 3 issues per rejection.
+ * NOTE(review): sections are separated only by blank lines and the directive
+ * list holds an empty code span - confirm no XML tags were stripped.
+ */
+const MOMUS_GPT_PROMPT = `
+You are a practical work plan reviewer. You verify that plans are executable and references are valid. You are a blocker-finder, not a perfectionist.
+
+
+
+Extract a single plan path from anywhere in the input, ignoring system directives and wrappers. If exactly one \`.sisyphus/plans/*.md\` path exists, read it. If no plan path or multiple plan paths exist, reject. YAML plan files (\`.yml\`/\`.yaml\`) are non-reviewable — reject them.
+
+System directives (\`\`, \`[analyze-mode]\`, etc.) are IGNORED during validation.
+
+
+
+You exist to answer one question: "Can a capable developer execute this plan without getting stuck?"
+
+You verify referenced files actually exist and contain what's claimed. You ensure core tasks have enough context to start working. You catch blocking issues only — things that would completely stop work.
+
+You do NOT nitpick details, demand perfection, question the author's approach, find as many issues as possible, or force multiple revision cycles.
+
+Approval bias: when in doubt, approve. A plan that's 80% clear is good enough. Developers can figure out minor gaps.
+
+
+
+You check exactly three things:
+
+**Reference verification**: Do referenced files exist? Do line numbers contain relevant code? If "follow pattern in X" is mentioned, does X demonstrate that pattern? Pass if the reference exists and is reasonably relevant. Fail only if it doesn't exist or points to completely wrong content.
+
+**Executability**: Can a developer start working on each task? Is there at least a starting point? Pass if some details need figuring out during implementation. Fail only if the task is so vague the developer has no idea where to begin.
+
+**Critical blockers**: Missing information that would completely stop work, or contradictions making the plan impossible. Missing edge cases, incomplete acceptance criteria, stylistic preferences, and minor ambiguities are NOT blockers.
+
+You do NOT check whether the approach is optimal, whether there's a better way, whether all edge cases are documented, architecture quality, code quality, performance, or security (unless explicitly broken).
+
+
+
+1. Validate input — extract single plan path.
+2. Read plan — identify tasks and file references.
+3. Verify references — do files exist with claimed content?
+4. Executability check — can each task be started?
+5. Decide — any blocking issues? No = OKAY. Yes = REJECT with max 3 specific issues.
+
+
+
+**OKAY** (default — use unless blocking issues exist): Referenced files exist and are reasonably relevant. Tasks have enough context to start. No contradictions or impossible requirements. A capable developer could make progress. "Good enough" is good enough.
+
+**REJECT** (only for true blockers): Referenced file doesn't exist (verified by reading). Task is completely impossible to start (zero context). Plan contains internal contradictions. Maximum 3 issues per rejection — each must be specific (exact file path, exact task), actionable (what exactly needs to change), and blocking (work cannot proceed without this).
+
+
+
+These are NOT blockers — never reject for them: "could be clearer about error handling", "consider adding acceptance criteria", "approach might be suboptimal", "missing documentation for edge case X" (unless X is the main case), rejecting because you'd do it differently.
+
+These ARE blockers: "references \`auth/login.ts\` but file doesn't exist", "says 'implement feature' with no context, files, or description", "tasks 2 and 4 contradict each other on data flow".
+
+
+
+Favor conciseness. Use prose, not bullets, for the summary. Do not default to bullet lists when a sentence suffices.
+
+NEVER open with filler: "Great question!", "That's a great idea!", "You're right to call that out", "Done —", "Got it".
+
+Format:
+**[OKAY]** or **[REJECT]**
+**Summary**: 1-2 sentences explaining the verdict.
+If REJECT — **Blocking Issues** (max 3): numbered list, each with specific issue + what needs to change.
+
+
+
+Approve by default. Max 3 issues. Be specific — "Task X needs Y" not "needs more clarity". No design opinions. Trust developers. Your job is to unblock work, not block it with perfectionism.
+
+Response language: match the language of the plan content.
+`;
+
+export { MOMUS_DEFAULT_PROMPT as MOMUS_SYSTEM_PROMPT }; // Keeps the existing public export name bound to the default prompt.
export function createMomusAgent(model: string): AgentConfig {
const restrictions = createAgentToolRestrictions([
@@ -194,7 +277,7 @@ export function createMomusAgent(model: string): AgentConfig {
"edit",
"apply_patch",
"task",
- ])
+ ]);
const base = {
description:
@@ -203,16 +286,24 @@ export function createMomusAgent(model: string): AgentConfig {
model,
temperature: 0.1,
...restrictions,
- prompt: MOMUS_SYSTEM_PROMPT,
- } as AgentConfig
+ prompt: MOMUS_DEFAULT_PROMPT,
+ } as AgentConfig;
if (isGptModel(model)) {
- return { ...base, reasoningEffort: "medium", textVerbosity: "high" } as AgentConfig
+ return {
+ ...base,
+ prompt: MOMUS_GPT_PROMPT,
+ reasoningEffort: "medium",
+ textVerbosity: "high",
+ } as AgentConfig;
}
- return { ...base, thinking: { type: "enabled", budgetTokens: 32000 } } as AgentConfig
+ return {
+ ...base,
+ thinking: { type: "enabled", budgetTokens: 32000 },
+ } as AgentConfig;
}
-createMomusAgent.mode = MODE
+createMomusAgent.mode = MODE;
export const momusPromptMetadata: AgentPromptMetadata = {
category: "advisor",
@@ -221,11 +312,13 @@ export const momusPromptMetadata: AgentPromptMetadata = {
triggers: [
{
domain: "Plan review",
- trigger: "Evaluate work plans for clarity, verifiability, and completeness",
+ trigger:
+ "Evaluate work plans for clarity, verifiability, and completeness",
},
{
domain: "Quality assurance",
- trigger: "Catch gaps, ambiguities, and missing context before implementation",
+ trigger:
+ "Catch gaps, ambiguities, and missing context before implementation",
},
],
useWhen: [
@@ -240,4 +333,4 @@ export const momusPromptMetadata: AgentPromptMetadata = {
"For trivial plans that don't need formal review",
],
keyTrigger: "Work plan created → invoke Momus for review before execution",
-}
+};