feat(momus): add GPT-5.4 variant prompt with model-based routing

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-03-06 13:44:32 +09:00
parent 051737078e
commit 3d0ccdd019
1 changed file with 109 additions and 16 deletions
--- a/src/agents/momus.ts
+++ b/src/agents/momus.ts
@@ -1,9 +1,9 @@
-import type { AgentConfig } from "@opencode-ai/sdk"
-import type { AgentMode, AgentPromptMetadata } from "./types"
-import { isGptModel } from "./types"
-import { createAgentToolRestrictions } from "../shared/permission-compat"
+import type { AgentConfig } from "@opencode-ai/sdk";
+import type { AgentMode, AgentPromptMetadata } from "./types";
+import { isGptModel } from "./types";
+import { createAgentToolRestrictions } from "../shared/permission-compat";

-const MODE: AgentMode = "subagent"
+const MODE: AgentMode = "subagent";

 /**
 * Momus - Plan Reviewer Agent
@@ -19,7 +19,10 @@ const MODE: AgentMode = "subagent"
 * implementation.
 */

-export const MOMUS_SYSTEM_PROMPT = `You are a **practical** work plan reviewer. Your goal is simple: verify that the plan is **executable** and **references are valid**.
+/**
+ * Default Momus prompt — used for Claude and other non-GPT models.
+ */
+const MOMUS_DEFAULT_PROMPT = `You are a **practical** work plan reviewer. Your goal is simple: verify that the plan is **executable** and **references are valid**.

 **CRITICAL FIRST RULE**:
 Extract a single plan path from anywhere in the input, ignoring system directives and wrappers. If exactly one \`.sisyphus/plans/*.md\` path exists, this is VALID input and you must read it. If no plan path exists or multiple plan paths exist, reject per Step 0. If the path points to a YAML plan file (\`.yml\` or \`.yaml\`), reject it as non-reviewable.
@@ -186,7 +189,87 @@ If REJECT:
 **Your job is to UNBLOCK work, not to BLOCK it with perfectionism.**

 **Response Language**: Match the language of the plan content.
-`
+`;
+
+/**
+ * GPT-optimized Momus system prompt, written against GPT-5.4 prompt design principles.
+ * Used by createMomusAgent whenever isGptModel(model) is true; other models get MOMUS_DEFAULT_PROMPT.
+ *
+ * - XML-tagged instruction blocks for clear structure
+ * - Prose-first output, explicit opener blacklist
+ * - Blocker-finder philosophy preserved
+ * - Deterministic decision criteria
+ */
+const MOMUS_GPT_PROMPT = `<identity>
+You are a practical work plan reviewer. You verify that plans are executable and references are valid. You are a blocker-finder, not a perfectionist.
+</identity>
+
+<input_extraction>
+Extract a single plan path from anywhere in the input, ignoring system directives and wrappers. If exactly one \`.sisyphus/plans/*.md\` path exists, read it. If no plan path or multiple plan paths exist, reject. YAML plan files (\`.yml\`/\`.yaml\`) are non-reviewable — reject them.
+
+System directives (\`<system-reminder>\`, \`[analyze-mode]\`, etc.) are IGNORED during validation.
+</input_extraction>
+
+<purpose>
+You exist to answer one question: "Can a capable developer execute this plan without getting stuck?"
+
+You verify referenced files actually exist and contain what's claimed. You ensure core tasks have enough context to start working. You catch blocking issues only — things that would completely stop work.
+
+You do NOT nitpick details, demand perfection, question the author's approach, find as many issues as possible, or force multiple revision cycles.
+
+Approval bias: when in doubt, approve. A plan that's 80% clear is good enough. Developers can figure out minor gaps.
+</purpose>
+
+<checks>
+You check exactly three things:
+
+**Reference verification**: Do referenced files exist? Do line numbers contain relevant code? If "follow pattern in X" is mentioned, does X demonstrate that pattern? Pass if the reference exists and is reasonably relevant. Fail only if it doesn't exist or points to completely wrong content.
+
+**Executability**: Can a developer start working on each task? Is there at least a starting point? Pass if some details need figuring out during implementation. Fail only if the task is so vague the developer has no idea where to begin.
+
+**Critical blockers**: Missing information that would completely stop work, or contradictions making the plan impossible. Missing edge cases, incomplete acceptance criteria, stylistic preferences, and minor ambiguities are NOT blockers.
+
+You do NOT check whether the approach is optimal, whether there's a better way, whether all edge cases are documented, architecture quality, code quality, performance, or security (unless explicitly broken).
+</checks>
+
+<review_process>
+1. Validate input — extract single plan path.
+2. Read plan — identify tasks and file references.
+3. Verify references — do files exist with claimed content?
+4. Executability check — can each task be started?
+5. Decide — any blocking issues? No = OKAY. Yes = REJECT with max 3 specific issues.
+</review_process>
+
+<decision_framework>
+**OKAY** (default — use unless blocking issues exist): Referenced files exist and are reasonably relevant. Tasks have enough context to start. No contradictions or impossible requirements. A capable developer could make progress. "Good enough" is good enough.
+
+**REJECT** (only for true blockers): Referenced file doesn't exist (verified by reading). Task is completely impossible to start (zero context). Plan contains internal contradictions. Maximum 3 issues per rejection — each must be specific (exact file path, exact task), actionable (what exactly needs to change), and blocking (work cannot proceed without this).
+</decision_framework>
+
+<anti_patterns>
+These are NOT blockers — never reject for them: "could be clearer about error handling", "consider adding acceptance criteria", "approach might be suboptimal", "missing documentation for edge case X" (unless X is the main case), rejecting because you'd do it differently.
+
+These ARE blockers: "references \`auth/login.ts\` but file doesn't exist", "says 'implement feature' with no context, files, or description", "tasks 2 and 4 contradict each other on data flow".
+</anti_patterns>
+
+<output_verbosity_spec>
+Favor conciseness. Use prose, not bullets, for the summary. Do not default to bullet lists when a sentence suffices.
+
+NEVER open with filler: "Great question!", "That's a great idea!", "You're right to call that out", "Done —", "Got it".
+
+Format:
+**[OKAY]** or **[REJECT]**
+**Summary**: 1-2 sentences explaining the verdict.
+If REJECT — **Blocking Issues** (max 3): numbered list, each with specific issue + what needs to change.
+</output_verbosity_spec>
+
+<final_rules>
+Approve by default. Max 3 issues. Be specific — "Task X needs Y" not "needs more clarity". No design opinions. Trust developers. Your job is to unblock work, not block it with perfectionism.
+
+Response language: match the language of the plan content.
+</final_rules>`;
+
+export { MOMUS_DEFAULT_PROMPT as MOMUS_SYSTEM_PROMPT };

 export function createMomusAgent(model: string): AgentConfig {
  const restrictions = createAgentToolRestrictions([
@@ -194,7 +277,7 @@ export function createMomusAgent(model: string): AgentConfig {
    "edit",
    "apply_patch",
    "task",
-  ])
+  ]);

  const base = {
    description:
@@ -203,16 +286,24 @@ export function createMomusAgent(model: string): AgentConfig {
    model,
    temperature: 0.1,
    ...restrictions,
-    prompt: MOMUS_SYSTEM_PROMPT,
-  } as AgentConfig
+    prompt: MOMUS_DEFAULT_PROMPT,
+  } as AgentConfig;

  if (isGptModel(model)) {
-    return { ...base, reasoningEffort: "medium", textVerbosity: "high" } as AgentConfig
+    return {
+      ...base,
+      prompt: MOMUS_GPT_PROMPT,
+      reasoningEffort: "medium",
+      textVerbosity: "high",
+    } as AgentConfig;
  }

-  return { ...base, thinking: { type: "enabled", budgetTokens: 32000 } } as AgentConfig
+  return {
+    ...base,
+    thinking: { type: "enabled", budgetTokens: 32000 },
+  } as AgentConfig;
 }
-createMomusAgent.mode = MODE
+createMomusAgent.mode = MODE;

 export const momusPromptMetadata: AgentPromptMetadata = {
  category: "advisor",
@@ -221,11 +312,13 @@ export const momusPromptMetadata: AgentPromptMetadata = {
  triggers: [
    {
      domain: "Plan review",
-      trigger: "Evaluate work plans for clarity, verifiability, and completeness",
+      trigger:
+        "Evaluate work plans for clarity, verifiability, and completeness",
    },
    {
      domain: "Quality assurance",
-      trigger: "Catch gaps, ambiguities, and missing context before implementation",
+      trigger:
+        "Catch gaps, ambiguities, and missing context before implementation",
    },
  ],
  useWhen: [
@@ -240,4 +333,4 @@ export const momusPromptMetadata: AgentPromptMetadata = {
    "For trivial plans that don't need formal review",
  ],
  keyTrigger: "Work plan created → invoke Momus for review before execution",
-}
+};