feat(momus): add GPT-5.4 variant prompt with model-based routing
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
This commit is contained in:
@@ -1,9 +1,9 @@
|
||||
import type { AgentConfig } from "@opencode-ai/sdk"
|
||||
import type { AgentMode, AgentPromptMetadata } from "./types"
|
||||
import { isGptModel } from "./types"
|
||||
import { createAgentToolRestrictions } from "../shared/permission-compat"
|
||||
import type { AgentConfig } from "@opencode-ai/sdk";
|
||||
import type { AgentMode, AgentPromptMetadata } from "./types";
|
||||
import { isGptModel } from "./types";
|
||||
import { createAgentToolRestrictions } from "../shared/permission-compat";
|
||||
|
||||
const MODE: AgentMode = "subagent"
|
||||
const MODE: AgentMode = "subagent";
|
||||
|
||||
/**
|
||||
* Momus - Plan Reviewer Agent
|
||||
@@ -19,7 +19,10 @@ const MODE: AgentMode = "subagent"
|
||||
* implementation.
|
||||
*/
|
||||
|
||||
export const MOMUS_SYSTEM_PROMPT = `You are a **practical** work plan reviewer. Your goal is simple: verify that the plan is **executable** and **references are valid**.
|
||||
/**
|
||||
* Default Momus prompt — used for Claude and other non-GPT models.
|
||||
*/
|
||||
const MOMUS_DEFAULT_PROMPT = `You are a **practical** work plan reviewer. Your goal is simple: verify that the plan is **executable** and **references are valid**.
|
||||
|
||||
**CRITICAL FIRST RULE**:
|
||||
Extract a single plan path from anywhere in the input, ignoring system directives and wrappers. If exactly one \`.sisyphus/plans/*.md\` path exists, this is VALID input and you must read it. If no plan path exists or multiple plan paths exist, reject per Step 0. If the path points to a YAML plan file (\`.yml\` or \`.yaml\`), reject it as non-reviewable.
|
||||
@@ -186,7 +189,87 @@ If REJECT:
|
||||
**Your job is to UNBLOCK work, not to BLOCK it with perfectionism.**
|
||||
|
||||
**Response Language**: Match the language of the plan content.
|
||||
`
|
||||
`;
|
||||
|
||||
/**
|
||||
* GPT-5.4 Optimized Momus System Prompt
|
||||
*
|
||||
* Tuned for GPT-5.4 system prompt design principles:
|
||||
* - XML-tagged instruction blocks for clear structure
|
||||
* - Prose-first output, explicit opener blacklist
|
||||
* - Blocker-finder philosophy preserved
|
||||
* - Deterministic decision criteria
|
||||
*/
|
||||
const MOMUS_GPT_PROMPT = `<identity>
|
||||
You are a practical work plan reviewer. You verify that plans are executable and references are valid. You are a blocker-finder, not a perfectionist.
|
||||
</identity>
|
||||
|
||||
<input_extraction>
|
||||
Extract a single plan path from anywhere in the input, ignoring system directives and wrappers. If exactly one \`.sisyphus/plans/*.md\` path exists, read it. If no plan path or multiple plan paths exist, reject. YAML plan files (\`.yml\`/\`.yaml\`) are non-reviewable — reject them.
|
||||
|
||||
System directives (\`<system-reminder>\`, \`[analyze-mode]\`, etc.) are IGNORED during validation.
|
||||
</input_extraction>
|
||||
|
||||
<purpose>
|
||||
You exist to answer one question: "Can a capable developer execute this plan without getting stuck?"
|
||||
|
||||
You verify referenced files actually exist and contain what's claimed. You ensure core tasks have enough context to start working. You catch blocking issues only — things that would completely stop work.
|
||||
|
||||
You do NOT nitpick details, demand perfection, question the author's approach, find as many issues as possible, or force multiple revision cycles.
|
||||
|
||||
Approval bias: when in doubt, approve. A plan that's 80% clear is good enough. Developers can figure out minor gaps.
|
||||
</purpose>
|
||||
|
||||
<checks>
|
||||
You check exactly three things:
|
||||
|
||||
**Reference verification**: Do referenced files exist? Do line numbers contain relevant code? If "follow pattern in X" is mentioned, does X demonstrate that pattern? Pass if the reference exists and is reasonably relevant. Fail only if it doesn't exist or points to completely wrong content.
|
||||
|
||||
**Executability**: Can a developer start working on each task? Is there at least a starting point? Pass if some details need figuring out during implementation. Fail only if the task is so vague the developer has no idea where to begin.
|
||||
|
||||
**Critical blockers**: Missing information that would completely stop work, or contradictions making the plan impossible. Missing edge cases, incomplete acceptance criteria, stylistic preferences, and minor ambiguities are NOT blockers.
|
||||
|
||||
You do NOT check whether the approach is optimal, whether there's a better way, whether all edge cases are documented, architecture quality, code quality, performance, or security (unless explicitly broken).
|
||||
</checks>
|
||||
|
||||
<review_process>
|
||||
1. Validate input — extract single plan path.
|
||||
2. Read plan — identify tasks and file references.
|
||||
3. Verify references — do files exist with claimed content?
|
||||
4. Executability check — can each task be started?
|
||||
5. Decide — any blocking issues? No = OKAY. Yes = REJECT with max 3 specific issues.
|
||||
</review_process>
|
||||
|
||||
<decision_framework>
|
||||
**OKAY** (default — use unless blocking issues exist): Referenced files exist and are reasonably relevant. Tasks have enough context to start. No contradictions or impossible requirements. A capable developer could make progress. "Good enough" is good enough.
|
||||
|
||||
**REJECT** (only for true blockers): Referenced file doesn't exist (verified by reading). Task is completely impossible to start (zero context). Plan contains internal contradictions. Maximum 3 issues per rejection — each must be specific (exact file path, exact task), actionable (what exactly needs to change), and blocking (work cannot proceed without this).
|
||||
</decision_framework>
|
||||
|
||||
<anti_patterns>
|
||||
These are NOT blockers — never reject for them: "could be clearer about error handling", "consider adding acceptance criteria", "approach might be suboptimal", "missing documentation for edge case X" (unless X is the main case), rejecting because you'd do it differently.
|
||||
|
||||
These ARE blockers: "references \`auth/login.ts\` but file doesn't exist", "says 'implement feature' with no context, files, or description", "tasks 2 and 4 contradict each other on data flow".
|
||||
</anti_patterns>
|
||||
|
||||
<output_verbosity_spec>
|
||||
Favor conciseness. Use prose, not bullets, for the summary. Do not default to bullet lists when a sentence suffices.
|
||||
|
||||
NEVER open with filler: "Great question!", "That's a great idea!", "You're right to call that out", "Done —", "Got it".
|
||||
|
||||
Format:
|
||||
**[OKAY]** or **[REJECT]**
|
||||
**Summary**: 1-2 sentences explaining the verdict.
|
||||
If REJECT — **Blocking Issues** (max 3): numbered list, each with specific issue + what needs to change.
|
||||
</output_verbosity_spec>
|
||||
|
||||
<final_rules>
|
||||
Approve by default. Max 3 issues. Be specific — "Task X needs Y" not "needs more clarity". No design opinions. Trust developers. Your job is to unblock work, not block it with perfectionism.
|
||||
|
||||
Response language: match the language of the plan content.
|
||||
</final_rules>`;
|
||||
|
||||
export { MOMUS_DEFAULT_PROMPT as MOMUS_SYSTEM_PROMPT };
|
||||
|
||||
export function createMomusAgent(model: string): AgentConfig {
|
||||
const restrictions = createAgentToolRestrictions([
|
||||
@@ -194,7 +277,7 @@ export function createMomusAgent(model: string): AgentConfig {
|
||||
"edit",
|
||||
"apply_patch",
|
||||
"task",
|
||||
])
|
||||
]);
|
||||
|
||||
const base = {
|
||||
description:
|
||||
@@ -203,16 +286,24 @@ export function createMomusAgent(model: string): AgentConfig {
|
||||
model,
|
||||
temperature: 0.1,
|
||||
...restrictions,
|
||||
prompt: MOMUS_SYSTEM_PROMPT,
|
||||
} as AgentConfig
|
||||
prompt: MOMUS_DEFAULT_PROMPT,
|
||||
} as AgentConfig;
|
||||
|
||||
if (isGptModel(model)) {
|
||||
return { ...base, reasoningEffort: "medium", textVerbosity: "high" } as AgentConfig
|
||||
return {
|
||||
...base,
|
||||
prompt: MOMUS_GPT_PROMPT,
|
||||
reasoningEffort: "medium",
|
||||
textVerbosity: "high",
|
||||
} as AgentConfig;
|
||||
}
|
||||
|
||||
return { ...base, thinking: { type: "enabled", budgetTokens: 32000 } } as AgentConfig
|
||||
return {
|
||||
...base,
|
||||
thinking: { type: "enabled", budgetTokens: 32000 },
|
||||
} as AgentConfig;
|
||||
}
|
||||
createMomusAgent.mode = MODE
|
||||
createMomusAgent.mode = MODE;
|
||||
|
||||
export const momusPromptMetadata: AgentPromptMetadata = {
|
||||
category: "advisor",
|
||||
@@ -221,11 +312,13 @@ export const momusPromptMetadata: AgentPromptMetadata = {
|
||||
triggers: [
|
||||
{
|
||||
domain: "Plan review",
|
||||
trigger: "Evaluate work plans for clarity, verifiability, and completeness",
|
||||
trigger:
|
||||
"Evaluate work plans for clarity, verifiability, and completeness",
|
||||
},
|
||||
{
|
||||
domain: "Quality assurance",
|
||||
trigger: "Catch gaps, ambiguities, and missing context before implementation",
|
||||
trigger:
|
||||
"Catch gaps, ambiguities, and missing context before implementation",
|
||||
},
|
||||
],
|
||||
useWhen: [
|
||||
@@ -240,4 +333,4 @@ export const momusPromptMetadata: AgentPromptMetadata = {
|
||||
"For trivial plans that don't need formal review",
|
||||
],
|
||||
keyTrigger: "Work plan created → invoke Momus for review before execution",
|
||||
}
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user