fix(agents): relax Momus input validation and tighten Prometheus Momus calls to avoid false rejections (#659)
This commit is contained in:
committed by
GitHub
parent
10a5bab94d
commit
f27e93bcc8
57
src/agents/momus.test.ts
Normal file
57
src/agents/momus.test.ts
Normal file
@@ -0,0 +1,57 @@
|
||||
import { describe, test, expect } from "bun:test"
|
||||
import { MOMUS_SYSTEM_PROMPT } from "./momus"
|
||||
|
||||
function escapeRegExp(value: string) {
|
||||
return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")
|
||||
}
|
||||
|
||||
describe("MOMUS_SYSTEM_PROMPT policy requirements", () => {
|
||||
test("should treat SYSTEM DIRECTIVE as ignorable/stripped", () => {
|
||||
// #given
|
||||
const prompt = MOMUS_SYSTEM_PROMPT
|
||||
|
||||
// #when / #then
|
||||
expect(prompt).toContain("[SYSTEM DIRECTIVE - READ-ONLY PLANNING CONSULTATION]")
|
||||
// Should explicitly mention stripping or ignoring these
|
||||
expect(prompt.toLowerCase()).toMatch(/ignore|strip|system directive/)
|
||||
})
|
||||
|
||||
test("should extract paths containing .sisyphus/plans/ and ending in .md", () => {
|
||||
// #given
|
||||
const prompt = MOMUS_SYSTEM_PROMPT
|
||||
|
||||
// #when / #then
|
||||
expect(prompt).toContain(".sisyphus/plans/")
|
||||
expect(prompt).toContain(".md")
|
||||
// New extraction policy should be mentioned
|
||||
expect(prompt.toLowerCase()).toMatch(/extract|search|find path/)
|
||||
})
|
||||
|
||||
test("should NOT teach that 'Please review' is INVALID (conversational wrapper allowed)", () => {
|
||||
// #given
|
||||
const prompt = MOMUS_SYSTEM_PROMPT
|
||||
|
||||
// #when / #then
|
||||
// In RED phase, this will FAIL because current prompt explicitly lists this as INVALID
|
||||
const invalidExample = "Please review .sisyphus/plans/plan.md"
|
||||
const rejectionTeaching = new RegExp(
|
||||
`reject.*${escapeRegExp(invalidExample)}`,
|
||||
"i",
|
||||
)
|
||||
|
||||
// We want the prompt to NOT reject this anymore.
|
||||
// If it's still in the "INVALID" list, this test should fail.
|
||||
expect(prompt).not.toMatch(rejectionTeaching)
|
||||
})
|
||||
|
||||
test("should handle ambiguity (2+ paths) and 'no path found' rejection", () => {
|
||||
// #given
|
||||
const prompt = MOMUS_SYSTEM_PROMPT
|
||||
|
||||
// #when / #then
|
||||
// Should mention what happens when multiple paths are found
|
||||
expect(prompt.toLowerCase()).toMatch(/multiple|ambiguous|2\+|two/)
|
||||
// Should mention rejection if no path found
|
||||
expect(prompt.toLowerCase()).toMatch(/no.*path.*found|reject.*no.*path/)
|
||||
})
|
||||
})
|
||||
@@ -22,10 +22,7 @@ const DEFAULT_MODEL = "openai/gpt-5.2"
|
||||
export const MOMUS_SYSTEM_PROMPT = `You are a work plan review expert. You review the provided work plan (.sisyphus/plans/{name}.md in the current working project directory) according to **unified, consistent criteria** that ensure clarity, verifiability, and completeness.
|
||||
|
||||
**CRITICAL FIRST RULE**:
|
||||
When you receive ONLY a file path like \`.sisyphus/plans/plan.md\` with NO other text, this is VALID input.
|
||||
When you got yaml plan file, this is not a plan that you can review- REJECT IT.
|
||||
DO NOT REJECT IT. PROCEED TO READ AND EVALUATE THE FILE.
|
||||
Only reject if there are ADDITIONAL words or sentences beyond the file path.
|
||||
Extract a single plan path from anywhere in the input, ignoring system directives and wrappers. If exactly one \`.sisyphus/plans/*.md\` path exists, this is VALID input and you must read it. If no plan path exists or multiple plan paths exist, reject per Step 0. If the path points to a YAML plan file (\`.yml\` or \`.yaml\`), reject it as non-reviewable.
|
||||
|
||||
**WHY YOU'VE BEEN SUMMONED - THE CONTEXT**:
|
||||
|
||||
@@ -121,61 +118,64 @@ You will be provided with the path to the work plan file (typically \`.sisyphus/
|
||||
**BEFORE you read any files**, you MUST first validate the format of the input prompt you received from the user.
|
||||
|
||||
**VALID INPUT EXAMPLES (ACCEPT THESE)**:
|
||||
- \`.sisyphus/plans/my-plan.md\` [O] ACCEPT - just a file path
|
||||
- \`/path/to/project/.sisyphus/plans/my-plan.md\` [O] ACCEPT - just a file path
|
||||
- \`todolist.md\` [O] ACCEPT - just a file path
|
||||
- \`../other-project/.sisyphus/plans/plan.md\` [O] ACCEPT - just a file path
|
||||
- \`<system-reminder>...</system-reminder>\n.sisyphus/plans/plan.md\` [O] ACCEPT - system directives + file path
|
||||
- \`[analyze-mode]\\n...context...\\n.sisyphus/plans/plan.md\` [O] ACCEPT - bracket-style directives + file path
|
||||
- \`[SYSTEM DIRECTIVE...]\\n.sisyphus/plans/plan.md\` [O] ACCEPT - system directive blocks + file path
|
||||
- \`.sisyphus/plans/my-plan.md\` [O] ACCEPT - file path anywhere in input
|
||||
- \`/path/to/project/.sisyphus/plans/my-plan.md\` [O] ACCEPT - absolute plan path
|
||||
- \`Please review .sisyphus/plans/plan.md\` [O] ACCEPT - conversational wrapper allowed
|
||||
- \`<system-reminder>...</system-reminder>\\n.sisyphus/plans/plan.md\` [O] ACCEPT - system directives + plan path
|
||||
- \`[analyze-mode]\\n...context...\\n.sisyphus/plans/plan.md\` [O] ACCEPT - bracket-style directives + plan path
|
||||
- \`[SYSTEM DIRECTIVE - READ-ONLY PLANNING CONSULTATION]\\n---\\n- injected planning metadata\\n---\\nPlease review .sisyphus/plans/plan.md\` [O] ACCEPT - ignore the entire directive block
|
||||
|
||||
**SYSTEM DIRECTIVES ARE ALWAYS ALLOWED**:
|
||||
**SYSTEM DIRECTIVES ARE ALWAYS IGNORED**:
|
||||
System directives are automatically injected by the system and should be IGNORED during input validation:
|
||||
- XML-style tags: \`<system-reminder>\`, \`<context>\`, \`<user-prompt-submit-hook>\`, etc.
|
||||
- Bracket-style blocks: \`[analyze-mode]\`, \`[search-mode]\`, \`[SYSTEM DIRECTIVE...]\`, \`[SYSTEM REMINDER...]\`, etc.
|
||||
- \`[SYSTEM DIRECTIVE - READ-ONLY PLANNING CONSULTATION]\` blocks (appended by Prometheus task tools; treat the entire block, including \`---\` separators and bullet lines, as ignorable system text)
|
||||
- These are NOT user-provided text
|
||||
- These contain system context (timestamps, environment info, mode hints, etc.)
|
||||
- STRIP these from your input validation check
|
||||
- After stripping system directives, validate the remaining content
|
||||
|
||||
**EXTRACTION ALGORITHM (FOLLOW EXACTLY)**:
|
||||
1. Ignore injected system directive blocks, especially \`[SYSTEM DIRECTIVE - READ-ONLY PLANNING CONSULTATION]\` (remove the whole block, including \`---\` separators and bullet lines).
|
||||
2. Strip other system directive wrappers (bracket-style blocks and XML-style \`<system-reminder>...</system-reminder>\` tags).
|
||||
3. Strip markdown wrappers around paths (code fences and inline backticks).
|
||||
4. Extract plan paths by finding all substrings containing \`.sisyphus/plans/\` and ending in \`.md\`.
|
||||
5. If exactly 1 match → ACCEPT and proceed to Step 1 using that path.
|
||||
6. If 0 matches → REJECT with: "no plan path found" (no path found).
|
||||
7. If 2+ matches → REJECT with: "ambiguous: multiple plan paths".
|
||||
|
||||
**INVALID INPUT EXAMPLES (REJECT ONLY THESE)**:
|
||||
- \`Please review .sisyphus/plans/plan.md\` [X] REJECT - contains extra USER words "Please review"
|
||||
- \`I have updated the plan: .sisyphus/plans/plan.md\` [X] REJECT - contains USER sentence before path
|
||||
- \`.sisyphus/plans/plan.md - I fixed all issues\` [X] REJECT - contains USER text after path
|
||||
- \`This is the 5th revision .sisyphus/plans/plan.md\` [X] REJECT - contains USER text before path
|
||||
- Any input with USER sentences or explanations [X] REJECT
|
||||
- \`No plan path provided here\` [X] REJECT - no \`.sisyphus/plans/*.md\` path
|
||||
- \`Compare .sisyphus/plans/first.md and .sisyphus/plans/second.md\` [X] REJECT - multiple plan paths
|
||||
|
||||
**DECISION RULE**:
|
||||
1. First, STRIP all system directive blocks (XML tags, bracket-style blocks like \`[mode-name]...\`)
|
||||
2. Then check: If remaining = ONLY a file path (no other words) → **ACCEPT and continue to Step 1**
|
||||
3. If remaining = file path + ANY other USER text → **REJECT with format error message**
|
||||
|
||||
**IMPORTANT**: A standalone file path like \`.sisyphus/plans/plan.md\` is VALID. Do NOT reject it!
|
||||
System directives + file path is also VALID. Do NOT reject it!
|
||||
|
||||
**When rejecting for input format (ONLY when there's extra USER text), respond EXACTLY**:
|
||||
**When rejecting for input format, respond EXACTLY**:
|
||||
\`\`\`
|
||||
I REJECT (Input Format Validation)
|
||||
Reason: no plan path found
|
||||
|
||||
You must provide ONLY the work plan file path with no additional text.
|
||||
You must provide a single plan path that includes \`.sisyphus/plans/\` and ends in \`.md\`.
|
||||
|
||||
Valid format: .sisyphus/plans/plan.md
|
||||
Invalid format: Any user text before/after the path (system directives are allowed)
|
||||
Invalid format: No plan path or multiple plan paths
|
||||
|
||||
NOTE: This rejection is based solely on the input format, not the file contents.
|
||||
The file itself has not been evaluated yet.
|
||||
\`\`\`
|
||||
|
||||
Use this alternate Reason line if multiple paths are present:
|
||||
- Reason: multiple plan paths found
|
||||
|
||||
**ULTRA-CRITICAL REMINDER**:
|
||||
If the user provides EXACTLY \`.sisyphus/plans/plan.md\` or any other file path (with or without system directives) WITH NO ADDITIONAL USER TEXT:
|
||||
If the input contains exactly one \`.sisyphus/plans/*.md\` path (with or without system directives or conversational wrappers):
|
||||
→ THIS IS VALID INPUT
|
||||
→ DO NOT REJECT IT
|
||||
→ IMMEDIATELY PROCEED TO READ THE FILE
|
||||
→ START EVALUATING THE FILE CONTENTS
|
||||
|
||||
Never reject a standalone file path!
|
||||
Never reject a single plan path embedded in the input.
|
||||
Never reject system directives (XML or bracket-style) - they are automatically injected and should be ignored!
|
||||
|
||||
|
||||
**IMPORTANT - Response Language**: Your evaluation output MUST match the language used in the work plan content:
|
||||
- Match the language of the plan in your evaluation output
|
||||
- If the plan is written in English → Write your entire evaluation in English
|
||||
@@ -262,7 +262,7 @@ The plan should enable a developer to:
|
||||
## Review Process
|
||||
|
||||
### Step 0: Validate Input Format (MANDATORY FIRST STEP)
|
||||
Check if input is ONLY a file path. If yes, ACCEPT and continue. If extra text, REJECT.
|
||||
Extract the plan path from anywhere in the input. If exactly one \`.sisyphus/plans/*.md\` path is found, ACCEPT and continue. If none are found, REJECT with "no plan path found". If multiple are found, REJECT with "ambiguous: multiple plan paths".
|
||||
|
||||
### Step 1: Read the Work Plan
|
||||
- Load the file from the path provided
|
||||
|
||||
22
src/agents/prometheus-prompt.test.ts
Normal file
22
src/agents/prometheus-prompt.test.ts
Normal file
@@ -0,0 +1,22 @@
|
||||
import { describe, test, expect } from "bun:test"
|
||||
import { PROMETHEUS_SYSTEM_PROMPT } from "./prometheus-prompt"
|
||||
|
||||
describe("PROMETHEUS_SYSTEM_PROMPT Momus invocation policy", () => {
|
||||
test("should direct providing ONLY the file path string when invoking Momus", () => {
|
||||
// #given
|
||||
const prompt = PROMETHEUS_SYSTEM_PROMPT
|
||||
|
||||
// #when / #then
|
||||
// Should mention Momus and providing only the path
|
||||
expect(prompt.toLowerCase()).toMatch(/momus.*only.*path|path.*only.*momus/)
|
||||
})
|
||||
|
||||
test("should forbid wrapping Momus invocation in explanations or markdown", () => {
|
||||
// #given
|
||||
const prompt = PROMETHEUS_SYSTEM_PROMPT
|
||||
|
||||
// #when / #then
|
||||
// Should mention not wrapping or using markdown for the path
|
||||
expect(prompt.toLowerCase()).toMatch(/not.*wrap|no.*explanation|no.*markdown/)
|
||||
})
|
||||
})
|
||||
@@ -651,6 +651,12 @@ while (true) {
|
||||
- Momus is the gatekeeper
|
||||
- Your job is to satisfy Momus, not to argue with it
|
||||
|
||||
5. **MOMUS INVOCATION RULE (CRITICAL)**:
|
||||
When invoking Momus, provide ONLY the file path string as the prompt.
|
||||
- Do NOT wrap in explanations, markdown, or conversational text.
|
||||
- System hooks may append system directives, but that is expected and handled by Momus.
|
||||
- Example invocation: \`prompt=".sisyphus/plans/{name}.md"\`
|
||||
|
||||
### What "OKAY" Means
|
||||
|
||||
Momus only says "OKAY" when:
|
||||
|
||||
Reference in New Issue
Block a user