Compare commits
1 Commits
fix/issue-
...
fix/subage
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
21ddd55162 |
0
.openchrome/hints/hints-2026-03-09.jsonl
Normal file
0
.openchrome/hints/hints-2026-03-09.jsonl
Normal file
0
.openchrome/timeline/timeline-2026-03-09.jsonl
Normal file
0
.openchrome/timeline/timeline-2026-03-09.jsonl
Normal file
92
src/agents/anti-duplication.test.ts
Normal file
92
src/agents/anti-duplication.test.ts
Normal file
@@ -0,0 +1,92 @@
|
|||||||
|
/// <reference types="bun-types" />
|
||||||
|
|
||||||
|
import { describe, it, expect } from "bun:test"
|
||||||
|
import { buildAntiDuplicationSection } from "./dynamic-agent-prompt-builder"
|
||||||
|
|
||||||
|
/**
 * Content checks for the prompt fragment returned by `buildAntiDuplicationSection()`.
 *
 * The builder takes no arguments, so every case calls it fresh and asserts that
 * one group of key phrases is present in the returned string.
 */
describe("buildAntiDuplicationSection", () => {
  it("#given no arguments #when building anti-duplication section #then returns comprehensive rule section", () => {
    //#given: no special configuration needed

    //#when: building the anti-duplication section
    const result = buildAntiDuplicationSection()

    //#then: should contain the anti-duplication rule with all key concepts
    expect(result).toContain("Anti-Duplication Rule")
    expect(result).toContain("CRITICAL")
    expect(result).toContain("DO NOT perform the same search yourself")
  })

  it("#given no arguments #when building #then explicitly forbids manual re-search after delegation", () => {
    //#given: no special configuration

    //#when: building the section
    const result = buildAntiDuplicationSection()

    //#then: should explicitly list forbidden behaviors
    expect(result).toContain("FORBIDDEN")
    expect(result).toContain("manually grep/search for the same information")
    expect(result).toContain("Re-doing the research")
  })

  it("#given no arguments #when building #then allows non-overlapping work", () => {
    //#given: no special configuration

    //#when: building the section
    const result = buildAntiDuplicationSection()

    //#then: should explicitly allow non-overlapping work
    expect(result).toContain("ALLOWED")
    expect(result).toContain("non-overlapping work")
    expect(result).toContain("work that doesn't depend on the delegated research")
  })

  it("#given no arguments #when building #then includes wait-for-results instructions", () => {
    //#given: no special configuration

    //#when: building the section
    const result = buildAntiDuplicationSection()

    //#then: should include instructions for waiting properly
    expect(result).toContain("Wait for Results Properly")
    expect(result).toContain("End your response")
    expect(result).toContain("Wait for the completion notification")
    expect(result).toContain("background_output")
  })

  it("#given no arguments #when building #then explains why this matters", () => {
    //#given: no special configuration

    //#when: building the section
    const result = buildAntiDuplicationSection()

    //#then: should explain the purpose
    expect(result).toContain("Why This Matters")
    expect(result).toContain("Wasted tokens")
    expect(result).toContain("Confusion")
    expect(result).toContain("Efficiency")
  })

  it("#given no arguments #when building #then provides code examples", () => {
    //#given: no special configuration

    //#when: building the section
    const result = buildAntiDuplicationSection()

    //#then: should include examples
    expect(result).toContain("Example")
    expect(result).toContain("WRONG")
    expect(result).toContain("CORRECT")
    expect(result).toContain("task(subagent_type=")
  })

  it("#given no arguments #when building #then uses proper markdown formatting", () => {
    //#given: no special configuration

    //#when: building the section
    const result = buildAntiDuplicationSection()

    //#then: should be wrapped in Anti_Duplication tag
    expect(result).toContain("<Anti_Duplication>")
    expect(result).toContain("</Anti_Duplication>")
    // Containment alone would pass even if the tags sat in the middle of the
    // text; "wrapped" means the opening tag comes first and the closing tag last.
    expect(result.startsWith("<Anti_Duplication>")).toBe(true)
    expect(result.endsWith("</Anti_Duplication>")).toBe(true)
  })
})
|
||||||
118
src/agents/atlas/atlas-prompt.test.ts
Normal file
118
src/agents/atlas/atlas-prompt.test.ts
Normal file
@@ -0,0 +1,118 @@
|
|||||||
|
import { describe, test, expect } from "bun:test"
|
||||||
|
import { ATLAS_SYSTEM_PROMPT } from "./default"
|
||||||
|
import { ATLAS_GPT_SYSTEM_PROMPT } from "./gpt"
|
||||||
|
import { ATLAS_GEMINI_SYSTEM_PROMPT } from "./gemini"
|
||||||
|
|
||||||
|
/**
 * Verifies that every Atlas prompt variant carries the auto-continue policy:
 * no approval-style questions between plan steps, immediate continuation after
 * verification, and an explicit list of situations where pausing is allowed.
 */
describe("Atlas prompts auto-continue policy", () => {
  // Label → prompt constant. The label is interpolated into the per-variant
  // test titles, so the reported names stay "default variant ...", "gpt variant ...",
  // "gemini variant ...".
  const variants = [
    ["default", ATLAS_SYSTEM_PROMPT],
    ["gpt", ATLAS_GPT_SYSTEM_PROMPT],
    ["gemini", ATLAS_GEMINI_SYSTEM_PROMPT],
  ] as const

  // Lower-cased phrases each variant must contain; prompts are lower-cased
  // before matching so heading/emphasis capitalisation does not matter.
  const requiredPhrases = [
    "auto-continue policy",
    "never ask the user",
    "should i continue",
    "proceed to next task",
    "approval-style",
    "auto-continue immediately",
  ]

  for (const [variantName, prompt] of variants) {
    test(`${variantName} variant should forbid asking user for continuation confirmation`, () => {
      // given / when
      const lowerPrompt = prompt.toLowerCase()

      // then
      for (const phrase of requiredPhrases) {
        expect(lowerPrompt).toContain(phrase)
      }
    })
  }

  test("all variants should require immediate continuation after verification passes", () => {
    // given
    const prompts = variants.map(([, prompt]) => prompt)

    // when / then
    for (const prompt of prompts) {
      const lowerPrompt = prompt.toLowerCase()
      expect(lowerPrompt).toMatch(/auto-continue immediately after verification/)
      expect(lowerPrompt).toMatch(/immediately delegate next task/)
    }
  })

  test("all variants should define when user interaction is actually needed", () => {
    // given
    const prompts = variants.map(([, prompt]) => prompt)

    // when / then
    for (const prompt of prompts) {
      const lowerPrompt = prompt.toLowerCase()
      expect(lowerPrompt).toMatch(/only pause.*truly blocked/)
      expect(lowerPrompt).toMatch(/plan needs clarification|blocked by external/)
    }
  })
})
|
||||||
|
|
||||||
|
/**
 * Verifies that every Atlas prompt variant points at the plan file under
 * `.sisyphus/plans/` and no longer mentions anything under `.sisyphus/tasks/`.
 */
describe("Atlas prompts plan path consistency", () => {
  // Label → prompt constant; the label is interpolated into the test titles.
  const variants = [
    ["default", ATLAS_SYSTEM_PROMPT],
    ["gpt", ATLAS_GPT_SYSTEM_PROMPT],
    ["gemini", ATLAS_GEMINI_SYSTEM_PROMPT],
  ] as const

  for (const [variantName, prompt] of variants) {
    test(`${variantName} variant should use .sisyphus/plans/{plan-name}.md path`, () => {
      // given: `prompt` is the variant under test

      // when / then
      expect(prompt).toContain(".sisyphus/plans/{plan-name}.md")
      // Rejecting the whole directory prefix also rejects the legacy
      // ".sisyphus/tasks/{plan-name}.yaml" path, so no separate check is needed.
      expect(prompt).not.toContain(".sisyphus/tasks/")
    })
  }

  test("all variants should read plan file after verification", () => {
    // given
    const prompts = variants.map(([, prompt]) => prompt)

    // when / then
    for (const prompt of prompts) {
      // NOTE(review): this pattern is case-sensitive and lazy across newlines, so
      // any lowercase "read" (even inside another word) that occurs anywhere
      // before a ".sisyphus/plans/" path satisfies it — confirm that is intended.
      expect(prompt).toMatch(/read[\s\S]*?\.sisyphus\/plans\//)
    }
  })
})
|
||||||
@@ -99,6 +99,29 @@ Every \`task()\` prompt MUST include ALL 6 sections:
|
|||||||
**If your prompt is under 30 lines, it's TOO SHORT.**
|
**If your prompt is under 30 lines, it's TOO SHORT.**
|
||||||
</delegation_system>
|
</delegation_system>
|
||||||
|
|
||||||
|
<auto_continue>
|
||||||
|
## AUTO-CONTINUE POLICY (STRICT)
|
||||||
|
|
||||||
|
**CRITICAL: NEVER ask the user "should I continue", "proceed to next task", or any approval-style questions between plan steps.**
|
||||||
|
|
||||||
|
**You MUST auto-continue immediately after verification passes:**
|
||||||
|
- After any delegation completes and passes verification → Immediately delegate next task
|
||||||
|
- Do NOT wait for user input, do NOT ask "should I continue"
|
||||||
|
- Only pause or ask if you are truly blocked by missing information, an external dependency, or a critical failure
|
||||||
|
|
||||||
|
**The only time you ask the user:**
|
||||||
|
- Plan needs clarification or modification before execution
|
||||||
|
- Blocked by an external dependency beyond your control
|
||||||
|
- Critical failure prevents any further progress
|
||||||
|
|
||||||
|
**Auto-continue examples:**
|
||||||
|
- Task A done → Verify → Pass → Immediately start Task B
|
||||||
|
- Task fails → Retry 3x → Still fails → Document → Move to next independent task
|
||||||
|
- NEVER: "Should I continue to the next task?"
|
||||||
|
|
||||||
|
**This is NOT optional. This is core to your role as orchestrator.**
|
||||||
|
</auto_continue>
|
||||||
|
|
||||||
<workflow>
|
<workflow>
|
||||||
## Step 0: Register Tracking
|
## Step 0: Register Tracking
|
||||||
|
|
||||||
@@ -214,7 +237,7 @@ After EVERY delegation, complete ALL of these steps — no shortcuts:
|
|||||||
|
|
||||||
After verification, READ the plan file directly — every time, no exceptions:
|
After verification, READ the plan file directly — every time, no exceptions:
|
||||||
\`\`\`
|
\`\`\`
|
||||||
Read(".sisyphus/tasks/{plan-name}.yaml")
|
Read(".sisyphus/plans/{plan-name}.md")
|
||||||
\`\`\`
|
\`\`\`
|
||||||
Count remaining \`- [ ]\` tasks. This is your ground truth for what comes next.
|
Count remaining \`- [ ]\` tasks. This is your ground truth for what comes next.
|
||||||
|
|
||||||
|
|||||||
@@ -116,6 +116,29 @@ Every \`task()\` prompt MUST include ALL 6 sections:
|
|||||||
**Minimum 30 lines per delegation prompt. Under 30 lines = the subagent WILL fail.**
|
**Minimum 30 lines per delegation prompt. Under 30 lines = the subagent WILL fail.**
|
||||||
</delegation_system>
|
</delegation_system>
|
||||||
|
|
||||||
|
<auto_continue>
|
||||||
|
## AUTO-CONTINUE POLICY (STRICT)
|
||||||
|
|
||||||
|
**CRITICAL: NEVER ask the user "should I continue", "proceed to next task", or any approval-style questions between plan steps.**
|
||||||
|
|
||||||
|
**You MUST auto-continue immediately after verification passes:**
|
||||||
|
- After any delegation completes and passes verification → Immediately delegate next task
|
||||||
|
- Do NOT wait for user input, do NOT ask "should I continue"
|
||||||
|
- Only pause or ask if you are truly blocked by missing information, an external dependency, or a critical failure
|
||||||
|
|
||||||
|
**The only time you ask the user:**
|
||||||
|
- Plan needs clarification or modification before execution
|
||||||
|
- Blocked by an external dependency beyond your control
|
||||||
|
- Critical failure prevents any further progress
|
||||||
|
|
||||||
|
**Auto-continue examples:**
|
||||||
|
- Task A done → Verify → Pass → Immediately start Task B
|
||||||
|
- Task fails → Retry 3x → Still fails → Document → Move to next independent task
|
||||||
|
- NEVER: "Should I continue to the next task?"
|
||||||
|
|
||||||
|
**This is NOT optional. This is core to your role as orchestrator.**
|
||||||
|
</auto_continue>
|
||||||
|
|
||||||
<workflow>
|
<workflow>
|
||||||
## Step 0: Register Tracking
|
## Step 0: Register Tracking
|
||||||
|
|
||||||
|
|||||||
@@ -48,9 +48,10 @@ Complete ALL tasks in a work plan via \`task()\` until fully done.
|
|||||||
</scope_and_design_constraints>
|
</scope_and_design_constraints>
|
||||||
|
|
||||||
<uncertainty_and_ambiguity>
|
<uncertainty_and_ambiguity>
|
||||||
- If a task is ambiguous or underspecified:
|
- During initial plan analysis, if a task is ambiguous or underspecified:
|
||||||
- Ask 1-3 precise clarifying questions, OR
|
- Ask 1-3 precise clarifying questions, OR
|
||||||
- State your interpretation explicitly and proceed with the simplest approach.
|
- State your interpretation explicitly and proceed with the simplest approach.
|
||||||
|
- Once execution has started, do NOT stop to ask for continuation or approval between steps.
|
||||||
- Never fabricate task details, file paths, or requirements.
|
- Never fabricate task details, file paths, or requirements.
|
||||||
- Prefer language like "Based on the plan..." instead of absolute claims.
|
- Prefer language like "Based on the plan..." instead of absolute claims.
|
||||||
- When unsure about parallelization, default to sequential execution.
|
- When unsure about parallelization, default to sequential execution.
|
||||||
@@ -134,6 +135,29 @@ Every \`task()\` prompt MUST include ALL 6 sections:
|
|||||||
**Minimum 30 lines per delegation prompt.**
|
**Minimum 30 lines per delegation prompt.**
|
||||||
</delegation_system>
|
</delegation_system>
|
||||||
|
|
||||||
|
<auto_continue>
|
||||||
|
## AUTO-CONTINUE POLICY (STRICT)
|
||||||
|
|
||||||
|
**CRITICAL: NEVER ask the user "should I continue", "proceed to next task", or any approval-style questions between plan steps.**
|
||||||
|
|
||||||
|
**You MUST auto-continue immediately after verification passes:**
|
||||||
|
- After any delegation completes and passes verification → Immediately delegate next task
|
||||||
|
- Do NOT wait for user input, do NOT ask "should I continue"
|
||||||
|
- Only pause or ask if you are truly blocked by missing information, an external dependency, or a critical failure
|
||||||
|
|
||||||
|
**The only time you ask the user:**
|
||||||
|
- Plan needs clarification or modification before execution
|
||||||
|
- Blocked by an external dependency beyond your control
|
||||||
|
- Critical failure prevents any further progress
|
||||||
|
|
||||||
|
**Auto-continue examples:**
|
||||||
|
- Task A done → Verify → Pass → Immediately start Task B
|
||||||
|
- Task fails → Retry 3x → Still fails → Document → Move to next independent task
|
||||||
|
- NEVER: "Should I continue to the next task?"
|
||||||
|
|
||||||
|
**This is NOT optional. This is core to your role as orchestrator.**
|
||||||
|
</auto_continue>
|
||||||
|
|
||||||
<workflow>
|
<workflow>
|
||||||
## Step 0: Register Tracking
|
## Step 0: Register Tracking
|
||||||
|
|
||||||
|
|||||||
88
src/agents/delegation-trust-prompt.test.ts
Normal file
88
src/agents/delegation-trust-prompt.test.ts
Normal file
@@ -0,0 +1,88 @@
|
|||||||
|
import { describe, expect, test } from "bun:test"
|
||||||
|
import { createSisyphusAgent } from "./sisyphus"
|
||||||
|
import { createHephaestusAgent } from "./hephaestus"
|
||||||
|
import { buildSisyphusJuniorPrompt } from "./sisyphus-junior/agent"
|
||||||
|
import {
|
||||||
|
buildAntiDuplicationSection,
|
||||||
|
buildExploreSection,
|
||||||
|
type AvailableAgent,
|
||||||
|
} from "./dynamic-agent-prompt-builder"
|
||||||
|
|
||||||
|
/**
 * Minimal `explore` agent fixture shared by the tests in this file.
 * `satisfies` checks the literal against `AvailableAgent` without widening it.
 */
const exploreAgent = {
  name: "explore",
  description: "Contextual grep specialist",
  metadata: {
    category: "advisor",
    cost: "FREE",
    promptAlias: "Explore",
    // No triggers are needed for these prompt-content assertions.
    triggers: [],
    useWhen: ["Multiple search angles needed"],
    avoidWhen: ["Single keyword search is enough"],
  },
} satisfies AvailableAgent
|
||||||
|
|
||||||
|
/**
 * Checks that the shared prompt sections and each agent prompt tell the agent
 * not to repeat a search it has already delegated to a background agent.
 */
describe("delegation trust prompt rules", () => {
  // Phrase every prompt under test is expected to carry.
  const noDuplicateSearch = "DO NOT perform the same search yourself"

  test("buildAntiDuplicationSection explains overlap is forbidden", () => {
    // given
    const antiDuplication = buildAntiDuplicationSection()

    // when / then
    expect(antiDuplication).toContain(noDuplicateSearch)
    expect(antiDuplication).toContain("non-overlapping work")
    expect(antiDuplication).toContain("End your response")
  })

  test("buildExploreSection includes delegation trust rule", () => {
    // given / when
    const exploreSection = buildExploreSection([exploreAgent])

    // then
    expect(exploreSection).toContain("Delegation Trust Rule")
    expect(exploreSection).toContain("do **not** manually perform that same search yourself")
  })

  test("Sisyphus prompt forbids duplicate delegated exploration", () => {
    // given / when
    const { prompt } = createSisyphusAgent("anthropic/claude-sonnet-4-6", [exploreAgent])

    // then
    expect(prompt).toContain("Continue only with non-overlapping work")
    expect(prompt).toContain(noDuplicateSearch)
  })

  test("Hephaestus prompt forbids duplicate delegated exploration", () => {
    // given / when
    const { prompt } = createHephaestusAgent("openai/gpt-5.2", [exploreAgent])

    // then
    expect(prompt).toContain("Continue only with non-overlapping work after launching background agents")
    expect(prompt).toContain(noDuplicateSearch)
  })

  // The Sisyphus-Junior builder is exercised once per model; the label is
  // interpolated into the test title.
  const juniorModels = [
    ["GPT", "openai/gpt-5.2"],
    ["Gemini", "google/gemini-3.1-pro"],
  ] as const

  for (const [label, model] of juniorModels) {
    test(`Sisyphus-Junior ${label} prompt forbids duplicate delegated exploration`, () => {
      // given
      const juniorPrompt = buildSisyphusJuniorPrompt(model, false)

      // when / then
      expect(juniorPrompt).toContain("continue only with non-overlapping work while they search")
      expect(juniorPrompt).toContain(noDuplicateSearch)
    })
  }
})
|
||||||
@@ -118,6 +118,8 @@ export function buildExploreSection(agents: AvailableAgent[]): string {
|
|||||||
|
|
||||||
Use it as a **peer tool**, not a fallback. Fire liberally.
|
Use it as a **peer tool**, not a fallback. Fire liberally.
|
||||||
|
|
||||||
|
**Delegation Trust Rule:** Once you fire an explore agent for a search, do **not** manually perform that same search yourself. Use direct tools only for non-overlapping work or when you intentionally skipped delegation.
|
||||||
|
|
||||||
**Use Direct Tools when:**
|
**Use Direct Tools when:**
|
||||||
${avoidWhen.map((w) => `- ${w}`).join("\n")}
|
${avoidWhen.map((w) => `- ${w}`).join("\n")}
|
||||||
|
|
||||||
@@ -308,6 +310,7 @@ export function buildAntiPatternsSection(): string {
|
|||||||
"- **Search**: Firing agents for single-line typos or obvious syntax errors",
|
"- **Search**: Firing agents for single-line typos or obvious syntax errors",
|
||||||
"- **Debugging**: Shotgun debugging, random changes",
|
"- **Debugging**: Shotgun debugging, random changes",
|
||||||
"- **Background Tasks**: Polling `background_output` on running tasks — end response and wait for notification",
|
"- **Background Tasks**: Polling `background_output` on running tasks — end response and wait for notification",
|
||||||
|
"- **Delegation Duplication**: Delegating exploration to explore/librarian and then manually doing the same search yourself",
|
||||||
"- **Oracle**: Delivering answer without collecting Oracle results",
|
"- **Oracle**: Delivering answer without collecting Oracle results",
|
||||||
]
|
]
|
||||||
|
|
||||||
@@ -409,3 +412,52 @@ export function buildUltraworkSection(
|
|||||||
|
|
||||||
return lines.join("\n")
|
return lines.join("\n")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Builds the anti-duplication section that agent prompts interpolate.
 *
 * The text tells an agent that, once it has delegated exploration to the
 * explore/librarian agents, it must not run the same search itself; it lists
 * what is forbidden and allowed, how to wait for results, why the rule exists,
 * and a short wrong/correct example.
 *
 * @returns A fixed string wrapped in `<Anti_Duplication>` … `</Anti_Duplication>` tags.
 */
export function buildAntiDuplicationSection(): string {
  // Backticks inside the section are escaped because the section itself is a
  // template literal. The content is kept flush-left so no indentation leaks
  // into the generated prompt.
  return `<Anti_Duplication>
## Anti-Duplication Rule (CRITICAL)

Once you delegate exploration to explore/librarian agents, **DO NOT perform the same search yourself**.

### What this means:

**FORBIDDEN:**
- After firing explore/librarian, manually grep/search for the same information
- Re-doing the research the agents were just tasked with
- "Just quickly checking" the same files the background agents are checking

**ALLOWED:**
- Continue with **non-overlapping work** — work that doesn't depend on the delegated research
- Work on unrelated parts of the codebase
- Preparation work (e.g., setting up files, configs) that can proceed independently

### Wait for Results Properly:

When you need the delegated results but they're not ready:

1. **End your response** — do NOT continue with work that depends on those results
2. **Wait for the completion notification** — the system will trigger your next turn
3. **Then** collect results via \`background_output(task_id="...")\`
4. **Do NOT** impatiently re-search the same topics while waiting

### Why This Matters:

- **Wasted tokens**: Duplicate exploration wastes your context budget
- **Confusion**: You might contradict the agent's findings
- **Efficiency**: The whole point of delegation is parallel throughput

### Example:

\`\`\`typescript
// WRONG: After delegating, re-doing the search
task(subagent_type="explore", run_in_background=true, ...)
// Then immediately grep for the same thing yourself — FORBIDDEN

// CORRECT: Continue non-overlapping work
task(subagent_type="explore", run_in_background=true, ...)
// Work on a different, unrelated file while they search
// End your response and wait for the notification
\`\`\`
</Anti_Duplication>`
}
|
||||||
|
|||||||
@@ -16,6 +16,7 @@ import {
|
|||||||
buildOracleSection,
|
buildOracleSection,
|
||||||
buildHardBlocksSection,
|
buildHardBlocksSection,
|
||||||
buildAntiPatternsSection,
|
buildAntiPatternsSection,
|
||||||
|
buildAntiDuplicationSection,
|
||||||
categorizeTools,
|
categorizeTools,
|
||||||
} from "./dynamic-agent-prompt-builder";
|
} from "./dynamic-agent-prompt-builder";
|
||||||
|
|
||||||
@@ -290,11 +291,13 @@ Prompt structure for each agent:
|
|||||||
- Fire 2-5 explore agents in parallel for any non-trivial codebase question
|
- Fire 2-5 explore agents in parallel for any non-trivial codebase question
|
||||||
- Parallelize independent file reads — don't read files one at a time
|
- Parallelize independent file reads — don't read files one at a time
|
||||||
- NEVER use \`run_in_background=false\` for explore/librarian
|
- NEVER use \`run_in_background=false\` for explore/librarian
|
||||||
- Continue your work immediately after launching background agents
|
- Continue only with non-overlapping work after launching background agents
|
||||||
- Collect results with \`background_output(task_id="...")\` when needed
|
- Collect results with \`background_output(task_id="...")\` when needed
|
||||||
- BEFORE final answer, cancel DISPOSABLE tasks individually: \`background_cancel(taskId="bg_explore_xxx")\`, \`background_cancel(taskId="bg_librarian_xxx")\`
|
- BEFORE final answer, cancel DISPOSABLE tasks individually: \`background_cancel(taskId="bg_explore_xxx")\`, \`background_cancel(taskId="bg_librarian_xxx")\`
|
||||||
- **NEVER use \`background_cancel(all=true)\`** — it kills tasks whose results you haven't collected yet
|
- **NEVER use \`background_cancel(all=true)\`** — it kills tasks whose results you haven't collected yet
|
||||||
|
|
||||||
|
${buildAntiDuplicationSection()}
|
||||||
|
|
||||||
### Search Stop Conditions
|
### Search Stop Conditions
|
||||||
|
|
||||||
STOP searching when:
|
STOP searching when:
|
||||||
|
|||||||
@@ -9,6 +9,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
import { resolvePromptAppend } from "../builtin-agents/resolve-file-uri"
|
import { resolvePromptAppend } from "../builtin-agents/resolve-file-uri"
|
||||||
|
import { buildAntiDuplicationSection } from "../dynamic-agent-prompt-builder"
|
||||||
|
|
||||||
export function buildGeminiSisyphusJuniorPrompt(
|
export function buildGeminiSisyphusJuniorPrompt(
|
||||||
useTaskSystem: boolean,
|
useTaskSystem: boolean,
|
||||||
@@ -58,7 +59,7 @@ Before responding, ask yourself: What tools do I need to call? What am I assumin
|
|||||||
- Run verification (lint, tests, build) WITHOUT asking
|
- Run verification (lint, tests, build) WITHOUT asking
|
||||||
- Make decisions. Course-correct only on CONCRETE failure
|
- Make decisions. Course-correct only on CONCRETE failure
|
||||||
- Note assumptions in final message, not as questions mid-work
|
- Note assumptions in final message, not as questions mid-work
|
||||||
- Need context? Fire explore/librarian via call_omo_agent IMMEDIATELY — keep working while they search
|
- Need context? Fire explore/librarian via call_omo_agent IMMEDIATELY — continue only with non-overlapping work while they search
|
||||||
|
|
||||||
## Scope Discipline
|
## Scope Discipline
|
||||||
|
|
||||||
@@ -77,13 +78,15 @@ Before responding, ask yourself: What tools do I need to call? What am I assumin
|
|||||||
|
|
||||||
<tool_usage_rules>
|
<tool_usage_rules>
|
||||||
- Parallelize independent tool calls: multiple file reads, grep searches, agent fires — all at once
|
- Parallelize independent tool calls: multiple file reads, grep searches, agent fires — all at once
|
||||||
- Explore/Librarian via call_omo_agent = background research. Fire them and keep working
|
- Explore/Librarian via call_omo_agent = background research. Fire them and continue only with non-overlapping work
|
||||||
- After any file edit: restate what changed, where, and what validation follows
|
- After any file edit: restate what changed, where, and what validation follows
|
||||||
- Prefer tools over guessing whenever you need specific data (files, configs, patterns)
|
- Prefer tools over guessing whenever you need specific data (files, configs, patterns)
|
||||||
- ALWAYS use tools over internal knowledge for file contents, project state, and verification
|
- ALWAYS use tools over internal knowledge for file contents, project state, and verification
|
||||||
- **DO NOT SKIP tool calls because you think you already know the answer. You DON'T.**
|
- **DO NOT SKIP tool calls because you think you already know the answer. You DON'T.**
|
||||||
</tool_usage_rules>
|
</tool_usage_rules>
|
||||||
|
|
||||||
|
${buildAntiDuplicationSection()}
|
||||||
|
|
||||||
${taskDiscipline}
|
${taskDiscipline}
|
||||||
|
|
||||||
## Progress Updates
|
## Progress Updates
|
||||||
|
|||||||
@@ -7,6 +7,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
import { resolvePromptAppend } from "../builtin-agents/resolve-file-uri"
|
import { resolvePromptAppend } from "../builtin-agents/resolve-file-uri"
|
||||||
|
import { buildAntiDuplicationSection } from "../dynamic-agent-prompt-builder"
|
||||||
|
|
||||||
export function buildGptSisyphusJuniorPrompt(
|
export function buildGptSisyphusJuniorPrompt(
|
||||||
useTaskSystem: boolean,
|
useTaskSystem: boolean,
|
||||||
@@ -40,7 +41,7 @@ When blocked: try a different approach → decompose the problem → challenge a
|
|||||||
- Run verification (lint, tests, build) WITHOUT asking
|
- Run verification (lint, tests, build) WITHOUT asking
|
||||||
- Make decisions. Course-correct only on CONCRETE failure
|
- Make decisions. Course-correct only on CONCRETE failure
|
||||||
- Note assumptions in final message, not as questions mid-work
|
- Note assumptions in final message, not as questions mid-work
|
||||||
- Need context? Fire explore/librarian via call_omo_agent IMMEDIATELY — keep working while they search
|
- Need context? Fire explore/librarian via call_omo_agent IMMEDIATELY — continue only with non-overlapping work while they search
|
||||||
|
|
||||||
## Scope Discipline
|
## Scope Discipline
|
||||||
|
|
||||||
@@ -58,12 +59,14 @@ When blocked: try a different approach → decompose the problem → challenge a
|
|||||||
|
|
||||||
<tool_usage_rules>
|
<tool_usage_rules>
|
||||||
- Parallelize independent tool calls: multiple file reads, grep searches, agent fires — all at once
|
- Parallelize independent tool calls: multiple file reads, grep searches, agent fires — all at once
|
||||||
- Explore/Librarian via call_omo_agent = background research. Fire them and keep working
|
- Explore/Librarian via call_omo_agent = background research. Fire them and continue only with non-overlapping work
|
||||||
- After any file edit: restate what changed, where, and what validation follows
|
- After any file edit: restate what changed, where, and what validation follows
|
||||||
- Prefer tools over guessing whenever you need specific data (files, configs, patterns)
|
- Prefer tools over guessing whenever you need specific data (files, configs, patterns)
|
||||||
- ALWAYS use tools over internal knowledge for file contents, project state, and verification
|
- ALWAYS use tools over internal knowledge for file contents, project state, and verification
|
||||||
</tool_usage_rules>
|
</tool_usage_rules>
|
||||||
|
|
||||||
|
${buildAntiDuplicationSection()}
|
||||||
|
|
||||||
${taskDiscipline}
|
${taskDiscipline}
|
||||||
|
|
||||||
## Progress Updates
|
## Progress Updates
|
||||||
|
|||||||
@@ -35,6 +35,7 @@ import {
|
|||||||
buildAntiPatternsSection,
|
buildAntiPatternsSection,
|
||||||
buildDeepParallelSection,
|
buildDeepParallelSection,
|
||||||
buildNonClaudePlannerSection,
|
buildNonClaudePlannerSection,
|
||||||
|
buildAntiDuplicationSection,
|
||||||
categorizeTools,
|
categorizeTools,
|
||||||
} from "./dynamic-agent-prompt-builder";
|
} from "./dynamic-agent-prompt-builder";
|
||||||
|
|
||||||
@@ -333,7 +334,7 @@ task(subagent_type="explore", run_in_background=true, load_skills=[], descriptio
|
|||||||
// Reference Grep (external)
|
// Reference Grep (external)
|
||||||
task(subagent_type="librarian", run_in_background=true, load_skills=[], description="Find JWT security docs", prompt="I'm implementing JWT auth and need current security best practices to choose token storage (httpOnly cookies vs localStorage) and set expiration policy. Find: OWASP auth guidelines, recommended token lifetimes, refresh token rotation strategies, common JWT vulnerabilities. Skip 'what is JWT' tutorials — production security guidance only.")
|
task(subagent_type="librarian", run_in_background=true, load_skills=[], description="Find JWT security docs", prompt="I'm implementing JWT auth and need current security best practices to choose token storage (httpOnly cookies vs localStorage) and set expiration policy. Find: OWASP auth guidelines, recommended token lifetimes, refresh token rotation strategies, common JWT vulnerabilities. Skip 'what is JWT' tutorials — production security guidance only.")
|
||||||
task(subagent_type="librarian", run_in_background=true, load_skills=[], description="Find Express auth patterns", prompt="I'm building Express auth middleware and need production-quality patterns to structure my middleware chain. Find how established Express apps (1000+ stars) handle: middleware ordering, token refresh, role-based access control, auth error propagation. Skip basic tutorials — I need battle-tested patterns with proper error handling.")
|
task(subagent_type="librarian", run_in_background=true, load_skills=[], description="Find Express auth patterns", prompt="I'm building Express auth middleware and need production-quality patterns to structure my middleware chain. Find how established Express apps (1000+ stars) handle: middleware ordering, token refresh, role-based access control, auth error propagation. Skip basic tutorials — I need battle-tested patterns with proper error handling.")
|
||||||
// Continue working immediately. System notifies on completion — collect with background_output then.
|
// Continue only with non-overlapping work. System notifies on completion — collect with background_output then.
|
||||||
|
|
||||||
// WRONG: Sequential or blocking
|
// WRONG: Sequential or blocking
|
||||||
result = task(..., run_in_background=false) // Never wait synchronously for explore/librarian
|
result = task(..., run_in_background=false) // Never wait synchronously for explore/librarian
|
||||||
@@ -341,11 +342,13 @@ result = task(..., run_in_background=false) // Never wait synchronously for exp
|
|||||||
|
|
||||||
### Background Result Collection:
|
### Background Result Collection:
|
||||||
1. Launch parallel agents \u2192 receive task_ids
|
1. Launch parallel agents \u2192 receive task_ids
|
||||||
2. Continue immediate work
|
2. Continue only with non-overlapping work
|
||||||
3. System sends \`<system-reminder>\` on each task completion — then call \`background_output(task_id="...")\`
|
3. System sends \`<system-reminder>\` on each task completion — then call \`background_output(task_id="...")\`
|
||||||
4. Need results not yet ready? **End your response.** The notification will trigger your next turn.
|
4. Need results not yet ready? **End your response.** The notification will trigger your next turn.
|
||||||
5. Cleanup: Cancel disposable tasks individually via \`background_cancel(taskId="...")\`
|
5. Cleanup: Cancel disposable tasks individually via \`background_cancel(taskId="...")\`
|
||||||
|
|
||||||
|
${buildAntiDuplicationSection()}
|
||||||
|
|
||||||
### Search Stop Conditions
|
### Search Stop Conditions
|
||||||
|
|
||||||
STOP searching when:
|
STOP searching when:
|
||||||
|
|||||||
@@ -87,7 +87,6 @@ export function createBackgroundOutput(manager: BackgroundOutputManager, client:
|
|||||||
|
|
||||||
const shouldBlock = args.block === true
|
const shouldBlock = args.block === true
|
||||||
const timeoutMs = Math.min(args.timeout ?? 60000, 600000)
|
const timeoutMs = Math.min(args.timeout ?? 60000, 600000)
|
||||||
const fullSession = args.full_session ?? true
|
|
||||||
|
|
||||||
let resolvedTask = task
|
let resolvedTask = task
|
||||||
|
|
||||||
@@ -123,6 +122,10 @@ export function createBackgroundOutput(manager: BackgroundOutputManager, client:
|
|||||||
}
|
}
|
||||||
|
|
||||||
const isActive = isTaskActiveStatus(resolvedTask.status)
|
const isActive = isTaskActiveStatus(resolvedTask.status)
|
||||||
|
const fullSessionProvided = args.full_session !== undefined
|
||||||
|
const fullSession = fullSessionProvided
|
||||||
|
? (args.full_session ?? true)
|
||||||
|
: !isActive
|
||||||
const includeThinking = isActive || (args.include_thinking ?? false)
|
const includeThinking = isActive || (args.include_thinking ?? false)
|
||||||
const includeToolResults = isActive || (args.include_tool_results ?? false)
|
const includeToolResults = isActive || (args.include_tool_results ?? false)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user