Merge pull request #2773 from MoerAI/fix/ralph-loop-fuzzy-completion
fix(ralph-loop): add semantic completion detection as fallback for natural language (fixes #2489)
This commit is contained in:
@@ -1,7 +1,7 @@
|
|||||||
/// <reference types="bun-types" />
|
/// <reference types="bun-types" />
|
||||||
import { describe, expect, test } from "bun:test"
|
import { describe, expect, test } from "bun:test"
|
||||||
import type { PluginInput } from "@opencode-ai/plugin"
|
import type { PluginInput } from "@opencode-ai/plugin"
|
||||||
import { detectCompletionInSessionMessages } from "./completion-promise-detector"
|
import { detectCompletionInSessionMessages, detectSemanticCompletion } from "./completion-promise-detector"
|
||||||
|
|
||||||
type SessionMessage = {
|
type SessionMessage = {
|
||||||
info?: { role?: string }
|
info?: { role?: string }
|
||||||
@@ -184,4 +184,148 @@ describe("detectCompletionInSessionMessages", () => {
|
|||||||
expect(detected).toBe(false)
|
expect(detected).toBe(false)
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
|
describe("#given semantic completion patterns", () => {
  // Each scenario is a single assistant reply that announces completion in
  // plain language, the promise the loop is waiting for, and whether the
  // detector is expected to report completion for that combination.
  const scenarios: Array<{ title: string; reply: string; promise: string; expected: boolean }> = [
    {
      title: "#when agent says 'task is complete' #then should detect semantic completion",
      reply: "The task is complete. All work has been finished.",
      promise: "DONE",
      expected: true,
    },
    {
      title: "#when agent says 'all items are done' #then should detect semantic completion",
      reply: "All items are done and marked as complete.",
      promise: "DONE",
      expected: true,
    },
    {
      title: "#when agent says 'nothing left to do' #then should detect semantic completion",
      reply: "There is nothing left to do. Everything is finished.",
      promise: "DONE",
      expected: true,
    },
    {
      title: "#when agent says 'successfully completed all' #then should detect semantic completion",
      reply: "I have successfully completed all the required tasks.",
      promise: "DONE",
      expected: true,
    },
    {
      // Same reply as the first scenario; only the awaited promise differs.
      title: "#when promise is VERIFIED #then semantic completion should NOT trigger",
      reply: "The task is complete. All work has been finished.",
      promise: "VERIFIED",
      expected: false,
    },
  ]

  for (const scenario of scenarios) {
    test(scenario.title, async () => {
      // #given
      const messages: SessionMessage[] = [
        {
          info: { role: "assistant" },
          parts: [{ type: "text", text: scenario.reply }],
        },
      ]
      const ctx = createPluginInput(messages)

      // #when
      const detected = await detectCompletionInSessionMessages(ctx, {
        sessionID: "session-123",
        promise: scenario.promise,
        apiTimeoutMs: 1000,
        directory: "/tmp",
      })

      // #then
      expect(detected).toBe(scenario.expected)
    })
  }
})
|
||||||
|
})
|
||||||
|
|
||||||
|
describe("detectSemanticCompletion", () => {
|
||||||
|
describe("#given semantic completion patterns", () => {
  // [test title, input text, expected detector result]
  const cases: Array<[string, string, boolean]> = [
    ["#when text contains 'task is complete' #then should return true", "The task is complete.", true],
    ["#when text contains 'all items are done' #then should return true", "All items are done.", true],
    ["#when text contains 'nothing left to do' #then should return true", "There is nothing left to do.", true],
    [
      "#when text contains 'successfully completed all' #then should return true",
      "Successfully completed all tasks.",
      true,
    ],
    ["#when text contains 'everything is finished' #then should return true", "Everything is finished.", true],
    [
      "#when text does NOT contain completion patterns #then should return false",
      "Working on the next task.",
      false,
    ],
    ["#when text is empty #then should return false", "", false],
  ]

  for (const [title, input, expected] of cases) {
    test(title, () => {
      expect(detectSemanticCompletion(input)).toBe(expected)
    })
  }
})
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -9,6 +9,20 @@ interface OpenCodeSessionMessage {
|
|||||||
parts?: Array<{ type: string; text?: string }>
|
parts?: Array<{ type: string; text?: string }>
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Shape of one JSON-parsed transcript line, limited to the fields the
 * completion detector reads. Every field is optional because the value comes
 * straight from `JSON.parse` and is not validated.
 */
interface TranscriptEntry {
  type?: string
  timestamp?: string
  content?: string
  tool_output?: { output?: string } | string
}

/**
 * Returns the text carried by a transcript entry.
 *
 * Lookup order: a non-empty string `content`, then a string `tool_output`,
 * then a string `tool_output.output`.
 *
 * @param entry - Parsed transcript line; may in practice be `null` or a
 *   primitive, since it is produced by an unchecked cast of `JSON.parse` output.
 * @returns The first text found, or `""` when the entry carries none.
 */
function extractTranscriptEntryText(entry: TranscriptEntry): string {
  // A transcript line such as `null` or `42` parses fine but is not an entry.
  if (typeof entry !== "object" || entry === null) return ""

  // An empty `content` must not mask a populated `tool_output`.
  if (typeof entry.content === "string" && entry.content !== "") return entry.content

  const toolOutput = entry.tool_output
  if (typeof toolOutput === "string") return toolOutput
  if (toolOutput !== null && typeof toolOutput === "object" && typeof toolOutput.output === "string") {
    return toolOutput.output
  }

  return ""
}
|
||||||
|
|
||||||
/**
 * Backslash-escapes every regex metacharacter in `str` (`. * + ? ^ $ { } ( ) | [ ] \`)
 * so the result can be embedded in a RegExp source and match `str` literally.
 */
function escapeRegex(str: string): string {
|
function escapeRegex(str: string): string {
|
||||||
return str.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")
|
return str.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")
|
||||||
}
|
}
|
||||||
@@ -17,6 +31,18 @@ function buildPromisePattern(promise: string): RegExp {
|
|||||||
return new RegExp(`<promise>\\s*${escapeRegex(promise)}\\s*</promise>`, "is")
|
return new RegExp(`<promise>\\s*${escapeRegex(promise)}\\s*</promise>`, "is")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Natural-language phrasings treated as the agent announcing that the work is
 * finished. Callers use these only as a fallback when the explicit
 * `<promise>DONE</promise>` marker is absent.
 *
 * The patterns carry the `g` flag because they are consumed through
 * `String.prototype.matchAll`, which works on a clone of the regex and
 * therefore leaves `lastIndex` on these shared instances untouched.
 *
 * "marked" only counts when followed by a completion word, so a status report
 * such as "all todos are marked as pending" is not mistaken for completion.
 */
const SEMANTIC_COMPLETION_PATTERNS: readonly RegExp[] = [
  /\b(?:task|work|implementation|all\s+tasks?)\s+(?:is|are)\s+(?:complete|completed|done|finished)\b/gi,
  /\ball\s+(?:items?|todos?|steps?)\s+(?:are\s+)?(?:complete|completed|done|finished|marked\s+(?:as\s+)?(?:complete|completed|done|finished))\b/gi,
  /\b(?:everything|all\s+work)\s+(?:is\s+)?(?:complete|completed|done|finished)\b/gi,
  /\bsuccessfully\s+completed?\s+all\b/gi,
  /\bnothing\s+(?:left|more|remaining)\s+to\s+(?:do|implement|fix)\b/gi,
]

/**
 * Matches text that ends in a negation directly in front of a completion
 * phrase, e.g. "Not ", "not all of the " or "I haven't ".
 */
const NEGATED_LEAD_IN = /(?:\bnot|n['’]t)\s+(?:all\s+(?:of\s+)?(?:the\s+)?)?$/i

/**
 * Reports whether `text` contains a natural-language statement that the work
 * is complete.
 *
 * A phrase is ignored when it is immediately preceded by a negation ("not all
 * items are done", "haven't successfully completed all"), because a false
 * positive here ends the loop before the work is finished.
 *
 * @param text - Assistant output to inspect.
 * @returns `true` when at least one non-negated completion phrase is present;
 *   `false` otherwise, including for empty input.
 */
export function detectSemanticCompletion(text: string): boolean {
  if (typeof text !== "string" || text.length === 0) return false

  return SEMANTIC_COMPLETION_PATTERNS.some((pattern) => {
    // Check every occurrence: a negated phrase early in the text must not
    // hide a genuine completion statement later on.
    for (const match of text.matchAll(pattern)) {
      const leadIn = text.slice(0, match.index ?? 0)
      if (!NEGATED_LEAD_IN.test(leadIn)) return true
    }
    return false
  })
}
|
||||||
|
|
||||||
export function detectCompletionInTranscript(
|
export function detectCompletionInTranscript(
|
||||||
transcriptPath: string | undefined,
|
transcriptPath: string | undefined,
|
||||||
promise: string,
|
promise: string,
|
||||||
@@ -33,10 +59,18 @@ export function detectCompletionInTranscript(
|
|||||||
|
|
||||||
for (const line of lines) {
|
for (const line of lines) {
|
||||||
try {
|
try {
|
||||||
const entry = JSON.parse(line) as { type?: string; timestamp?: string }
|
const entry = JSON.parse(line) as TranscriptEntry
|
||||||
if (entry.type === "user") continue
|
if (entry.type === "user") continue
|
||||||
if (startedAt && entry.timestamp && entry.timestamp < startedAt) continue
|
if (startedAt && entry.timestamp && entry.timestamp < startedAt) continue
|
||||||
if (pattern.test(line)) return true
|
const entryText = extractTranscriptEntryText(entry)
|
||||||
|
if (!entryText) continue
|
||||||
|
if (pattern.test(entryText)) return true
|
||||||
|
// Fallback: semantic completion only for DONE promise and assistant entries
|
||||||
|
const isAssistantEntry = entry.type === "assistant" || entry.type === "text"
|
||||||
|
if (promise === "DONE" && isAssistantEntry && detectSemanticCompletion(entryText)) {
|
||||||
|
log("[ralph-loop] WARNING: Semantic completion detected in transcript (agent used natural language instead of <promise>DONE</promise>)")
|
||||||
|
return true
|
||||||
|
}
|
||||||
} catch {
|
} catch {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
@@ -100,6 +134,14 @@ export async function detectCompletionInSessionMessages(
|
|||||||
if (pattern.test(responseText)) {
|
if (pattern.test(responseText)) {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Fallback: semantic completion only for DONE promise
|
||||||
|
if (options.promise === "DONE" && detectSemanticCompletion(responseText)) {
|
||||||
|
log("[ralph-loop] WARNING: Semantic completion detected (agent used natural language instead of <promise>DONE</promise>)", {
|
||||||
|
sessionID: options.sessionID,
|
||||||
|
})
|
||||||
|
return true
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return false
|
return false
|
||||||
|
|||||||
Reference in New Issue
Block a user