fix(ralph-loop): add semantic completion detection as fallback for natural language (fixes #2489)

This commit is contained in:
MoerAI
2026-03-23 20:46:10 +09:00
parent 331f7ec52b
commit aaaeb6997c
2 changed files with 148 additions and 1 deletions

View File

@@ -1,7 +1,7 @@
/// <reference types="bun-types" />
import { describe, expect, test } from "bun:test"
import type { PluginInput } from "@opencode-ai/plugin"
import { detectCompletionInSessionMessages } from "./completion-promise-detector"
import { detectCompletionInSessionMessages, detectSemanticCompletion } from "./completion-promise-detector"
type SessionMessage = {
info?: { role?: string }
@@ -184,4 +184,126 @@ describe("detectCompletionInSessionMessages", () => {
expect(detected).toBe(false)
})
})
describe("#given semantic completion patterns", () => {
  // Each entry is [test title, assistant reply text]. Every reply lacks the
  // <promise>DONE</promise> tag and is expected to be accepted through the
  // natural-language fallback.
  const semanticReplies: ReadonlyArray<readonly [string, string]> = [
    [
      "#when agent says 'task is complete' #then should detect semantic completion",
      "The task is complete. All work has been finished.",
    ],
    [
      "#when agent says 'all items are done' #then should detect semantic completion",
      "All items are done and marked as complete.",
    ],
    [
      "#when agent says 'nothing left to do' #then should detect semantic completion",
      "There is nothing left to do. Everything is finished.",
    ],
    [
      "#when agent says 'successfully completed all' #then should detect semantic completion",
      "I have successfully completed all the required tasks.",
    ],
  ]

  for (const [title, replyText] of semanticReplies) {
    test(title, async () => {
      // #given
      const messages: SessionMessage[] = [
        {
          info: { role: "assistant" },
          parts: [{ type: "text", text: replyText }],
        },
      ]
      const ctx = createPluginInput(messages)

      // #when
      const detected = await detectCompletionInSessionMessages(ctx, {
        sessionID: "session-123",
        promise: "DONE",
        apiTimeoutMs: 1000,
        directory: "/tmp",
      })

      // #then
      expect(detected).toBe(true)
    })
  }
})
})
describe("detectSemanticCompletion", () => {
  describe("#given semantic completion patterns", () => {
    // Each entry is [test title, input text, expected detection result].
    const cases: ReadonlyArray<readonly [string, string, boolean]> = [
      ["#when text contains 'task is complete' #then should return true", "The task is complete.", true],
      ["#when text contains 'all items are done' #then should return true", "All items are done.", true],
      ["#when text contains 'nothing left to do' #then should return true", "There is nothing left to do.", true],
      [
        "#when text contains 'successfully completed all' #then should return true",
        "Successfully completed all tasks.",
        true,
      ],
      ["#when text contains 'everything is finished' #then should return true", "Everything is finished.", true],
      [
        "#when text does NOT contain completion patterns #then should return false",
        "Working on the next task.",
        false,
      ],
      ["#when text is empty #then should return false", "", false],
    ]

    for (const [title, input, expected] of cases) {
      test(title, () => {
        expect(detectSemanticCompletion(input)).toBe(expected)
      })
    }
  })
})

View File

@@ -17,6 +17,18 @@ function buildPromisePattern(promise: string): RegExp {
return new RegExp(`<promise>\\s*${escapeRegex(promise)}\\s*</promise>`, "is")
}
/**
 * Lookbehind fragment that rejects a match directly preceded by "not"
 * (optionally "not all"), so a negated statement such as
 * "not all tasks are complete" is not read as a completion claim.
 */
const SEMANTIC_NEGATION_GUARD = String.raw`(?<!\bnot\s+(?:all\s+)?)`

/** Words accepted as describing a finished state. */
const SEMANTIC_DONE_WORDS = String.raw`(?:complete|completed|done|finished)`

/**
 * Natural-language phrasings that are treated as a completion signal.
 * All patterns are case-insensitive and prefixed with the negation guard.
 */
const SEMANTIC_COMPLETION_PATTERNS: readonly RegExp[] = [
  // "the task is complete", "all tasks are done"
  String.raw`\b(?:task|work|implementation|all\s+tasks?)\s+(?:is|are)\s+${SEMANTIC_DONE_WORDS}\b`,
  // "all items are done", "all todos marked as complete". A bare "marked" is
  // deliberately not enough: "marked as incomplete/pending" must not match.
  String.raw`\ball\s+(?:items?|todos?|steps?)\s+(?:are\s+)?(?:marked\s+(?:as\s+)?)?${SEMANTIC_DONE_WORDS}\b`,
  // "everything is finished", "all work done"
  String.raw`\b(?:everything|all\s+work)\s+(?:is\s+)?${SEMANTIC_DONE_WORDS}\b`,
  // "successfully completed all ..."
  String.raw`\bsuccessfully\s+completed?\s+all\b`,
  // "nothing left to do", "nothing remaining to fix"
  String.raw`\bnothing\s+(?:left|more|remaining)\s+to\s+(?:do|implement|fix)\b`,
].map((body) => new RegExp(SEMANTIC_NEGATION_GUARD + body, "i"))

/**
 * Heuristically detects whether free-form text states that the work is finished.
 *
 * This is a best-effort phrase match, not a semantic analysis: it looks for a
 * fixed set of completion phrasings and ignores those directly negated with
 * "not" / "not all".
 *
 * @param text - Text to inspect; an empty string never matches.
 * @returns `true` when at least one completion phrasing is found.
 */
export function detectSemanticCompletion(text: string): boolean {
  // The patterns carry no `g`/`y` flag, so `test` is stateless and safe to reuse.
  return SEMANTIC_COMPLETION_PATTERNS.some((pattern) => pattern.test(text))
}
export function detectCompletionInTranscript(
transcriptPath: string | undefined,
promise: string,
@@ -37,6 +49,11 @@ export function detectCompletionInTranscript(
if (entry.type === "user") continue
if (startedAt && entry.timestamp && entry.timestamp < startedAt) continue
if (pattern.test(line)) return true
// Fallback: check for semantic completion
if (detectSemanticCompletion(line)) {
log("[ralph-loop] WARNING: Semantic completion detected in transcript (agent used natural language instead of <promise>DONE</promise>)")
return true
}
} catch {
continue
}
@@ -100,6 +117,14 @@ export async function detectCompletionInSessionMessages(
if (pattern.test(responseText)) {
return true
}
// Fallback: check for semantic completion
if (detectSemanticCompletion(responseText)) {
log("[ralph-loop] WARNING: Semantic completion detected (agent used natural language instead of <promise>DONE</promise>)", {
sessionID: options.sessionID,
})
return true
}
}
return false