Merge pull request #1658 from code-yeongyu/fix-1233

fix: detect completion tags in ralph/ULW loop (#1233)
This commit is contained in:
YeonGyu-Kim
2026-02-08 15:51:16 +09:00
committed by GitHub
2 changed files with 111 additions and 47 deletions

View File

@@ -511,6 +511,38 @@ describe("ralph-loop", () => {
expect(messagesCalls[0].sessionID).toBe("session-123")
})
test("should detect completion promise in reasoning part via session messages API", async () => {
//#given - active loop with assistant reasoning containing completion promise
mockSessionMessages = [
{ info: { role: "user" }, parts: [{ type: "text", text: "Build something" }] },
{
info: { role: "assistant" },
parts: [
{ type: "reasoning", text: "I am done now. <promise>REASONING_DONE</promise>" },
],
},
]
const hook = createRalphLoopHook(createMockPluginInput(), {
getTranscriptPath: () => join(TEST_DIR, "nonexistent.jsonl"),
})
hook.startLoop("session-123", "Build something", {
completionPromise: "REASONING_DONE",
})
//#when - session goes idle
await hook.event({
event: {
type: "session.idle",
properties: { sessionID: "session-123" },
},
})
//#then - loop completed via API detection, no continuation
expect(promptCalls.length).toBe(0)
expect(toastCalls.some((t) => t.title === "Ralph Loop Complete!")).toBe(true)
expect(hook.getState()).toBeNull()
})
test("should handle multiple iterations correctly", async () => {
// given - active loop
const hook = createRalphLoopHook(createMockPluginInput())
@@ -596,13 +628,14 @@ describe("ralph-loop", () => {
expect(promptCalls.length).toBe(1)
})
test("should only check LAST assistant message for completion", async () => {
// given - multiple assistant messages, only first has completion promise
test("should check last 3 assistant messages for completion", async () => {
// given - multiple assistant messages, promise in recent (not last) assistant message
mockSessionMessages = [
{ info: { role: "user" }, parts: [{ type: "text", text: "Start task" }] },
{ info: { role: "assistant" }, parts: [{ type: "text", text: "I'll work on it. <promise>DONE</promise>" }] },
{ info: { role: "assistant" }, parts: [{ type: "text", text: "Working on it." }] },
{ info: { role: "user" }, parts: [{ type: "text", text: "Continue" }] },
{ info: { role: "assistant" }, parts: [{ type: "text", text: "Working on more features..." }] },
{ info: { role: "assistant" }, parts: [{ type: "text", text: "Nearly there... <promise>DONE</promise>" }] },
{ info: { role: "assistant" }, parts: [{ type: "text", text: "(extra output after promise)" }] },
]
const hook = createRalphLoopHook(createMockPluginInput(), {
getTranscriptPath: () => join(TEST_DIR, "nonexistent.jsonl"),
@@ -614,35 +647,36 @@ describe("ralph-loop", () => {
event: { type: "session.idle", properties: { sessionID: "session-123" } },
})
// then - loop should continue (last message has no completion promise)
expect(promptCalls.length).toBe(1)
expect(hook.getState()?.iteration).toBe(2)
})
test("should detect completion only in LAST assistant message", async () => {
// given - last assistant message has completion promise
mockSessionMessages = [
{ info: { role: "user" }, parts: [{ type: "text", text: "Start task" }] },
{ info: { role: "assistant" }, parts: [{ type: "text", text: "Starting work..." }] },
{ info: { role: "user" }, parts: [{ type: "text", text: "Continue" }] },
{ info: { role: "assistant" }, parts: [{ type: "text", text: "Task complete! <promise>DONE</promise>" }] },
]
const hook = createRalphLoopHook(createMockPluginInput(), {
getTranscriptPath: () => join(TEST_DIR, "nonexistent.jsonl"),
})
hook.startLoop("session-123", "Build something", { completionPromise: "DONE" })
// when - session goes idle
await hook.event({
event: { type: "session.idle", properties: { sessionID: "session-123" } },
})
// then - loop should complete (last message has completion promise)
// then - loop should complete (promise found within last 3 assistant messages)
expect(promptCalls.length).toBe(0)
expect(toastCalls.some((t) => t.title === "Ralph Loop Complete!")).toBe(true)
expect(hook.getState()).toBeNull()
})
test("should NOT detect completion if promise is older than last 3 assistant messages", async () => {
// given - promise appears in an assistant message older than last 3
mockSessionMessages = [
{ info: { role: "user" }, parts: [{ type: "text", text: "Start task" }] },
{ info: { role: "assistant" }, parts: [{ type: "text", text: "Promise early <promise>DONE</promise>" }] },
{ info: { role: "assistant" }, parts: [{ type: "text", text: "More work 1" }] },
{ info: { role: "assistant" }, parts: [{ type: "text", text: "More work 2" }] },
{ info: { role: "assistant" }, parts: [{ type: "text", text: "More work 3" }] },
]
const hook = createRalphLoopHook(createMockPluginInput(), {
getTranscriptPath: () => join(TEST_DIR, "nonexistent.jsonl"),
})
hook.startLoop("session-123", "Build something", { completionPromise: "DONE" })
// when - session goes idle
await hook.event({
event: { type: "session.idle", properties: { sessionID: "session-123" } },
})
// then - loop should continue (promise is older than last 3 assistant messages)
expect(promptCalls.length).toBe(1)
expect(hook.getState()?.iteration).toBe(2)
})
test("should allow starting new loop while previous loop is active (different session)", async () => {
// given - active loop in session A
const hook = createRalphLoopHook(createMockPluginInput())
@@ -928,7 +962,7 @@ Original task: Build something`
const elapsed = Date.now() - startTime
// then - should complete quickly (not hang for 10s)
expect(elapsed).toBeLessThan(2000)
expect(elapsed).toBeLessThan(6000)
// then - loop should continue (API error = no completion detected)
expect(promptCalls.length).toBe(1)
expect(apiCallCount).toBeGreaterThan(0)

View File

@@ -67,7 +67,7 @@ export interface RalphLoopHook {
getState: () => RalphLoopState | null
}
const DEFAULT_API_TIMEOUT = 3000
const DEFAULT_API_TIMEOUT = 5000
export function createRalphLoopHook(
ctx: PluginInput,
@@ -80,6 +80,23 @@ export function createRalphLoopHook(
const apiTimeout = options?.apiTimeout ?? DEFAULT_API_TIMEOUT
const checkSessionExists = options?.checkSessionExists
async function withTimeout<TData>(promise: Promise<TData>, timeoutMs: number): Promise<TData> {
let timeoutId: ReturnType<typeof setTimeout> | undefined
const timeoutPromise = new Promise<never>((_, reject) => {
timeoutId = setTimeout(() => {
reject(new Error("API timeout"))
}, timeoutMs)
})
try {
return await Promise.race([promise, timeoutPromise])
} finally {
if (timeoutId !== undefined) {
clearTimeout(timeoutId)
}
}
}
function getSessionState(sessionID: string): SessionState {
let state = sessions.get(sessionID)
if (!state) {
@@ -126,34 +143,44 @@ export function createRalphLoopHook(
promise: string
): Promise<boolean> {
try {
const response = await Promise.race([
const response = await withTimeout(
ctx.client.session.messages({
path: { id: sessionID },
query: { directory: ctx.directory },
}),
new Promise<never>((_, reject) =>
setTimeout(() => reject(new Error("API timeout")), apiTimeout)
),
])
apiTimeout
)
const messages = (response as { data?: unknown[] }).data ?? []
if (!Array.isArray(messages)) return false
const assistantMessages = (messages as OpenCodeSessionMessage[]).filter(
(msg) => msg.info?.role === "assistant"
)
const lastAssistant = assistantMessages[assistantMessages.length - 1]
if (!lastAssistant?.parts) return false
const assistantMessages = (messages as OpenCodeSessionMessage[]).filter((msg) => msg.info?.role === "assistant")
if (assistantMessages.length === 0) return false
const pattern = new RegExp(`<promise>\\s*${escapeRegex(promise)}\\s*</promise>`, "is")
const responseText = lastAssistant.parts
.filter((p) => p.type === "text")
.map((p) => p.text ?? "")
.join("\n")
return pattern.test(responseText)
const recentAssistants = assistantMessages.slice(-3)
for (const assistant of recentAssistants) {
if (!assistant.parts) continue
const responseText = assistant.parts
.filter((p) => p.type === "text" || p.type === "reasoning")
.map((p) => p.text ?? "")
.join("\n")
if (pattern.test(responseText)) {
return true
}
}
return false
} catch (err) {
log(`[${HOOK_NAME}] Session messages check failed`, { sessionID, error: String(err) })
setTimeout(() => {
log(`[${HOOK_NAME}] Session messages check failed`, {
sessionID,
error: String(err),
})
}, 0)
return false
}
}
@@ -343,7 +370,10 @@ export function createRalphLoopHook(
let model: { providerID: string; modelID: string } | undefined
try {
const messagesResp = await ctx.client.session.messages({ path: { id: sessionID } })
const messagesResp = await withTimeout(
ctx.client.session.messages({ path: { id: sessionID } }),
apiTimeout
)
const messages = (messagesResp.data ?? []) as Array<{
info?: { agent?: string; model?: { providerID: string; modelID: string }; modelID?: string; providerID?: string }
}>