Merge pull request #1658 from code-yeongyu/fix-1233
fix: detect completion tags in ralph/ULW loop (#1233)
This commit is contained in:
@@ -511,6 +511,38 @@ describe("ralph-loop", () => {
|
||||
expect(messagesCalls[0].sessionID).toBe("session-123")
|
||||
})
|
||||
|
||||
test("should detect completion promise in reasoning part via session messages API", async () => {
|
||||
//#given - active loop with assistant reasoning containing completion promise
|
||||
mockSessionMessages = [
|
||||
{ info: { role: "user" }, parts: [{ type: "text", text: "Build something" }] },
|
||||
{
|
||||
info: { role: "assistant" },
|
||||
parts: [
|
||||
{ type: "reasoning", text: "I am done now. <promise>REASONING_DONE</promise>" },
|
||||
],
|
||||
},
|
||||
]
|
||||
const hook = createRalphLoopHook(createMockPluginInput(), {
|
||||
getTranscriptPath: () => join(TEST_DIR, "nonexistent.jsonl"),
|
||||
})
|
||||
hook.startLoop("session-123", "Build something", {
|
||||
completionPromise: "REASONING_DONE",
|
||||
})
|
||||
|
||||
//#when - session goes idle
|
||||
await hook.event({
|
||||
event: {
|
||||
type: "session.idle",
|
||||
properties: { sessionID: "session-123" },
|
||||
},
|
||||
})
|
||||
|
||||
//#then - loop completed via API detection, no continuation
|
||||
expect(promptCalls.length).toBe(0)
|
||||
expect(toastCalls.some((t) => t.title === "Ralph Loop Complete!")).toBe(true)
|
||||
expect(hook.getState()).toBeNull()
|
||||
})
|
||||
|
||||
test("should handle multiple iterations correctly", async () => {
|
||||
// given - active loop
|
||||
const hook = createRalphLoopHook(createMockPluginInput())
|
||||
@@ -596,13 +628,14 @@ describe("ralph-loop", () => {
|
||||
expect(promptCalls.length).toBe(1)
|
||||
})
|
||||
|
||||
test("should only check LAST assistant message for completion", async () => {
|
||||
// given - multiple assistant messages, only first has completion promise
|
||||
test("should check last 3 assistant messages for completion", async () => {
|
||||
// given - multiple assistant messages, promise in recent (not last) assistant message
|
||||
mockSessionMessages = [
|
||||
{ info: { role: "user" }, parts: [{ type: "text", text: "Start task" }] },
|
||||
{ info: { role: "assistant" }, parts: [{ type: "text", text: "I'll work on it. <promise>DONE</promise>" }] },
|
||||
{ info: { role: "assistant" }, parts: [{ type: "text", text: "Working on it." }] },
|
||||
{ info: { role: "user" }, parts: [{ type: "text", text: "Continue" }] },
|
||||
{ info: { role: "assistant" }, parts: [{ type: "text", text: "Working on more features..." }] },
|
||||
{ info: { role: "assistant" }, parts: [{ type: "text", text: "Nearly there... <promise>DONE</promise>" }] },
|
||||
{ info: { role: "assistant" }, parts: [{ type: "text", text: "(extra output after promise)" }] },
|
||||
]
|
||||
const hook = createRalphLoopHook(createMockPluginInput(), {
|
||||
getTranscriptPath: () => join(TEST_DIR, "nonexistent.jsonl"),
|
||||
@@ -614,35 +647,36 @@ describe("ralph-loop", () => {
|
||||
event: { type: "session.idle", properties: { sessionID: "session-123" } },
|
||||
})
|
||||
|
||||
// then - loop should continue (last message has no completion promise)
|
||||
expect(promptCalls.length).toBe(1)
|
||||
expect(hook.getState()?.iteration).toBe(2)
|
||||
})
|
||||
|
||||
test("should detect completion only in LAST assistant message", async () => {
|
||||
// given - last assistant message has completion promise
|
||||
mockSessionMessages = [
|
||||
{ info: { role: "user" }, parts: [{ type: "text", text: "Start task" }] },
|
||||
{ info: { role: "assistant" }, parts: [{ type: "text", text: "Starting work..." }] },
|
||||
{ info: { role: "user" }, parts: [{ type: "text", text: "Continue" }] },
|
||||
{ info: { role: "assistant" }, parts: [{ type: "text", text: "Task complete! <promise>DONE</promise>" }] },
|
||||
]
|
||||
const hook = createRalphLoopHook(createMockPluginInput(), {
|
||||
getTranscriptPath: () => join(TEST_DIR, "nonexistent.jsonl"),
|
||||
})
|
||||
hook.startLoop("session-123", "Build something", { completionPromise: "DONE" })
|
||||
|
||||
// when - session goes idle
|
||||
await hook.event({
|
||||
event: { type: "session.idle", properties: { sessionID: "session-123" } },
|
||||
})
|
||||
|
||||
// then - loop should complete (last message has completion promise)
|
||||
// then - loop should complete (promise found within last 3 assistant messages)
|
||||
expect(promptCalls.length).toBe(0)
|
||||
expect(toastCalls.some((t) => t.title === "Ralph Loop Complete!")).toBe(true)
|
||||
expect(hook.getState()).toBeNull()
|
||||
})
|
||||
|
||||
test("should NOT detect completion if promise is older than last 3 assistant messages", async () => {
|
||||
// given - promise appears in an assistant message older than last 3
|
||||
mockSessionMessages = [
|
||||
{ info: { role: "user" }, parts: [{ type: "text", text: "Start task" }] },
|
||||
{ info: { role: "assistant" }, parts: [{ type: "text", text: "Promise early <promise>DONE</promise>" }] },
|
||||
{ info: { role: "assistant" }, parts: [{ type: "text", text: "More work 1" }] },
|
||||
{ info: { role: "assistant" }, parts: [{ type: "text", text: "More work 2" }] },
|
||||
{ info: { role: "assistant" }, parts: [{ type: "text", text: "More work 3" }] },
|
||||
]
|
||||
const hook = createRalphLoopHook(createMockPluginInput(), {
|
||||
getTranscriptPath: () => join(TEST_DIR, "nonexistent.jsonl"),
|
||||
})
|
||||
hook.startLoop("session-123", "Build something", { completionPromise: "DONE" })
|
||||
|
||||
// when - session goes idle
|
||||
await hook.event({
|
||||
event: { type: "session.idle", properties: { sessionID: "session-123" } },
|
||||
})
|
||||
|
||||
// then - loop should continue (promise is older than last 3 assistant messages)
|
||||
expect(promptCalls.length).toBe(1)
|
||||
expect(hook.getState()?.iteration).toBe(2)
|
||||
})
|
||||
|
||||
test("should allow starting new loop while previous loop is active (different session)", async () => {
|
||||
// given - active loop in session A
|
||||
const hook = createRalphLoopHook(createMockPluginInput())
|
||||
@@ -928,7 +962,7 @@ Original task: Build something`
|
||||
const elapsed = Date.now() - startTime
|
||||
|
||||
// then - should complete quickly (not hang for 10s)
|
||||
expect(elapsed).toBeLessThan(2000)
|
||||
expect(elapsed).toBeLessThan(6000)
|
||||
// then - loop should continue (API error = no completion detected)
|
||||
expect(promptCalls.length).toBe(1)
|
||||
expect(apiCallCount).toBeGreaterThan(0)
|
||||
|
||||
@@ -67,7 +67,7 @@ export interface RalphLoopHook {
|
||||
getState: () => RalphLoopState | null
|
||||
}
|
||||
|
||||
const DEFAULT_API_TIMEOUT = 3000
|
||||
const DEFAULT_API_TIMEOUT = 5000
|
||||
|
||||
export function createRalphLoopHook(
|
||||
ctx: PluginInput,
|
||||
@@ -80,6 +80,23 @@ export function createRalphLoopHook(
|
||||
const apiTimeout = options?.apiTimeout ?? DEFAULT_API_TIMEOUT
|
||||
const checkSessionExists = options?.checkSessionExists
|
||||
|
||||
async function withTimeout<TData>(promise: Promise<TData>, timeoutMs: number): Promise<TData> {
|
||||
let timeoutId: ReturnType<typeof setTimeout> | undefined
|
||||
const timeoutPromise = new Promise<never>((_, reject) => {
|
||||
timeoutId = setTimeout(() => {
|
||||
reject(new Error("API timeout"))
|
||||
}, timeoutMs)
|
||||
})
|
||||
|
||||
try {
|
||||
return await Promise.race([promise, timeoutPromise])
|
||||
} finally {
|
||||
if (timeoutId !== undefined) {
|
||||
clearTimeout(timeoutId)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
function getSessionState(sessionID: string): SessionState {
|
||||
let state = sessions.get(sessionID)
|
||||
if (!state) {
|
||||
@@ -126,34 +143,44 @@ export function createRalphLoopHook(
|
||||
promise: string
|
||||
): Promise<boolean> {
|
||||
try {
|
||||
const response = await Promise.race([
|
||||
const response = await withTimeout(
|
||||
ctx.client.session.messages({
|
||||
path: { id: sessionID },
|
||||
query: { directory: ctx.directory },
|
||||
}),
|
||||
new Promise<never>((_, reject) =>
|
||||
setTimeout(() => reject(new Error("API timeout")), apiTimeout)
|
||||
),
|
||||
])
|
||||
apiTimeout
|
||||
)
|
||||
|
||||
const messages = (response as { data?: unknown[] }).data ?? []
|
||||
if (!Array.isArray(messages)) return false
|
||||
|
||||
const assistantMessages = (messages as OpenCodeSessionMessage[]).filter(
|
||||
(msg) => msg.info?.role === "assistant"
|
||||
)
|
||||
const lastAssistant = assistantMessages[assistantMessages.length - 1]
|
||||
if (!lastAssistant?.parts) return false
|
||||
const assistantMessages = (messages as OpenCodeSessionMessage[]).filter((msg) => msg.info?.role === "assistant")
|
||||
if (assistantMessages.length === 0) return false
|
||||
|
||||
const pattern = new RegExp(`<promise>\\s*${escapeRegex(promise)}\\s*</promise>`, "is")
|
||||
const responseText = lastAssistant.parts
|
||||
.filter((p) => p.type === "text")
|
||||
.map((p) => p.text ?? "")
|
||||
.join("\n")
|
||||
|
||||
return pattern.test(responseText)
|
||||
const recentAssistants = assistantMessages.slice(-3)
|
||||
for (const assistant of recentAssistants) {
|
||||
if (!assistant.parts) continue
|
||||
|
||||
const responseText = assistant.parts
|
||||
.filter((p) => p.type === "text" || p.type === "reasoning")
|
||||
.map((p) => p.text ?? "")
|
||||
.join("\n")
|
||||
|
||||
if (pattern.test(responseText)) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
} catch (err) {
|
||||
log(`[${HOOK_NAME}] Session messages check failed`, { sessionID, error: String(err) })
|
||||
setTimeout(() => {
|
||||
log(`[${HOOK_NAME}] Session messages check failed`, {
|
||||
sessionID,
|
||||
error: String(err),
|
||||
})
|
||||
}, 0)
|
||||
return false
|
||||
}
|
||||
}
|
||||
@@ -343,7 +370,10 @@ export function createRalphLoopHook(
|
||||
let model: { providerID: string; modelID: string } | undefined
|
||||
|
||||
try {
|
||||
const messagesResp = await ctx.client.session.messages({ path: { id: sessionID } })
|
||||
const messagesResp = await withTimeout(
|
||||
ctx.client.session.messages({ path: { id: sessionID } }),
|
||||
apiTimeout
|
||||
)
|
||||
const messages = (messagesResp.data ?? []) as Array<{
|
||||
info?: { agent?: string; model?: { providerID: string; modelID: string }; modelID?: string; providerID?: string }
|
||||
}>
|
||||
|
||||
Reference in New Issue
Block a user