Merge pull request #1658 from code-yeongyu/fix-1233

fix: detect completion tags in ralph/ULW loop (#1233)
2026-02-08 15:51:16 +09:00
parent 4c1e369176 676ff513fa
commit 32193dc10d
2 changed files with 111 additions and 47 deletions
--- a/src/hooks/ralph-loop/index.test.ts
+++ b/src/hooks/ralph-loop/index.test.ts
@@ -511,6 +511,38 @@ describe("ralph-loop", () => {
      expect(messagesCalls[0].sessionID).toBe("session-123")
    })

+    test("should detect completion promise in reasoning part via session messages API", async () => {
+      //#given - active loop with assistant reasoning containing completion promise
+      mockSessionMessages = [
+        { info: { role: "user" }, parts: [{ type: "text", text: "Build something" }] },
+        {
+          info: { role: "assistant" },
+          parts: [
+            { type: "reasoning", text: "I am done now. <promise>REASONING_DONE</promise>" },
+          ],
+        },
+      ]
+      const hook = createRalphLoopHook(createMockPluginInput(), {
+        getTranscriptPath: () => join(TEST_DIR, "nonexistent.jsonl"),
+      })
+      hook.startLoop("session-123", "Build something", {
+        completionPromise: "REASONING_DONE",
+      })
+
+      //#when - session goes idle
+      await hook.event({
+        event: {
+          type: "session.idle",
+          properties: { sessionID: "session-123" },
+        },
+      })
+
+      //#then - loop completed via API detection, no continuation
+      expect(promptCalls.length).toBe(0)
+      expect(toastCalls.some((t) => t.title === "Ralph Loop Complete!")).toBe(true)
+      expect(hook.getState()).toBeNull()
+    })
+
    test("should handle multiple iterations correctly", async () => {
      // given - active loop
      const hook = createRalphLoopHook(createMockPluginInput())
@@ -596,13 +628,14 @@ describe("ralph-loop", () => {
      expect(promptCalls.length).toBe(1)
    })

-    test("should only check LAST assistant message for completion", async () => {
-      // given - multiple assistant messages, only first has completion promise
+    test("should check last 3 assistant messages for completion", async () => {
+      // given - multiple assistant messages, promise in recent (not last) assistant message
      mockSessionMessages = [
        { info: { role: "user" }, parts: [{ type: "text", text: "Start task" }] },
-        { info: { role: "assistant" }, parts: [{ type: "text", text: "I'll work on it. <promise>DONE</promise>" }] },
+        { info: { role: "assistant" }, parts: [{ type: "text", text: "Working on it." }] },
        { info: { role: "user" }, parts: [{ type: "text", text: "Continue" }] },
-        { info: { role: "assistant" }, parts: [{ type: "text", text: "Working on more features..." }] },
+        { info: { role: "assistant" }, parts: [{ type: "text", text: "Nearly there... <promise>DONE</promise>" }] },
+        { info: { role: "assistant" }, parts: [{ type: "text", text: "(extra output after promise)" }] },
      ]
      const hook = createRalphLoopHook(createMockPluginInput(), {
        getTranscriptPath: () => join(TEST_DIR, "nonexistent.jsonl"),
@@ -614,35 +647,36 @@ describe("ralph-loop", () => {
        event: { type: "session.idle", properties: { sessionID: "session-123" } },
      })

-      // then - loop should continue (last message has no completion promise)
-      expect(promptCalls.length).toBe(1)
-      expect(hook.getState()?.iteration).toBe(2)
-    })
-
-    test("should detect completion only in LAST assistant message", async () => {
-      // given - last assistant message has completion promise
-      mockSessionMessages = [
-        { info: { role: "user" }, parts: [{ type: "text", text: "Start task" }] },
-        { info: { role: "assistant" }, parts: [{ type: "text", text: "Starting work..." }] },
-        { info: { role: "user" }, parts: [{ type: "text", text: "Continue" }] },
-        { info: { role: "assistant" }, parts: [{ type: "text", text: "Task complete! <promise>DONE</promise>" }] },
-      ]
-      const hook = createRalphLoopHook(createMockPluginInput(), {
-        getTranscriptPath: () => join(TEST_DIR, "nonexistent.jsonl"),
-      })
-      hook.startLoop("session-123", "Build something", { completionPromise: "DONE" })
-
-      // when - session goes idle
-      await hook.event({
-        event: { type: "session.idle", properties: { sessionID: "session-123" } },
-      })
-
-      // then - loop should complete (last message has completion promise)
+      // then - loop should complete (promise found within last 3 assistant messages)
      expect(promptCalls.length).toBe(0)
      expect(toastCalls.some((t) => t.title === "Ralph Loop Complete!")).toBe(true)
      expect(hook.getState()).toBeNull()
    })

+    test("should NOT detect completion if promise is older than last 3 assistant messages", async () => {
+      // given - promise appears in an assistant message older than last 3
+      mockSessionMessages = [
+        { info: { role: "user" }, parts: [{ type: "text", text: "Start task" }] },
+        { info: { role: "assistant" }, parts: [{ type: "text", text: "Promise early <promise>DONE</promise>" }] },
+        { info: { role: "assistant" }, parts: [{ type: "text", text: "More work 1" }] },
+        { info: { role: "assistant" }, parts: [{ type: "text", text: "More work 2" }] },
+        { info: { role: "assistant" }, parts: [{ type: "text", text: "More work 3" }] },
+      ]
+      const hook = createRalphLoopHook(createMockPluginInput(), {
+        getTranscriptPath: () => join(TEST_DIR, "nonexistent.jsonl"),
+      })
+      hook.startLoop("session-123", "Build something", { completionPromise: "DONE" })
+
+      // when - session goes idle
+      await hook.event({
+        event: { type: "session.idle", properties: { sessionID: "session-123" } },
+      })
+
+      // then - loop should continue (promise is older than last 3 assistant messages)
+      expect(promptCalls.length).toBe(1)
+      expect(hook.getState()?.iteration).toBe(2)
+    })
+
    test("should allow starting new loop while previous loop is active (different session)", async () => {
      // given - active loop in session A
      const hook = createRalphLoopHook(createMockPluginInput())
@@ -928,7 +962,7 @@ Original task: Build something`
      const elapsed = Date.now() - startTime

      // then - should complete quickly (not hang for 10s)
-      expect(elapsed).toBeLessThan(2000)
+      expect(elapsed).toBeLessThan(6000)
      // then - loop should continue (API error = no completion detected)
      expect(promptCalls.length).toBe(1)
      expect(apiCallCount).toBeGreaterThan(0)
--- a/src/hooks/ralph-loop/index.ts
+++ b/src/hooks/ralph-loop/index.ts
@@ -67,7 +67,7 @@ export interface RalphLoopHook {
  getState: () => RalphLoopState | null
 }

-const DEFAULT_API_TIMEOUT = 3000
+const DEFAULT_API_TIMEOUT = 5000

 export function createRalphLoopHook(
  ctx: PluginInput,
@@ -80,6 +80,23 @@ export function createRalphLoopHook(
  const apiTimeout = options?.apiTimeout ?? DEFAULT_API_TIMEOUT
  const checkSessionExists = options?.checkSessionExists

+  async function withTimeout<TData>(promise: Promise<TData>, timeoutMs: number): Promise<TData> {
+    let timeoutId: ReturnType<typeof setTimeout> | undefined
+    const timeoutPromise = new Promise<never>((_, reject) => {
+      timeoutId = setTimeout(() => {
+        reject(new Error("API timeout"))
+      }, timeoutMs)
+    })
+
+    try {
+      return await Promise.race([promise, timeoutPromise])
+    } finally {
+      if (timeoutId !== undefined) {
+        clearTimeout(timeoutId)
+      }
+    }
+  }
+
  function getSessionState(sessionID: string): SessionState {
    let state = sessions.get(sessionID)
    if (!state) {
@@ -126,34 +143,44 @@ export function createRalphLoopHook(
    promise: string
  ): Promise<boolean> {
    try {
-      const response = await Promise.race([
+      const response = await withTimeout(
        ctx.client.session.messages({
          path: { id: sessionID },
          query: { directory: ctx.directory },
        }),
-        new Promise<never>((_, reject) =>
-          setTimeout(() => reject(new Error("API timeout")), apiTimeout)
-        ),
-      ])
+        apiTimeout
+      )

      const messages = (response as { data?: unknown[] }).data ?? []
      if (!Array.isArray(messages)) return false

-      const assistantMessages = (messages as OpenCodeSessionMessage[]).filter(
-        (msg) => msg.info?.role === "assistant"
-      )
-      const lastAssistant = assistantMessages[assistantMessages.length - 1]
-      if (!lastAssistant?.parts) return false
+      const assistantMessages = (messages as OpenCodeSessionMessage[]).filter((msg) => msg.info?.role === "assistant")
+      if (assistantMessages.length === 0) return false

      const pattern = new RegExp(`<promise>\\s*${escapeRegex(promise)}\\s*</promise>`, "is")
-      const responseText = lastAssistant.parts
-        .filter((p) => p.type === "text")
-        .map((p) => p.text ?? "")
-        .join("\n")

-      return pattern.test(responseText)
+      const recentAssistants = assistantMessages.slice(-3)
+      for (const assistant of recentAssistants) {
+        if (!assistant.parts) continue
+
+        const responseText = assistant.parts
+          .filter((p) => p.type === "text" || p.type === "reasoning")
+          .map((p) => p.text ?? "")
+          .join("\n")
+
+        if (pattern.test(responseText)) {
+          return true
+        }
+      }
+
+      return false
    } catch (err) {
-      log(`[${HOOK_NAME}] Session messages check failed`, { sessionID, error: String(err) })
+      setTimeout(() => {
+        log(`[${HOOK_NAME}] Session messages check failed`, {
+          sessionID,
+          error: String(err),
+        })
+      }, 0)
      return false
    }
  }
@@ -343,7 +370,10 @@ export function createRalphLoopHook(
        let model: { providerID: string; modelID: string } | undefined

        try {
-          const messagesResp = await ctx.client.session.messages({ path: { id: sessionID } })
+          const messagesResp = await withTimeout(
+            ctx.client.session.messages({ path: { id: sessionID } }),
+            apiTimeout
+          )
          const messages = (messagesResp.data ?? []) as Array<{
            info?: { agent?: string; model?: { providerID: string; modelID: string }; modelID?: string; providerID?: string }
          }>