Patch summary (free-form preamble ahead of the first "diff --git" header):

  * src/hooks/ralph-loop/index.test.ts
      The ultrawork completion test no longer expects the
      "ULTRAWORK LOOP COMPLETE!" toast; it now expects an "ULTRAWORK LOOP"
      toast whose message matches /Oracle verification is now required/.
  * src/hooks/ralph-loop/ulw-loop-verification.test.ts (new, 146 lines)
      Five bun:test cases driving createRalphLoopHook through session.idle
      events: verification phase start, completion after the verification
      promise appears, unbounded iteration, ignoring a transcript entry
      timestamped in 2000, and state reset when the same session restarts.
  * src/plugin/tool-execute-before.ulw-loop.test.ts (new, 80 lines)
      Two bun:test cases for createToolExecuteBeforeHandler: an oracle task
      is forced to run_in_background=false with a rewritten prompt while
      verification is pending, and is left untouched when no state exists.

NOTE(review): this patch was rebuilt from a whitespace-collapsed copy.
  - Generic type arguments that had been stripped were restored:
    Parameters<typeof createRalphLoopHook>[0],
    Parameters<typeof createToolExecuteBeforeHandler>[0][...],
    Record<string, unknown>. Confirm against the upstream commit.
  - Indentation (including the context lines of index.test.ts) and the
    position of blank context lines are reconstructed; verify before applying.
  - String literals are reproduced exactly as received. Some of them may have
    lost angle-bracket markup in the same way the generics did (for example
    the template literal that contains only ${ULTRAWORK_VERIFICATION_PROMISE});
    TODO confirm against upstream.

diff --git a/src/hooks/ralph-loop/index.test.ts b/src/hooks/ralph-loop/index.test.ts
index a344a5a14..79c04479f 100644
--- a/src/hooks/ralph-loop/index.test.ts
+++ b/src/hooks/ralph-loop/index.test.ts
@@ -1078,7 +1078,7 @@ Original task: Build something`
       expect(messagesCalls.length).toBe(1)
     })
 
-    test("should show ultrawork completion toast", async () => {
+    test("should require oracle verification toast for ultrawork completion promise", async () => {
       // given - hook with ultrawork mode and completion in transcript
       const transcriptPath = join(TEST_DIR, "transcript.jsonl")
       const hook = createRalphLoopHook(createMockPluginInput(), {
@@ -1090,10 +1090,9 @@ Original task: Build something`
       // when - idle event triggered
       await hook.event({ event: { type: "session.idle", properties: { sessionID: "test-id" } } })
 
-      // then - ultrawork toast shown
-      const completionToast = toastCalls.find(t => t.title === "ULTRAWORK LOOP COMPLETE!")
-      expect(completionToast).toBeDefined()
-      expect(completionToast!.message).toMatch(/JUST ULW ULW!/)
+      const verificationToast = toastCalls.find(t => t.title === "ULTRAWORK LOOP")
+      expect(verificationToast).toBeDefined()
+      expect(verificationToast!.message).toMatch(/Oracle verification is now required/)
     })
 
     test("should show regular completion toast when ultrawork disabled", async () => {
diff --git a/src/hooks/ralph-loop/ulw-loop-verification.test.ts b/src/hooks/ralph-loop/ulw-loop-verification.test.ts
new file mode 100644
index 000000000..eb90eaec2
--- /dev/null
+++ b/src/hooks/ralph-loop/ulw-loop-verification.test.ts
@@ -0,0 +1,146 @@
+import { afterEach, beforeEach, describe, expect, test } from "bun:test"
+import { existsSync, mkdirSync, rmSync, writeFileSync } from "node:fs"
+import { tmpdir } from "node:os"
+import { join } from "node:path"
+import { createRalphLoopHook } from "./index"
+import { ULTRAWORK_VERIFICATION_PROMISE } from "./constants"
+import { clearState } from "./storage"
+
+describe("ulw-loop verification", () => {
+  const testDir = join(tmpdir(), `ulw-loop-verification-${Date.now()}`)
+  let promptCalls: Array<{ sessionID: string; text: string }>
+  let toastCalls: Array<{ title: string; message: string; variant: string }>
+  let transcriptPath: string
+
+  function createMockPluginInput() {
+    return {
+      client: {
+        session: {
+          promptAsync: async (opts: { path: { id: string }; body: { parts: Array<{ type: string; text: string }> } }) => {
+            promptCalls.push({
+              sessionID: opts.path.id,
+              text: opts.body.parts[0].text,
+            })
+            return {}
+          },
+          messages: async () => ({ data: [] }),
+        },
+        tui: {
+          showToast: async (opts: { body: { title: string; message: string; variant: string } }) => {
+            toastCalls.push(opts.body)
+            return {}
+          },
+        },
+      },
+      directory: testDir,
+    } as unknown as Parameters<typeof createRalphLoopHook>[0]
+  }
+
+  beforeEach(() => {
+    promptCalls = []
+    toastCalls = []
+    transcriptPath = join(testDir, "transcript.jsonl")
+
+    if (!existsSync(testDir)) {
+      mkdirSync(testDir, { recursive: true })
+    }
+
+    clearState(testDir)
+  })
+
+  afterEach(() => {
+    clearState(testDir)
+    if (existsSync(testDir)) {
+      rmSync(testDir, { recursive: true, force: true })
+    }
+  })
+
+  test("#given ulw loop emits DONE #when idle fires #then verification phase starts instead of completing", async () => {
+    const hook = createRalphLoopHook(createMockPluginInput(), {
+      getTranscriptPath: () => transcriptPath,
+    })
+    hook.startLoop("session-123", "Build API", { ultrawork: true })
+    writeFileSync(
+      transcriptPath,
+      `${JSON.stringify({ type: "tool_result", timestamp: new Date().toISOString(), tool_output: { output: "done DONE" } })}\n`,
+    )
+
+    await hook.event({ event: { type: "session.idle", properties: { sessionID: "session-123" } } })
+
+    expect(hook.getState()?.verification_pending).toBe(true)
+    expect(hook.getState()?.completion_promise).toBe(ULTRAWORK_VERIFICATION_PROMISE)
+    expect(promptCalls).toHaveLength(1)
+    expect(promptCalls[0].text).toContain('task(subagent_type="oracle"')
+    expect(toastCalls.some((toast) => toast.title === "ULTRAWORK LOOP COMPLETE!")).toBe(false)
+  })
+
+  test("#given ulw loop is awaiting verification #when VERIFIED appears #then loop completes", async () => {
+    const hook = createRalphLoopHook(createMockPluginInput(), {
+      getTranscriptPath: () => transcriptPath,
+    })
+    hook.startLoop("session-123", "Build API", { ultrawork: true })
+    writeFileSync(
+      transcriptPath,
+      `${JSON.stringify({ type: "tool_result", timestamp: new Date().toISOString(), tool_output: { output: "done DONE" } })}\n`,
+    )
+
+    await hook.event({ event: { type: "session.idle", properties: { sessionID: "session-123" } } })
+    writeFileSync(
+      transcriptPath,
+      `${JSON.stringify({ type: "tool_result", timestamp: new Date().toISOString(), tool_output: { output: "done DONE" } })}\n${JSON.stringify({ type: "tool_result", timestamp: new Date().toISOString(), tool_output: { output: `verified ${ULTRAWORK_VERIFICATION_PROMISE}` } })}\n`,
+    )
+
+    await hook.event({ event: { type: "session.idle", properties: { sessionID: "session-123" } } })
+
+    expect(hook.getState()).toBeNull()
+    expect(toastCalls.some((toast) => toast.title === "ULTRAWORK LOOP COMPLETE!")).toBe(true)
+  })
+
+  test("#given ulw loop without max iterations #when it continues #then it stays unbounded", async () => {
+    const hook = createRalphLoopHook(createMockPluginInput(), {
+      getTranscriptPath: () => transcriptPath,
+    })
+    hook.startLoop("session-123", "Build API", { ultrawork: true })
+
+    await hook.event({ event: { type: "session.idle", properties: { sessionID: "session-123" } } })
+
+    expect(hook.getState()?.iteration).toBe(2)
+    expect(hook.getState()?.max_iterations).toBeUndefined()
+    expect(promptCalls[0].text).toContain("2/unbounded")
+  })
+
+  test("#given prior transcript completion from older run #when new ulw loop starts #then old completion is ignored", async () => {
+    writeFileSync(
+      transcriptPath,
+      `${JSON.stringify({ type: "tool_result", timestamp: "2000-01-01T00:00:00.000Z", tool_output: { output: "old DONE" } })}\n`,
+    )
+    const hook = createRalphLoopHook(createMockPluginInput(), {
+      getTranscriptPath: () => transcriptPath,
+    })
+    hook.startLoop("session-123", "Build API", { ultrawork: true })
+
+    await hook.event({ event: { type: "session.idle", properties: { sessionID: "session-123" } } })
+
+    expect(hook.getState()?.iteration).toBe(2)
+    expect(hook.getState()?.verification_pending).toBeUndefined()
+    expect(promptCalls).toHaveLength(1)
+  })
+
+  test("#given ulw loop was awaiting verification #when same session starts again #then verification state is overwritten", async () => {
+    const hook = createRalphLoopHook(createMockPluginInput(), {
+      getTranscriptPath: () => transcriptPath,
+    })
+    hook.startLoop("session-123", "Build API", { ultrawork: true })
+    writeFileSync(
+      transcriptPath,
+      `${JSON.stringify({ type: "tool_result", timestamp: new Date().toISOString(), tool_output: { output: "done DONE" } })}\n`,
+    )
+
+    await hook.event({ event: { type: "session.idle", properties: { sessionID: "session-123" } } })
+    hook.startLoop("session-123", "Restarted task", { ultrawork: true })
+
+    expect(hook.getState()?.prompt).toBe("Restarted task")
+    expect(hook.getState()?.verification_pending).toBeUndefined()
+    expect(hook.getState()?.completion_promise).toBe("DONE")
+  })
+})
diff --git a/src/plugin/tool-execute-before.ulw-loop.test.ts b/src/plugin/tool-execute-before.ulw-loop.test.ts
new file mode 100644
index 000000000..c5606821d
--- /dev/null
+++ b/src/plugin/tool-execute-before.ulw-loop.test.ts
@@ -0,0 +1,80 @@
+import { describe, expect, test } from "bun:test"
+import { mkdirSync, rmSync } from "node:fs"
+import { tmpdir } from "node:os"
+import { join } from "node:path"
+import { createToolExecuteBeforeHandler } from "./tool-execute-before"
+import { ULTRAWORK_VERIFICATION_PROMISE } from "../hooks/ralph-loop/constants"
+import { clearState, writeState } from "../hooks/ralph-loop/storage"
+
+describe("tool.execute.before ultrawork oracle verification", () => {
+  function createCtx(directory: string) {
+    return {
+      directory,
+      client: {
+        session: {
+          messages: async () => ({ data: [] }),
+        },
+      },
+    }
+  }
+
+  test("#given ulw loop is awaiting verification #when oracle task runs #then oracle prompt is enforced and sync", async () => {
+    const directory = join(tmpdir(), `tool-before-ulw-${Date.now()}`)
+    mkdirSync(directory, { recursive: true })
+    writeState(directory, {
+      active: true,
+      iteration: 3,
+      completion_promise: ULTRAWORK_VERIFICATION_PROMISE,
+      initial_completion_promise: "DONE",
+      started_at: new Date().toISOString(),
+      prompt: "Ship feature",
+      session_id: "ses-main",
+      ultrawork: true,
+      verification_pending: true,
+    })
+
+    const handler = createToolExecuteBeforeHandler({
+      ctx: createCtx(directory) as unknown as Parameters<typeof createToolExecuteBeforeHandler>[0]["ctx"],
+      hooks: {} as Parameters<typeof createToolExecuteBeforeHandler>[0]["hooks"],
+    })
+    const output = {
+      args: {
+        subagent_type: "oracle",
+        run_in_background: true,
+        prompt: "Check it",
+      } as Record<string, unknown>,
+    }
+
+    await handler({ tool: "task", sessionID: "ses-main", callID: "call-1" }, output)
+
+    expect(output.args.run_in_background).toBe(false)
+    expect(output.args.prompt).toContain("Ship feature")
+    expect(output.args.prompt).toContain(`${ULTRAWORK_VERIFICATION_PROMISE}`)
+
+    clearState(directory)
+    rmSync(directory, { recursive: true, force: true })
+  })
+
+  test("#given ulw loop is not awaiting verification #when oracle task runs #then prompt is unchanged", async () => {
+    const directory = join(tmpdir(), `tool-before-ulw-${Date.now()}-plain`)
+    mkdirSync(directory, { recursive: true })
+    const handler = createToolExecuteBeforeHandler({
+      ctx: createCtx(directory) as unknown as Parameters<typeof createToolExecuteBeforeHandler>[0]["ctx"],
+      hooks: {} as Parameters<typeof createToolExecuteBeforeHandler>[0]["hooks"],
+    })
+    const output = {
+      args: {
+        subagent_type: "oracle",
+        run_in_background: true,
+        prompt: "Check it",
+      } as Record<string, unknown>,
+    }
+
+    await handler({ tool: "task", sessionID: "ses-main", callID: "call-1" }, output)
+
+    expect(output.args.run_in_background).toBe(true)
+    expect(output.args.prompt).toBe("Check it")
+
+    rmSync(directory, { recursive: true, force: true })
+  })
+})