test(ulw-loop): cover Oracle verification flow

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
This commit is contained in:
YeonGyu-Kim
2026-03-06 22:00:21 +09:00
parent a010de1db2
commit c17f7215f2
3 changed files with 230 additions and 5 deletions

View File

@@ -1078,7 +1078,7 @@ Original task: Build something`
expect(messagesCalls.length).toBe(1)
})
test("should show ultrawork completion toast", async () => {
test("should require oracle verification toast for ultrawork completion promise", async () => {
// given - hook with ultrawork mode and completion in transcript
const transcriptPath = join(TEST_DIR, "transcript.jsonl")
const hook = createRalphLoopHook(createMockPluginInput(), {
@@ -1090,10 +1090,9 @@ Original task: Build something`
// when - idle event triggered
await hook.event({ event: { type: "session.idle", properties: { sessionID: "test-id" } } })
// then - ultrawork toast shown
const completionToast = toastCalls.find(t => t.title === "ULTRAWORK LOOP COMPLETE!")
expect(completionToast).toBeDefined()
expect(completionToast!.message).toMatch(/JUST ULW ULW!/)
const verificationToast = toastCalls.find(t => t.title === "ULTRAWORK LOOP")
expect(verificationToast).toBeDefined()
expect(verificationToast!.message).toMatch(/Oracle verification is now required/)
})
test("should show regular completion toast when ultrawork disabled", async () => {

View File

@@ -0,0 +1,146 @@
import { afterEach, beforeEach, describe, expect, test } from "bun:test"
import { existsSync, mkdirSync, rmSync, writeFileSync } from "node:fs"
import { tmpdir } from "node:os"
import { join } from "node:path"
import { createRalphLoopHook } from "./index"
import { ULTRAWORK_VERIFICATION_PROMISE } from "./constants"
import { clearState } from "./storage"
/**
 * Exercises the Oracle verification phase of the ultrawork (ulw) loop through
 * the hook returned by `createRalphLoopHook`, using a mocked plugin client and
 * a transcript file that lives in a per-run temporary directory.
 */
describe("ulw-loop verification", () => {
  type PluginInput = Parameters<typeof createRalphLoopHook>[0]
  type LoopHook = ReturnType<typeof createRalphLoopHook>

  const SESSION_ID = "session-123"
  const COMPLETION_TOAST_TITLE = "ULTRAWORK LOOP COMPLETE!"
  const testDir = join(tmpdir(), `ulw-loop-verification-${Date.now()}`)

  // Recorders that the mocked client appends to; reset before every test.
  let promptCalls: Array<{ sessionID: string; text: string }>
  let toastCalls: Array<{ title: string; message: string; variant: string }>
  let transcriptPath: string

  /** Builds a plugin input whose client only records prompts and toasts. */
  function createMockPluginInput() {
    const session = {
      promptAsync: async (opts: { path: { id: string }; body: { parts: Array<{ type: string; text: string }> } }) => {
        const sessionID = opts.path.id
        const text = opts.body.parts[0].text
        promptCalls.push({ sessionID, text })
        return {}
      },
      messages: async () => ({ data: [] }),
    }
    const tui = {
      showToast: async (opts: { body: { title: string; message: string; variant: string } }) => {
        toastCalls.push(opts.body)
        return {}
      },
    }
    return { client: { session, tui }, directory: testDir } as unknown as PluginInput
  }

  /** Creates a hook that reads its transcript from the current `transcriptPath`. */
  const createHook = (): LoopHook =>
    createRalphLoopHook(createMockPluginInput(), {
      getTranscriptPath: () => transcriptPath,
    })

  /** Serializes one `tool_result` transcript line; the timestamp defaults to "now". */
  const toolResultLine = (output: string, timestamp: string = new Date().toISOString()): string =>
    `${JSON.stringify({ type: "tool_result", timestamp, tool_output: { output } })}\n`

  /** Delivers a `session.idle` event for the test session to the hook. */
  const fireIdle = (hook: LoopHook) =>
    hook.event({ event: { type: "session.idle", properties: { sessionID: SESSION_ID } } })

  /** Reports whether the final ultrawork completion toast has been recorded. */
  const completionToastShown = (): boolean =>
    toastCalls.some((toast) => toast.title === COMPLETION_TOAST_TITLE)

  beforeEach(() => {
    promptCalls = []
    toastCalls = []
    transcriptPath = join(testDir, "transcript.jsonl")
    if (!existsSync(testDir)) {
      mkdirSync(testDir, { recursive: true })
    }
    clearState(testDir)
  })

  afterEach(() => {
    clearState(testDir)
    if (!existsSync(testDir)) {
      return
    }
    rmSync(testDir, { recursive: true, force: true })
  })

  test("#given ulw loop emits DONE #when idle fires #then verification phase starts instead of completing", async () => {
    const hook = createHook()
    hook.startLoop(SESSION_ID, "Build API", { ultrawork: true })
    writeFileSync(transcriptPath, toolResultLine("done <promise>DONE</promise>"))

    await fireIdle(hook)

    const state = hook.getState()
    expect(state?.verification_pending).toBe(true)
    expect(state?.completion_promise).toBe(ULTRAWORK_VERIFICATION_PROMISE)
    expect(promptCalls).toHaveLength(1)
    expect(promptCalls[0].text).toContain('task(subagent_type="oracle"')
    expect(completionToastShown()).toBe(false)
  })

  test("#given ulw loop is awaiting verification #when VERIFIED appears #then loop completes", async () => {
    const hook = createHook()
    hook.startLoop(SESSION_ID, "Build API", { ultrawork: true })
    writeFileSync(transcriptPath, toolResultLine("done <promise>DONE</promise>"))
    await fireIdle(hook)

    // Rewrite the transcript so it holds both the DONE line and the verification line.
    const doneLine = toolResultLine("done <promise>DONE</promise>")
    const verifiedLine = toolResultLine(`verified <promise>${ULTRAWORK_VERIFICATION_PROMISE}</promise>`)
    writeFileSync(transcriptPath, doneLine + verifiedLine)
    await fireIdle(hook)

    expect(hook.getState()).toBeNull()
    expect(completionToastShown()).toBe(true)
  })

  test("#given ulw loop without max iterations #when it continues #then it stays unbounded", async () => {
    const hook = createHook()
    hook.startLoop(SESSION_ID, "Build API", { ultrawork: true })

    await fireIdle(hook)

    const state = hook.getState()
    expect(state?.iteration).toBe(2)
    expect(state?.max_iterations).toBeUndefined()
    expect(promptCalls[0].text).toContain("2/unbounded")
  })

  test("#given prior transcript completion from older run #when new ulw loop starts #then old completion is ignored", async () => {
    // The stale entry is written before the hook exists and carries a year-2000 timestamp.
    writeFileSync(transcriptPath, toolResultLine("old <promise>DONE</promise>", "2000-01-01T00:00:00.000Z"))
    const hook = createHook()
    hook.startLoop(SESSION_ID, "Build API", { ultrawork: true })

    await fireIdle(hook)

    const state = hook.getState()
    expect(state?.iteration).toBe(2)
    expect(state?.verification_pending).toBeUndefined()
    expect(promptCalls).toHaveLength(1)
  })

  test("#given ulw loop was awaiting verification #when same session starts again #then verification state is overwritten", async () => {
    const hook = createHook()
    hook.startLoop(SESSION_ID, "Build API", { ultrawork: true })
    writeFileSync(transcriptPath, toolResultLine("done <promise>DONE</promise>"))
    await fireIdle(hook)

    hook.startLoop(SESSION_ID, "Restarted task", { ultrawork: true })

    const state = hook.getState()
    expect(state?.prompt).toBe("Restarted task")
    expect(state?.verification_pending).toBeUndefined()
    expect(state?.completion_promise).toBe("DONE")
  })
})

View File

@@ -0,0 +1,80 @@
import { describe, expect, test } from "bun:test"
import { mkdirSync, rmSync } from "node:fs"
import { tmpdir } from "node:os"
import { join } from "node:path"
import { createToolExecuteBeforeHandler } from "./tool-execute-before"
import { ULTRAWORK_VERIFICATION_PROMISE } from "../hooks/ralph-loop/constants"
import { clearState, writeState } from "../hooks/ralph-loop/storage"
/**
 * Checks how the `tool.execute.before` handler treats an oracle `task` call,
 * both while an ultrawork loop is awaiting verification and when no loop
 * state has been written.
 *
 * Each test works in its own temporary directory. Cleanup runs in `finally`
 * blocks so a failing assertion or a throwing handler cannot leave the
 * directory or the persisted loop state behind.
 */
describe("tool.execute.before ultrawork oracle verification", () => {
  type HandlerInput = Parameters<typeof createToolExecuteBeforeHandler>[0]

  /** Minimal plugin context: a working directory and a client that returns no messages. */
  function createCtx(directory: string) {
    return {
      directory,
      client: {
        session: {
          messages: async () => ({ data: [] }),
        },
      },
    }
  }

  /** Builds the handler under test against `directory`, with no other hooks registered. */
  function createHandler(directory: string) {
    return createToolExecuteBeforeHandler({
      ctx: createCtx(directory) as unknown as HandlerInput["ctx"],
      hooks: {} as HandlerInput["hooks"],
    })
  }

  /** Fresh arguments for a background oracle task, as the handler receives them. */
  function createOracleTaskOutput() {
    return {
      args: {
        subagent_type: "oracle",
        run_in_background: true,
        prompt: "Check it",
      } as Record<string, unknown>,
    }
  }

  /**
   * Creates `directory`, runs `run` against it, and removes the directory
   * afterwards regardless of whether `run` resolved or threw.
   */
  async function withTempDirectory(directory: string, run: (directory: string) => Promise<void>): Promise<void> {
    mkdirSync(directory, { recursive: true })
    try {
      await run(directory)
    } finally {
      rmSync(directory, { recursive: true, force: true })
    }
  }

  test("#given ulw loop is awaiting verification #when oracle task runs #then oracle prompt is enforced and sync", async () => {
    await withTempDirectory(join(tmpdir(), `tool-before-ulw-${Date.now()}`), async (directory) => {
      try {
        writeState(directory, {
          active: true,
          iteration: 3,
          completion_promise: ULTRAWORK_VERIFICATION_PROMISE,
          initial_completion_promise: "DONE",
          started_at: new Date().toISOString(),
          prompt: "Ship feature",
          session_id: "ses-main",
          ultrawork: true,
          verification_pending: true,
        })
        const handler = createHandler(directory)
        const output = createOracleTaskOutput()

        await handler({ tool: "task", sessionID: "ses-main", callID: "call-1" }, output)

        expect(output.args.run_in_background).toBe(false)
        expect(output.args.prompt).toContain("Ship feature")
        expect(output.args.prompt).toContain(`<promise>${ULTRAWORK_VERIFICATION_PROMISE}</promise>`)
      } finally {
        // Clear the loop state even when an assertion above fails.
        clearState(directory)
      }
    })
  })

  test("#given ulw loop is not awaiting verification #when oracle task runs #then prompt is unchanged", async () => {
    await withTempDirectory(join(tmpdir(), `tool-before-ulw-${Date.now()}-plain`), async (directory) => {
      const handler = createHandler(directory)
      const output = createOracleTaskOutput()

      await handler({ tool: "task", sessionID: "ses-main", callID: "call-1" }, output)

      expect(output.args.run_in_background).toBe(true)
      expect(output.args.prompt).toBe("Check it")
    })
  })
})