test(ulw-loop): cover Oracle verification flow
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
This commit is contained in:
@@ -1078,7 +1078,7 @@ Original task: Build something`
|
||||
expect(messagesCalls.length).toBe(1)
|
||||
})
|
||||
|
||||
test("should show ultrawork completion toast", async () => {
|
||||
test("should require oracle verification toast for ultrawork completion promise", async () => {
|
||||
// given - hook with ultrawork mode and completion in transcript
|
||||
const transcriptPath = join(TEST_DIR, "transcript.jsonl")
|
||||
const hook = createRalphLoopHook(createMockPluginInput(), {
|
||||
@@ -1090,10 +1090,9 @@ Original task: Build something`
|
||||
// when - idle event triggered
|
||||
await hook.event({ event: { type: "session.idle", properties: { sessionID: "test-id" } } })
|
||||
|
||||
// then - ultrawork toast shown
|
||||
const completionToast = toastCalls.find(t => t.title === "ULTRAWORK LOOP COMPLETE!")
|
||||
expect(completionToast).toBeDefined()
|
||||
expect(completionToast!.message).toMatch(/JUST ULW ULW!/)
|
||||
const verificationToast = toastCalls.find(t => t.title === "ULTRAWORK LOOP")
|
||||
expect(verificationToast).toBeDefined()
|
||||
expect(verificationToast!.message).toMatch(/Oracle verification is now required/)
|
||||
})
|
||||
|
||||
test("should show regular completion toast when ultrawork disabled", async () => {
|
||||
|
||||
146
src/hooks/ralph-loop/ulw-loop-verification.test.ts
Normal file
146
src/hooks/ralph-loop/ulw-loop-verification.test.ts
Normal file
@@ -0,0 +1,146 @@
|
||||
import { afterEach, beforeEach, describe, expect, test } from "bun:test"
|
||||
import { existsSync, mkdirSync, rmSync, writeFileSync } from "node:fs"
|
||||
import { tmpdir } from "node:os"
|
||||
import { join } from "node:path"
|
||||
import { createRalphLoopHook } from "./index"
|
||||
import { ULTRAWORK_VERIFICATION_PROMISE } from "./constants"
|
||||
import { clearState } from "./storage"
|
||||
|
||||
// Exercises the ultrawork (ulw) loop's Oracle verification phase through the
// hook returned by createRalphLoopHook. The hook is driven with a mock plugin
// input that records every prompt and toast it sends, and with a transcript
// file the tests write by hand.
describe("ulw-loop verification", () => {
  const SESSION_ID = "session-123"
  const DONE_OUTPUT = "done <promise>DONE</promise>"
  const testDir = join(tmpdir(), `ulw-loop-verification-${Date.now()}`)
  let promptCalls: Array<{ sessionID: string; text: string }>
  let toastCalls: Array<{ title: string; message: string; variant: string }>
  let transcriptPath: string

  // Builds the plugin input passed to createRalphLoopHook. Only the client
  // members used by these tests are mocked: promptAsync and showToast append
  // to promptCalls / toastCalls, and messages always returns an empty list.
  function createMockPluginInput() {
    return {
      client: {
        session: {
          promptAsync: async (opts: { path: { id: string }; body: { parts: Array<{ type: string; text: string }> } }) => {
            promptCalls.push({
              sessionID: opts.path.id,
              text: opts.body.parts[0].text,
            })
            return {}
          },
          messages: async () => ({ data: [] }),
        },
        tui: {
          showToast: async (opts: { body: { title: string; message: string; variant: string } }) => {
            toastCalls.push(opts.body)
            return {}
          },
        },
      },
      directory: testDir,
    } as unknown as Parameters<typeof createRalphLoopHook>[0]
  }

  // Creates a hook whose transcript lookup resolves (lazily) to the current
  // test's transcript file.
  function createHook() {
    return createRalphLoopHook(createMockPluginInput(), {
      getTranscriptPath: () => transcriptPath,
    })
  }

  // Serializes one newline-terminated "tool_result" transcript line carrying
  // `output`. The timestamp defaults to "now"; pass an explicit value to
  // simulate an entry from an earlier run.
  function transcriptEntry(output: string, timestamp: string = new Date().toISOString()): string {
    return `${JSON.stringify({ type: "tool_result", timestamp, tool_output: { output } })}\n`
  }

  // Delivers a session.idle event for the test session to the hook.
  async function fireIdle(hook: ReturnType<typeof createRalphLoopHook>): Promise<void> {
    await hook.event({ event: { type: "session.idle", properties: { sessionID: SESSION_ID } } })
  }

  beforeEach(() => {
    promptCalls = []
    toastCalls = []
    transcriptPath = join(testDir, "transcript.jsonl")

    if (!existsSync(testDir)) {
      mkdirSync(testDir, { recursive: true })
    }

    // Start every test without persisted loop state in testDir.
    clearState(testDir)
  })

  afterEach(() => {
    clearState(testDir)
    if (existsSync(testDir)) {
      rmSync(testDir, { recursive: true, force: true })
    }
  })

  // A DONE promise in ultrawork mode must switch the loop into the
  // verification phase (oracle prompt sent, no completion toast yet).
  test("#given ulw loop emits DONE #when idle fires #then verification phase starts instead of completing", async () => {
    const hook = createHook()
    hook.startLoop(SESSION_ID, "Build API", { ultrawork: true })
    writeFileSync(transcriptPath, transcriptEntry(DONE_OUTPUT))

    await fireIdle(hook)

    expect(hook.getState()?.verification_pending).toBe(true)
    expect(hook.getState()?.completion_promise).toBe(ULTRAWORK_VERIFICATION_PROMISE)
    expect(promptCalls).toHaveLength(1)
    // Optional chaining keeps a missing prompt an assertion failure rather than a TypeError.
    expect(promptCalls[0]?.text).toContain('task(subagent_type="oracle"')
    expect(toastCalls.some((toast) => toast.title === "ULTRAWORK LOOP COMPLETE!")).toBe(false)
  })

  // Once verification is pending, a transcript entry carrying the
  // verification promise must finish the loop and show the completion toast.
  test("#given ulw loop is awaiting verification #when VERIFIED appears #then loop completes", async () => {
    const hook = createHook()
    hook.startLoop(SESSION_ID, "Build API", { ultrawork: true })
    writeFileSync(transcriptPath, transcriptEntry(DONE_OUTPUT))

    await fireIdle(hook)

    // Rewrite the transcript with the DONE entry followed by the verification entry.
    writeFileSync(
      transcriptPath,
      transcriptEntry(DONE_OUTPUT) +
        transcriptEntry(`verified <promise>${ULTRAWORK_VERIFICATION_PROMISE}</promise>`),
    )

    await fireIdle(hook)

    expect(hook.getState()).toBeNull()
    expect(toastCalls.some((toast) => toast.title === "ULTRAWORK LOOP COMPLETE!")).toBe(true)
  })

  // With no completion in the transcript the loop advances an iteration and
  // reports itself as unbounded in the continuation prompt.
  test("#given ulw loop without max iterations #when it continues #then it stays unbounded", async () => {
    const hook = createHook()
    hook.startLoop(SESSION_ID, "Build API", { ultrawork: true })

    await fireIdle(hook)

    expect(hook.getState()?.iteration).toBe(2)
    expect(hook.getState()?.max_iterations).toBeUndefined()
    // Optional chaining keeps a missing prompt an assertion failure rather than a TypeError.
    expect(promptCalls[0]?.text).toContain("2/unbounded")
  })

  // A DONE entry timestamped long before the loop started must not count as
  // completion of the new loop.
  test("#given prior transcript completion from older run #when new ulw loop starts #then old completion is ignored", async () => {
    writeFileSync(
      transcriptPath,
      transcriptEntry("old <promise>DONE</promise>", "2000-01-01T00:00:00.000Z"),
    )
    const hook = createHook()
    hook.startLoop(SESSION_ID, "Build API", { ultrawork: true })

    await fireIdle(hook)

    expect(hook.getState()?.iteration).toBe(2)
    expect(hook.getState()?.verification_pending).toBeUndefined()
    expect(promptCalls).toHaveLength(1)
  })

  // Restarting the loop for the same session while verification is pending
  // must replace the pending-verification state with a fresh loop state.
  test("#given ulw loop was awaiting verification #when same session starts again #then verification state is overwritten", async () => {
    const hook = createHook()
    hook.startLoop(SESSION_ID, "Build API", { ultrawork: true })
    writeFileSync(transcriptPath, transcriptEntry(DONE_OUTPUT))

    await fireIdle(hook)
    hook.startLoop(SESSION_ID, "Restarted task", { ultrawork: true })

    expect(hook.getState()?.prompt).toBe("Restarted task")
    expect(hook.getState()?.verification_pending).toBeUndefined()
    expect(hook.getState()?.completion_promise).toBe("DONE")
  })
})
|
||||
80
src/plugin/tool-execute-before.ulw-loop.test.ts
Normal file
80
src/plugin/tool-execute-before.ulw-loop.test.ts
Normal file
@@ -0,0 +1,80 @@
|
||||
import { describe, expect, test } from "bun:test"
|
||||
import { mkdirSync, rmSync } from "node:fs"
|
||||
import { tmpdir } from "node:os"
|
||||
import { join } from "node:path"
|
||||
import { createToolExecuteBeforeHandler } from "./tool-execute-before"
|
||||
import { ULTRAWORK_VERIFICATION_PROMISE } from "../hooks/ralph-loop/constants"
|
||||
import { clearState, writeState } from "../hooks/ralph-loop/storage"
|
||||
|
||||
// Checks how the tool.execute.before handler treats an oracle `task` call
// depending on whether an ultrawork loop in the working directory is waiting
// for Oracle verification.
describe("tool.execute.before ultrawork oracle verification", () => {
  type HandlerOptions = Parameters<typeof createToolExecuteBeforeHandler>[0]

  // Minimal plugin context: the working directory plus a session client whose
  // messages call always returns an empty list.
  function createCtx(directory: string) {
    return {
      directory,
      client: {
        session: {
          messages: async () => ({ data: [] }),
        },
      },
    }
  }

  // Builds the handler under test for `directory`, with no extra hooks registered.
  function createHandler(directory: string) {
    return createToolExecuteBeforeHandler({
      ctx: createCtx(directory) as unknown as HandlerOptions["ctx"],
      hooks: {} as HandlerOptions["hooks"],
    })
  }

  // Fresh output object for a background oracle task; the handler is expected
  // to mutate `args` in place when verification is pending.
  function createOracleTaskOutput() {
    return {
      args: {
        subagent_type: "oracle",
        run_in_background: true,
        prompt: "Check it",
      } as Record<string, unknown>,
    }
  }

  test("#given ulw loop is awaiting verification #when oracle task runs #then oracle prompt is enforced and sync", async () => {
    const directory = join(tmpdir(), `tool-before-ulw-${Date.now()}`)
    mkdirSync(directory, { recursive: true })

    try {
      // Persist a loop state that is already in the verification phase.
      writeState(directory, {
        active: true,
        iteration: 3,
        completion_promise: ULTRAWORK_VERIFICATION_PROMISE,
        initial_completion_promise: "DONE",
        started_at: new Date().toISOString(),
        prompt: "Ship feature",
        session_id: "ses-main",
        ultrawork: true,
        verification_pending: true,
      })

      const handler = createHandler(directory)
      const output = createOracleTaskOutput()

      await handler({ tool: "task", sessionID: "ses-main", callID: "call-1" }, output)

      expect(output.args.run_in_background).toBe(false)
      expect(output.args.prompt).toContain("Ship feature")
      expect(output.args.prompt).toContain(`<promise>${ULTRAWORK_VERIFICATION_PROMISE}</promise>`)
    } finally {
      // Clean up even when an expectation fails so no state or temp directory is left behind.
      clearState(directory)
      rmSync(directory, { recursive: true, force: true })
    }
  })

  test("#given ulw loop is not awaiting verification #when oracle task runs #then prompt is unchanged", async () => {
    const directory = join(tmpdir(), `tool-before-ulw-${Date.now()}-plain`)
    mkdirSync(directory, { recursive: true })

    try {
      // No loop state is written here, so the handler has nothing to enforce.
      const handler = createHandler(directory)
      const output = createOracleTaskOutput()

      await handler({ tool: "task", sessionID: "ses-main", callID: "call-1" }, output)

      expect(output.args.run_in_background).toBe(true)
      expect(output.args.prompt).toBe("Check it")
    } finally {
      // Remove the temp directory even when an expectation fails.
      rmSync(directory, { recursive: true, force: true })
    }
  })
})
|
||||
Reference in New Issue
Block a user