Files
oh-my-openagent/src/plugin/tool-execute-before.ulw-loop.test.ts
YeonGyu-Kim 4bc7b1d27c fix(ulw-loop): add fallback for Oracle verification session tracking
The verification_session_id was never reliably set because the
prompt-based attempt_id matching in tool-execute-after depends on
metadata.prompt surviving the delegate-task execution chain. When
this fails silently, the loop never detects Oracle's VERIFIED
emission.

Add a fallback: when exact attempt_id matching fails but oracle
agent + verification_pending state match, still set the session ID.
Add diagnostic logging to trace verification flow failures.
Add integration test covering the full verification chain.
2026-03-17 16:21:40 +09:00

244 lines
8.2 KiB
TypeScript

import { describe, expect, test } from "bun:test"
import { mkdirSync, rmSync } from "node:fs"
import { tmpdir } from "node:os"
import { join } from "node:path"
import { createToolExecuteAfterHandler } from "./tool-execute-after"
import { createToolExecuteBeforeHandler } from "./tool-execute-before"
import { ULTRAWORK_VERIFICATION_PROMISE } from "../hooks/ralph-loop/constants"
import { clearState, readState, writeState } from "../hooks/ralph-loop/storage"
describe("tool.execute.before ultrawork oracle verification", () => {
/**
 * Builds the minimal context stub that the tests hand to the handler factories.
 *
 * @param directory - Directory exposed on the returned context.
 * @returns An object carrying `directory` plus a `client.session.messages`
 * stub that resolves to an empty `data` list.
 */
function createCtx(directory: string) {
  // Stubbed message listing: always resolves with no messages.
  const messages = async () => ({ data: [] })
  const session = { messages }
  const client = { session }
  return { directory, client }
}
/**
 * Produces the argument object for an oracle `task` call that is requested
 * to run in the background.
 *
 * @param prompt - Prompt text stored under the `prompt` key.
 * @returns A fresh args record with `subagent_type`, `run_in_background`
 * and `prompt`.
 */
function createOracleTaskArgs(prompt: string): Record<string, unknown> {
  const taskArgs: Record<string, unknown> = {
    subagent_type: "oracle",
    run_in_background: true,
  }
  taskArgs.prompt = prompt
  return taskArgs
}
/**
 * Builds the metadata record the tests pass to the after-handler for an
 * oracle task, copying `prompt` and `run_in_background` from the given args.
 *
 * @param args - Task args whose `prompt` and `run_in_background` are copied.
 * @param sessionId - Session id stored under the `sessionId` key.
 * @returns A metadata record with `agent: "oracle"` and `sync: true`.
 */
function createSyncTaskMetadata(
  args: Record<string, unknown>,
  sessionId: string,
): Record<string, unknown> {
  const { prompt, run_in_background } = args
  return {
    prompt,
    agent: "oracle",
    run_in_background,
    sessionId,
    sync: true,
  }
}
// Seeds an active ultrawork loop state with `verification_pending: true`, runs
// the before-handler for an oracle `task` call, and checks that a verification
// attempt id was written to state, that `run_in_background` was flipped to
// false, and that the prompt now contains the loop prompt and the
// verification promise tag.
test("#given ulw loop is awaiting verification #when oracle task runs #then oracle prompt is enforced and sync", async () => {
  const directory = join(tmpdir(), `tool-before-ulw-${Date.now()}`)
  mkdirSync(directory, { recursive: true })
  try {
    writeState(directory, {
      active: true,
      iteration: 3,
      completion_promise: ULTRAWORK_VERIFICATION_PROMISE,
      initial_completion_promise: "DONE",
      started_at: new Date().toISOString(),
      prompt: "Ship feature",
      session_id: "ses-main",
      ultrawork: true,
      verification_pending: true,
    })
    const handler = createToolExecuteBeforeHandler({
      ctx: createCtx(directory) as unknown as Parameters<typeof createToolExecuteBeforeHandler>[0]["ctx"],
      hooks: {} as Parameters<typeof createToolExecuteBeforeHandler>[0]["hooks"],
    })
    const output = { args: createOracleTaskArgs("Check it") }
    await handler({ tool: "task", sessionID: "ses-main", callID: "call-1" }, output)
    expect(readState(directory)?.verification_attempt_id).toBeTruthy()
    expect(output.args.run_in_background).toBe(false)
    expect(output.args.prompt).toContain("Ship feature")
    expect(output.args.prompt).toContain(`<promise>${ULTRAWORK_VERIFICATION_PROMISE}</promise>`)
  } finally {
    // Clean up even when the handler or an assertion throws, so a failing run
    // does not leave loop state or the temp directory behind.
    clearState(directory)
    rmSync(directory, { recursive: true, force: true })
  }
})
// With no loop state written to the directory, runs the before-handler for an
// oracle `task` call and checks that `run_in_background` and `prompt` are left
// exactly as they were passed in.
test("#given ulw loop is not awaiting verification #when oracle task runs #then prompt is unchanged", async () => {
  const directory = join(tmpdir(), `tool-before-ulw-${Date.now()}-plain`)
  mkdirSync(directory, { recursive: true })
  try {
    const handler = createToolExecuteBeforeHandler({
      ctx: createCtx(directory) as unknown as Parameters<typeof createToolExecuteBeforeHandler>[0]["ctx"],
      hooks: {} as Parameters<typeof createToolExecuteBeforeHandler>[0]["hooks"],
    })
    const output = { args: createOracleTaskArgs("Check it") }
    await handler({ tool: "task", sessionID: "ses-main", callID: "call-1" }, output)
    expect(output.args.run_in_background).toBe(true)
    expect(output.args.prompt).toBe("Check it")
  } finally {
    // Remove the temp directory even when the handler or an assertion throws.
    rmSync(directory, { recursive: true, force: true })
  }
})
// Runs the before-handler and then the after-handler for the same oracle
// `task` call, feeding the after-handler metadata built from the rewritten
// args, and checks that `verification_session_id` in state becomes the
// oracle session id from that metadata.
test("#given ulw loop is awaiting verification #when oracle sync task metadata is persisted #then oracle session id is stored", async () => {
  const directory = join(tmpdir(), `tool-after-ulw-${Date.now()}`)
  mkdirSync(directory, { recursive: true })
  try {
    writeState(directory, {
      active: true,
      iteration: 3,
      completion_promise: ULTRAWORK_VERIFICATION_PROMISE,
      initial_completion_promise: "DONE",
      started_at: new Date().toISOString(),
      prompt: "Ship feature",
      session_id: "ses-main",
      ultrawork: true,
      verification_pending: true,
    })
    const beforeHandler = createToolExecuteBeforeHandler({
      ctx: createCtx(directory) as unknown as Parameters<typeof createToolExecuteBeforeHandler>[0]["ctx"],
      hooks: {} as Parameters<typeof createToolExecuteBeforeHandler>[0]["hooks"],
    })
    const beforeOutput = { args: createOracleTaskArgs("Check it") }
    await beforeHandler({ tool: "task", sessionID: "ses-main", callID: "call-1" }, beforeOutput)
    // Metadata is derived from the args after the before-handler has run, so
    // it carries whatever prompt the before-handler left on them.
    const metadataFromSyncTask = createSyncTaskMetadata(beforeOutput.args, "ses-oracle")
    const handler = createToolExecuteAfterHandler({
      ctx: createCtx(directory) as unknown as Parameters<typeof createToolExecuteAfterHandler>[0]["ctx"],
      hooks: {} as Parameters<typeof createToolExecuteAfterHandler>[0]["hooks"],
    })
    await handler(
      { tool: "task", sessionID: "ses-main", callID: "call-1" },
      {
        title: "oracle task",
        output: "done",
        metadata: metadataFromSyncTask,
      },
    )
    expect(readState(directory)?.verification_session_id).toBe("ses-oracle")
  } finally {
    // Clean up even when a handler or an assertion throws, so a failing run
    // does not leave loop state or the temp directory behind.
    clearState(directory)
    rmSync(directory, { recursive: true, force: true })
  }
})
// Calls only the after-handler, with oracle metadata that has no `prompt`
// field, and checks that `verification_session_id` in state is still set to
// the session id from that metadata.
test("#given ulw loop is awaiting verification #when oracle metadata prompt is missing #then oracle session fallback is stored", async () => {
  const directory = join(tmpdir(), `tool-after-ulw-fallback-${Date.now()}`)
  mkdirSync(directory, { recursive: true })
  try {
    writeState(directory, {
      active: true,
      iteration: 3,
      completion_promise: ULTRAWORK_VERIFICATION_PROMISE,
      initial_completion_promise: "DONE",
      started_at: new Date().toISOString(),
      prompt: "Ship feature",
      session_id: "ses-main",
      ultrawork: true,
      verification_pending: true,
    })
    const handler = createToolExecuteAfterHandler({
      ctx: createCtx(directory) as unknown as Parameters<typeof createToolExecuteAfterHandler>[0]["ctx"],
      hooks: {} as Parameters<typeof createToolExecuteAfterHandler>[0]["hooks"],
    })
    await handler(
      { tool: "task", sessionID: "ses-main", callID: "call-1" },
      {
        title: "oracle task",
        output: "done",
        metadata: {
          agent: "oracle",
          sessionId: "ses-oracle-fallback",
          sync: true,
        },
      },
    )
    expect(readState(directory)?.verification_session_id).toBe("ses-oracle-fallback")
  } finally {
    // Clean up even when the handler or an assertion throws, so a failing run
    // does not leave loop state or the temp directory behind.
    clearState(directory)
    rmSync(directory, { recursive: true, force: true })
  }
})
// Runs the before-handler twice (two oracle `task` calls) and checks that each
// run records a different verification attempt id. Then completes the first
// call and checks that no verification session id is stored, and completes
// the second call and checks that its session id is stored.
test("#given newer oracle attempt exists #when older oracle task finishes #then old session does not overwrite active verification", async () => {
  const directory = join(tmpdir(), `tool-race-ulw-${Date.now()}`)
  mkdirSync(directory, { recursive: true })
  try {
    writeState(directory, {
      active: true,
      iteration: 3,
      completion_promise: ULTRAWORK_VERIFICATION_PROMISE,
      initial_completion_promise: "DONE",
      started_at: new Date().toISOString(),
      prompt: "Ship feature",
      session_id: "ses-main",
      ultrawork: true,
      verification_pending: true,
    })
    const beforeHandler = createToolExecuteBeforeHandler({
      ctx: createCtx(directory) as unknown as Parameters<typeof createToolExecuteBeforeHandler>[0]["ctx"],
      hooks: {} as Parameters<typeof createToolExecuteBeforeHandler>[0]["hooks"],
    })
    const afterHandler = createToolExecuteAfterHandler({
      ctx: createCtx(directory) as unknown as Parameters<typeof createToolExecuteAfterHandler>[0]["ctx"],
      hooks: {} as Parameters<typeof createToolExecuteAfterHandler>[0]["hooks"],
    })
    const firstOutput = { args: createOracleTaskArgs("Check it") }
    await beforeHandler({ tool: "task", sessionID: "ses-main", callID: "call-1" }, firstOutput)
    const firstAttemptId = readState(directory)?.verification_attempt_id
    const secondOutput = { args: createOracleTaskArgs("Check it again") }
    await beforeHandler({ tool: "task", sessionID: "ses-main", callID: "call-2" }, secondOutput)
    const secondAttemptId = readState(directory)?.verification_attempt_id
    expect(firstAttemptId).toBeTruthy()
    expect(secondAttemptId).toBeTruthy()
    expect(secondAttemptId).not.toBe(firstAttemptId)
    // The older call finishes first, carrying the prompt from the first attempt.
    await afterHandler(
      { tool: "task", sessionID: "ses-main", callID: "call-1" },
      {
        title: "oracle task",
        output: "done",
        metadata: {
          agent: "oracle",
          prompt: String(firstOutput.args.prompt),
          sessionId: "ses-oracle-old",
        },
      },
    )
    expect(readState(directory)?.verification_session_id).toBeUndefined()
    // The newer call finishes with the prompt from the second attempt.
    await afterHandler(
      { tool: "task", sessionID: "ses-main", callID: "call-2" },
      {
        title: "oracle task",
        output: "done",
        metadata: {
          agent: "oracle",
          prompt: String(secondOutput.args.prompt),
          sessionId: "ses-oracle-new",
        },
      },
    )
    expect(readState(directory)?.verification_session_id).toBe("ses-oracle-new")
  } finally {
    // Clean up even when a handler or an assertion throws, so a failing run
    // does not leave loop state or the temp directory behind.
    clearState(directory)
    rmSync(directory, { recursive: true, force: true })
  }
})
})