fix(ulw-loop): track Oracle verification sessions explicitly

🤖 GENERATED WITH ASSISTANCE OF [OhMyOpenCode](https://github.com/code-yeongyu/oh-my-opencode)
This commit is contained in:
YeonGyu-Kim
2026-03-06 22:37:41 +09:00
parent 9778cc6c98
commit 898b628d3d
11 changed files with 281 additions and 29 deletions

View File

@@ -35,7 +35,7 @@ export const ULW_LOOP_TEMPLATE = `You are starting an ULTRAWORK Loop - a self-re
1. You will work on the task continuously
2. When you believe the work is complete, output: \`<promise>{{COMPLETION_PROMISE}}</promise>\`
3. That does NOT finish the loop yet. The system will require Oracle verification
4. The loop only ends after Oracle verifies the result with \`<promise>VERIFIED</promise>\`
4. The loop only ends after the system confirms Oracle verified the result
5. There is no iteration limit
## Rules
@@ -46,7 +46,7 @@ export const ULW_LOOP_TEMPLATE = `You are starting an ULTRAWORK Loop - a self-re
## Exit Conditions
1. **Verified Completion**: Oracle returns \`<promise>VERIFIED</promise>\`
1. **Verified Completion**: Oracle verifies the result and the system confirms it
2. **Cancel**: User runs \`/cancel-ralph\`
## Your Task

View File

@@ -25,7 +25,7 @@ You already emitted <promise>{{INITIAL_PROMISE}}</promise>. This does NOT finish
REQUIRED NOW:
- Call Oracle using task(subagent_type="oracle", load_skills=[], run_in_background=false, ...)
- Ask Oracle to verify whether the original task is actually complete
- The loop only finishes when Oracle returns <promise>{{PROMISE}}</promise>
- The system will inspect the Oracle session directly for the verification result
- If Oracle does not verify, continue fixing the task and do not consider it complete
Original task:

View File

@@ -43,6 +43,8 @@ export function createLoopStateController(options: {
message_count_at_start: loopOptions?.messageCountAtStart,
completion_promise: initialCompletionPromise,
initial_completion_promise: initialCompletionPromise,
verification_attempt_id: undefined,
verification_session_id: undefined,
ultrawork: loopOptions?.ultrawork,
verification_pending: undefined,
strategy: loopOptions?.strategy ?? config?.default_strategy ?? "continue",
@@ -123,6 +125,8 @@ export function createLoopStateController(options: {
state.verification_pending = true
state.completion_promise = ULTRAWORK_VERIFICATION_PROMISE
state.verification_attempt_id = undefined
state.verification_session_id = undefined
state.initial_completion_promise ??= DEFAULT_COMPLETION_PROMISE
if (!writeState(directory, state, stateDir)) {
@@ -131,5 +135,20 @@ export function createLoopStateController(options: {
return state
},
/**
 * Stores the ID of the session that is performing Oracle verification for the loop.
 *
 * The persisted state is only touched when a state exists, its `session_id` equals
 * `sessionID`, it is an ultrawork loop, and verification is currently pending.
 *
 * @param sessionID - Session expected to own the loop state.
 * @param verificationSessionID - Value to persist as `verification_session_id`.
 * @returns The updated state, or `null` when the guard rejects the call or the write fails.
 */
setVerificationSessionID(sessionID: string, verificationSessionID: string): RalphLoopState | null {
const state = readState(directory, stateDir)
// Reject unless this session owns an ultrawork loop that is awaiting verification.
if (!state || state.session_id !== sessionID || !state.ultrawork || !state.verification_pending) {
return null
}
state.verification_session_id = verificationSessionID
// A failed write is reported as null so callers never see state that was not persisted.
if (!writeState(directory, state, stateDir)) {
return null
}
return state
},
}
}

View File

@@ -21,6 +21,7 @@ type LoopStateController = {
incrementIteration: () => RalphLoopState | null
setSessionID: (sessionID: string) => RalphLoopState | null
markVerificationPending: (sessionID: string) => RalphLoopState | null
setVerificationSessionID: (sessionID: string, verificationSessionID: string) => RalphLoopState | null
}
type RalphLoopEventHandlerOptions = { directory: string; apiTimeoutMs: number; getTranscriptPath: (sessionID: string) => string | undefined; checkSessionExists?: RalphLoopOptions["checkSessionExists"]; sessionRecovery: SessionRecovery; loopState: LoopStateController }
@@ -78,14 +79,30 @@ export function createRalphLoopEventHandler(
return
}
const transcriptPath = options.getTranscriptPath(sessionID)
const completionViaTranscript = detectCompletionInTranscript(
transcriptPath,
state.completion_promise,
state.started_at,
)
const verificationSessionID = state.verification_pending
? state.verification_session_id
: undefined
const completionSessionID = verificationSessionID ?? (state.verification_pending ? undefined : sessionID)
const transcriptPath = completionSessionID ? options.getTranscriptPath(completionSessionID) : undefined
const completionViaTranscript = completionSessionID
? detectCompletionInTranscript(
transcriptPath,
state.completion_promise,
state.started_at,
)
: false
const completionViaApi = completionViaTranscript
? false
: verificationSessionID
? await detectCompletionInSessionMessages(ctx, {
sessionID: verificationSessionID,
promise: state.completion_promise,
apiTimeoutMs: options.apiTimeoutMs,
directory: options.directory,
sinceMessageIndex: undefined,
})
: state.verification_pending
? false
: await detectCompletionInSessionMessages(ctx, {
sessionID,
promise: state.completion_promise,

View File

@@ -62,6 +62,12 @@ export function readState(directory: string, customPath?: string): RalphLoopStat
initial_completion_promise: data.initial_completion_promise
? stripQuotes(data.initial_completion_promise)
: undefined,
verification_attempt_id: data.verification_attempt_id
? stripQuotes(data.verification_attempt_id)
: undefined,
verification_session_id: data.verification_session_id
? stripQuotes(data.verification_session_id)
: undefined,
started_at: stripQuotes(data.started_at) || new Date().toISOString(),
prompt: body.trim(),
session_id: data.session_id ? stripQuotes(data.session_id) : undefined,
@@ -100,6 +106,12 @@ export function writeState(
const initialCompletionPromiseLine = state.initial_completion_promise
? `initial_completion_promise: "${state.initial_completion_promise}"\n`
: ""
const verificationAttemptLine = state.verification_attempt_id
? `verification_attempt_id: "${state.verification_attempt_id}"\n`
: ""
const verificationSessionLine = state.verification_session_id
? `verification_session_id: "${state.verification_session_id}"\n`
: ""
const messageCountAtStartLine =
typeof state.message_count_at_start === "number"
? `message_count_at_start: ${state.message_count_at_start}\n`
@@ -112,7 +124,7 @@ export function writeState(
active: ${state.active}
iteration: ${state.iteration}
${maxIterationsLine}completion_promise: "${state.completion_promise}"
${initialCompletionPromiseLine}started_at: "${state.started_at}"
${initialCompletionPromiseLine}${verificationAttemptLine}${verificationSessionLine}started_at: "${state.started_at}"
${sessionIdLine}${ultraworkLine}${verificationPendingLine}${strategyLine}${messageCountAtStartLine}---
${state.prompt}
`

View File

@@ -7,6 +7,8 @@ export interface RalphLoopState {
message_count_at_start?: number
completion_promise: string
initial_completion_promise?: string
verification_attempt_id?: string
verification_session_id?: string
started_at: string
prompt: string
session_id?: string

View File

@@ -4,13 +4,14 @@ import { tmpdir } from "node:os"
import { join } from "node:path"
import { createRalphLoopHook } from "./index"
import { ULTRAWORK_VERIFICATION_PROMISE } from "./constants"
import { clearState } from "./storage"
import { clearState, writeState } from "./storage"
describe("ulw-loop verification", () => {
const testDir = join(tmpdir(), `ulw-loop-verification-${Date.now()}`)
let promptCalls: Array<{ sessionID: string; text: string }>
let toastCalls: Array<{ title: string; message: string; variant: string }>
let transcriptPath: string
let parentTranscriptPath: string
let oracleTranscriptPath: string
function createMockPluginInput() {
return {
@@ -39,7 +40,8 @@ describe("ulw-loop verification", () => {
beforeEach(() => {
promptCalls = []
toastCalls = []
transcriptPath = join(testDir, "transcript.jsonl")
parentTranscriptPath = join(testDir, "transcript-parent.jsonl")
oracleTranscriptPath = join(testDir, "transcript-oracle.jsonl")
if (!existsSync(testDir)) {
mkdirSync(testDir, { recursive: true })
@@ -57,11 +59,11 @@ describe("ulw-loop verification", () => {
test("#given ulw loop emits DONE #when idle fires #then verification phase starts instead of completing", async () => {
const hook = createRalphLoopHook(createMockPluginInput(), {
getTranscriptPath: () => transcriptPath,
getTranscriptPath: (sessionID) => sessionID === "ses-oracle" ? oracleTranscriptPath : parentTranscriptPath,
})
hook.startLoop("session-123", "Build API", { ultrawork: true })
writeFileSync(
transcriptPath,
parentTranscriptPath,
`${JSON.stringify({ type: "tool_result", timestamp: new Date().toISOString(), tool_output: { output: "done <promise>DONE</promise>" } })}\n`,
)
@@ -69,25 +71,30 @@ describe("ulw-loop verification", () => {
expect(hook.getState()?.verification_pending).toBe(true)
expect(hook.getState()?.completion_promise).toBe(ULTRAWORK_VERIFICATION_PROMISE)
expect(hook.getState()?.verification_session_id).toBeUndefined()
expect(promptCalls).toHaveLength(1)
expect(promptCalls[0].text).toContain('task(subagent_type="oracle"')
expect(toastCalls.some((toast) => toast.title === "ULTRAWORK LOOP COMPLETE!")).toBe(false)
})
test("#given ulw loop is awaiting verification #when VERIFIED appears #then loop completes", async () => {
test("#given ulw loop is awaiting verification #when VERIFIED appears in oracle session #then loop completes", async () => {
const hook = createRalphLoopHook(createMockPluginInput(), {
getTranscriptPath: () => transcriptPath,
getTranscriptPath: (sessionID) => sessionID === "ses-oracle" ? oracleTranscriptPath : parentTranscriptPath,
})
hook.startLoop("session-123", "Build API", { ultrawork: true })
writeFileSync(
transcriptPath,
parentTranscriptPath,
`${JSON.stringify({ type: "tool_result", timestamp: new Date().toISOString(), tool_output: { output: "done <promise>DONE</promise>" } })}\n`,
)
await hook.event({ event: { type: "session.idle", properties: { sessionID: "session-123" } } })
writeState(testDir, {
...hook.getState()!,
verification_session_id: "ses-oracle",
})
writeFileSync(
transcriptPath,
`${JSON.stringify({ type: "tool_result", timestamp: new Date().toISOString(), tool_output: { output: "done <promise>DONE</promise>" } })}\n${JSON.stringify({ type: "tool_result", timestamp: new Date().toISOString(), tool_output: { output: `verified <promise>${ULTRAWORK_VERIFICATION_PROMISE}</promise>` } })}\n`,
oracleTranscriptPath,
`${JSON.stringify({ type: "tool_result", timestamp: new Date().toISOString(), tool_output: { output: `verified <promise>${ULTRAWORK_VERIFICATION_PROMISE}</promise>` } })}\n`,
)
await hook.event({ event: { type: "session.idle", properties: { sessionID: "session-123" } } })
@@ -98,7 +105,7 @@ describe("ulw-loop verification", () => {
test("#given ulw loop without max iterations #when it continues #then it stays unbounded", async () => {
const hook = createRalphLoopHook(createMockPluginInput(), {
getTranscriptPath: () => transcriptPath,
getTranscriptPath: (sessionID) => sessionID === "ses-oracle" ? oracleTranscriptPath : parentTranscriptPath,
})
hook.startLoop("session-123", "Build API", { ultrawork: true })
@@ -111,11 +118,11 @@ describe("ulw-loop verification", () => {
test("#given prior transcript completion from older run #when new ulw loop starts #then old completion is ignored", async () => {
writeFileSync(
transcriptPath,
parentTranscriptPath,
`${JSON.stringify({ type: "tool_result", timestamp: "2000-01-01T00:00:00.000Z", tool_output: { output: "old <promise>DONE</promise>" } })}\n`,
)
const hook = createRalphLoopHook(createMockPluginInput(), {
getTranscriptPath: () => transcriptPath,
getTranscriptPath: (sessionID) => sessionID === "ses-oracle" ? oracleTranscriptPath : parentTranscriptPath,
})
hook.startLoop("session-123", "Build API", { ultrawork: true })
@@ -128,11 +135,11 @@ describe("ulw-loop verification", () => {
test("#given ulw loop was awaiting verification #when same session starts again #then verification state is overwritten", async () => {
const hook = createRalphLoopHook(createMockPluginInput(), {
getTranscriptPath: () => transcriptPath,
getTranscriptPath: (sessionID) => sessionID === "ses-oracle" ? oracleTranscriptPath : parentTranscriptPath,
})
hook.startLoop("session-123", "Build API", { ultrawork: true })
writeFileSync(
transcriptPath,
parentTranscriptPath,
`${JSON.stringify({ type: "tool_result", timestamp: new Date().toISOString(), tool_output: { output: "done <promise>DONE</promise>" } })}\n`,
)
@@ -143,4 +150,26 @@ describe("ulw-loop verification", () => {
expect(hook.getState()?.verification_pending).toBeUndefined()
expect(hook.getState()?.completion_promise).toBe("DONE")
})
// Guards against the parent session spoofing verification: a VERIFIED promise that shows up
// in the parent transcript must not end the loop while no Oracle session has been recorded.
test("#given parent session emits VERIFIED #when oracle session is not tracked #then ulw loop does not complete", async () => {
const hook = createRalphLoopHook(createMockPluginInput(), {
getTranscriptPath: (sessionID) => sessionID === "ses-oracle" ? oracleTranscriptPath : parentTranscriptPath,
})
hook.startLoop("session-123", "Build API", { ultrawork: true })
// First idle: the parent emits DONE, which puts the loop into the verification-pending phase.
writeFileSync(
parentTranscriptPath,
`${JSON.stringify({ type: "tool_result", timestamp: new Date().toISOString(), tool_output: { output: "done <promise>DONE</promise>" } })}\n`,
)
await hook.event({ event: { type: "session.idle", properties: { sessionID: "session-123" } } })
// Second idle: the verification promise is written to the PARENT transcript only;
// verification_session_id is never set in this test.
writeFileSync(
parentTranscriptPath,
`${JSON.stringify({ type: "tool_result", timestamp: new Date().toISOString(), tool_output: { output: "done <promise>DONE</promise>" } })}\n${JSON.stringify({ type: "tool_result", timestamp: new Date().toISOString(), tool_output: { output: `bad parent leak <promise>${ULTRAWORK_VERIFICATION_PROMISE}</promise>` } })}\n`,
)
await hook.event({ event: { type: "session.idle", properties: { sessionID: "session-123" } } })
// The loop state must still exist and still be waiting for verification.
expect(hook.getState()).not.toBeNull()
expect(hook.getState()?.verification_pending).toBe(true)
})
})

View File

@@ -68,6 +68,7 @@ export function createPluginInterface(args: {
}),
"tool.execute.after": createToolExecuteAfterHandler({
ctx,
hooks,
}),
}

View File

@@ -1,7 +1,12 @@
import { consumeToolMetadata } from "../features/tool-metadata-store"
import type { CreatedHooks } from "../create-hooks"
import type { PluginContext } from "./types"
import { readState, writeState } from "../hooks/ralph-loop/storage"
// Captures the text between <ulw_verification_attempt_id> and </ulw_verification_attempt_id>.
// Matching is case-insensitive; the lazy group stops at the first closing tag and, because the
// `s` flag is not set, the captured value cannot span a line break.
const VERIFICATION_ATTEMPT_PATTERN = /<ulw_verification_attempt_id>(.*?)<\/ulw_verification_attempt_id>/i
export function createToolExecuteAfterHandler(args: {
ctx: PluginContext
hooks: CreatedHooks
}): (
input: { tool: string; sessionID: string; callID: string },
@@ -9,7 +14,7 @@ export function createToolExecuteAfterHandler(args: {
| { title: string; output: string; metadata: Record<string, unknown> }
| undefined,
) => Promise<void> {
const { hooks } = args
const { ctx, hooks } = args
return async (
input: { tool: string; sessionID: string; callID: string },
@@ -27,6 +32,30 @@ export function createToolExecuteAfterHandler(args: {
}
}
// After a `task` tool call finishes, record the spawned session as the loop's Oracle
// verification session, but only if it belongs to the current verification attempt.
if (input.tool === "task") {
// Metadata values are only trusted when they are strings; anything else becomes undefined.
const sessionId = typeof output.metadata?.sessionId === "string" ? output.metadata.sessionId : undefined
const agent = typeof output.metadata?.agent === "string" ? output.metadata.agent : undefined
const prompt = typeof output.metadata?.prompt === "string" ? output.metadata.prompt : undefined
// Pull the attempt ID out of the <ulw_verification_attempt_id> tag in the task prompt, if present.
const verificationAttemptId = prompt?.match(VERIFICATION_ATTEMPT_PATTERN)?.[1]?.trim()
const loopState = readState(ctx.directory)
// Every condition must hold: an oracle task with a session ID and attempt ID, an active
// ultrawork loop that is awaiting verification and is owned by the calling session, and an
// attempt ID equal to the one currently stored in the loop state. The last check keeps a
// task carrying an older attempt ID from replacing the tracked session.
if (
agent === "oracle"
&& sessionId
&& verificationAttemptId
&& loopState?.active === true
&& loopState.ultrawork === true
&& loopState.verification_pending === true
&& loopState.session_id === input.sessionID
&& loopState.verification_attempt_id === verificationAttemptId
) {
// Persist the same state with only verification_session_id replaced.
writeState(ctx.directory, {
...loopState,
verification_session_id: sessionId,
})
}
}
await hooks.claudeCodeHooks?.["tool.execute.after"]?.(input, output)
await hooks.toolOutputTruncator?.["tool.execute.after"]?.(input, output)
await hooks.preemptiveCompaction?.["tool.execute.after"]?.(input, output)

View File

@@ -1,4 +1,5 @@
import type { PluginContext } from "./types"
import { randomUUID } from "node:crypto"
import { getMainSessionID } from "../features/claude-code-session-state"
import { clearBoulderState } from "../features/boulder-state"
@@ -6,7 +7,7 @@ import { log } from "../shared"
import { resolveSessionAgent } from "./session-agent-resolver"
import { parseRalphLoopArguments } from "../hooks/ralph-loop/command-arguments"
import { ULTRAWORK_VERIFICATION_PROMISE } from "../hooks/ralph-loop/constants"
import { readState } from "../hooks/ralph-loop/storage"
import { readState, writeState } from "../hooks/ralph-loop/storage"
import type { CreatedHooks } from "../create-hooks"
@@ -77,8 +78,14 @@ export function createToolExecuteBeforeHandler(args: {
&& loopState.session_id === input.sessionID
if (shouldInjectOracleVerification) {
const verificationAttemptId = randomUUID()
writeState(ctx.directory, {
...loopState,
verification_attempt_id: verificationAttemptId,
verification_session_id: undefined,
})
argsObject.run_in_background = false
argsObject.prompt = `${prompt ? `${prompt}\n\n` : ""}You are verifying the active ULTRAWORK loop result for this session. Review whether the original task is truly complete: ${loopState.prompt}\n\nIf the work is fully complete, end your response with <promise>${ULTRAWORK_VERIFICATION_PROMISE}</promise>. If the work is not complete, explain the blocking issues clearly and DO NOT emit that promise.`
argsObject.prompt = `${prompt ? `${prompt}\n\n` : ""}You are verifying the active ULTRAWORK loop result for this session. Review whether the original task is truly complete: ${loopState.prompt}\n\nIf the work is fully complete, end your response with <promise>${ULTRAWORK_VERIFICATION_PROMISE}</promise>. If the work is not complete, explain the blocking issues clearly and DO NOT emit that promise.\n\n<ulw_verification_attempt_id>${verificationAttemptId}</ulw_verification_attempt_id>`
}
}

View File

@@ -2,9 +2,10 @@ import { describe, expect, test } from "bun:test"
import { mkdirSync, rmSync } from "node:fs"
import { tmpdir } from "node:os"
import { join } from "node:path"
import { createToolExecuteAfterHandler } from "./tool-execute-after"
import { createToolExecuteBeforeHandler } from "./tool-execute-before"
import { ULTRAWORK_VERIFICATION_PROMISE } from "../hooks/ralph-loop/constants"
import { clearState, writeState } from "../hooks/ralph-loop/storage"
import { clearState, readState, writeState } from "../hooks/ralph-loop/storage"
describe("tool.execute.before ultrawork oracle verification", () => {
function createCtx(directory: string) {
@@ -47,6 +48,7 @@ describe("tool.execute.before ultrawork oracle verification", () => {
await handler({ tool: "task", sessionID: "ses-main", callID: "call-1" }, output)
expect(readState(directory)?.verification_attempt_id).toBeTruthy()
expect(output.args.run_in_background).toBe(false)
expect(output.args.prompt).toContain("Ship feature")
expect(output.args.prompt).toContain(`<promise>${ULTRAWORK_VERIFICATION_PROMISE}</promise>`)
@@ -77,4 +79,138 @@ describe("tool.execute.before ultrawork oracle verification", () => {
rmSync(directory, { recursive: true, force: true })
})
// End-to-end check of the before/after handler pair: the before handler issues an attempt ID,
// and the after handler stores the Oracle session ID when the finished task carries that ID.
test("#given ulw loop is awaiting verification #when oracle task finishes #then oracle session id is stored", async () => {
const directory = join(tmpdir(), `tool-after-ulw-${Date.now()}`)
mkdirSync(directory, { recursive: true })
// Seed a loop that already emitted its initial promise and is waiting for Oracle verification.
writeState(directory, {
active: true,
iteration: 3,
completion_promise: ULTRAWORK_VERIFICATION_PROMISE,
initial_completion_promise: "DONE",
started_at: new Date().toISOString(),
prompt: "Ship feature",
session_id: "ses-main",
ultrawork: true,
verification_pending: true,
})
const beforeHandler = createToolExecuteBeforeHandler({
ctx: createCtx(directory) as unknown as Parameters<typeof createToolExecuteBeforeHandler>[0]["ctx"],
hooks: {} as Parameters<typeof createToolExecuteBeforeHandler>[0]["hooks"],
})
const beforeOutput = {
args: {
subagent_type: "oracle",
run_in_background: true,
prompt: "Check it",
} as Record<string, unknown>,
}
// Running the before handler rewrites beforeOutput.args.prompt; that rewritten prompt is
// what gets passed back as task metadata below.
await beforeHandler({ tool: "task", sessionID: "ses-main", callID: "call-1" }, beforeOutput)
const handler = createToolExecuteAfterHandler({
ctx: createCtx(directory) as unknown as Parameters<typeof createToolExecuteAfterHandler>[0]["ctx"],
hooks: {} as Parameters<typeof createToolExecuteAfterHandler>[0]["hooks"],
})
// Simulate the completed Oracle task reporting its agent, prompt, and spawned session ID.
await handler(
{ tool: "task", sessionID: "ses-main", callID: "call-1" },
{
title: "oracle task",
output: "done",
metadata: {
agent: "oracle",
prompt: String(beforeOutput.args.prompt),
sessionId: "ses-oracle",
},
},
)
expect(readState(directory)?.verification_session_id).toBe("ses-oracle")
// Clean up the per-test state and temporary directory.
clearState(directory)
rmSync(directory, { recursive: true, force: true })
})
// Race scenario: two Oracle verification tasks are started back to back. Only the task that
// carries the most recent attempt ID may set verification_session_id.
test("#given newer oracle attempt exists #when older oracle task finishes #then old session does not overwrite active verification", async () => {
const directory = join(tmpdir(), `tool-race-ulw-${Date.now()}`)
mkdirSync(directory, { recursive: true })
// Seed a loop that is waiting for Oracle verification.
writeState(directory, {
active: true,
iteration: 3,
completion_promise: ULTRAWORK_VERIFICATION_PROMISE,
initial_completion_promise: "DONE",
started_at: new Date().toISOString(),
prompt: "Ship feature",
session_id: "ses-main",
ultrawork: true,
verification_pending: true,
})
const beforeHandler = createToolExecuteBeforeHandler({
ctx: createCtx(directory) as unknown as Parameters<typeof createToolExecuteBeforeHandler>[0]["ctx"],
hooks: {} as Parameters<typeof createToolExecuteBeforeHandler>[0]["hooks"],
})
const afterHandler = createToolExecuteAfterHandler({
ctx: createCtx(directory) as unknown as Parameters<typeof createToolExecuteAfterHandler>[0]["ctx"],
hooks: {} as Parameters<typeof createToolExecuteAfterHandler>[0]["hooks"],
})
// First Oracle call: capture the attempt ID that the before handler stores.
const firstOutput = {
args: {
subagent_type: "oracle",
run_in_background: true,
prompt: "Check it",
} as Record<string, unknown>,
}
await beforeHandler({ tool: "task", sessionID: "ses-main", callID: "call-1" }, firstOutput)
const firstAttemptId = readState(directory)?.verification_attempt_id
// Second Oracle call: this replaces the stored attempt ID with a new one.
const secondOutput = {
args: {
subagent_type: "oracle",
run_in_background: true,
prompt: "Check it again",
} as Record<string, unknown>,
}
await beforeHandler({ tool: "task", sessionID: "ses-main", callID: "call-2" }, secondOutput)
const secondAttemptId = readState(directory)?.verification_attempt_id
// Both attempts must have produced an ID, and the two IDs must differ.
expect(firstAttemptId).toBeTruthy()
expect(secondAttemptId).toBeTruthy()
expect(secondAttemptId).not.toBe(firstAttemptId)
// The older task finishes first; its prompt carries the superseded attempt ID.
await afterHandler(
{ tool: "task", sessionID: "ses-main", callID: "call-1" },
{
title: "oracle task",
output: "done",
metadata: {
agent: "oracle",
prompt: String(firstOutput.args.prompt),
sessionId: "ses-oracle-old",
},
},
)
// The stale completion must leave verification_session_id unset.
expect(readState(directory)?.verification_session_id).toBeUndefined()
// The newer task finishes; its attempt ID matches the stored one.
await afterHandler(
{ tool: "task", sessionID: "ses-main", callID: "call-2" },
{
title: "oracle task",
output: "done",
metadata: {
agent: "oracle",
prompt: String(secondOutput.args.prompt),
sessionId: "ses-oracle-new",
},
},
)
expect(readState(directory)?.verification_session_id).toBe("ses-oracle-new")
// Clean up the per-test state and temporary directory.
clearState(directory)
rmSync(directory, { recursive: true, force: true })
})
})