fix(ulw-loop): track Oracle verification sessions explicitly
🤖 GENERATED WITH ASSISTANCE OF [OhMyOpenCode](https://github.com/code-yeongyu/oh-my-opencode)
This commit is contained in:
@@ -35,7 +35,7 @@ export const ULW_LOOP_TEMPLATE = `You are starting an ULTRAWORK Loop - a self-re
|
||||
1. You will work on the task continuously
|
||||
2. When you believe the work is complete, output: \`<promise>{{COMPLETION_PROMISE}}</promise>\`
|
||||
3. That does NOT finish the loop yet. The system will require Oracle verification
|
||||
4. The loop only ends after Oracle verifies the result with \`<promise>VERIFIED</promise>\`
|
||||
4. The loop only ends after the system confirms Oracle verified the result
|
||||
5. There is no iteration limit
|
||||
|
||||
## Rules
|
||||
@@ -46,7 +46,7 @@ export const ULW_LOOP_TEMPLATE = `You are starting an ULTRAWORK Loop - a self-re
|
||||
|
||||
## Exit Conditions
|
||||
|
||||
1. **Verified Completion**: Oracle returns \`<promise>VERIFIED</promise>\`
|
||||
1. **Verified Completion**: Oracle verifies the result and the system confirms it
|
||||
2. **Cancel**: User runs \`/cancel-ralph\`
|
||||
|
||||
## Your Task
|
||||
|
||||
@@ -25,7 +25,7 @@ You already emitted <promise>{{INITIAL_PROMISE}}</promise>. This does NOT finish
|
||||
REQUIRED NOW:
|
||||
- Call Oracle using task(subagent_type="oracle", load_skills=[], run_in_background=false, ...)
|
||||
- Ask Oracle to verify whether the original task is actually complete
|
||||
- The loop only finishes when Oracle returns <promise>{{PROMISE}}</promise>
|
||||
- The system will inspect the Oracle session directly for the verification result
|
||||
- If Oracle does not verify, continue fixing the task and do not consider it complete
|
||||
|
||||
Original task:
|
||||
|
||||
@@ -43,6 +43,8 @@ export function createLoopStateController(options: {
|
||||
message_count_at_start: loopOptions?.messageCountAtStart,
|
||||
completion_promise: initialCompletionPromise,
|
||||
initial_completion_promise: initialCompletionPromise,
|
||||
verification_attempt_id: undefined,
|
||||
verification_session_id: undefined,
|
||||
ultrawork: loopOptions?.ultrawork,
|
||||
verification_pending: undefined,
|
||||
strategy: loopOptions?.strategy ?? config?.default_strategy ?? "continue",
|
||||
@@ -123,6 +125,8 @@ export function createLoopStateController(options: {
|
||||
|
||||
state.verification_pending = true
|
||||
state.completion_promise = ULTRAWORK_VERIFICATION_PROMISE
|
||||
state.verification_attempt_id = undefined
|
||||
state.verification_session_id = undefined
|
||||
state.initial_completion_promise ??= DEFAULT_COMPLETION_PROMISE
|
||||
|
||||
if (!writeState(directory, state, stateDir)) {
|
||||
@@ -131,5 +135,20 @@ export function createLoopStateController(options: {
|
||||
|
||||
return state
|
||||
},
|
||||
|
||||
/**
 * Persists the ID of the session that is performing verification for this loop.
 *
 * The update is applied only when a stored state exists, belongs to `sessionID`,
 * is flagged `ultrawork`, and has `verification_pending` set.
 *
 * @param sessionID - Session that must match the stored `session_id`.
 * @param verificationSessionID - Value written to `verification_session_id`.
 * @returns The updated state, or `null` when a precondition fails or the write fails.
 */
setVerificationSessionID(sessionID: string, verificationSessionID: string): RalphLoopState | null {
  const current = readState(directory, stateDir)
  if (!current) {
    return null
  }

  const ownsLoop = current.session_id === sessionID
  const awaitingVerification = Boolean(current.ultrawork) && Boolean(current.verification_pending)
  if (!ownsLoop || !awaitingVerification) {
    return null
  }

  current.verification_session_id = verificationSessionID

  // A failed write is reported as null so callers never see an unpersisted state.
  return writeState(directory, current, stateDir) ? current : null
},
|
||||
}
|
||||
}
|
||||
|
||||
@@ -21,6 +21,7 @@ type LoopStateController = {
|
||||
incrementIteration: () => RalphLoopState | null
|
||||
setSessionID: (sessionID: string) => RalphLoopState | null
|
||||
markVerificationPending: (sessionID: string) => RalphLoopState | null
|
||||
setVerificationSessionID: (sessionID: string, verificationSessionID: string) => RalphLoopState | null
|
||||
}
|
||||
type RalphLoopEventHandlerOptions = { directory: string; apiTimeoutMs: number; getTranscriptPath: (sessionID: string) => string | undefined; checkSessionExists?: RalphLoopOptions["checkSessionExists"]; sessionRecovery: SessionRecovery; loopState: LoopStateController }
|
||||
|
||||
@@ -78,14 +79,30 @@ export function createRalphLoopEventHandler(
|
||||
return
|
||||
}
|
||||
|
||||
const transcriptPath = options.getTranscriptPath(sessionID)
|
||||
const completionViaTranscript = detectCompletionInTranscript(
|
||||
transcriptPath,
|
||||
state.completion_promise,
|
||||
state.started_at,
|
||||
)
|
||||
const verificationSessionID = state.verification_pending
|
||||
? state.verification_session_id
|
||||
: undefined
|
||||
const completionSessionID = verificationSessionID ?? (state.verification_pending ? undefined : sessionID)
|
||||
const transcriptPath = completionSessionID ? options.getTranscriptPath(completionSessionID) : undefined
|
||||
const completionViaTranscript = completionSessionID
|
||||
? detectCompletionInTranscript(
|
||||
transcriptPath,
|
||||
state.completion_promise,
|
||||
state.started_at,
|
||||
)
|
||||
: false
|
||||
const completionViaApi = completionViaTranscript
|
||||
? false
|
||||
: verificationSessionID
|
||||
? await detectCompletionInSessionMessages(ctx, {
|
||||
sessionID: verificationSessionID,
|
||||
promise: state.completion_promise,
|
||||
apiTimeoutMs: options.apiTimeoutMs,
|
||||
directory: options.directory,
|
||||
sinceMessageIndex: undefined,
|
||||
})
|
||||
: state.verification_pending
|
||||
? false
|
||||
: await detectCompletionInSessionMessages(ctx, {
|
||||
sessionID,
|
||||
promise: state.completion_promise,
|
||||
|
||||
@@ -62,6 +62,12 @@ export function readState(directory: string, customPath?: string): RalphLoopStat
|
||||
initial_completion_promise: data.initial_completion_promise
|
||||
? stripQuotes(data.initial_completion_promise)
|
||||
: undefined,
|
||||
verification_attempt_id: data.verification_attempt_id
|
||||
? stripQuotes(data.verification_attempt_id)
|
||||
: undefined,
|
||||
verification_session_id: data.verification_session_id
|
||||
? stripQuotes(data.verification_session_id)
|
||||
: undefined,
|
||||
started_at: stripQuotes(data.started_at) || new Date().toISOString(),
|
||||
prompt: body.trim(),
|
||||
session_id: data.session_id ? stripQuotes(data.session_id) : undefined,
|
||||
@@ -100,6 +106,12 @@ export function writeState(
|
||||
const initialCompletionPromiseLine = state.initial_completion_promise
|
||||
? `initial_completion_promise: "${state.initial_completion_promise}"\n`
|
||||
: ""
|
||||
const verificationAttemptLine = state.verification_attempt_id
|
||||
? `verification_attempt_id: "${state.verification_attempt_id}"\n`
|
||||
: ""
|
||||
const verificationSessionLine = state.verification_session_id
|
||||
? `verification_session_id: "${state.verification_session_id}"\n`
|
||||
: ""
|
||||
const messageCountAtStartLine =
|
||||
typeof state.message_count_at_start === "number"
|
||||
? `message_count_at_start: ${state.message_count_at_start}\n`
|
||||
@@ -112,7 +124,7 @@ export function writeState(
|
||||
active: ${state.active}
|
||||
iteration: ${state.iteration}
|
||||
${maxIterationsLine}completion_promise: "${state.completion_promise}"
|
||||
${initialCompletionPromiseLine}started_at: "${state.started_at}"
|
||||
${initialCompletionPromiseLine}${verificationAttemptLine}${verificationSessionLine}started_at: "${state.started_at}"
|
||||
${sessionIdLine}${ultraworkLine}${verificationPendingLine}${strategyLine}${messageCountAtStartLine}---
|
||||
${state.prompt}
|
||||
`
|
||||
|
||||
@@ -7,6 +7,8 @@ export interface RalphLoopState {
|
||||
message_count_at_start?: number
|
||||
completion_promise: string
|
||||
initial_completion_promise?: string
|
||||
verification_attempt_id?: string
|
||||
verification_session_id?: string
|
||||
started_at: string
|
||||
prompt: string
|
||||
session_id?: string
|
||||
|
||||
@@ -4,13 +4,14 @@ import { tmpdir } from "node:os"
|
||||
import { join } from "node:path"
|
||||
import { createRalphLoopHook } from "./index"
|
||||
import { ULTRAWORK_VERIFICATION_PROMISE } from "./constants"
|
||||
import { clearState } from "./storage"
|
||||
import { clearState, writeState } from "./storage"
|
||||
|
||||
describe("ulw-loop verification", () => {
|
||||
const testDir = join(tmpdir(), `ulw-loop-verification-${Date.now()}`)
|
||||
let promptCalls: Array<{ sessionID: string; text: string }>
|
||||
let toastCalls: Array<{ title: string; message: string; variant: string }>
|
||||
let transcriptPath: string
|
||||
let parentTranscriptPath: string
|
||||
let oracleTranscriptPath: string
|
||||
|
||||
function createMockPluginInput() {
|
||||
return {
|
||||
@@ -39,7 +40,8 @@ describe("ulw-loop verification", () => {
|
||||
beforeEach(() => {
|
||||
promptCalls = []
|
||||
toastCalls = []
|
||||
transcriptPath = join(testDir, "transcript.jsonl")
|
||||
parentTranscriptPath = join(testDir, "transcript-parent.jsonl")
|
||||
oracleTranscriptPath = join(testDir, "transcript-oracle.jsonl")
|
||||
|
||||
if (!existsSync(testDir)) {
|
||||
mkdirSync(testDir, { recursive: true })
|
||||
@@ -57,11 +59,11 @@ describe("ulw-loop verification", () => {
|
||||
|
||||
test("#given ulw loop emits DONE #when idle fires #then verification phase starts instead of completing", async () => {
|
||||
const hook = createRalphLoopHook(createMockPluginInput(), {
|
||||
getTranscriptPath: () => transcriptPath,
|
||||
getTranscriptPath: (sessionID) => sessionID === "ses-oracle" ? oracleTranscriptPath : parentTranscriptPath,
|
||||
})
|
||||
hook.startLoop("session-123", "Build API", { ultrawork: true })
|
||||
writeFileSync(
|
||||
transcriptPath,
|
||||
parentTranscriptPath,
|
||||
`${JSON.stringify({ type: "tool_result", timestamp: new Date().toISOString(), tool_output: { output: "done <promise>DONE</promise>" } })}\n`,
|
||||
)
|
||||
|
||||
@@ -69,25 +71,30 @@ describe("ulw-loop verification", () => {
|
||||
|
||||
expect(hook.getState()?.verification_pending).toBe(true)
|
||||
expect(hook.getState()?.completion_promise).toBe(ULTRAWORK_VERIFICATION_PROMISE)
|
||||
expect(hook.getState()?.verification_session_id).toBeUndefined()
|
||||
expect(promptCalls).toHaveLength(1)
|
||||
expect(promptCalls[0].text).toContain('task(subagent_type="oracle"')
|
||||
expect(toastCalls.some((toast) => toast.title === "ULTRAWORK LOOP COMPLETE!")).toBe(false)
|
||||
})
|
||||
|
||||
test("#given ulw loop is awaiting verification #when VERIFIED appears #then loop completes", async () => {
|
||||
test("#given ulw loop is awaiting verification #when VERIFIED appears in oracle session #then loop completes", async () => {
|
||||
const hook = createRalphLoopHook(createMockPluginInput(), {
|
||||
getTranscriptPath: () => transcriptPath,
|
||||
getTranscriptPath: (sessionID) => sessionID === "ses-oracle" ? oracleTranscriptPath : parentTranscriptPath,
|
||||
})
|
||||
hook.startLoop("session-123", "Build API", { ultrawork: true })
|
||||
writeFileSync(
|
||||
transcriptPath,
|
||||
parentTranscriptPath,
|
||||
`${JSON.stringify({ type: "tool_result", timestamp: new Date().toISOString(), tool_output: { output: "done <promise>DONE</promise>" } })}\n`,
|
||||
)
|
||||
|
||||
await hook.event({ event: { type: "session.idle", properties: { sessionID: "session-123" } } })
|
||||
writeState(testDir, {
|
||||
...hook.getState()!,
|
||||
verification_session_id: "ses-oracle",
|
||||
})
|
||||
writeFileSync(
|
||||
transcriptPath,
|
||||
`${JSON.stringify({ type: "tool_result", timestamp: new Date().toISOString(), tool_output: { output: "done <promise>DONE</promise>" } })}\n${JSON.stringify({ type: "tool_result", timestamp: new Date().toISOString(), tool_output: { output: `verified <promise>${ULTRAWORK_VERIFICATION_PROMISE}</promise>` } })}\n`,
|
||||
oracleTranscriptPath,
|
||||
`${JSON.stringify({ type: "tool_result", timestamp: new Date().toISOString(), tool_output: { output: `verified <promise>${ULTRAWORK_VERIFICATION_PROMISE}</promise>` } })}\n`,
|
||||
)
|
||||
|
||||
await hook.event({ event: { type: "session.idle", properties: { sessionID: "session-123" } } })
|
||||
@@ -98,7 +105,7 @@ describe("ulw-loop verification", () => {
|
||||
|
||||
test("#given ulw loop without max iterations #when it continues #then it stays unbounded", async () => {
|
||||
const hook = createRalphLoopHook(createMockPluginInput(), {
|
||||
getTranscriptPath: () => transcriptPath,
|
||||
getTranscriptPath: (sessionID) => sessionID === "ses-oracle" ? oracleTranscriptPath : parentTranscriptPath,
|
||||
})
|
||||
hook.startLoop("session-123", "Build API", { ultrawork: true })
|
||||
|
||||
@@ -111,11 +118,11 @@ describe("ulw-loop verification", () => {
|
||||
|
||||
test("#given prior transcript completion from older run #when new ulw loop starts #then old completion is ignored", async () => {
|
||||
writeFileSync(
|
||||
transcriptPath,
|
||||
parentTranscriptPath,
|
||||
`${JSON.stringify({ type: "tool_result", timestamp: "2000-01-01T00:00:00.000Z", tool_output: { output: "old <promise>DONE</promise>" } })}\n`,
|
||||
)
|
||||
const hook = createRalphLoopHook(createMockPluginInput(), {
|
||||
getTranscriptPath: () => transcriptPath,
|
||||
getTranscriptPath: (sessionID) => sessionID === "ses-oracle" ? oracleTranscriptPath : parentTranscriptPath,
|
||||
})
|
||||
hook.startLoop("session-123", "Build API", { ultrawork: true })
|
||||
|
||||
@@ -128,11 +135,11 @@ describe("ulw-loop verification", () => {
|
||||
|
||||
test("#given ulw loop was awaiting verification #when same session starts again #then verification state is overwritten", async () => {
|
||||
const hook = createRalphLoopHook(createMockPluginInput(), {
|
||||
getTranscriptPath: () => transcriptPath,
|
||||
getTranscriptPath: (sessionID) => sessionID === "ses-oracle" ? oracleTranscriptPath : parentTranscriptPath,
|
||||
})
|
||||
hook.startLoop("session-123", "Build API", { ultrawork: true })
|
||||
writeFileSync(
|
||||
transcriptPath,
|
||||
parentTranscriptPath,
|
||||
`${JSON.stringify({ type: "tool_result", timestamp: new Date().toISOString(), tool_output: { output: "done <promise>DONE</promise>" } })}\n`,
|
||||
)
|
||||
|
||||
@@ -143,4 +150,26 @@ describe("ulw-loop verification", () => {
|
||||
expect(hook.getState()?.verification_pending).toBeUndefined()
|
||||
expect(hook.getState()?.completion_promise).toBe("DONE")
|
||||
})
|
||||
|
||||
// Guards against a VERIFIED promise in the parent transcript ending the loop
// while no verification session id has been recorded in the loop state.
test("#given parent session emits VERIFIED #when oracle session is not tracked #then ulw loop does not complete", async () => {
  // Builds one JSONL transcript entry shaped like a tool result.
  const toolResultLine = (output: string): string =>
    `${JSON.stringify({ type: "tool_result", timestamp: new Date().toISOString(), tool_output: { output } })}\n`
  const hook = createRalphLoopHook(createMockPluginInput(), {
    getTranscriptPath: (sessionID) => sessionID === "ses-oracle" ? oracleTranscriptPath : parentTranscriptPath,
  })
  const fireParentIdle = () =>
    hook.event({ event: { type: "session.idle", properties: { sessionID: "session-123" } } })

  hook.startLoop("session-123", "Build API", { ultrawork: true })

  // First idle: the parent has emitted DONE.
  writeFileSync(parentTranscriptPath, toolResultLine("done <promise>DONE</promise>"))
  await fireParentIdle()

  // Second idle: the parent transcript now also contains the verification promise.
  writeFileSync(
    parentTranscriptPath,
    toolResultLine("done <promise>DONE</promise>")
      + toolResultLine(`bad parent leak <promise>${ULTRAWORK_VERIFICATION_PROMISE}</promise>`),
  )
  await fireParentIdle()

  expect(hook.getState()).not.toBeNull()
  expect(hook.getState()?.verification_pending).toBe(true)
})
|
||||
})
|
||||
|
||||
@@ -68,6 +68,7 @@ export function createPluginInterface(args: {
|
||||
}),
|
||||
|
||||
"tool.execute.after": createToolExecuteAfterHandler({
|
||||
ctx,
|
||||
hooks,
|
||||
}),
|
||||
}
|
||||
|
||||
@@ -1,7 +1,12 @@
|
||||
import { consumeToolMetadata } from "../features/tool-metadata-store"
|
||||
import type { CreatedHooks } from "../create-hooks"
|
||||
import type { PluginContext } from "./types"
|
||||
import { readState, writeState } from "../hooks/ralph-loop/storage"
|
||||
|
||||
const VERIFICATION_ATTEMPT_PATTERN = /<ulw_verification_attempt_id>(.*?)<\/ulw_verification_attempt_id>/i
|
||||
|
||||
export function createToolExecuteAfterHandler(args: {
|
||||
ctx: PluginContext
|
||||
hooks: CreatedHooks
|
||||
}): (
|
||||
input: { tool: string; sessionID: string; callID: string },
|
||||
@@ -9,7 +14,7 @@ export function createToolExecuteAfterHandler(args: {
|
||||
| { title: string; output: string; metadata: Record<string, unknown> }
|
||||
| undefined,
|
||||
) => Promise<void> {
|
||||
const { hooks } = args
|
||||
const { ctx, hooks } = args
|
||||
|
||||
return async (
|
||||
input: { tool: string; sessionID: string; callID: string },
|
||||
@@ -27,6 +32,30 @@ export function createToolExecuteAfterHandler(args: {
|
||||
}
|
||||
}
|
||||
|
||||
// Record which session performed the Oracle verification for the active loop.
// The attempt id is recovered from the task prompt via VERIFICATION_ATTEMPT_PATTERN
// and must equal the id stored in the loop state, so a task result carrying a
// different attempt id cannot set the tracked session.
if (input.tool === "task") {
  const sessionId = typeof output.metadata?.sessionId === "string" ? output.metadata.sessionId : undefined
  const agent = typeof output.metadata?.agent === "string" ? output.metadata.agent : undefined
  const prompt = typeof output.metadata?.prompt === "string" ? output.metadata.prompt : undefined
  const verificationAttemptId = prompt?.match(VERIFICATION_ATTEMPT_PATTERN)?.[1]?.trim()

  // Check the in-memory metadata first so the state file is only read for
  // Oracle task results that actually carry a session id and an attempt id.
  if (agent === "oracle" && sessionId && verificationAttemptId) {
    const loopState = readState(ctx.directory)

    if (
      loopState?.active === true
      && loopState.ultrawork === true
      && loopState.verification_pending === true
      && loopState.session_id === input.sessionID
      && loopState.verification_attempt_id === verificationAttemptId
    ) {
      writeState(ctx.directory, {
        ...loopState,
        verification_session_id: sessionId,
      })
    }
  }
}
|
||||
|
||||
await hooks.claudeCodeHooks?.["tool.execute.after"]?.(input, output)
|
||||
await hooks.toolOutputTruncator?.["tool.execute.after"]?.(input, output)
|
||||
await hooks.preemptiveCompaction?.["tool.execute.after"]?.(input, output)
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import type { PluginContext } from "./types"
|
||||
import { randomUUID } from "node:crypto"
|
||||
|
||||
import { getMainSessionID } from "../features/claude-code-session-state"
|
||||
import { clearBoulderState } from "../features/boulder-state"
|
||||
@@ -6,7 +7,7 @@ import { log } from "../shared"
|
||||
import { resolveSessionAgent } from "./session-agent-resolver"
|
||||
import { parseRalphLoopArguments } from "../hooks/ralph-loop/command-arguments"
|
||||
import { ULTRAWORK_VERIFICATION_PROMISE } from "../hooks/ralph-loop/constants"
|
||||
import { readState } from "../hooks/ralph-loop/storage"
|
||||
import { readState, writeState } from "../hooks/ralph-loop/storage"
|
||||
|
||||
import type { CreatedHooks } from "../create-hooks"
|
||||
|
||||
@@ -77,8 +78,14 @@ export function createToolExecuteBeforeHandler(args: {
|
||||
&& loopState.session_id === input.sessionID
|
||||
|
||||
if (shouldInjectOracleVerification) {
|
||||
const verificationAttemptId = randomUUID()
|
||||
writeState(ctx.directory, {
|
||||
...loopState,
|
||||
verification_attempt_id: verificationAttemptId,
|
||||
verification_session_id: undefined,
|
||||
})
|
||||
argsObject.run_in_background = false
|
||||
argsObject.prompt = `${prompt ? `${prompt}\n\n` : ""}You are verifying the active ULTRAWORK loop result for this session. Review whether the original task is truly complete: ${loopState.prompt}\n\nIf the work is fully complete, end your response with <promise>${ULTRAWORK_VERIFICATION_PROMISE}</promise>. If the work is not complete, explain the blocking issues clearly and DO NOT emit that promise.`
|
||||
argsObject.prompt = `${prompt ? `${prompt}\n\n` : ""}You are verifying the active ULTRAWORK loop result for this session. Review whether the original task is truly complete: ${loopState.prompt}\n\nIf the work is fully complete, end your response with <promise>${ULTRAWORK_VERIFICATION_PROMISE}</promise>. If the work is not complete, explain the blocking issues clearly and DO NOT emit that promise.\n\n<ulw_verification_attempt_id>${verificationAttemptId}</ulw_verification_attempt_id>`
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -2,9 +2,10 @@ import { describe, expect, test } from "bun:test"
|
||||
import { mkdirSync, rmSync } from "node:fs"
|
||||
import { tmpdir } from "node:os"
|
||||
import { join } from "node:path"
|
||||
import { createToolExecuteAfterHandler } from "./tool-execute-after"
|
||||
import { createToolExecuteBeforeHandler } from "./tool-execute-before"
|
||||
import { ULTRAWORK_VERIFICATION_PROMISE } from "../hooks/ralph-loop/constants"
|
||||
import { clearState, writeState } from "../hooks/ralph-loop/storage"
|
||||
import { clearState, readState, writeState } from "../hooks/ralph-loop/storage"
|
||||
|
||||
describe("tool.execute.before ultrawork oracle verification", () => {
|
||||
function createCtx(directory: string) {
|
||||
@@ -47,6 +48,7 @@ describe("tool.execute.before ultrawork oracle verification", () => {
|
||||
|
||||
await handler({ tool: "task", sessionID: "ses-main", callID: "call-1" }, output)
|
||||
|
||||
expect(readState(directory)?.verification_attempt_id).toBeTruthy()
|
||||
expect(output.args.run_in_background).toBe(false)
|
||||
expect(output.args.prompt).toContain("Ship feature")
|
||||
expect(output.args.prompt).toContain(`<promise>${ULTRAWORK_VERIFICATION_PROMISE}</promise>`)
|
||||
@@ -77,4 +79,138 @@ describe("tool.execute.before ultrawork oracle verification", () => {
|
||||
|
||||
rmSync(directory, { recursive: true, force: true })
|
||||
})
|
||||
|
||||
// Runs the before handler (which stores a verification attempt id and appends it
// to the prompt) and then the after handler with that same prompt, and expects
// the reported Oracle session id to be persisted in the loop state.
test("#given ulw loop is awaiting verification #when oracle task finishes #then oracle session id is stored", async () => {
  const directory = join(tmpdir(), `tool-after-ulw-${Date.now()}`)
  mkdirSync(directory, { recursive: true })

  try {
    writeState(directory, {
      active: true,
      iteration: 3,
      completion_promise: ULTRAWORK_VERIFICATION_PROMISE,
      initial_completion_promise: "DONE",
      started_at: new Date().toISOString(),
      prompt: "Ship feature",
      session_id: "ses-main",
      ultrawork: true,
      verification_pending: true,
    })

    const beforeHandler = createToolExecuteBeforeHandler({
      ctx: createCtx(directory) as unknown as Parameters<typeof createToolExecuteBeforeHandler>[0]["ctx"],
      hooks: {} as Parameters<typeof createToolExecuteBeforeHandler>[0]["hooks"],
    })
    const beforeOutput = {
      args: {
        subagent_type: "oracle",
        run_in_background: true,
        prompt: "Check it",
      } as Record<string, unknown>,
    }
    await beforeHandler({ tool: "task", sessionID: "ses-main", callID: "call-1" }, beforeOutput)

    const handler = createToolExecuteAfterHandler({
      ctx: createCtx(directory) as unknown as Parameters<typeof createToolExecuteAfterHandler>[0]["ctx"],
      hooks: {} as Parameters<typeof createToolExecuteAfterHandler>[0]["hooks"],
    })

    await handler(
      { tool: "task", sessionID: "ses-main", callID: "call-1" },
      {
        title: "oracle task",
        output: "done",
        metadata: {
          agent: "oracle",
          // Reuse the prompt rewritten by the before handler so the attempt id matches.
          prompt: String(beforeOutput.args.prompt),
          sessionId: "ses-oracle",
        },
      },
    )

    expect(readState(directory)?.verification_session_id).toBe("ses-oracle")
  } finally {
    // Clean up even when an expectation above throws, so no temp state is left behind.
    clearState(directory)
    rmSync(directory, { recursive: true, force: true })
  }
})
|
||||
|
||||
// Starts two Oracle verification attempts back to back, then completes them in
// order. The first (now superseded) attempt must not set the tracked session;
// only the second attempt, whose id matches the stored one, may do so.
test("#given newer oracle attempt exists #when older oracle task finishes #then old session does not overwrite active verification", async () => {
  const directory = join(tmpdir(), `tool-race-ulw-${Date.now()}`)
  mkdirSync(directory, { recursive: true })

  try {
    writeState(directory, {
      active: true,
      iteration: 3,
      completion_promise: ULTRAWORK_VERIFICATION_PROMISE,
      initial_completion_promise: "DONE",
      started_at: new Date().toISOString(),
      prompt: "Ship feature",
      session_id: "ses-main",
      ultrawork: true,
      verification_pending: true,
    })

    const beforeHandler = createToolExecuteBeforeHandler({
      ctx: createCtx(directory) as unknown as Parameters<typeof createToolExecuteBeforeHandler>[0]["ctx"],
      hooks: {} as Parameters<typeof createToolExecuteBeforeHandler>[0]["hooks"],
    })
    const afterHandler = createToolExecuteAfterHandler({
      ctx: createCtx(directory) as unknown as Parameters<typeof createToolExecuteAfterHandler>[0]["ctx"],
      hooks: {} as Parameters<typeof createToolExecuteAfterHandler>[0]["hooks"],
    })

    const firstOutput = {
      args: {
        subagent_type: "oracle",
        run_in_background: true,
        prompt: "Check it",
      } as Record<string, unknown>,
    }
    await beforeHandler({ tool: "task", sessionID: "ses-main", callID: "call-1" }, firstOutput)
    const firstAttemptId = readState(directory)?.verification_attempt_id

    const secondOutput = {
      args: {
        subagent_type: "oracle",
        run_in_background: true,
        prompt: "Check it again",
      } as Record<string, unknown>,
    }
    await beforeHandler({ tool: "task", sessionID: "ses-main", callID: "call-2" }, secondOutput)
    const secondAttemptId = readState(directory)?.verification_attempt_id

    // Each before-handler call must mint a fresh attempt id.
    expect(firstAttemptId).toBeTruthy()
    expect(secondAttemptId).toBeTruthy()
    expect(secondAttemptId).not.toBe(firstAttemptId)

    // The older attempt finishes first; its prompt carries the superseded id.
    await afterHandler(
      { tool: "task", sessionID: "ses-main", callID: "call-1" },
      {
        title: "oracle task",
        output: "done",
        metadata: {
          agent: "oracle",
          prompt: String(firstOutput.args.prompt),
          sessionId: "ses-oracle-old",
        },
      },
    )

    expect(readState(directory)?.verification_session_id).toBeUndefined()

    // The current attempt finishes; its id matches the stored one.
    await afterHandler(
      { tool: "task", sessionID: "ses-main", callID: "call-2" },
      {
        title: "oracle task",
        output: "done",
        metadata: {
          agent: "oracle",
          prompt: String(secondOutput.args.prompt),
          sessionId: "ses-oracle-new",
        },
      },
    )

    expect(readState(directory)?.verification_session_id).toBe("ses-oracle-new")
  } finally {
    // Clean up even when an expectation above throws, so no temp state is left behind.
    clearState(directory)
    rmSync(directory, { recursive: true, force: true })
  }
})
|
||||
})
|
||||
|
||||
Reference in New Issue
Block a user