Merge remote-tracking branch 'origin/dev' into feat/custom-agents

# Conflicts:
#	src/agents/utils.test.ts
#	src/plugin-handlers/agent-config-handler.ts
This commit is contained in:
edxeth
2026-02-26 18:53:29 +01:00
147 changed files with 6360 additions and 1763 deletions

View File

@@ -35,15 +35,15 @@ jobs:
# - Uploads compressed artifacts for the publish job
# =============================================================================
build:
runs-on: ${{ matrix.platform == 'windows-x64' && 'windows-latest' || 'ubuntu-latest' }}
runs-on: ${{ startsWith(matrix.platform, 'windows-') && 'windows-latest' || 'ubuntu-latest' }}
defaults:
run:
shell: bash
strategy:
fail-fast: false
max-parallel: 7
max-parallel: 11
matrix:
platform: [darwin-arm64, darwin-x64, linux-x64, linux-arm64, linux-x64-musl, linux-arm64-musl, windows-x64]
platform: [darwin-arm64, darwin-x64, darwin-x64-baseline, linux-x64, linux-x64-baseline, linux-arm64, linux-x64-musl, linux-x64-musl-baseline, linux-arm64-musl, windows-x64, windows-x64-baseline]
steps:
- uses: actions/checkout@v4
@@ -95,14 +95,18 @@ jobs:
case "$PLATFORM" in
darwin-arm64) TARGET="bun-darwin-arm64" ;;
darwin-x64) TARGET="bun-darwin-x64" ;;
darwin-x64-baseline) TARGET="bun-darwin-x64-baseline" ;;
linux-x64) TARGET="bun-linux-x64" ;;
linux-x64-baseline) TARGET="bun-linux-x64-baseline" ;;
linux-arm64) TARGET="bun-linux-arm64" ;;
linux-x64-musl) TARGET="bun-linux-x64-musl" ;;
linux-x64-musl-baseline) TARGET="bun-linux-x64-musl-baseline" ;;
linux-arm64-musl) TARGET="bun-linux-arm64-musl" ;;
windows-x64) TARGET="bun-windows-x64" ;;
windows-x64-baseline) TARGET="bun-windows-x64-baseline" ;;
esac
if [ "$PLATFORM" = "windows-x64" ]; then
if [[ "$PLATFORM" == windows-* ]]; then
OUTPUT="packages/${PLATFORM}/bin/oh-my-opencode.exe"
else
OUTPUT="packages/${PLATFORM}/bin/oh-my-opencode"
@@ -119,7 +123,7 @@ jobs:
PLATFORM="${{ matrix.platform }}"
cd packages/${PLATFORM}
if [ "$PLATFORM" = "windows-x64" ]; then
if [[ "$PLATFORM" == windows-* ]]; then
# Windows: use 7z (pre-installed on windows-latest)
7z a -tzip ../../binary-${PLATFORM}.zip bin/ package.json
else
@@ -155,7 +159,7 @@ jobs:
fail-fast: false
max-parallel: 2
matrix:
platform: [darwin-arm64, darwin-x64, linux-x64, linux-arm64, linux-x64-musl, linux-arm64-musl, windows-x64]
platform: [darwin-arm64, darwin-x64, darwin-x64-baseline, linux-x64, linux-x64-baseline, linux-arm64, linux-x64-musl, linux-x64-musl-baseline, linux-arm64-musl, windows-x64, windows-x64-baseline]
steps:
- name: Check if already published
id: check
@@ -184,7 +188,7 @@ jobs:
PLATFORM="${{ matrix.platform }}"
mkdir -p packages/${PLATFORM}
if [ "$PLATFORM" = "windows-x64" ]; then
if [[ "$PLATFORM" == windows-* ]]; then
unzip binary-${PLATFORM}.zip -d packages/${PLATFORM}/
else
tar -xzvf binary-${PLATFORM}.tar.gz -C packages/${PLATFORM}/

View File

@@ -189,7 +189,7 @@ jobs:
VERSION="${{ steps.version.outputs.version }}"
jq --arg v "$VERSION" '.version = $v' package.json > tmp.json && mv tmp.json package.json
for platform in darwin-arm64 darwin-x64 linux-x64 linux-arm64 linux-x64-musl linux-arm64-musl windows-x64; do
for platform in darwin-arm64 darwin-x64 darwin-x64-baseline linux-x64 linux-x64-baseline linux-arm64 linux-x64-musl linux-x64-musl-baseline linux-arm64-musl windows-x64 windows-x64-baseline; do
jq --arg v "$VERSION" '.version = $v' "packages/${platform}/package.json" > tmp.json
mv tmp.json "packages/${platform}/package.json"
done

View File

@@ -24,19 +24,7 @@
"disabled_agents": {
"type": "array",
"items": {
"type": "string",
"enum": [
"sisyphus",
"hephaestus",
"prometheus",
"oracle",
"librarian",
"explore",
"multimodal-looker",
"metis",
"momus",
"atlas"
]
"type": "string"
}
},
"disabled_skills": {
@@ -960,6 +948,9 @@
}
},
"additionalProperties": false
},
"allow_non_gpt_model": {
"type": "boolean"
}
},
"additionalProperties": false
@@ -3474,6 +3465,11 @@
"prompt_append": {
"type": "string"
},
"max_prompt_tokens": {
"type": "integer",
"exclusiveMinimum": 0,
"maximum": 9007199254740991
},
"is_unstable_agent": {
"type": "boolean"
},

62
benchmarks/bun.lock Normal file
View File

@@ -0,0 +1,62 @@
{
"lockfileVersion": 1,
"configVersion": 1,
"workspaces": {
"": {
"name": "hashline-edit-benchmark",
"dependencies": {
"@ai-sdk/openai": "^1.3.0",
"@friendliai/ai-provider": "^1.0.9",
"ai": "^6.0.94",
"zod": "^4.1.0",
},
},
},
"packages": {
"@ai-sdk/gateway": ["@ai-sdk/gateway@3.0.55", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@ai-sdk/provider-utils": "4.0.15", "@vercel/oidc": "3.1.0" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-7xMeTJnCjwRwXKVCiv4Ly4qzWvDuW3+W1WIV0X1EFu6W83d4mEhV9bFArto10MeTw40ewuDjrbrZd21mXKohkw=="],
"@ai-sdk/openai": ["@ai-sdk/openai@1.3.24", "", { "dependencies": { "@ai-sdk/provider": "1.1.3", "@ai-sdk/provider-utils": "2.2.8" }, "peerDependencies": { "zod": "^3.0.0" } }, "sha512-GYXnGJTHRTZc4gJMSmFRgEQudjqd4PUN0ZjQhPwOAYH1yOAvQoG/Ikqs+HyISRbLPCrhbZnPKCNHuRU4OfpW0Q=="],
"@ai-sdk/openai-compatible": ["@ai-sdk/openai-compatible@2.0.30", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@ai-sdk/provider-utils": "4.0.15" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-iTjumHf1/u4NhjXYFn/aONM2GId3/o7J1Lp5ql8FCbgIMyRwrmanR5xy1S3aaVkfTscuDvLTzWiy1mAbGzK3nQ=="],
"@ai-sdk/provider": ["@ai-sdk/provider@1.1.3", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-qZMxYJ0qqX/RfnuIaab+zp8UAeJn/ygXXAffR5I4N0n1IrvA6qBsjc8hXLmBiMV2zoXlifkacF7sEFnYnjBcqg=="],
"@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@2.2.8", "", { "dependencies": { "@ai-sdk/provider": "1.1.3", "nanoid": "^3.3.8", "secure-json-parse": "^2.7.0" }, "peerDependencies": { "zod": "^3.23.8" } }, "sha512-fqhG+4sCVv8x7nFzYnFo19ryhAa3w096Kmc3hWxMQfW/TubPOmt3A6tYZhl4mUfQWWQMsuSkLrtjlWuXBVSGQA=="],
"@friendliai/ai-provider": ["@friendliai/ai-provider@1.1.4", "", { "dependencies": { "@ai-sdk/openai-compatible": "2.0.30", "@ai-sdk/provider": "3.0.8", "@ai-sdk/provider-utils": "4.0.15" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.12" } }, "sha512-9TU4B1QFqPhbkONjI5afCF7Ox4jOqtGg1xw8mA9QHZdtlEbZxU+mBNvMPlI5pU5kPoN6s7wkXmFmxpID+own1A=="],
"@opentelemetry/api": ["@opentelemetry/api@1.9.0", "", {}, "sha512-3giAOQvZiH5F9bMlMiv8+GSPMeqg0dbaeo58/0SlA9sxSqZhnUtxzX9/2FzyhS9sWQf5S0GJE0AKBrFqjpeYcg=="],
"@standard-schema/spec": ["@standard-schema/spec@1.1.0", "", {}, "sha512-l2aFy5jALhniG5HgqrD6jXLi/rUWrKvqN/qJx6yoJsgKhblVd+iqqU4RCXavm/jPityDo5TCvKMnpjKnOriy0w=="],
"@vercel/oidc": ["@vercel/oidc@3.1.0", "", {}, "sha512-Fw28YZpRnA3cAHHDlkt7xQHiJ0fcL+NRcIqsocZQUSmbzeIKRpwttJjik5ZGanXP+vlA4SbTg+AbA3bP363l+w=="],
"ai": ["ai@6.0.101", "", { "dependencies": { "@ai-sdk/gateway": "3.0.55", "@ai-sdk/provider": "3.0.8", "@ai-sdk/provider-utils": "4.0.15", "@opentelemetry/api": "1.9.0" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-Ur/NgbgOp1rdhyDiKDk6EOpSgd1g5ADlbcD1cjQJtQsnmhEngz3Rf8nK5JetDh0vnbLy2aEBpaQeL+zvLRWuaA=="],
"eventsource-parser": ["eventsource-parser@3.0.6", "", {}, "sha512-Vo1ab+QXPzZ4tCa8SwIHJFaSzy4R6SHf7BY79rFBDf0idraZWAkYrDjDj8uWaSm3S2TK+hJ7/t1CEmZ7jXw+pg=="],
"json-schema": ["json-schema@0.4.0", "", {}, "sha512-es94M3nTIfsEPisRafak+HDLfHXnKBhV3vU5eqPcS3flIWqcxJWgXHXiey3YrpaNsanY5ei1VoYEbOzijuq9BA=="],
"nanoid": ["nanoid@3.3.11", "", { "bin": { "nanoid": "bin/nanoid.cjs" } }, "sha512-N8SpfPUnUp1bK+PMYW8qSWdl9U+wwNWI4QKxOYDy9JAro3WMX7p2OeVRF9v+347pnakNevPmiHhNmZ2HbFA76w=="],
"secure-json-parse": ["secure-json-parse@2.7.0", "", {}, "sha512-6aU+Rwsezw7VR8/nyvKTx8QpWH9FrcYiXXlqC4z5d5XQBDRqtbfsRjnwGyqbi3gddNtWHuEk9OANUotL26qKUw=="],
"zod": ["zod@4.3.6", "", {}, "sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg=="],
"@ai-sdk/gateway/@ai-sdk/provider": ["@ai-sdk/provider@3.0.8", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-oGMAgGoQdBXbZqNG0Ze56CHjDZ1IDYOwGYxYjO5KLSlz5HiNQ9udIXsPZ61VWaHGZ5XW/jyjmr6t2xz2jGVwbQ=="],
"@ai-sdk/gateway/@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@4.0.15", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@standard-schema/spec": "^1.1.0", "eventsource-parser": "^3.0.6" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-8XiKWbemmCbvNN0CLR9u3PQiet4gtEVIrX4zzLxnCj06AwsEDJwJVBbKrEI4t6qE8XRSIvU2irka0dcpziKW6w=="],
"@ai-sdk/openai-compatible/@ai-sdk/provider": ["@ai-sdk/provider@3.0.8", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-oGMAgGoQdBXbZqNG0Ze56CHjDZ1IDYOwGYxYjO5KLSlz5HiNQ9udIXsPZ61VWaHGZ5XW/jyjmr6t2xz2jGVwbQ=="],
"@ai-sdk/openai-compatible/@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@4.0.15", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@standard-schema/spec": "^1.1.0", "eventsource-parser": "^3.0.6" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-8XiKWbemmCbvNN0CLR9u3PQiet4gtEVIrX4zzLxnCj06AwsEDJwJVBbKrEI4t6qE8XRSIvU2irka0dcpziKW6w=="],
"@friendliai/ai-provider/@ai-sdk/provider": ["@ai-sdk/provider@3.0.8", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-oGMAgGoQdBXbZqNG0Ze56CHjDZ1IDYOwGYxYjO5KLSlz5HiNQ9udIXsPZ61VWaHGZ5XW/jyjmr6t2xz2jGVwbQ=="],
"@friendliai/ai-provider/@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@4.0.15", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@standard-schema/spec": "^1.1.0", "eventsource-parser": "^3.0.6" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-8XiKWbemmCbvNN0CLR9u3PQiet4gtEVIrX4zzLxnCj06AwsEDJwJVBbKrEI4t6qE8XRSIvU2irka0dcpziKW6w=="],
"ai/@ai-sdk/provider": ["@ai-sdk/provider@3.0.8", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-oGMAgGoQdBXbZqNG0Ze56CHjDZ1IDYOwGYxYjO5KLSlz5HiNQ9udIXsPZ61VWaHGZ5XW/jyjmr6t2xz2jGVwbQ=="],
"ai/@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@4.0.15", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@standard-schema/spec": "^1.1.0", "eventsource-parser": "^3.0.6" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-8XiKWbemmCbvNN0CLR9u3PQiet4gtEVIrX4zzLxnCj06AwsEDJwJVBbKrEI4t6qE8XRSIvU2irka0dcpziKW6w=="],
}
}

193
benchmarks/headless.ts Normal file
View File

@@ -0,0 +1,193 @@
#!/usr/bin/env bun
import { readFile, writeFile, mkdir } from "node:fs/promises"
import { join, dirname } from "node:path"
import { stepCountIs, streamText, type CoreMessage } from "ai"
import { tool } from "ai"
import { createFriendli } from "@friendliai/ai-provider"
import { z } from "zod"
import { formatHashLines } from "../src/tools/hashline-edit/hash-computation"
import { normalizeHashlineEdits } from "../src/tools/hashline-edit/normalize-edits"
import { applyHashlineEditsWithReport } from "../src/tools/hashline-edit/edit-operations"
import { canonicalizeFileText, restoreFileText } from "../src/tools/hashline-edit/file-text-canonicalization"
const DEFAULT_MODEL = "MiniMaxAI/MiniMax-M2.5"
const MAX_STEPS = 50
const sessionId = `bench-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`
const emit = (event: Record<string, unknown>) =>
console.log(JSON.stringify({ sessionId, timestamp: new Date().toISOString(), ...event }))
// ── CLI ──────────────────────────────────────────────────────
function parseArgs(): { prompt: string; modelId: string } {
const args = process.argv.slice(2)
let prompt = ""
let modelId = DEFAULT_MODEL
for (let i = 0; i < args.length; i++) {
if ((args[i] === "-p" || args[i] === "--prompt") && args[i + 1]) {
prompt = args[++i]
} else if ((args[i] === "-m" || args[i] === "--model") && args[i + 1]) {
modelId = args[++i]
} else if (args[i] === "--reasoning-mode" && args[i + 1]) {
i++ // consume
}
// --no-translate, --think consumed silently
}
if (!prompt) {
console.error("Usage: bun run benchmarks/headless.ts -p <prompt> [-m <model>]")
process.exit(1)
}
return { prompt, modelId }
}
// ── Tools ────────────────────────────────────────────────────
const readFileTool = tool({
description: "Read a file with hashline-tagged content (LINE#ID format)",
inputSchema: z.object({ path: z.string().describe("File path") }),
execute: async ({ path }) => {
const fullPath = join(process.cwd(), path)
try {
const content = await readFile(fullPath, "utf-8")
const lines = content.split("\n")
const tagged = formatHashLines(content)
return `OK - read file\npath: ${path}\nlines: ${lines.length}\n\n${tagged}`
} catch {
return `Error: File not found: ${path}`
}
},
})
const editFileTool = tool({
description: "Edit a file using hashline anchors (LINE#ID format)",
inputSchema: z.object({
path: z.string(),
edits: z.array(
z.object({
op: z.enum(["replace", "append", "prepend"]),
pos: z.string().optional(),
end: z.string().optional(),
lines: z.union([z.array(z.string()), z.string(), z.null()]),
})
).min(1),
}),
execute: async ({ path, edits }) => {
const fullPath = join(process.cwd(), path)
try {
let rawContent = ""
let exists = true
try {
rawContent = await readFile(fullPath, "utf-8")
} catch {
exists = false
}
const normalized = normalizeHashlineEdits(edits)
if (!exists) {
const canCreate = normalized.every(
(e) => (e.op === "append" || e.op === "prepend") && !e.pos
)
if (!canCreate) return `Error: File not found: ${path}`
}
const envelope = canonicalizeFileText(rawContent)
const result = applyHashlineEditsWithReport(envelope.content, normalized)
if (result.content === envelope.content) {
return `Error: No changes made to ${path}. The edits produced identical content.`
}
const writeContent = restoreFileText(result.content, envelope)
await mkdir(dirname(fullPath), { recursive: true })
await writeFile(fullPath, writeContent, "utf-8")
const oldLineCount = rawContent.split("\n").length
const newLineCount = writeContent.split("\n").length
const delta = newLineCount - oldLineCount
const sign = delta > 0 ? "+" : ""
const action = exists ? "Updated" : "Created"
return `${action} ${path}\n${edits.length} edit(s) applied, ${sign}${delta} line(s)`
} catch (error) {
return `Error: ${error instanceof Error ? error.message : String(error)}`
}
},
})
// ── Agent Loop ───────────────────────────────────────────────
async function run() {
const { prompt, modelId } = parseArgs()
const friendli = createFriendli({ apiKey: process.env.FRIENDLI_TOKEN! })
const model = friendli(modelId)
const tools = { read_file: readFileTool, edit_file: editFileTool }
emit({ type: "user", content: prompt })
const messages: CoreMessage[] = [{ role: "user", content: prompt }]
const system =
"You are a code editing assistant. Use read_file to read files and edit_file to edit them. " +
"Always read a file before editing it to get fresh LINE#ID anchors."
for (let step = 0; step < MAX_STEPS; step++) {
const stream = streamText({
model,
tools,
messages,
system,
stopWhen: stepCountIs(1),
})
let currentText = ""
for await (const part of stream.fullStream) {
switch (part.type) {
case "text-delta":
currentText += part.text
break
case "tool-call":
emit({
type: "tool_call",
tool_call_id: part.toolCallId,
tool_name: part.toolName,
tool_input: part.args,
model: modelId,
})
break
case "tool-result": {
const output = typeof part.result === "string" ? part.result : JSON.stringify(part.result)
const isError = typeof output === "string" && output.startsWith("Error:")
emit({
type: "tool_result",
tool_call_id: part.toolCallId,
output,
...(isError ? { error: output } : {}),
})
break
}
}
const response = await stream.response
messages.push(...response.messages)
const finishReason = await stream.finishReason
if (finishReason !== "tool-calls") {
if (currentText.trim()) {
emit({ type: "assistant", content: currentText, model: modelId })
}
break
}
}
}
// ── Signal + Startup ─────────────────────────────────────────
process.once("SIGINT", () => process.exit(0))
process.once("SIGTERM", () => process.exit(143))
const startTime = Date.now()
run()
.catch((error) => {
emit({ type: "error", error: error instanceof Error ? error.message : String(error) })
process.exit(1)
})
.then(() => {
const elapsed = ((Date.now() - startTime) / 1000).toFixed(2)
console.error(`[headless] Completed in ${elapsed}s`)
})

19
benchmarks/package.json Normal file
View File

@@ -0,0 +1,19 @@
{
"name": "hashline-edit-benchmark",
"version": "0.1.0",
"private": true,
"type": "module",
"description": "Hashline edit tool benchmark using Vercel AI SDK with FriendliAI provider",
"scripts": {
"bench:basic": "bun run test-edit-ops.ts",
"bench:edge": "bun run test-edge-cases.ts",
"bench:multi": "bun run test-multi-model.ts",
"bench:all": "bun run bench:basic && bun run bench:edge"
},
"dependencies": {
"ai": "^6.0.94",
"@ai-sdk/openai": "^1.3.0",
"@friendliai/ai-provider": "^1.0.9",
"zod": "^4.1.0"
}
}

File diff suppressed because it is too large Load Diff

808
benchmarks/test-edit-ops.ts Normal file
View File

@@ -0,0 +1,808 @@
#!/usr/bin/env bun
/**
* Comprehensive headless edit_file stress test: 21 operation types
*
* Tests: 5 basic ops + 10 creative cases + 6 whitespace cases
* Each runs via headless mode with its own demo file + prompt.
*
* Usage:
* bun run scripts/test-headless-edit-ops.ts [-m <model>] [--provider <provider>]
*/
import { spawn } from "node:child_process";
import { mkdirSync, readFileSync, rmSync, writeFileSync } from "node:fs";
import { tmpdir } from "node:os";
import { join, resolve } from "node:path";
// ── CLI arg passthrough ───────────────────────────────────────
const extraArgs: string[] = [];
const rawArgs = process.argv.slice(2);
for (let i = 0; i < rawArgs.length; i++) {
const arg = rawArgs[i];
if (
(arg === "-m" || arg === "--model" || arg === "--provider") &&
i + 1 < rawArgs.length
) {
extraArgs.push(arg, rawArgs[i + 1]);
i++;
} else if (arg === "--think" || arg === "--no-translate") {
extraArgs.push(arg);
} else if (arg === "--reasoning-mode" && i + 1 < rawArgs.length) {
extraArgs.push(arg, rawArgs[i + 1]);
i++;
}
}
// ── Colors ────────────────────────────────────────────────────
const BOLD = "\x1b[1m";
const GREEN = "\x1b[32m";
const RED = "\x1b[31m";
const YELLOW = "\x1b[33m";
const DIM = "\x1b[2m";
const CYAN = "\x1b[36m";
const RESET = "\x1b[0m";
const pass = (msg: string) => console.log(` ${GREEN}${RESET} ${msg}`);
const fail = (msg: string) => console.log(` ${RED}${RESET} ${msg}`);
const info = (msg: string) => console.log(` ${DIM}${msg}${RESET}`);
const warn = (msg: string) => console.log(` ${YELLOW}${RESET} ${msg}`);
// ── Test case definition ─────────────────────────────────────
interface TestCase {
fileContent: string;
fileName: string;
name: string;
prompt: string;
validate: (content: string) => { passed: boolean; reason: string };
}
const TEST_CASES: TestCase[] = [
{
name: "1. Replace single line",
fileName: "config.txt",
fileContent: [
"host: localhost",
"port: 3000",
"debug: false",
"timeout: 30",
"retries: 3",
].join("\n"),
prompt: [
"Follow these steps exactly:",
"Step 1: Call read_file on config.txt.",
"Step 2: Note the anchor for the port line (line 2).",
"Step 3: Call edit_file with path='config.txt' and edits containing ONE object:",
" { op: 'replace', pos: '<line2 anchor>', lines: ['port: 8080'] }",
"IMPORTANT: pos must be ONLY the anchor (like '2#KB'). lines must be a SEPARATE array field with the new content.",
].join(" "),
validate: (content) => {
const has8080 = content.includes("port: 8080");
const has3000 = content.includes("port: 3000");
if (has8080 && !has3000) {
return { passed: true, reason: "port changed to 8080" };
}
if (has3000) {
return { passed: false, reason: "port still 3000 — edit not applied" };
}
return {
passed: false,
reason: `unexpected content: ${content.slice(0, 100)}`,
};
},
},
{
name: "2. Append after line",
fileName: "fruits.txt",
fileContent: ["apple", "banana", "cherry"].join("\n"),
prompt:
"Read fruits.txt with read_file. Then use edit_file with op='append' to insert a new line 'grape' after the 'banana' line. Use pos='LINE#HASH' of the banana line and lines=['grape'].",
validate: (content) => {
const lines = content.trim().split("\n");
const bananaIdx = lines.findIndex((l) => l.trim() === "banana");
const grapeIdx = lines.findIndex((l) => l.trim() === "grape");
if (grapeIdx === -1) {
return { passed: false, reason: '"grape" not found in file' };
}
if (bananaIdx === -1) {
return { passed: false, reason: '"banana" was removed' };
}
if (grapeIdx !== bananaIdx + 1) {
return {
passed: false,
reason: `"grape" at line ${grapeIdx + 1} but expected after "banana" at line ${bananaIdx + 1}`,
};
}
if (lines.length !== 4) {
return {
passed: false,
reason: `expected 4 lines, got ${lines.length}`,
};
}
return {
passed: true,
reason: '"grape" correctly appended after "banana"',
};
},
},
{
name: "3. Prepend before line",
fileName: "code.txt",
fileContent: ["function greet() {", ' return "hello";', "}"].join("\n"),
prompt:
"Read code.txt with read_file. Then use edit_file with op='prepend' to add '// Greeting function' before the function line. Use pos='LINE#HASH' of the function line and lines=['// Greeting function'].",
validate: (content) => {
const lines = content.trim().split("\n");
const commentIdx = lines.findIndex(
(l) => l.trim().startsWith("//") && l.toLowerCase().includes("greet")
);
const funcIdx = lines.findIndex((l) =>
l.trim().startsWith("function greet")
);
if (commentIdx === -1) {
return { passed: false, reason: "comment line not found" };
}
if (funcIdx === -1) {
return { passed: false, reason: '"function greet" line was removed' };
}
if (commentIdx !== funcIdx - 1) {
return {
passed: false,
reason: `comment at line ${commentIdx + 1} but function at ${funcIdx + 1} — not directly before`,
};
}
return {
passed: true,
reason: "comment correctly prepended before function",
};
},
},
{
name: "4. Range replace (multi-line → single line)",
fileName: "log.txt",
fileContent: [
"=== Log Start ===",
"INFO: started",
"WARN: slow query",
"ERROR: timeout",
"INFO: recovered",
"=== Log End ===",
].join("\n"),
prompt: [
"Follow these steps exactly:",
"Step 1: Call read_file on log.txt to see line anchors.",
"Step 2: Note the anchor for 'WARN: slow query' (line 3) and 'ERROR: timeout' (line 4).",
"Step 3: Call edit_file with path='log.txt' and edits containing ONE object with THREE separate JSON fields:",
" { op: 'replace', pos: '<line3 anchor>', end: '<line4 anchor>', lines: ['RESOLVED: issues cleared'] }",
"CRITICAL: pos, end, and lines are THREE SEPARATE JSON fields. pos is ONLY '3#XX'. end is ONLY '4#YY'. lines is ['RESOLVED: issues cleared'].",
"If edit_file fails or errors, use write_file to write the complete correct file content instead.",
"The correct final content should be: === Log Start ===, INFO: started, RESOLVED: issues cleared, INFO: recovered, === Log End ===",
"Do not make any other changes.",
].join(" "),
validate: (content) => {
const lines = content.trim().split("\n");
const hasResolved = lines.some(
(l) => l.trim() === "RESOLVED: issues cleared"
);
const hasWarn = content.includes("WARN: slow query");
const hasError = content.includes("ERROR: timeout");
if (!hasResolved) {
return {
passed: false,
reason: '"RESOLVED: issues cleared" not found',
};
}
if (hasWarn || hasError) {
return { passed: false, reason: "old WARN/ERROR lines still present" };
}
// Core assertion: 2 old lines removed, 1 new line added = net -1 line
// Allow slight overshoot from model adding extra content
if (lines.length < 4 || lines.length > 6) {
return {
passed: false,
reason: `expected ~5 lines, got ${lines.length}`,
};
}
return {
passed: true,
reason: "range replace succeeded — 2 lines → 1 line",
};
},
},
{
name: "5. Delete line",
fileName: "settings.txt",
fileContent: [
"mode: production",
"debug: true",
"cache: enabled",
"log_level: info",
].join("\n"),
prompt: [
"Follow these steps exactly:",
"Step 1: Call read_file on settings.txt to see line anchors.",
"Step 2: Note the anchor for 'debug: true' (line 2).",
"Step 3: Call edit_file with path='settings.txt' and edits containing ONE object:",
" { op: 'replace', pos: '<line2 anchor>', lines: [] }",
"IMPORTANT: lines must be an empty array [] to delete the line. pos must be ONLY the anchor like '2#SR'.",
].join(" "),
validate: (content) => {
const lines = content.trim().split("\n");
const hasDebug = content.includes("debug: true");
if (hasDebug) {
return { passed: false, reason: '"debug: true" still present' };
}
if (lines.length !== 3) {
return {
passed: false,
reason: `expected 3 lines, got ${lines.length}`,
};
}
if (
!(
content.includes("mode: production") &&
content.includes("cache: enabled")
)
) {
return { passed: false, reason: "other lines were removed" };
}
return { passed: true, reason: '"debug: true" successfully deleted' };
},
},
// ── Creative cases (6-15) ────────────────────────────────────
{
name: "6. Batch edit — two replacements in one call",
fileName: "batch.txt",
fileContent: ["red", "green", "blue", "yellow"].join("\n"),
prompt: [
"Read batch.txt with read_file.",
"Then call edit_file ONCE with path='batch.txt' and edits containing TWO objects:",
" 1) { op: 'replace', pos: '<line1 anchor>', lines: ['crimson'] }",
" 2) { op: 'replace', pos: '<line3 anchor>', lines: ['navy'] }",
"Both edits must be in the SAME edits array in a single edit_file call.",
].join(" "),
validate: (c) => {
const lines = c.trim().split("\n");
if (!c.includes("crimson")) return { passed: false, reason: "'crimson' not found" };
if (!c.includes("navy")) return { passed: false, reason: "'navy' not found" };
if (c.includes("red")) return { passed: false, reason: "'red' still present" };
if (c.includes("blue")) return { passed: false, reason: "'blue' still present" };
if (lines.length !== 4) return { passed: false, reason: `expected 4 lines, got ${lines.length}` };
return { passed: true, reason: "both lines replaced in single call" };
},
},
{
name: "7. Line expansion — 1 line → 3 lines",
fileName: "expand.txt",
fileContent: ["header", "TODO: implement", "footer"].join("\n"),
prompt: [
"Read expand.txt with read_file.",
"Replace the 'TODO: implement' line (line 2) with THREE lines:",
" 'step 1: init', 'step 2: process', 'step 3: cleanup'",
"Use edit_file with op='replace', pos=<line2 anchor>, lines=['step 1: init', 'step 2: process', 'step 3: cleanup'].",
].join(" "),
validate: (c) => {
const lines = c.trim().split("\n");
if (c.includes("TODO")) return { passed: false, reason: "TODO line still present" };
if (!c.includes("step 1: init")) return { passed: false, reason: "'step 1: init' not found" };
if (!c.includes("step 3: cleanup")) return { passed: false, reason: "'step 3: cleanup' not found" };
if (lines.length !== 5) return { passed: false, reason: `expected 5 lines, got ${lines.length}` };
return { passed: true, reason: "1 line expanded to 3 lines" };
},
},
{
name: "8. Append at EOF",
fileName: "eof.txt",
fileContent: ["line one", "line two"].join("\n"),
prompt: [
"Read eof.txt with read_file.",
"Use edit_file to append 'line three' after the LAST line of the file.",
"Use op='append', pos=<last line anchor>, lines=['line three'].",
].join(" "),
validate: (c) => {
const lines = c.trim().split("\n");
if (!c.includes("line three")) return { passed: false, reason: "'line three' not found" };
if (lines[lines.length - 1].trim() !== "line three")
return { passed: false, reason: "'line three' not at end" };
if (lines.length !== 3) return { passed: false, reason: `expected 3 lines, got ${lines.length}` };
return { passed: true, reason: "appended at EOF" };
},
},
{
name: "9. Special characters in content",
fileName: "special.json",
fileContent: [
'{',
' "name": "old-value",',
' "count": 42',
'}',
].join("\n"),
prompt: [
"Read special.json with read_file.",
'Replace the line containing \"name\": \"old-value\" with \"name\": \"new-value\".',
"Use edit_file with op='replace', pos=<that line's anchor>, lines=[' \"name\": \"new-value\",'].",
].join(" "),
validate: (c) => {
if (c.includes("old-value")) return { passed: false, reason: "'old-value' still present" };
if (!c.includes('"new-value"')) return { passed: false, reason: "'new-value' not found" };
if (!c.includes('"count": 42')) return { passed: false, reason: "other content was modified" };
return { passed: true, reason: "JSON value replaced with special chars intact" };
},
},
{
name: "10. Replace first line",
fileName: "first.txt",
fileContent: ["OLD HEADER", "body content", "footer"].join("\n"),
prompt: [
"Read first.txt with read_file.",
"Replace the very first line 'OLD HEADER' with 'NEW HEADER'.",
"Use edit_file with op='replace', pos=<line1 anchor>, lines=['NEW HEADER'].",
].join(" "),
validate: (c) => {
const lines = c.trim().split("\n");
if (c.includes("OLD HEADER")) return { passed: false, reason: "'OLD HEADER' still present" };
if (lines[0].trim() !== "NEW HEADER") return { passed: false, reason: "first line is not 'NEW HEADER'" };
if (!c.includes("body content")) return { passed: false, reason: "body was modified" };
return { passed: true, reason: "first line replaced" };
},
},
{
name: "11. Replace last line",
fileName: "last.txt",
fileContent: ["alpha", "bravo", "OLD_FOOTER"].join("\n"),
prompt: [
"Read last.txt with read_file.",
"Replace the last line 'OLD_FOOTER' with 'NEW_FOOTER'.",
"Use edit_file with op='replace', pos=<last line anchor>, lines=['NEW_FOOTER'].",
].join(" "),
validate: (c) => {
const lines = c.trim().split("\n");
if (c.includes("OLD_FOOTER")) return { passed: false, reason: "'OLD_FOOTER' still present" };
if (lines[lines.length - 1].trim() !== "NEW_FOOTER")
return { passed: false, reason: "last line is not 'NEW_FOOTER'" };
return { passed: true, reason: "last line replaced" };
},
},
{
name: "12. Adjacent line edits",
fileName: "adjacent.txt",
fileContent: ["aaa", "bbb", "ccc", "ddd"].join("\n"),
prompt: [
"Read adjacent.txt with read_file.",
"Replace line 2 ('bbb') with 'BBB' and line 3 ('ccc') with 'CCC'.",
"Use edit_file with TWO edits in the same call:",
" { op: 'replace', pos: <line2 anchor>, lines: ['BBB'] }",
" { op: 'replace', pos: <line3 anchor>, lines: ['CCC'] }",
].join(" "),
validate: (c) => {
const lines = c.trim().split("\n");
if (c.includes("bbb")) return { passed: false, reason: "'bbb' still present" };
if (c.includes("ccc")) return { passed: false, reason: "'ccc' still present" };
if (!c.includes("BBB")) return { passed: false, reason: "'BBB' not found" };
if (!c.includes("CCC")) return { passed: false, reason: "'CCC' not found" };
if (lines.length !== 4) return { passed: false, reason: `expected 4 lines, got ${lines.length}` };
return { passed: true, reason: "two adjacent lines replaced" };
},
},
{
name: "13. Prepend multi-line block",
fileName: "block.py",
fileContent: ["def main():", " print('hello')", "", "main()"].join("\n"),
prompt: [
"Read block.py with read_file.",
"Prepend a 2-line comment block before 'def main():' (line 1).",
"The two lines are: '# Author: test' and '# Date: 2025-01-01'.",
"Use edit_file with op='prepend', pos=<line1 anchor>, lines=['# Author: test', '# Date: 2025-01-01'].",
].join(" "),
validate: (c) => {
const lines = c.trim().split("\n");
if (!c.includes("# Author: test")) return { passed: false, reason: "author comment not found" };
if (!c.includes("# Date: 2025-01-01")) return { passed: false, reason: "date comment not found" };
const defIdx = lines.findIndex((l) => l.startsWith("def main"));
const authorIdx = lines.findIndex((l) => l.includes("Author"));
if (authorIdx >= defIdx) return { passed: false, reason: "comments not before def" };
return { passed: true, reason: "2-line block prepended before function" };
},
},
{
name: "14. Delete range — 3 consecutive lines",
fileName: "cleanup.txt",
fileContent: ["keep1", "remove-a", "remove-b", "remove-c", "keep2"].join("\n"),
prompt: [
"Read cleanup.txt with read_file.",
"Delete lines 2-4 ('remove-a', 'remove-b', 'remove-c') using a single range replace.",
"Use edit_file with op='replace', pos=<line2 anchor>, end=<line4 anchor>, lines=[].",
"An empty lines array deletes the range.",
].join(" "),
validate: (c) => {
const lines = c.trim().split("\n");
if (c.includes("remove")) return { passed: false, reason: "'remove' lines still present" };
if (!c.includes("keep1")) return { passed: false, reason: "'keep1' was deleted" };
if (!c.includes("keep2")) return { passed: false, reason: "'keep2' was deleted" };
if (lines.length !== 2) return { passed: false, reason: `expected 2 lines, got ${lines.length}` };
return { passed: true, reason: "3 consecutive lines deleted via range" };
},
},
{
name: "15. Replace with duplicate-content line",
fileName: "dupes.txt",
fileContent: ["item", "item", "item", "item"].join("\n"),
prompt: [
"Read dupes.txt with read_file. All 4 lines have the same text 'item'.",
"Replace ONLY line 3 with 'CHANGED'. Do NOT modify any other line.",
"Use edit_file with op='replace', pos=<line3 anchor>, lines=['CHANGED'].",
"The anchor hash uniquely identifies line 3 even though the content is identical.",
].join(" "),
validate: (c) => {
const lines = c.trim().split("\n");
if (!c.includes("CHANGED")) return { passed: false, reason: "'CHANGED' not found" };
const changedCount = lines.filter((l) => l.trim() === "CHANGED").length;
const itemCount = lines.filter((l) => l.trim() === "item").length;
if (changedCount !== 1) return { passed: false, reason: `expected 1 CHANGED, got ${changedCount}` };
if (itemCount !== 3) return { passed: false, reason: `expected 3 item lines, got ${itemCount}` };
if (lines.length !== 4) return { passed: false, reason: `expected 4 lines, got ${lines.length}` };
return { passed: true, reason: "only line 3 changed among duplicates" };
},
},
// ── Whitespace cases (16-21) ──────────────────────────────────
{
name: "16. Fix indentation — 2 spaces → 4 spaces",
fileName: "indent.js",
fileContent: ["function foo() {", " const x = 1;", " return x;", "}"].join("\n"),
prompt: [
"Read indent.js with read_file.",
"Replace line 2 ' const x = 1;' (2-space indent) with ' const x = 1;' (4-space indent).",
"Use edit_file with op='replace', pos=<line2 anchor>, lines=[' const x = 1;'].",
"The ONLY change is the indentation: 2 spaces → 4 spaces. Content stays the same.",
].join(" "),
validate: (c) => {
const lines = c.split("\n");
const line2 = lines[1];
if (!line2) return { passed: false, reason: "line 2 missing" };
if (line2 === " const x = 1;") return { passed: true, reason: "indentation fixed to 4 spaces" };
if (line2 === " const x = 1;") return { passed: false, reason: "still 2-space indent" };
return { passed: false, reason: `unexpected line 2: '${line2}'` };
},
},
{
name: "17. Replace preserving leading whitespace",
fileName: "preserve.py",
fileContent: [
"class Foo:",
" def old_method(self):",
" pass",
].join("\n"),
prompt: [
"Read preserve.py with read_file.",
"Replace line 2 ' def old_method(self):' with ' def new_method(self):'.",
"Keep the 4-space indentation. Only change the method name.",
"Use edit_file with op='replace', pos=<line2 anchor>, lines=[' def new_method(self):'].",
].join(" "),
validate: (c) => {
if (c.includes("old_method")) return { passed: false, reason: "'old_method' still present" };
const lines = c.split("\n");
const methodLine = lines.find((l) => l.includes("new_method"));
if (!methodLine) return { passed: false, reason: "'new_method' not found" };
if (!methodLine.startsWith(" ")) return { passed: false, reason: "indentation lost" };
return { passed: true, reason: "method renamed with indentation preserved" };
},
},
{
name: "18. Insert blank line between sections",
fileName: "sections.txt",
fileContent: ["[section-a]", "value-a=1", "[section-b]", "value-b=2"].join("\n"),
prompt: [
"Read sections.txt with read_file.",
"Insert a blank empty line between 'value-a=1' (line 2) and '[section-b]' (line 3).",
"Use edit_file with op='append', pos=<line2 anchor>, lines=[''].",
"lines=[''] inserts one empty line.",
].join(" "),
validate: (c) => {
const lines = c.split("\n");
const valAIdx = lines.findIndex((l) => l.includes("value-a=1"));
const secBIdx = lines.findIndex((l) => l.includes("[section-b]"));
if (valAIdx === -1) return { passed: false, reason: "'value-a=1' missing" };
if (secBIdx === -1) return { passed: false, reason: "'[section-b]' missing" };
if (secBIdx - valAIdx < 2) return { passed: false, reason: "no blank line between sections" };
const between = lines[valAIdx + 1];
if (between.trim() !== "") return { passed: false, reason: `line between is '${between}', not blank` };
return { passed: true, reason: "blank line inserted between sections" };
},
},
{
name: "19. Delete blank line",
fileName: "noblank.txt",
fileContent: ["first", "", "second", "third"].join("\n"),
prompt: [
"Read noblank.txt with read_file.",
"Delete the empty blank line (line 2). Use edit_file with op='replace', pos=<line2 anchor>, lines=[].",
].join(" "),
validate: (c) => {
const lines = c.trim().split("\n");
if (lines.length !== 3) return { passed: false, reason: `expected 3 lines, got ${lines.length}` };
if (lines[0].trim() !== "first") return { passed: false, reason: "'first' not on line 1" };
if (lines[1].trim() !== "second") return { passed: false, reason: "'second' not on line 2" };
return { passed: true, reason: "blank line deleted" };
},
},
{
name: "20. Tab → spaces conversion",
fileName: "tabs.txt",
fileContent: ["start", "\tindented-with-tab", "end"].join("\n"),
prompt: [
"Read tabs.txt with read_file.",
"Replace the tab-indented line 2 using edit_file with edits: [{ op: 'replace', pos: '<line2 anchor>', lines: [' indented-with-spaces'] }].",
"Expected final line 2 to be 4 spaces followed by indented-with-spaces.",
].join(" "),
validate: (c) => {
if (c.includes("\t")) return { passed: false, reason: "tab still present" };
if (!c.includes(" indented-with-spaces"))
return { passed: false, reason: "' indented-with-spaces' not found" };
if (!c.includes("start")) return { passed: false, reason: "'start' was modified" };
return { passed: true, reason: "tab converted to 4 spaces" };
},
},
{
name: "21. Deeply nested indent replacement",
fileName: "nested.ts",
fileContent: [
"if (a) {",
" if (b) {",
" if (c) {",
" old_call();",
" }",
" }",
"}",
].join("\n"),
prompt: [
"Read nested.ts with read_file.",
"Replace line 4 ' old_call();' with ' new_call();'.",
"Preserve the exact 6-space indentation. Only change the function name.",
"Use edit_file with op='replace', pos=<line4 anchor>, lines=[' new_call();'].",
].join(" "),
validate: (c) => {
if (c.includes("old_call")) return { passed: false, reason: "'old_call' still present" };
const lines = c.split("\n");
const callLine = lines.find((l) => l.includes("new_call"));
if (!callLine) return { passed: false, reason: "'new_call' not found" };
const leadingSpaces = callLine.match(/^ */)?.[0].length ?? 0;
if (leadingSpaces !== 6) return { passed: false, reason: `expected 6-space indent, got ${leadingSpaces}` };
return { passed: true, reason: "deeply nested line replaced with indent preserved" };
},
},
];
// ── JSONL event types ─────────────────────────────────────────
interface ToolCallEvent {
tool_call_id: string;
tool_input: Record<string, unknown>;
tool_name: string;
type: "tool_call";
}
interface ToolResultEvent {
error?: string;
output: string;
tool_call_id: string;
type: "tool_result";
}
interface AnyEvent {
type: string;
[key: string]: unknown;
}
// ── Run single test case ─────────────────────────────────────
async function runTestCase(
tc: TestCase,
testDir: string
): Promise<{
passed: boolean;
editCalls: number;
editSuccesses: number;
duration: number;
}> {
const testFile = join(testDir, tc.fileName);
writeFileSync(testFile, tc.fileContent, "utf-8");
const headlessScript = resolve(import.meta.dir, "headless.ts");
const headlessArgs = [
"run",
headlessScript,
"-p",
tc.prompt,
"--no-translate",
...extraArgs,
];
const startTime = Date.now();
const output = await new Promise<string>((res, reject) => {
const proc = spawn("bun", headlessArgs, {
cwd: testDir,
env: { ...process.env, BUN_INSTALL: process.env.BUN_INSTALL },
stdio: ["ignore", "pipe", "pipe"],
});
let stdout = "";
let stderr = "";
proc.stdout.on("data", (chunk: Buffer) => {
stdout += chunk.toString();
});
proc.stderr.on("data", (chunk: Buffer) => {
stderr += chunk.toString();
});
const timeout = setTimeout(
() => {
proc.kill("SIGTERM");
reject(new Error("Timed out after 4 minutes"));
},
4 * 60 * 1000
);
proc.on("close", (code) => {
clearTimeout(timeout);
if (code !== 0) {
reject(new Error(`Exit code ${code}\n${stderr.slice(-500)}`));
} else {
res(stdout);
}
});
proc.on("error", (err) => {
clearTimeout(timeout);
reject(err);
});
});
const duration = Date.now() - startTime;
// Parse events
const events: AnyEvent[] = [];
for (const line of output.split("\n").filter((l) => l.trim())) {
try {
events.push(JSON.parse(line) as AnyEvent);
} catch {
// skip non-JSON
}
}
const toolCalls = events.filter(
(e) => e.type === "tool_call"
) as unknown as ToolCallEvent[];
const toolResults = events.filter(
(e) => e.type === "tool_result"
) as unknown as ToolResultEvent[];
const editCalls = toolCalls.filter((e) => e.tool_name === "edit_file");
const editCallIds = new Set(editCalls.map((e) => e.tool_call_id));
const editResults = toolResults.filter((e) =>
editCallIds.has(e.tool_call_id)
);
const editSuccesses = editResults.filter((e) => !e.error);
// Show blocked calls
const editErrors = editResults.filter((e) => e.error);
for (const err of editErrors) {
const matchingCall = editCalls.find(
(c) => c.tool_call_id === err.tool_call_id
);
info(` blocked: ${err.error?.slice(0, 120)}`);
if (matchingCall) {
info(` input: ${JSON.stringify(matchingCall.tool_input).slice(0, 200)}`);
}
}
// Validate file content
let finalContent: string;
try {
finalContent = readFileSync(testFile, "utf-8");
} catch {
return {
passed: false,
editCalls: editCalls.length,
editSuccesses: editSuccesses.length,
duration,
};
}
const validation = tc.validate(finalContent);
return {
passed: validation.passed,
editCalls: editCalls.length,
editSuccesses: editSuccesses.length,
duration,
};
}
// ── Main ──────────────────────────────────────────────────────
const main = async () => {
console.log(`\n${BOLD}Headless Edit Operations Test — ${TEST_CASES.length} Types${RESET}\n`);
const testDir = join(tmpdir(), `edit-ops-${Date.now()}`);
mkdirSync(testDir, { recursive: true });
info(`Test dir: ${testDir}`);
console.log();
let totalPassed = 0;
const results: { name: string; passed: boolean; detail: string }[] = [];
for (const tc of TEST_CASES) {
console.log(`${CYAN}${BOLD}${tc.name}${RESET}`);
info(`File: ${tc.fileName}`);
info(`Prompt: "${tc.prompt.slice(0, 80)}..."`);
try {
const result = await runTestCase(tc, testDir);
const status = result.passed
? `${GREEN}PASS${RESET}`
: `${RED}FAIL${RESET}`;
const detail = `edit_file: ${result.editSuccesses}/${result.editCalls} succeeded, ${(result.duration / 1000).toFixed(1)}s`;
console.log(` ${status}${detail}`);
if (result.passed) {
totalPassed++;
// Validate the file to show reason
const content = readFileSync(join(testDir, tc.fileName), "utf-8");
const v = tc.validate(content);
pass(v.reason);
} else {
const content = readFileSync(join(testDir, tc.fileName), "utf-8");
const v = tc.validate(content);
fail(v.reason);
info(
`Final content:\n${content
.split("\n")
.map((l, i) => ` ${i + 1}: ${l}`)
.join("\n")}`
);
}
results.push({ name: tc.name, passed: result.passed, detail });
} catch (error) {
const msg = error instanceof Error ? error.message : String(error);
console.log(` ${RED}ERROR${RESET}${msg.slice(0, 200)}`);
fail(msg.slice(0, 200));
results.push({ name: tc.name, passed: false, detail: msg.slice(0, 100) });
}
// Reset file for next test (in case of side effects)
try {
rmSync(join(testDir, tc.fileName), { force: true });
} catch {}
console.log();
}
// Summary
console.log(`${BOLD}━━━ Summary ━━━${RESET}`);
for (const r of results) {
const icon = r.passed ? `${GREEN}${RESET}` : `${RED}${RESET}`;
console.log(` ${icon} ${r.name}${r.detail}`);
}
console.log();
console.log(
`${BOLD}Result: ${totalPassed}/${TEST_CASES.length} passed (${Math.round((totalPassed / TEST_CASES.length) * 100)}%)${RESET}`
);
// Cleanup
try {
rmSync(testDir, { recursive: true, force: true });
} catch {}
if (totalPassed === TEST_CASES.length) {
console.log(
`\n${BOLD}${GREEN}🎉 ALL TESTS PASSED — 100% success rate!${RESET}\n`
);
process.exit(0);
} else {
console.log(`\n${BOLD}${RED}Some tests failed.${RESET}\n`);
process.exit(1);
}
};
main();

View File

@@ -0,0 +1,280 @@
#!/usr/bin/env bun
/**
* Multi-model edit_file test runner
*
* Runs test-headless-edit-ops.ts against every available model
* and produces a summary table.
*
* Usage:
* bun run scripts/test-multi-model-edit.ts [--timeout <seconds>]
*/
import { spawn } from "node:child_process";
import { resolve } from "node:path";
// ── Models ────────────────────────────────────────────────────
const MODELS = [
{ id: "MiniMaxAI/MiniMax-M2.5", short: "M2.5" },
// { id: "MiniMaxAI/MiniMax-M2.1", short: "M2.1" }, // masked: slow + timeout-prone
// { id: "zai-org/GLM-5", short: "GLM-5" }, // masked: API 503
{ id: "zai-org/GLM-4.7", short: "GLM-4.7" },
];
// ── CLI args ──────────────────────────────────────────────────
let perModelTimeoutSec = 900; // 15 min default per model (5 tests)
const rawArgs = process.argv.slice(2);
for (let i = 0; i < rawArgs.length; i++) {
if (rawArgs[i] === "--timeout" && i + 1 < rawArgs.length) {
const parsed = Number.parseInt(rawArgs[i + 1], 10);
if (Number.isNaN(parsed) || parsed <= 0) {
console.error(`Invalid --timeout value: ${rawArgs[i + 1]}`);
process.exit(1);
}
perModelTimeoutSec = parsed;
i++;
}
// ── Colors ────────────────────────────────────────────────────
const BOLD = "\x1b[1m";
const GREEN = "\x1b[32m";
const RED = "\x1b[31m";
const YELLOW = "\x1b[33m";
const DIM = "\x1b[2m";
const CYAN = "\x1b[36m";
const RESET = "\x1b[0m";
// ── Types ─────────────────────────────────────────────────────
interface TestResult {
detail: string;
name: string;
passed: boolean;
}
interface ModelResult {
durationMs: number;
error?: string;
modelId: string;
modelShort: string;
tests: TestResult[];
totalPassed: number;
totalTests: number;
}
// ── Parse test-headless-edit-ops stdout ───────────────────────
function parseOpsOutput(stdout: string): TestResult[] {
const results: TestResult[] = [];
// Match lines like: " PASS — edit_file: 1/1 succeeded, 32.5s"
// or " FAIL — edit_file: 0/3 succeeded, 15.2s"
// or " ERROR — Timed out after 10 minutes"
// Following a line like: "1. Replace single line"
const lines = stdout.split("\n");
let currentTestName = "";
for (const line of lines) {
// Detect test name: starts with ANSI-colored bold cyan + "N. Name"
// Strip ANSI codes for matching
const stripped = line.replace(/\x1b\[[0-9;]*m/g, "");
// Test name pattern: "N. <name>"
const testNameMatch = stripped.match(/^\s*(\d+\.\s+.+)$/);
if (
testNameMatch &&
!stripped.includes("—") &&
!stripped.includes("✓") &&
!stripped.includes("✗")
) {
currentTestName = testNameMatch[1].trim();
continue;
}
// Result line: PASS/FAIL/ERROR
if (currentTestName && stripped.includes("PASS")) {
const detail = stripped.replace(/^\s*PASS\s*—?\s*/, "").trim();
results.push({
name: currentTestName,
passed: true,
detail: detail || "passed",
});
currentTestName = "";
} else if (currentTestName && stripped.includes("FAIL")) {
const detail = stripped.replace(/^\s*FAIL\s*—?\s*/, "").trim();
results.push({
name: currentTestName,
passed: false,
detail: detail || "failed",
});
currentTestName = "";
} else if (currentTestName && stripped.includes("ERROR")) {
const detail = stripped.replace(/^\s*ERROR\s*—?\s*/, "").trim();
results.push({
name: currentTestName,
passed: false,
detail: detail || "error",
});
currentTestName = "";
}
}
return results;
}
// ── Run one model ────────────────────────────────────────────
async function runModel(model: {
id: string;
short: string;
}): Promise<ModelResult> {
const opsScript = resolve(import.meta.dir, "test-edit-ops.ts");
const startTime = Date.now();
return new Promise<ModelResult>((resolvePromise) => {
const proc = spawn(
"bun",
["run", opsScript, "-m", model.id, "--no-translate"],
{
cwd: resolve(import.meta.dir),
env: { ...process.env, BUN_INSTALL: process.env.BUN_INSTALL },
stdio: ["ignore", "pipe", "pipe"],
}
);
let stdout = "";
let stderr = "";
proc.stdout.on("data", (chunk: Buffer) => {
stdout += chunk.toString();
});
proc.stderr.on("data", (chunk: Buffer) => {
stderr += chunk.toString();
});
const timeout = setTimeout(() => {
proc.kill("SIGTERM");
resolvePromise({
modelId: model.id,
modelShort: model.short,
tests: [],
totalPassed: 0,
totalTests: 0,
durationMs: Date.now() - startTime,
error: `Timed out after ${perModelTimeoutSec}s`,
});
}, perModelTimeoutSec * 1000);
proc.on("close", () => {
clearTimeout(timeout);
const tests = parseOpsOutput(stdout);
const totalPassed = tests.filter((t) => t.passed).length;
resolvePromise({
modelId: model.id,
modelShort: model.short,
tests,
totalPassed,
totalTests: Math.max(tests.length, 5),
durationMs: Date.now() - startTime,
});
});
proc.on("error", (err) => {
clearTimeout(timeout);
resolvePromise({
modelId: model.id,
modelShort: model.short,
tests: [],
totalPassed: 0,
totalTests: 0,
durationMs: Date.now() - startTime,
error: err.message,
});
});
});
}
// ── Main ──────────────────────────────────────────────────────
const main = async () => {
console.log(`\n${BOLD}═══ Multi-Model edit_file Test Runner ═══${RESET}\n`);
console.log(`${DIM}Models: ${MODELS.map((m) => m.short).join(", ")}${RESET}`);
console.log(`${DIM}Timeout: ${perModelTimeoutSec}s per model${RESET}`);
console.log();
const allResults: ModelResult[] = [];
for (const model of MODELS) {
console.log(`${CYAN}${BOLD}▶ Testing ${model.short} (${model.id})${RESET}`);
const result = await runModel(model);
allResults.push(result);
const timeStr = `${(result.durationMs / 1000).toFixed(1)}s`;
if (result.error) {
console.log(` ${RED}ERROR${RESET}: ${result.error} (${timeStr})`);
} else {
const color =
result.totalPassed === result.totalTests
? GREEN
: result.totalPassed > 0
? YELLOW
: RED;
console.log(
` ${color}${result.totalPassed}/${result.totalTests} passed${RESET} (${timeStr})`
);
for (const t of result.tests) {
const icon = t.passed ? `${GREEN}${RESET}` : `${RED}${RESET}`;
console.log(` ${icon} ${t.name}`);
}
}
console.log();
}
// ── Summary Table ──────────────────────────────────────────
console.log(`${BOLD}═══ Summary ═══${RESET}\n`);
// Per-model results
for (const r of allResults) {
const timeStr = `${(r.durationMs / 1000).toFixed(0)}s`;
const color = r.error ? RED : r.totalPassed === r.totalTests ? GREEN : r.totalPassed > 0 ? YELLOW : RED;
const label = r.error ? `ERROR: ${r.error}` : `${r.totalPassed}/${r.totalTests}`;
console.log(` ${r.modelShort.padEnd(8)} ${color}${label}${RESET} (${timeStr})`);
for (const t of r.tests) {
const icon = t.passed ? `${GREEN}${RESET}` : `${RED}${RESET}`;
console.log(` ${icon} ${t.name}`);
}
}
console.log();
// Overall
const totalModels = allResults.length;
const erroredModels = allResults.filter((r) => r.error).length;
const perfectModels = allResults.filter(
(r) => !r.error && r.totalPassed === r.totalTests && r.totalTests > 0
).length;
console.log(
`${BOLD}Models with 100%: ${perfectModels}/${totalModels}${RESET}`
);
const overallPassed = allResults.reduce((sum, r) => sum + r.totalPassed, 0);
const overallTotal = allResults.reduce((sum, r) => sum + r.totalTests, 0);
console.log(
`${BOLD}Overall: ${overallPassed}/${overallTotal} (${Math.round((overallPassed / overallTotal) * 100)}%)${RESET}`
);
console.log();
if (erroredModels > 0) {
console.log(
`${BOLD}${RED}${erroredModels} model(s) errored. See details above.${RESET}\n`
);
process.exit(1);
} else if (perfectModels === totalModels) {
console.log(`${BOLD}${GREEN}🎉 ALL MODELS PASSED ALL TESTS!${RESET}\n`);
process.exit(0);
} else {
console.log(
`${BOLD}${YELLOW}Some models have failures. See details above.${RESET}\n`
);
process.exit(1);
}
};
main();

View File

@@ -3,8 +3,9 @@
// Wrapper script that detects platform and spawns the correct binary
import { spawnSync } from "node:child_process";
import { readFileSync } from "node:fs";
import { createRequire } from "node:module";
import { getPlatformPackage, getBinaryPath } from "./platform.js";
import { getPlatformPackageCandidates, getBinaryPath } from "./platform.js";
const require = createRequire(import.meta.url);
@@ -26,55 +27,116 @@ function getLibcFamily() {
}
}
function supportsAvx2() {
if (process.arch !== "x64") {
return null;
}
if (process.env.OH_MY_OPENCODE_FORCE_BASELINE === "1") {
return false;
}
if (process.platform === "linux") {
try {
const cpuInfo = readFileSync("/proc/cpuinfo", "utf8").toLowerCase();
return cpuInfo.includes("avx2");
} catch {
return null;
}
}
if (process.platform === "darwin") {
const probe = spawnSync("sysctl", ["-n", "machdep.cpu.leaf7_features"], {
encoding: "utf8",
});
if (probe.error || probe.status !== 0) {
return null;
}
return probe.stdout.toUpperCase().includes("AVX2");
}
return null;
}
function getSignalExitCode(signal) {
const signalCodeByName = {
SIGINT: 2,
SIGILL: 4,
SIGKILL: 9,
SIGTERM: 15,
};
return 128 + (signalCodeByName[signal] ?? 1);
}
function main() {
const { platform, arch } = process;
const libcFamily = getLibcFamily();
const avx2Supported = supportsAvx2();
// Get platform package name
let pkg;
let packageCandidates;
try {
pkg = getPlatformPackage({ platform, arch, libcFamily });
packageCandidates = getPlatformPackageCandidates({
platform,
arch,
libcFamily,
preferBaseline: avx2Supported === false,
});
} catch (error) {
console.error(`\noh-my-opencode: ${error.message}\n`);
process.exit(1);
}
// Resolve binary path
const binRelPath = getBinaryPath(pkg, platform);
let binPath;
try {
binPath = require.resolve(binRelPath);
} catch {
const resolvedBinaries = packageCandidates
.map((pkg) => {
try {
return { pkg, binPath: require.resolve(getBinaryPath(pkg, platform)) };
} catch {
return null;
}
})
.filter((entry) => entry !== null);
if (resolvedBinaries.length === 0) {
console.error(`\noh-my-opencode: Platform binary not installed.`);
console.error(`\nYour platform: ${platform}-${arch}${libcFamily === "musl" ? "-musl" : ""}`);
console.error(`Expected package: ${pkg}`);
console.error(`Expected packages (in order): ${packageCandidates.join(", ")}`);
console.error(`\nTo fix, run:`);
console.error(` npm install ${pkg}\n`);
console.error(` npm install ${packageCandidates[0]}\n`);
process.exit(1);
}
// Spawn the binary
const result = spawnSync(binPath, process.argv.slice(2), {
stdio: "inherit",
});
// Handle spawn errors
if (result.error) {
console.error(`\noh-my-opencode: Failed to execute binary.`);
console.error(`Error: ${result.error.message}\n`);
process.exit(2);
}
// Handle signals
if (result.signal) {
const signalNum = result.signal === "SIGTERM" ? 15 :
result.signal === "SIGKILL" ? 9 :
result.signal === "SIGINT" ? 2 : 1;
process.exit(128 + signalNum);
for (let index = 0; index < resolvedBinaries.length; index += 1) {
const currentBinary = resolvedBinaries[index];
const hasFallback = index < resolvedBinaries.length - 1;
const result = spawnSync(currentBinary.binPath, process.argv.slice(2), {
stdio: "inherit",
});
if (result.error) {
if (hasFallback) {
continue;
}
console.error(`\noh-my-opencode: Failed to execute binary.`);
console.error(`Error: ${result.error.message}\n`);
process.exit(2);
}
if (result.signal === "SIGILL" && hasFallback) {
continue;
}
if (result.signal) {
process.exit(getSignalExitCode(result.signal));
}
process.exit(result.status ?? 1);
}
process.exit(result.status ?? 1);
process.exit(1);
}
main();

14
bin/platform.d.ts vendored Normal file
View File

@@ -0,0 +1,14 @@
export declare function getPlatformPackage(options: {
platform: string;
arch: string;
libcFamily?: string | null;
}): string;
export declare function getPlatformPackageCandidates(options: {
platform: string;
arch: string;
libcFamily?: string | null;
preferBaseline?: boolean;
}): string[];
export declare function getBinaryPath(pkg: string, platform: string): string;

View File

@@ -26,6 +26,50 @@ export function getPlatformPackage({ platform, arch, libcFamily }) {
return `oh-my-opencode-${os}-${arch}${suffix}`;
}
/** @param {{ platform: string, arch: string, libcFamily?: string | null, preferBaseline?: boolean }} options */
export function getPlatformPackageCandidates({ platform, arch, libcFamily, preferBaseline = false }) {
const primaryPackage = getPlatformPackage({ platform, arch, libcFamily });
const baselinePackage = getBaselinePlatformPackage({ platform, arch, libcFamily });
if (!baselinePackage) {
return [primaryPackage];
}
return preferBaseline ? [baselinePackage, primaryPackage] : [primaryPackage, baselinePackage];
}
/** @param {{ platform: string, arch: string, libcFamily?: string | null }} options */
function getBaselinePlatformPackage({ platform, arch, libcFamily }) {
if (arch !== "x64") {
return null;
}
if (platform === "darwin") {
return "oh-my-opencode-darwin-x64-baseline";
}
if (platform === "win32") {
return "oh-my-opencode-windows-x64-baseline";
}
if (platform === "linux") {
if (libcFamily === null || libcFamily === undefined) {
throw new Error(
"Could not detect libc on Linux. " +
"Please ensure detect-libc is installed or report this issue."
);
}
if (libcFamily === "musl") {
return "oh-my-opencode-linux-x64-musl-baseline";
}
return "oh-my-opencode-linux-x64-baseline";
}
return null;
}
/**
* Get the path to the binary within a platform package
* @param {string} pkg Package name

View File

@@ -1,6 +1,6 @@
// bin/platform.test.ts
import { describe, expect, test } from "bun:test";
import { getPlatformPackage, getBinaryPath } from "./platform.js";
import { getBinaryPath, getPlatformPackage, getPlatformPackageCandidates } from "./platform.js";
describe("getPlatformPackage", () => {
// #region Darwin platforms
@@ -146,3 +146,58 @@ describe("getBinaryPath", () => {
expect(result).toBe("oh-my-opencode-linux-x64/bin/oh-my-opencode");
});
});
describe("getPlatformPackageCandidates", () => {
test("returns x64 and baseline candidates for Linux glibc", () => {
// #given Linux x64 with glibc
const input = { platform: "linux", arch: "x64", libcFamily: "glibc" };
// #when getting package candidates
const result = getPlatformPackageCandidates(input);
// #then returns modern first then baseline fallback
expect(result).toEqual([
"oh-my-opencode-linux-x64",
"oh-my-opencode-linux-x64-baseline",
]);
});
test("returns x64 musl and baseline candidates for Linux musl", () => {
// #given Linux x64 with musl
const input = { platform: "linux", arch: "x64", libcFamily: "musl" };
// #when getting package candidates
const result = getPlatformPackageCandidates(input);
// #then returns musl modern first then musl baseline fallback
expect(result).toEqual([
"oh-my-opencode-linux-x64-musl",
"oh-my-opencode-linux-x64-musl-baseline",
]);
});
test("returns baseline first when preferBaseline is true", () => {
// #given Windows x64 and baseline preference
const input = { platform: "win32", arch: "x64", preferBaseline: true };
// #when getting package candidates
const result = getPlatformPackageCandidates(input);
// #then baseline package is preferred first
expect(result).toEqual([
"oh-my-opencode-windows-x64-baseline",
"oh-my-opencode-windows-x64",
]);
});
test("returns only one candidate for ARM64", () => {
// #given non-x64 platform
const input = { platform: "linux", arch: "arm64", libcFamily: "glibc" };
// #when getting package candidates
const result = getPlatformPackageCandidates(input);
// #then baseline fallback is not included
expect(result).toEqual(["oh-my-opencode-linux-arm64"]);
});
});

View File

@@ -77,11 +77,15 @@
"optionalDependencies": {
"oh-my-opencode-darwin-arm64": "3.8.5",
"oh-my-opencode-darwin-x64": "3.8.5",
"oh-my-opencode-darwin-x64-baseline": "3.8.5",
"oh-my-opencode-linux-arm64": "3.8.5",
"oh-my-opencode-linux-arm64-musl": "3.8.5",
"oh-my-opencode-linux-x64": "3.8.5",
"oh-my-opencode-linux-x64-baseline": "3.8.5",
"oh-my-opencode-linux-x64-musl": "3.8.5",
"oh-my-opencode-windows-x64": "3.8.5"
"oh-my-opencode-linux-x64-musl-baseline": "3.8.5",
"oh-my-opencode-windows-x64": "3.8.5",
"oh-my-opencode-windows-x64-baseline": "3.8.5"
},
"trustedDependencies": [
"@ast-grep/cli",

View File

@@ -2,7 +2,7 @@
// Runs after npm install to verify platform binary is available
import { createRequire } from "node:module";
import { getPlatformPackage, getBinaryPath } from "./bin/platform.js";
import { getPlatformPackageCandidates, getBinaryPath } from "./bin/platform.js";
const require = createRequire(import.meta.url);
@@ -27,12 +27,28 @@ function main() {
const libcFamily = getLibcFamily();
try {
const pkg = getPlatformPackage({ platform, arch, libcFamily });
const binPath = getBinaryPath(pkg, platform);
// Try to resolve the binary
require.resolve(binPath);
console.log(`✓ oh-my-opencode binary installed for ${platform}-${arch}`);
const packageCandidates = getPlatformPackageCandidates({
platform,
arch,
libcFamily,
});
const resolvedPackage = packageCandidates.find((pkg) => {
try {
require.resolve(getBinaryPath(pkg, platform));
return true;
} catch {
return false;
}
});
if (!resolvedPackage) {
throw new Error(
`No platform binary package installed. Tried: ${packageCandidates.join(", ")}`
);
}
console.log(`✓ oh-my-opencode binary installed for ${platform}-${arch} (${resolvedPackage})`);
} catch (error) {
console.warn(`⚠ oh-my-opencode: ${error.message}`);
console.warn(` The CLI may not work on this platform.`);

View File

@@ -1719,6 +1719,54 @@
"created_at": "2026-02-24T17:12:31Z",
"repoId": 1108837393,
"pullRequestNo": 1983
},
{
"name": "east-shine",
"id": 20237288,
"comment_id": 3957576758,
"created_at": "2026-02-25T08:19:34Z",
"repoId": 1108837393,
"pullRequestNo": 2113
},
{
"name": "SupenBysz",
"id": 3314033,
"comment_id": 3962352704,
"created_at": "2026-02-25T22:00:54Z",
"repoId": 1108837393,
"pullRequestNo": 2119
},
{
"name": "zhzy0077",
"id": 8717471,
"comment_id": 3964015975,
"created_at": "2026-02-26T04:45:23Z",
"repoId": 1108837393,
"pullRequestNo": 2125
},
{
"name": "spacecowboy0416",
"id": 239068998,
"comment_id": 3964320737,
"created_at": "2026-02-26T06:05:27Z",
"repoId": 1108837393,
"pullRequestNo": 2126
},
{
"name": "imwxc",
"id": 49653609,
"comment_id": 3965127447,
"created_at": "2026-02-26T09:00:16Z",
"repoId": 1108837393,
"pullRequestNo": 2129
},
{
"name": "maou-shonen",
"id": 22576780,
"comment_id": 3965445132,
"created_at": "2026-02-26T09:50:46Z",
"repoId": 1108837393,
"pullRequestNo": 2131
}
]
}

View File

@@ -17,7 +17,6 @@ import type { AvailableAgent, AvailableSkill, AvailableCategory } from "../dynam
import { buildCategorySkillsDelegationGuide } from "../dynamic-agent-prompt-builder"
import type { CategoryConfig } from "../../config/schema"
import { mergeCategories } from "../../shared/merge-categories"
import { createAgentToolRestrictions } from "../../shared/permission-compat"
import { getDefaultAtlasPrompt } from "./default"
import { getGptAtlasPrompt } from "./gpt"
@@ -30,7 +29,7 @@ import {
buildDecisionMatrix,
} from "./prompt-section-builder"
const MODE: AgentMode = "primary"
const MODE: AgentMode = "all"
export type AtlasPromptSource = "default" | "gpt" | "gemini"
@@ -100,11 +99,6 @@ function buildDynamicOrchestratorPrompt(ctx?: OrchestratorContext): string {
}
export function createAtlasAgent(ctx: OrchestratorContext): AgentConfig {
const restrictions = createAgentToolRestrictions([
"task",
"call_omo_agent",
])
const baseConfig = {
description:
"Orchestrates work via task() to complete ALL tasks in a todo list until fully done. (Atlas - OhMyOpenCode)",
@@ -113,7 +107,6 @@ export function createAtlasAgent(ctx: OrchestratorContext): AgentConfig {
temperature: 0.1,
prompt: buildDynamicOrchestratorPrompt(ctx),
color: "#10B981",
...restrictions,
}
return baseConfig as AgentConfig

View File

@@ -0,0 +1,41 @@
/// <reference types="bun-types" />
import { describe, test, expect } from "bun:test"
import { createEnvContext } from "./env-context"
describe("createEnvContext", () => {
test("returns omo-env block with timezone and locale", () => {
// #given - no setup needed
// #when
const result = createEnvContext()
// #then
expect(result).toContain("<omo-env>")
expect(result).toContain("</omo-env>")
expect(result).toContain("Timezone:")
expect(result).toContain("Locale:")
expect(result).not.toContain("Current date:")
})
test("does not include time with seconds precision to preserve token cache", () => {
// #given - seconds-precision time changes every second, breaking cache on every request
// #when
const result = createEnvContext()
// #then - no HH:MM:SS pattern anywhere in the output
expect(result).not.toMatch(/\d{1,2}:\d{2}:\d{2}/)
})
test("does not include date or time fields since OpenCode already provides them", () => {
// #given - OpenCode's system.ts already injects date, platform, working directory
// #when
const result = createEnvContext()
// #then - only timezone and locale remain; both are stable across requests
expect(result).not.toContain("Current date:")
expect(result).not.toContain("Current time:")
})
})

View File

@@ -1,32 +1,15 @@
/**
* Creates OmO-specific environment context (time, timezone, locale).
* Creates OmO-specific environment context (timezone, locale).
* Note: Working directory, platform, and date are already provided by OpenCode's system.ts,
* so we only include fields that OpenCode doesn't provide to avoid duplication.
* See: https://github.com/code-yeongyu/oh-my-opencode/issues/379
*/
export function createEnvContext(): string {
const now = new Date()
const timezone = Intl.DateTimeFormat().resolvedOptions().timeZone
const locale = Intl.DateTimeFormat().resolvedOptions().locale
const dateStr = now.toLocaleDateString(locale, {
weekday: "short",
year: "numeric",
month: "short",
day: "numeric",
})
const timeStr = now.toLocaleTimeString(locale, {
hour: "2-digit",
minute: "2-digit",
second: "2-digit",
hour12: true,
})
return `
<omo-env>
Current date: ${dateStr}
Current time: ${timeStr}
Timezone: ${timezone}
Locale: ${locale}
</omo-env>`

View File

@@ -19,7 +19,7 @@ import {
categorizeTools,
} from "./dynamic-agent-prompt-builder";
const MODE: AgentMode = "primary";
const MODE: AgentMode = "all";
function buildTodoDisciplineSection(useTaskSystem: boolean): string {
if (useTaskSystem) {
@@ -448,6 +448,21 @@ ${oracleSection}
4. **Run build** if applicable — exit code 0 required
5. **Tell user** what you verified and the results — keep it clear and helpful
### Auto-Commit Policy (MANDATORY for implementation/fix work)
1. **Auto-commit after implementation is complete** when the task includes feature/fix code changes
2. **Commit ONLY after verification gates pass**:
- \`lsp_diagnostics\` clean on all modified files
- Related tests pass
- Typecheck/build pass when applicable
3. **If any gate fails, DO NOT commit** — fix issues first, re-run verification, then commit
4. **Use Conventional Commits format** with meaningful intent-focused messages:
- \`feat(scope): add ...\` for new functionality
- \`fix(scope): resolve ...\` for bug fixes
- \`refactor(scope): simplify ...\` for internal restructuring
5. **Do not make placeholder commits** (\`wip\`, \`temp\`, \`update\`) or commit unverified code
6. **If user explicitly says not to commit**, skip commit and report that changes are left uncommitted
- **File edit** — \`lsp_diagnostics\` clean
- **Build** — Exit code 0
- **Tests** — Pass (or pre-existing failures noted)

View File

@@ -8,7 +8,7 @@ import {
buildGeminiIntentGateEnforcement,
} from "./sisyphus-gemini-overlays";
const MODE: AgentMode = "primary";
const MODE: AgentMode = "all";
export const SISYPHUS_PROMPT_METADATA: AgentPromptMetadata = {
category: "utility",
cost: "EXPENSIVE",

View File

@@ -4,6 +4,7 @@ import { createLibrarianAgent } from "./librarian"
import { createExploreAgent } from "./explore"
import { createMomusAgent } from "./momus"
import { createMetisAgent } from "./metis"
import { createAtlasAgent } from "./atlas"
const TEST_MODEL = "anthropic/claude-sonnet-4-5"
@@ -96,4 +97,18 @@ describe("read-only agent tool restrictions", () => {
}
})
})
describe("Atlas", () => {
test("allows delegation tools for orchestration", () => {
// given
const agent = createAtlasAgent({ model: TEST_MODEL })
// when
const permission = (agent.permission ?? {}) as Record<string, string>
// then
expect(permission["task"]).toBeUndefined()
expect(permission["call_omo_agent"]).toBeUndefined()
})
})
})

View File

@@ -2,11 +2,17 @@ import { describe, test, expect } from "bun:test";
import { isGptModel, isGeminiModel } from "./types";
describe("isGptModel", () => {
test("standard openai provider models", () => {
test("standard openai provider gpt models", () => {
expect(isGptModel("openai/gpt-5.2")).toBe(true);
expect(isGptModel("openai/gpt-4o")).toBe(true);
expect(isGptModel("openai/o1")).toBe(true);
expect(isGptModel("openai/o3-mini")).toBe(true);
});
test("o-series models are not gpt by name", () => {
expect(isGptModel("openai/o1")).toBe(false);
expect(isGptModel("openai/o3-mini")).toBe(false);
expect(isGptModel("litellm/o1")).toBe(false);
expect(isGptModel("litellm/o3-mini")).toBe(false);
expect(isGptModel("litellm/o4-mini")).toBe(false);
});
test("github copilot gpt models", () => {
@@ -17,9 +23,6 @@ describe("isGptModel", () => {
test("litellm proxied gpt models", () => {
expect(isGptModel("litellm/gpt-5.2")).toBe(true);
expect(isGptModel("litellm/gpt-4o")).toBe(true);
expect(isGptModel("litellm/o1")).toBe(true);
expect(isGptModel("litellm/o3-mini")).toBe(true);
expect(isGptModel("litellm/o4-mini")).toBe(true);
});
test("other proxied gpt models", () => {
@@ -27,6 +30,11 @@ describe("isGptModel", () => {
expect(isGptModel("custom-provider/gpt-5.2")).toBe(true);
});
test("venice provider gpt models", () => {
expect(isGptModel("venice/gpt-5.2")).toBe(true);
expect(isGptModel("venice/gpt-4o")).toBe(true);
});
test("gpt4 prefix without hyphen (legacy naming)", () => {
expect(isGptModel("litellm/gpt4o")).toBe(true);
expect(isGptModel("ollama/gpt4")).toBe(true);
@@ -39,8 +47,8 @@ describe("isGptModel", () => {
});
test("gemini models are not gpt", () => {
expect(isGptModel("google/gemini-3-pro")).toBe(false);
expect(isGptModel("litellm/gemini-3-pro")).toBe(false);
expect(isGptModel("google/gemini-3.1-pro")).toBe(false);
expect(isGptModel("litellm/gemini-3.1-pro")).toBe(false);
});
test("opencode provider is not gpt", () => {
@@ -50,29 +58,29 @@ describe("isGptModel", () => {
describe("isGeminiModel", () => {
test("#given google provider models #then returns true", () => {
expect(isGeminiModel("google/gemini-3-pro")).toBe(true);
expect(isGeminiModel("google/gemini-3.1-pro")).toBe(true);
expect(isGeminiModel("google/gemini-3-flash")).toBe(true);
expect(isGeminiModel("google/gemini-2.5-pro")).toBe(true);
});
test("#given google-vertex provider models #then returns true", () => {
expect(isGeminiModel("google-vertex/gemini-3-pro")).toBe(true);
expect(isGeminiModel("google-vertex/gemini-3.1-pro")).toBe(true);
expect(isGeminiModel("google-vertex/gemini-3-flash")).toBe(true);
});
test("#given github copilot gemini models #then returns true", () => {
expect(isGeminiModel("github-copilot/gemini-3-pro")).toBe(true);
expect(isGeminiModel("github-copilot/gemini-3.1-pro")).toBe(true);
expect(isGeminiModel("github-copilot/gemini-3-flash")).toBe(true);
});
test("#given litellm proxied gemini models #then returns true", () => {
expect(isGeminiModel("litellm/gemini-3-pro")).toBe(true);
expect(isGeminiModel("litellm/gemini-3.1-pro")).toBe(true);
expect(isGeminiModel("litellm/gemini-3-flash")).toBe(true);
expect(isGeminiModel("litellm/gemini-2.5-pro")).toBe(true);
});
test("#given other proxied gemini models #then returns true", () => {
expect(isGeminiModel("custom-provider/gemini-3-pro")).toBe(true);
expect(isGeminiModel("custom-provider/gemini-3.1-pro")).toBe(true);
expect(isGeminiModel("ollama/gemini-3-flash")).toBe(true);
});

View File

@@ -70,14 +70,9 @@ function extractModelName(model: string): string {
return model.includes("/") ? model.split("/").pop() ?? model : model
}
const GPT_MODEL_PREFIXES = ["gpt-", "gpt4", "o1", "o3", "o4"]
export function isGptModel(model: string): boolean {
if (model.startsWith("openai/") || model.startsWith("github-copilot/gpt-"))
return true
const modelName = extractModelName(model).toLowerCase()
return GPT_MODEL_PREFIXES.some((prefix) => modelName.startsWith(prefix))
return modelName.includes("gpt")
}
const GEMINI_PROVIDERS = ["google/", "google-vertex/"]

View File

@@ -603,8 +603,8 @@ describe("createBuiltinAgents with requiresProvider gating (hephaestus)", () =>
}
})
test("hephaestus is not created when only github-copilot provider is connected", async () => {
// #given - github-copilot provider has models available
test("hephaestus IS created when github-copilot is connected with a GPT model", async () => {
// #given - github-copilot provider has gpt-5.3-codex available
const fetchSpy = spyOn(shared, "fetchAvailableModels").mockResolvedValue(
new Set(["github-copilot/gpt-5.3-codex"])
)
@@ -614,8 +614,8 @@ describe("createBuiltinAgents with requiresProvider gating (hephaestus)", () =>
// #when
const agents = await createBuiltinAgents([], {}, undefined, TEST_DEFAULT_MODEL, undefined, undefined, [], {})
// #then
expect(agents.hephaestus).toBeUndefined()
// #then - github-copilot is now a valid provider for hephaestus
expect(agents.hephaestus).toBeDefined()
} finally {
fetchSpy.mockRestore()
cacheSpy.mockRestore()
@@ -1002,7 +1002,7 @@ describe("buildAgent with category and skills", () => {
const agent = buildAgent(source["test-agent"], TEST_MODEL)
// #then - category's built-in model is applied
expect(agent.model).toBe("google/gemini-3-pro")
expect(agent.model).toBe("google/gemini-3.1-pro")
})
test("agent with category and existing model keeps existing model", () => {

View File

@@ -325,7 +325,7 @@ exports[`generateModelConfig single native provider uses Gemini models when only
"$schema": "https://raw.githubusercontent.com/code-yeongyu/oh-my-opencode/master/assets/oh-my-opencode.schema.json",
"agents": {
"atlas": {
"model": "google/gemini-3-pro-preview",
"model": "google/gemini-3.1-pro-preview",
},
"explore": {
"model": "opencode/gpt-5-nano",
@@ -334,34 +334,34 @@ exports[`generateModelConfig single native provider uses Gemini models when only
"model": "opencode/glm-4.7-free",
},
"metis": {
"model": "google/gemini-3-pro-preview",
"model": "google/gemini-3.1-pro-preview",
"variant": "high",
},
"momus": {
"model": "google/gemini-3-pro-preview",
"model": "google/gemini-3.1-pro-preview",
"variant": "high",
},
"multimodal-looker": {
"model": "google/gemini-3-flash-preview",
},
"oracle": {
"model": "google/gemini-3-pro-preview",
"model": "google/gemini-3.1-pro-preview",
"variant": "high",
},
"prometheus": {
"model": "google/gemini-3-pro-preview",
"model": "google/gemini-3.1-pro-preview",
},
},
"categories": {
"artistry": {
"model": "google/gemini-3-pro-preview",
"model": "google/gemini-3.1-pro-preview",
"variant": "high",
},
"quick": {
"model": "google/gemini-3-flash-preview",
},
"ultrabrain": {
"model": "google/gemini-3-pro-preview",
"model": "google/gemini-3.1-pro-preview",
"variant": "high",
},
"unspecified-high": {
@@ -371,7 +371,7 @@ exports[`generateModelConfig single native provider uses Gemini models when only
"model": "google/gemini-3-flash-preview",
},
"visual-engineering": {
"model": "google/gemini-3-pro-preview",
"model": "google/gemini-3.1-pro-preview",
"variant": "high",
},
"writing": {
@@ -386,7 +386,7 @@ exports[`generateModelConfig single native provider uses Gemini models with isMa
"$schema": "https://raw.githubusercontent.com/code-yeongyu/oh-my-opencode/master/assets/oh-my-opencode.schema.json",
"agents": {
"atlas": {
"model": "google/gemini-3-pro-preview",
"model": "google/gemini-3.1-pro-preview",
},
"explore": {
"model": "opencode/gpt-5-nano",
@@ -395,44 +395,44 @@ exports[`generateModelConfig single native provider uses Gemini models with isMa
"model": "opencode/glm-4.7-free",
},
"metis": {
"model": "google/gemini-3-pro-preview",
"model": "google/gemini-3.1-pro-preview",
"variant": "high",
},
"momus": {
"model": "google/gemini-3-pro-preview",
"model": "google/gemini-3.1-pro-preview",
"variant": "high",
},
"multimodal-looker": {
"model": "google/gemini-3-flash-preview",
},
"oracle": {
"model": "google/gemini-3-pro-preview",
"model": "google/gemini-3.1-pro-preview",
"variant": "high",
},
"prometheus": {
"model": "google/gemini-3-pro-preview",
"model": "google/gemini-3.1-pro-preview",
},
},
"categories": {
"artistry": {
"model": "google/gemini-3-pro-preview",
"model": "google/gemini-3.1-pro-preview",
"variant": "high",
},
"quick": {
"model": "google/gemini-3-flash-preview",
},
"ultrabrain": {
"model": "google/gemini-3-pro-preview",
"model": "google/gemini-3.1-pro-preview",
"variant": "high",
},
"unspecified-high": {
"model": "google/gemini-3-pro-preview",
"model": "google/gemini-3.1-pro-preview",
},
"unspecified-low": {
"model": "google/gemini-3-flash-preview",
},
"visual-engineering": {
"model": "google/gemini-3-pro-preview",
"model": "google/gemini-3.1-pro-preview",
"variant": "high",
},
"writing": {
@@ -485,7 +485,7 @@ exports[`generateModelConfig all native providers uses preferred models from fal
},
"categories": {
"artistry": {
"model": "google/gemini-3-pro-preview",
"model": "google/gemini-3.1-pro-preview",
"variant": "high",
},
"deep": {
@@ -506,7 +506,7 @@ exports[`generateModelConfig all native providers uses preferred models from fal
"model": "anthropic/claude-sonnet-4-5",
},
"visual-engineering": {
"model": "google/gemini-3-pro-preview",
"model": "google/gemini-3.1-pro-preview",
"variant": "high",
},
"writing": {
@@ -559,7 +559,7 @@ exports[`generateModelConfig all native providers uses preferred models with isM
},
"categories": {
"artistry": {
"model": "google/gemini-3-pro-preview",
"model": "google/gemini-3.1-pro-preview",
"variant": "high",
},
"deep": {
@@ -581,7 +581,7 @@ exports[`generateModelConfig all native providers uses preferred models with isM
"model": "anthropic/claude-sonnet-4-5",
},
"visual-engineering": {
"model": "google/gemini-3-pro-preview",
"model": "google/gemini-3.1-pro-preview",
"variant": "high",
},
"writing": {
@@ -634,7 +634,7 @@ exports[`generateModelConfig fallback providers uses OpenCode Zen models when on
},
"categories": {
"artistry": {
"model": "opencode/gemini-3-pro",
"model": "opencode/gemini-3.1-pro",
"variant": "high",
},
"deep": {
@@ -655,7 +655,7 @@ exports[`generateModelConfig fallback providers uses OpenCode Zen models when on
"model": "opencode/claude-sonnet-4-5",
},
"visual-engineering": {
"model": "opencode/gemini-3-pro",
"model": "opencode/gemini-3.1-pro",
"variant": "high",
},
"writing": {
@@ -708,7 +708,7 @@ exports[`generateModelConfig fallback providers uses OpenCode Zen models with is
},
"categories": {
"artistry": {
"model": "opencode/gemini-3-pro",
"model": "opencode/gemini-3.1-pro",
"variant": "high",
},
"deep": {
@@ -730,7 +730,7 @@ exports[`generateModelConfig fallback providers uses OpenCode Zen models with is
"model": "opencode/claude-sonnet-4-5",
},
"visual-engineering": {
"model": "opencode/gemini-3-pro",
"model": "opencode/gemini-3.1-pro",
"variant": "high",
},
"writing": {
@@ -779,14 +779,14 @@ exports[`generateModelConfig fallback providers uses GitHub Copilot models when
},
"categories": {
"artistry": {
"model": "github-copilot/gemini-3-pro-preview",
"model": "github-copilot/gemini-3.1-pro-preview",
"variant": "high",
},
"quick": {
"model": "github-copilot/claude-haiku-4.5",
},
"ultrabrain": {
"model": "github-copilot/gemini-3-pro-preview",
"model": "github-copilot/gemini-3.1-pro-preview",
"variant": "high",
},
"unspecified-high": {
@@ -796,7 +796,7 @@ exports[`generateModelConfig fallback providers uses GitHub Copilot models when
"model": "github-copilot/claude-sonnet-4.5",
},
"visual-engineering": {
"model": "github-copilot/gemini-3-pro-preview",
"model": "github-copilot/gemini-3.1-pro-preview",
"variant": "high",
},
"writing": {
@@ -845,14 +845,14 @@ exports[`generateModelConfig fallback providers uses GitHub Copilot models with
},
"categories": {
"artistry": {
"model": "github-copilot/gemini-3-pro-preview",
"model": "github-copilot/gemini-3.1-pro-preview",
"variant": "high",
},
"quick": {
"model": "github-copilot/claude-haiku-4.5",
},
"ultrabrain": {
"model": "github-copilot/gemini-3-pro-preview",
"model": "github-copilot/gemini-3.1-pro-preview",
"variant": "high",
},
"unspecified-high": {
@@ -863,7 +863,7 @@ exports[`generateModelConfig fallback providers uses GitHub Copilot models with
"model": "github-copilot/claude-sonnet-4.5",
},
"visual-engineering": {
"model": "github-copilot/gemini-3-pro-preview",
"model": "github-copilot/gemini-3.1-pro-preview",
"variant": "high",
},
"writing": {
@@ -1026,7 +1026,7 @@ exports[`generateModelConfig mixed provider scenarios uses Claude + OpenCode Zen
},
"categories": {
"artistry": {
"model": "opencode/gemini-3-pro",
"model": "opencode/gemini-3.1-pro",
"variant": "high",
},
"deep": {
@@ -1047,7 +1047,7 @@ exports[`generateModelConfig mixed provider scenarios uses Claude + OpenCode Zen
"model": "anthropic/claude-sonnet-4-5",
},
"visual-engineering": {
"model": "opencode/gemini-3-pro",
"model": "opencode/gemini-3.1-pro",
"variant": "high",
},
"writing": {
@@ -1100,7 +1100,7 @@ exports[`generateModelConfig mixed provider scenarios uses OpenAI + Copilot comb
},
"categories": {
"artistry": {
"model": "github-copilot/gemini-3-pro-preview",
"model": "github-copilot/gemini-3.1-pro-preview",
"variant": "high",
},
"deep": {
@@ -1121,7 +1121,7 @@ exports[`generateModelConfig mixed provider scenarios uses OpenAI + Copilot comb
"model": "github-copilot/claude-sonnet-4.5",
},
"visual-engineering": {
"model": "github-copilot/gemini-3-pro-preview",
"model": "github-copilot/gemini-3.1-pro-preview",
"variant": "high",
},
"writing": {
@@ -1217,7 +1217,7 @@ exports[`generateModelConfig mixed provider scenarios uses Gemini + Claude combi
"model": "google/gemini-3-flash-preview",
},
"oracle": {
"model": "google/gemini-3-pro-preview",
"model": "google/gemini-3.1-pro-preview",
"variant": "high",
},
"prometheus": {
@@ -1231,14 +1231,14 @@ exports[`generateModelConfig mixed provider scenarios uses Gemini + Claude combi
},
"categories": {
"artistry": {
"model": "google/gemini-3-pro-preview",
"model": "google/gemini-3.1-pro-preview",
"variant": "high",
},
"quick": {
"model": "anthropic/claude-haiku-4-5",
},
"ultrabrain": {
"model": "google/gemini-3-pro-preview",
"model": "google/gemini-3.1-pro-preview",
"variant": "high",
},
"unspecified-high": {
@@ -1248,7 +1248,7 @@ exports[`generateModelConfig mixed provider scenarios uses Gemini + Claude combi
"model": "anthropic/claude-sonnet-4-5",
},
"visual-engineering": {
"model": "google/gemini-3-pro-preview",
"model": "google/gemini-3.1-pro-preview",
"variant": "high",
},
"writing": {
@@ -1301,7 +1301,7 @@ exports[`generateModelConfig mixed provider scenarios uses all fallback provider
},
"categories": {
"artistry": {
"model": "github-copilot/gemini-3-pro-preview",
"model": "github-copilot/gemini-3.1-pro-preview",
"variant": "high",
},
"deep": {
@@ -1322,7 +1322,7 @@ exports[`generateModelConfig mixed provider scenarios uses all fallback provider
"model": "github-copilot/claude-sonnet-4.5",
},
"visual-engineering": {
"model": "github-copilot/gemini-3-pro-preview",
"model": "github-copilot/gemini-3.1-pro-preview",
"variant": "high",
},
"writing": {
@@ -1375,7 +1375,7 @@ exports[`generateModelConfig mixed provider scenarios uses all providers togethe
},
"categories": {
"artistry": {
"model": "google/gemini-3-pro-preview",
"model": "google/gemini-3.1-pro-preview",
"variant": "high",
},
"deep": {
@@ -1396,7 +1396,7 @@ exports[`generateModelConfig mixed provider scenarios uses all providers togethe
"model": "anthropic/claude-sonnet-4-5",
},
"visual-engineering": {
"model": "google/gemini-3-pro-preview",
"model": "google/gemini-3.1-pro-preview",
"variant": "high",
},
"writing": {
@@ -1449,7 +1449,7 @@ exports[`generateModelConfig mixed provider scenarios uses all providers with is
},
"categories": {
"artistry": {
"model": "google/gemini-3-pro-preview",
"model": "google/gemini-3.1-pro-preview",
"variant": "high",
},
"deep": {
@@ -1471,7 +1471,7 @@ exports[`generateModelConfig mixed provider scenarios uses all providers with is
"model": "anthropic/claude-sonnet-4-5",
},
"visual-engineering": {
"model": "google/gemini-3-pro-preview",
"model": "google/gemini-3.1-pro-preview",
"variant": "high",
},
"writing": {

View File

@@ -178,7 +178,7 @@ describe("config-manager ANTIGRAVITY_PROVIDER_CONFIG", () => {
expect(models).toBeTruthy()
const required = [
"antigravity-gemini-3-pro",
"antigravity-gemini-3.1-pro",
"antigravity-gemini-3-flash",
"antigravity-claude-sonnet-4-6",
"antigravity-claude-sonnet-4-6-thinking",
@@ -206,7 +206,7 @@ describe("config-manager ANTIGRAVITY_PROVIDER_CONFIG", () => {
const models = (ANTIGRAVITY_PROVIDER_CONFIG as any).google.models as Record<string, any>
// #when checking Gemini Pro variants
const pro = models["antigravity-gemini-3-pro"]
const pro = models["antigravity-gemini-3.1-pro"]
// #then should have low and high variants
expect(pro.variants).toBeTruthy()
expect(pro.variants.low).toBeTruthy()

View File

@@ -4,10 +4,10 @@
* IMPORTANT: Model names MUST use `antigravity-` prefix for stability.
*
* Since opencode-antigravity-auth v1.3.0, models use a variant system:
* - `antigravity-gemini-3-pro` with variants: low, high
* - `antigravity-gemini-3.1-pro` with variants: low, high
* - `antigravity-gemini-3-flash` with variants: minimal, low, medium, high
*
* Legacy tier-suffixed names (e.g., `antigravity-gemini-3-pro-high`) still work
* Legacy tier-suffixed names (e.g., `antigravity-gemini-3.1-pro-high`) still work
* but variants are the recommended approach.
*
* @see https://github.com/NoeFabris/opencode-antigravity-auth#models
@@ -16,7 +16,7 @@ export const ANTIGRAVITY_PROVIDER_CONFIG = {
google: {
name: "Google",
models: {
"antigravity-gemini-3-pro": {
"antigravity-gemini-3.1-pro": {
name: "Gemini 3 Pro (Antigravity)",
limit: { context: 1048576, output: 65535 },
modalities: { input: ["text", "image", "pdf"], output: ["text"] },

View File

@@ -1,4 +1,5 @@
import { getConfigDir } from "./config-context"
import { spawnWithWindowsHide } from "../../shared/spawn-with-windows-hide"
const BUN_INSTALL_TIMEOUT_SECONDS = 60
const BUN_INSTALL_TIMEOUT_MS = BUN_INSTALL_TIMEOUT_SECONDS * 1000
@@ -16,7 +17,7 @@ export async function runBunInstall(): Promise<boolean> {
export async function runBunInstallWithDetails(): Promise<BunInstallResult> {
try {
const proc = Bun.spawn(["bun", "install"], {
const proc = spawnWithWindowsHide(["bun", "install"], {
cwd: getConfigDir(),
stdout: "inherit",
stderr: "inherit",

View File

@@ -1,4 +1,5 @@
import type { OpenCodeBinaryType } from "../../shared/opencode-config-dir-types"
import { spawnWithWindowsHide } from "../../shared/spawn-with-windows-hide"
import { initConfigContext } from "./config-context"
const OPENCODE_BINARIES = ["opencode", "opencode-desktop"] as const
@@ -11,7 +12,7 @@ interface OpenCodeBinaryResult {
async function findOpenCodeBinaryWithVersion(): Promise<OpenCodeBinaryResult | null> {
for (const binary of OPENCODE_BINARIES) {
try {
const proc = Bun.spawn([binary, "--version"], {
const proc = spawnWithWindowsHide([binary, "--version"], {
stdout: "pipe",
stderr: "pipe",
})

View File

@@ -0,0 +1,80 @@
import { afterEach, beforeEach, describe, expect, it } from "bun:test"
import { mkdirSync, readFileSync, rmSync, writeFileSync } from "node:fs"
import { tmpdir } from "node:os"
import { join } from "node:path"
import { parseJsonc } from "../../shared/jsonc-parser"
import type { InstallConfig } from "../types"
import { resetConfigContext } from "./config-context"
import { generateOmoConfig } from "./generate-omo-config"
import { writeOmoConfig } from "./write-omo-config"
const installConfig: InstallConfig = {
hasClaude: true,
isMax20: true,
hasOpenAI: true,
hasGemini: true,
hasCopilot: false,
hasOpencodeZen: false,
hasZaiCodingPlan: false,
hasKimiForCoding: false,
}
function getRecord(value: unknown): Record<string, unknown> {
if (value && typeof value === "object" && !Array.isArray(value)) {
return value as Record<string, unknown>
}
return {}
}
describe("writeOmoConfig", () => {
let testConfigDir = ""
let testConfigPath = ""
beforeEach(() => {
testConfigDir = join(tmpdir(), `omo-write-config-${Date.now()}-${Math.random().toString(36).slice(2)}`)
testConfigPath = join(testConfigDir, "oh-my-opencode.json")
mkdirSync(testConfigDir, { recursive: true })
process.env.OPENCODE_CONFIG_DIR = testConfigDir
resetConfigContext()
})
afterEach(() => {
rmSync(testConfigDir, { recursive: true, force: true })
resetConfigContext()
delete process.env.OPENCODE_CONFIG_DIR
})
it("preserves existing user values while adding new defaults", () => {
// given
const existingConfig = {
agents: {
sisyphus: {
model: "custom/provider-model",
},
},
disabled_hooks: ["comment-checker"],
}
writeFileSync(testConfigPath, JSON.stringify(existingConfig, null, 2) + "\n", "utf-8")
const generatedDefaults = generateOmoConfig(installConfig)
// when
const result = writeOmoConfig(installConfig)
// then
expect(result.success).toBe(true)
const savedConfig = parseJsonc<Record<string, unknown>>(readFileSync(testConfigPath, "utf-8"))
const savedAgents = getRecord(savedConfig.agents)
const savedSisyphus = getRecord(savedAgents.sisyphus)
expect(savedSisyphus.model).toBe("custom/provider-model")
expect(savedConfig.disabled_hooks).toEqual(["comment-checker"])
for (const defaultKey of Object.keys(generatedDefaults)) {
expect(savedConfig).toHaveProperty(defaultKey)
}
})
})

View File

@@ -43,7 +43,7 @@ export function writeOmoConfig(installConfig: InstallConfig): ConfigMergeResult
return { success: true, configPath: omoConfigPath }
}
const merged = deepMergeRecord(existing, newConfig)
const merged = deepMergeRecord(newConfig, existing)
writeFileSync(omoConfigPath, JSON.stringify(merged, null, 2) + "\n")
} catch (parseErr) {
if (parseErr instanceof SyntaxError) {

View File

@@ -3,6 +3,7 @@ import { createRequire } from "node:module"
import { dirname, join } from "node:path"
import type { DependencyInfo } from "../types"
import { spawnWithWindowsHide } from "../../../shared/spawn-with-windows-hide"
async function checkBinaryExists(binary: string): Promise<{ exists: boolean; path: string | null }> {
try {
@@ -18,7 +19,7 @@ async function checkBinaryExists(binary: string): Promise<{ exists: boolean; pat
async function getBinaryVersion(binary: string): Promise<string | null> {
try {
const proc = Bun.spawn([binary, "--version"], { stdout: "pipe", stderr: "pipe" })
const proc = spawnWithWindowsHide([binary, "--version"], { stdout: "pipe", stderr: "pipe" })
const output = await new Response(proc.stdout).text()
await proc.exited
if (proc.exitCode === 0) {
@@ -140,4 +141,3 @@ export async function checkCommentChecker(): Promise<DependencyInfo> {
path: resolvedPath,
}
}

View File

@@ -26,7 +26,7 @@ describe("model-resolution check", () => {
// then: Should have category entries
const visual = info.categories.find((c) => c.name === "visual-engineering")
expect(visual).toBeDefined()
expect(visual!.requirement.fallbackChain[0]?.model).toBe("gemini-3-pro")
expect(visual!.requirement.fallbackChain[0]?.model).toBe("gemini-3.1-pro")
expect(visual!.requirement.fallbackChain[0]?.providers).toContain("google")
})
})

View File

@@ -1,6 +1,7 @@
import { existsSync } from "node:fs"
import { homedir } from "node:os"
import { join } from "node:path"
import { spawnWithWindowsHide } from "../../../shared/spawn-with-windows-hide"
import { OPENCODE_BINARIES } from "../constants"
@@ -110,7 +111,7 @@ export async function getOpenCodeVersion(
): Promise<string | null> {
try {
const command = buildVersionCommand(binaryPath, platform)
const processResult = Bun.spawn(command, { stdout: "pipe", stderr: "pipe" })
const processResult = spawnWithWindowsHide(command, { stdout: "pipe", stderr: "pipe" })
const output = await new Response(processResult.stdout).text()
await processResult.exited

View File

@@ -1,3 +1,5 @@
import { spawnWithWindowsHide } from "../../../shared/spawn-with-windows-hide"
export interface GhCliInfo {
installed: boolean
version: string | null
@@ -19,7 +21,7 @@ async function checkBinaryExists(binary: string): Promise<{ exists: boolean; pat
async function getGhVersion(): Promise<string | null> {
try {
const processResult = Bun.spawn(["gh", "--version"], { stdout: "pipe", stderr: "pipe" })
const processResult = spawnWithWindowsHide(["gh", "--version"], { stdout: "pipe", stderr: "pipe" })
const output = await new Response(processResult.stdout).text()
await processResult.exited
if (processResult.exitCode !== 0) return null
@@ -38,7 +40,7 @@ async function getGhAuthStatus(): Promise<{
error: string | null
}> {
try {
const processResult = Bun.spawn(["gh", "auth", "status"], {
const processResult = spawnWithWindowsHide(["gh", "auth", "status"], {
stdout: "pipe",
stderr: "pipe",
env: { ...process.env, GH_NO_UPDATE_NOTIFIER: "1" },

View File

@@ -24,7 +24,7 @@ export const CLI_AGENT_MODEL_REQUIREMENTS: Record<string, ModelRequirement> = {
oracle: {
fallbackChain: [
{ providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "high" },
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" },
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro", variant: "high" },
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" },
],
},
@@ -59,7 +59,7 @@ export const CLI_AGENT_MODEL_REQUIREMENTS: Record<string, ModelRequirement> = {
{ providers: ["kimi-for-coding"], model: "k2p5" },
{ providers: ["opencode"], model: "kimi-k2.5-free" },
{ providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "high" },
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro" },
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro" },
],
},
metis: {
@@ -68,14 +68,14 @@ export const CLI_AGENT_MODEL_REQUIREMENTS: Record<string, ModelRequirement> = {
{ providers: ["kimi-for-coding"], model: "k2p5" },
{ providers: ["opencode"], model: "kimi-k2.5-free" },
{ providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "high" },
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" },
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro", variant: "high" },
],
},
momus: {
fallbackChain: [
{ providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "medium" },
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" },
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" },
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro", variant: "high" },
],
},
atlas: {
@@ -84,7 +84,7 @@ export const CLI_AGENT_MODEL_REQUIREMENTS: Record<string, ModelRequirement> = {
{ providers: ["opencode"], model: "kimi-k2.5-free" },
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-sonnet-4-5" },
{ providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2" },
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro" },
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro" },
],
},
}
@@ -92,7 +92,7 @@ export const CLI_AGENT_MODEL_REQUIREMENTS: Record<string, ModelRequirement> = {
export const CLI_CATEGORY_MODEL_REQUIREMENTS: Record<string, ModelRequirement> = {
"visual-engineering": {
fallbackChain: [
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" },
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro", variant: "high" },
{ providers: ["zai-coding-plan"], model: "glm-5" },
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" },
{ providers: ["kimi-for-coding"], model: "k2p5" },
@@ -101,7 +101,7 @@ export const CLI_CATEGORY_MODEL_REQUIREMENTS: Record<string, ModelRequirement> =
ultrabrain: {
fallbackChain: [
{ providers: ["openai", "opencode"], model: "gpt-5.3-codex", variant: "xhigh" },
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" },
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro", variant: "high" },
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" },
],
},
@@ -109,17 +109,17 @@ export const CLI_CATEGORY_MODEL_REQUIREMENTS: Record<string, ModelRequirement> =
fallbackChain: [
{ providers: ["openai", "opencode"], model: "gpt-5.3-codex", variant: "medium" },
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" },
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" },
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro", variant: "high" },
],
requiresModel: "gpt-5.3-codex",
},
artistry: {
fallbackChain: [
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" },
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro", variant: "high" },
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" },
{ providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2" },
],
requiresModel: "gemini-3-pro",
requiresModel: "gemini-3.1-pro",
},
quick: {
fallbackChain: [
@@ -139,7 +139,7 @@ export const CLI_CATEGORY_MODEL_REQUIREMENTS: Record<string, ModelRequirement> =
fallbackChain: [
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" },
{ providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "high" },
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro" },
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro" },
],
},
writing: {

View File

@@ -40,16 +40,16 @@ describe("transformModelForProvider", () => {
expect(result).toBe("claude-haiku-4.5")
})
test("transforms gemini-3-pro to gemini-3-pro-preview", () => {
// #given github-copilot provider and gemini-3-pro model
test("transforms gemini-3.1-pro to gemini-3.1-pro-preview", () => {
// #given github-copilot provider and gemini-3.1-pro model
const provider = "github-copilot"
const model = "gemini-3-pro"
const model = "gemini-3.1-pro"
// #when transformModelForProvider is called
const result = transformModelForProvider(provider, model)
// #then should transform to gemini-3-pro-preview
expect(result).toBe("gemini-3-pro-preview")
// #then should transform to gemini-3.1-pro-preview
expect(result).toBe("gemini-3.1-pro-preview")
})
test("transforms gemini-3-flash to gemini-3-flash-preview", () => {
@@ -64,16 +64,16 @@ describe("transformModelForProvider", () => {
expect(result).toBe("gemini-3-flash-preview")
})
test("prevents double transformation of gemini-3-pro-preview", () => {
// #given github-copilot provider and gemini-3-pro-preview model (already transformed)
test("prevents double transformation of gemini-3.1-pro-preview", () => {
// #given github-copilot provider and gemini-3.1-pro-preview model (already transformed)
const provider = "github-copilot"
const model = "gemini-3-pro-preview"
const model = "gemini-3.1-pro-preview"
// #when transformModelForProvider is called
const result = transformModelForProvider(provider, model)
// #then should NOT become gemini-3-pro-preview-preview
expect(result).toBe("gemini-3-pro-preview")
// #then should NOT become gemini-3.1-pro-preview-preview
expect(result).toBe("gemini-3.1-pro-preview")
})
test("prevents double transformation of gemini-3-flash-preview", () => {
@@ -102,16 +102,16 @@ describe("transformModelForProvider", () => {
expect(result).toBe("gemini-3-flash-preview")
})
test("transforms gemini-3-pro to gemini-3-pro-preview", () => {
// #given google provider and gemini-3-pro model
test("transforms gemini-3.1-pro to gemini-3.1-pro-preview", () => {
// #given google provider and gemini-3.1-pro model
const provider = "google"
const model = "gemini-3-pro"
const model = "gemini-3.1-pro"
// #when transformModelForProvider is called
const result = transformModelForProvider(provider, model)
// #then should transform to gemini-3-pro-preview
expect(result).toBe("gemini-3-pro-preview")
// #then should transform to gemini-3.1-pro-preview
expect(result).toBe("gemini-3.1-pro-preview")
})
test("passes through other gemini models unchanged", () => {
@@ -138,16 +138,16 @@ describe("transformModelForProvider", () => {
expect(result).toBe("gemini-3-flash-preview")
})
test("prevents double transformation of gemini-3-pro-preview", () => {
// #given google provider and gemini-3-pro-preview model (already transformed)
test("prevents double transformation of gemini-3.1-pro-preview", () => {
// #given google provider and gemini-3.1-pro-preview model (already transformed)
const provider = "google"
const model = "gemini-3-pro-preview"
const model = "gemini-3.1-pro-preview"
// #when transformModelForProvider is called
const result = transformModelForProvider(provider, model)
// #then should NOT become gemini-3-pro-preview-preview
expect(result).toBe("gemini-3-pro-preview")
// #then should NOT become gemini-3.1-pro-preview-preview
expect(result).toBe("gemini-3.1-pro-preview")
})
test("does not transform claude models for google provider", () => {

View File

@@ -1,4 +1,4 @@
import { describe, it, expect, spyOn } from "bun:test"
const { describe, it, expect, spyOn } = require("bun:test")
import type { RunContext } from "./types"
import { createEventState } from "./events"
import { handleSessionStatus, handleMessagePartUpdated, handleMessageUpdated, handleTuiToast } from "./event-handlers"
@@ -235,9 +235,7 @@ describe("handleMessagePartUpdated", () => {
it("prints completion metadata once when assistant text part is completed", () => {
// given
const nowSpy = spyOn(Date, "now")
nowSpy.mockReturnValueOnce(1000)
nowSpy.mockReturnValueOnce(3400)
const nowSpy = spyOn(Date, "now").mockReturnValue(3400)
const ctx = createMockContext("ses_main")
const state = createEventState()
@@ -259,6 +257,7 @@ describe("handleMessagePartUpdated", () => {
} as any,
state,
)
state.messageStartedAtById["msg_1"] = 1000
// when
handleMessagePartUpdated(

View File

@@ -7,6 +7,8 @@ export interface EventState {
currentTool: string | null
/** Set to true when the main session has produced meaningful work (text, tool call, or tool result) */
hasReceivedMeaningfulWork: boolean
/** Timestamp of the last received event (for watchdog detection) */
lastEventTimestamp: number
/** Count of assistant messages for the main session */
messageCount: number
/** Current agent name from the latest assistant message */
@@ -54,6 +56,7 @@ export function createEventState(): EventState {
lastPartText: "",
currentTool: null,
hasReceivedMeaningfulWork: false,
lastEventTimestamp: Date.now(),
messageCount: 0,
currentAgent: null,
currentModel: null,

View File

@@ -35,6 +35,9 @@ export async function processEvents(
logEventVerbose(ctx, payload)
}
// Update last event timestamp for watchdog detection
state.lastEventTimestamp = Date.now()
handleSessionError(ctx, payload, state)
handleSessionIdle(ctx, payload, state)
handleSessionStatus(ctx, payload, state)

View File

@@ -3,6 +3,7 @@ import type { RunResult } from "./types"
import { createJsonOutputManager } from "./json-output"
import { resolveSession } from "./session-resolver"
import { executeOnCompleteHook } from "./on-complete-hook"
import * as spawnWithWindowsHideModule from "../../shared/spawn-with-windows-hide"
import type { OpencodeClient } from "./types"
import * as originalSdk from "@opencode-ai/sdk"
import * as originalPortUtils from "../../shared/port-utils"
@@ -147,7 +148,7 @@ describe("integration: --session-id", () => {
const result = resolveSession({ client: mockClient, sessionId, directory: "/test" })
// then
await expect(result).rejects.toThrow(`Session not found: ${sessionId}`)
expect(result).rejects.toThrow(`Session not found: ${sessionId}`)
expect(mockClient.session.get).toHaveBeenCalledWith({
path: { id: sessionId },
query: { directory: "/test" },
@@ -161,10 +162,13 @@ describe("integration: --on-complete", () => {
beforeEach(() => {
spyOn(console, "error").mockImplementation(() => {})
spawnSpy = spyOn(Bun, "spawn").mockReturnValue({
spawnSpy = spyOn(spawnWithWindowsHideModule, "spawnWithWindowsHide").mockReturnValue({
exited: Promise.resolve(0),
exitCode: 0,
} as unknown as ReturnType<typeof Bun.spawn>)
stdout: undefined,
stderr: undefined,
kill: () => {},
} satisfies ReturnType<typeof spawnWithWindowsHideModule.spawnWithWindowsHide>)
})
afterEach(() => {
@@ -186,7 +190,7 @@ describe("integration: --on-complete", () => {
// then
expect(spawnSpy).toHaveBeenCalledTimes(1)
const [_, options] = spawnSpy.mock.calls[0] as Parameters<typeof Bun.spawn>
const [_, options] = spawnSpy.mock.calls[0] as Parameters<typeof spawnWithWindowsHideModule.spawnWithWindowsHide>
expect(options?.env?.SESSION_ID).toBe("session-123")
expect(options?.env?.EXIT_CODE).toBe("0")
expect(options?.env?.DURATION_MS).toBe("5000")
@@ -208,10 +212,13 @@ describe("integration: option combinations", () => {
spyOn(console, "error").mockImplementation(() => {})
mockStdout = createMockWriteStream()
mockStderr = createMockWriteStream()
spawnSpy = spyOn(Bun, "spawn").mockReturnValue({
spawnSpy = spyOn(spawnWithWindowsHideModule, "spawnWithWindowsHide").mockReturnValue({
exited: Promise.resolve(0),
exitCode: 0,
} as unknown as ReturnType<typeof Bun.spawn>)
stdout: undefined,
stderr: undefined,
kill: () => {},
} satisfies ReturnType<typeof spawnWithWindowsHideModule.spawnWithWindowsHide>)
})
afterEach(() => {
@@ -249,9 +256,9 @@ describe("integration: option combinations", () => {
const emitted = mockStdout.writes[0]!
expect(() => JSON.parse(emitted)).not.toThrow()
expect(spawnSpy).toHaveBeenCalledTimes(1)
const [args] = spawnSpy.mock.calls[0] as Parameters<typeof Bun.spawn>
const [args] = spawnSpy.mock.calls[0] as Parameters<typeof spawnWithWindowsHideModule.spawnWithWindowsHide>
expect(args).toEqual(["sh", "-c", "echo done"])
const [_, options] = spawnSpy.mock.calls[0] as Parameters<typeof Bun.spawn>
const [_, options] = spawnSpy.mock.calls[0] as Parameters<typeof spawnWithWindowsHideModule.spawnWithWindowsHide>
expect(options?.env?.SESSION_ID).toBe("session-123")
expect(options?.env?.EXIT_CODE).toBe("0")
expect(options?.env?.DURATION_MS).toBe("5000")

View File

@@ -1,4 +1,5 @@
import { describe, it, expect, spyOn, beforeEach, afterEach } from "bun:test"
import * as spawnWithWindowsHideModule from "../../shared/spawn-with-windows-hide"
import { executeOnCompleteHook } from "./on-complete-hook"
describe("executeOnCompleteHook", () => {
@@ -6,7 +7,10 @@ describe("executeOnCompleteHook", () => {
return {
exited: Promise.resolve(exitCode),
exitCode,
} as unknown as ReturnType<typeof Bun.spawn>
stdout: undefined,
stderr: undefined,
kill: () => {},
} satisfies ReturnType<typeof spawnWithWindowsHideModule.spawnWithWindowsHide>
}
let consoleErrorSpy: ReturnType<typeof spyOn<typeof console, "error">>
@@ -21,7 +25,7 @@ describe("executeOnCompleteHook", () => {
it("executes command with correct env vars", async () => {
// given
const spawnSpy = spyOn(Bun, "spawn").mockReturnValue(createProc(0))
const spawnSpy = spyOn(spawnWithWindowsHideModule, "spawnWithWindowsHide").mockReturnValue(createProc(0))
try {
// when
@@ -35,7 +39,7 @@ describe("executeOnCompleteHook", () => {
// then
expect(spawnSpy).toHaveBeenCalledTimes(1)
const [args, options] = spawnSpy.mock.calls[0] as Parameters<typeof Bun.spawn>
const [args, options] = spawnSpy.mock.calls[0] as Parameters<typeof spawnWithWindowsHideModule.spawnWithWindowsHide>
expect(args).toEqual(["sh", "-c", "echo test"])
expect(options?.env?.SESSION_ID).toBe("session-123")
@@ -51,7 +55,7 @@ describe("executeOnCompleteHook", () => {
it("env var values are strings", async () => {
// given
const spawnSpy = spyOn(Bun, "spawn").mockReturnValue(createProc(0))
const spawnSpy = spyOn(spawnWithWindowsHideModule, "spawnWithWindowsHide").mockReturnValue(createProc(0))
try {
// when
@@ -64,7 +68,7 @@ describe("executeOnCompleteHook", () => {
})
// then
const [_, options] = spawnSpy.mock.calls[0] as Parameters<typeof Bun.spawn>
const [_, options] = spawnSpy.mock.calls[0] as Parameters<typeof spawnWithWindowsHideModule.spawnWithWindowsHide>
expect(options?.env?.EXIT_CODE).toBe("1")
expect(options?.env?.EXIT_CODE).toBeTypeOf("string")
@@ -79,7 +83,7 @@ describe("executeOnCompleteHook", () => {
it("empty command string is no-op", async () => {
// given
const spawnSpy = spyOn(Bun, "spawn").mockReturnValue(createProc(0))
const spawnSpy = spyOn(spawnWithWindowsHideModule, "spawnWithWindowsHide").mockReturnValue(createProc(0))
try {
// when
@@ -100,7 +104,7 @@ describe("executeOnCompleteHook", () => {
it("whitespace-only command is no-op", async () => {
// given
const spawnSpy = spyOn(Bun, "spawn").mockReturnValue(createProc(0))
const spawnSpy = spyOn(spawnWithWindowsHideModule, "spawnWithWindowsHide").mockReturnValue(createProc(0))
try {
// when
@@ -121,11 +125,11 @@ describe("executeOnCompleteHook", () => {
it("command failure logs warning but does not throw", async () => {
// given
const spawnSpy = spyOn(Bun, "spawn").mockReturnValue(createProc(1))
const spawnSpy = spyOn(spawnWithWindowsHideModule, "spawnWithWindowsHide").mockReturnValue(createProc(1))
try {
// when
await expect(
expect(
executeOnCompleteHook({
command: "false",
sessionId: "session-123",
@@ -149,13 +153,13 @@ describe("executeOnCompleteHook", () => {
it("spawn error logs warning but does not throw", async () => {
// given
const spawnError = new Error("Command not found")
const spawnSpy = spyOn(Bun, "spawn").mockImplementation(() => {
const spawnSpy = spyOn(spawnWithWindowsHideModule, "spawnWithWindowsHide").mockImplementation(() => {
throw spawnError
})
try {
// when
await expect(
expect(
executeOnCompleteHook({
command: "nonexistent-command",
sessionId: "session-123",

View File

@@ -1,4 +1,5 @@
import pc from "picocolors"
import { spawnWithWindowsHide } from "../../shared/spawn-with-windows-hide"
export async function executeOnCompleteHook(options: {
command: string
@@ -17,7 +18,7 @@ export async function executeOnCompleteHook(options: {
console.error(pc.dim(`Running on-complete hook: ${trimmedCommand}`))
try {
const proc = Bun.spawn(["sh", "-c", trimmedCommand], {
const proc = spawnWithWindowsHide(["sh", "-c", trimmedCommand], {
env: {
...process.env,
SESSION_ID: sessionId,

View File

@@ -1,4 +1,5 @@
import { delimiter, dirname, join } from "node:path"
import { spawnWithWindowsHide } from "../../shared/spawn-with-windows-hide"
const OPENCODE_COMMANDS = ["opencode", "opencode-desktop"] as const
const WINDOWS_SUFFIXES = ["", ".exe", ".cmd", ".bat", ".ps1"] as const
@@ -41,7 +42,7 @@ export function collectCandidateBinaryPaths(
export async function canExecuteBinary(binaryPath: string): Promise<boolean> {
try {
const proc = Bun.spawn([binaryPath, "--version"], {
const proc = spawnWithWindowsHide([binaryPath, "--version"], {
stdout: "pipe",
stderr: "pipe",
})

View File

@@ -8,11 +8,15 @@ const DEFAULT_POLL_INTERVAL_MS = 500
const DEFAULT_REQUIRED_CONSECUTIVE = 1
const ERROR_GRACE_CYCLES = 3
const MIN_STABILIZATION_MS = 1_000
const DEFAULT_EVENT_WATCHDOG_MS = 30_000 // 30 seconds
const DEFAULT_SECONDARY_MEANINGFUL_WORK_TIMEOUT_MS = 60_000 // 60 seconds
export interface PollOptions {
pollIntervalMs?: number
requiredConsecutive?: number
minStabilizationMs?: number
eventWatchdogMs?: number
secondaryMeaningfulWorkTimeoutMs?: number
}
export async function pollForCompletion(
@@ -28,9 +32,15 @@ export async function pollForCompletion(
options.minStabilizationMs ?? MIN_STABILIZATION_MS
const minStabilizationMs =
rawMinStabilizationMs > 0 ? rawMinStabilizationMs : MIN_STABILIZATION_MS
const eventWatchdogMs =
options.eventWatchdogMs ?? DEFAULT_EVENT_WATCHDOG_MS
const secondaryMeaningfulWorkTimeoutMs =
options.secondaryMeaningfulWorkTimeoutMs ??
DEFAULT_SECONDARY_MEANINGFUL_WORK_TIMEOUT_MS
let consecutiveCompleteChecks = 0
let errorCycleCount = 0
let firstWorkTimestamp: number | null = null
let secondaryTimeoutChecked = false
const pollStartTimestamp = Date.now()
while (!abortController.signal.aborted) {
@@ -59,7 +69,37 @@ export async function pollForCompletion(
errorCycleCount = 0
}
const mainSessionStatus = await getMainSessionStatus(ctx)
// Watchdog: if no events received for N seconds, verify session status via API
let mainSessionStatus: "idle" | "busy" | "retry" | null = null
if (eventState.lastEventTimestamp !== null) {
const timeSinceLastEvent = Date.now() - eventState.lastEventTimestamp
if (timeSinceLastEvent > eventWatchdogMs) {
// Events stopped coming - verify actual session state
console.log(
pc.yellow(
`\n No events for ${Math.round(
timeSinceLastEvent / 1000
)}s, verifying session status...`
)
)
// Force check session status directly
mainSessionStatus = await getMainSessionStatus(ctx)
if (mainSessionStatus === "idle") {
eventState.mainSessionIdle = true
} else if (mainSessionStatus === "busy" || mainSessionStatus === "retry") {
eventState.mainSessionIdle = false
}
// Reset timestamp to avoid repeated checks
eventState.lastEventTimestamp = Date.now()
}
}
// Only call getMainSessionStatus if watchdog didn't already check
if (mainSessionStatus === null) {
mainSessionStatus = await getMainSessionStatus(ctx)
}
if (mainSessionStatus === "busy" || mainSessionStatus === "retry") {
eventState.mainSessionIdle = false
} else if (mainSessionStatus === "idle") {
@@ -81,6 +121,50 @@ export async function pollForCompletion(
consecutiveCompleteChecks = 0
continue
}
// Secondary timeout: if we've been polling for reasonable time but haven't
// received meaningful work via events, check if there's active work via API
// Only check once to avoid unnecessary API calls every poll cycle
if (
Date.now() - pollStartTimestamp > secondaryMeaningfulWorkTimeoutMs &&
!secondaryTimeoutChecked
) {
secondaryTimeoutChecked = true
// Check if session actually has pending work (children, todos, etc.)
const childrenRes = await ctx.client.session.children({
path: { id: ctx.sessionID },
query: { directory: ctx.directory },
})
const children = normalizeSDKResponse(childrenRes, [] as unknown[])
const todosRes = await ctx.client.session.todo({
path: { id: ctx.sessionID },
query: { directory: ctx.directory },
})
const todos = normalizeSDKResponse(todosRes, [] as unknown[])
const hasActiveChildren =
Array.isArray(children) && children.length > 0
const hasActiveTodos =
Array.isArray(todos) &&
todos.some(
(t: unknown) =>
(t as { status?: string })?.status !== "completed" &&
(t as { status?: string })?.status !== "cancelled"
)
const hasActiveWork = hasActiveChildren || hasActiveTodos
if (hasActiveWork) {
// Assume meaningful work is happening even without events
eventState.hasReceivedMeaningfulWork = true
console.log(
pc.yellow(
`\n No meaningful work events for ${Math.round(
secondaryMeaningfulWorkTimeoutMs / 1000
)}s but session has active work - assuming in progress`
)
)
}
}
} else {
// Track when first meaningful work was received
if (firstWorkTimestamp === null) {

View File

@@ -60,7 +60,9 @@ const BuiltinAgentOverridesSchema = z.object({
build: AgentOverrideConfigSchema.optional(),
plan: AgentOverrideConfigSchema.optional(),
sisyphus: AgentOverrideConfigSchema.optional(),
hephaestus: AgentOverrideConfigSchema.optional(),
hephaestus: AgentOverrideConfigSchema.extend({
allow_non_gpt_model: z.boolean().optional(),
}).optional(),
"sisyphus-junior": AgentOverrideConfigSchema.optional(),
"OpenCode-Builder": AgentOverrideConfigSchema.optional(),
prometheus: AgentOverrideConfigSchema.optional(),

View File

@@ -20,6 +20,7 @@ export const CategoryConfigSchema = z.object({
textVerbosity: z.enum(["low", "medium", "high"]).optional(),
tools: z.record(z.string(), z.boolean()).optional(),
prompt_append: z.string().optional(),
max_prompt_tokens: z.number().int().positive().optional(),
/** Mark agent as unstable - forces background mode for monitoring. Auto-enabled for gemini/minimax models. */
is_unstable_agent: z.boolean().optional(),
/** Disable this category. Disabled categories are excluded from task delegation. */

View File

@@ -27,7 +27,7 @@ export const OhMyOpenCodeConfigSchema = z.object({
/** Default agent name for `oh-my-opencode run` (env: OPENCODE_DEFAULT_AGENT) */
default_run_agent: z.string().optional(),
disabled_mcps: z.array(AnyMcpNameSchema).optional(),
disabled_agents: z.array(BuiltinAgentNameSchema).optional(),
disabled_agents: z.array(z.string()).optional(),
disabled_skills: z.array(BuiltinSkillNameSchema).optional(),
disabled_hooks: z.array(z.string()).optional(),
disabled_commands: z.array(BuiltinCommandNameSchema).optional(),

View File

@@ -34,7 +34,7 @@ describe("ConcurrencyManager.getConcurrencyLimit", () => {
test("should return provider limit even when modelConcurrency exists but doesn't match", () => {
// given
const config: BackgroundTaskConfig = {
modelConcurrency: { "google/gemini-3-pro": 5 },
modelConcurrency: { "google/gemini-3.1-pro": 5 },
providerConcurrency: { anthropic: 3 }
}
const manager = new ConcurrencyManager(config)
@@ -95,7 +95,7 @@ describe("ConcurrencyManager.getConcurrencyLimit", () => {
// when
const modelLimit = manager.getConcurrencyLimit("anthropic/claude-sonnet-4-6")
const providerLimit = manager.getConcurrencyLimit("anthropic/claude-opus-4-6")
const defaultLimit = manager.getConcurrencyLimit("google/gemini-3-pro")
const defaultLimit = manager.getConcurrencyLimit("google/gemini-3.1-pro")
// then
expect(modelLimit).toBe(10)

View File

@@ -191,6 +191,10 @@ function getPendingByParent(manager: BackgroundManager): Map<string, Set<string>
return (manager as unknown as { pendingByParent: Map<string, Set<string>> }).pendingByParent
}
function getPendingNotifications(manager: BackgroundManager): Map<string, string[]> {
return (manager as unknown as { pendingNotifications: Map<string, string[]> }).pendingNotifications
}
function getCompletionTimers(manager: BackgroundManager): Map<string, ReturnType<typeof setTimeout>> {
return (manager as unknown as { completionTimers: Map<string, ReturnType<typeof setTimeout>> }).completionTimers
}
@@ -1057,6 +1061,49 @@ describe("BackgroundManager.notifyParentSession - aborted parent", () => {
manager.shutdown()
})
test("should queue notification when promptAsync aborts while parent is idle", async () => {
//#given
const promptMock = async () => {
const error = new Error("Request aborted while waiting for input")
error.name = "MessageAbortedError"
throw error
}
const client = {
session: {
prompt: promptMock,
promptAsync: promptMock,
abort: async () => ({}),
messages: async () => ({ data: [] }),
},
}
const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput)
const task: BackgroundTask = {
id: "task-aborted-idle-queue",
sessionID: "session-child",
parentSessionID: "session-parent",
parentMessageID: "msg-parent",
description: "task idle queue",
prompt: "test",
agent: "explore",
status: "completed",
startedAt: new Date(),
completedAt: new Date(),
}
getPendingByParent(manager).set("session-parent", new Set([task.id]))
//#when
await (manager as unknown as { notifyParentSession: (task: BackgroundTask) => Promise<void> })
.notifyParentSession(task)
//#then
const queuedNotifications = getPendingNotifications(manager).get("session-parent") ?? []
expect(queuedNotifications).toHaveLength(1)
expect(queuedNotifications[0]).toContain("<system-reminder>")
expect(queuedNotifications[0]).toContain("[ALL BACKGROUND TASKS COMPLETE]")
manager.shutdown()
})
})
describe("BackgroundManager.notifyParentSession - notifications toggle", () => {
@@ -1105,6 +1152,29 @@ describe("BackgroundManager.notifyParentSession - notifications toggle", () => {
})
})
describe("BackgroundManager.injectPendingNotificationsIntoChatMessage", () => {
test("should prepend queued notifications to first text part and clear queue", () => {
// given
const manager = createBackgroundManager()
manager.queuePendingNotification("session-parent", "<system-reminder>queued-one</system-reminder>")
manager.queuePendingNotification("session-parent", "<system-reminder>queued-two</system-reminder>")
const output = {
parts: [{ type: "text", text: "User prompt" }],
}
// when
manager.injectPendingNotificationsIntoChatMessage(output, "session-parent")
// then
expect(output.parts[0].text).toContain("<system-reminder>queued-one</system-reminder>")
expect(output.parts[0].text).toContain("<system-reminder>queued-two</system-reminder>")
expect(output.parts[0].text).toContain("User prompt")
expect(getPendingNotifications(manager).get("session-parent")).toBeUndefined()
manager.shutdown()
})
})
function buildNotificationPromptBody(
task: BackgroundTask,
currentMessage: CurrentMessage | null

View File

@@ -93,6 +93,7 @@ export class BackgroundManager {
private tasks: Map<string, BackgroundTask>
private notifications: Map<string, BackgroundTask[]>
private pendingNotifications: Map<string, string[]>
private pendingByParent: Map<string, Set<string>> // Track pending tasks per parent for batching
private client: OpencodeClient
private directory: string
@@ -125,6 +126,7 @@ export class BackgroundManager {
) {
this.tasks = new Map()
this.notifications = new Map()
this.pendingNotifications = new Map()
this.pendingByParent = new Map()
this.client = ctx.client
this.directory = ctx.directory
@@ -917,6 +919,32 @@ export class BackgroundManager {
this.notifications.delete(sessionID)
}
queuePendingNotification(sessionID: string | undefined, notification: string): void {
if (!sessionID) return
const existingNotifications = this.pendingNotifications.get(sessionID) ?? []
existingNotifications.push(notification)
this.pendingNotifications.set(sessionID, existingNotifications)
}
injectPendingNotificationsIntoChatMessage(output: { parts: Array<{ type: string; text?: string; [key: string]: unknown }> }, sessionID: string): void {
const pendingNotifications = this.pendingNotifications.get(sessionID)
if (!pendingNotifications || pendingNotifications.length === 0) {
return
}
this.pendingNotifications.delete(sessionID)
const notificationContent = pendingNotifications.join("\n\n")
const firstTextPartIndex = output.parts.findIndex((part) => part.type === "text")
if (firstTextPartIndex === -1) {
output.parts.unshift(createInternalAgentTextPart(notificationContent))
return
}
const originalText = output.parts[firstTextPartIndex].text ?? ""
output.parts[firstTextPartIndex].text = `${notificationContent}\n\n---\n\n${originalText}`
}
/**
* Validates that a session has actual assistant/tool output before marking complete.
* Prevents premature completion when session.idle fires before agent responds.
@@ -1340,6 +1368,7 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
taskId: task.id,
parentSessionID: task.parentSessionID,
})
this.queuePendingNotification(task.parentSessionID, notification)
} else {
log("[background-agent] Failed to send notification:", error)
}
@@ -1568,6 +1597,7 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
this.concurrencyManager.clear()
this.tasks.clear()
this.notifications.clear()
this.pendingNotifications.clear()
this.pendingByParent.clear()
this.notificationQueueByParent.clear()
this.queuesByKey.clear()

View File

@@ -269,6 +269,71 @@ describe("boulder-state", () => {
expect(progress.isComplete).toBe(false)
})
test("should count space-indented unchecked checkbox", () => {
// given - plan file with a two-space indented checkbox
const planPath = join(TEST_DIR, "space-indented-plan.md")
writeFileSync(planPath, `# Plan
- [ ] indented task
`)
// when
const progress = getPlanProgress(planPath)
// then
expect(progress.total).toBe(1)
expect(progress.completed).toBe(0)
expect(progress.isComplete).toBe(false)
})
test("should count tab-indented unchecked checkbox", () => {
// given - plan file with a tab-indented checkbox
const planPath = join(TEST_DIR, "tab-indented-plan.md")
writeFileSync(planPath, `# Plan
- [ ] tab-indented task
`)
// when
const progress = getPlanProgress(planPath)
// then
expect(progress.total).toBe(1)
expect(progress.completed).toBe(0)
expect(progress.isComplete).toBe(false)
})
test("should count mixed top-level checked and indented unchecked checkboxes", () => {
// given - plan file with checked top-level and unchecked indented task
const planPath = join(TEST_DIR, "mixed-indented-plan.md")
writeFileSync(planPath, `# Plan
- [x] top-level completed task
- [ ] nested unchecked task
`)
// when
const progress = getPlanProgress(planPath)
// then
expect(progress.total).toBe(2)
expect(progress.completed).toBe(1)
expect(progress.isComplete).toBe(false)
})
test("should count space-indented completed checkbox", () => {
// given - plan file with a two-space indented completed checkbox
const planPath = join(TEST_DIR, "indented-completed-plan.md")
writeFileSync(planPath, `# Plan
- [x] indented completed task
`)
// when
const progress = getPlanProgress(planPath)
// then
expect(progress.total).toBe(1)
expect(progress.completed).toBe(1)
expect(progress.isComplete).toBe(true)
})
test("should return isComplete true when all checked", () => {
// given - all tasks completed
const planPath = join(TEST_DIR, "complete-plan.md")

View File

@@ -121,8 +121,8 @@ export function getPlanProgress(planPath: string): PlanProgress {
const content = readFileSync(planPath, "utf-8")
// Match markdown checkboxes: - [ ] or - [x] or - [X]
const uncheckedMatches = content.match(/^[-*]\s*\[\s*\]/gm) || []
const checkedMatches = content.match(/^[-*]\s*\[[xX]\]/gm) || []
const uncheckedMatches = content.match(/^\s*[-*]\s*\[\s*\]/gm) || []
const checkedMatches = content.match(/^\s*[-*]\s*\[[xX]\]/gm) || []
const total = uncheckedMatches.length + checkedMatches.length
const completed = checkedMatches.length
@@ -150,7 +150,8 @@ export function getPlanName(planPath: string): string {
export function createBoulderState(
planPath: string,
sessionId: string,
agent?: string
agent?: string,
worktreePath?: string,
): BoulderState {
return {
active_plan: planPath,
@@ -158,5 +159,6 @@ export function createBoulderState(
session_ids: [sessionId],
plan_name: getPlanName(planPath),
...(agent !== undefined ? { agent } : {}),
...(worktreePath !== undefined ? { worktree_path: worktreePath } : {}),
}
}

View File

@@ -16,6 +16,8 @@ export interface BoulderState {
plan_name: string
/** Agent type to use when resuming (e.g., 'atlas') */
agent?: string
/** Absolute path to the git worktree root where work happens */
worktree_path?: string
}
export interface PlanProgress {

View File

@@ -1,5 +1,14 @@
export const START_WORK_TEMPLATE = `You are starting a Sisyphus work session.
## ARGUMENTS
- \`/start-work [plan-name] [--worktree <path>]\`
- \`plan-name\` (optional): name or partial match of the plan to start
- \`--worktree <path>\` (optional): absolute path to an existing git worktree to work in
- If specified and valid: hook pre-sets worktree_path in boulder.json
- If specified but invalid: you must run \`git worktree add <path> <branch>\` first
- If omitted: you MUST choose or create a worktree (see Worktree Setup below)
## WHAT TO DO
1. **Find available plans**: Search for Prometheus-generated plan files at \`.sisyphus/plans/\`
@@ -15,17 +24,24 @@ export const START_WORK_TEMPLATE = `You are starting a Sisyphus work session.
- If ONE plan: auto-select it
- If MULTIPLE plans: show list with timestamps, ask user to select
4. **Create/Update boulder.json**:
4. **Worktree Setup** (when \`worktree_path\` not already set in boulder.json):
1. \`git worktree list --porcelain\` — see available worktrees
2. Create: \`git worktree add <absolute-path> <branch-or-HEAD>\`
3. Update boulder.json to add \`"worktree_path": "<absolute-path>"\`
4. All work happens inside that worktree directory
5. **Create/Update boulder.json**:
\`\`\`json
{
"active_plan": "/absolute/path/to/plan.md",
"started_at": "ISO_TIMESTAMP",
"session_ids": ["session_id_1", "session_id_2"],
"plan_name": "plan-name"
"plan_name": "plan-name",
"worktree_path": "/absolute/path/to/git/worktree"
}
\`\`\`
5. **Read the plan file** and start executing tasks according to atlas workflow
6. **Read the plan file** and start executing tasks according to atlas workflow
## OUTPUT FORMAT
@@ -49,6 +65,7 @@ Resuming Work Session
Active Plan: {plan-name}
Progress: {completed}/{total} tasks
Sessions: {count} (appending current session)
Worktree: {worktree_path}
Reading plan and continuing from last incomplete task...
\`\`\`
@@ -60,6 +77,7 @@ Starting Work Session
Plan: {plan-name}
Session ID: {session_id}
Started: {timestamp}
Worktree: {worktree_path}
Reading plan and beginning execution...
\`\`\`
@@ -68,5 +86,6 @@ Reading plan and beginning execution...
- The session_id is injected by the hook - use it directly
- Always update boulder.json BEFORE starting work
- Always set worktree_path in boulder.json before executing any tasks
- Read the FULL plan file before delegating any tasks
- Follow atlas delegation protocols (7-section format)`

View File

@@ -162,7 +162,7 @@ describe("TaskToastManager", () => {
description: "Task with category default model",
agent: "sisyphus-junior",
isBackground: false,
modelInfo: { model: "google/gemini-3-pro", type: "category-default" as const },
modelInfo: { model: "google/gemini-3.1-pro", type: "category-default" as const },
}
// when - addTask is called

View File

@@ -6,7 +6,7 @@ export function getOrCreateRetryState(
): RetryState {
let state = autoCompactState.retryStateBySession.get(sessionID)
if (!state) {
state = { attempt: 0, lastAttemptTime: 0 }
state = { attempt: 0, lastAttemptTime: 0, firstAttemptTime: 0 }
autoCompactState.retryStateBySession.set(sessionID, state)
}
return state

View File

@@ -0,0 +1,122 @@
import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test"
import { runSummarizeRetryStrategy } from "./summarize-retry-strategy"
import type { AutoCompactState, ParsedTokenLimitError, RetryState } from "./types"
import type { OhMyOpenCodeConfig } from "../../config"
type TimeoutCall = {
delay: number
}
function createAutoCompactState(): AutoCompactState {
return {
pendingCompact: new Set<string>(),
errorDataBySession: new Map<string, ParsedTokenLimitError>(),
retryStateBySession: new Map<string, RetryState>(),
truncateStateBySession: new Map(),
emptyContentAttemptBySession: new Map(),
compactionInProgress: new Set<string>(),
}
}
describe("runSummarizeRetryStrategy", () => {
const sessionID = "ses_retry_timeout"
const directory = "/tmp"
let autoCompactState: AutoCompactState
const summarizeMock = mock(() => Promise.resolve())
const showToastMock = mock(() => Promise.resolve())
const client = {
session: {
summarize: summarizeMock,
messages: mock(() => Promise.resolve({ data: [] })),
promptAsync: mock(() => Promise.resolve()),
revert: mock(() => Promise.resolve()),
},
tui: {
showToast: showToastMock,
},
}
beforeEach(() => {
autoCompactState = createAutoCompactState()
summarizeMock.mockReset()
showToastMock.mockReset()
summarizeMock.mockResolvedValue(undefined)
showToastMock.mockResolvedValue(undefined)
})
afterEach(() => {
globalThis.setTimeout = originalSetTimeout
})
const originalSetTimeout = globalThis.setTimeout
test("stops retries when total summarize timeout is exceeded", async () => {
//#given
autoCompactState.pendingCompact.add(sessionID)
autoCompactState.errorDataBySession.set(sessionID, {
currentTokens: 250000,
maxTokens: 200000,
errorType: "token_limit_exceeded",
})
autoCompactState.retryStateBySession.set(sessionID, {
attempt: 1,
lastAttemptTime: Date.now(),
firstAttemptTime: Date.now() - 130000,
})
//#when
await runSummarizeRetryStrategy({
sessionID,
msg: { providerID: "anthropic", modelID: "claude-sonnet-4-6" },
autoCompactState,
client: client as never,
directory,
pluginConfig: {} as OhMyOpenCodeConfig,
})
//#then
expect(summarizeMock).not.toHaveBeenCalled()
expect(autoCompactState.pendingCompact.has(sessionID)).toBe(false)
expect(autoCompactState.errorDataBySession.has(sessionID)).toBe(false)
expect(autoCompactState.retryStateBySession.has(sessionID)).toBe(false)
expect(showToastMock).toHaveBeenCalledWith(
expect.objectContaining({
body: expect.objectContaining({
title: "Auto Compact Timed Out",
}),
}),
)
})
test("caps retry delay by remaining total timeout window", async () => {
//#given
const timeoutCalls: TimeoutCall[] = []
globalThis.setTimeout = ((_: (...args: unknown[]) => void, delay?: number) => {
timeoutCalls.push({ delay: delay ?? 0 })
return 1 as unknown as ReturnType<typeof setTimeout>
}) as typeof setTimeout
autoCompactState.retryStateBySession.set(sessionID, {
attempt: 1,
lastAttemptTime: Date.now(),
firstAttemptTime: Date.now() - 119700,
})
summarizeMock.mockRejectedValueOnce(new Error("rate limited"))
//#when
await runSummarizeRetryStrategy({
sessionID,
msg: { providerID: "anthropic", modelID: "claude-sonnet-4-6" },
autoCompactState,
client: client as never,
directory,
pluginConfig: {} as OhMyOpenCodeConfig,
})
//#then
expect(timeoutCalls.length).toBe(1)
expect(timeoutCalls[0]!.delay).toBeGreaterThan(0)
expect(timeoutCalls[0]!.delay).toBeLessThanOrEqual(500)
})
})

View File

@@ -7,6 +7,8 @@ import { sanitizeEmptyMessagesBeforeSummarize } from "./message-builder"
import { fixEmptyMessages } from "./empty-content-recovery"
import { resolveCompactionModel } from "../shared/compaction-model-resolver"
const SUMMARIZE_RETRY_TOTAL_TIMEOUT_MS = 120_000
export async function runSummarizeRetryStrategy(params: {
sessionID: string
msg: Record<string, unknown>
@@ -18,6 +20,27 @@ export async function runSummarizeRetryStrategy(params: {
messageIndex?: number
}): Promise<void> {
const retryState = getOrCreateRetryState(params.autoCompactState, params.sessionID)
const now = Date.now()
if (retryState.firstAttemptTime === 0) {
retryState.firstAttemptTime = now
}
const elapsedTimeMs = now - retryState.firstAttemptTime
if (elapsedTimeMs >= SUMMARIZE_RETRY_TOTAL_TIMEOUT_MS) {
clearSessionState(params.autoCompactState, params.sessionID)
await params.client.tui
.showToast({
body: {
title: "Auto Compact Timed Out",
message: "Compaction retries exceeded the timeout window. Please start a new session.",
variant: "error",
duration: 5000,
},
})
.catch(() => {})
return
}
if (params.errorType?.includes("non-empty content")) {
const attempt = getEmptyContentAttempt(params.autoCompactState, params.sessionID)
@@ -52,6 +75,7 @@ export async function runSummarizeRetryStrategy(params: {
if (Date.now() - retryState.lastAttemptTime > 300000) {
retryState.attempt = 0
retryState.firstAttemptTime = Date.now()
params.autoCompactState.truncateStateBySession.delete(params.sessionID)
}
@@ -92,10 +116,26 @@ export async function runSummarizeRetryStrategy(params: {
})
return
} catch {
const remainingTimeMs = SUMMARIZE_RETRY_TOTAL_TIMEOUT_MS - (Date.now() - retryState.firstAttemptTime)
if (remainingTimeMs <= 0) {
clearSessionState(params.autoCompactState, params.sessionID)
await params.client.tui
.showToast({
body: {
title: "Auto Compact Timed Out",
message: "Compaction retries exceeded the timeout window. Please start a new session.",
variant: "error",
duration: 5000,
},
})
.catch(() => {})
return
}
const delay =
RETRY_CONFIG.initialDelayMs *
Math.pow(RETRY_CONFIG.backoffFactor, retryState.attempt - 1)
const cappedDelay = Math.min(delay, RETRY_CONFIG.maxDelayMs)
const cappedDelay = Math.min(delay, RETRY_CONFIG.maxDelayMs, remainingTimeMs)
setTimeout(() => {
void runSummarizeRetryStrategy(params)

View File

@@ -11,6 +11,7 @@ export interface ParsedTokenLimitError {
export interface RetryState {
attempt: number
lastAttemptTime: number
firstAttemptTime: number
}
export interface TruncateState {

View File

@@ -14,6 +14,7 @@ export async function injectBoulderContinuation(input: {
remaining: number
total: number
agent?: string
worktreePath?: string
backgroundManager?: BackgroundManager
sessionState: SessionState
}): Promise<void> {
@@ -24,6 +25,7 @@ export async function injectBoulderContinuation(input: {
remaining,
total,
agent,
worktreePath,
backgroundManager,
sessionState,
} = input
@@ -37,9 +39,11 @@ export async function injectBoulderContinuation(input: {
return
}
const worktreeContext = worktreePath ? `\n\n[Worktree: ${worktreePath}]` : ""
const prompt =
BOULDER_CONTINUATION_PROMPT.replace(/{PLAN_NAME}/g, planName) +
`\n\n[Status: ${total - remaining}/${total} completed, ${remaining} remaining]`
`\n\n[Status: ${total - remaining}/${total} completed, ${remaining} remaining]` +
worktreeContext
try {
log(`[${HOOK_NAME}] Injecting boulder continuation`, { sessionID, planName, remaining })
@@ -62,6 +66,7 @@ export async function injectBoulderContinuation(input: {
log(`[${HOOK_NAME}] Boulder continuation injected`, { sessionID })
} catch (err) {
sessionState.promptFailureCount += 1
sessionState.lastFailureAt = Date.now()
log(`[${HOOK_NAME}] Boulder continuation failed`, {
sessionID,
error: String(err),

View File

@@ -10,6 +10,7 @@ import { getLastAgentFromSession } from "./session-last-agent"
import type { AtlasHookOptions, SessionState } from "./types"
const CONTINUATION_COOLDOWN_MS = 5000
const FAILURE_BACKOFF_MS = 5 * 60 * 1000
export function createAtlasEventHandler(input: {
ctx: PluginInput
@@ -53,6 +54,7 @@ export function createAtlasEventHandler(input: {
}
const state = getState(sessionID)
const now = Date.now()
if (state.lastEventWasAbortError) {
state.lastEventWasAbortError = false
@@ -61,11 +63,18 @@ export function createAtlasEventHandler(input: {
}
if (state.promptFailureCount >= 2) {
log(`[${HOOK_NAME}] Skipped: continuation disabled after repeated prompt failures`, {
sessionID,
promptFailureCount: state.promptFailureCount,
})
return
const timeSinceLastFailure = state.lastFailureAt !== undefined ? now - state.lastFailureAt : Number.POSITIVE_INFINITY
if (timeSinceLastFailure < FAILURE_BACKOFF_MS) {
log(`[${HOOK_NAME}] Skipped: continuation in backoff after repeated failures`, {
sessionID,
promptFailureCount: state.promptFailureCount,
backoffRemaining: FAILURE_BACKOFF_MS - timeSinceLastFailure,
})
return
}
state.promptFailureCount = 0
state.lastFailureAt = undefined
}
const backgroundManager = options?.backgroundManager
@@ -92,17 +101,15 @@ export function createAtlasEventHandler(input: {
const lastAgentKey = getAgentConfigKey(lastAgent ?? "")
const requiredAgent = getAgentConfigKey(boulderState.agent ?? "atlas")
const lastAgentMatchesRequired = lastAgentKey === requiredAgent
const boulderAgentWasNotExplicitlySet = boulderState.agent === undefined
const boulderAgentDefaultsToAtlas = requiredAgent === "atlas"
const lastAgentIsSisyphus = lastAgentKey === "sisyphus"
const allowSisyphusWhenDefaultAtlas = boulderAgentWasNotExplicitlySet && boulderAgentDefaultsToAtlas && lastAgentIsSisyphus
const agentMatches = lastAgentMatchesRequired || allowSisyphusWhenDefaultAtlas
const allowSisyphusForAtlasBoulder = boulderAgentDefaultsToAtlas && lastAgentIsSisyphus
const agentMatches = lastAgentMatchesRequired || allowSisyphusForAtlasBoulder
if (!agentMatches) {
log(`[${HOOK_NAME}] Skipped: last agent does not match boulder agent`, {
sessionID,
lastAgent: lastAgent ?? "unknown",
requiredAgent,
boulderAgentExplicitlySet: boulderState.agent !== undefined,
})
return
}
@@ -113,7 +120,6 @@ export function createAtlasEventHandler(input: {
return
}
const now = Date.now()
if (state.lastContinuationInjectedAt && now - state.lastContinuationInjectedAt < CONTINUATION_COOLDOWN_MS) {
log(`[${HOOK_NAME}] Skipped: continuation cooldown active`, {
sessionID,
@@ -132,6 +138,7 @@ export function createAtlasEventHandler(input: {
remaining,
total: progress.total,
agent: boulderState.agent,
worktreePath: boulderState.worktree_path,
backgroundManager,
sessionState: state,
})

View File

@@ -933,8 +933,8 @@ describe("atlas hook", () => {
expect(callArgs.body.parts[0].text).toContain("2 remaining")
})
test("should not inject when last agent does not match boulder agent", async () => {
// given - boulder state with incomplete plan, but last agent does NOT match
test("should inject when last agent is sisyphus and boulder targets atlas explicitly", async () => {
// given - boulder explicitly set to atlas, but last agent is sisyphus (initial state after /start-work)
const planPath = join(TEST_DIR, "test-plan.md")
writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2")
@@ -947,7 +947,7 @@ describe("atlas hook", () => {
}
writeBoulderState(TEST_DIR, state)
// given - last agent is NOT the boulder agent
// given - last agent is sisyphus (typical state right after /start-work)
cleanupMessageStorage(MAIN_SESSION_ID)
setupMessageStorage(MAIN_SESSION_ID, "sisyphus")
@@ -962,7 +962,39 @@ describe("atlas hook", () => {
},
})
// then - should NOT call prompt because agent does not match
// then - should call prompt because sisyphus is always allowed for atlas boulders
expect(mockInput._promptMock).toHaveBeenCalled()
})
test("should not inject when last agent is non-sisyphus and does not match boulder agent", async () => {
// given - boulder explicitly set to atlas, last agent is hephaestus (unrelated agent)
const planPath = join(TEST_DIR, "test-plan.md")
writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2")
const state: BoulderState = {
active_plan: planPath,
started_at: "2026-01-02T10:00:00Z",
session_ids: [MAIN_SESSION_ID],
plan_name: "test-plan",
agent: "atlas",
}
writeBoulderState(TEST_DIR, state)
cleanupMessageStorage(MAIN_SESSION_ID)
setupMessageStorage(MAIN_SESSION_ID, "hephaestus")
const mockInput = createMockPluginInput()
const hook = createAtlasHook(mockInput)
// when
await hook.handler({
event: {
type: "session.idle",
properties: { sessionID: MAIN_SESSION_ID },
},
})
// then - should NOT call prompt because hephaestus does not match atlas or sisyphus
expect(mockInput._promptMock).not.toHaveBeenCalled()
})
@@ -1122,6 +1154,144 @@ describe("atlas hook", () => {
}
})
test("should keep skipping continuation during 5-minute backoff after 2 consecutive failures", async () => {
//#given - boulder state with incomplete plan and prompt always fails
const planPath = join(TEST_DIR, "test-plan.md")
writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2")
const state: BoulderState = {
active_plan: planPath,
started_at: "2026-01-02T10:00:00Z",
session_ids: [MAIN_SESSION_ID],
plan_name: "test-plan",
}
writeBoulderState(TEST_DIR, state)
const promptMock = mock(() => Promise.reject(new Error("Bad Request")))
const mockInput = createMockPluginInput({ promptMock })
const hook = createAtlasHook(mockInput)
const originalDateNow = Date.now
let now = 0
Date.now = () => now
try {
//#when - third idle occurs inside 5-minute backoff window
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
await flushMicrotasks()
now += 6000
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
await flushMicrotasks()
now += 60000
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
await flushMicrotasks()
//#then - third attempt should still be skipped
expect(promptMock).toHaveBeenCalledTimes(2)
} finally {
Date.now = originalDateNow
}
})
test("should retry continuation after 5-minute backoff expires following 2 consecutive failures", async () => {
//#given - boulder state with incomplete plan and prompt always fails
const planPath = join(TEST_DIR, "test-plan.md")
writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2")
const state: BoulderState = {
active_plan: planPath,
started_at: "2026-01-02T10:00:00Z",
session_ids: [MAIN_SESSION_ID],
plan_name: "test-plan",
}
writeBoulderState(TEST_DIR, state)
const promptMock = mock(() => Promise.reject(new Error("Bad Request")))
const mockInput = createMockPluginInput({ promptMock })
const hook = createAtlasHook(mockInput)
const originalDateNow = Date.now
let now = 0
Date.now = () => now
try {
//#when - third idle occurs after 5+ minutes
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
await flushMicrotasks()
now += 6000
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
await flushMicrotasks()
now += 300000
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
await flushMicrotasks()
//#then - third attempt should run after backoff expiration
expect(promptMock).toHaveBeenCalledTimes(3)
} finally {
Date.now = originalDateNow
}
})
test("should reset prompt failure counter after successful retry beyond backoff window", async () => {
//#given - boulder state with incomplete plan and success on first retry after backoff
const planPath = join(TEST_DIR, "test-plan.md")
writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2")
const state: BoulderState = {
active_plan: planPath,
started_at: "2026-01-02T10:00:00Z",
session_ids: [MAIN_SESSION_ID],
plan_name: "test-plan",
}
writeBoulderState(TEST_DIR, state)
const promptMock = mock((): Promise<void> => Promise.reject(new Error("Bad Request")))
promptMock.mockImplementationOnce(() => Promise.reject(new Error("Bad Request")))
promptMock.mockImplementationOnce(() => Promise.reject(new Error("Bad Request")))
promptMock.mockImplementationOnce(() => Promise.resolve(undefined))
const mockInput = createMockPluginInput({ promptMock })
const hook = createAtlasHook(mockInput)
const originalDateNow = Date.now
let now = 0
Date.now = () => now
try {
//#when - fail twice, recover after backoff with success, then fail twice again
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
await flushMicrotasks()
now += 6000
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
await flushMicrotasks()
now += 300000
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
await flushMicrotasks()
now += 6000
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
await flushMicrotasks()
now += 6000
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
await flushMicrotasks()
now += 6000
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
await flushMicrotasks()
//#then - success retry resets counter, so two additional failures are allowed before skip
expect(promptMock).toHaveBeenCalledTimes(5)
} finally {
Date.now = originalDateNow
}
})
test("should reset continuation failure state on session.compacted event", async () => {
//#given - boulder state with incomplete plan and prompt always fails
const planPath = join(TEST_DIR, "test-plan.md")

View File

@@ -26,4 +26,5 @@ export interface SessionState {
lastEventWasAbortError?: boolean
lastContinuationInjectedAt?: number
promptFailureCount: number
lastFailureAt?: number
}

View File

@@ -9,6 +9,14 @@ interface EventInput {
event: Event
}
interface ChatMessageInput {
sessionID: string
}
interface ChatMessageOutput {
parts: Array<{ type: string; text?: string; [key: string]: unknown }>
}
/**
* Background notification hook - handles event routing to BackgroundManager.
*
@@ -20,7 +28,15 @@ export function createBackgroundNotificationHook(manager: BackgroundManager) {
manager.handleEvent(event)
}
const chatMessageHandler = async (
input: ChatMessageInput,
output: ChatMessageOutput,
): Promise<void> => {
manager.injectPendingNotificationsIntoChatMessage(output, input.sessionID)
}
return {
"chat.message": chatMessageHandler,
event: eventHandler,
}
}

View File

@@ -6,6 +6,7 @@ import {
import { OMO_SESSION_PREFIX, buildSessionReminderMessage } from "./constants";
import type { InteractiveBashSessionState } from "./types";
import { subagentSessions } from "../../features/claude-code-session-state";
import { spawnWithWindowsHide } from "../../shared/spawn-with-windows-hide";
type AbortSession = (args: { path: { id: string } }) => Promise<unknown>
@@ -19,7 +20,7 @@ async function killAllTrackedSessions(
): Promise<void> {
for (const sessionName of state.tmuxSessions) {
try {
const proc = Bun.spawn(["tmux", "kill-session", "-t", sessionName], {
const proc = spawnWithWindowsHide(["tmux", "kill-session", "-t", sessionName], {
stdout: "ignore",
stderr: "ignore",
})

View File

@@ -1,6 +1,7 @@
import type { InteractiveBashSessionState } from "./types";
import { loadInteractiveBashSessionState } from "./storage";
import { OMO_SESSION_PREFIX } from "./constants";
import { spawnWithWindowsHide } from "../../shared/spawn-with-windows-hide";
export function getOrCreateState(sessionID: string, sessionStates: Map<string, InteractiveBashSessionState>): InteractiveBashSessionState {
if (!sessionStates.has(sessionID)) {
@@ -24,7 +25,7 @@ export async function killAllTrackedSessions(
): Promise<void> {
for (const sessionName of state.tmuxSessions) {
try {
const proc = Bun.spawn(["tmux", "kill-session", "-t", sessionName], {
const proc = spawnWithWindowsHide(["tmux", "kill-session", "-t", sessionName], {
stdout: "ignore",
stderr: "ignore",
});

View File

@@ -12,12 +12,16 @@ const TOAST_MESSAGE = [
].join("\n")
const SISYPHUS_DISPLAY = getAgentDisplayName("sisyphus")
function showToast(ctx: PluginInput, sessionID: string): void {
type NoHephaestusNonGptHookOptions = {
allowNonGptModel?: boolean
}
function showToast(ctx: PluginInput, sessionID: string, variant: "error" | "warning"): void {
ctx.client.tui.showToast({
body: {
title: TOAST_TITLE,
message: TOAST_MESSAGE,
variant: "error",
variant,
duration: 10000,
},
}).catch((error) => {
@@ -28,7 +32,10 @@ function showToast(ctx: PluginInput, sessionID: string): void {
})
}
export function createNoHephaestusNonGptHook(ctx: PluginInput) {
export function createNoHephaestusNonGptHook(
ctx: PluginInput,
options?: NoHephaestusNonGptHookOptions,
) {
return {
"chat.message": async (input: {
sessionID: string
@@ -40,9 +47,13 @@ export function createNoHephaestusNonGptHook(ctx: PluginInput) {
const rawAgent = input.agent ?? getSessionAgent(input.sessionID) ?? ""
const agentKey = getAgentConfigKey(rawAgent)
const modelID = input.model?.modelID
const allowNonGptModel = options?.allowNonGptModel === true
if (agentKey === "hephaestus" && modelID && !isGptModel(modelID)) {
showToast(ctx, input.sessionID)
showToast(ctx, input.sessionID, allowNonGptModel ? "warning" : "error")
if (allowNonGptModel) {
return
}
input.agent = SISYPHUS_DISPLAY
if (output?.message) {
output.message.agent = SISYPHUS_DISPLAY

View File

@@ -1,3 +1,5 @@
/// <reference types="bun-types" />
import { describe, expect, spyOn, test } from "bun:test"
import { _resetForTesting, updateSessionAgent } from "../../features/claude-code-session-state"
import { getAgentDisplayName } from "../../shared/agent-display-names"
@@ -8,7 +10,7 @@ const SISYPHUS_DISPLAY = getAgentDisplayName("sisyphus")
function createOutput() {
return {
message: {},
message: {} as { agent?: string; [key: string]: unknown },
parts: [],
}
}
@@ -16,7 +18,7 @@ function createOutput() {
describe("no-hephaestus-non-gpt hook", () => {
test("shows toast on every chat.message when hephaestus uses non-gpt model", async () => {
// given - hephaestus with claude model
const showToast = spyOn({ fn: async () => ({}) }, "fn")
const showToast = spyOn({ fn: async (_input: unknown) => ({}) }, "fn")
const hook = createNoHephaestusNonGptHook({
client: { tui: { showToast } },
} as any)
@@ -49,9 +51,38 @@ describe("no-hephaestus-non-gpt hook", () => {
})
})
test("shows warning and does not switch agent when allow_non_gpt_model is enabled", async () => {
// given - hephaestus with claude model and opt-out enabled
const showToast = spyOn({ fn: async (_input: unknown) => ({}) }, "fn")
const hook = createNoHephaestusNonGptHook({
client: { tui: { showToast } },
} as any, {
allowNonGptModel: true,
})
const output = createOutput()
// when - chat.message runs
await hook["chat.message"]?.({
sessionID: "ses_opt_out",
agent: HEPHAESTUS_DISPLAY,
model: { providerID: "anthropic", modelID: "claude-opus-4-6" },
}, output)
// then - warning toast is shown but agent is not switched
expect(showToast).toHaveBeenCalledTimes(1)
expect(output.message.agent).toBeUndefined()
expect(showToast.mock.calls[0]?.[0]).toMatchObject({
body: {
title: "NEVER Use Hephaestus with Non-GPT",
variant: "warning",
},
})
})
test("does not show toast when hephaestus uses gpt model", async () => {
// given - hephaestus with gpt model
const showToast = spyOn({ fn: async () => ({}) }, "fn")
const showToast = spyOn({ fn: async (_input: unknown) => ({}) }, "fn")
const hook = createNoHephaestusNonGptHook({
client: { tui: { showToast } },
} as any)
@@ -72,7 +103,7 @@ describe("no-hephaestus-non-gpt hook", () => {
test("does not show toast for non-hephaestus agent", async () => {
// given - sisyphus with claude model (non-gpt)
const showToast = spyOn({ fn: async () => ({}) }, "fn")
const showToast = spyOn({ fn: async (_input: unknown) => ({}) }, "fn")
const hook = createNoHephaestusNonGptHook({
client: { tui: { showToast } },
} as any)
@@ -95,7 +126,7 @@ describe("no-hephaestus-non-gpt hook", () => {
// given - session agent saved as hephaestus
_resetForTesting()
updateSessionAgent("ses_4", HEPHAESTUS_DISPLAY)
const showToast = spyOn({ fn: async () => ({}) }, "fn")
const showToast = spyOn({ fn: async (_input: unknown) => ({}) }, "fn")
const hook = createNoHephaestusNonGptHook({
client: { tui: { showToast } },
} as any)

View File

@@ -45,6 +45,23 @@ function createMockCtx() {
}
}
function setupImmediateTimeouts(): () => void {
const originalSetTimeout = globalThis.setTimeout
const originalClearTimeout = globalThis.clearTimeout
globalThis.setTimeout = ((callback: (...args: unknown[]) => void, _delay?: number, ...args: unknown[]) => {
callback(...args)
return 1 as unknown as ReturnType<typeof setTimeout>
}) as typeof setTimeout
globalThis.clearTimeout = (() => {}) as typeof clearTimeout
return () => {
globalThis.setTimeout = originalSetTimeout
globalThis.clearTimeout = originalClearTimeout
}
}
describe("preemptive-compaction", () => {
let ctx: ReturnType<typeof createMockCtx>
@@ -63,7 +80,7 @@ describe("preemptive-compaction", () => {
// #when tool.execute.after is called
// #then session.messages() should NOT be called
it("should use cached token info instead of fetching session.messages()", async () => {
const hook = createPreemptiveCompactionHook(ctx as never)
const hook = createPreemptiveCompactionHook(ctx as never, {} as never)
const sessionID = "ses_test1"
// Simulate message.updated with token info below threshold
@@ -101,7 +118,7 @@ describe("preemptive-compaction", () => {
// #when tool.execute.after is called
// #then should skip without fetching
it("should skip gracefully when no cached token info exists", async () => {
const hook = createPreemptiveCompactionHook(ctx as never)
const hook = createPreemptiveCompactionHook(ctx as never, {} as never)
const output = { title: "", output: "test", metadata: null }
await hook["tool.execute.after"](
@@ -116,7 +133,7 @@ describe("preemptive-compaction", () => {
// #when tool.execute.after runs
// #then should trigger summarize
it("should trigger compaction when usage exceeds threshold", async () => {
const hook = createPreemptiveCompactionHook(ctx as never)
const hook = createPreemptiveCompactionHook(ctx as never, {} as never)
const sessionID = "ses_high"
// 170K input + 10K cache = 180K → 90% of 200K
@@ -153,7 +170,7 @@ describe("preemptive-compaction", () => {
it("should trigger compaction for google-vertex-anthropic provider", async () => {
//#given google-vertex-anthropic usage above threshold
const hook = createPreemptiveCompactionHook(ctx as never)
const hook = createPreemptiveCompactionHook(ctx as never, {} as never)
const sessionID = "ses_vertex_anthropic_high"
await hook.event({
@@ -191,7 +208,7 @@ describe("preemptive-compaction", () => {
// #given session deleted
// #then cache should be cleaned up
it("should clean up cache on session.deleted", async () => {
const hook = createPreemptiveCompactionHook(ctx as never)
const hook = createPreemptiveCompactionHook(ctx as never, {} as never)
const sessionID = "ses_del"
await hook.event({
@@ -228,7 +245,7 @@ describe("preemptive-compaction", () => {
it("should log summarize errors instead of swallowing them", async () => {
//#given
const hook = createPreemptiveCompactionHook(ctx as never)
const hook = createPreemptiveCompactionHook(ctx as never, {} as never)
const sessionID = "ses_log_error"
const summarizeError = new Error("summarize failed")
ctx.client.session.summarize.mockRejectedValueOnce(summarizeError)
@@ -343,4 +360,58 @@ describe("preemptive-compaction", () => {
//#then
expect(ctx.client.session.summarize).not.toHaveBeenCalled()
})
it("should clear in-progress lock when summarize times out", async () => {
//#given
const restoreTimeouts = setupImmediateTimeouts()
const hook = createPreemptiveCompactionHook(ctx as never, {} as never)
const sessionID = "ses_timeout"
ctx.client.session.summarize
.mockImplementationOnce(() => new Promise(() => {}))
.mockResolvedValueOnce({})
try {
await hook.event({
event: {
type: "message.updated",
properties: {
info: {
role: "assistant",
sessionID,
providerID: "anthropic",
modelID: "claude-sonnet-4-6",
finish: true,
tokens: {
input: 170000,
output: 0,
reasoning: 0,
cache: { read: 10000, write: 0 },
},
},
},
},
})
//#when
await hook["tool.execute.after"](
{ tool: "bash", sessionID, callID: "call_timeout_1" },
{ title: "", output: "test", metadata: null },
)
await hook["tool.execute.after"](
{ tool: "bash", sessionID, callID: "call_timeout_2" },
{ title: "", output: "test", metadata: null },
)
//#then
expect(ctx.client.session.summarize).toHaveBeenCalledTimes(2)
expect(logMock).toHaveBeenCalledWith("[preemptive-compaction] Compaction failed", {
sessionID,
error: expect.stringContaining("Compaction summarize timed out"),
})
} finally {
restoreTimeouts()
}
})
})

View File

@@ -3,6 +3,7 @@ import type { OhMyOpenCodeConfig } from "../config"
import { resolveCompactionModel } from "./shared/compaction-model-resolver"
const DEFAULT_ACTUAL_LIMIT = 200_000
const PREEMPTIVE_COMPACTION_TIMEOUT_MS = 120_000
type ModelCacheStateLike = {
anthropicContext1MEnabled: boolean
@@ -31,6 +32,26 @@ interface CachedCompactionState {
tokens: TokenInfo
}
function withTimeout<TValue>(
promise: Promise<TValue>,
timeoutMs: number,
errorMessage: string,
): Promise<TValue> {
let timeoutID: ReturnType<typeof setTimeout> | undefined
const timeoutPromise = new Promise<never>((_, reject) => {
timeoutID = setTimeout(() => {
reject(new Error(errorMessage))
}, timeoutMs)
})
return Promise.race([promise, timeoutPromise]).finally(() => {
if (timeoutID !== undefined) {
clearTimeout(timeoutID)
}
})
}
function isAnthropicProvider(providerID: string): boolean {
return providerID === "anthropic" || providerID === "google-vertex-anthropic"
}
@@ -94,11 +115,15 @@ export function createPreemptiveCompactionHook(
modelID
)
await ctx.client.session.summarize({
path: { id: sessionID },
body: { providerID: targetProviderID, modelID: targetModelID, auto: true } as never,
query: { directory: ctx.directory },
})
await withTimeout(
ctx.client.session.summarize({
path: { id: sessionID },
body: { providerID: targetProviderID, modelID: targetModelID, auto: true } as never,
query: { directory: ctx.directory },
}),
PREEMPTIVE_COMPACTION_TIMEOUT_MS,
`Compaction summarize timed out after ${PREEMPTIVE_COMPACTION_TIMEOUT_MS}ms`,
)
compactedSessions.add(sessionID)
} catch (error) {

View File

@@ -79,8 +79,8 @@ export async function detectCompletionInSessionMessages(
if (assistantMessages.length === 0) return false
const pattern = buildPromisePattern(options.promise)
const recentAssistants = assistantMessages.slice(-3)
for (const assistant of recentAssistants) {
for (let index = assistantMessages.length - 1; index >= 0; index -= 1) {
const assistant = assistantMessages[index]
if (!assistant.parts) continue
let responseText = ""

View File

@@ -494,6 +494,7 @@ describe("ralph-loop", () => {
config: {
enabled: true,
default_max_iterations: 200,
default_strategy: "continue",
},
})
@@ -708,6 +709,57 @@ describe("ralph-loop", () => {
expect(promptCalls[0].text).toContain("<promise>CALCULATOR_DONE</promise>")
})
test("should skip concurrent idle events for same session when handler is in flight", async () => {
// given - active loop with delayed prompt injection
let releasePromptAsync: (() => void) | undefined
const promptAsyncBlocked = new Promise<void>((resolve) => {
releasePromptAsync = resolve
})
let firstPromptStartedResolve: (() => void) | undefined
const firstPromptStarted = new Promise<void>((resolve) => {
firstPromptStartedResolve = resolve
})
const mockInput = createMockPluginInput() as {
client: {
session: {
promptAsync: (opts: { path: { id: string }; body: { parts: Array<{ type: string; text: string }> } }) => Promise<unknown>
}
}
}
const originalPromptAsync = mockInput.client.session.promptAsync
let promptAsyncCalls = 0
mockInput.client.session.promptAsync = async (opts) => {
promptAsyncCalls += 1
if (promptAsyncCalls === 1) {
firstPromptStartedResolve?.()
}
await promptAsyncBlocked
return originalPromptAsync(opts)
}
const hook = createRalphLoopHook(mockInput as Parameters<typeof createRalphLoopHook>[0])
hook.startLoop("session-123", "Build feature", { maxIterations: 10 })
// when - second idle arrives while first idle processing is still in flight
const firstIdle = hook.event({
event: { type: "session.idle", properties: { sessionID: "session-123" } },
})
await firstPromptStarted
const secondIdle = hook.event({
event: { type: "session.idle", properties: { sessionID: "session-123" } },
})
releasePromptAsync?.()
await Promise.all([firstIdle, secondIdle])
// then - only one continuation should be injected
expect(promptAsyncCalls).toBe(1)
expect(promptCalls.length).toBe(1)
expect(hook.getState()?.iteration).toBe(2)
})
test("should clear loop state on user abort (MessageAbortedError)", async () => {
// given - active loop
const hook = createRalphLoopHook(createMockPluginInput())
@@ -782,8 +834,8 @@ describe("ralph-loop", () => {
expect(hook.getState()).toBeNull()
})
test("should NOT detect completion if promise is older than last 3 assistant messages", async () => {
// given - promise appears in an assistant message older than last 3
test("should detect completion even when promise is older than previous narrow window", async () => {
// given - promise appears in an older assistant message with additional assistant output after it
mockSessionMessages = [
{ info: { role: "user" }, parts: [{ type: "text", text: "Start task" }] },
{ info: { role: "assistant" }, parts: [{ type: "text", text: "Promise early <promise>DONE</promise>" }] },
@@ -801,9 +853,40 @@ describe("ralph-loop", () => {
event: { type: "session.idle", properties: { sessionID: "session-123" } },
})
// then - loop should continue (promise is older than last 3 assistant messages)
expect(promptCalls.length).toBe(1)
expect(hook.getState()?.iteration).toBe(2)
// then - loop should complete because all assistant messages are scanned
expect(promptCalls.length).toBe(0)
expect(toastCalls.some((t) => t.title === "Ralph Loop Complete!")).toBe(true)
expect(hook.getState()).toBeNull()
})
test("should detect completion when many assistant messages are emitted after promise", async () => {
// given - completion promise followed by long assistant output sequence
mockSessionMessages = [
{ info: { role: "user" }, parts: [{ type: "text", text: "Start task" }] },
{ info: { role: "assistant" }, parts: [{ type: "text", text: "Done now <promise>DONE</promise>" }] },
]
for (let index = 1; index <= 25; index += 1) {
mockSessionMessages.push({
info: { role: "assistant" },
parts: [{ type: "text", text: `Post-completion assistant output ${index}` }],
})
}
const hook = createRalphLoopHook(createMockPluginInput(), {
getTranscriptPath: () => join(TEST_DIR, "nonexistent.jsonl"),
})
hook.startLoop("session-123", "Build something", { completionPromise: "DONE" })
// when - session goes idle
await hook.event({
event: { type: "session.idle", properties: { sessionID: "session-123" } },
})
// then - loop should complete despite large trailing output
expect(promptCalls.length).toBe(0)
expect(toastCalls.some((t) => t.title === "Ralph Loop Complete!")).toBe(true)
expect(hook.getState()).toBeNull()
})
test("should allow starting new loop while previous loop is active (different session)", async () => {

View File

@@ -33,15 +33,6 @@ export async function continueIteration(
return
}
const boundState = options.loopState.setSessionID(newSessionID)
if (!boundState) {
log(`[${HOOK_NAME}] Failed to bind loop state to new session`, {
previousSessionID: options.previousSessionID,
newSessionID,
})
return
}
await injectContinuationPrompt(ctx, {
sessionID: newSessionID,
inheritFromSessionID: options.previousSessionID,
@@ -51,6 +42,16 @@ export async function continueIteration(
})
await selectSessionInTui(ctx.client, newSessionID)
const boundState = options.loopState.setSessionID(newSessionID)
if (!boundState) {
log(`[${HOOK_NAME}] Failed to bind loop state to new session`, {
previousSessionID: options.previousSessionID,
newSessionID,
})
return
}
return
}

View File

@@ -25,6 +25,8 @@ export function createRalphLoopEventHandler(
ctx: PluginInput,
options: RalphLoopEventHandlerOptions,
) {
const inFlightSessions = new Set<string>()
return async ({ event }: { event: { type: string; properties?: unknown } }): Promise<void> => {
const props = event.properties as Record<string, unknown> | undefined
@@ -32,115 +34,127 @@ export function createRalphLoopEventHandler(
const sessionID = props?.sessionID as string | undefined
if (!sessionID) return
if (options.sessionRecovery.isRecovering(sessionID)) {
log(`[${HOOK_NAME}] Skipped: in recovery`, { sessionID })
if (inFlightSessions.has(sessionID)) {
log(`[${HOOK_NAME}] Skipped: handler in flight`, { sessionID })
return
}
const state = options.loopState.getState()
if (!state || !state.active) {
return
}
if (state.session_id && state.session_id !== sessionID) {
if (options.checkSessionExists) {
try {
const exists = await options.checkSessionExists(state.session_id)
if (!exists) {
options.loopState.clear()
log(`[${HOOK_NAME}] Cleared orphaned state from deleted session`, {
orphanedSessionId: state.session_id,
currentSessionId: sessionID,
})
return
}
} catch (err) {
log(`[${HOOK_NAME}] Failed to check session existence`, {
sessionId: state.session_id,
error: String(err),
})
}
}
return
}
const transcriptPath = options.getTranscriptPath(sessionID)
const completionViaTranscript = detectCompletionInTranscript(transcriptPath, state.completion_promise)
const completionViaApi = completionViaTranscript
? false
: await detectCompletionInSessionMessages(ctx, {
sessionID,
promise: state.completion_promise,
apiTimeoutMs: options.apiTimeoutMs,
directory: options.directory,
})
if (completionViaTranscript || completionViaApi) {
log(`[${HOOK_NAME}] Completion detected!`, {
sessionID,
iteration: state.iteration,
promise: state.completion_promise,
detectedVia: completionViaTranscript
? "transcript_file"
: "session_messages_api",
})
options.loopState.clear()
const title = state.ultrawork ? "ULTRAWORK LOOP COMPLETE!" : "Ralph Loop Complete!"
const message = state.ultrawork ? `JUST ULW ULW! Task completed after ${state.iteration} iteration(s)` : `Task completed after ${state.iteration} iteration(s)`
await ctx.client.tui?.showToast?.({ body: { title, message, variant: "success", duration: 5000 } }).catch(() => {})
return
}
if (state.iteration >= state.max_iterations) {
log(`[${HOOK_NAME}] Max iterations reached`, {
sessionID,
iteration: state.iteration,
max: state.max_iterations,
})
options.loopState.clear()
await ctx.client.tui?.showToast?.({
body: { title: "Ralph Loop Stopped", message: `Max iterations (${state.max_iterations}) reached without completion`, variant: "warning", duration: 5000 },
}).catch(() => {})
return
}
const newState = options.loopState.incrementIteration()
if (!newState) {
log(`[${HOOK_NAME}] Failed to increment iteration`, { sessionID })
return
}
log(`[${HOOK_NAME}] Continuing loop`, {
sessionID,
iteration: newState.iteration,
max: newState.max_iterations,
})
await ctx.client.tui?.showToast?.({
body: {
title: "Ralph Loop",
message: `Iteration ${newState.iteration}/${newState.max_iterations}`,
variant: "info",
duration: 2000,
},
}).catch(() => {})
inFlightSessions.add(sessionID)
try {
await continueIteration(ctx, newState, {
previousSessionID: sessionID,
directory: options.directory,
apiTimeoutMs: options.apiTimeoutMs,
loopState: options.loopState,
})
} catch (err) {
log(`[${HOOK_NAME}] Failed to inject continuation`, {
if (options.sessionRecovery.isRecovering(sessionID)) {
log(`[${HOOK_NAME}] Skipped: in recovery`, { sessionID })
return
}
const state = options.loopState.getState()
if (!state || !state.active) {
return
}
if (state.session_id && state.session_id !== sessionID) {
if (options.checkSessionExists) {
try {
const exists = await options.checkSessionExists(state.session_id)
if (!exists) {
options.loopState.clear()
log(`[${HOOK_NAME}] Cleared orphaned state from deleted session`, {
orphanedSessionId: state.session_id,
currentSessionId: sessionID,
})
return
}
} catch (err) {
log(`[${HOOK_NAME}] Failed to check session existence`, {
sessionId: state.session_id,
error: String(err),
})
}
}
return
}
const transcriptPath = options.getTranscriptPath(sessionID)
const completionViaTranscript = detectCompletionInTranscript(transcriptPath, state.completion_promise)
const completionViaApi = completionViaTranscript
? false
: await detectCompletionInSessionMessages(ctx, {
sessionID,
promise: state.completion_promise,
apiTimeoutMs: options.apiTimeoutMs,
directory: options.directory,
})
if (completionViaTranscript || completionViaApi) {
log(`[${HOOK_NAME}] Completion detected!`, {
sessionID,
iteration: state.iteration,
promise: state.completion_promise,
detectedVia: completionViaTranscript
? "transcript_file"
: "session_messages_api",
})
options.loopState.clear()
const title = state.ultrawork ? "ULTRAWORK LOOP COMPLETE!" : "Ralph Loop Complete!"
const message = state.ultrawork ? `JUST ULW ULW! Task completed after ${state.iteration} iteration(s)` : `Task completed after ${state.iteration} iteration(s)`
await ctx.client.tui?.showToast?.({ body: { title, message, variant: "success", duration: 5000 } }).catch(() => {})
return
}
if (state.iteration >= state.max_iterations) {
log(`[${HOOK_NAME}] Max iterations reached`, {
sessionID,
iteration: state.iteration,
max: state.max_iterations,
})
options.loopState.clear()
await ctx.client.tui?.showToast?.({
body: { title: "Ralph Loop Stopped", message: `Max iterations (${state.max_iterations}) reached without completion`, variant: "warning", duration: 5000 },
}).catch(() => {})
return
}
const newState = options.loopState.incrementIteration()
if (!newState) {
log(`[${HOOK_NAME}] Failed to increment iteration`, { sessionID })
return
}
log(`[${HOOK_NAME}] Continuing loop`, {
sessionID,
error: String(err),
iteration: newState.iteration,
max: newState.max_iterations,
})
await ctx.client.tui?.showToast?.({
body: {
title: "Ralph Loop",
message: `Iteration ${newState.iteration}/${newState.max_iterations}`,
variant: "info",
duration: 2000,
},
}).catch(() => {})
try {
await continueIteration(ctx, newState, {
previousSessionID: sessionID,
directory: options.directory,
apiTimeoutMs: options.apiTimeoutMs,
loopState: options.loopState,
})
} catch (err) {
log(`[${HOOK_NAME}] Failed to inject continuation`, {
sessionID,
error: String(err),
})
}
return
} finally {
inFlightSessions.delete(sessionID)
}
return
}
if (event.type === "session.deleted") {

View File

@@ -0,0 +1,111 @@
/// <reference types="bun-types" />
import { describe, expect, test } from "bun:test"
import { createRalphLoopHook } from "./index"
function createDeferred(): {
promise: Promise<void>
resolve: () => void
} {
let resolvePromise: (() => void) | null = null
const promise = new Promise<void>((resolve) => {
resolvePromise = resolve
})
return {
promise,
resolve: () => {
if (resolvePromise) {
resolvePromise()
}
},
}
}
async function waitUntil(condition: () => boolean): Promise<void> {
for (let index = 0; index < 100; index++) {
if (condition()) {
return
}
await new Promise<void>((resolve) => {
setTimeout(resolve, 0)
})
}
throw new Error("Condition was not met in time")
}
describe("ralph-loop reset strategy race condition", () => {
test("should skip duplicate idle while reset iteration handling is in flight", async () => {
// given - reset strategy loop with blocked TUI session switch
const promptCalls: Array<{ sessionID: string; text: string }> = []
const createSessionCalls: Array<{ parentID?: string }> = []
let selectSessionCalls = 0
const selectSessionDeferred = createDeferred()
const hook = createRalphLoopHook({
directory: process.cwd(),
client: {
session: {
prompt: async (options: {
path: { id: string }
body: { parts: Array<{ type: string; text: string }> }
}) => {
promptCalls.push({
sessionID: options.path.id,
text: options.body.parts[0].text,
})
return {}
},
promptAsync: async (options: {
path: { id: string }
body: { parts: Array<{ type: string; text: string }> }
}) => {
promptCalls.push({
sessionID: options.path.id,
text: options.body.parts[0].text,
})
return {}
},
create: async (options: {
body: { parentID?: string; title?: string }
query?: { directory?: string }
}) => {
createSessionCalls.push({ parentID: options.body.parentID })
return { data: { id: `new-session-${createSessionCalls.length}` } }
},
messages: async () => ({ data: [] }),
},
tui: {
showToast: async () => ({}),
selectSession: async () => {
selectSessionCalls += 1
await selectSessionDeferred.promise
return {}
},
},
},
} as unknown as Parameters<typeof createRalphLoopHook>[0])
hook.startLoop("session-old", "Build feature", { strategy: "reset" })
// when - first idle is in-flight and old session fires idle again before TUI switch resolves
const firstIdleEvent = hook.event({
event: { type: "session.idle", properties: { sessionID: "session-old" } },
})
await waitUntil(() => selectSessionCalls > 0)
const secondIdleEvent = hook.event({
event: { type: "session.idle", properties: { sessionID: "session-old" } },
})
selectSessionDeferred.resolve()
await Promise.all([firstIdleEvent, secondIdleEvent])
// then - duplicate idle should be skipped to prevent concurrent continuation injection
expect(createSessionCalls.length).toBe(1)
expect(promptCalls.length).toBe(1)
expect(hook.getState()?.iteration).toBe(2)
})
})

View File

@@ -125,7 +125,7 @@ describe("runtime-fallback", () => {
await hook.event({
event: {
type: "session.created",
properties: { info: { id: sessionID, model: "google/gemini-3-pro" } },
properties: { info: { id: sessionID, model: "google/gemini-3.1-pro" } },
},
})
@@ -1841,7 +1841,7 @@ describe("runtime-fallback", () => {
test("should apply fallback model on next chat.message after error", async () => {
const hook = createRuntimeFallbackHook(createMockPluginInput(), {
config: createMockConfig({ notify_on_fallback: false }),
pluginConfig: createMockPluginConfigWithCategoryFallback(["openai/gpt-5.2", "google/gemini-3-pro"]),
pluginConfig: createMockPluginConfigWithCategoryFallback(["openai/gpt-5.2", "google/gemini-3.1-pro"]),
})
const sessionID = "test-session-switch"
SessionCategoryRegistry.register(sessionID, "test")
@@ -1916,7 +1916,7 @@ describe("runtime-fallback", () => {
const input = createMockPluginInput()
const hook = createRuntimeFallbackHook(input, {
config: createMockConfig({ notify_on_fallback: false }),
pluginConfig: createMockPluginConfigWithAgentFallback("oracle", ["openai/gpt-5.2", "google/gemini-3-pro"]),
pluginConfig: createMockPluginConfigWithAgentFallback("oracle", ["openai/gpt-5.2", "google/gemini-3.1-pro"]),
})
const sessionID = "test-agent-fallback"

View File

@@ -3,6 +3,7 @@ const { describe, expect, test, beforeEach, afterEach, spyOn } = require("bun:te
const { createSessionNotification } = require("./session-notification")
const { setMainSession, subagentSessions, _resetForTesting } = require("../features/claude-code-session-state")
const utils = require("./session-notification-utils")
const sender = require("./session-notification-sender")
describe("session-notification input-needed events", () => {
let notificationCalls: string[]
@@ -37,6 +38,10 @@ describe("session-notification input-needed events", () => {
spyOn(utils, "getNotifySendPath").mockResolvedValue("/usr/bin/notify-send")
spyOn(utils, "getPowershellPath").mockResolvedValue("powershell")
spyOn(utils, "startBackgroundCheck").mockImplementation(() => {})
spyOn(sender, "detectPlatform").mockReturnValue("darwin")
spyOn(sender, "sendSessionNotification").mockImplementation(async (_ctx: unknown, _platform: unknown, _title: unknown, message: string) => {
notificationCalls.push(message)
})
})
afterEach(() => {
@@ -47,7 +52,7 @@ describe("session-notification input-needed events", () => {
test("sends question notification when question tool asks for input", async () => {
const sessionID = "main-question"
setMainSession(sessionID)
const hook = createSessionNotification(createMockPluginInput())
const hook = createSessionNotification(createMockPluginInput(), { enforceMainSessionFilter: false })
await hook({
event: {
@@ -74,7 +79,7 @@ describe("session-notification input-needed events", () => {
test("sends permission notification for permission events", async () => {
const sessionID = "main-permission"
setMainSession(sessionID)
const hook = createSessionNotification(createMockPluginInput())
const hook = createSessionNotification(createMockPluginInput(), { enforceMainSessionFilter: false })
await hook({
event: {

View File

@@ -1,8 +1,9 @@
import { describe, expect, test, beforeEach, afterEach, spyOn } from "bun:test"
const { describe, expect, test, beforeEach, afterEach, spyOn } = require("bun:test")
import { createSessionNotification } from "./session-notification"
import { setMainSession, subagentSessions, _resetForTesting } from "../features/claude-code-session-state"
import * as utils from "./session-notification-utils"
import * as sender from "./session-notification-sender"
describe("session-notification", () => {
let notificationCalls: string[]
@@ -40,6 +41,10 @@ describe("session-notification", () => {
spyOn(utils, "getPaplayPath").mockResolvedValue("/usr/bin/paplay")
spyOn(utils, "getAplayPath").mockResolvedValue("/usr/bin/aplay")
spyOn(utils, "startBackgroundCheck").mockImplementation(() => {})
spyOn(sender, "detectPlatform").mockReturnValue("darwin")
spyOn(sender, "sendSessionNotification").mockImplementation(async (_ctx, _platform, _title, message) => {
notificationCalls.push(message)
})
})
afterEach(() => {
@@ -105,6 +110,7 @@ describe("session-notification", () => {
const hook = createSessionNotification(createMockPluginInput(), {
idleConfirmationDelay: 10,
skipIfIncompleteTodos: false,
enforceMainSessionFilter: false,
})
// when - main session goes idle
@@ -332,6 +338,7 @@ describe("session-notification", () => {
const hook = createSessionNotification(createMockPluginInput(), {
idleConfirmationDelay: 10,
skipIfIncompleteTodos: false,
enforceMainSessionFilter: false,
})
// when - session goes idle twice

View File

@@ -4,11 +4,9 @@ import {
startBackgroundCheck,
} from "./session-notification-utils"
import {
detectPlatform,
getDefaultSoundPath,
playSessionNotificationSound,
sendSessionNotification,
type Platform,
} from "./session-notification-sender"
import * as sessionNotificationSender from "./session-notification-sender"
import { hasIncompleteTodos } from "./session-todo-status"
import { createIdleNotificationScheduler } from "./session-notification-scheduler"
@@ -25,13 +23,14 @@ interface SessionNotificationConfig {
skipIfIncompleteTodos?: boolean
/** Maximum number of sessions to track before cleanup (default: 100) */
maxTrackedSessions?: number
enforceMainSessionFilter?: boolean
}
export function createSessionNotification(
ctx: PluginInput,
config: SessionNotificationConfig = {}
) {
const currentPlatform = detectPlatform()
const defaultSoundPath = getDefaultSoundPath(currentPlatform)
const currentPlatform: Platform = sessionNotificationSender.detectPlatform()
const defaultSoundPath = sessionNotificationSender.getDefaultSoundPath(currentPlatform)
startBackgroundCheck(currentPlatform)
@@ -45,6 +44,7 @@ export function createSessionNotification(
idleConfirmationDelay: 1500,
skipIfIncompleteTodos: true,
maxTrackedSessions: 100,
enforceMainSessionFilter: true,
...config,
}
@@ -53,8 +53,8 @@ export function createSessionNotification(
platform: currentPlatform,
config: mergedConfig,
hasIncompleteTodos,
send: sendSessionNotification,
playSound: playSessionNotificationSound,
send: sessionNotificationSender.sendSessionNotification,
playSound: sessionNotificationSender.playSessionNotificationSound,
})
const QUESTION_TOOLS = new Set(["question", "ask_user_question", "askuserquestion"])
@@ -81,8 +81,10 @@ export function createSessionNotification(
const shouldNotifyForSession = (sessionID: string): boolean => {
if (subagentSessions.has(sessionID)) return false
const mainSessionID = getMainSessionID()
if (mainSessionID && sessionID !== mainSessionID) return false
if (mergedConfig.enforceMainSessionFilter) {
const mainSessionID = getMainSessionID()
if (mainSessionID && sessionID !== mainSessionID) return false
}
return true
}
@@ -146,9 +148,14 @@ export function createSessionNotification(
if (!shouldNotifyForSession(sessionID)) return
scheduler.markSessionActivity(sessionID)
await sendSessionNotification(ctx, currentPlatform, mergedConfig.title, mergedConfig.permissionMessage)
await sessionNotificationSender.sendSessionNotification(
ctx,
currentPlatform,
mergedConfig.title,
mergedConfig.permissionMessage,
)
if (mergedConfig.playSound && mergedConfig.soundPath) {
await playSessionNotificationSound(ctx, currentPlatform, mergedConfig.soundPath)
await sessionNotificationSender.playSessionNotificationSound(ctx, currentPlatform, mergedConfig.soundPath)
}
return
}
@@ -168,9 +175,9 @@ export function createSessionNotification(
? mergedConfig.permissionMessage
: mergedConfig.questionMessage
await sendSessionNotification(ctx, currentPlatform, mergedConfig.title, message)
await sessionNotificationSender.sendSessionNotification(ctx, currentPlatform, mergedConfig.title, message)
if (mergedConfig.playSound && mergedConfig.soundPath) {
await playSessionNotificationSound(ctx, currentPlatform, mergedConfig.soundPath)
await sessionNotificationSender.playSessionNotificationSound(ctx, currentPlatform, mergedConfig.soundPath)
}
}
}

View File

@@ -7,9 +7,12 @@ import { createStartWorkHook } from "./index"
import {
writeBoulderState,
clearBoulderState,
readBoulderState,
} from "../../features/boulder-state"
import type { BoulderState } from "../../features/boulder-state"
import * as sessionState from "../../features/claude-code-session-state"
import * as worktreeDetector from "./worktree-detector"
import * as worktreeDetector from "./worktree-detector"
describe("start-work hook", () => {
let testDir: string
@@ -402,4 +405,152 @@ describe("start-work hook", () => {
updateSpy.mockRestore()
})
})
describe("worktree support", () => {
let detectSpy: ReturnType<typeof spyOn>
beforeEach(() => {
detectSpy = spyOn(worktreeDetector, "detectWorktreePath").mockReturnValue(null)
})
afterEach(() => {
detectSpy.mockRestore()
})
test("should inject model-decides instructions when no --worktree flag", async () => {
// given - single plan, no worktree flag
const plansDir = join(testDir, ".sisyphus", "plans")
mkdirSync(plansDir, { recursive: true })
writeFileSync(join(plansDir, "my-plan.md"), "# Plan\n- [ ] Task 1")
const hook = createStartWorkHook(createMockPluginInput())
const output = {
parts: [{ type: "text", text: "<session-context></session-context>" }],
}
// when
await hook["chat.message"]({ sessionID: "session-123" }, output)
// then - model-decides instructions should appear
expect(output.parts[0].text).toContain("Worktree Setup Required")
expect(output.parts[0].text).toContain("git worktree list --porcelain")
expect(output.parts[0].text).toContain("git worktree add")
})
test("should inject worktree path when --worktree flag is valid", async () => {
// given - single plan + valid worktree path
const plansDir = join(testDir, ".sisyphus", "plans")
mkdirSync(plansDir, { recursive: true })
writeFileSync(join(plansDir, "my-plan.md"), "# Plan\n- [ ] Task 1")
detectSpy.mockReturnValue("/validated/worktree")
const hook = createStartWorkHook(createMockPluginInput())
const output = {
parts: [{ type: "text", text: "<session-context>\n<user-request>--worktree /validated/worktree</user-request>\n</session-context>" }],
}
// when
await hook["chat.message"]({ sessionID: "session-123" }, output)
// then - validated path shown, no model-decides instructions
expect(output.parts[0].text).toContain("**Worktree**: /validated/worktree")
expect(output.parts[0].text).not.toContain("Worktree Setup Required")
})
test("should store worktree_path in boulder when --worktree is valid", async () => {
// given - plan + valid worktree
const plansDir = join(testDir, ".sisyphus", "plans")
mkdirSync(plansDir, { recursive: true })
writeFileSync(join(plansDir, "my-plan.md"), "# Plan\n- [ ] Task 1")
detectSpy.mockReturnValue("/valid/wt")
const hook = createStartWorkHook(createMockPluginInput())
const output = {
parts: [{ type: "text", text: "<session-context>\n<user-request>--worktree /valid/wt</user-request>\n</session-context>" }],
}
// when
await hook["chat.message"]({ sessionID: "session-123" }, output)
// then - boulder.json has worktree_path
const state = readBoulderState(testDir)
expect(state?.worktree_path).toBe("/valid/wt")
})
test("should NOT store worktree_path when --worktree path is invalid", async () => {
// given - plan + invalid worktree path (detectWorktreePath returns null)
const plansDir = join(testDir, ".sisyphus", "plans")
mkdirSync(plansDir, { recursive: true })
writeFileSync(join(plansDir, "my-plan.md"), "# Plan\n- [ ] Task 1")
// detectSpy already returns null by default
const hook = createStartWorkHook(createMockPluginInput())
const output = {
parts: [{ type: "text", text: "<session-context>\n<user-request>--worktree /nonexistent/wt</user-request>\n</session-context>" }],
}
// when
await hook["chat.message"]({ sessionID: "session-123" }, output)
// then - worktree_path absent, setup instructions present
const state = readBoulderState(testDir)
expect(state?.worktree_path).toBeUndefined()
expect(output.parts[0].text).toContain("needs setup")
expect(output.parts[0].text).toContain("git worktree add /nonexistent/wt")
})
test("should update boulder worktree_path on resume when new --worktree given", async () => {
// given - existing boulder with old worktree, user provides new worktree
const planPath = join(testDir, "plan.md")
writeFileSync(planPath, "# Plan\n- [ ] Task 1")
const existingState: BoulderState = {
active_plan: planPath,
started_at: "2026-01-01T00:00:00Z",
session_ids: ["old-session"],
plan_name: "plan",
worktree_path: "/old/wt",
}
writeBoulderState(testDir, existingState)
detectSpy.mockReturnValue("/new/wt")
const hook = createStartWorkHook(createMockPluginInput())
const output = {
parts: [{ type: "text", text: "<session-context>\n<user-request>--worktree /new/wt</user-request>\n</session-context>" }],
}
// when
await hook["chat.message"]({ sessionID: "session-456" }, output)
// then - boulder reflects updated worktree and new session appended
const state = readBoulderState(testDir)
expect(state?.worktree_path).toBe("/new/wt")
expect(state?.session_ids).toContain("session-456")
})
test("should show existing worktree on resume when no --worktree flag", async () => {
// given - existing boulder already has worktree_path, no flag given
const planPath = join(testDir, "plan.md")
writeFileSync(planPath, "# Plan\n- [ ] Task 1")
const existingState: BoulderState = {
active_plan: planPath,
started_at: "2026-01-01T00:00:00Z",
session_ids: ["old-session"],
plan_name: "plan",
worktree_path: "/existing/wt",
}
writeBoulderState(testDir, existingState)
const hook = createStartWorkHook(createMockPluginInput())
const output = {
parts: [{ type: "text", text: "<session-context></session-context>" }],
}
// when
await hook["chat.message"]({ sessionID: "session-789" }, output)
// then - shows existing worktree, no model-decides instructions
expect(output.parts[0].text).toContain("/existing/wt")
expect(output.parts[0].text).not.toContain("Worktree Setup Required")
})
})
})

View File

@@ -1 +1,4 @@
export { HOOK_NAME, createStartWorkHook } from "./start-work-hook"
export { detectWorktreePath } from "./worktree-detector"
export type { ParsedUserRequest } from "./parse-user-request"
export { parseUserRequest } from "./parse-user-request"

View File

@@ -0,0 +1,78 @@
/// <reference types="bun-types" />
import { describe, expect, test } from "bun:test"
import { parseUserRequest } from "./parse-user-request"
describe("parseUserRequest", () => {
describe("when no user-request tag", () => {
test("#given prompt without tag #when parsing #then returns nulls", () => {
const result = parseUserRequest("Just a regular message without any tags")
expect(result.planName).toBeNull()
expect(result.explicitWorktreePath).toBeNull()
})
})
describe("when user-request tag is empty", () => {
test("#given empty user-request tag #when parsing #then returns nulls", () => {
const result = parseUserRequest("<user-request> </user-request>")
expect(result.planName).toBeNull()
expect(result.explicitWorktreePath).toBeNull()
})
})
describe("when only plan name given", () => {
test("#given plan name without worktree flag #when parsing #then returns plan name with null worktree", () => {
const result = parseUserRequest("<session-context>\n<user-request>my-plan</user-request>\n</session-context>")
expect(result.planName).toBe("my-plan")
expect(result.explicitWorktreePath).toBeNull()
})
})
describe("when only --worktree flag given", () => {
test("#given --worktree with path only #when parsing #then returns worktree path with null plan", () => {
const result = parseUserRequest("<user-request>--worktree /home/user/repo-feat</user-request>")
expect(result.planName).toBeNull()
expect(result.explicitWorktreePath).toBe("/home/user/repo-feat")
})
})
describe("when plan name and --worktree are both given", () => {
test("#given plan name before --worktree #when parsing #then returns both", () => {
const result = parseUserRequest("<user-request>my-plan --worktree /path/to/worktree</user-request>")
expect(result.planName).toBe("my-plan")
expect(result.explicitWorktreePath).toBe("/path/to/worktree")
})
test("#given --worktree before plan name #when parsing #then returns both", () => {
const result = parseUserRequest("<user-request>--worktree /path/to/worktree my-plan</user-request>")
expect(result.planName).toBe("my-plan")
expect(result.explicitWorktreePath).toBe("/path/to/worktree")
})
})
describe("when --worktree flag has no path", () => {
test("#given --worktree without path #when parsing #then worktree path is null", () => {
const result = parseUserRequest("<user-request>--worktree</user-request>")
expect(result.explicitWorktreePath).toBeNull()
})
})
describe("when ultrawork keywords are present", () => {
test("#given plan name with ultrawork keyword #when parsing #then strips keyword from plan name", () => {
const result = parseUserRequest("<user-request>my-plan ultrawork</user-request>")
expect(result.planName).toBe("my-plan")
})
test("#given plan name with ulw keyword and worktree #when parsing #then strips ulw, preserves worktree", () => {
const result = parseUserRequest("<user-request>my-plan ulw --worktree /path/to/wt</user-request>")
expect(result.planName).toBe("my-plan")
expect(result.explicitWorktreePath).toBe("/path/to/wt")
})
test("#given only ultrawork keyword with worktree #when parsing #then plan name is null, worktree preserved", () => {
const result = parseUserRequest("<user-request>ultrawork --worktree /wt</user-request>")
expect(result.planName).toBeNull()
expect(result.explicitWorktreePath).toBe("/wt")
})
})
})

View File

@@ -0,0 +1,29 @@
const KEYWORD_PATTERN = /\b(ultrawork|ulw)\b/gi
const WORKTREE_FLAG_PATTERN = /--worktree(?:\s+(\S+))?/
export interface ParsedUserRequest {
planName: string | null
explicitWorktreePath: string | null
}
export function parseUserRequest(promptText: string): ParsedUserRequest {
const match = promptText.match(/<user-request>\s*([\s\S]*?)\s*<\/user-request>/i)
if (!match) return { planName: null, explicitWorktreePath: null }
let rawArg = match[1].trim()
if (!rawArg) return { planName: null, explicitWorktreePath: null }
const worktreeMatch = rawArg.match(WORKTREE_FLAG_PATTERN)
const explicitWorktreePath = worktreeMatch ? (worktreeMatch[1] ?? null) : null
if (worktreeMatch) {
rawArg = rawArg.replace(worktreeMatch[0], "").trim()
}
const cleanedArg = rawArg.replace(KEYWORD_PATTERN, "").trim()
return {
planName: cleanedArg || null,
explicitWorktreePath,
}
}

View File

@@ -1,3 +1,4 @@
import { statSync } from "node:fs"
import type { PluginInput } from "@opencode-ai/plugin"
import {
readBoulderState,
@@ -11,11 +12,11 @@ import {
} from "../../features/boulder-state"
import { log } from "../../shared/logger"
import { updateSessionAgent } from "../../features/claude-code-session-state"
import { detectWorktreePath } from "./worktree-detector"
import { parseUserRequest } from "./parse-user-request"
export const HOOK_NAME = "start-work" as const
const KEYWORD_PATTERN = /\b(ultrawork|ulw)\b/gi
interface StartWorkHookInput {
sessionID: string
messageID?: string
@@ -25,73 +26,76 @@ interface StartWorkHookOutput {
parts: Array<{ type: string; text?: string }>
}
function extractUserRequestPlanName(promptText: string): string | null {
const userRequestMatch = promptText.match(/<user-request>\s*([\s\S]*?)\s*<\/user-request>/i)
if (!userRequestMatch) return null
const rawArg = userRequestMatch[1].trim()
if (!rawArg) return null
const cleanedArg = rawArg.replace(KEYWORD_PATTERN, "").trim()
return cleanedArg || null
}
function findPlanByName(plans: string[], requestedName: string): string | null {
const lowerName = requestedName.toLowerCase()
const exactMatch = plans.find(p => getPlanName(p).toLowerCase() === lowerName)
const exactMatch = plans.find((p) => getPlanName(p).toLowerCase() === lowerName)
if (exactMatch) return exactMatch
const partialMatch = plans.find(p => getPlanName(p).toLowerCase().includes(lowerName))
const partialMatch = plans.find((p) => getPlanName(p).toLowerCase().includes(lowerName))
return partialMatch || null
}
const MODEL_DECIDES_WORKTREE_BLOCK = `
## Worktree Setup Required
No worktree specified. Before starting work, you MUST choose or create one:
1. \`git worktree list --porcelain\` — list existing worktrees
2. Create if needed: \`git worktree add <absolute-path> <branch-or-HEAD>\`
3. Update \`.sisyphus/boulder.json\` — add \`"worktree_path": "<absolute-path>"\`
4. Work exclusively inside that worktree directory`
function resolveWorktreeContext(
explicitWorktreePath: string | null,
): { worktreePath: string | undefined; block: string } {
if (explicitWorktreePath === null) {
return { worktreePath: undefined, block: MODEL_DECIDES_WORKTREE_BLOCK }
}
const validatedPath = detectWorktreePath(explicitWorktreePath)
if (validatedPath) {
return { worktreePath: validatedPath, block: `\n**Worktree**: ${validatedPath}` }
}
return {
worktreePath: undefined,
block: `\n**Worktree** (needs setup): \`git worktree add ${explicitWorktreePath} <branch>\`, then add \`"worktree_path"\` to boulder.json`,
}
}
export function createStartWorkHook(ctx: PluginInput) {
return {
"chat.message": async (
input: StartWorkHookInput,
output: StartWorkHookOutput
): Promise<void> => {
"chat.message": async (input: StartWorkHookInput, output: StartWorkHookOutput): Promise<void> => {
const parts = output.parts
const promptText = parts
?.filter((p) => p.type === "text" && p.text)
.map((p) => p.text)
.join("\n")
.trim() || ""
const promptText =
parts
?.filter((p) => p.type === "text" && p.text)
.map((p) => p.text)
.join("\n")
.trim() || ""
// Only trigger on actual command execution (contains <session-context> tag)
// NOT on description text like "Start Sisyphus work session from Prometheus plan"
const isStartWorkCommand = promptText.includes("<session-context>")
if (!promptText.includes("<session-context>")) return
if (!isStartWorkCommand) {
return
}
log(`[${HOOK_NAME}] Processing start-work command`, {
sessionID: input.sessionID,
})
updateSessionAgent(input.sessionID, "atlas") // Always switch: fixes #1298
log(`[${HOOK_NAME}] Processing start-work command`, { sessionID: input.sessionID })
updateSessionAgent(input.sessionID, "atlas")
const existingState = readBoulderState(ctx.directory)
const sessionId = input.sessionID
const timestamp = new Date().toISOString()
const { planName: explicitPlanName, explicitWorktreePath } = parseUserRequest(promptText)
const { worktreePath, block: worktreeBlock } = resolveWorktreeContext(explicitWorktreePath)
let contextInfo = ""
const explicitPlanName = extractUserRequestPlanName(promptText)
if (explicitPlanName) {
log(`[${HOOK_NAME}] Explicit plan name requested: ${explicitPlanName}`, {
sessionID: input.sessionID,
})
log(`[${HOOK_NAME}] Explicit plan name requested: ${explicitPlanName}`, { sessionID: input.sessionID })
const allPlans = findPrometheusPlans(ctx.directory)
const matchedPlan = findPlanByName(allPlans, explicitPlanName)
if (matchedPlan) {
const progress = getPlanProgress(matchedPlan)
if (progress.isComplete) {
contextInfo = `
## Plan Already Complete
@@ -99,12 +103,10 @@ export function createStartWorkHook(ctx: PluginInput) {
The requested plan "${getPlanName(matchedPlan)}" has been completed.
All ${progress.total} tasks are done. Create a new plan with: /plan "your task"`
} else {
if (existingState) {
clearBoulderState(ctx.directory)
}
const newState = createBoulderState(matchedPlan, sessionId, "atlas")
if (existingState) clearBoulderState(ctx.directory)
const newState = createBoulderState(matchedPlan, sessionId, "atlas", worktreePath)
writeBoulderState(ctx.directory, newState)
contextInfo = `
## Auto-Selected Plan
@@ -113,17 +115,20 @@ All ${progress.total} tasks are done. Create a new plan with: /plan "your task"`
**Progress**: ${progress.completed}/${progress.total} tasks
**Session ID**: ${sessionId}
**Started**: ${timestamp}
${worktreeBlock}
boulder.json has been created. Read the plan and begin execution.`
}
} else {
const incompletePlans = allPlans.filter(p => !getPlanProgress(p).isComplete)
const incompletePlans = allPlans.filter((p) => !getPlanProgress(p).isComplete)
if (incompletePlans.length > 0) {
const planList = incompletePlans.map((p, i) => {
const prog = getPlanProgress(p)
return `${i + 1}. [${getPlanName(p)}] - Progress: ${prog.completed}/${prog.total}`
}).join("\n")
const planList = incompletePlans
.map((p, i) => {
const prog = getPlanProgress(p)
return `${i + 1}. [${getPlanName(p)}] - Progress: ${prog.completed}/${prog.total}`
})
.join("\n")
contextInfo = `
## Plan Not Found
@@ -143,9 +148,25 @@ No incomplete plans available. Create a new plan with: /plan "your task"`
}
} else if (existingState) {
const progress = getPlanProgress(existingState.active_plan)
if (!progress.isComplete) {
appendSessionId(ctx.directory, sessionId)
const effectiveWorktree = worktreePath ?? existingState.worktree_path
if (worktreePath !== undefined) {
const updatedSessions = existingState.session_ids.includes(sessionId)
? existingState.session_ids
: [...existingState.session_ids, sessionId]
writeBoulderState(ctx.directory, {
...existingState,
worktree_path: worktreePath,
session_ids: updatedSessions,
})
} else {
appendSessionId(ctx.directory, sessionId)
}
const worktreeDisplay = effectiveWorktree ? `\n**Worktree**: ${effectiveWorktree}` : worktreeBlock
contextInfo = `
## Active Work Session Found
@@ -155,6 +176,7 @@ No incomplete plans available. Create a new plan with: /plan "your task"`
**Progress**: ${progress.completed}/${progress.total} tasks completed
**Sessions**: ${existingState.session_ids.length + 1} (current session appended)
**Started**: ${existingState.started_at}
${worktreeDisplay}
The current session (${sessionId}) has been added to session_ids.
Read the plan file and continue from the first unchecked task.`
@@ -167,13 +189,15 @@ Looking for new plans...`
}
}
if ((!existingState && !explicitPlanName) || (existingState && !explicitPlanName && getPlanProgress(existingState.active_plan).isComplete)) {
if (
(!existingState && !explicitPlanName) ||
(existingState && !explicitPlanName && getPlanProgress(existingState.active_plan).isComplete)
) {
const plans = findPrometheusPlans(ctx.directory)
const incompletePlans = plans.filter(p => !getPlanProgress(p).isComplete)
const incompletePlans = plans.filter((p) => !getPlanProgress(p).isComplete)
if (plans.length === 0) {
contextInfo += `
## No Plans Found
No Prometheus plan files found at .sisyphus/plans/
@@ -187,7 +211,7 @@ All ${plans.length} plan(s) are complete. Create a new plan with: /plan "your ta
} else if (incompletePlans.length === 1) {
const planPath = incompletePlans[0]
const progress = getPlanProgress(planPath)
const newState = createBoulderState(planPath, sessionId, "atlas")
const newState = createBoulderState(planPath, sessionId, "atlas", worktreePath)
writeBoulderState(ctx.directory, newState)
contextInfo += `
@@ -199,15 +223,17 @@ All ${plans.length} plan(s) are complete. Create a new plan with: /plan "your ta
**Progress**: ${progress.completed}/${progress.total} tasks
**Session ID**: ${sessionId}
**Started**: ${timestamp}
${worktreeBlock}
boulder.json has been created. Read the plan and begin execution.`
} else {
const planList = incompletePlans.map((p, i) => {
const progress = getPlanProgress(p)
const stat = require("node:fs").statSync(p)
const modified = new Date(stat.mtimeMs).toISOString()
return `${i + 1}. [${getPlanName(p)}] - Modified: ${modified} - Progress: ${progress.completed}/${progress.total}`
}).join("\n")
const planList = incompletePlans
.map((p, i) => {
const progress = getPlanProgress(p)
const modified = new Date(statSync(p).mtimeMs).toISOString()
return `${i + 1}. [${getPlanName(p)}] - Modified: ${modified} - Progress: ${progress.completed}/${progress.total}`
})
.join("\n")
contextInfo += `
@@ -220,6 +246,7 @@ Session ID: ${sessionId}
${planList}
Ask the user which plan to work on. Present the options above and wait for their response.
${worktreeBlock}
</system-reminder>`
}
}
@@ -229,13 +256,14 @@ Ask the user which plan to work on. Present the options above and wait for their
output.parts[idx].text = output.parts[idx].text
.replace(/\$SESSION_ID/g, sessionId)
.replace(/\$TIMESTAMP/g, timestamp)
output.parts[idx].text += `\n\n---\n${contextInfo}`
}
log(`[${HOOK_NAME}] Context injected`, {
sessionID: input.sessionID,
hasExistingState: !!existingState,
worktreePath,
})
},
}

View File

@@ -0,0 +1,79 @@
/// <reference types="bun-types" />
import { describe, expect, test, spyOn, beforeEach, afterEach } from "bun:test"
import * as childProcess from "node:child_process"
import { detectWorktreePath } from "./worktree-detector"
describe("detectWorktreePath", () => {
let execFileSyncSpy: ReturnType<typeof spyOn>
beforeEach(() => {
execFileSyncSpy = spyOn(childProcess, "execFileSync").mockImplementation(
((_file: string, _args: string[]) => "") as typeof childProcess.execFileSync,
)
})
afterEach(() => {
execFileSyncSpy.mockRestore()
})
describe("when directory is a valid git worktree", () => {
test("#given valid git dir #when detecting #then returns worktree root path", () => {
execFileSyncSpy.mockImplementation(
((_file: string, _args: string[]) => "/home/user/my-repo\n") as typeof childProcess.execFileSync,
)
// when
const result = detectWorktreePath("/home/user/my-repo/src")
// then
expect(result).toBe("/home/user/my-repo")
})
test("#given git output with trailing newline #when detecting #then trims output", () => {
execFileSyncSpy.mockImplementation(
((_file: string, _args: string[]) => "/projects/worktree-a\n\n") as typeof childProcess.execFileSync,
)
const result = detectWorktreePath("/projects/worktree-a")
expect(result).toBe("/projects/worktree-a")
})
test("#given valid dir #when detecting #then calls git rev-parse with cwd", () => {
execFileSyncSpy.mockImplementation(
((_file: string, _args: string[]) => "/repo\n") as typeof childProcess.execFileSync,
)
detectWorktreePath("/repo/some/subdir")
expect(execFileSyncSpy).toHaveBeenCalledWith(
"git",
["rev-parse", "--show-toplevel"],
expect.objectContaining({ cwd: "/repo/some/subdir" }),
)
})
})
describe("when directory is not a git worktree", () => {
test("#given non-git directory #when detecting #then returns null", () => {
execFileSyncSpy.mockImplementation((_file: string, _args: string[]) => {
throw new Error("not a git repository")
})
const result = detectWorktreePath("/tmp/not-a-repo")
expect(result).toBeNull()
})
test("#given non-existent directory #when detecting #then returns null", () => {
execFileSyncSpy.mockImplementation((_file: string, _args: string[]) => {
throw new Error("ENOENT: no such file or directory")
})
const result = detectWorktreePath("/nonexistent/path")
expect(result).toBeNull()
})
})
})

View File

@@ -0,0 +1,14 @@
import { execFileSync } from "node:child_process"
export function detectWorktreePath(directory: string): string | null {
try {
return execFileSync("git", ["rev-parse", "--show-toplevel"], {
cwd: directory,
encoding: "utf-8",
timeout: 5000,
stdio: ["pipe", "pipe", "pipe"],
}).trim()
} catch {
return null
}
}

View File

@@ -1,4 +1,5 @@
import type { PluginInput } from "@opencode-ai/plugin"
import type { BackgroundManager } from "../../features/background-agent"
import {
clearContinuationMarker,
@@ -8,6 +9,11 @@ import { log } from "../../shared/logger"
const HOOK_NAME = "stop-continuation-guard"
type StopContinuationBackgroundManager = Pick<
BackgroundManager,
"getAllDescendantTasks" | "cancelTask"
>
export interface StopContinuationGuard {
event: (input: { event: { type: string; properties?: unknown } }) => Promise<void>
"chat.message": (input: { sessionID?: string }) => Promise<void>
@@ -17,7 +23,10 @@ export interface StopContinuationGuard {
}
export function createStopContinuationGuardHook(
ctx: PluginInput
ctx: PluginInput,
options?: {
backgroundManager?: StopContinuationBackgroundManager
}
): StopContinuationGuard {
const stoppedSessions = new Set<string>()
@@ -25,6 +34,38 @@ export function createStopContinuationGuardHook(
stoppedSessions.add(sessionID)
setContinuationMarkerSource(ctx.directory, sessionID, "stop", "stopped", "continuation stopped")
log(`[${HOOK_NAME}] Continuation stopped for session`, { sessionID })
const backgroundManager = options?.backgroundManager
if (!backgroundManager) {
return
}
const cancellableTasks = backgroundManager
.getAllDescendantTasks(sessionID)
.filter((task) => task.status === "running" || task.status === "pending")
if (cancellableTasks.length === 0) {
return
}
void Promise.allSettled(
cancellableTasks.map(async (task) => {
await backgroundManager.cancelTask(task.id, {
source: "stop-continuation",
reason: "Continuation stopped via /stop-continuation",
abortSession: task.status === "running",
skipNotification: true,
})
})
).then((results) => {
const cancelledCount = results.filter((result) => result.status === "fulfilled").length
const failedCount = results.length - cancelledCount
log(`[${HOOK_NAME}] Cancelled background tasks for stopped session`, {
sessionID,
cancelledCount,
failedCount,
})
})
}
const isStopped = (sessionID: string): boolean => {

View File

@@ -2,9 +2,15 @@ import { afterEach, describe, expect, test } from "bun:test"
import { mkdtempSync, rmSync } from "node:fs"
import { join } from "node:path"
import { tmpdir } from "node:os"
import type { BackgroundManager, BackgroundTask } from "../../features/background-agent"
import { readContinuationMarker } from "../../features/run-continuation-state"
import { createStopContinuationGuardHook } from "./index"
type CancelCall = {
taskId: string
options?: Parameters<BackgroundManager["cancelTask"]>[1]
}
describe("stop-continuation-guard", () => {
const tempDirs: string[] = []
@@ -34,6 +40,33 @@ describe("stop-continuation-guard", () => {
} as any
}
function createBackgroundTask(status: BackgroundTask["status"], id: string): BackgroundTask {
return {
id,
status,
description: `${id} description`,
parentSessionID: "parent-session",
parentMessageID: "parent-message",
prompt: "prompt",
agent: "sisyphus-junior",
}
}
function createMockBackgroundManager(tasks: BackgroundTask[], cancelCalls: CancelCall[]): Pick<BackgroundManager, "getAllDescendantTasks" | "cancelTask"> {
return {
getAllDescendantTasks: () => tasks,
cancelTask: async (taskId: string, options?: Parameters<BackgroundManager["cancelTask"]>[1]) => {
cancelCalls.push({ taskId, options })
return true
},
}
}
async function flushMicrotasks(): Promise<void> {
await Promise.resolve()
await Promise.resolve()
}
test("should mark session as stopped", () => {
// given - a guard hook with no stopped sessions
const input = createMockPluginInput()
@@ -166,4 +199,31 @@ describe("stop-continuation-guard", () => {
// then - should not throw and stopped session remains stopped
expect(guard.isStopped("some-session")).toBe(true)
})
test("should cancel only running and pending background tasks on stop", async () => {
// given - a background manager with mixed task statuses
const cancelCalls: CancelCall[] = []
const backgroundManager = createMockBackgroundManager(
[
createBackgroundTask("running", "task-running"),
createBackgroundTask("pending", "task-pending"),
createBackgroundTask("completed", "task-completed"),
],
cancelCalls,
)
const guard = createStopContinuationGuardHook(createMockPluginInput(), {
backgroundManager,
})
// when - stop continuation is triggered
guard.stop("test-session-bg")
await flushMicrotasks()
// then - only running and pending tasks are cancelled
expect(cancelCalls).toHaveLength(2)
expect(cancelCalls[0]?.taskId).toBe("task-running")
expect(cancelCalls[0]?.options?.abortSession).toBe(true)
expect(cancelCalls[1]?.taskId).toBe("task-pending")
expect(cancelCalls[1]?.options?.abortSession).toBe(false)
})
})

View File

@@ -1,6 +1,6 @@
import { detectThinkKeyword, extractPromptText } from "./detector"
import { getHighVariant, getThinkingConfig, isAlreadyHighVariant } from "./switcher"
import type { ThinkModeInput, ThinkModeState } from "./types"
import { getHighVariant, isAlreadyHighVariant } from "./switcher"
import type { ThinkModeState } from "./types"
import { log } from "../../shared"
const thinkModeState = new Map<string, ThinkModeState>()
@@ -10,53 +10,24 @@ export function clearThinkModeState(sessionID: string): void {
}
export function createThinkModeHook() {
function isDisabledThinkingConfig(config: Record<string, unknown>): boolean {
const thinkingConfig = config.thinking
if (
typeof thinkingConfig === "object" &&
thinkingConfig !== null &&
"type" in thinkingConfig &&
(thinkingConfig as { type?: string }).type === "disabled"
) {
return true
}
const providerOptions = config.providerOptions
if (typeof providerOptions !== "object" || providerOptions === null) {
return false
}
return Object.values(providerOptions as Record<string, unknown>).some(
(providerConfig) => {
if (typeof providerConfig !== "object" || providerConfig === null) {
return false
}
const providerConfigMap = providerConfig as Record<string, unknown>
const extraBody = providerConfigMap.extra_body
if (typeof extraBody !== "object" || extraBody === null) {
return false
}
const extraBodyMap = extraBody as Record<string, unknown>
const extraThinking = extraBodyMap.thinking
return (
typeof extraThinking === "object" &&
extraThinking !== null &&
(extraThinking as { type?: string }).type === "disabled"
)
}
)
}
return {
"chat.params": async (output: ThinkModeInput, sessionID: string): Promise<void> => {
"chat.message": async (
input: {
sessionID: string
model?: { providerID: string; modelID: string }
},
output: {
message: Record<string, unknown>
parts: Array<{ type: string; text?: string; [key: string]: unknown }>
}
): Promise<void> => {
const promptText = extractPromptText(output.parts)
const sessionID = input.sessionID
const state: ThinkModeState = {
requested: false,
modelSwitched: false,
thinkingConfigInjected: false,
variantSet: false,
}
if (!detectThinkKeyword(promptText)) {
@@ -66,7 +37,12 @@ export function createThinkModeHook() {
state.requested = true
const currentModel = output.message.model
if (typeof output.message.variant === "string") {
thinkModeState.set(sessionID, state)
return
}
const currentModel = input.model
if (!currentModel) {
thinkModeState.set(sessionID, state)
return
@@ -81,14 +57,15 @@ export function createThinkModeHook() {
}
const highVariant = getHighVariant(currentModel.modelID)
const thinkingConfig = getThinkingConfig(currentModel.providerID, currentModel.modelID)
if (highVariant) {
output.message.model = {
providerID: currentModel.providerID,
modelID: highVariant,
}
output.message.variant = "high"
state.modelSwitched = true
state.variantSet = true
log("Think mode: model switched to high variant", {
sessionID,
from: currentModel.modelID,
@@ -96,42 +73,6 @@ export function createThinkModeHook() {
})
}
if (thinkingConfig) {
const messageData = output.message as Record<string, unknown>
const agentThinking = messageData.thinking as { type?: string } | undefined
const agentProviderOptions = messageData.providerOptions
const agentDisabledThinking = agentThinking?.type === "disabled"
const agentHasCustomProviderOptions = Boolean(agentProviderOptions)
if (agentDisabledThinking) {
log("Think mode: skipping - agent has thinking disabled", {
sessionID,
provider: currentModel.providerID,
})
} else if (agentHasCustomProviderOptions) {
log("Think mode: skipping - agent has custom providerOptions", {
sessionID,
provider: currentModel.providerID,
})
} else if (
!isDisabledThinkingConfig(thinkingConfig as Record<string, unknown>)
) {
Object.assign(output.message, thinkingConfig)
state.thinkingConfigInjected = true
log("Think mode: thinking config injected", {
sessionID,
provider: currentModel.providerID,
config: thinkingConfig,
})
} else {
log("Think mode: skipping disabled thinking config", {
sessionID,
provider: currentModel.providerID,
})
}
}
thinkModeState.set(sessionID, state)
},

View File

@@ -1,452 +1,155 @@
import { describe, expect, it, beforeEach } from "bun:test"
import type { ThinkModeInput } from "./types"
import { beforeEach, describe, expect, it } from "bun:test"
const { createThinkModeHook, clearThinkModeState } = await import("./index")
const { clearThinkModeState, createThinkModeHook } = await import("./index")
type ThinkModeHookInput = {
sessionID: string
model?: { providerID: string; modelID: string }
}
type ThinkModeHookOutput = {
message: Record<string, unknown>
parts: Array<{ type: string; text?: string; [key: string]: unknown }>
}
function createHookInput(args: {
sessionID?: string
providerID?: string
modelID?: string
}): ThinkModeHookInput {
const { sessionID = "test-session-id", providerID, modelID } = args
if (!providerID || !modelID) {
return { sessionID }
}
/**
* Helper to create a mock ThinkModeInput for testing
*/
function createMockInput(
providerID: string,
modelID: string,
promptText: string
): ThinkModeInput {
return {
parts: [{ type: "text", text: promptText }],
message: {
model: {
providerID,
modelID,
},
},
sessionID,
model: { providerID, modelID },
}
}
/**
* Type helper for accessing dynamically injected properties on message
*/
type MessageWithInjectedProps = Record<string, unknown>
function createHookOutput(promptText: string, variant?: string): ThinkModeHookOutput {
return {
message: variant ? { variant } : {},
parts: [{ type: "text", text: promptText }],
}
}
describe("createThinkModeHook integration", () => {
describe("createThinkModeHook", () => {
const sessionID = "test-session-id"
beforeEach(() => {
clearThinkModeState(sessionID)
})
describe("GitHub Copilot provider integration", () => {
describe("Claude models", () => {
it("should activate thinking mode for github-copilot Claude with think keyword", async () => {
// given a github-copilot Claude model and prompt with "think" keyword
const hook = createThinkModeHook()
const input = createMockInput(
"github-copilot",
"claude-opus-4-6",
"Please think deeply about this problem"
)
// when the chat.params hook is called
await hook["chat.params"](input, sessionID)
// then should upgrade to high variant and inject thinking config
const message = input.message as MessageWithInjectedProps
expect(input.message.model?.modelID).toBe("claude-opus-4-6-high")
expect(message.thinking).toBeDefined()
expect((message.thinking as Record<string, unknown>)?.type).toBe(
"enabled"
)
expect(
(message.thinking as Record<string, unknown>)?.budgetTokens
).toBe(64000)
})
it("should handle github-copilot Claude with dots in version", async () => {
// given a github-copilot Claude model with dot format (claude-opus-4.6)
const hook = createThinkModeHook()
const input = createMockInput(
"github-copilot",
"claude-opus-4.6",
"ultrathink mode"
)
// when the chat.params hook is called
await hook["chat.params"](input, sessionID)
// then should upgrade to high variant (hyphen format)
const message = input.message as MessageWithInjectedProps
expect(input.message.model?.modelID).toBe("claude-opus-4-6-high")
expect(message.thinking).toBeDefined()
})
it("should handle github-copilot Claude Sonnet", async () => {
// given a github-copilot Claude Sonnet model
const hook = createThinkModeHook()
const input = createMockInput(
"github-copilot",
"claude-sonnet-4-6",
"think about this"
)
// when the chat.params hook is called
await hook["chat.params"](input, sessionID)
// then should upgrade to high variant
const message = input.message as MessageWithInjectedProps
expect(input.message.model?.modelID).toBe("claude-sonnet-4-6-high")
expect(message.thinking).toBeDefined()
})
it("sets high variant and switches model when think keyword is present", async () => {
// given
const hook = createThinkModeHook()
const input = createHookInput({
sessionID,
providerID: "github-copilot",
modelID: "claude-opus-4-6",
})
const output = createHookOutput("Please think deeply about this")
describe("Gemini models", () => {
it("should activate thinking mode for github-copilot Gemini Pro", async () => {
// given a github-copilot Gemini Pro model
const hook = createThinkModeHook()
const input = createMockInput(
"github-copilot",
"gemini-3-pro",
"think about this"
)
// when
await hook["chat.message"](input, output)
// when the chat.params hook is called
await hook["chat.params"](input, sessionID)
// then should upgrade to high variant and inject google thinking config
const message = input.message as MessageWithInjectedProps
expect(input.message.model?.modelID).toBe("gemini-3-pro-high")
expect(message.providerOptions).toBeDefined()
const googleOptions = (
message.providerOptions as Record<string, unknown>
)?.google as Record<string, unknown>
expect(googleOptions?.thinkingConfig).toBeDefined()
})
it("should activate thinking mode for github-copilot Gemini Flash", async () => {
// given a github-copilot Gemini Flash model
const hook = createThinkModeHook()
const input = createMockInput(
"github-copilot",
"gemini-3-flash",
"ultrathink"
)
// when the chat.params hook is called
await hook["chat.params"](input, sessionID)
// then should upgrade to high variant
const message = input.message as MessageWithInjectedProps
expect(input.message.model?.modelID).toBe("gemini-3-flash-high")
expect(message.providerOptions).toBeDefined()
})
})
describe("GPT models", () => {
it("should activate thinking mode for github-copilot GPT-5.2", async () => {
// given a github-copilot GPT-5.2 model
const hook = createThinkModeHook()
const input = createMockInput(
"github-copilot",
"gpt-5.2",
"please think"
)
// when the chat.params hook is called
await hook["chat.params"](input, sessionID)
// then should upgrade to high variant and inject openai thinking config
const message = input.message as MessageWithInjectedProps
expect(input.message.model?.modelID).toBe("gpt-5-2-high")
expect(message.reasoning_effort).toBe("high")
})
it("should activate thinking mode for github-copilot GPT-5", async () => {
// given a github-copilot GPT-5 model
const hook = createThinkModeHook()
const input = createMockInput("github-copilot", "gpt-5", "think deeply")
// when the chat.params hook is called
await hook["chat.params"](input, sessionID)
// then should upgrade to high variant
const message = input.message as MessageWithInjectedProps
expect(input.message.model?.modelID).toBe("gpt-5-high")
expect(message.reasoning_effort).toBe("high")
})
})
describe("No think keyword", () => {
it("should NOT activate for github-copilot without think keyword", async () => {
// given a prompt without any think keyword
const hook = createThinkModeHook()
const input = createMockInput(
"github-copilot",
"claude-opus-4-6",
"Just do this task"
)
const originalModelID = input.message.model?.modelID
// when the chat.params hook is called
await hook["chat.params"](input, sessionID)
// then should NOT change model or inject config
const message = input.message as MessageWithInjectedProps
expect(input.message.model?.modelID).toBe(originalModelID)
expect(message.thinking).toBeUndefined()
})
// then
expect(output.message.variant).toBe("high")
expect(output.message.model).toEqual({
providerID: "github-copilot",
modelID: "claude-opus-4-6-high",
})
})
describe("Backwards compatibility with direct providers", () => {
it("should still work for direct anthropic provider", async () => {
// given direct anthropic provider
const hook = createThinkModeHook()
const input = createMockInput(
"anthropic",
"claude-sonnet-4-6",
"think about this"
)
// when the chat.params hook is called
await hook["chat.params"](input, sessionID)
// then should work as before
const message = input.message as MessageWithInjectedProps
expect(input.message.model?.modelID).toBe("claude-sonnet-4-6-high")
expect(message.thinking).toBeDefined()
it("supports dotted model IDs by switching to normalized high variant", async () => {
// given
const hook = createThinkModeHook()
const input = createHookInput({
sessionID,
providerID: "github-copilot",
modelID: "gpt-5.2",
})
const output = createHookOutput("ultrathink about this")
it("should work for direct google-vertex-anthropic provider", async () => {
//#given direct google-vertex-anthropic provider
const hook = createThinkModeHook()
const input = createMockInput(
"google-vertex-anthropic",
"claude-opus-4-6",
"think deeply"
)
// when
await hook["chat.message"](input, output)
//#when the chat.params hook is called
await hook["chat.params"](input, sessionID)
//#then should upgrade model and inject Claude thinking config
const message = input.message as MessageWithInjectedProps
expect(input.message.model?.modelID).toBe("claude-opus-4-6-high")
expect(message.thinking).toBeDefined()
expect((message.thinking as Record<string, unknown>)?.budgetTokens).toBe(
64000
)
})
it("should still work for direct google provider", async () => {
// given direct google provider
const hook = createThinkModeHook()
const input = createMockInput(
"google",
"gemini-3-pro",
"think about this"
)
// when the chat.params hook is called
await hook["chat.params"](input, sessionID)
// then should work as before
const message = input.message as MessageWithInjectedProps
expect(input.message.model?.modelID).toBe("gemini-3-pro-high")
expect(message.providerOptions).toBeDefined()
})
it("should still work for direct openai provider", async () => {
// given direct openai provider
const hook = createThinkModeHook()
const input = createMockInput("openai", "gpt-5", "think about this")
// when the chat.params hook is called
await hook["chat.params"](input, sessionID)
// then should work
const message = input.message as MessageWithInjectedProps
expect(input.message.model?.modelID).toBe("gpt-5-high")
expect(message.reasoning_effort).toBe("high")
})
it("should still work for amazon-bedrock provider", async () => {
// given amazon-bedrock provider
const hook = createThinkModeHook()
const input = createMockInput(
"amazon-bedrock",
"claude-sonnet-4-6",
"think"
)
// when the chat.params hook is called
await hook["chat.params"](input, sessionID)
// then should inject bedrock thinking config
const message = input.message as MessageWithInjectedProps
expect(input.message.model?.modelID).toBe("claude-sonnet-4-6-high")
expect(message.reasoningConfig).toBeDefined()
// then
expect(output.message.variant).toBe("high")
expect(output.message.model).toEqual({
providerID: "github-copilot",
modelID: "gpt-5-2-high",
})
})
describe("Already-high variants", () => {
it("should NOT re-upgrade already-high variants", async () => {
// given an already-high variant model
const hook = createThinkModeHook()
const input = createMockInput(
"github-copilot",
"claude-opus-4-6-high",
"think deeply"
)
// when the chat.params hook is called
await hook["chat.params"](input, sessionID)
// then should NOT modify the model (already high)
const message = input.message as MessageWithInjectedProps
expect(input.message.model?.modelID).toBe("claude-opus-4-6-high")
// No additional thinking config should be injected
expect(message.thinking).toBeUndefined()
it("skips when message variant is already set", async () => {
// given
const hook = createThinkModeHook()
const input = createHookInput({
sessionID,
providerID: "github-copilot",
modelID: "claude-sonnet-4-6",
})
const output = createHookOutput("think through this", "max")
it("should NOT re-upgrade already-high GPT variants", async () => {
// given an already-high GPT variant
const hook = createThinkModeHook()
const input = createMockInput(
"github-copilot",
"gpt-5.2-high",
"ultrathink"
)
// when
await hook["chat.message"](input, output)
// when the chat.params hook is called
await hook["chat.params"](input, sessionID)
// then should NOT modify the model
const message = input.message as MessageWithInjectedProps
expect(input.message.model?.modelID).toBe("gpt-5.2-high")
expect(message.reasoning_effort).toBeUndefined()
})
// then
expect(output.message.variant).toBe("max")
expect(output.message.model).toBeUndefined()
})
describe("Unknown models", () => {
it("should not crash for unknown models via github-copilot", async () => {
// given an unknown model type
const hook = createThinkModeHook()
const input = createMockInput(
"github-copilot",
"llama-3-70b",
"think about this"
)
// when the chat.params hook is called
await hook["chat.params"](input, sessionID)
// then should not crash and model should remain unchanged
expect(input.message.model?.modelID).toBe("llama-3-70b")
it("does nothing when think keyword is absent", async () => {
// given
const hook = createThinkModeHook()
const input = createHookInput({
sessionID,
providerID: "google",
modelID: "gemini-3.1-pro",
})
const output = createHookOutput("Please solve this directly")
// when
await hook["chat.message"](input, output)
// then
expect(output.message.variant).toBeUndefined()
expect(output.message.model).toBeUndefined()
})
describe("Edge cases", () => {
it("should handle missing model gracefully", async () => {
// given input without a model
const hook = createThinkModeHook()
const input: ThinkModeInput = {
parts: [{ type: "text", text: "think about this" }],
message: {},
}
// when the chat.params hook is called
// then should not crash
await expect(
hook["chat.params"](input, sessionID)
).resolves.toBeUndefined()
it("does not modify already-high models", async () => {
// given
const hook = createThinkModeHook()
const input = createHookInput({
sessionID,
providerID: "openai",
modelID: "gpt-5-high",
})
const output = createHookOutput("think deeply")
it("should handle empty prompt gracefully", async () => {
// given empty prompt
const hook = createThinkModeHook()
const input = createMockInput("github-copilot", "claude-opus-4-6", "")
// when
await hook["chat.message"](input, output)
// when the chat.params hook is called
await hook["chat.params"](input, sessionID)
// then should not upgrade (no think keyword)
expect(input.message.model?.modelID).toBe("claude-opus-4-6")
})
// then
expect(output.message.variant).toBeUndefined()
expect(output.message.model).toBeUndefined()
})
describe("Agent-level thinking configuration respect", () => {
it("should omit Z.ai GLM disabled thinking config", async () => {
//#given a Z.ai GLM model with think prompt
const hook = createThinkModeHook()
const input = createMockInput(
"zai-coding-plan",
"glm-5",
"ultrathink mode"
)
it("handles missing input model without crashing", async () => {
// given
const hook = createThinkModeHook()
const input = createHookInput({ sessionID })
const output = createHookOutput("think about this")
//#when think mode resolves Z.ai thinking configuration
await hook["chat.params"](input, sessionID)
// when
await expect(hook["chat.message"](input, output)).resolves.toBeUndefined()
//#then thinking config should be omitted from request
const message = input.message as MessageWithInjectedProps
expect(input.message.model?.modelID).toBe("glm-5")
expect(message.thinking).toBeUndefined()
expect(message.providerOptions).toBeUndefined()
})
it("should NOT inject thinking config when agent has thinking disabled", async () => {
// given agent with thinking explicitly disabled
const hook = createThinkModeHook()
const input: ThinkModeInput = {
parts: [{ type: "text", text: "ultrathink deeply" }],
message: {
model: { providerID: "google", modelID: "gemini-3-pro" },
thinking: { type: "disabled" },
} as ThinkModeInput["message"],
}
// when the chat.params hook is called
await hook["chat.params"](input, sessionID)
// then should NOT override agent's thinking disabled setting
const message = input.message as MessageWithInjectedProps
expect((message.thinking as { type: string }).type).toBe("disabled")
expect(message.providerOptions).toBeUndefined()
})
it("should NOT inject thinking config when agent has custom providerOptions", async () => {
// given agent with custom providerOptions
const hook = createThinkModeHook()
const input: ThinkModeInput = {
parts: [{ type: "text", text: "ultrathink" }],
message: {
model: { providerID: "google", modelID: "gemini-3-flash" },
providerOptions: {
google: { thinkingConfig: { thinkingBudget: 0 } },
},
} as ThinkModeInput["message"],
}
// when the chat.params hook is called
await hook["chat.params"](input, sessionID)
// then should NOT override agent's providerOptions
const message = input.message as MessageWithInjectedProps
const providerOpts = message.providerOptions as Record<string, unknown>
expect((providerOpts.google as Record<string, unknown>).thinkingConfig).toEqual({
thinkingBudget: 0,
})
})
it("should still inject thinking config when agent has no thinking override", async () => {
// given agent without thinking override
const hook = createThinkModeHook()
const input = createMockInput("google", "gemini-3-pro", "ultrathink")
// when the chat.params hook is called
await hook["chat.params"](input, sessionID)
// then should inject thinking config as normal
const message = input.message as MessageWithInjectedProps
expect(message.providerOptions).toBeDefined()
})
// then
expect(output.message.variant).toBeUndefined()
expect(output.message.model).toBeUndefined()
})
})

View File

@@ -1,128 +1,10 @@
import { describe, expect, it } from "bun:test"
import {
getHighVariant,
getThinkingConfig,
isAlreadyHighVariant,
THINKING_CONFIGS,
} from "./switcher"
describe("think-mode switcher", () => {
describe("GitHub Copilot provider support", () => {
describe("Claude models via github-copilot", () => {
it("should resolve github-copilot Claude Opus to anthropic config", () => {
// given a github-copilot provider with Claude Opus model
const providerID = "github-copilot"
const modelID = "claude-opus-4-6"
// when getting thinking config
const config = getThinkingConfig(providerID, modelID)
// then should return anthropic thinking config
expect(config).not.toBeNull()
expect(config?.thinking).toBeDefined()
expect((config?.thinking as Record<string, unknown>)?.type).toBe(
"enabled"
)
expect((config?.thinking as Record<string, unknown>)?.budgetTokens).toBe(
64000
)
})
it("should resolve github-copilot Claude Sonnet to anthropic config", () => {
// given a github-copilot provider with Claude Sonnet model
const config = getThinkingConfig("github-copilot", "claude-sonnet-4-6")
// then should return anthropic thinking config
expect(config).not.toBeNull()
expect(config?.thinking).toBeDefined()
})
it("should handle Claude with dots in version number", () => {
// given a model ID with dots (claude-opus-4.6)
const config = getThinkingConfig("github-copilot", "claude-opus-4.6")
// then should still return anthropic thinking config
expect(config).not.toBeNull()
expect(config?.thinking).toBeDefined()
})
})
describe("Gemini models via github-copilot", () => {
it("should resolve github-copilot Gemini Pro to google config", () => {
// given a github-copilot provider with Gemini Pro model
const config = getThinkingConfig("github-copilot", "gemini-3-pro")
// then should return google thinking config
expect(config).not.toBeNull()
expect(config?.providerOptions).toBeDefined()
const googleOptions = (
config?.providerOptions as Record<string, unknown>
)?.google as Record<string, unknown>
expect(googleOptions?.thinkingConfig).toBeDefined()
})
it("should resolve github-copilot Gemini Flash to google config", () => {
// given a github-copilot provider with Gemini Flash model
const config = getThinkingConfig(
"github-copilot",
"gemini-3-flash"
)
// then should return google thinking config
expect(config).not.toBeNull()
expect(config?.providerOptions).toBeDefined()
})
})
describe("GPT models via github-copilot", () => {
it("should resolve github-copilot GPT-5.2 to openai config", () => {
// given a github-copilot provider with GPT-5.2 model
const config = getThinkingConfig("github-copilot", "gpt-5.2")
// then should return openai thinking config
expect(config).not.toBeNull()
expect(config?.reasoning_effort).toBe("high")
})
it("should resolve github-copilot GPT-5 to openai config", () => {
// given a github-copilot provider with GPT-5 model
const config = getThinkingConfig("github-copilot", "gpt-5")
// then should return openai thinking config
expect(config).not.toBeNull()
expect(config?.reasoning_effort).toBe("high")
})
it("should resolve github-copilot o1 to openai config", () => {
// given a github-copilot provider with o1 model
const config = getThinkingConfig("github-copilot", "o1-preview")
// then should return openai thinking config
expect(config).not.toBeNull()
expect(config?.reasoning_effort).toBe("high")
})
it("should resolve github-copilot o3 to openai config", () => {
// given a github-copilot provider with o3 model
const config = getThinkingConfig("github-copilot", "o3-mini")
// then should return openai thinking config
expect(config).not.toBeNull()
expect(config?.reasoning_effort).toBe("high")
})
})
describe("Unknown models via github-copilot", () => {
it("should return null for unknown model types", () => {
// given a github-copilot provider with unknown model
const config = getThinkingConfig("github-copilot", "llama-3-70b")
// then should return null (no matching provider)
expect(config).toBeNull()
})
})
})
describe("Model ID normalization", () => {
describe("getHighVariant with dots vs hyphens", () => {
it("should handle dots in Claude version numbers", () => {
@@ -167,8 +49,8 @@ describe("think-mode switcher", () => {
it("should handle Gemini preview variants", () => {
// given Gemini preview model IDs
expect(getHighVariant("gemini-3-pro")).toBe(
"gemini-3-pro-high"
expect(getHighVariant("gemini-3.1-pro")).toBe(
"gemini-3-1-pro-high"
)
expect(getHighVariant("gemini-3-flash")).toBe(
"gemini-3-flash-high"
@@ -179,7 +61,7 @@ describe("think-mode switcher", () => {
// given model IDs that are already high variants
expect(getHighVariant("claude-opus-4-6-high")).toBeNull()
expect(getHighVariant("gpt-5-2-high")).toBeNull()
expect(getHighVariant("gemini-3-pro-high")).toBeNull()
expect(getHighVariant("gemini-3-1-pro-high")).toBeNull()
})
it("should return null for unknown models", () => {
@@ -195,7 +77,7 @@ describe("think-mode switcher", () => {
// given model IDs with -high suffix
expect(isAlreadyHighVariant("claude-opus-4-6-high")).toBe(true)
expect(isAlreadyHighVariant("gpt-5-2-high")).toBe(true)
expect(isAlreadyHighVariant("gemini-3-pro-high")).toBe(true)
expect(isAlreadyHighVariant("gemini-3.1-pro-high")).toBe(true)
})
it("should detect -high suffix after normalization", () => {
@@ -208,7 +90,7 @@ describe("think-mode switcher", () => {
expect(isAlreadyHighVariant("claude-opus-4-6")).toBe(false)
expect(isAlreadyHighVariant("claude-opus-4.6")).toBe(false)
expect(isAlreadyHighVariant("gpt-5.2")).toBe(false)
expect(isAlreadyHighVariant("gemini-3-pro")).toBe(false)
expect(isAlreadyHighVariant("gemini-3.1-pro")).toBe(false)
})
it("should return false for models with 'high' in name but not suffix", () => {
@@ -217,149 +99,6 @@ describe("think-mode switcher", () => {
})
})
describe("getThinkingConfig", () => {
describe("Already high variants", () => {
it("should return null for already-high variants", () => {
// given already-high model variants
expect(
getThinkingConfig("anthropic", "claude-opus-4-6-high")
).toBeNull()
expect(getThinkingConfig("openai", "gpt-5-2-high")).toBeNull()
expect(getThinkingConfig("google", "gemini-3-pro-high")).toBeNull()
})
it("should return null for already-high variants via github-copilot", () => {
// given already-high model variants via github-copilot
expect(
getThinkingConfig("github-copilot", "claude-opus-4-6-high")
).toBeNull()
expect(getThinkingConfig("github-copilot", "gpt-5.2-high")).toBeNull()
})
})
describe("Non-thinking-capable models", () => {
it("should return null for non-thinking-capable models", () => {
// given models that don't support thinking mode
expect(getThinkingConfig("anthropic", "claude-2")).toBeNull()
expect(getThinkingConfig("openai", "gpt-4")).toBeNull()
expect(getThinkingConfig("google", "gemini-1")).toBeNull()
})
})
describe("Unknown providers", () => {
it("should return null for unknown providers", () => {
// given unknown provider IDs
expect(getThinkingConfig("unknown-provider", "some-model")).toBeNull()
expect(getThinkingConfig("azure", "gpt-5")).toBeNull()
})
})
})
describe("Direct provider configs (backwards compatibility)", () => {
it("should still work for direct anthropic provider", () => {
// given direct anthropic provider
const config = getThinkingConfig("anthropic", "claude-opus-4-6")
// then should return anthropic thinking config
expect(config).not.toBeNull()
expect(config?.thinking).toBeDefined()
expect((config?.thinking as Record<string, unknown>)?.type).toBe("enabled")
})
it("should work for direct google-vertex-anthropic provider", () => {
//#given direct google-vertex-anthropic provider
const config = getThinkingConfig(
"google-vertex-anthropic",
"claude-opus-4-6"
)
//#when thinking config is resolved
//#then it should return anthropic-style thinking config
expect(config).not.toBeNull()
expect(config?.thinking).toBeDefined()
expect((config?.thinking as Record<string, unknown>)?.type).toBe("enabled")
expect((config?.thinking as Record<string, unknown>)?.budgetTokens).toBe(
64000
)
})
it("should still work for direct google provider", () => {
// given direct google provider
const config = getThinkingConfig("google", "gemini-3-pro")
// then should return google thinking config
expect(config).not.toBeNull()
expect(config?.providerOptions).toBeDefined()
})
it("should still work for amazon-bedrock provider", () => {
// given amazon-bedrock provider with claude model
const config = getThinkingConfig("amazon-bedrock", "claude-sonnet-4-6")
// then should return bedrock thinking config
expect(config).not.toBeNull()
expect(config?.reasoningConfig).toBeDefined()
})
it("should still work for google-vertex provider", () => {
// given google-vertex provider
const config = getThinkingConfig("google-vertex", "gemini-3-pro")
// then should return google-vertex thinking config
expect(config).not.toBeNull()
expect(config?.providerOptions).toBeDefined()
const vertexOptions = (config?.providerOptions as Record<string, unknown>)?.[
"google-vertex"
] as Record<string, unknown>
expect(vertexOptions?.thinkingConfig).toBeDefined()
})
it("should work for direct openai provider", () => {
// given direct openai provider
const config = getThinkingConfig("openai", "gpt-5")
// then should return openai thinking config
expect(config).not.toBeNull()
expect(config?.reasoning_effort).toBe("high")
})
})
describe("THINKING_CONFIGS structure", () => {
it("should have correct structure for anthropic", () => {
const config = THINKING_CONFIGS.anthropic
expect(config.thinking).toBeDefined()
expect(config.maxTokens).toBe(128000)
})
it("should have correct structure for google-vertex-anthropic", () => {
//#given google-vertex-anthropic config entry
const config = THINKING_CONFIGS["google-vertex-anthropic"]
//#when structure is validated
//#then it should match anthropic style structure
expect(config.thinking).toBeDefined()
expect(config.maxTokens).toBe(128000)
})
it("should have correct structure for google", () => {
const config = THINKING_CONFIGS.google
expect(config.providerOptions).toBeDefined()
})
it("should have correct structure for openai", () => {
const config = THINKING_CONFIGS.openai
expect(config.reasoning_effort).toBe("high")
})
it("should have correct structure for amazon-bedrock", () => {
const config = THINKING_CONFIGS["amazon-bedrock"]
expect(config.reasoningConfig).toBeDefined()
expect(config.maxTokens).toBe(64000)
})
})
describe("Custom provider prefixes support", () => {
describe("getHighVariant with prefixes", () => {
it("should preserve vertex_ai/ prefix when getting high variant", () => {
@@ -390,7 +129,7 @@ describe("think-mode switcher", () => {
// given various custom prefixes
expect(getHighVariant("azure/gpt-5")).toBe("azure/gpt-5-high")
expect(getHighVariant("bedrock/claude-sonnet-4-6")).toBe("bedrock/claude-sonnet-4-6-high")
expect(getHighVariant("custom-llm/gemini-3-pro")).toBe("custom-llm/gemini-3-pro-high")
expect(getHighVariant("custom-llm/gemini-3.1-pro")).toBe("custom-llm/gemini-3-1-pro-high")
})
it("should return null for prefixed models without high variant mapping", () => {
@@ -411,7 +150,7 @@ describe("think-mode switcher", () => {
// given prefixed model IDs with -high suffix
expect(isAlreadyHighVariant("vertex_ai/claude-opus-4-6-high")).toBe(true)
expect(isAlreadyHighVariant("openai/gpt-5-2-high")).toBe(true)
expect(isAlreadyHighVariant("custom/gemini-3-pro-high")).toBe(true)
expect(isAlreadyHighVariant("custom/gemini-3.1-pro-high")).toBe(true)
})
it("should return false for prefixed base models", () => {
@@ -426,141 +165,5 @@ describe("think-mode switcher", () => {
expect(isAlreadyHighVariant("vertex_ai/gpt-5.2-high")).toBe(true)
})
})
describe("getThinkingConfig with prefixes", () => {
it("should return null for custom providers (not in THINKING_CONFIGS)", () => {
// given custom provider with prefixed Claude model
const config = getThinkingConfig("dia-llm", "vertex_ai/claude-sonnet-4-6")
// then should return null (custom provider not in THINKING_CONFIGS)
expect(config).toBeNull()
})
it("should work with prefixed models on known providers", () => {
// given known provider (anthropic) with prefixed model
// This tests that the base model name is correctly extracted for capability check
const config = getThinkingConfig("anthropic", "custom-prefix/claude-opus-4-6")
// then should return thinking config (base model is capable)
expect(config).not.toBeNull()
expect(config?.thinking).toBeDefined()
})
it("should return null for prefixed models that are already high", () => {
// given prefixed already-high model
const config = getThinkingConfig("anthropic", "vertex_ai/claude-opus-4-6-high")
// then should return null
expect(config).toBeNull()
})
})
describe("Real-world custom provider scenario", () => {
it("should handle LLM proxy with vertex_ai prefix correctly", () => {
// given a custom LLM proxy provider using vertex_ai/ prefix
const providerID = "dia-llm"
const modelID = "vertex_ai/claude-sonnet-4-6"
// when getting high variant
const highVariant = getHighVariant(modelID)
// then should preserve the prefix
expect(highVariant).toBe("vertex_ai/claude-sonnet-4-6-high")
// #and when checking if already high
expect(isAlreadyHighVariant(modelID)).toBe(false)
expect(isAlreadyHighVariant(highVariant!)).toBe(true)
// #and when getting thinking config for custom provider
const config = getThinkingConfig(providerID, modelID)
// then should return null (custom provider, not anthropic)
// This prevents applying incompatible thinking configs to custom providers
expect(config).toBeNull()
})
it("should not break when switching to high variant in think mode", () => {
// given think mode switching vertex_ai/claude model to high variant
const original = "vertex_ai/claude-opus-4-6"
const high = getHighVariant(original)
// then the high variant should be valid
expect(high).toBe("vertex_ai/claude-opus-4-6-high")
// #and should be recognized as already high
expect(isAlreadyHighVariant(high!)).toBe(true)
// #and switching again should return null (already high)
expect(getHighVariant(high!)).toBeNull()
})
})
})
describe("Z.AI GLM-4.7 provider support", () => {
describe("getThinkingConfig for zai-coding-plan", () => {
it("should return thinking config for glm-5", () => {
//#given a Z.ai GLM model
const config = getThinkingConfig("zai-coding-plan", "glm-5")
//#when thinking config is resolved
//#then thinking type is "disabled"
expect(config).not.toBeNull()
expect(config?.providerOptions).toBeDefined()
const zaiOptions = (config?.providerOptions as Record<string, unknown>)?.[
"zai-coding-plan"
] as Record<string, unknown>
expect(zaiOptions?.extra_body).toBeDefined()
const extraBody = zaiOptions?.extra_body as Record<string, unknown>
expect(extraBody?.thinking).toBeDefined()
expect((extraBody?.thinking as Record<string, unknown>)?.type).toBe("disabled")
})
it("should return thinking config for glm-4.6v (multimodal)", () => {
// given zai-coding-plan provider with glm-4.6v model
const config = getThinkingConfig("zai-coding-plan", "glm-4.6v")
// then should return zai-coding-plan thinking config
expect(config).not.toBeNull()
expect(config?.providerOptions).toBeDefined()
})
it("should return null for non-GLM models on zai-coding-plan", () => {
// given zai-coding-plan provider with unknown model
const config = getThinkingConfig("zai-coding-plan", "some-other-model")
// then should return null
expect(config).toBeNull()
})
})
describe("HIGH_VARIANT_MAP for GLM", () => {
it("should NOT have high variant for glm-5", () => {
// given glm-5 model
const variant = getHighVariant("glm-5")
// then should return null (no high variant needed)
expect(variant).toBeNull()
})
it("should NOT have high variant for glm-4.6v", () => {
// given glm-4.6v model
const variant = getHighVariant("glm-4.6v")
// then should return null
expect(variant).toBeNull()
})
})
})
describe("THINKING_CONFIGS structure for zai-coding-plan", () => {
it("should have correct structure for zai-coding-plan", () => {
const config = THINKING_CONFIGS["zai-coding-plan"]
expect(config.providerOptions).toBeDefined()
const zaiOptions = (config.providerOptions as Record<string, unknown>)?.[
"zai-coding-plan"
] as Record<string, unknown>
expect(zaiOptions?.extra_body).toBeDefined()
})
})
})
})

View File

@@ -53,35 +53,7 @@ function normalizeModelID(modelID: string): string {
return modelID.replace(/\.(\d+)/g, "-$1")
}
/**
* Resolves proxy providers (like github-copilot) to their underlying provider.
* This allows GitHub Copilot to inherit thinking configurations from the actual
* model provider (Anthropic, Google, OpenAI).
*
* @example
* resolveProvider("github-copilot", "claude-opus-4-6") // "anthropic"
* resolveProvider("github-copilot", "gemini-3-pro") // "google"
* resolveProvider("github-copilot", "gpt-5.2") // "openai"
* resolveProvider("anthropic", "claude-opus-4-6") // "anthropic" (unchanged)
*/
function resolveProvider(providerID: string, modelID: string): string {
// GitHub Copilot is a proxy - infer actual provider from model name
if (providerID === "github-copilot") {
const modelLower = modelID.toLowerCase()
if (modelLower.includes("claude")) return "anthropic"
if (modelLower.includes("gemini")) return "google"
if (
modelLower.includes("gpt") ||
modelLower.includes("o1") ||
modelLower.includes("o3")
) {
return "openai"
}
}
// Direct providers or unknown - return as-is
return providerID
}
// Maps model IDs to their "high reasoning" variant (internal convention)
// For OpenAI models, this signals that reasoning_effort should be set to "high"
@@ -90,8 +62,8 @@ const HIGH_VARIANT_MAP: Record<string, string> = {
"claude-sonnet-4-6": "claude-sonnet-4-6-high",
"claude-opus-4-6": "claude-opus-4-6-high",
// Gemini
"gemini-3-pro": "gemini-3-pro-high",
"gemini-3-pro-low": "gemini-3-pro-high",
"gemini-3-1-pro": "gemini-3-1-pro-high",
"gemini-3-1-pro-low": "gemini-3-1-pro-high",
"gemini-3-flash": "gemini-3-flash-high",
// GPT-5
"gpt-5": "gpt-5-high",
@@ -110,77 +82,12 @@ const HIGH_VARIANT_MAP: Record<string, string> = {
"gpt-5-2-chat-latest": "gpt-5-2-chat-latest-high",
"gpt-5-2-pro": "gpt-5-2-pro-high",
// Antigravity (Google)
"antigravity-gemini-3-pro": "antigravity-gemini-3-pro-high",
"antigravity-gemini-3-1-pro": "antigravity-gemini-3-1-pro-high",
"antigravity-gemini-3-flash": "antigravity-gemini-3-flash-high",
}
const ALREADY_HIGH: Set<string> = new Set(Object.values(HIGH_VARIANT_MAP))
export const THINKING_CONFIGS = {
anthropic: {
thinking: {
type: "enabled",
budgetTokens: 64000,
},
maxTokens: 128000,
},
"google-vertex-anthropic": {
thinking: {
type: "enabled",
budgetTokens: 64000,
},
maxTokens: 128000,
},
"amazon-bedrock": {
reasoningConfig: {
type: "enabled",
budgetTokens: 32000,
},
maxTokens: 64000,
},
google: {
providerOptions: {
google: {
thinkingConfig: {
thinkingLevel: "HIGH",
},
},
},
},
"google-vertex": {
providerOptions: {
"google-vertex": {
thinkingConfig: {
thinkingLevel: "HIGH",
},
},
},
},
openai: {
reasoning_effort: "high",
},
"zai-coding-plan": {
providerOptions: {
"zai-coding-plan": {
extra_body: {
thinking: {
type: "disabled",
},
},
},
},
},
} as const satisfies Record<string, Record<string, unknown>>
const THINKING_CAPABLE_MODELS = {
anthropic: ["claude-sonnet-4", "claude-opus-4", "claude-3"],
"google-vertex-anthropic": ["claude-sonnet-4", "claude-opus-4", "claude-3"],
"amazon-bedrock": ["claude", "anthropic"],
google: ["gemini-2", "gemini-3"],
"google-vertex": ["gemini-2", "gemini-3"],
openai: ["gpt-5", "o1", "o3"],
"zai-coding-plan": ["glm"],
} as const satisfies Record<string, readonly string[]>
export function getHighVariant(modelID: string): string | null {
const normalized = normalizeModelID(modelID)
@@ -207,37 +114,3 @@ export function isAlreadyHighVariant(modelID: string): boolean {
return ALREADY_HIGH.has(base) || base.endsWith("-high")
}
type ThinkingProvider = keyof typeof THINKING_CONFIGS
function isThinkingProvider(provider: string): provider is ThinkingProvider {
return provider in THINKING_CONFIGS
}
export function getThinkingConfig(
providerID: string,
modelID: string
): Record<string, unknown> | null {
const normalized = normalizeModelID(modelID)
const { base } = extractModelPrefix(normalized)
if (isAlreadyHighVariant(normalized)) {
return null
}
const resolvedProvider = resolveProvider(providerID, modelID)
if (!isThinkingProvider(resolvedProvider)) {
return null
}
const config = THINKING_CONFIGS[resolvedProvider]
const capablePatterns = THINKING_CAPABLE_MODELS[resolvedProvider]
// Check capability using base model name (without prefix)
const baseLower = base.toLowerCase()
const isCapable = capablePatterns.some((pattern) =>
baseLower.includes(pattern.toLowerCase())
)
return isCapable ? config : null
}

View File

@@ -1,21 +1,16 @@
export interface ThinkModeState {
requested: boolean
modelSwitched: boolean
thinkingConfigInjected: boolean
variantSet: boolean
providerID?: string
modelID?: string
}
export interface ModelRef {
interface ModelRef {
providerID: string
modelID: string
}
export interface MessageWithModel {
interface MessageWithModel {
model?: ModelRef
}
export interface ThinkModeInput {
parts: Array<{ type: string; text?: string }>
message: MessageWithModel
}

View File

@@ -17,6 +17,6 @@ export const TOAST_DURATION_MS = 900
export const COUNTDOWN_GRACE_PERIOD_MS = 500
export const ABORT_WINDOW_MS = 3000
export const CONTINUATION_COOLDOWN_MS = 30_000
export const CONTINUATION_COOLDOWN_MS = 5_000
export const MAX_CONSECUTIVE_FAILURES = 5
export const FAILURE_RESET_WINDOW_MS = 5 * 60 * 1000

View File

@@ -15,6 +15,7 @@ import {
MAX_CONSECUTIVE_FAILURES,
} from "./constants"
import { isLastAssistantMessageAborted } from "./abort-detection"
import { hasUnansweredQuestion } from "./pending-question-detection"
import { getIncompleteCount } from "./todo"
import type { MessageInfo, ResolvedMessageInfo, Todo } from "./types"
import type { SessionStateStore } from "./session-state"
@@ -74,6 +75,10 @@ export async function handleSessionIdle(args: {
log(`[${HOOK_NAME}] Skipped: last assistant message was aborted (API fallback)`, { sessionID })
return
}
if (hasUnansweredQuestion(messages)) {
log(`[${HOOK_NAME}] Skipped: pending question awaiting user response`, { sessionID })
return
}
} catch (error) {
log(`[${HOOK_NAME}] Messages fetch failed, continuing`, { sessionID, error: String(error) })
}

View File

@@ -0,0 +1,100 @@
/// <reference types="bun-types" />
import { describe, expect, test } from "bun:test"
import { hasUnansweredQuestion } from "./pending-question-detection"
describe("hasUnansweredQuestion", () => {
test("given empty messages, returns false", () => {
expect(hasUnansweredQuestion([])).toBe(false)
})
test("given null-ish input, returns false", () => {
expect(hasUnansweredQuestion(undefined as never)).toBe(false)
})
test("given last assistant message with question tool_use, returns true", () => {
const messages = [
{ info: { role: "user" } },
{
info: { role: "assistant" },
parts: [
{ type: "tool_use", name: "question" },
],
},
]
expect(hasUnansweredQuestion(messages)).toBe(true)
})
test("given last assistant message with question tool-invocation, returns true", () => {
const messages = [
{ info: { role: "user" } },
{
info: { role: "assistant" },
parts: [
{ type: "tool-invocation", toolName: "question" },
],
},
]
expect(hasUnansweredQuestion(messages)).toBe(true)
})
test("given user message after question (answered), returns false", () => {
const messages = [
{
info: { role: "assistant" },
parts: [
{ type: "tool_use", name: "question" },
],
},
{ info: { role: "user" } },
]
expect(hasUnansweredQuestion(messages)).toBe(false)
})
test("given assistant message with non-question tool, returns false", () => {
const messages = [
{ info: { role: "user" } },
{
info: { role: "assistant" },
parts: [
{ type: "tool_use", name: "bash" },
],
},
]
expect(hasUnansweredQuestion(messages)).toBe(false)
})
test("given assistant message with no parts, returns false", () => {
const messages = [
{ info: { role: "user" } },
{ info: { role: "assistant" } },
]
expect(hasUnansweredQuestion(messages)).toBe(false)
})
test("given role on message directly (not in info), returns true for question", () => {
const messages = [
{ role: "user" },
{
role: "assistant",
parts: [
{ type: "tool_use", name: "question" },
],
},
]
expect(hasUnansweredQuestion(messages)).toBe(true)
})
test("given mixed tools including question, returns true", () => {
const messages = [
{
info: { role: "assistant" },
parts: [
{ type: "tool_use", name: "bash" },
{ type: "tool_use", name: "question" },
],
},
]
expect(hasUnansweredQuestion(messages)).toBe(true)
})
})

View File

@@ -0,0 +1,40 @@
import { log } from "../../shared/logger"
import { HOOK_NAME } from "./constants"
interface MessagePart {
type: string
name?: string
toolName?: string
}
interface Message {
info?: { role?: string }
role?: string
parts?: MessagePart[]
}
export function hasUnansweredQuestion(messages: Message[]): boolean {
if (!messages || messages.length === 0) return false
for (let i = messages.length - 1; i >= 0; i--) {
const msg = messages[i]
const role = msg.info?.role ?? msg.role
if (role === "user") return false
if (role === "assistant" && msg.parts) {
const hasQuestion = msg.parts.some(
(part) =>
(part.type === "tool_use" || part.type === "tool-invocation") &&
(part.name === "question" || part.toolName === "question"),
)
if (hasQuestion) {
log(`[${HOOK_NAME}] Detected pending question tool in last assistant message`)
return true
}
return false
}
}
return false
}

View File

@@ -297,6 +297,31 @@ describe("todo-continuation-enforcer", () => {
expect(promptCalls).toHaveLength(0)
})
test("should not inject when remaining todos are blocked or deleted", async () => {
// given - session where non-completed todos are only blocked/deleted
const sessionID = "main-blocked-deleted"
setMainSession(sessionID)
const mockInput = createMockPluginInput()
mockInput.client.session.todo = async () => ({ data: [
{ id: "1", content: "Blocked task", status: "blocked", priority: "high" },
{ id: "2", content: "Deleted task", status: "deleted", priority: "medium" },
{ id: "3", content: "Done task", status: "completed", priority: "low" },
]})
const hook = createTodoContinuationEnforcer(mockInput, {})
// when - session goes idle
await hook.handler({
event: { type: "session.idle", properties: { sessionID } },
})
await fakeTimers.advanceBy(3000)
// then - no continuation injected
expect(promptCalls).toHaveLength(0)
})
test("should not inject when background tasks are running", async () => {
// given - session with running background tasks
const sessionID = "main-789"
@@ -1663,7 +1688,6 @@ describe("todo-continuation-enforcer", () => {
test("should cancel all countdowns via cancelAllCountdowns", async () => {
// given - multiple sessions with running countdowns
const session1 = "main-cancel-all-1"
const session2 = "main-cancel-all-2"
setMainSession(session1)
const hook = createTodoContinuationEnforcer(createMockPluginInput(), {})

Some files were not shown because too many files have changed in this diff Show More