diff --git a/.github/workflows/publish-platform.yml b/.github/workflows/publish-platform.yml index 173c11795..74089679f 100644 --- a/.github/workflows/publish-platform.yml +++ b/.github/workflows/publish-platform.yml @@ -35,15 +35,15 @@ jobs: # - Uploads compressed artifacts for the publish job # ============================================================================= build: - runs-on: ${{ matrix.platform == 'windows-x64' && 'windows-latest' || 'ubuntu-latest' }} + runs-on: ${{ startsWith(matrix.platform, 'windows-') && 'windows-latest' || 'ubuntu-latest' }} defaults: run: shell: bash strategy: fail-fast: false - max-parallel: 7 + max-parallel: 11 matrix: - platform: [darwin-arm64, darwin-x64, linux-x64, linux-arm64, linux-x64-musl, linux-arm64-musl, windows-x64] + platform: [darwin-arm64, darwin-x64, darwin-x64-baseline, linux-x64, linux-x64-baseline, linux-arm64, linux-x64-musl, linux-x64-musl-baseline, linux-arm64-musl, windows-x64, windows-x64-baseline] steps: - uses: actions/checkout@v4 @@ -95,14 +95,18 @@ jobs: case "$PLATFORM" in darwin-arm64) TARGET="bun-darwin-arm64" ;; darwin-x64) TARGET="bun-darwin-x64" ;; + darwin-x64-baseline) TARGET="bun-darwin-x64-baseline" ;; linux-x64) TARGET="bun-linux-x64" ;; + linux-x64-baseline) TARGET="bun-linux-x64-baseline" ;; linux-arm64) TARGET="bun-linux-arm64" ;; linux-x64-musl) TARGET="bun-linux-x64-musl" ;; + linux-x64-musl-baseline) TARGET="bun-linux-x64-musl-baseline" ;; linux-arm64-musl) TARGET="bun-linux-arm64-musl" ;; windows-x64) TARGET="bun-windows-x64" ;; + windows-x64-baseline) TARGET="bun-windows-x64-baseline" ;; esac - if [ "$PLATFORM" = "windows-x64" ]; then + if [[ "$PLATFORM" == windows-* ]]; then OUTPUT="packages/${PLATFORM}/bin/oh-my-opencode.exe" else OUTPUT="packages/${PLATFORM}/bin/oh-my-opencode" @@ -119,7 +123,7 @@ jobs: PLATFORM="${{ matrix.platform }}" cd packages/${PLATFORM} - if [ "$PLATFORM" = "windows-x64" ]; then + if [[ "$PLATFORM" == windows-* ]]; then # Windows: use 7z (pre-installed 
on windows-latest) 7z a -tzip ../../binary-${PLATFORM}.zip bin/ package.json else @@ -155,7 +159,7 @@ jobs: fail-fast: false max-parallel: 2 matrix: - platform: [darwin-arm64, darwin-x64, linux-x64, linux-arm64, linux-x64-musl, linux-arm64-musl, windows-x64] + platform: [darwin-arm64, darwin-x64, darwin-x64-baseline, linux-x64, linux-x64-baseline, linux-arm64, linux-x64-musl, linux-x64-musl-baseline, linux-arm64-musl, windows-x64, windows-x64-baseline] steps: - name: Check if already published id: check @@ -184,7 +188,7 @@ jobs: PLATFORM="${{ matrix.platform }}" mkdir -p packages/${PLATFORM} - if [ "$PLATFORM" = "windows-x64" ]; then + if [[ "$PLATFORM" == windows-* ]]; then unzip binary-${PLATFORM}.zip -d packages/${PLATFORM}/ else tar -xzvf binary-${PLATFORM}.tar.gz -C packages/${PLATFORM}/ diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index d430e7caf..a64ddd55f 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -189,7 +189,7 @@ jobs: VERSION="${{ steps.version.outputs.version }}" jq --arg v "$VERSION" '.version = $v' package.json > tmp.json && mv tmp.json package.json - for platform in darwin-arm64 darwin-x64 linux-x64 linux-arm64 linux-x64-musl linux-arm64-musl windows-x64; do + for platform in darwin-arm64 darwin-x64 darwin-x64-baseline linux-x64 linux-x64-baseline linux-arm64 linux-x64-musl linux-x64-musl-baseline linux-arm64-musl windows-x64 windows-x64-baseline; do jq --arg v "$VERSION" '.version = $v' "packages/${platform}/package.json" > tmp.json mv tmp.json "packages/${platform}/package.json" done diff --git a/assets/oh-my-opencode.schema.json b/assets/oh-my-opencode.schema.json index 75a2a26f3..30757523b 100644 --- a/assets/oh-my-opencode.schema.json +++ b/assets/oh-my-opencode.schema.json @@ -24,19 +24,7 @@ "disabled_agents": { "type": "array", "items": { - "type": "string", - "enum": [ - "sisyphus", - "hephaestus", - "prometheus", - "oracle", - "librarian", - "explore", - "multimodal-looker", 
- "metis", - "momus", - "atlas" - ] + "type": "string" } }, "disabled_skills": { @@ -960,6 +948,9 @@ } }, "additionalProperties": false + }, + "allow_non_gpt_model": { + "type": "boolean" } }, "additionalProperties": false @@ -3474,6 +3465,11 @@ "prompt_append": { "type": "string" }, + "max_prompt_tokens": { + "type": "integer", + "exclusiveMinimum": 0, + "maximum": 9007199254740991 + }, "is_unstable_agent": { "type": "boolean" }, diff --git a/benchmarks/bun.lock b/benchmarks/bun.lock new file mode 100644 index 000000000..3a31bf1c5 --- /dev/null +++ b/benchmarks/bun.lock @@ -0,0 +1,62 @@ +{ + "lockfileVersion": 1, + "configVersion": 1, + "workspaces": { + "": { + "name": "hashline-edit-benchmark", + "dependencies": { + "@ai-sdk/openai": "^1.3.0", + "@friendliai/ai-provider": "^1.0.9", + "ai": "^6.0.94", + "zod": "^4.1.0", + }, + }, + }, + "packages": { + "@ai-sdk/gateway": ["@ai-sdk/gateway@3.0.55", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@ai-sdk/provider-utils": "4.0.15", "@vercel/oidc": "3.1.0" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-7xMeTJnCjwRwXKVCiv4Ly4qzWvDuW3+W1WIV0X1EFu6W83d4mEhV9bFArto10MeTw40ewuDjrbrZd21mXKohkw=="], + + "@ai-sdk/openai": ["@ai-sdk/openai@1.3.24", "", { "dependencies": { "@ai-sdk/provider": "1.1.3", "@ai-sdk/provider-utils": "2.2.8" }, "peerDependencies": { "zod": "^3.0.0" } }, "sha512-GYXnGJTHRTZc4gJMSmFRgEQudjqd4PUN0ZjQhPwOAYH1yOAvQoG/Ikqs+HyISRbLPCrhbZnPKCNHuRU4OfpW0Q=="], + + "@ai-sdk/openai-compatible": ["@ai-sdk/openai-compatible@2.0.30", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@ai-sdk/provider-utils": "4.0.15" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-iTjumHf1/u4NhjXYFn/aONM2GId3/o7J1Lp5ql8FCbgIMyRwrmanR5xy1S3aaVkfTscuDvLTzWiy1mAbGzK3nQ=="], + + "@ai-sdk/provider": ["@ai-sdk/provider@1.1.3", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-qZMxYJ0qqX/RfnuIaab+zp8UAeJn/ygXXAffR5I4N0n1IrvA6qBsjc8hXLmBiMV2zoXlifkacF7sEFnYnjBcqg=="], + + 
"@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@2.2.8", "", { "dependencies": { "@ai-sdk/provider": "1.1.3", "nanoid": "^3.3.8", "secure-json-parse": "^2.7.0" }, "peerDependencies": { "zod": "^3.23.8" } }, "sha512-fqhG+4sCVv8x7nFzYnFo19ryhAa3w096Kmc3hWxMQfW/TubPOmt3A6tYZhl4mUfQWWQMsuSkLrtjlWuXBVSGQA=="], + + "@friendliai/ai-provider": ["@friendliai/ai-provider@1.1.4", "", { "dependencies": { "@ai-sdk/openai-compatible": "2.0.30", "@ai-sdk/provider": "3.0.8", "@ai-sdk/provider-utils": "4.0.15" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.12" } }, "sha512-9TU4B1QFqPhbkONjI5afCF7Ox4jOqtGg1xw8mA9QHZdtlEbZxU+mBNvMPlI5pU5kPoN6s7wkXmFmxpID+own1A=="], + + "@opentelemetry/api": ["@opentelemetry/api@1.9.0", "", {}, "sha512-3giAOQvZiH5F9bMlMiv8+GSPMeqg0dbaeo58/0SlA9sxSqZhnUtxzX9/2FzyhS9sWQf5S0GJE0AKBrFqjpeYcg=="], + + "@standard-schema/spec": ["@standard-schema/spec@1.1.0", "", {}, "sha512-l2aFy5jALhniG5HgqrD6jXLi/rUWrKvqN/qJx6yoJsgKhblVd+iqqU4RCXavm/jPityDo5TCvKMnpjKnOriy0w=="], + + "@vercel/oidc": ["@vercel/oidc@3.1.0", "", {}, "sha512-Fw28YZpRnA3cAHHDlkt7xQHiJ0fcL+NRcIqsocZQUSmbzeIKRpwttJjik5ZGanXP+vlA4SbTg+AbA3bP363l+w=="], + + "ai": ["ai@6.0.101", "", { "dependencies": { "@ai-sdk/gateway": "3.0.55", "@ai-sdk/provider": "3.0.8", "@ai-sdk/provider-utils": "4.0.15", "@opentelemetry/api": "1.9.0" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-Ur/NgbgOp1rdhyDiKDk6EOpSgd1g5ADlbcD1cjQJtQsnmhEngz3Rf8nK5JetDh0vnbLy2aEBpaQeL+zvLRWuaA=="], + + "eventsource-parser": ["eventsource-parser@3.0.6", "", {}, "sha512-Vo1ab+QXPzZ4tCa8SwIHJFaSzy4R6SHf7BY79rFBDf0idraZWAkYrDjDj8uWaSm3S2TK+hJ7/t1CEmZ7jXw+pg=="], + + "json-schema": ["json-schema@0.4.0", "", {}, "sha512-es94M3nTIfsEPisRafak+HDLfHXnKBhV3vU5eqPcS3flIWqcxJWgXHXiey3YrpaNsanY5ei1VoYEbOzijuq9BA=="], + + "nanoid": ["nanoid@3.3.11", "", { "bin": { "nanoid": "bin/nanoid.cjs" } }, "sha512-N8SpfPUnUp1bK+PMYW8qSWdl9U+wwNWI4QKxOYDy9JAro3WMX7p2OeVRF9v+347pnakNevPmiHhNmZ2HbFA76w=="], + + "secure-json-parse": 
["secure-json-parse@2.7.0", "", {}, "sha512-6aU+Rwsezw7VR8/nyvKTx8QpWH9FrcYiXXlqC4z5d5XQBDRqtbfsRjnwGyqbi3gddNtWHuEk9OANUotL26qKUw=="], + + "zod": ["zod@4.3.6", "", {}, "sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg=="], + + "@ai-sdk/gateway/@ai-sdk/provider": ["@ai-sdk/provider@3.0.8", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-oGMAgGoQdBXbZqNG0Ze56CHjDZ1IDYOwGYxYjO5KLSlz5HiNQ9udIXsPZ61VWaHGZ5XW/jyjmr6t2xz2jGVwbQ=="], + + "@ai-sdk/gateway/@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@4.0.15", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@standard-schema/spec": "^1.1.0", "eventsource-parser": "^3.0.6" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-8XiKWbemmCbvNN0CLR9u3PQiet4gtEVIrX4zzLxnCj06AwsEDJwJVBbKrEI4t6qE8XRSIvU2irka0dcpziKW6w=="], + + "@ai-sdk/openai-compatible/@ai-sdk/provider": ["@ai-sdk/provider@3.0.8", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-oGMAgGoQdBXbZqNG0Ze56CHjDZ1IDYOwGYxYjO5KLSlz5HiNQ9udIXsPZ61VWaHGZ5XW/jyjmr6t2xz2jGVwbQ=="], + + "@ai-sdk/openai-compatible/@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@4.0.15", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@standard-schema/spec": "^1.1.0", "eventsource-parser": "^3.0.6" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-8XiKWbemmCbvNN0CLR9u3PQiet4gtEVIrX4zzLxnCj06AwsEDJwJVBbKrEI4t6qE8XRSIvU2irka0dcpziKW6w=="], + + "@friendliai/ai-provider/@ai-sdk/provider": ["@ai-sdk/provider@3.0.8", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-oGMAgGoQdBXbZqNG0Ze56CHjDZ1IDYOwGYxYjO5KLSlz5HiNQ9udIXsPZ61VWaHGZ5XW/jyjmr6t2xz2jGVwbQ=="], + + "@friendliai/ai-provider/@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@4.0.15", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@standard-schema/spec": "^1.1.0", "eventsource-parser": "^3.0.6" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, 
"sha512-8XiKWbemmCbvNN0CLR9u3PQiet4gtEVIrX4zzLxnCj06AwsEDJwJVBbKrEI4t6qE8XRSIvU2irka0dcpziKW6w=="], + + "ai/@ai-sdk/provider": ["@ai-sdk/provider@3.0.8", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-oGMAgGoQdBXbZqNG0Ze56CHjDZ1IDYOwGYxYjO5KLSlz5HiNQ9udIXsPZ61VWaHGZ5XW/jyjmr6t2xz2jGVwbQ=="], + + "ai/@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@4.0.15", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@standard-schema/spec": "^1.1.0", "eventsource-parser": "^3.0.6" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-8XiKWbemmCbvNN0CLR9u3PQiet4gtEVIrX4zzLxnCj06AwsEDJwJVBbKrEI4t6qE8XRSIvU2irka0dcpziKW6w=="], + } +} diff --git a/benchmarks/headless.ts b/benchmarks/headless.ts new file mode 100644 index 000000000..ae18853af --- /dev/null +++ b/benchmarks/headless.ts @@ -0,0 +1,193 @@ +#!/usr/bin/env bun +import { readFile, writeFile, mkdir } from "node:fs/promises" +import { join, dirname } from "node:path" +import { stepCountIs, streamText, type CoreMessage } from "ai" +import { tool } from "ai" +import { createFriendli } from "@friendliai/ai-provider" +import { z } from "zod" +import { formatHashLines } from "../src/tools/hashline-edit/hash-computation" +import { normalizeHashlineEdits } from "../src/tools/hashline-edit/normalize-edits" +import { applyHashlineEditsWithReport } from "../src/tools/hashline-edit/edit-operations" +import { canonicalizeFileText, restoreFileText } from "../src/tools/hashline-edit/file-text-canonicalization" + +const DEFAULT_MODEL = "MiniMaxAI/MiniMax-M2.5" +const MAX_STEPS = 50 +const sessionId = `bench-${Date.now()}-${Math.random().toString(36).slice(2, 8)}` + +const emit = (event: Record) => + console.log(JSON.stringify({ sessionId, timestamp: new Date().toISOString(), ...event })) + +// ── CLI ────────────────────────────────────────────────────── +function parseArgs(): { prompt: string; modelId: string } { + const args = process.argv.slice(2) + let prompt = "" + let modelId = DEFAULT_MODEL + for 
(let i = 0; i < args.length; i++) { + if ((args[i] === "-p" || args[i] === "--prompt") && args[i + 1]) { + prompt = args[++i] + } else if ((args[i] === "-m" || args[i] === "--model") && args[i + 1]) { + modelId = args[++i] + } else if (args[i] === "--reasoning-mode" && args[i + 1]) { + i++ // consume + } + // --no-translate, --think consumed silently + } + if (!prompt) { + console.error("Usage: bun run benchmarks/headless.ts -p [-m ]") + process.exit(1) + } + return { prompt, modelId } +} + +// ── Tools ──────────────────────────────────────────────────── +const readFileTool = tool({ + description: "Read a file with hashline-tagged content (LINE#ID format)", + inputSchema: z.object({ path: z.string().describe("File path") }), + execute: async ({ path }) => { + const fullPath = join(process.cwd(), path) + try { + const content = await readFile(fullPath, "utf-8") + const lines = content.split("\n") + const tagged = formatHashLines(content) + return `OK - read file\npath: ${path}\nlines: ${lines.length}\n\n${tagged}` + } catch { + return `Error: File not found: ${path}` + } + }, +}) + +const editFileTool = tool({ + description: "Edit a file using hashline anchors (LINE#ID format)", + inputSchema: z.object({ + path: z.string(), + edits: z.array( + z.object({ + op: z.enum(["replace", "append", "prepend"]), + pos: z.string().optional(), + end: z.string().optional(), + lines: z.union([z.array(z.string()), z.string(), z.null()]), + }) + ).min(1), + }), + execute: async ({ path, edits }) => { + const fullPath = join(process.cwd(), path) + try { + let rawContent = "" + let exists = true + try { + rawContent = await readFile(fullPath, "utf-8") + } catch { + exists = false + } + + const normalized = normalizeHashlineEdits(edits) + + if (!exists) { + const canCreate = normalized.every( + (e) => (e.op === "append" || e.op === "prepend") && !e.pos + ) + if (!canCreate) return `Error: File not found: ${path}` + } + + const envelope = canonicalizeFileText(rawContent) + const result 
= applyHashlineEditsWithReport(envelope.content, normalized) + + if (result.content === envelope.content) { + return `Error: No changes made to ${path}. The edits produced identical content.` + } + + const writeContent = restoreFileText(result.content, envelope) + await mkdir(dirname(fullPath), { recursive: true }) + await writeFile(fullPath, writeContent, "utf-8") + + const oldLineCount = rawContent.split("\n").length + const newLineCount = writeContent.split("\n").length + const delta = newLineCount - oldLineCount + const sign = delta > 0 ? "+" : "" + const action = exists ? "Updated" : "Created" + return `${action} ${path}\n${edits.length} edit(s) applied, ${sign}${delta} line(s)` + } catch (error) { + return `Error: ${error instanceof Error ? error.message : String(error)}` + } + }, +}) + +// ── Agent Loop ─────────────────────────────────────────────── +async function run() { + const { prompt, modelId } = parseArgs() + + const friendli = createFriendli({ apiKey: process.env.FRIENDLI_TOKEN! }) + const model = friendli(modelId) + const tools = { read_file: readFileTool, edit_file: editFileTool } + + emit({ type: "user", content: prompt }) + + const messages: CoreMessage[] = [{ role: "user", content: prompt }] + const system = + "You are a code editing assistant. Use read_file to read files and edit_file to edit them. " + + "Always read a file before editing it to get fresh LINE#ID anchors." + + for (let step = 0; step < MAX_STEPS; step++) { + const stream = streamText({ + model, + tools, + messages, + system, + stopWhen: stepCountIs(1), + }) + + let currentText = "" + for await (const part of stream.fullStream) { + switch (part.type) { + case "text-delta": + currentText += part.text + break + case "tool-call": + emit({ + type: "tool_call", + tool_call_id: part.toolCallId, + tool_name: part.toolName, + tool_input: part.args, + model: modelId, + }) + break + case "tool-result": { + const output = typeof part.result === "string" ? 
part.result : JSON.stringify(part.result) + const isError = typeof output === "string" && output.startsWith("Error:") + emit({ + type: "tool_result", + tool_call_id: part.toolCallId, + output, + ...(isError ? { error: output } : {}), + }) + break + } + } + + const response = await stream.response + messages.push(...response.messages) + + const finishReason = await stream.finishReason + if (finishReason !== "tool-calls") { + if (currentText.trim()) { + emit({ type: "assistant", content: currentText, model: modelId }) + } + break + } + } +} + +// ── Signal + Startup ───────────────────────────────────────── +process.once("SIGINT", () => process.exit(0)) +process.once("SIGTERM", () => process.exit(143)) + +const startTime = Date.now() +run() + .catch((error) => { + emit({ type: "error", error: error instanceof Error ? error.message : String(error) }) + process.exit(1) + }) + .then(() => { + const elapsed = ((Date.now() - startTime) / 1000).toFixed(2) + console.error(`[headless] Completed in ${elapsed}s`) + }) diff --git a/benchmarks/package.json b/benchmarks/package.json new file mode 100644 index 000000000..bbddfed8a --- /dev/null +++ b/benchmarks/package.json @@ -0,0 +1,19 @@ +{ + "name": "hashline-edit-benchmark", + "version": "0.1.0", + "private": true, + "type": "module", + "description": "Hashline edit tool benchmark using Vercel AI SDK with FriendliAI provider", + "scripts": { + "bench:basic": "bun run test-edit-ops.ts", + "bench:edge": "bun run test-edge-cases.ts", + "bench:multi": "bun run test-multi-model.ts", + "bench:all": "bun run bench:basic && bun run bench:edge" + }, + "dependencies": { + "ai": "^6.0.94", + "@ai-sdk/openai": "^1.3.0", + "@friendliai/ai-provider": "^1.0.9", + "zod": "^4.1.0" + } +} diff --git a/benchmarks/test-edge-cases.ts b/benchmarks/test-edge-cases.ts new file mode 100644 index 000000000..b00b0302d --- /dev/null +++ b/benchmarks/test-edge-cases.ts @@ -0,0 +1,1121 @@ +#!/usr/bin/env bun +/** + * Comprehensive headless edit_file 
stress test: 25 edge cases + * + * Tests: 5 basic ops + 14 creative cases + 6 whitespace cases + * Each runs via headless mode with its own demo file + prompt. + * + * Usage: + * bun run scripts/test-headless-edit-edge-cases.ts [-m ] [--provider ] + */ + +import { spawn } from "node:child_process"; +import { mkdirSync, readFileSync, rmSync, writeFileSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join, resolve } from "node:path"; + +// ── CLI arg passthrough ─────────────────────────────────────── +const extraArgs: string[] = []; +const rawArgs = process.argv.slice(2); +for (let i = 0; i < rawArgs.length; i++) { + const arg = rawArgs[i]; + if ( + (arg === "-m" || arg === "--model" || arg === "--provider") && + i + 1 < rawArgs.length + ) { + extraArgs.push(arg, rawArgs[i + 1]); + i++; + } else if (arg === "--think" || arg === "--no-translate") { + extraArgs.push(arg); + } else if (arg === "--reasoning-mode" && i + 1 < rawArgs.length) { + extraArgs.push(arg, rawArgs[i + 1]); + i++; + } +} + +// ── Colors ──────────────────────────────────────────────────── +const BOLD = "\x1b[1m"; +const GREEN = "\x1b[32m"; +const RED = "\x1b[31m"; +const YELLOW = "\x1b[33m"; +const DIM = "\x1b[2m"; +const CYAN = "\x1b[36m"; +const RESET = "\x1b[0m"; + +const pass = (msg: string) => console.log(` ${GREEN}✓${RESET} ${msg}`); +const fail = (msg: string) => console.log(` ${RED}✗${RESET} ${msg}`); +const info = (msg: string) => console.log(` ${DIM}${msg}${RESET}`); +const warn = (msg: string) => console.log(` ${YELLOW}⚠${RESET} ${msg}`); + +// ── Test case definition ───────────────────────────────────── +interface TestCase { + fileContent: string; + fileName: string; + name: string; + prompt: string; + skipFileCreate?: boolean; + validate: (content: string) => { passed: boolean; reason: string }; +} + +const TEST_CASES: TestCase[] = [ + { + name: "1. 
Single-line file — replace only line", + fileName: "single-line.txt", + fileContent: "only_line_original", + prompt: [ + "Read single-line.txt with read_file.", + "Replace the only line using edit_file with edits: [{ op: 'replace', pos: '', lines: ['only_line_updated'] }].", + "Expected final content exactly one line: only_line_updated.", + ].join(" "), + validate: (content) => { + const normalized = content.replace(/\r/g, "").trimEnd(); + const lines = normalized.split("\n"); + if (lines.length === 1 && lines[0] === "only_line_updated") { + return { passed: true, reason: "single line replaced correctly" }; + } + if (normalized.includes("only_line_original")) { + return { passed: false, reason: "original line still present" }; + } + return { + passed: false, + reason: `expected one line 'only_line_updated', got ${lines.length} lines`, + }; + }, + }, + { + name: "2. Large file (20 lines) — replace middle line 11", + fileName: "twenty-lines.txt", + fileContent: Array.from( + { length: 20 }, + (_, i) => `line${String(i + 1).padStart(2, "0")}: value-${i + 1}` + ).join("\n"), + prompt: [ + "Read twenty-lines.txt with read_file.", + "Replace line 11 using edit_file with edits: [{ op: 'replace', pos: '', lines: ['line11: UPDATED-MIDDLE'] }].", + "Keep all other lines unchanged.", + ].join(" "), + validate: (content) => { + const lines = content.replace(/\r/g, "").trimEnd().split("\n"); + if (lines.length !== 20) { + return { + passed: false, + reason: `expected 20 lines, got ${lines.length}`, + }; + } + if (lines[10] !== "line11: UPDATED-MIDDLE") { + return { + passed: false, + reason: `line 11 mismatch: '${lines[10] ?? ""}'`, + }; + } + if (lines[9] !== "line10: value-10" || lines[11] !== "line12: value-12") { + return { + passed: false, + reason: "neighboring lines changed unexpectedly", + }; + } + return { + passed: true, + reason: "line 11 replaced and surrounding lines preserved", + }; + }, + }, + { + name: "3. 
Range replace entire file (first→last to one line)", + fileName: "range-all.txt", + fileContent: ["first", "second", "third", "fourth", "fifth"].join("\n"), + prompt: [ + "Read range-all.txt with read_file.", + "Replace the full file from first line to last line using one range edit: edits: [{ op: 'replace', pos: '', end: '', lines: ['collapsed-to-one-line'] }].", + "Expected final content exactly: collapsed-to-one-line.", + ].join(" "), + validate: (content) => { + const normalized = content.replace(/\r/g, "").trimEnd(); + if (normalized === "collapsed-to-one-line") { + return { + passed: true, + reason: "entire file collapsed to single replacement line", + }; + } + if (normalized.includes("first") || normalized.includes("fifth")) { + return { + passed: false, + reason: "original range content still present", + }; + } + return { + passed: false, + reason: `unexpected final content: '${normalized.slice(0, 120)}'`, + }; + }, + }, + { + name: "4. Mixed ops in one call (replace + append + prepend)", + fileName: "mixed-one-call.txt", + fileContent: ["alpha", "beta", "gamma"].join("\n"), + prompt: [ + "Read mixed-one-call.txt with read_file.", + "Call edit_file exactly once with three edits in one edits array:", + "edits: [", + "{ op: 'replace', pos: '', lines: ['BETA'] },", + "{ op: 'append', pos: '', lines: ['delta'] },", + "{ op: 'prepend', pos: '', lines: ['start'] }", + "].", + "Expected final content: start, alpha, BETA, gamma, delta.", + ].join(" "), + validate: (content) => { + const lines = content.replace(/\r/g, "").trimEnd().split("\n"); + const expected = ["start", "alpha", "BETA", "gamma", "delta"]; + if (lines.length !== expected.length) { + return { + passed: false, + reason: `expected ${expected.length} lines, got ${lines.length}`, + }; + } + for (let i = 0; i < expected.length; i++) { + if (lines[i] !== expected[i]) { + return { + passed: false, + reason: `line ${i + 1} expected '${expected[i]}' but got '${lines[i]}'`, + }; + } + } + return { + passed: 
true, + reason: "single call applied replace, append, and prepend", + }; + }, + }, + { + name: "5. Large batch (5 replaces) in one call", + fileName: "batch-five.txt", + fileContent: [ + "row-1", + "row-2", + "row-3", + "row-4", + "row-5", + "row-6", + "row-7", + "row-8", + "row-9", + "row-10", + ].join("\n"), + prompt: [ + "Read batch-five.txt with read_file.", + "Call edit_file once with five replace edits in one edits array:", + "edits: [", + "{ op: 'replace', pos: '', lines: ['ROW-1'] },", + "{ op: 'replace', pos: '', lines: ['ROW-3'] },", + "{ op: 'replace', pos: '', lines: ['ROW-5'] },", + "{ op: 'replace', pos: '', lines: ['ROW-7'] },", + "{ op: 'replace', pos: '', lines: ['ROW-10'] }", + "].", + ].join(" "), + validate: (content) => { + const lines = content.replace(/\r/g, "").trimEnd().split("\n"); + if (lines.length !== 10) { + return { + passed: false, + reason: `expected 10 lines, got ${lines.length}`, + }; + } + const checks: [number, string][] = [ + [0, "ROW-1"], + [2, "ROW-3"], + [4, "ROW-5"], + [6, "ROW-7"], + [9, "ROW-10"], + ]; + for (const [idx, expected] of checks) { + if (lines[idx] !== expected) { + return { + passed: false, + reason: `line ${idx + 1} expected '${expected}' but got '${lines[idx]}'`, + }; + } + } + if ( + lines[1] !== "row-2" || + lines[3] !== "row-4" || + lines[8] !== "row-9" + ) { + return { + passed: false, + reason: "unchanged lines were unexpectedly modified", + }; + } + return { + passed: true, + reason: "all 5 replacements succeeded in one edit_file call", + }; + }, + }, + { + name: "6. 
Consecutive edits (read→edit→read→edit)", + fileName: "consecutive.txt", + fileContent: ["stage: one", "value: 1", "status: draft"].join("\n"), + prompt: [ + "Read consecutive.txt with read_file.", + "First call edit_file with edits: [{ op: 'replace', pos: '', lines: ['value: 2'] }].", + "Then read consecutive.txt with read_file again.", + "Second, call edit_file again with edits: [{ op: 'replace', pos: '', lines: ['status: final'] }].", + "Expected final content: stage: one, value: 2, status: final.", + ].join(" "), + validate: (content) => { + const lines = content.replace(/\r/g, "").trimEnd().split("\n"); + const expected = ["stage: one", "value: 2", "status: final"]; + if (lines.length !== expected.length) { + return { + passed: false, + reason: `expected ${expected.length} lines, got ${lines.length}`, + }; + } + for (let i = 0; i < expected.length; i++) { + if (lines[i] !== expected[i]) { + return { + passed: false, + reason: `line ${i + 1} expected '${expected[i]}' but got '${lines[i]}'`, + }; + } + } + return { + passed: true, + reason: "two sequential edit_file calls produced expected final state", + }; + }, + }, + { + name: "7. Create new file via append", + fileName: "create-via-append.txt", + fileContent: "", + skipFileCreate: true, + prompt: [ + "Create create-via-append.txt via edit_file append (do not call read_file first).", + "Use one call with edits: [{ op: 'append', lines: ['created line 1', 'created line 2'] }].", + "Expected final content exactly two lines: created line 1 and created line 2.", + ].join(" "), + validate: (content) => { + const normalized = content.replace(/\r/g, "").trimEnd(); + const lines = normalized === "" ? 
[] : normalized.split("\n"); + if (lines.length !== 2) { + return { + passed: false, + reason: `expected 2 lines, got ${lines.length}`, + }; + } + if (lines[0] !== "created line 1" || lines[1] !== "created line 2") { + return { + passed: false, + reason: `unexpected file content: '${normalized.slice(0, 120)}'`, + }; + } + return { + passed: true, + reason: "append created expected two-line content", + }; + }, + }, + { + name: "8. Unicode/emoji line replacement", + fileName: "unicode.txt", + fileContent: ["status: pending", "message: old"].join("\n"), + prompt: [ + "Read unicode.txt with read_file.", + "Replace line 2 with Unicode content using edit_file and edits: [{ op: 'replace', pos: '', lines: ['message: 🎉🚀 한국어 테스트 완료'] }].", + "Expected line 2 exactly: message: 🎉🚀 한국어 테스트 완료.", + ].join(" "), + validate: (content) => { + const lines = content.replace(/\r/g, "").trimEnd().split("\n"); + if (lines[1] !== "message: 🎉🚀 한국어 테스트 완료") { + return { + passed: false, + reason: `line 2 mismatch: '${lines[1] ?? ""}'`, + }; + } + if (content.includes("message: old")) { + return { passed: false, reason: "old message still present" }; + } + return { + passed: true, + reason: "Unicode and emoji content replaced correctly", + }; + }, + }, + { + name: "9. Backticks/template literal content", + fileName: "template.ts", + fileContent: ["const name = 'dev';", "const msg = 'old';"].join("\n"), + prompt: [ + "Read template.ts with read_file.", + "Replace line 2 using edit_file with edits: [{ op: 'replace', pos: '', lines: ['const msg = `hello \u0024{name}`;'] }].", + "Expected line 2 exactly: const msg = `hello \u0024{name}`;", + ].join(" "), + validate: (content) => { + const expected = "const msg = `hello \u0024{name}`;"; + const lines = content.replace(/\r/g, "").trimEnd().split("\n"); + if (lines[1] !== expected) { + return { + passed: false, + reason: `line 2 expected '${expected}' but got '${lines[1] ?? 
""}'`, + }; + } + if (content.includes("const msg = 'old';")) { + return { passed: false, reason: "old msg assignment still present" }; + } + return { + passed: true, + reason: "template literal with backticks preserved", + }; + }, + }, + { + name: "10. Regex pattern content", + fileName: "regex.ts", + fileContent: ["const re = /old/;", "const ok = true;"].join("\n"), + prompt: [ + "Read regex.ts with read_file.", + "Replace line 1 using edit_file with edits: [{ op: 'replace', pos: '', lines: ['const re = /^[a-z]+\\d{2,}$/gi;'] }].", + "Expected line 1 exactly: const re = /^[a-z]+\\d{2,}$/gi;", + ].join(" "), + validate: (content) => { + const expected = "const re = /^[a-z]+\\d{2,}$/gi;"; + const lines = content.replace(/\r/g, "").trimEnd().split("\n"); + if (lines[0] !== expected) { + return { + passed: false, + reason: `regex line mismatch: '${lines[0] ?? ""}'`, + }; + } + if (content.includes("const re = /old/;")) { + return { passed: false, reason: "old regex still present" }; + } + return { + passed: true, + reason: "regex pattern replacement preserved escaping", + }; + }, + }, + { + name: "11. Escaped quotes and backslashes", + fileName: "path.cfg", + fileContent: ['path = "/tmp/file.txt"', "mode = rw"].join("\n"), + prompt: [ + "Read path.cfg with read_file.", + "Replace line 1 using edit_file with edits: [{ op: 'replace', pos: '', lines: ['path = \"C:\\\\Users\\\\admin\\\\file.txt\"'] }].", + 'The file should contain a Windows-style path with backslashes: C:\\Users\\admin\\file.txt.', + ].join(" "), + validate: (content) => { + const lines = content.replace(/\r/g, "").trimEnd().split("\n"); + const line1 = lines[0] ?? 
""; + // Accept either single or double backslashes — both are valid model interpretations + const hasSingleBS = line1.includes('C:\\Users\\admin\\file.txt'); + const hasDoubleBS = line1.includes('C:\\\\Users\\\\admin\\\\file.txt'); + const hasPath = hasSingleBS || hasDoubleBS; + const hasQuotes = line1.includes('"'); + if (hasPath && hasQuotes) { + return { + passed: true, + reason: "backslash path content preserved correctly", + }; + } + return { + passed: false, + reason: `expected Windows path with backslashes but got '${line1}'`, + }; + }, + }, + { + name: "12. HTML tags in content", + fileName: "html-snippet.txt", + fileContent: ["snippet: old", "done: true"].join("\n"), + prompt: [ + "Read html-snippet.txt with read_file.", + "Replace line 1 using edit_file with edits: [{ op: 'replace', pos: '', lines: ['

Hello

'] }].", + 'Expected line 1 exactly:

Hello

.', + ].join(" "), + validate: (content) => { + const expected = '

Hello

'; + const lines = content.replace(/\r/g, "").trimEnd().split("\n"); + if (lines[0] !== expected) { + return { + passed: false, + reason: `HTML line mismatch: '${lines[0] ?? ""}'`, + }; + } + if (content.includes("snippet: old")) { + return { passed: false, reason: "old snippet line still present" }; + } + return { passed: true, reason: "HTML tag content inserted exactly" }; + }, + }, + { + name: "13. Very long line (180 chars)", + fileName: "long-line.txt", + fileContent: ["line-1", "short-line"].join("\n"), + prompt: [ + "Read long-line.txt with read_file.", + `Replace line 2 using edit_file with edits: [{ op: 'replace', pos: '', lines: ['${"L".repeat(180)}'] }].`, + "Expected line 2 to be exactly 180 characters.", + ].join(" "), + validate: (content) => { + const expected = "L".repeat(180); + const lines = content.replace(/\r/g, "").trimEnd().split("\n"); + if (!lines[1]) { + return { passed: false, reason: "line 2 is missing" }; + } + if (Math.abs(lines[1].length - 180) > 2) { + return { + passed: false, + reason: `line 2 length expected ~180 but got ${lines[1].length}`, + }; + } + if (!lines[1].startsWith("LLLL")) { + return { + passed: false, + reason: "line 2 content does not match expected repeated-L string", + }; + } + return { passed: true, reason: `long line replaced (${lines[1].length} chars)` }; + }, + }, + { + name: "14. 
SQL query content", + fileName: "sql-content.txt", + fileContent: ["SELECT 1;", "done"].join("\n"), + prompt: [ + "Read sql-content.txt with read_file.", + "Replace line 1 using edit_file with edits: [{ op: 'replace', pos: '', lines: ['SELECT u.name, o.total FROM users u JOIN orders o ON u.id = o.user_id WHERE o.total > 100;'] }].", + "Expected line 1 exactly the provided SQL query.", + ].join(" "), + validate: (content) => { + const expected = + "SELECT u.name, o.total FROM users u JOIN orders o ON u.id = o.user_id WHERE o.total > 100;"; + const lines = content.replace(/\r/g, "").trimEnd().split("\n"); + if (lines[0] !== expected) { + return { + passed: false, + reason: `SQL line mismatch: '${lines[0] ?? ""}'`, + }; + } + return { passed: true, reason: "SQL query line replaced exactly" }; + }, + }, + { + name: "15. Mixed indentation (tab -> spaces)", + fileName: "mixed-indent.ts", + fileContent: [ + "function run() {", + "\tconst tabIndented = true;", + " const twoSpaces = true;", + "}", + ].join("\n"), + prompt: [ + "Read mixed-indent.ts with read_file.", + "Replace the tab-indented line 2 using edit_file with edits: [{ op: 'replace', pos: '', lines: [' const tabIndented = true;'] }].", + "Expected line 2 to be 4 spaces + const tabIndented = true;", + ].join(" "), + validate: (content) => { + const normalized = content.replace(/\r/g, ""); + const lines = normalized.endsWith("\n") + ? normalized.slice(0, -1).split("\n") + : normalized.split("\n"); + if (lines[1] !== " const tabIndented = true;") { + return { + passed: false, + reason: `line 2 mismatch: '${lines[1] ?? ""}'`, + }; + } + if (lines[1].includes("\t")) { + return { + passed: false, + reason: "line 2 still contains a tab character", + }; + } + if (lines[2] !== " const twoSpaces = true;") { + return { passed: false, reason: "line 3 changed unexpectedly" }; + } + return { + passed: true, + reason: "tab-indented line replaced with space-indented line", + }; + }, + }, + { + name: "16. 
Trailing whitespace preservation", + fileName: "trailing-whitespace.txt", + fileContent: ["start", "text ", "end"].join("\n"), + prompt: [ + "Read trailing-whitespace.txt with read_file.", + "Replace line 2 using edit_file with edits: [{ op: 'replace', pos: '', lines: ['new_text '] }].", + "Keep exactly three trailing spaces after new_text.", + ].join(" "), + validate: (content) => { + const normalized = content.replace(/\r/g, ""); + const lines = normalized.endsWith("\n") + ? normalized.slice(0, -1).split("\n") + : normalized.split("\n"); + if (!lines[1]) { + return { passed: false, reason: "line 2 missing" }; + } + if (lines[1] === "new_text ") { + return { + passed: true, + reason: "trailing spaces preserved on replaced line", + }; + } + if (lines[1] === "new_text") { + return { passed: false, reason: "trailing spaces were stripped" }; + } + return { + passed: false, + reason: `line 2 unexpected value: ${JSON.stringify(lines[1])}`, + }; + }, + }, + { + name: "17. Replace line containing only spaces", + fileName: "spaces-only-line.txt", + fileContent: ["alpha", " ", "omega"].join("\n"), + prompt: [ + "Read spaces-only-line.txt with read_file.", + "Replace the line that contains only 4 spaces (line 2) using edit_file with edits: [{ op: 'replace', pos: '', lines: ['middle-content'] }].", + "Expected final content: alpha, middle-content, omega.", + ].join(" "), + validate: (content) => { + const normalized = content.replace(/\r/g, ""); + const lines = normalized.endsWith("\n") + ? 
normalized.slice(0, -1).split("\n") + : normalized.split("\n"); + if (lines.length !== 3) { + return { + passed: false, + reason: `expected 3 lines, got ${lines.length}`, + }; + } + if (lines[0] !== "alpha" || lines[2] !== "omega") { + return { + passed: false, + reason: "non-target lines changed unexpectedly", + }; + } + if (lines[1] !== "middle-content") { + return { + passed: false, + reason: `line 2 expected 'middle-content' but got ${JSON.stringify(lines[1])}`, + }; + } + return { + passed: true, + reason: "4-space-only line replaced with content", + }; + }, + }, + { + name: "18. Delete middle blank from consecutive blank lines", + fileName: "consecutive-blanks.txt", + fileContent: ["top", "", "", "", "bottom"].join("\n"), + prompt: [ + "Read consecutive-blanks.txt with read_file.", + "Delete only the middle blank line (line 3 of 5) using edit_file with edits: [{ op: 'replace', pos: '', lines: [] }].", + "Keep the other two blank lines intact.", + ].join(" "), + validate: (content) => { + const normalized = content.replace(/\r/g, ""); + const lines = normalized.endsWith("\n") + ? normalized.slice(0, -1).split("\n") + : normalized.split("\n"); + const expected = ["top", "", "", "bottom"]; + if (lines.length !== expected.length) { + return { + passed: false, + reason: `expected ${expected.length} lines after deleting one blank, got ${lines.length}`, + }; + } + for (let i = 0; i < expected.length; i++) { + if (lines[i] !== expected[i]) { + return { + passed: false, + reason: `line ${i + 1} expected ${JSON.stringify(expected[i])} but got ${JSON.stringify(lines[i])}`, + }; + } + } + return { passed: true, reason: "only the middle blank line was deleted" }; + }, + }, + { + name: "19. 
Indentation increase (2 spaces -> 8 spaces)", + fileName: "indent-increase.js", + fileContent: ["if (flag) {", " execute();", "}"].join("\n"), + prompt: [ + "Read indent-increase.js with read_file.", + "Replace line 2 using edit_file with edits: [{ op: 'replace', pos: '', lines: [' execute();'] }].", + "Expected line 2 indentation increased from 2 spaces to 8 spaces.", + ].join(" "), + validate: (content) => { + const normalized = content.replace(/\r/g, ""); + const lines = normalized.endsWith("\n") + ? normalized.slice(0, -1).split("\n") + : normalized.split("\n"); + if (lines.length !== 3) { + return { + passed: false, + reason: `expected 3 lines, got ${lines.length}`, + }; + } + if (lines[1] !== " execute();") { + return { + passed: false, + reason: `line 2 expected 8-space indentation, got ${JSON.stringify(lines[1])}`, + }; + } + if (lines[0] !== "if (flag) {" || lines[2] !== "}") { + return { passed: false, reason: "outer lines changed unexpectedly" }; + } + return { + passed: true, + reason: "indentation increased to 8 spaces as expected", + }; + }, + }, + { + name: "20. Content that resembles hashline format", + fileName: "hashline-content.txt", + fileContent: ["anchor: old", "tail"].join("\n"), + prompt: [ + "Read hashline-content.txt with read_file.", + "Replace line 1 using edit_file with edits: [{ op: 'replace', pos: '', lines: ['anchor: 1#AB format is used'] }].", + "Expected line 1 exactly: anchor: 1#AB format is used.", + ].join(" "), + validate: (content) => { + const lines = content.replace(/\r/g, "").trimEnd().split("\n"); + if (lines[0] !== "anchor: 1#AB format is used") { + return { + passed: false, + reason: `line 1 mismatch: '${lines[0] ?? ""}'`, + }; + } + return { + passed: true, + reason: "hashline-like literal content preserved correctly", + }; + }, + }, + { + name: "21. 
Literal backslash-n content", + fileName: "literal-backslash-n.txt", + fileContent: ["placeholder", "tail"].join("\n"), + prompt: [ + "Read literal-backslash-n.txt with read_file.", + "Replace line 1 using edit_file with edits: [{ op: 'replace', pos: '', lines: ['line1\\nline2 (literal backslash-n, not newline)'] }].", + "Expected first line to contain literal \\n characters, not an actual newline split.", + ].join(" "), + validate: (content) => { + const expected = "line1\\nline2 (literal backslash-n, not newline)"; + const lines = content.replace(/\r/g, "").trimEnd().split("\n"); + if (lines.length !== 2) { + return { + passed: false, + reason: `expected 2 lines total, got ${lines.length}`, + }; + } + if (lines[0] !== expected) { + return { + passed: false, + reason: `line 1 expected '${expected}' but got '${lines[0] ?? ""}'`, + }; + } + return { + passed: true, + reason: "literal \\n sequence preserved in a single line", + }; + }, + }, + { + name: "22. Append multiple lines at once", + fileName: "append-multi.txt", + fileContent: ["header", "anchor-line", "footer"].join("\n"), + prompt: [ + "Read append-multi.txt with read_file.", + "Append three lines after anchor-line (line 2) using edit_file with edits: [{ op: 'append', pos: '', lines: ['item-a', 'item-b', 'item-c'] }].", + "Expected final order: header, anchor-line, item-a, item-b, item-c, footer.", + ].join(" "), + validate: (content) => { + const lines = content.replace(/\r/g, "").trimEnd().split("\n"); + const expected = [ + "header", + "anchor-line", + "item-a", + "item-b", + "item-c", + "footer", + ]; + if (lines.length !== expected.length) { + return { + passed: false, + reason: `expected ${expected.length} lines, got ${lines.length}`, + }; + } + for (let i = 0; i < expected.length; i++) { + if (lines[i] !== expected[i]) { + return { + passed: false, + reason: `line ${i + 1} expected '${expected[i]}' but got '${lines[i]}'`, + }; + } + } + return { + passed: true, + reason: "three lines appended in a 
single append edit", + }; + }, + }, + { + name: "23. Replace long line with single short word", + fileName: "shrink-line.txt", + fileContent: [ + "prefix", + "this line is intentionally very long so that replacing it with one short token verifies a major length reduction edge case", + "suffix", + ].join("\n"), + prompt: [ + "Read shrink-line.txt with read_file.", + "Replace the long line 2 using edit_file with edits: [{ op: 'replace', pos: '', lines: ['short'] }].", + "Expected final line 2 exactly: short.", + ].join(" "), + validate: (content) => { + const lines = content.replace(/\r/g, "").trimEnd().split("\n"); + if (lines[1] !== "short") { + return { + passed: false, + reason: `line 2 expected 'short' but got '${lines[1] ?? ""}'`, + }; + } + if (content.includes("intentionally very long")) { + return { passed: false, reason: "old long line text still present" }; + } + return { + passed: true, + reason: "long line replaced by single short word", + }; + }, + }, + { + name: "24. Edit file with no trailing newline", + fileName: "no-trailing-newline.txt", + fileContent: "first\nsecond\nthird", + prompt: [ + "Read no-trailing-newline.txt with read_file.", + "Replace line 2 using edit_file with edits: [{ op: 'replace', pos: '', lines: ['SECOND'] }].", + "Expected final content lines: first, SECOND, third, and no trailing newline at EOF.", + ].join(" "), + validate: (content) => { + const normalized = content.replace(/\r/g, ""); + const lines = normalized.split("\n"); + if (lines.length !== 3) { + return { + passed: false, + reason: `expected 3 lines, got ${lines.length}`, + }; + } + if ( + lines[0] !== "first" || + lines[1] !== "SECOND" || + lines[2] !== "third" + ) { + return { + passed: false, + reason: `unexpected lines: ${JSON.stringify(lines)}`, + }; + } + if (normalized.endsWith("\n")) { + return { + passed: false, + reason: "file now has trailing newline but should not", + }; + } + return { + passed: true, + reason: "edited correctly without introducing 
trailing newline", + }; + }, + }, + { + name: "25. Prepend at BOF without pos anchor", + fileName: "prepend-bof.js", + fileContent: ["console.log('hello');", "console.log('done');"].join("\n"), + prompt: [ + "Read prepend-bof.js with read_file.", + "Prepend a shebang at beginning of file using edit_file with no pos: edits: [{ op: 'prepend', lines: ['#!/usr/bin/env node'] }].", + "Do not include a pos field. Expected first line: #!/usr/bin/env node.", + ].join(" "), + validate: (content) => { + const lines = content.replace(/\r/g, "").trimEnd().split("\n"); + const expected = [ + "#!/usr/bin/env node", + "console.log('hello');", + "console.log('done');", + ]; + if (lines.length !== expected.length) { + return { + passed: false, + reason: `expected ${expected.length} lines, got ${lines.length}`, + }; + } + for (let i = 0; i < expected.length; i++) { + if (lines[i] !== expected[i]) { + return { + passed: false, + reason: `line ${i + 1} expected '${expected[i]}' but got '${lines[i]}'`, + }; + } + } + return { + passed: true, + reason: "shebang prepended at BOF without pos anchor", + }; + }, + }, +]; + +// ── JSONL event types ───────────────────────────────────────── +interface ToolCallEvent { + tool_call_id: string; + tool_input: Record; + tool_name: string; + type: "tool_call"; +} + +interface ToolResultEvent { + error?: string; + output: string; + tool_call_id: string; + type: "tool_result"; +} + +interface AnyEvent { + type: string; + [key: string]: unknown; +} + +// ── Run single test case ───────────────────────────────────── +async function runTestCase( + tc: TestCase, + testDir: string +): Promise<{ + passed: boolean; + editCalls: number; + editSuccesses: number; + duration: number; +}> { + const testFile = join(testDir, tc.fileName); + if (!tc.skipFileCreate) { + writeFileSync(testFile, tc.fileContent, "utf-8"); + } + + const headlessScript = resolve(import.meta.dir, "headless.ts"); + const headlessArgs = [ + "run", + headlessScript, + "-p", + tc.prompt, + 
"--no-translate", + ...extraArgs, + ]; + + const startTime = Date.now(); + + const output = await new Promise((res, reject) => { + const proc = spawn("bun", headlessArgs, { + cwd: testDir, + env: { ...process.env, BUN_INSTALL: process.env.BUN_INSTALL }, + stdio: ["ignore", "pipe", "pipe"], + }); + + let stdout = ""; + let stderr = ""; + + proc.stdout.on("data", (chunk: Buffer) => { + stdout += chunk.toString(); + }); + proc.stderr.on("data", (chunk: Buffer) => { + stderr += chunk.toString(); + }); + + const timeout = setTimeout( + () => { + proc.kill("SIGTERM"); + reject(new Error("Timed out after 4 minutes")); + }, + 4 * 60 * 1000 + ); + + proc.on("close", (code) => { + clearTimeout(timeout); + if (code !== 0) { + reject(new Error(`Exit code ${code}\n${stderr.slice(-500)}`)); + } else { + res(stdout); + } + }); + proc.on("error", (err) => { + clearTimeout(timeout); + reject(err); + }); + }); + + const duration = Date.now() - startTime; + + // Parse events + const events: AnyEvent[] = []; + for (const line of output.split("\n").filter((l) => l.trim())) { + try { + events.push(JSON.parse(line) as AnyEvent); + } catch { + // skip non-JSON + } + } + + const toolCalls = events.filter( + (e) => e.type === "tool_call" + ) as unknown as ToolCallEvent[]; + const toolResults = events.filter( + (e) => e.type === "tool_result" + ) as unknown as ToolResultEvent[]; + + const editCalls = toolCalls.filter((e) => e.tool_name === "edit_file"); + const editCallIds = new Set(editCalls.map((e) => e.tool_call_id)); + const editResults = toolResults.filter((e) => + editCallIds.has(e.tool_call_id) + ); + const editSuccesses = editResults.filter((e) => !e.error); + + // Show blocked calls + const editErrors = editResults.filter((e) => e.error); + for (const err of editErrors) { + const matchingCall = editCalls.find( + (c) => c.tool_call_id === err.tool_call_id + ); + info(` blocked: ${err.error?.slice(0, 120)}`); + if (matchingCall) { + info(` input: 
${JSON.stringify(matchingCall.tool_input).slice(0, 200)}`); + } + } + + // Validate file content + let finalContent: string; + try { + finalContent = readFileSync(testFile, "utf-8"); + } catch { + return { + passed: false, + editCalls: editCalls.length, + editSuccesses: editSuccesses.length, + duration, + }; + } + + const validation = tc.validate(finalContent); + + return { + passed: validation.passed, + editCalls: editCalls.length, + editSuccesses: editSuccesses.length, + duration, + }; +} + +// ── Main ────────────────────────────────────────────────────── +const main = async () => { + console.log( + `\n${BOLD}Headless Edit Operations Test — ${TEST_CASES.length} Types${RESET}\n` + ); + + const testDir = join(tmpdir(), `edit-ops-${Date.now()}`); + mkdirSync(testDir, { recursive: true }); + info(`Test dir: ${testDir}`); + console.log(); + + let totalPassed = 0; + const results: { name: string; passed: boolean; detail: string }[] = []; + + for (const tc of TEST_CASES) { + console.log(`${CYAN}${BOLD}${tc.name}${RESET}`); + info(`File: ${tc.fileName}`); + info(`Prompt: "${tc.prompt.slice(0, 80)}..."`); + + try { + const result = await runTestCase(tc, testDir); + const status = result.passed + ? 
`${GREEN}PASS${RESET}` + : `${RED}FAIL${RESET}`; + const detail = `edit_file: ${result.editSuccesses}/${result.editCalls} succeeded, ${(result.duration / 1000).toFixed(1)}s`; + + console.log(` ${status} — ${detail}`); + + if (result.passed) { + totalPassed++; + // Validate the file to show reason + const content = readFileSync(join(testDir, tc.fileName), "utf-8"); + const v = tc.validate(content); + pass(v.reason); + } else { + const content = readFileSync(join(testDir, tc.fileName), "utf-8"); + const v = tc.validate(content); + fail(v.reason); + info( + `Final content:\n${content + .split("\n") + .map((l, i) => ` ${i + 1}: ${l}`) + .join("\n")}` + ); + } + + results.push({ name: tc.name, passed: result.passed, detail }); + } catch (error) { + const msg = error instanceof Error ? error.message : String(error); + console.log(` ${RED}ERROR${RESET} — ${msg.slice(0, 200)}`); + fail(msg.slice(0, 200)); + results.push({ name: tc.name, passed: false, detail: msg.slice(0, 100) }); + } + + // Reset file for next test (in case of side effects) + try { + rmSync(join(testDir, tc.fileName), { force: true }); + } catch (error) { + warn(`cleanup failed for ${tc.fileName}: ${error}`); + } + + console.log(); + } + + // Summary + console.log(`${BOLD}━━━ Summary ━━━${RESET}`); + for (const r of results) { + const icon = r.passed ? 
`${GREEN}✓${RESET}` : `${RED}✗${RESET}`; + console.log(` ${icon} ${r.name} — ${r.detail}`); + } + console.log(); + console.log( + `${BOLD}Result: ${totalPassed}/${TEST_CASES.length} passed (${Math.round((totalPassed / TEST_CASES.length) * 100)}%)${RESET}` + ); + + // Cleanup + try { + rmSync(testDir, { recursive: true, force: true }); + } catch (error) { + warn(`cleanup failed for ${testDir}: ${error}`); + } + + if (totalPassed === TEST_CASES.length) { + console.log( + `\n${BOLD}${GREEN}🎉 ALL TESTS PASSED — 100% success rate!${RESET}\n` + ); + process.exit(0); + } else { + console.log(`\n${BOLD}${RED}Some tests failed.${RESET}\n`); + process.exit(1); + } +}; + +main(); diff --git a/benchmarks/test-edit-ops.ts b/benchmarks/test-edit-ops.ts new file mode 100644 index 000000000..05d63b4d2 --- /dev/null +++ b/benchmarks/test-edit-ops.ts @@ -0,0 +1,808 @@ +#!/usr/bin/env bun +/** + * Comprehensive headless edit_file stress test: 21 operation types + * + * Tests: 5 basic ops + 10 creative cases + 6 whitespace cases + * Each runs via headless mode with its own demo file + prompt. 
+ * + * Usage: + * bun run benchmarks/test-edit-ops.ts [-m ] [--provider ] + */ + +import { spawn } from "node:child_process"; +import { mkdirSync, readFileSync, rmSync, writeFileSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join, resolve } from "node:path"; + +// ── CLI arg passthrough ─────────────────────────────────────── +const extraArgs: string[] = []; +const rawArgs = process.argv.slice(2); +for (let i = 0; i < rawArgs.length; i++) { + const arg = rawArgs[i]; + if ( + (arg === "-m" || arg === "--model" || arg === "--provider") && + i + 1 < rawArgs.length + ) { + extraArgs.push(arg, rawArgs[i + 1]); + i++; + } else if (arg === "--think" || arg === "--no-translate") { + extraArgs.push(arg); + } else if (arg === "--reasoning-mode" && i + 1 < rawArgs.length) { + extraArgs.push(arg, rawArgs[i + 1]); + i++; + } +} + +// ── Colors ──────────────────────────────────────────────────── +const BOLD = "\x1b[1m"; +const GREEN = "\x1b[32m"; +const RED = "\x1b[31m"; +const YELLOW = "\x1b[33m"; +const DIM = "\x1b[2m"; +const CYAN = "\x1b[36m"; +const RESET = "\x1b[0m"; + +const pass = (msg: string) => console.log(` ${GREEN}✓${RESET} ${msg}`); +const fail = (msg: string) => console.log(` ${RED}✗${RESET} ${msg}`); +const info = (msg: string) => console.log(` ${DIM}${msg}${RESET}`); +const warn = (msg: string) => console.log(` ${YELLOW}⚠${RESET} ${msg}`); + +// ── Test case definition ───────────────────────────────────── +interface TestCase { + fileContent: string; + fileName: string; + name: string; + prompt: string; + validate: (content: string) => { passed: boolean; reason: string }; +} + +const TEST_CASES: TestCase[] = [ + { + name: "1.
Replace single line", + fileName: "config.txt", + fileContent: [ + "host: localhost", + "port: 3000", + "debug: false", + "timeout: 30", + "retries: 3", + ].join("\n"), + prompt: [ + "Follow these steps exactly:", + "Step 1: Call read_file on config.txt.", + "Step 2: Note the anchor for the port line (line 2).", + "Step 3: Call edit_file with path='config.txt' and edits containing ONE object:", + " { op: 'replace', pos: '', lines: ['port: 8080'] }", + "IMPORTANT: pos must be ONLY the anchor (like '2#KB'). lines must be a SEPARATE array field with the new content.", + ].join(" "), + validate: (content) => { + const has8080 = content.includes("port: 8080"); + const has3000 = content.includes("port: 3000"); + if (has8080 && !has3000) { + return { passed: true, reason: "port changed to 8080" }; + } + if (has3000) { + return { passed: false, reason: "port still 3000 — edit not applied" }; + } + return { + passed: false, + reason: `unexpected content: ${content.slice(0, 100)}`, + }; + }, + }, + { + name: "2. Append after line", + fileName: "fruits.txt", + fileContent: ["apple", "banana", "cherry"].join("\n"), + prompt: + "Read fruits.txt with read_file. Then use edit_file with op='append' to insert a new line 'grape' after the 'banana' line. 
Use pos='LINE#HASH' of the banana line and lines=['grape'].", + validate: (content) => { + const lines = content.trim().split("\n"); + const bananaIdx = lines.findIndex((l) => l.trim() === "banana"); + const grapeIdx = lines.findIndex((l) => l.trim() === "grape"); + if (grapeIdx === -1) { + return { passed: false, reason: '"grape" not found in file' }; + } + if (bananaIdx === -1) { + return { passed: false, reason: '"banana" was removed' }; + } + if (grapeIdx !== bananaIdx + 1) { + return { + passed: false, + reason: `"grape" at line ${grapeIdx + 1} but expected after "banana" at line ${bananaIdx + 1}`, + }; + } + if (lines.length !== 4) { + return { + passed: false, + reason: `expected 4 lines, got ${lines.length}`, + }; + } + return { + passed: true, + reason: '"grape" correctly appended after "banana"', + }; + }, + }, + { + name: "3. Prepend before line", + fileName: "code.txt", + fileContent: ["function greet() {", ' return "hello";', "}"].join("\n"), + prompt: + "Read code.txt with read_file. Then use edit_file with op='prepend' to add '// Greeting function' before the function line. Use pos='LINE#HASH' of the function line and lines=['// Greeting function'].", + validate: (content) => { + const lines = content.trim().split("\n"); + const commentIdx = lines.findIndex( + (l) => l.trim().startsWith("//") && l.toLowerCase().includes("greet") + ); + const funcIdx = lines.findIndex((l) => + l.trim().startsWith("function greet") + ); + if (commentIdx === -1) { + return { passed: false, reason: "comment line not found" }; + } + if (funcIdx === -1) { + return { passed: false, reason: '"function greet" line was removed' }; + } + if (commentIdx !== funcIdx - 1) { + return { + passed: false, + reason: `comment at line ${commentIdx + 1} but function at ${funcIdx + 1} — not directly before`, + }; + } + return { + passed: true, + reason: "comment correctly prepended before function", + }; + }, + }, + { + name: "4. 
Range replace (multi-line → single line)", + fileName: "log.txt", + fileContent: [ + "=== Log Start ===", + "INFO: started", + "WARN: slow query", + "ERROR: timeout", + "INFO: recovered", + "=== Log End ===", + ].join("\n"), + prompt: [ + "Follow these steps exactly:", + "Step 1: Call read_file on log.txt to see line anchors.", + "Step 2: Note the anchor for 'WARN: slow query' (line 3) and 'ERROR: timeout' (line 4).", + "Step 3: Call edit_file with path='log.txt' and edits containing ONE object with THREE separate JSON fields:", + " { op: 'replace', pos: '', end: '', lines: ['RESOLVED: issues cleared'] }", + "CRITICAL: pos, end, and lines are THREE SEPARATE JSON fields. pos is ONLY '3#XX'. end is ONLY '4#YY'. lines is ['RESOLVED: issues cleared'].", + "If edit_file fails or errors, use write_file to write the complete correct file content instead.", + "The correct final content should be: === Log Start ===, INFO: started, RESOLVED: issues cleared, INFO: recovered, === Log End ===", + "Do not make any other changes.", + ].join(" "), + validate: (content) => { + const lines = content.trim().split("\n"); + const hasResolved = lines.some( + (l) => l.trim() === "RESOLVED: issues cleared" + ); + const hasWarn = content.includes("WARN: slow query"); + const hasError = content.includes("ERROR: timeout"); + if (!hasResolved) { + return { + passed: false, + reason: '"RESOLVED: issues cleared" not found', + }; + } + if (hasWarn || hasError) { + return { passed: false, reason: "old WARN/ERROR lines still present" }; + } + // Core assertion: 2 old lines removed, 1 new line added = net -1 line + // Allow slight overshoot from model adding extra content + if (lines.length < 4 || lines.length > 6) { + return { + passed: false, + reason: `expected ~5 lines, got ${lines.length}`, + }; + } + return { + passed: true, + reason: "range replace succeeded — 2 lines → 1 line", + }; + }, + }, + { + name: "5. 
Delete line", + fileName: "settings.txt", + fileContent: [ + "mode: production", + "debug: true", + "cache: enabled", + "log_level: info", + ].join("\n"), + prompt: [ + "Follow these steps exactly:", + "Step 1: Call read_file on settings.txt to see line anchors.", + "Step 2: Note the anchor for 'debug: true' (line 2).", + "Step 3: Call edit_file with path='settings.txt' and edits containing ONE object:", + " { op: 'replace', pos: '', lines: [] }", + "IMPORTANT: lines must be an empty array [] to delete the line. pos must be ONLY the anchor like '2#SR'.", + ].join(" "), + validate: (content) => { + const lines = content.trim().split("\n"); + const hasDebug = content.includes("debug: true"); + if (hasDebug) { + return { passed: false, reason: '"debug: true" still present' }; + } + if (lines.length !== 3) { + return { + passed: false, + reason: `expected 3 lines, got ${lines.length}`, + }; + } + if ( + !( + content.includes("mode: production") && + content.includes("cache: enabled") + ) + ) { + return { passed: false, reason: "other lines were removed" }; + } + return { passed: true, reason: '"debug: true" successfully deleted' }; + }, + }, + + // ── Creative cases (6-15) ──────────────────────────────────── + { + name: "6. 
Batch edit — two replacements in one call", + fileName: "batch.txt", + fileContent: ["red", "green", "blue", "yellow"].join("\n"), + prompt: [ + "Read batch.txt with read_file.", + "Then call edit_file ONCE with path='batch.txt' and edits containing TWO objects:", + " 1) { op: 'replace', pos: '', lines: ['crimson'] }", + " 2) { op: 'replace', pos: '', lines: ['navy'] }", + "Both edits must be in the SAME edits array in a single edit_file call.", + ].join(" "), + validate: (c) => { + const lines = c.trim().split("\n"); + if (!c.includes("crimson")) return { passed: false, reason: "'crimson' not found" }; + if (!c.includes("navy")) return { passed: false, reason: "'navy' not found" }; + if (c.includes("red")) return { passed: false, reason: "'red' still present" }; + if (c.includes("blue")) return { passed: false, reason: "'blue' still present" }; + if (lines.length !== 4) return { passed: false, reason: `expected 4 lines, got ${lines.length}` }; + return { passed: true, reason: "both lines replaced in single call" }; + }, + }, + { + name: "7. 
Line expansion — 1 line → 3 lines", + fileName: "expand.txt", + fileContent: ["header", "TODO: implement", "footer"].join("\n"), + prompt: [ + "Read expand.txt with read_file.", + "Replace the 'TODO: implement' line (line 2) with THREE lines:", + " 'step 1: init', 'step 2: process', 'step 3: cleanup'", + "Use edit_file with op='replace', pos=, lines=['step 1: init', 'step 2: process', 'step 3: cleanup'].", + ].join(" "), + validate: (c) => { + const lines = c.trim().split("\n"); + if (c.includes("TODO")) return { passed: false, reason: "TODO line still present" }; + if (!c.includes("step 1: init")) return { passed: false, reason: "'step 1: init' not found" }; + if (!c.includes("step 3: cleanup")) return { passed: false, reason: "'step 3: cleanup' not found" }; + if (lines.length !== 5) return { passed: false, reason: `expected 5 lines, got ${lines.length}` }; + return { passed: true, reason: "1 line expanded to 3 lines" }; + }, + }, + { + name: "8. Append at EOF", + fileName: "eof.txt", + fileContent: ["line one", "line two"].join("\n"), + prompt: [ + "Read eof.txt with read_file.", + "Use edit_file to append 'line three' after the LAST line of the file.", + "Use op='append', pos=, lines=['line three'].", + ].join(" "), + validate: (c) => { + const lines = c.trim().split("\n"); + if (!c.includes("line three")) return { passed: false, reason: "'line three' not found" }; + if (lines[lines.length - 1].trim() !== "line three") + return { passed: false, reason: "'line three' not at end" }; + if (lines.length !== 3) return { passed: false, reason: `expected 3 lines, got ${lines.length}` }; + return { passed: true, reason: "appended at EOF" }; + }, + }, + { + name: "9. 
Special characters in content", + fileName: "special.json", + fileContent: [ + '{', + ' "name": "old-value",', + ' "count": 42', + '}', + ].join("\n"), + prompt: [ + "Read special.json with read_file.", + 'Replace the line containing \"name\": \"old-value\" with \"name\": \"new-value\".', + "Use edit_file with op='replace', pos=, lines=[' \"name\": \"new-value\",'].", + ].join(" "), + validate: (c) => { + if (c.includes("old-value")) return { passed: false, reason: "'old-value' still present" }; + if (!c.includes('"new-value"')) return { passed: false, reason: "'new-value' not found" }; + if (!c.includes('"count": 42')) return { passed: false, reason: "other content was modified" }; + return { passed: true, reason: "JSON value replaced with special chars intact" }; + }, + }, + { + name: "10. Replace first line", + fileName: "first.txt", + fileContent: ["OLD HEADER", "body content", "footer"].join("\n"), + prompt: [ + "Read first.txt with read_file.", + "Replace the very first line 'OLD HEADER' with 'NEW HEADER'.", + "Use edit_file with op='replace', pos=, lines=['NEW HEADER'].", + ].join(" "), + validate: (c) => { + const lines = c.trim().split("\n"); + if (c.includes("OLD HEADER")) return { passed: false, reason: "'OLD HEADER' still present" }; + if (lines[0].trim() !== "NEW HEADER") return { passed: false, reason: "first line is not 'NEW HEADER'" }; + if (!c.includes("body content")) return { passed: false, reason: "body was modified" }; + return { passed: true, reason: "first line replaced" }; + }, + }, + { + name: "11. 
Replace last line", + fileName: "last.txt", + fileContent: ["alpha", "bravo", "OLD_FOOTER"].join("\n"), + prompt: [ + "Read last.txt with read_file.", + "Replace the last line 'OLD_FOOTER' with 'NEW_FOOTER'.", + "Use edit_file with op='replace', pos=, lines=['NEW_FOOTER'].", + ].join(" "), + validate: (c) => { + const lines = c.trim().split("\n"); + if (c.includes("OLD_FOOTER")) return { passed: false, reason: "'OLD_FOOTER' still present" }; + if (lines[lines.length - 1].trim() !== "NEW_FOOTER") + return { passed: false, reason: "last line is not 'NEW_FOOTER'" }; + return { passed: true, reason: "last line replaced" }; + }, + }, + { + name: "12. Adjacent line edits", + fileName: "adjacent.txt", + fileContent: ["aaa", "bbb", "ccc", "ddd"].join("\n"), + prompt: [ + "Read adjacent.txt with read_file.", + "Replace line 2 ('bbb') with 'BBB' and line 3 ('ccc') with 'CCC'.", + "Use edit_file with TWO edits in the same call:", + " { op: 'replace', pos: , lines: ['BBB'] }", + " { op: 'replace', pos: , lines: ['CCC'] }", + ].join(" "), + validate: (c) => { + const lines = c.trim().split("\n"); + if (c.includes("bbb")) return { passed: false, reason: "'bbb' still present" }; + if (c.includes("ccc")) return { passed: false, reason: "'ccc' still present" }; + if (!c.includes("BBB")) return { passed: false, reason: "'BBB' not found" }; + if (!c.includes("CCC")) return { passed: false, reason: "'CCC' not found" }; + if (lines.length !== 4) return { passed: false, reason: `expected 4 lines, got ${lines.length}` }; + return { passed: true, reason: "two adjacent lines replaced" }; + }, + }, + { + name: "13. 
Prepend multi-line block", + fileName: "block.py", + fileContent: ["def main():", " print('hello')", "", "main()"].join("\n"), + prompt: [ + "Read block.py with read_file.", + "Prepend a 2-line comment block before 'def main():' (line 1).", + "The two lines are: '# Author: test' and '# Date: 2025-01-01'.", + "Use edit_file with op='prepend', pos=, lines=['# Author: test', '# Date: 2025-01-01'].", + ].join(" "), + validate: (c) => { + const lines = c.trim().split("\n"); + if (!c.includes("# Author: test")) return { passed: false, reason: "author comment not found" }; + if (!c.includes("# Date: 2025-01-01")) return { passed: false, reason: "date comment not found" }; + const defIdx = lines.findIndex((l) => l.startsWith("def main")); + const authorIdx = lines.findIndex((l) => l.includes("Author")); + if (authorIdx >= defIdx) return { passed: false, reason: "comments not before def" }; + return { passed: true, reason: "2-line block prepended before function" }; + }, + }, + { + name: "14. Delete range — 3 consecutive lines", + fileName: "cleanup.txt", + fileContent: ["keep1", "remove-a", "remove-b", "remove-c", "keep2"].join("\n"), + prompt: [ + "Read cleanup.txt with read_file.", + "Delete lines 2-4 ('remove-a', 'remove-b', 'remove-c') using a single range replace.", + "Use edit_file with op='replace', pos=, end=, lines=[].", + "An empty lines array deletes the range.", + ].join(" "), + validate: (c) => { + const lines = c.trim().split("\n"); + if (c.includes("remove")) return { passed: false, reason: "'remove' lines still present" }; + if (!c.includes("keep1")) return { passed: false, reason: "'keep1' was deleted" }; + if (!c.includes("keep2")) return { passed: false, reason: "'keep2' was deleted" }; + if (lines.length !== 2) return { passed: false, reason: `expected 2 lines, got ${lines.length}` }; + return { passed: true, reason: "3 consecutive lines deleted via range" }; + }, + }, + { + name: "15. 
Replace with duplicate-content line", + fileName: "dupes.txt", + fileContent: ["item", "item", "item", "item"].join("\n"), + prompt: [ + "Read dupes.txt with read_file. All 4 lines have the same text 'item'.", + "Replace ONLY line 3 with 'CHANGED'. Do NOT modify any other line.", + "Use edit_file with op='replace', pos=, lines=['CHANGED'].", + "The anchor hash uniquely identifies line 3 even though the content is identical.", + ].join(" "), + validate: (c) => { + const lines = c.trim().split("\n"); + if (!c.includes("CHANGED")) return { passed: false, reason: "'CHANGED' not found" }; + const changedCount = lines.filter((l) => l.trim() === "CHANGED").length; + const itemCount = lines.filter((l) => l.trim() === "item").length; + if (changedCount !== 1) return { passed: false, reason: `expected 1 CHANGED, got ${changedCount}` }; + if (itemCount !== 3) return { passed: false, reason: `expected 3 item lines, got ${itemCount}` }; + if (lines.length !== 4) return { passed: false, reason: `expected 4 lines, got ${lines.length}` }; + return { passed: true, reason: "only line 3 changed among duplicates" }; + }, + }, + + // ── Whitespace cases (16-21) ────────────────────────────────── + { + name: "16. Fix indentation — 2 spaces → 4 spaces", + fileName: "indent.js", + fileContent: ["function foo() {", " const x = 1;", " return x;", "}"].join("\n"), + prompt: [ + "Read indent.js with read_file.", + "Replace line 2 ' const x = 1;' (2-space indent) with ' const x = 1;' (4-space indent).", + "Use edit_file with op='replace', pos=, lines=[' const x = 1;'].", + "The ONLY change is the indentation: 2 spaces → 4 spaces. 
Content stays the same.", + ].join(" "), + validate: (c) => { + const lines = c.split("\n"); + const line2 = lines[1]; + if (!line2) return { passed: false, reason: "line 2 missing" }; + if (line2 === " const x = 1;") return { passed: true, reason: "indentation fixed to 4 spaces" }; + if (line2 === " const x = 1;") return { passed: false, reason: "still 2-space indent" }; + return { passed: false, reason: `unexpected line 2: '${line2}'` }; + }, + }, + { + name: "17. Replace preserving leading whitespace", + fileName: "preserve.py", + fileContent: [ + "class Foo:", + " def old_method(self):", + " pass", + ].join("\n"), + prompt: [ + "Read preserve.py with read_file.", + "Replace line 2 ' def old_method(self):' with ' def new_method(self):'.", + "Keep the 4-space indentation. Only change the method name.", + "Use edit_file with op='replace', pos=, lines=[' def new_method(self):'].", + ].join(" "), + validate: (c) => { + if (c.includes("old_method")) return { passed: false, reason: "'old_method' still present" }; + const lines = c.split("\n"); + const methodLine = lines.find((l) => l.includes("new_method")); + if (!methodLine) return { passed: false, reason: "'new_method' not found" }; + if (!methodLine.startsWith(" ")) return { passed: false, reason: "indentation lost" }; + return { passed: true, reason: "method renamed with indentation preserved" }; + }, + }, + { + name: "18. 
Insert blank line between sections", + fileName: "sections.txt", + fileContent: ["[section-a]", "value-a=1", "[section-b]", "value-b=2"].join("\n"), + prompt: [ + "Read sections.txt with read_file.", + "Insert a blank empty line between 'value-a=1' (line 2) and '[section-b]' (line 3).", + "Use edit_file with op='append', pos=, lines=[''].", + "lines=[''] inserts one empty line.", + ].join(" "), + validate: (c) => { + const lines = c.split("\n"); + const valAIdx = lines.findIndex((l) => l.includes("value-a=1")); + const secBIdx = lines.findIndex((l) => l.includes("[section-b]")); + if (valAIdx === -1) return { passed: false, reason: "'value-a=1' missing" }; + if (secBIdx === -1) return { passed: false, reason: "'[section-b]' missing" }; + if (secBIdx - valAIdx < 2) return { passed: false, reason: "no blank line between sections" }; + const between = lines[valAIdx + 1]; + if (between.trim() !== "") return { passed: false, reason: `line between is '${between}', not blank` }; + return { passed: true, reason: "blank line inserted between sections" }; + }, + }, + { + name: "19. Delete blank line", + fileName: "noblank.txt", + fileContent: ["first", "", "second", "third"].join("\n"), + prompt: [ + "Read noblank.txt with read_file.", + "Delete the empty blank line (line 2). Use edit_file with op='replace', pos=, lines=[].", + ].join(" "), + validate: (c) => { + const lines = c.trim().split("\n"); + if (lines.length !== 3) return { passed: false, reason: `expected 3 lines, got ${lines.length}` }; + if (lines[0].trim() !== "first") return { passed: false, reason: "'first' not on line 1" }; + if (lines[1].trim() !== "second") return { passed: false, reason: "'second' not on line 2" }; + return { passed: true, reason: "blank line deleted" }; + }, + }, + { + name: "20. 
Tab → spaces conversion", + fileName: "tabs.txt", + fileContent: ["start", "\tindented-with-tab", "end"].join("\n"), + prompt: [ + "Read tabs.txt with read_file.", + "Replace the tab-indented line 2 using edit_file with edits: [{ op: 'replace', pos: '', lines: [' indented-with-spaces'] }].", + "Expected final line 2 to be 4 spaces followed by indented-with-spaces.", + ].join(" "), + validate: (c) => { + if (c.includes("\t")) return { passed: false, reason: "tab still present" }; + if (!c.includes(" indented-with-spaces")) + return { passed: false, reason: "' indented-with-spaces' not found" }; + if (!c.includes("start")) return { passed: false, reason: "'start' was modified" }; + return { passed: true, reason: "tab converted to 4 spaces" }; + }, + }, + { + name: "21. Deeply nested indent replacement", + fileName: "nested.ts", + fileContent: [ + "if (a) {", + " if (b) {", + " if (c) {", + " old_call();", + " }", + " }", + "}", + ].join("\n"), + prompt: [ + "Read nested.ts with read_file.", + "Replace line 4 ' old_call();' with ' new_call();'.", + "Preserve the exact 6-space indentation. Only change the function name.", + "Use edit_file with op='replace', pos=, lines=[' new_call();'].", + ].join(" "), + validate: (c) => { + if (c.includes("old_call")) return { passed: false, reason: "'old_call' still present" }; + const lines = c.split("\n"); + const callLine = lines.find((l) => l.includes("new_call")); + if (!callLine) return { passed: false, reason: "'new_call' not found" }; + const leadingSpaces = callLine.match(/^ */)?.[0].length ?? 
0; + if (leadingSpaces !== 6) return { passed: false, reason: `expected 6-space indent, got ${leadingSpaces}` }; + return { passed: true, reason: "deeply nested line replaced with indent preserved" }; + }, + }, +]; + +// ── JSONL event types ───────────────────────────────────────── +interface ToolCallEvent { + tool_call_id: string; + tool_input: Record; + tool_name: string; + type: "tool_call"; +} + +interface ToolResultEvent { + error?: string; + output: string; + tool_call_id: string; + type: "tool_result"; +} + +interface AnyEvent { + type: string; + [key: string]: unknown; +} + +// ── Run single test case ───────────────────────────────────── +async function runTestCase( + tc: TestCase, + testDir: string +): Promise<{ + passed: boolean; + editCalls: number; + editSuccesses: number; + duration: number; +}> { + const testFile = join(testDir, tc.fileName); + writeFileSync(testFile, tc.fileContent, "utf-8"); + + const headlessScript = resolve(import.meta.dir, "headless.ts"); + const headlessArgs = [ + "run", + headlessScript, + "-p", + tc.prompt, + "--no-translate", + ...extraArgs, + ]; + + const startTime = Date.now(); + + const output = await new Promise((res, reject) => { + const proc = spawn("bun", headlessArgs, { + cwd: testDir, + env: { ...process.env, BUN_INSTALL: process.env.BUN_INSTALL }, + stdio: ["ignore", "pipe", "pipe"], + }); + + let stdout = ""; + let stderr = ""; + + proc.stdout.on("data", (chunk: Buffer) => { + stdout += chunk.toString(); + }); + proc.stderr.on("data", (chunk: Buffer) => { + stderr += chunk.toString(); + }); + + const timeout = setTimeout( + () => { + proc.kill("SIGTERM"); + reject(new Error("Timed out after 4 minutes")); + }, + 4 * 60 * 1000 + ); + + proc.on("close", (code) => { + clearTimeout(timeout); + if (code !== 0) { + reject(new Error(`Exit code ${code}\n${stderr.slice(-500)}`)); + } else { + res(stdout); + } + }); + proc.on("error", (err) => { + clearTimeout(timeout); + reject(err); + }); + }); + + const duration = 
Date.now() - startTime; + + // Parse events + const events: AnyEvent[] = []; + for (const line of output.split("\n").filter((l) => l.trim())) { + try { + events.push(JSON.parse(line) as AnyEvent); + } catch { + // skip non-JSON + } + } + + const toolCalls = events.filter( + (e) => e.type === "tool_call" + ) as unknown as ToolCallEvent[]; + const toolResults = events.filter( + (e) => e.type === "tool_result" + ) as unknown as ToolResultEvent[]; + + const editCalls = toolCalls.filter((e) => e.tool_name === "edit_file"); + const editCallIds = new Set(editCalls.map((e) => e.tool_call_id)); + const editResults = toolResults.filter((e) => + editCallIds.has(e.tool_call_id) + ); + const editSuccesses = editResults.filter((e) => !e.error); + + // Show blocked calls + const editErrors = editResults.filter((e) => e.error); + for (const err of editErrors) { + const matchingCall = editCalls.find( + (c) => c.tool_call_id === err.tool_call_id + ); + info(` blocked: ${err.error?.slice(0, 120)}`); + if (matchingCall) { + info(` input: ${JSON.stringify(matchingCall.tool_input).slice(0, 200)}`); + } + } + + // Validate file content + let finalContent: string; + try { + finalContent = readFileSync(testFile, "utf-8"); + } catch { + return { + passed: false, + editCalls: editCalls.length, + editSuccesses: editSuccesses.length, + duration, + }; + } + + const validation = tc.validate(finalContent); + + return { + passed: validation.passed, + editCalls: editCalls.length, + editSuccesses: editSuccesses.length, + duration, + }; +} + +// ── Main ────────────────────────────────────────────────────── +const main = async () => { + console.log(`\n${BOLD}Headless Edit Operations Test — ${TEST_CASES.length} Types${RESET}\n`); + + const testDir = join(tmpdir(), `edit-ops-${Date.now()}`); + mkdirSync(testDir, { recursive: true }); + info(`Test dir: ${testDir}`); + console.log(); + + let totalPassed = 0; + const results: { name: string; passed: boolean; detail: string }[] = []; + + for (const tc of 
TEST_CASES) { + console.log(`${CYAN}${BOLD}${tc.name}${RESET}`); + info(`File: ${tc.fileName}`); + info(`Prompt: "${tc.prompt.slice(0, 80)}..."`); + + try { + const result = await runTestCase(tc, testDir); + const status = result.passed + ? `${GREEN}PASS${RESET}` + : `${RED}FAIL${RESET}`; + const detail = `edit_file: ${result.editSuccesses}/${result.editCalls} succeeded, ${(result.duration / 1000).toFixed(1)}s`; + + console.log(` ${status} — ${detail}`); + + if (result.passed) { + totalPassed++; + // Validate the file to show reason + const content = readFileSync(join(testDir, tc.fileName), "utf-8"); + const v = tc.validate(content); + pass(v.reason); + } else { + const content = readFileSync(join(testDir, tc.fileName), "utf-8"); + const v = tc.validate(content); + fail(v.reason); + info( + `Final content:\n${content + .split("\n") + .map((l, i) => ` ${i + 1}: ${l}`) + .join("\n")}` + ); + } + + results.push({ name: tc.name, passed: result.passed, detail }); + } catch (error) { + const msg = error instanceof Error ? error.message : String(error); + console.log(` ${RED}ERROR${RESET} — ${msg.slice(0, 200)}`); + fail(msg.slice(0, 200)); + results.push({ name: tc.name, passed: false, detail: msg.slice(0, 100) }); + } + + // Reset file for next test (in case of side effects) + try { + rmSync(join(testDir, tc.fileName), { force: true }); + } catch {} + + console.log(); + } + + // Summary + console.log(`${BOLD}━━━ Summary ━━━${RESET}`); + for (const r of results) { + const icon = r.passed ? 
`${GREEN}✓${RESET}` : `${RED}✗${RESET}`; + console.log(` ${icon} ${r.name} — ${r.detail}`); + } + console.log(); + console.log( + `${BOLD}Result: ${totalPassed}/${TEST_CASES.length} passed (${Math.round((totalPassed / TEST_CASES.length) * 100)}%)${RESET}` + ); + + // Cleanup + try { + rmSync(testDir, { recursive: true, force: true }); + } catch {} + + if (totalPassed === TEST_CASES.length) { + console.log( + `\n${BOLD}${GREEN}🎉 ALL TESTS PASSED — 100% success rate!${RESET}\n` + ); + process.exit(0); + } else { + console.log(`\n${BOLD}${RED}Some tests failed.${RESET}\n`); + process.exit(1); + } +}; + +main(); diff --git a/benchmarks/test-multi-model.ts b/benchmarks/test-multi-model.ts new file mode 100644 index 000000000..29ee4bb93 --- /dev/null +++ b/benchmarks/test-multi-model.ts @@ -0,0 +1,280 @@ +#!/usr/bin/env bun +/** + * Multi-model edit_file test runner + * + * Runs test-headless-edit-ops.ts against every available model + * and produces a summary table. + * + * Usage: + * bun run scripts/test-multi-model-edit.ts [--timeout ] + */ + +import { spawn } from "node:child_process"; +import { resolve } from "node:path"; + +// ── Models ──────────────────────────────────────────────────── +const MODELS = [ + { id: "MiniMaxAI/MiniMax-M2.5", short: "M2.5" }, + // { id: "MiniMaxAI/MiniMax-M2.1", short: "M2.1" }, // masked: slow + timeout-prone + // { id: "zai-org/GLM-5", short: "GLM-5" }, // masked: API 503 + { id: "zai-org/GLM-4.7", short: "GLM-4.7" }, +]; + +// ── CLI args ────────────────────────────────────────────────── +let perModelTimeoutSec = 900; // 15 min default per model (5 tests) +const rawArgs = process.argv.slice(2); +for (let i = 0; i < rawArgs.length; i++) { + if (rawArgs[i] === "--timeout" && i + 1 < rawArgs.length) { + const parsed = Number.parseInt(rawArgs[i + 1], 10); + if (Number.isNaN(parsed) || parsed <= 0) { + console.error(`Invalid --timeout value: ${rawArgs[i + 1]}`); + process.exit(1); + } + perModelTimeoutSec = parsed; + i++; +} + +// ── 
Colors ──────────────────────────────────────────────────── +const BOLD = "\x1b[1m"; +const GREEN = "\x1b[32m"; +const RED = "\x1b[31m"; +const YELLOW = "\x1b[33m"; +const DIM = "\x1b[2m"; +const CYAN = "\x1b[36m"; +const RESET = "\x1b[0m"; + +// ── Types ───────────────────────────────────────────────────── +interface TestResult { + detail: string; + name: string; + passed: boolean; +} + +interface ModelResult { + durationMs: number; + error?: string; + modelId: string; + modelShort: string; + tests: TestResult[]; + totalPassed: number; + totalTests: number; +} + +// ── Parse test-headless-edit-ops stdout ─────────────────────── +function parseOpsOutput(stdout: string): TestResult[] { + const results: TestResult[] = []; + + // Match lines like: " PASS — edit_file: 1/1 succeeded, 32.5s" + // or " FAIL — edit_file: 0/3 succeeded, 15.2s" + // or " ERROR — Timed out after 10 minutes" + // Following a line like: "1. Replace single line" + const lines = stdout.split("\n"); + + let currentTestName = ""; + for (const line of lines) { + // Detect test name: starts with ANSI-colored bold cyan + "N. Name" + // Strip ANSI codes for matching + const stripped = line.replace(/\x1b\[[0-9;]*m/g, ""); + + // Test name pattern: "N. 
" + const testNameMatch = stripped.match(/^\s*(\d+\.\s+.+)$/); + if ( + testNameMatch && + !stripped.includes("—") && + !stripped.includes("✓") && + !stripped.includes("✗") + ) { + currentTestName = testNameMatch[1].trim(); + continue; + } + + // Result line: PASS/FAIL/ERROR + if (currentTestName && stripped.includes("PASS")) { + const detail = stripped.replace(/^\s*PASS\s*—?\s*/, "").trim(); + results.push({ + name: currentTestName, + passed: true, + detail: detail || "passed", + }); + currentTestName = ""; + } else if (currentTestName && stripped.includes("FAIL")) { + const detail = stripped.replace(/^\s*FAIL\s*—?\s*/, "").trim(); + results.push({ + name: currentTestName, + passed: false, + detail: detail || "failed", + }); + currentTestName = ""; + } else if (currentTestName && stripped.includes("ERROR")) { + const detail = stripped.replace(/^\s*ERROR\s*—?\s*/, "").trim(); + results.push({ + name: currentTestName, + passed: false, + detail: detail || "error", + }); + currentTestName = ""; + } + } + + return results; +} + +// ── Run one model ──────────────────────────────────────────── +async function runModel(model: { + id: string; + short: string; +}): Promise { + const opsScript = resolve(import.meta.dir, "test-edit-ops.ts"); + const startTime = Date.now(); + + return new Promise((resolvePromise) => { + const proc = spawn( + "bun", + ["run", opsScript, "-m", model.id, "--no-translate"], + { + cwd: resolve(import.meta.dir), + env: { ...process.env, BUN_INSTALL: process.env.BUN_INSTALL }, + stdio: ["ignore", "pipe", "pipe"], + } + ); + + let stdout = ""; + let stderr = ""; + + proc.stdout.on("data", (chunk: Buffer) => { + stdout += chunk.toString(); + }); + proc.stderr.on("data", (chunk: Buffer) => { + stderr += chunk.toString(); + }); + + const timeout = setTimeout(() => { + proc.kill("SIGTERM"); + resolvePromise({ + modelId: model.id, + modelShort: model.short, + tests: [], + totalPassed: 0, + totalTests: 0, + durationMs: Date.now() - startTime, + error: 
`Timed out after ${perModelTimeoutSec}s`, + }); + }, perModelTimeoutSec * 1000); + + proc.on("close", () => { + clearTimeout(timeout); + const tests = parseOpsOutput(stdout); + const totalPassed = tests.filter((t) => t.passed).length; + + resolvePromise({ + modelId: model.id, + modelShort: model.short, + tests, + totalPassed, + totalTests: Math.max(tests.length, 5), + durationMs: Date.now() - startTime, + }); + }); + + proc.on("error", (err) => { + clearTimeout(timeout); + resolvePromise({ + modelId: model.id, + modelShort: model.short, + tests: [], + totalPassed: 0, + totalTests: 0, + durationMs: Date.now() - startTime, + error: err.message, + }); + }); + }); +} + +// ── Main ────────────────────────────────────────────────────── +const main = async () => { + console.log(`\n${BOLD}═══ Multi-Model edit_file Test Runner ═══${RESET}\n`); + console.log(`${DIM}Models: ${MODELS.map((m) => m.short).join(", ")}${RESET}`); + console.log(`${DIM}Timeout: ${perModelTimeoutSec}s per model${RESET}`); + console.log(); + + const allResults: ModelResult[] = []; + + for (const model of MODELS) { + console.log(`${CYAN}${BOLD}▶ Testing ${model.short} (${model.id})${RESET}`); + const result = await runModel(model); + allResults.push(result); + + const timeStr = `${(result.durationMs / 1000).toFixed(1)}s`; + if (result.error) { + console.log(` ${RED}ERROR${RESET}: ${result.error} (${timeStr})`); + } else { + const color = + result.totalPassed === result.totalTests + ? GREEN + : result.totalPassed > 0 + ? YELLOW + : RED; + console.log( + ` ${color}${result.totalPassed}/${result.totalTests} passed${RESET} (${timeStr})` + ); + for (const t of result.tests) { + const icon = t.passed ? 
`${GREEN}✓${RESET}` : `${RED}✗${RESET}`; + console.log(` ${icon} ${t.name}`); + } + } + console.log(); + } + + // ── Summary Table ────────────────────────────────────────── + console.log(`${BOLD}═══ Summary ═══${RESET}\n`); + + // Per-model results + for (const r of allResults) { + const timeStr = `${(r.durationMs / 1000).toFixed(0)}s`; + const color = r.error ? RED : r.totalPassed === r.totalTests ? GREEN : r.totalPassed > 0 ? YELLOW : RED; + const label = r.error ? `ERROR: ${r.error}` : `${r.totalPassed}/${r.totalTests}`; + console.log(` ${r.modelShort.padEnd(8)} ${color}${label}${RESET} (${timeStr})`); + for (const t of r.tests) { + const icon = t.passed ? `${GREEN}✓${RESET}` : `${RED}✗${RESET}`; + console.log(` ${icon} ${t.name}`); + } + } + + console.log(); + + // Overall + const totalModels = allResults.length; + const erroredModels = allResults.filter((r) => r.error).length; + const perfectModels = allResults.filter( + (r) => !r.error && r.totalPassed === r.totalTests && r.totalTests > 0 + ).length; + console.log( + `${BOLD}Models with 100%: ${perfectModels}/${totalModels}${RESET}` + ); + + const overallPassed = allResults.reduce((sum, r) => sum + r.totalPassed, 0); + const overallTotal = allResults.reduce((sum, r) => sum + r.totalTests, 0); + console.log( + `${BOLD}Overall: ${overallPassed}/${overallTotal} (${Math.round((overallPassed / overallTotal) * 100)}%)${RESET}` + ); + + console.log(); + + if (erroredModels > 0) { + console.log( + `${BOLD}${RED}${erroredModels} model(s) errored. See details above.${RESET}\n` + ); + process.exit(1); + } else if (perfectModels === totalModels) { + console.log(`${BOLD}${GREEN}🎉 ALL MODELS PASSED ALL TESTS!${RESET}\n`); + process.exit(0); + } else { + console.log( + `${BOLD}${YELLOW}Some models have failures. 
See details above.${RESET}\n` + ); + process.exit(1); + } +}; + +main(); diff --git a/bin/oh-my-opencode.js b/bin/oh-my-opencode.js index 4ad39550b..0d66e55eb 100755 --- a/bin/oh-my-opencode.js +++ b/bin/oh-my-opencode.js @@ -3,8 +3,9 @@ // Wrapper script that detects platform and spawns the correct binary import { spawnSync } from "node:child_process"; +import { readFileSync } from "node:fs"; import { createRequire } from "node:module"; -import { getPlatformPackage, getBinaryPath } from "./platform.js"; +import { getPlatformPackageCandidates, getBinaryPath } from "./platform.js"; const require = createRequire(import.meta.url); @@ -26,55 +27,116 @@ function getLibcFamily() { } } +function supportsAvx2() { + if (process.arch !== "x64") { + return null; + } + + if (process.env.OH_MY_OPENCODE_FORCE_BASELINE === "1") { + return false; + } + + if (process.platform === "linux") { + try { + const cpuInfo = readFileSync("/proc/cpuinfo", "utf8").toLowerCase(); + return cpuInfo.includes("avx2"); + } catch { + return null; + } + } + + if (process.platform === "darwin") { + const probe = spawnSync("sysctl", ["-n", "machdep.cpu.leaf7_features"], { + encoding: "utf8", + }); + + if (probe.error || probe.status !== 0) { + return null; + } + + return probe.stdout.toUpperCase().includes("AVX2"); + } + + return null; +} + +function getSignalExitCode(signal) { + const signalCodeByName = { + SIGINT: 2, + SIGILL: 4, + SIGKILL: 9, + SIGTERM: 15, + }; + + return 128 + (signalCodeByName[signal] ?? 
1); +} + function main() { const { platform, arch } = process; const libcFamily = getLibcFamily(); + const avx2Supported = supportsAvx2(); - // Get platform package name - let pkg; + let packageCandidates; try { - pkg = getPlatformPackage({ platform, arch, libcFamily }); + packageCandidates = getPlatformPackageCandidates({ + platform, + arch, + libcFamily, + preferBaseline: avx2Supported === false, + }); } catch (error) { console.error(`\noh-my-opencode: ${error.message}\n`); process.exit(1); } - - // Resolve binary path - const binRelPath = getBinaryPath(pkg, platform); - - let binPath; - try { - binPath = require.resolve(binRelPath); - } catch { + + const resolvedBinaries = packageCandidates + .map((pkg) => { + try { + return { pkg, binPath: require.resolve(getBinaryPath(pkg, platform)) }; + } catch { + return null; + } + }) + .filter((entry) => entry !== null); + + if (resolvedBinaries.length === 0) { console.error(`\noh-my-opencode: Platform binary not installed.`); console.error(`\nYour platform: ${platform}-${arch}${libcFamily === "musl" ? "-musl" : ""}`); - console.error(`Expected package: ${pkg}`); + console.error(`Expected packages (in order): ${packageCandidates.join(", ")}`); console.error(`\nTo fix, run:`); - console.error(` npm install ${pkg}\n`); + console.error(` npm install ${packageCandidates[0]}\n`); process.exit(1); } - - // Spawn the binary - const result = spawnSync(binPath, process.argv.slice(2), { - stdio: "inherit", - }); - - // Handle spawn errors - if (result.error) { - console.error(`\noh-my-opencode: Failed to execute binary.`); - console.error(`Error: ${result.error.message}\n`); - process.exit(2); - } - - // Handle signals - if (result.signal) { - const signalNum = result.signal === "SIGTERM" ? 15 : - result.signal === "SIGKILL" ? 9 : - result.signal === "SIGINT" ? 
2 : 1; - process.exit(128 + signalNum); + + for (let index = 0; index < resolvedBinaries.length; index += 1) { + const currentBinary = resolvedBinaries[index]; + const hasFallback = index < resolvedBinaries.length - 1; + const result = spawnSync(currentBinary.binPath, process.argv.slice(2), { + stdio: "inherit", + }); + + if (result.error) { + if (hasFallback) { + continue; + } + + console.error(`\noh-my-opencode: Failed to execute binary.`); + console.error(`Error: ${result.error.message}\n`); + process.exit(2); + } + + if (result.signal === "SIGILL" && hasFallback) { + continue; + } + + if (result.signal) { + process.exit(getSignalExitCode(result.signal)); + } + + process.exit(result.status ?? 1); } - process.exit(result.status ?? 1); + process.exit(1); } main(); diff --git a/bin/platform.d.ts b/bin/platform.d.ts new file mode 100644 index 000000000..ed3987957 --- /dev/null +++ b/bin/platform.d.ts @@ -0,0 +1,14 @@ +export declare function getPlatformPackage(options: { + platform: string; + arch: string; + libcFamily?: string | null; +}): string; + +export declare function getPlatformPackageCandidates(options: { + platform: string; + arch: string; + libcFamily?: string | null; + preferBaseline?: boolean; +}): string[]; + +export declare function getBinaryPath(pkg: string, platform: string): string; diff --git a/bin/platform.js b/bin/platform.js index ac728d3c8..a2a6c3c32 100644 --- a/bin/platform.js +++ b/bin/platform.js @@ -26,6 +26,50 @@ export function getPlatformPackage({ platform, arch, libcFamily }) { return `oh-my-opencode-${os}-${arch}${suffix}`; } +/** @param {{ platform: string, arch: string, libcFamily?: string | null, preferBaseline?: boolean }} options */ +export function getPlatformPackageCandidates({ platform, arch, libcFamily, preferBaseline = false }) { + const primaryPackage = getPlatformPackage({ platform, arch, libcFamily }); + const baselinePackage = getBaselinePlatformPackage({ platform, arch, libcFamily }); + + if (!baselinePackage) { + 
return [primaryPackage]; + } + + return preferBaseline ? [baselinePackage, primaryPackage] : [primaryPackage, baselinePackage]; +} + +/** @param {{ platform: string, arch: string, libcFamily?: string | null }} options */ +function getBaselinePlatformPackage({ platform, arch, libcFamily }) { + if (arch !== "x64") { + return null; + } + + if (platform === "darwin") { + return "oh-my-opencode-darwin-x64-baseline"; + } + + if (platform === "win32") { + return "oh-my-opencode-windows-x64-baseline"; + } + + if (platform === "linux") { + if (libcFamily === null || libcFamily === undefined) { + throw new Error( + "Could not detect libc on Linux. " + + "Please ensure detect-libc is installed or report this issue." + ); + } + + if (libcFamily === "musl") { + return "oh-my-opencode-linux-x64-musl-baseline"; + } + + return "oh-my-opencode-linux-x64-baseline"; + } + + return null; +} + /** * Get the path to the binary within a platform package * @param {string} pkg Package name diff --git a/bin/platform.test.ts b/bin/platform.test.ts index 775509929..88b8b877b 100644 --- a/bin/platform.test.ts +++ b/bin/platform.test.ts @@ -1,6 +1,6 @@ // bin/platform.test.ts import { describe, expect, test } from "bun:test"; -import { getPlatformPackage, getBinaryPath } from "./platform.js"; +import { getBinaryPath, getPlatformPackage, getPlatformPackageCandidates } from "./platform.js"; describe("getPlatformPackage", () => { // #region Darwin platforms @@ -146,3 +146,58 @@ describe("getBinaryPath", () => { expect(result).toBe("oh-my-opencode-linux-x64/bin/oh-my-opencode"); }); }); + +describe("getPlatformPackageCandidates", () => { + test("returns x64 and baseline candidates for Linux glibc", () => { + // #given Linux x64 with glibc + const input = { platform: "linux", arch: "x64", libcFamily: "glibc" }; + + // #when getting package candidates + const result = getPlatformPackageCandidates(input); + + // #then returns modern first then baseline fallback + expect(result).toEqual([ + 
"oh-my-opencode-linux-x64", + "oh-my-opencode-linux-x64-baseline", + ]); + }); + + test("returns x64 musl and baseline candidates for Linux musl", () => { + // #given Linux x64 with musl + const input = { platform: "linux", arch: "x64", libcFamily: "musl" }; + + // #when getting package candidates + const result = getPlatformPackageCandidates(input); + + // #then returns musl modern first then musl baseline fallback + expect(result).toEqual([ + "oh-my-opencode-linux-x64-musl", + "oh-my-opencode-linux-x64-musl-baseline", + ]); + }); + + test("returns baseline first when preferBaseline is true", () => { + // #given Windows x64 and baseline preference + const input = { platform: "win32", arch: "x64", preferBaseline: true }; + + // #when getting package candidates + const result = getPlatformPackageCandidates(input); + + // #then baseline package is preferred first + expect(result).toEqual([ + "oh-my-opencode-windows-x64-baseline", + "oh-my-opencode-windows-x64", + ]); + }); + + test("returns only one candidate for ARM64", () => { + // #given non-x64 platform + const input = { platform: "linux", arch: "arm64", libcFamily: "glibc" }; + + // #when getting package candidates + const result = getPlatformPackageCandidates(input); + + // #then baseline fallback is not included + expect(result).toEqual(["oh-my-opencode-linux-arm64"]); + }); +}); diff --git a/package.json b/package.json index 0559493b7..f5138b11c 100644 --- a/package.json +++ b/package.json @@ -77,11 +77,15 @@ "optionalDependencies": { "oh-my-opencode-darwin-arm64": "3.8.5", "oh-my-opencode-darwin-x64": "3.8.5", + "oh-my-opencode-darwin-x64-baseline": "3.8.5", "oh-my-opencode-linux-arm64": "3.8.5", "oh-my-opencode-linux-arm64-musl": "3.8.5", "oh-my-opencode-linux-x64": "3.8.5", + "oh-my-opencode-linux-x64-baseline": "3.8.5", "oh-my-opencode-linux-x64-musl": "3.8.5", - "oh-my-opencode-windows-x64": "3.8.5" + "oh-my-opencode-linux-x64-musl-baseline": "3.8.5", + "oh-my-opencode-windows-x64": "3.8.5", + 
"oh-my-opencode-windows-x64-baseline": "3.8.5" }, "trustedDependencies": [ "@ast-grep/cli", diff --git a/postinstall.mjs b/postinstall.mjs index 8243a562f..35f77a6d4 100644 --- a/postinstall.mjs +++ b/postinstall.mjs @@ -2,7 +2,7 @@ // Runs after npm install to verify platform binary is available import { createRequire } from "node:module"; -import { getPlatformPackage, getBinaryPath } from "./bin/platform.js"; +import { getPlatformPackageCandidates, getBinaryPath } from "./bin/platform.js"; const require = createRequire(import.meta.url); @@ -27,12 +27,28 @@ function main() { const libcFamily = getLibcFamily(); try { - const pkg = getPlatformPackage({ platform, arch, libcFamily }); - const binPath = getBinaryPath(pkg, platform); - - // Try to resolve the binary - require.resolve(binPath); - console.log(`✓ oh-my-opencode binary installed for ${platform}-${arch}`); + const packageCandidates = getPlatformPackageCandidates({ + platform, + arch, + libcFamily, + }); + + const resolvedPackage = packageCandidates.find((pkg) => { + try { + require.resolve(getBinaryPath(pkg, platform)); + return true; + } catch { + return false; + } + }); + + if (!resolvedPackage) { + throw new Error( + `No platform binary package installed. 
Tried: ${packageCandidates.join(", ")}` + ); + } + + console.log(`✓ oh-my-opencode binary installed for ${platform}-${arch} (${resolvedPackage})`); } catch (error) { console.warn(`⚠ oh-my-opencode: ${error.message}`); console.warn(` The CLI may not work on this platform.`); diff --git a/signatures/cla.json b/signatures/cla.json index 6e3619fb5..5fd83619e 100644 --- a/signatures/cla.json +++ b/signatures/cla.json @@ -1719,6 +1719,54 @@ "created_at": "2026-02-24T17:12:31Z", "repoId": 1108837393, "pullRequestNo": 1983 + }, + { + "name": "east-shine", + "id": 20237288, + "comment_id": 3957576758, + "created_at": "2026-02-25T08:19:34Z", + "repoId": 1108837393, + "pullRequestNo": 2113 + }, + { + "name": "SupenBysz", + "id": 3314033, + "comment_id": 3962352704, + "created_at": "2026-02-25T22:00:54Z", + "repoId": 1108837393, + "pullRequestNo": 2119 + }, + { + "name": "zhzy0077", + "id": 8717471, + "comment_id": 3964015975, + "created_at": "2026-02-26T04:45:23Z", + "repoId": 1108837393, + "pullRequestNo": 2125 + }, + { + "name": "spacecowboy0416", + "id": 239068998, + "comment_id": 3964320737, + "created_at": "2026-02-26T06:05:27Z", + "repoId": 1108837393, + "pullRequestNo": 2126 + }, + { + "name": "imwxc", + "id": 49653609, + "comment_id": 3965127447, + "created_at": "2026-02-26T09:00:16Z", + "repoId": 1108837393, + "pullRequestNo": 2129 + }, + { + "name": "maou-shonen", + "id": 22576780, + "comment_id": 3965445132, + "created_at": "2026-02-26T09:50:46Z", + "repoId": 1108837393, + "pullRequestNo": 2131 } ] } \ No newline at end of file diff --git a/src/agents/atlas/agent.ts b/src/agents/atlas/agent.ts index 6f968b783..ccf987754 100644 --- a/src/agents/atlas/agent.ts +++ b/src/agents/atlas/agent.ts @@ -17,7 +17,6 @@ import type { AvailableAgent, AvailableSkill, AvailableCategory } from "../dynam import { buildCategorySkillsDelegationGuide } from "../dynamic-agent-prompt-builder" import type { CategoryConfig } from "../../config/schema" import { mergeCategories } from 
"../../shared/merge-categories" -import { createAgentToolRestrictions } from "../../shared/permission-compat" import { getDefaultAtlasPrompt } from "./default" import { getGptAtlasPrompt } from "./gpt" @@ -30,7 +29,7 @@ import { buildDecisionMatrix, } from "./prompt-section-builder" -const MODE: AgentMode = "primary" +const MODE: AgentMode = "all" export type AtlasPromptSource = "default" | "gpt" | "gemini" @@ -100,11 +99,6 @@ function buildDynamicOrchestratorPrompt(ctx?: OrchestratorContext): string { } export function createAtlasAgent(ctx: OrchestratorContext): AgentConfig { - const restrictions = createAgentToolRestrictions([ - "task", - "call_omo_agent", - ]) - const baseConfig = { description: "Orchestrates work via task() to complete ALL tasks in a todo list until fully done. (Atlas - OhMyOpenCode)", @@ -113,7 +107,6 @@ export function createAtlasAgent(ctx: OrchestratorContext): AgentConfig { temperature: 0.1, prompt: buildDynamicOrchestratorPrompt(ctx), color: "#10B981", - ...restrictions, } return baseConfig as AgentConfig diff --git a/src/agents/env-context.test.ts b/src/agents/env-context.test.ts new file mode 100644 index 000000000..718e76a98 --- /dev/null +++ b/src/agents/env-context.test.ts @@ -0,0 +1,41 @@ +/// + +import { describe, test, expect } from "bun:test" +import { createEnvContext } from "./env-context" + +describe("createEnvContext", () => { + test("returns omo-env block with timezone and locale", () => { + // #given - no setup needed + + // #when + const result = createEnvContext() + + // #then + expect(result).toContain("") + expect(result).toContain("") + expect(result).toContain("Timezone:") + expect(result).toContain("Locale:") + expect(result).not.toContain("Current date:") + }) + + test("does not include time with seconds precision to preserve token cache", () => { + // #given - seconds-precision time changes every second, breaking cache on every request + + // #when + const result = createEnvContext() + + // #then - no HH:MM:SS 
pattern anywhere in the output + expect(result).not.toMatch(/\d{1,2}:\d{2}:\d{2}/) + }) + + test("does not include date or time fields since OpenCode already provides them", () => { + // #given - OpenCode's system.ts already injects date, platform, working directory + + // #when + const result = createEnvContext() + + // #then - only timezone and locale remain; both are stable across requests + expect(result).not.toContain("Current date:") + expect(result).not.toContain("Current time:") + }) +}) diff --git a/src/agents/env-context.ts b/src/agents/env-context.ts index 262886ca3..c8e542b44 100644 --- a/src/agents/env-context.ts +++ b/src/agents/env-context.ts @@ -1,32 +1,15 @@ /** - * Creates OmO-specific environment context (time, timezone, locale). + * Creates OmO-specific environment context (timezone, locale). * Note: Working directory, platform, and date are already provided by OpenCode's system.ts, * so we only include fields that OpenCode doesn't provide to avoid duplication. * See: https://github.com/code-yeongyu/oh-my-opencode/issues/379 */ export function createEnvContext(): string { - const now = new Date() const timezone = Intl.DateTimeFormat().resolvedOptions().timeZone const locale = Intl.DateTimeFormat().resolvedOptions().locale - const dateStr = now.toLocaleDateString(locale, { - weekday: "short", - year: "numeric", - month: "short", - day: "numeric", - }) - - const timeStr = now.toLocaleTimeString(locale, { - hour: "2-digit", - minute: "2-digit", - second: "2-digit", - hour12: true, - }) - return ` - Current date: ${dateStr} - Current time: ${timeStr} Timezone: ${timezone} Locale: ${locale} ` diff --git a/src/agents/hephaestus.ts b/src/agents/hephaestus.ts index feac23219..e182c96f4 100644 --- a/src/agents/hephaestus.ts +++ b/src/agents/hephaestus.ts @@ -19,7 +19,7 @@ import { categorizeTools, } from "./dynamic-agent-prompt-builder"; -const MODE: AgentMode = "primary"; +const MODE: AgentMode = "all"; function buildTodoDisciplineSection(useTaskSystem: 
boolean): string { if (useTaskSystem) { @@ -448,6 +448,21 @@ ${oracleSection} 4. **Run build** if applicable — exit code 0 required 5. **Tell user** what you verified and the results — keep it clear and helpful +### Auto-Commit Policy (MANDATORY for implementation/fix work) + +1. **Auto-commit after implementation is complete** when the task includes feature/fix code changes +2. **Commit ONLY after verification gates pass**: + - \`lsp_diagnostics\` clean on all modified files + - Related tests pass + - Typecheck/build pass when applicable +3. **If any gate fails, DO NOT commit** — fix issues first, re-run verification, then commit +4. **Use Conventional Commits format** with meaningful intent-focused messages: + - \`feat(scope): add ...\` for new functionality + - \`fix(scope): resolve ...\` for bug fixes + - \`refactor(scope): simplify ...\` for internal restructuring +5. **Do not make placeholder commits** (\`wip\`, \`temp\`, \`update\`) or commit unverified code +6. **If user explicitly says not to commit**, skip commit and report that changes are left uncommitted + - **File edit** — \`lsp_diagnostics\` clean - **Build** — Exit code 0 - **Tests** — Pass (or pre-existing failures noted) diff --git a/src/agents/sisyphus.ts b/src/agents/sisyphus.ts index 72173bd48..06debf111 100644 --- a/src/agents/sisyphus.ts +++ b/src/agents/sisyphus.ts @@ -8,7 +8,7 @@ import { buildGeminiIntentGateEnforcement, } from "./sisyphus-gemini-overlays"; -const MODE: AgentMode = "primary"; +const MODE: AgentMode = "all"; export const SISYPHUS_PROMPT_METADATA: AgentPromptMetadata = { category: "utility", cost: "EXPENSIVE", diff --git a/src/agents/tool-restrictions.test.ts b/src/agents/tool-restrictions.test.ts index 685acbc1f..85facdc54 100644 --- a/src/agents/tool-restrictions.test.ts +++ b/src/agents/tool-restrictions.test.ts @@ -4,6 +4,7 @@ import { createLibrarianAgent } from "./librarian" import { createExploreAgent } from "./explore" import { createMomusAgent } from "./momus" 
import { createMetisAgent } from "./metis" +import { createAtlasAgent } from "./atlas" const TEST_MODEL = "anthropic/claude-sonnet-4-5" @@ -96,4 +97,18 @@ describe("read-only agent tool restrictions", () => { } }) }) + + describe("Atlas", () => { + test("allows delegation tools for orchestration", () => { + // given + const agent = createAtlasAgent({ model: TEST_MODEL }) + + // when + const permission = (agent.permission ?? {}) as Record + + // then + expect(permission["task"]).toBeUndefined() + expect(permission["call_omo_agent"]).toBeUndefined() + }) + }) }) diff --git a/src/agents/types.test.ts b/src/agents/types.test.ts index 614991867..dd6b1fe54 100644 --- a/src/agents/types.test.ts +++ b/src/agents/types.test.ts @@ -2,11 +2,17 @@ import { describe, test, expect } from "bun:test"; import { isGptModel, isGeminiModel } from "./types"; describe("isGptModel", () => { - test("standard openai provider models", () => { + test("standard openai provider gpt models", () => { expect(isGptModel("openai/gpt-5.2")).toBe(true); expect(isGptModel("openai/gpt-4o")).toBe(true); - expect(isGptModel("openai/o1")).toBe(true); - expect(isGptModel("openai/o3-mini")).toBe(true); + }); + + test("o-series models are not gpt by name", () => { + expect(isGptModel("openai/o1")).toBe(false); + expect(isGptModel("openai/o3-mini")).toBe(false); + expect(isGptModel("litellm/o1")).toBe(false); + expect(isGptModel("litellm/o3-mini")).toBe(false); + expect(isGptModel("litellm/o4-mini")).toBe(false); }); test("github copilot gpt models", () => { @@ -17,9 +23,6 @@ describe("isGptModel", () => { test("litellm proxied gpt models", () => { expect(isGptModel("litellm/gpt-5.2")).toBe(true); expect(isGptModel("litellm/gpt-4o")).toBe(true); - expect(isGptModel("litellm/o1")).toBe(true); - expect(isGptModel("litellm/o3-mini")).toBe(true); - expect(isGptModel("litellm/o4-mini")).toBe(true); }); test("other proxied gpt models", () => { @@ -27,6 +30,11 @@ describe("isGptModel", () => { 
expect(isGptModel("custom-provider/gpt-5.2")).toBe(true); }); + test("venice provider gpt models", () => { + expect(isGptModel("venice/gpt-5.2")).toBe(true); + expect(isGptModel("venice/gpt-4o")).toBe(true); + }); + test("gpt4 prefix without hyphen (legacy naming)", () => { expect(isGptModel("litellm/gpt4o")).toBe(true); expect(isGptModel("ollama/gpt4")).toBe(true); @@ -39,8 +47,8 @@ describe("isGptModel", () => { }); test("gemini models are not gpt", () => { - expect(isGptModel("google/gemini-3-pro")).toBe(false); - expect(isGptModel("litellm/gemini-3-pro")).toBe(false); + expect(isGptModel("google/gemini-3.1-pro")).toBe(false); + expect(isGptModel("litellm/gemini-3.1-pro")).toBe(false); }); test("opencode provider is not gpt", () => { @@ -50,29 +58,29 @@ describe("isGptModel", () => { describe("isGeminiModel", () => { test("#given google provider models #then returns true", () => { - expect(isGeminiModel("google/gemini-3-pro")).toBe(true); + expect(isGeminiModel("google/gemini-3.1-pro")).toBe(true); expect(isGeminiModel("google/gemini-3-flash")).toBe(true); expect(isGeminiModel("google/gemini-2.5-pro")).toBe(true); }); test("#given google-vertex provider models #then returns true", () => { - expect(isGeminiModel("google-vertex/gemini-3-pro")).toBe(true); + expect(isGeminiModel("google-vertex/gemini-3.1-pro")).toBe(true); expect(isGeminiModel("google-vertex/gemini-3-flash")).toBe(true); }); test("#given github copilot gemini models #then returns true", () => { - expect(isGeminiModel("github-copilot/gemini-3-pro")).toBe(true); + expect(isGeminiModel("github-copilot/gemini-3.1-pro")).toBe(true); expect(isGeminiModel("github-copilot/gemini-3-flash")).toBe(true); }); test("#given litellm proxied gemini models #then returns true", () => { - expect(isGeminiModel("litellm/gemini-3-pro")).toBe(true); + expect(isGeminiModel("litellm/gemini-3.1-pro")).toBe(true); expect(isGeminiModel("litellm/gemini-3-flash")).toBe(true); 
expect(isGeminiModel("litellm/gemini-2.5-pro")).toBe(true); }); test("#given other proxied gemini models #then returns true", () => { - expect(isGeminiModel("custom-provider/gemini-3-pro")).toBe(true); + expect(isGeminiModel("custom-provider/gemini-3.1-pro")).toBe(true); expect(isGeminiModel("ollama/gemini-3-flash")).toBe(true); }); diff --git a/src/agents/types.ts b/src/agents/types.ts index 2d4f6c0cb..bdb60007a 100644 --- a/src/agents/types.ts +++ b/src/agents/types.ts @@ -70,14 +70,9 @@ function extractModelName(model: string): string { return model.includes("/") ? model.split("/").pop() ?? model : model } -const GPT_MODEL_PREFIXES = ["gpt-", "gpt4", "o1", "o3", "o4"] - export function isGptModel(model: string): boolean { - if (model.startsWith("openai/") || model.startsWith("github-copilot/gpt-")) - return true - const modelName = extractModelName(model).toLowerCase() - return GPT_MODEL_PREFIXES.some((prefix) => modelName.startsWith(prefix)) + return modelName.includes("gpt") } const GEMINI_PROVIDERS = ["google/", "google-vertex/"] diff --git a/src/agents/utils.test.ts b/src/agents/utils.test.ts index 1095fee13..f4ecb5040 100644 --- a/src/agents/utils.test.ts +++ b/src/agents/utils.test.ts @@ -603,8 +603,8 @@ describe("createBuiltinAgents with requiresProvider gating (hephaestus)", () => } }) - test("hephaestus is not created when only github-copilot provider is connected", async () => { - // #given - github-copilot provider has models available + test("hephaestus IS created when github-copilot is connected with a GPT model", async () => { + // #given - github-copilot provider has gpt-5.3-codex available const fetchSpy = spyOn(shared, "fetchAvailableModels").mockResolvedValue( new Set(["github-copilot/gpt-5.3-codex"]) ) @@ -614,8 +614,8 @@ describe("createBuiltinAgents with requiresProvider gating (hephaestus)", () => // #when const agents = await createBuiltinAgents([], {}, undefined, TEST_DEFAULT_MODEL, undefined, undefined, [], {}) - // #then - 
expect(agents.hephaestus).toBeUndefined() + // #then - github-copilot is now a valid provider for hephaestus + expect(agents.hephaestus).toBeDefined() } finally { fetchSpy.mockRestore() cacheSpy.mockRestore() @@ -1002,7 +1002,7 @@ describe("buildAgent with category and skills", () => { const agent = buildAgent(source["test-agent"], TEST_MODEL) // #then - category's built-in model is applied - expect(agent.model).toBe("google/gemini-3-pro") + expect(agent.model).toBe("google/gemini-3.1-pro") }) test("agent with category and existing model keeps existing model", () => { diff --git a/src/cli/__snapshots__/model-fallback.test.ts.snap b/src/cli/__snapshots__/model-fallback.test.ts.snap index df91e07b8..a8c3e9112 100644 --- a/src/cli/__snapshots__/model-fallback.test.ts.snap +++ b/src/cli/__snapshots__/model-fallback.test.ts.snap @@ -325,7 +325,7 @@ exports[`generateModelConfig single native provider uses Gemini models when only "$schema": "https://raw.githubusercontent.com/code-yeongyu/oh-my-opencode/master/assets/oh-my-opencode.schema.json", "agents": { "atlas": { - "model": "google/gemini-3-pro-preview", + "model": "google/gemini-3.1-pro-preview", }, "explore": { "model": "opencode/gpt-5-nano", @@ -334,34 +334,34 @@ exports[`generateModelConfig single native provider uses Gemini models when only "model": "opencode/glm-4.7-free", }, "metis": { - "model": "google/gemini-3-pro-preview", + "model": "google/gemini-3.1-pro-preview", "variant": "high", }, "momus": { - "model": "google/gemini-3-pro-preview", + "model": "google/gemini-3.1-pro-preview", "variant": "high", }, "multimodal-looker": { "model": "google/gemini-3-flash-preview", }, "oracle": { - "model": "google/gemini-3-pro-preview", + "model": "google/gemini-3.1-pro-preview", "variant": "high", }, "prometheus": { - "model": "google/gemini-3-pro-preview", + "model": "google/gemini-3.1-pro-preview", }, }, "categories": { "artistry": { - "model": "google/gemini-3-pro-preview", + "model": 
"google/gemini-3.1-pro-preview", "variant": "high", }, "quick": { "model": "google/gemini-3-flash-preview", }, "ultrabrain": { - "model": "google/gemini-3-pro-preview", + "model": "google/gemini-3.1-pro-preview", "variant": "high", }, "unspecified-high": { @@ -371,7 +371,7 @@ exports[`generateModelConfig single native provider uses Gemini models when only "model": "google/gemini-3-flash-preview", }, "visual-engineering": { - "model": "google/gemini-3-pro-preview", + "model": "google/gemini-3.1-pro-preview", "variant": "high", }, "writing": { @@ -386,7 +386,7 @@ exports[`generateModelConfig single native provider uses Gemini models with isMa "$schema": "https://raw.githubusercontent.com/code-yeongyu/oh-my-opencode/master/assets/oh-my-opencode.schema.json", "agents": { "atlas": { - "model": "google/gemini-3-pro-preview", + "model": "google/gemini-3.1-pro-preview", }, "explore": { "model": "opencode/gpt-5-nano", @@ -395,44 +395,44 @@ exports[`generateModelConfig single native provider uses Gemini models with isMa "model": "opencode/glm-4.7-free", }, "metis": { - "model": "google/gemini-3-pro-preview", + "model": "google/gemini-3.1-pro-preview", "variant": "high", }, "momus": { - "model": "google/gemini-3-pro-preview", + "model": "google/gemini-3.1-pro-preview", "variant": "high", }, "multimodal-looker": { "model": "google/gemini-3-flash-preview", }, "oracle": { - "model": "google/gemini-3-pro-preview", + "model": "google/gemini-3.1-pro-preview", "variant": "high", }, "prometheus": { - "model": "google/gemini-3-pro-preview", + "model": "google/gemini-3.1-pro-preview", }, }, "categories": { "artistry": { - "model": "google/gemini-3-pro-preview", + "model": "google/gemini-3.1-pro-preview", "variant": "high", }, "quick": { "model": "google/gemini-3-flash-preview", }, "ultrabrain": { - "model": "google/gemini-3-pro-preview", + "model": "google/gemini-3.1-pro-preview", "variant": "high", }, "unspecified-high": { - "model": "google/gemini-3-pro-preview", + "model": 
"google/gemini-3.1-pro-preview", }, "unspecified-low": { "model": "google/gemini-3-flash-preview", }, "visual-engineering": { - "model": "google/gemini-3-pro-preview", + "model": "google/gemini-3.1-pro-preview", "variant": "high", }, "writing": { @@ -485,7 +485,7 @@ exports[`generateModelConfig all native providers uses preferred models from fal }, "categories": { "artistry": { - "model": "google/gemini-3-pro-preview", + "model": "google/gemini-3.1-pro-preview", "variant": "high", }, "deep": { @@ -506,7 +506,7 @@ exports[`generateModelConfig all native providers uses preferred models from fal "model": "anthropic/claude-sonnet-4-5", }, "visual-engineering": { - "model": "google/gemini-3-pro-preview", + "model": "google/gemini-3.1-pro-preview", "variant": "high", }, "writing": { @@ -559,7 +559,7 @@ exports[`generateModelConfig all native providers uses preferred models with isM }, "categories": { "artistry": { - "model": "google/gemini-3-pro-preview", + "model": "google/gemini-3.1-pro-preview", "variant": "high", }, "deep": { @@ -581,7 +581,7 @@ exports[`generateModelConfig all native providers uses preferred models with isM "model": "anthropic/claude-sonnet-4-5", }, "visual-engineering": { - "model": "google/gemini-3-pro-preview", + "model": "google/gemini-3.1-pro-preview", "variant": "high", }, "writing": { @@ -634,7 +634,7 @@ exports[`generateModelConfig fallback providers uses OpenCode Zen models when on }, "categories": { "artistry": { - "model": "opencode/gemini-3-pro", + "model": "opencode/gemini-3.1-pro", "variant": "high", }, "deep": { @@ -655,7 +655,7 @@ exports[`generateModelConfig fallback providers uses OpenCode Zen models when on "model": "opencode/claude-sonnet-4-5", }, "visual-engineering": { - "model": "opencode/gemini-3-pro", + "model": "opencode/gemini-3.1-pro", "variant": "high", }, "writing": { @@ -708,7 +708,7 @@ exports[`generateModelConfig fallback providers uses OpenCode Zen models with is }, "categories": { "artistry": { - "model": 
"opencode/gemini-3-pro", + "model": "opencode/gemini-3.1-pro", "variant": "high", }, "deep": { @@ -730,7 +730,7 @@ exports[`generateModelConfig fallback providers uses OpenCode Zen models with is "model": "opencode/claude-sonnet-4-5", }, "visual-engineering": { - "model": "opencode/gemini-3-pro", + "model": "opencode/gemini-3.1-pro", "variant": "high", }, "writing": { @@ -779,14 +779,14 @@ exports[`generateModelConfig fallback providers uses GitHub Copilot models when }, "categories": { "artistry": { - "model": "github-copilot/gemini-3-pro-preview", + "model": "github-copilot/gemini-3.1-pro-preview", "variant": "high", }, "quick": { "model": "github-copilot/claude-haiku-4.5", }, "ultrabrain": { - "model": "github-copilot/gemini-3-pro-preview", + "model": "github-copilot/gemini-3.1-pro-preview", "variant": "high", }, "unspecified-high": { @@ -796,7 +796,7 @@ exports[`generateModelConfig fallback providers uses GitHub Copilot models when "model": "github-copilot/claude-sonnet-4.5", }, "visual-engineering": { - "model": "github-copilot/gemini-3-pro-preview", + "model": "github-copilot/gemini-3.1-pro-preview", "variant": "high", }, "writing": { @@ -845,14 +845,14 @@ exports[`generateModelConfig fallback providers uses GitHub Copilot models with }, "categories": { "artistry": { - "model": "github-copilot/gemini-3-pro-preview", + "model": "github-copilot/gemini-3.1-pro-preview", "variant": "high", }, "quick": { "model": "github-copilot/claude-haiku-4.5", }, "ultrabrain": { - "model": "github-copilot/gemini-3-pro-preview", + "model": "github-copilot/gemini-3.1-pro-preview", "variant": "high", }, "unspecified-high": { @@ -863,7 +863,7 @@ exports[`generateModelConfig fallback providers uses GitHub Copilot models with "model": "github-copilot/claude-sonnet-4.5", }, "visual-engineering": { - "model": "github-copilot/gemini-3-pro-preview", + "model": "github-copilot/gemini-3.1-pro-preview", "variant": "high", }, "writing": { @@ -1026,7 +1026,7 @@ exports[`generateModelConfig 
mixed provider scenarios uses Claude + OpenCode Zen }, "categories": { "artistry": { - "model": "opencode/gemini-3-pro", + "model": "opencode/gemini-3.1-pro", "variant": "high", }, "deep": { @@ -1047,7 +1047,7 @@ exports[`generateModelConfig mixed provider scenarios uses Claude + OpenCode Zen "model": "anthropic/claude-sonnet-4-5", }, "visual-engineering": { - "model": "opencode/gemini-3-pro", + "model": "opencode/gemini-3.1-pro", "variant": "high", }, "writing": { @@ -1100,7 +1100,7 @@ exports[`generateModelConfig mixed provider scenarios uses OpenAI + Copilot comb }, "categories": { "artistry": { - "model": "github-copilot/gemini-3-pro-preview", + "model": "github-copilot/gemini-3.1-pro-preview", "variant": "high", }, "deep": { @@ -1121,7 +1121,7 @@ exports[`generateModelConfig mixed provider scenarios uses OpenAI + Copilot comb "model": "github-copilot/claude-sonnet-4.5", }, "visual-engineering": { - "model": "github-copilot/gemini-3-pro-preview", + "model": "github-copilot/gemini-3.1-pro-preview", "variant": "high", }, "writing": { @@ -1217,7 +1217,7 @@ exports[`generateModelConfig mixed provider scenarios uses Gemini + Claude combi "model": "google/gemini-3-flash-preview", }, "oracle": { - "model": "google/gemini-3-pro-preview", + "model": "google/gemini-3.1-pro-preview", "variant": "high", }, "prometheus": { @@ -1231,14 +1231,14 @@ exports[`generateModelConfig mixed provider scenarios uses Gemini + Claude combi }, "categories": { "artistry": { - "model": "google/gemini-3-pro-preview", + "model": "google/gemini-3.1-pro-preview", "variant": "high", }, "quick": { "model": "anthropic/claude-haiku-4-5", }, "ultrabrain": { - "model": "google/gemini-3-pro-preview", + "model": "google/gemini-3.1-pro-preview", "variant": "high", }, "unspecified-high": { @@ -1248,7 +1248,7 @@ exports[`generateModelConfig mixed provider scenarios uses Gemini + Claude combi "model": "anthropic/claude-sonnet-4-5", }, "visual-engineering": { - "model": "google/gemini-3-pro-preview", + 
"model": "google/gemini-3.1-pro-preview", "variant": "high", }, "writing": { @@ -1301,7 +1301,7 @@ exports[`generateModelConfig mixed provider scenarios uses all fallback provider }, "categories": { "artistry": { - "model": "github-copilot/gemini-3-pro-preview", + "model": "github-copilot/gemini-3.1-pro-preview", "variant": "high", }, "deep": { @@ -1322,7 +1322,7 @@ exports[`generateModelConfig mixed provider scenarios uses all fallback provider "model": "github-copilot/claude-sonnet-4.5", }, "visual-engineering": { - "model": "github-copilot/gemini-3-pro-preview", + "model": "github-copilot/gemini-3.1-pro-preview", "variant": "high", }, "writing": { @@ -1375,7 +1375,7 @@ exports[`generateModelConfig mixed provider scenarios uses all providers togethe }, "categories": { "artistry": { - "model": "google/gemini-3-pro-preview", + "model": "google/gemini-3.1-pro-preview", "variant": "high", }, "deep": { @@ -1396,7 +1396,7 @@ exports[`generateModelConfig mixed provider scenarios uses all providers togethe "model": "anthropic/claude-sonnet-4-5", }, "visual-engineering": { - "model": "google/gemini-3-pro-preview", + "model": "google/gemini-3.1-pro-preview", "variant": "high", }, "writing": { @@ -1449,7 +1449,7 @@ exports[`generateModelConfig mixed provider scenarios uses all providers with is }, "categories": { "artistry": { - "model": "google/gemini-3-pro-preview", + "model": "google/gemini-3.1-pro-preview", "variant": "high", }, "deep": { @@ -1471,7 +1471,7 @@ exports[`generateModelConfig mixed provider scenarios uses all providers with is "model": "anthropic/claude-sonnet-4-5", }, "visual-engineering": { - "model": "google/gemini-3-pro-preview", + "model": "google/gemini-3.1-pro-preview", "variant": "high", }, "writing": { diff --git a/src/cli/config-manager.test.ts b/src/cli/config-manager.test.ts index ad73c3d44..67571185a 100644 --- a/src/cli/config-manager.test.ts +++ b/src/cli/config-manager.test.ts @@ -178,7 +178,7 @@ describe("config-manager 
ANTIGRAVITY_PROVIDER_CONFIG", () => { expect(models).toBeTruthy() const required = [ - "antigravity-gemini-3-pro", + "antigravity-gemini-3.1-pro", "antigravity-gemini-3-flash", "antigravity-claude-sonnet-4-6", "antigravity-claude-sonnet-4-6-thinking", @@ -206,7 +206,7 @@ describe("config-manager ANTIGRAVITY_PROVIDER_CONFIG", () => { const models = (ANTIGRAVITY_PROVIDER_CONFIG as any).google.models as Record // #when checking Gemini Pro variants - const pro = models["antigravity-gemini-3-pro"] + const pro = models["antigravity-gemini-3.1-pro"] // #then should have low and high variants expect(pro.variants).toBeTruthy() expect(pro.variants.low).toBeTruthy() diff --git a/src/cli/config-manager/antigravity-provider-configuration.ts b/src/cli/config-manager/antigravity-provider-configuration.ts index 5559d0919..6d847ac5d 100644 --- a/src/cli/config-manager/antigravity-provider-configuration.ts +++ b/src/cli/config-manager/antigravity-provider-configuration.ts @@ -4,10 +4,10 @@ * IMPORTANT: Model names MUST use `antigravity-` prefix for stability. * * Since opencode-antigravity-auth v1.3.0, models use a variant system: - * - `antigravity-gemini-3-pro` with variants: low, high + * - `antigravity-gemini-3.1-pro` with variants: low, high * - `antigravity-gemini-3-flash` with variants: minimal, low, medium, high * - * Legacy tier-suffixed names (e.g., `antigravity-gemini-3-pro-high`) still work + * Legacy tier-suffixed names (e.g., `antigravity-gemini-3.1-pro-high`) still work * but variants are the recommended approach. 
* * @see https://github.com/NoeFabris/opencode-antigravity-auth#models @@ -16,7 +16,7 @@ export const ANTIGRAVITY_PROVIDER_CONFIG = { google: { name: "Google", models: { - "antigravity-gemini-3-pro": { + "antigravity-gemini-3.1-pro": { name: "Gemini 3 Pro (Antigravity)", limit: { context: 1048576, output: 65535 }, modalities: { input: ["text", "image", "pdf"], output: ["text"] }, diff --git a/src/cli/config-manager/bun-install.ts b/src/cli/config-manager/bun-install.ts index f24e77fa2..6b3225547 100644 --- a/src/cli/config-manager/bun-install.ts +++ b/src/cli/config-manager/bun-install.ts @@ -1,4 +1,5 @@ import { getConfigDir } from "./config-context" +import { spawnWithWindowsHide } from "../../shared/spawn-with-windows-hide" const BUN_INSTALL_TIMEOUT_SECONDS = 60 const BUN_INSTALL_TIMEOUT_MS = BUN_INSTALL_TIMEOUT_SECONDS * 1000 @@ -16,7 +17,7 @@ export async function runBunInstall(): Promise { export async function runBunInstallWithDetails(): Promise { try { - const proc = Bun.spawn(["bun", "install"], { + const proc = spawnWithWindowsHide(["bun", "install"], { cwd: getConfigDir(), stdout: "inherit", stderr: "inherit", diff --git a/src/cli/config-manager/opencode-binary.ts b/src/cli/config-manager/opencode-binary.ts index 6d889faee..6fb140403 100644 --- a/src/cli/config-manager/opencode-binary.ts +++ b/src/cli/config-manager/opencode-binary.ts @@ -1,4 +1,5 @@ import type { OpenCodeBinaryType } from "../../shared/opencode-config-dir-types" +import { spawnWithWindowsHide } from "../../shared/spawn-with-windows-hide" import { initConfigContext } from "./config-context" const OPENCODE_BINARIES = ["opencode", "opencode-desktop"] as const @@ -11,7 +12,7 @@ interface OpenCodeBinaryResult { async function findOpenCodeBinaryWithVersion(): Promise { for (const binary of OPENCODE_BINARIES) { try { - const proc = Bun.spawn([binary, "--version"], { + const proc = spawnWithWindowsHide([binary, "--version"], { stdout: "pipe", stderr: "pipe", }) diff --git 
a/src/cli/config-manager/write-omo-config.test.ts b/src/cli/config-manager/write-omo-config.test.ts new file mode 100644 index 000000000..5701b53dc --- /dev/null +++ b/src/cli/config-manager/write-omo-config.test.ts @@ -0,0 +1,80 @@ +import { afterEach, beforeEach, describe, expect, it } from "bun:test" +import { mkdirSync, readFileSync, rmSync, writeFileSync } from "node:fs" +import { tmpdir } from "node:os" +import { join } from "node:path" + +import { parseJsonc } from "../../shared/jsonc-parser" +import type { InstallConfig } from "../types" +import { resetConfigContext } from "./config-context" +import { generateOmoConfig } from "./generate-omo-config" +import { writeOmoConfig } from "./write-omo-config" + +const installConfig: InstallConfig = { + hasClaude: true, + isMax20: true, + hasOpenAI: true, + hasGemini: true, + hasCopilot: false, + hasOpencodeZen: false, + hasZaiCodingPlan: false, + hasKimiForCoding: false, +} + +function getRecord(value: unknown): Record { + if (value && typeof value === "object" && !Array.isArray(value)) { + return value as Record + } + + return {} +} + +describe("writeOmoConfig", () => { + let testConfigDir = "" + let testConfigPath = "" + + beforeEach(() => { + testConfigDir = join(tmpdir(), `omo-write-config-${Date.now()}-${Math.random().toString(36).slice(2)}`) + testConfigPath = join(testConfigDir, "oh-my-opencode.json") + + mkdirSync(testConfigDir, { recursive: true }) + process.env.OPENCODE_CONFIG_DIR = testConfigDir + resetConfigContext() + }) + + afterEach(() => { + rmSync(testConfigDir, { recursive: true, force: true }) + resetConfigContext() + delete process.env.OPENCODE_CONFIG_DIR + }) + + it("preserves existing user values while adding new defaults", () => { + // given + const existingConfig = { + agents: { + sisyphus: { + model: "custom/provider-model", + }, + }, + disabled_hooks: ["comment-checker"], + } + writeFileSync(testConfigPath, JSON.stringify(existingConfig, null, 2) + "\n", "utf-8") + + const 
generatedDefaults = generateOmoConfig(installConfig) + + // when + const result = writeOmoConfig(installConfig) + + // then + expect(result.success).toBe(true) + + const savedConfig = parseJsonc>(readFileSync(testConfigPath, "utf-8")) + const savedAgents = getRecord(savedConfig.agents) + const savedSisyphus = getRecord(savedAgents.sisyphus) + expect(savedSisyphus.model).toBe("custom/provider-model") + expect(savedConfig.disabled_hooks).toEqual(["comment-checker"]) + + for (const defaultKey of Object.keys(generatedDefaults)) { + expect(savedConfig).toHaveProperty(defaultKey) + } + }) +}) diff --git a/src/cli/config-manager/write-omo-config.ts b/src/cli/config-manager/write-omo-config.ts index 09fcce15b..261175e7a 100644 --- a/src/cli/config-manager/write-omo-config.ts +++ b/src/cli/config-manager/write-omo-config.ts @@ -43,7 +43,7 @@ export function writeOmoConfig(installConfig: InstallConfig): ConfigMergeResult return { success: true, configPath: omoConfigPath } } - const merged = deepMergeRecord(existing, newConfig) + const merged = deepMergeRecord(newConfig, existing) writeFileSync(omoConfigPath, JSON.stringify(merged, null, 2) + "\n") } catch (parseErr) { if (parseErr instanceof SyntaxError) { diff --git a/src/cli/doctor/checks/dependencies.ts b/src/cli/doctor/checks/dependencies.ts index da22afcfb..f6f6ded01 100644 --- a/src/cli/doctor/checks/dependencies.ts +++ b/src/cli/doctor/checks/dependencies.ts @@ -3,6 +3,7 @@ import { createRequire } from "node:module" import { dirname, join } from "node:path" import type { DependencyInfo } from "../types" +import { spawnWithWindowsHide } from "../../../shared/spawn-with-windows-hide" async function checkBinaryExists(binary: string): Promise<{ exists: boolean; path: string | null }> { try { @@ -18,7 +19,7 @@ async function checkBinaryExists(binary: string): Promise<{ exists: boolean; pat async function getBinaryVersion(binary: string): Promise { try { - const proc = Bun.spawn([binary, "--version"], { stdout: "pipe", 
stderr: "pipe" }) + const proc = spawnWithWindowsHide([binary, "--version"], { stdout: "pipe", stderr: "pipe" }) const output = await new Response(proc.stdout).text() await proc.exited if (proc.exitCode === 0) { @@ -140,4 +141,3 @@ export async function checkCommentChecker(): Promise { path: resolvedPath, } } - diff --git a/src/cli/doctor/checks/model-resolution.test.ts b/src/cli/doctor/checks/model-resolution.test.ts index cca2f58b5..e311076a8 100644 --- a/src/cli/doctor/checks/model-resolution.test.ts +++ b/src/cli/doctor/checks/model-resolution.test.ts @@ -26,7 +26,7 @@ describe("model-resolution check", () => { // then: Should have category entries const visual = info.categories.find((c) => c.name === "visual-engineering") expect(visual).toBeDefined() - expect(visual!.requirement.fallbackChain[0]?.model).toBe("gemini-3-pro") + expect(visual!.requirement.fallbackChain[0]?.model).toBe("gemini-3.1-pro") expect(visual!.requirement.fallbackChain[0]?.providers).toContain("google") }) }) diff --git a/src/cli/doctor/checks/system-binary.ts b/src/cli/doctor/checks/system-binary.ts index 670d7ce1e..5a4d48126 100644 --- a/src/cli/doctor/checks/system-binary.ts +++ b/src/cli/doctor/checks/system-binary.ts @@ -1,6 +1,7 @@ import { existsSync } from "node:fs" import { homedir } from "node:os" import { join } from "node:path" +import { spawnWithWindowsHide } from "../../../shared/spawn-with-windows-hide" import { OPENCODE_BINARIES } from "../constants" @@ -110,7 +111,7 @@ export async function getOpenCodeVersion( ): Promise { try { const command = buildVersionCommand(binaryPath, platform) - const processResult = Bun.spawn(command, { stdout: "pipe", stderr: "pipe" }) + const processResult = spawnWithWindowsHide(command, { stdout: "pipe", stderr: "pipe" }) const output = await new Response(processResult.stdout).text() await processResult.exited diff --git a/src/cli/doctor/checks/tools-gh.ts b/src/cli/doctor/checks/tools-gh.ts index a9ac59a91..177b5c160 100644 --- 
a/src/cli/doctor/checks/tools-gh.ts +++ b/src/cli/doctor/checks/tools-gh.ts @@ -1,3 +1,5 @@ +import { spawnWithWindowsHide } from "../../../shared/spawn-with-windows-hide" + export interface GhCliInfo { installed: boolean version: string | null @@ -19,7 +21,7 @@ async function checkBinaryExists(binary: string): Promise<{ exists: boolean; pat async function getGhVersion(): Promise { try { - const processResult = Bun.spawn(["gh", "--version"], { stdout: "pipe", stderr: "pipe" }) + const processResult = spawnWithWindowsHide(["gh", "--version"], { stdout: "pipe", stderr: "pipe" }) const output = await new Response(processResult.stdout).text() await processResult.exited if (processResult.exitCode !== 0) return null @@ -38,7 +40,7 @@ async function getGhAuthStatus(): Promise<{ error: string | null }> { try { - const processResult = Bun.spawn(["gh", "auth", "status"], { + const processResult = spawnWithWindowsHide(["gh", "auth", "status"], { stdout: "pipe", stderr: "pipe", env: { ...process.env, GH_NO_UPDATE_NOTIFIER: "1" }, diff --git a/src/cli/model-fallback-requirements.ts b/src/cli/model-fallback-requirements.ts index f3f43e60b..0ff625005 100644 --- a/src/cli/model-fallback-requirements.ts +++ b/src/cli/model-fallback-requirements.ts @@ -24,7 +24,7 @@ export const CLI_AGENT_MODEL_REQUIREMENTS: Record = { oracle: { fallbackChain: [ { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "high" }, - { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" }, + { providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro", variant: "high" }, { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" }, ], }, @@ -59,7 +59,7 @@ export const CLI_AGENT_MODEL_REQUIREMENTS: Record = { { providers: ["kimi-for-coding"], model: "k2p5" }, { providers: ["opencode"], model: "kimi-k2.5-free" }, { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", 
variant: "high" }, - { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro" }, + { providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro" }, ], }, metis: { @@ -68,14 +68,14 @@ export const CLI_AGENT_MODEL_REQUIREMENTS: Record = { { providers: ["kimi-for-coding"], model: "k2p5" }, { providers: ["opencode"], model: "kimi-k2.5-free" }, { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "high" }, - { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" }, + { providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro", variant: "high" }, ], }, momus: { fallbackChain: [ { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "medium" }, { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" }, - { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" }, + { providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro", variant: "high" }, ], }, atlas: { @@ -84,7 +84,7 @@ export const CLI_AGENT_MODEL_REQUIREMENTS: Record = { { providers: ["opencode"], model: "kimi-k2.5-free" }, { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-sonnet-4-5" }, { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2" }, - { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro" }, + { providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro" }, ], }, } @@ -92,7 +92,7 @@ export const CLI_AGENT_MODEL_REQUIREMENTS: Record = { export const CLI_CATEGORY_MODEL_REQUIREMENTS: Record = { "visual-engineering": { fallbackChain: [ - { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" }, + { providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro", variant: "high" }, { providers: ["zai-coding-plan"], model: "glm-5" }, 
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" }, { providers: ["kimi-for-coding"], model: "k2p5" }, @@ -101,7 +101,7 @@ export const CLI_CATEGORY_MODEL_REQUIREMENTS: Record = ultrabrain: { fallbackChain: [ { providers: ["openai", "opencode"], model: "gpt-5.3-codex", variant: "xhigh" }, - { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" }, + { providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro", variant: "high" }, { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" }, ], }, @@ -109,17 +109,17 @@ export const CLI_CATEGORY_MODEL_REQUIREMENTS: Record = fallbackChain: [ { providers: ["openai", "opencode"], model: "gpt-5.3-codex", variant: "medium" }, { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" }, - { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" }, + { providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro", variant: "high" }, ], requiresModel: "gpt-5.3-codex", }, artistry: { fallbackChain: [ - { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" }, + { providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro", variant: "high" }, { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" }, { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2" }, ], - requiresModel: "gemini-3-pro", + requiresModel: "gemini-3.1-pro", }, quick: { fallbackChain: [ @@ -139,7 +139,7 @@ export const CLI_CATEGORY_MODEL_REQUIREMENTS: Record = fallbackChain: [ { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" }, { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "high" }, - { providers: ["google", "github-copilot", 
"opencode"], model: "gemini-3-pro" }, + { providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro" }, ], }, writing: { diff --git a/src/cli/provider-model-id-transform.test.ts b/src/cli/provider-model-id-transform.test.ts index e13c7846a..17cb9dfb1 100644 --- a/src/cli/provider-model-id-transform.test.ts +++ b/src/cli/provider-model-id-transform.test.ts @@ -40,16 +40,16 @@ describe("transformModelForProvider", () => { expect(result).toBe("claude-haiku-4.5") }) - test("transforms gemini-3-pro to gemini-3-pro-preview", () => { - // #given github-copilot provider and gemini-3-pro model + test("transforms gemini-3.1-pro to gemini-3.1-pro-preview", () => { + // #given github-copilot provider and gemini-3.1-pro model const provider = "github-copilot" - const model = "gemini-3-pro" + const model = "gemini-3.1-pro" // #when transformModelForProvider is called const result = transformModelForProvider(provider, model) - // #then should transform to gemini-3-pro-preview - expect(result).toBe("gemini-3-pro-preview") + // #then should transform to gemini-3.1-pro-preview + expect(result).toBe("gemini-3.1-pro-preview") }) test("transforms gemini-3-flash to gemini-3-flash-preview", () => { @@ -64,16 +64,16 @@ describe("transformModelForProvider", () => { expect(result).toBe("gemini-3-flash-preview") }) - test("prevents double transformation of gemini-3-pro-preview", () => { - // #given github-copilot provider and gemini-3-pro-preview model (already transformed) + test("prevents double transformation of gemini-3.1-pro-preview", () => { + // #given github-copilot provider and gemini-3.1-pro-preview model (already transformed) const provider = "github-copilot" - const model = "gemini-3-pro-preview" + const model = "gemini-3.1-pro-preview" // #when transformModelForProvider is called const result = transformModelForProvider(provider, model) - // #then should NOT become gemini-3-pro-preview-preview - expect(result).toBe("gemini-3-pro-preview") + // #then should NOT 
become gemini-3.1-pro-preview-preview + expect(result).toBe("gemini-3.1-pro-preview") }) test("prevents double transformation of gemini-3-flash-preview", () => { @@ -102,16 +102,16 @@ describe("transformModelForProvider", () => { expect(result).toBe("gemini-3-flash-preview") }) - test("transforms gemini-3-pro to gemini-3-pro-preview", () => { - // #given google provider and gemini-3-pro model + test("transforms gemini-3.1-pro to gemini-3.1-pro-preview", () => { + // #given google provider and gemini-3.1-pro model const provider = "google" - const model = "gemini-3-pro" + const model = "gemini-3.1-pro" // #when transformModelForProvider is called const result = transformModelForProvider(provider, model) - // #then should transform to gemini-3-pro-preview - expect(result).toBe("gemini-3-pro-preview") + // #then should transform to gemini-3.1-pro-preview + expect(result).toBe("gemini-3.1-pro-preview") }) test("passes through other gemini models unchanged", () => { @@ -138,16 +138,16 @@ describe("transformModelForProvider", () => { expect(result).toBe("gemini-3-flash-preview") }) - test("prevents double transformation of gemini-3-pro-preview", () => { - // #given google provider and gemini-3-pro-preview model (already transformed) + test("prevents double transformation of gemini-3.1-pro-preview", () => { + // #given google provider and gemini-3.1-pro-preview model (already transformed) const provider = "google" - const model = "gemini-3-pro-preview" + const model = "gemini-3.1-pro-preview" // #when transformModelForProvider is called const result = transformModelForProvider(provider, model) - // #then should NOT become gemini-3-pro-preview-preview - expect(result).toBe("gemini-3-pro-preview") + // #then should NOT become gemini-3.1-pro-preview-preview + expect(result).toBe("gemini-3.1-pro-preview") }) test("does not transform claude models for google provider", () => { diff --git a/src/cli/run/event-handlers.test.ts b/src/cli/run/event-handlers.test.ts index 
267b394cd..b6687cf7d 100644 --- a/src/cli/run/event-handlers.test.ts +++ b/src/cli/run/event-handlers.test.ts @@ -1,4 +1,4 @@ -import { describe, it, expect, spyOn } from "bun:test" +const { describe, it, expect, spyOn } = require("bun:test") import type { RunContext } from "./types" import { createEventState } from "./events" import { handleSessionStatus, handleMessagePartUpdated, handleMessageUpdated, handleTuiToast } from "./event-handlers" @@ -235,9 +235,7 @@ describe("handleMessagePartUpdated", () => { it("prints completion metadata once when assistant text part is completed", () => { // given - const nowSpy = spyOn(Date, "now") - nowSpy.mockReturnValueOnce(1000) - nowSpy.mockReturnValueOnce(3400) + const nowSpy = spyOn(Date, "now").mockReturnValue(3400) const ctx = createMockContext("ses_main") const state = createEventState() @@ -259,6 +257,7 @@ describe("handleMessagePartUpdated", () => { } as any, state, ) + state.messageStartedAtById["msg_1"] = 1000 // when handleMessagePartUpdated( diff --git a/src/cli/run/event-state.ts b/src/cli/run/event-state.ts index 4d05f7dac..eee23f5f3 100644 --- a/src/cli/run/event-state.ts +++ b/src/cli/run/event-state.ts @@ -7,6 +7,8 @@ export interface EventState { currentTool: string | null /** Set to true when the main session has produced meaningful work (text, tool call, or tool result) */ hasReceivedMeaningfulWork: boolean + /** Timestamp of the last received event (for watchdog detection) */ + lastEventTimestamp: number /** Count of assistant messages for the main session */ messageCount: number /** Current agent name from the latest assistant message */ @@ -54,6 +56,7 @@ export function createEventState(): EventState { lastPartText: "", currentTool: null, hasReceivedMeaningfulWork: false, + lastEventTimestamp: Date.now(), messageCount: 0, currentAgent: null, currentModel: null, diff --git a/src/cli/run/event-stream-processor.ts b/src/cli/run/event-stream-processor.ts index c5e600e91..757c1a447 100644 --- 
a/src/cli/run/event-stream-processor.ts +++ b/src/cli/run/event-stream-processor.ts @@ -35,6 +35,9 @@ export async function processEvents( logEventVerbose(ctx, payload) } + // Update last event timestamp for watchdog detection + state.lastEventTimestamp = Date.now() + handleSessionError(ctx, payload, state) handleSessionIdle(ctx, payload, state) handleSessionStatus(ctx, payload, state) diff --git a/src/cli/run/integration.test.ts b/src/cli/run/integration.test.ts index d0fc91cfb..6ac16c9f8 100644 --- a/src/cli/run/integration.test.ts +++ b/src/cli/run/integration.test.ts @@ -3,6 +3,7 @@ import type { RunResult } from "./types" import { createJsonOutputManager } from "./json-output" import { resolveSession } from "./session-resolver" import { executeOnCompleteHook } from "./on-complete-hook" +import * as spawnWithWindowsHideModule from "../../shared/spawn-with-windows-hide" import type { OpencodeClient } from "./types" import * as originalSdk from "@opencode-ai/sdk" import * as originalPortUtils from "../../shared/port-utils" @@ -147,7 +148,7 @@ describe("integration: --session-id", () => { const result = resolveSession({ client: mockClient, sessionId, directory: "/test" }) // then - await expect(result).rejects.toThrow(`Session not found: ${sessionId}`) + await expect(result).rejects.toThrow(`Session not found: ${sessionId}`) expect(mockClient.session.get).toHaveBeenCalledWith({ path: { id: sessionId }, query: { directory: "/test" }, @@ -161,10 +162,13 @@ describe("integration: --on-complete", () => { beforeEach(() => { spyOn(console, "error").mockImplementation(() => {}) - spawnSpy = spyOn(Bun, "spawn").mockReturnValue({ + spawnSpy = spyOn(spawnWithWindowsHideModule, "spawnWithWindowsHide").mockReturnValue({ exited: Promise.resolve(0), exitCode: 0, - } as unknown as ReturnType) + stdout: undefined, + stderr: undefined, + kill: () => {}, + } satisfies ReturnType) }) afterEach(() => { @@ -186,7 +190,7 @@ describe("integration: --on-complete", () => { // then
expect(spawnSpy).toHaveBeenCalledTimes(1) - const [_, options] = spawnSpy.mock.calls[0] as Parameters + const [_, options] = spawnSpy.mock.calls[0] as Parameters expect(options?.env?.SESSION_ID).toBe("session-123") expect(options?.env?.EXIT_CODE).toBe("0") expect(options?.env?.DURATION_MS).toBe("5000") @@ -208,10 +212,13 @@ describe("integration: option combinations", () => { spyOn(console, "error").mockImplementation(() => {}) mockStdout = createMockWriteStream() mockStderr = createMockWriteStream() - spawnSpy = spyOn(Bun, "spawn").mockReturnValue({ + spawnSpy = spyOn(spawnWithWindowsHideModule, "spawnWithWindowsHide").mockReturnValue({ exited: Promise.resolve(0), exitCode: 0, - } as unknown as ReturnType) + stdout: undefined, + stderr: undefined, + kill: () => {}, + } satisfies ReturnType) }) afterEach(() => { @@ -249,9 +256,9 @@ describe("integration: option combinations", () => { const emitted = mockStdout.writes[0]! expect(() => JSON.parse(emitted)).not.toThrow() expect(spawnSpy).toHaveBeenCalledTimes(1) - const [args] = spawnSpy.mock.calls[0] as Parameters + const [args] = spawnSpy.mock.calls[0] as Parameters expect(args).toEqual(["sh", "-c", "echo done"]) - const [_, options] = spawnSpy.mock.calls[0] as Parameters + const [_, options] = spawnSpy.mock.calls[0] as Parameters expect(options?.env?.SESSION_ID).toBe("session-123") expect(options?.env?.EXIT_CODE).toBe("0") expect(options?.env?.DURATION_MS).toBe("5000") diff --git a/src/cli/run/on-complete-hook.test.ts b/src/cli/run/on-complete-hook.test.ts index e560cc10c..930651a2d 100644 --- a/src/cli/run/on-complete-hook.test.ts +++ b/src/cli/run/on-complete-hook.test.ts @@ -1,4 +1,5 @@ import { describe, it, expect, spyOn, beforeEach, afterEach } from "bun:test" +import * as spawnWithWindowsHideModule from "../../shared/spawn-with-windows-hide" import { executeOnCompleteHook } from "./on-complete-hook" describe("executeOnCompleteHook", () => { @@ -6,7 +7,10 @@ describe("executeOnCompleteHook", () => { return { 
exited: Promise.resolve(exitCode), exitCode, - } as unknown as ReturnType + stdout: undefined, + stderr: undefined, + kill: () => {}, + } satisfies ReturnType } let consoleErrorSpy: ReturnType> @@ -21,7 +25,7 @@ describe("executeOnCompleteHook", () => { it("executes command with correct env vars", async () => { // given - const spawnSpy = spyOn(Bun, "spawn").mockReturnValue(createProc(0)) + const spawnSpy = spyOn(spawnWithWindowsHideModule, "spawnWithWindowsHide").mockReturnValue(createProc(0)) try { // when @@ -35,7 +39,7 @@ describe("executeOnCompleteHook", () => { // then expect(spawnSpy).toHaveBeenCalledTimes(1) - const [args, options] = spawnSpy.mock.calls[0] as Parameters + const [args, options] = spawnSpy.mock.calls[0] as Parameters expect(args).toEqual(["sh", "-c", "echo test"]) expect(options?.env?.SESSION_ID).toBe("session-123") @@ -51,7 +55,7 @@ describe("executeOnCompleteHook", () => { it("env var values are strings", async () => { // given - const spawnSpy = spyOn(Bun, "spawn").mockReturnValue(createProc(0)) + const spawnSpy = spyOn(spawnWithWindowsHideModule, "spawnWithWindowsHide").mockReturnValue(createProc(0)) try { // when @@ -64,7 +68,7 @@ describe("executeOnCompleteHook", () => { }) // then - const [_, options] = spawnSpy.mock.calls[0] as Parameters + const [_, options] = spawnSpy.mock.calls[0] as Parameters expect(options?.env?.EXIT_CODE).toBe("1") expect(options?.env?.EXIT_CODE).toBeTypeOf("string") @@ -79,7 +83,7 @@ describe("executeOnCompleteHook", () => { it("empty command string is no-op", async () => { // given - const spawnSpy = spyOn(Bun, "spawn").mockReturnValue(createProc(0)) + const spawnSpy = spyOn(spawnWithWindowsHideModule, "spawnWithWindowsHide").mockReturnValue(createProc(0)) try { // when @@ -100,7 +104,7 @@ describe("executeOnCompleteHook", () => { it("whitespace-only command is no-op", async () => { // given - const spawnSpy = spyOn(Bun, "spawn").mockReturnValue(createProc(0)) + const spawnSpy = 
spyOn(spawnWithWindowsHideModule, "spawnWithWindowsHide").mockReturnValue(createProc(0)) try { // when @@ -121,11 +125,11 @@ describe("executeOnCompleteHook", () => { it("command failure logs warning but does not throw", async () => { // given - const spawnSpy = spyOn(Bun, "spawn").mockReturnValue(createProc(1)) + const spawnSpy = spyOn(spawnWithWindowsHideModule, "spawnWithWindowsHide").mockReturnValue(createProc(1)) try { // when - await expect( + await expect( executeOnCompleteHook({ command: "false", sessionId: "session-123", @@ -149,13 +153,13 @@ describe("executeOnCompleteHook", () => { it("spawn error logs warning but does not throw", async () => { // given const spawnError = new Error("Command not found") - const spawnSpy = spyOn(Bun, "spawn").mockImplementation(() => { + const spawnSpy = spyOn(spawnWithWindowsHideModule, "spawnWithWindowsHide").mockImplementation(() => { throw spawnError }) try { // when - await expect( + await expect( executeOnCompleteHook({ command: "nonexistent-command", sessionId: "session-123", diff --git a/src/cli/run/on-complete-hook.ts b/src/cli/run/on-complete-hook.ts index 30c585439..b266ca887 100644 --- a/src/cli/run/on-complete-hook.ts +++ b/src/cli/run/on-complete-hook.ts @@ -1,4 +1,5 @@ import pc from "picocolors" +import { spawnWithWindowsHide } from "../../shared/spawn-with-windows-hide" export async function executeOnCompleteHook(options: { command: string @@ -17,7 +18,7 @@ export async function executeOnCompleteHook(options: { console.error(pc.dim(`Running on-complete hook: ${trimmedCommand}`)) try { - const proc = Bun.spawn(["sh", "-c", trimmedCommand], { + const proc = spawnWithWindowsHide(["sh", "-c", trimmedCommand], { env: { ...process.env, SESSION_ID: sessionId,
} from "node:path" +import { spawnWithWindowsHide } from "../../shared/spawn-with-windows-hide" const OPENCODE_COMMANDS = ["opencode", "opencode-desktop"] as const const WINDOWS_SUFFIXES = ["", ".exe", ".cmd", ".bat", ".ps1"] as const @@ -41,7 +42,7 @@ export function collectCandidateBinaryPaths( export async function canExecuteBinary(binaryPath: string): Promise { try { - const proc = Bun.spawn([binaryPath, "--version"], { + const proc = spawnWithWindowsHide([binaryPath, "--version"], { stdout: "pipe", stderr: "pipe", }) diff --git a/src/cli/run/poll-for-completion.ts b/src/cli/run/poll-for-completion.ts index 684670cb8..529221094 100644 --- a/src/cli/run/poll-for-completion.ts +++ b/src/cli/run/poll-for-completion.ts @@ -8,11 +8,15 @@ const DEFAULT_POLL_INTERVAL_MS = 500 const DEFAULT_REQUIRED_CONSECUTIVE = 1 const ERROR_GRACE_CYCLES = 3 const MIN_STABILIZATION_MS = 1_000 +const DEFAULT_EVENT_WATCHDOG_MS = 30_000 // 30 seconds +const DEFAULT_SECONDARY_MEANINGFUL_WORK_TIMEOUT_MS = 60_000 // 60 seconds export interface PollOptions { pollIntervalMs?: number requiredConsecutive?: number minStabilizationMs?: number + eventWatchdogMs?: number + secondaryMeaningfulWorkTimeoutMs?: number } export async function pollForCompletion( @@ -28,9 +32,15 @@ export async function pollForCompletion( options.minStabilizationMs ?? MIN_STABILIZATION_MS const minStabilizationMs = rawMinStabilizationMs > 0 ? rawMinStabilizationMs : MIN_STABILIZATION_MS + const eventWatchdogMs = + options.eventWatchdogMs ?? DEFAULT_EVENT_WATCHDOG_MS + const secondaryMeaningfulWorkTimeoutMs = + options.secondaryMeaningfulWorkTimeoutMs ?? 
+ DEFAULT_SECONDARY_MEANINGFUL_WORK_TIMEOUT_MS let consecutiveCompleteChecks = 0 let errorCycleCount = 0 let firstWorkTimestamp: number | null = null + let secondaryTimeoutChecked = false const pollStartTimestamp = Date.now() while (!abortController.signal.aborted) { @@ -59,7 +69,37 @@ export async function pollForCompletion( errorCycleCount = 0 } - const mainSessionStatus = await getMainSessionStatus(ctx) + // Watchdog: if no events received for N seconds, verify session status via API + let mainSessionStatus: "idle" | "busy" | "retry" | null = null + if (eventState.lastEventTimestamp !== null) { + const timeSinceLastEvent = Date.now() - eventState.lastEventTimestamp + if (timeSinceLastEvent > eventWatchdogMs) { + // Events stopped coming - verify actual session state + console.log( + pc.yellow( + `\n No events for ${Math.round( + timeSinceLastEvent / 1000 + )}s, verifying session status...` + ) + ) + + // Force check session status directly + mainSessionStatus = await getMainSessionStatus(ctx) + if (mainSessionStatus === "idle") { + eventState.mainSessionIdle = true + } else if (mainSessionStatus === "busy" || mainSessionStatus === "retry") { + eventState.mainSessionIdle = false + } + + // Reset timestamp to avoid repeated checks + eventState.lastEventTimestamp = Date.now() + } + } + + // Only call getMainSessionStatus if watchdog didn't already check + if (mainSessionStatus === null) { + mainSessionStatus = await getMainSessionStatus(ctx) + } if (mainSessionStatus === "busy" || mainSessionStatus === "retry") { eventState.mainSessionIdle = false } else if (mainSessionStatus === "idle") { @@ -81,6 +121,50 @@ export async function pollForCompletion( consecutiveCompleteChecks = 0 continue } + + // Secondary timeout: if we've been polling for reasonable time but haven't + // received meaningful work via events, check if there's active work via API + // Only check once to avoid unnecessary API calls every poll cycle + if ( + Date.now() - pollStartTimestamp > 
secondaryMeaningfulWorkTimeoutMs && + !secondaryTimeoutChecked + ) { + secondaryTimeoutChecked = true + // Check if session actually has pending work (children, todos, etc.) + const childrenRes = await ctx.client.session.children({ + path: { id: ctx.sessionID }, + query: { directory: ctx.directory }, + }) + const children = normalizeSDKResponse(childrenRes, [] as unknown[]) + const todosRes = await ctx.client.session.todo({ + path: { id: ctx.sessionID }, + query: { directory: ctx.directory }, + }) + const todos = normalizeSDKResponse(todosRes, [] as unknown[]) + + const hasActiveChildren = + Array.isArray(children) && children.length > 0 + const hasActiveTodos = + Array.isArray(todos) && + todos.some( + (t: unknown) => + (t as { status?: string })?.status !== "completed" && + (t as { status?: string })?.status !== "cancelled" + ) + const hasActiveWork = hasActiveChildren || hasActiveTodos + + if (hasActiveWork) { + // Assume meaningful work is happening even without events + eventState.hasReceivedMeaningfulWork = true + console.log( + pc.yellow( + `\n No meaningful work events for ${Math.round( + secondaryMeaningfulWorkTimeoutMs / 1000 + )}s but session has active work - assuming in progress` + ) + ) + } + } } else { // Track when first meaningful work was received if (firstWorkTimestamp === null) { diff --git a/src/config/schema/agent-overrides.ts b/src/config/schema/agent-overrides.ts index 1103bf15a..eb5429fba 100644 --- a/src/config/schema/agent-overrides.ts +++ b/src/config/schema/agent-overrides.ts @@ -60,7 +60,9 @@ const BuiltinAgentOverridesSchema = z.object({ build: AgentOverrideConfigSchema.optional(), plan: AgentOverrideConfigSchema.optional(), sisyphus: AgentOverrideConfigSchema.optional(), - hephaestus: AgentOverrideConfigSchema.optional(), + hephaestus: AgentOverrideConfigSchema.extend({ + allow_non_gpt_model: z.boolean().optional(), + }).optional(), "sisyphus-junior": AgentOverrideConfigSchema.optional(), "OpenCode-Builder": 
AgentOverrideConfigSchema.optional(), prometheus: AgentOverrideConfigSchema.optional(), diff --git a/src/config/schema/categories.ts b/src/config/schema/categories.ts index b12005931..47c7d6c0b 100644 --- a/src/config/schema/categories.ts +++ b/src/config/schema/categories.ts @@ -20,6 +20,7 @@ export const CategoryConfigSchema = z.object({ textVerbosity: z.enum(["low", "medium", "high"]).optional(), tools: z.record(z.string(), z.boolean()).optional(), prompt_append: z.string().optional(), + max_prompt_tokens: z.number().int().positive().optional(), /** Mark agent as unstable - forces background mode for monitoring. Auto-enabled for gemini/minimax models. */ is_unstable_agent: z.boolean().optional(), /** Disable this category. Disabled categories are excluded from task delegation. */ diff --git a/src/config/schema/oh-my-opencode-config.ts b/src/config/schema/oh-my-opencode-config.ts index ceb82d451..2ebaf43d8 100644 --- a/src/config/schema/oh-my-opencode-config.ts +++ b/src/config/schema/oh-my-opencode-config.ts @@ -27,7 +27,7 @@ export const OhMyOpenCodeConfigSchema = z.object({ /** Default agent name for `oh-my-opencode run` (env: OPENCODE_DEFAULT_AGENT) */ default_run_agent: z.string().optional(), disabled_mcps: z.array(AnyMcpNameSchema).optional(), - disabled_agents: z.array(BuiltinAgentNameSchema).optional(), + disabled_agents: z.array(z.string()).optional(), disabled_skills: z.array(BuiltinSkillNameSchema).optional(), disabled_hooks: z.array(z.string()).optional(), disabled_commands: z.array(BuiltinCommandNameSchema).optional(), diff --git a/src/features/background-agent/concurrency.test.ts b/src/features/background-agent/concurrency.test.ts index 102076eef..682d6029a 100644 --- a/src/features/background-agent/concurrency.test.ts +++ b/src/features/background-agent/concurrency.test.ts @@ -34,7 +34,7 @@ describe("ConcurrencyManager.getConcurrencyLimit", () => { test("should return provider limit even when modelConcurrency exists but doesn't match", () => { // 
given const config: BackgroundTaskConfig = { - modelConcurrency: { "google/gemini-3-pro": 5 }, + modelConcurrency: { "google/gemini-3.1-pro": 5 }, providerConcurrency: { anthropic: 3 } } const manager = new ConcurrencyManager(config) @@ -95,7 +95,7 @@ describe("ConcurrencyManager.getConcurrencyLimit", () => { // when const modelLimit = manager.getConcurrencyLimit("anthropic/claude-sonnet-4-6") const providerLimit = manager.getConcurrencyLimit("anthropic/claude-opus-4-6") - const defaultLimit = manager.getConcurrencyLimit("google/gemini-3-pro") + const defaultLimit = manager.getConcurrencyLimit("google/gemini-3.1-pro") // then expect(modelLimit).toBe(10) diff --git a/src/features/background-agent/manager.test.ts b/src/features/background-agent/manager.test.ts index 7bd7709f1..2e78f63f3 100644 --- a/src/features/background-agent/manager.test.ts +++ b/src/features/background-agent/manager.test.ts @@ -191,6 +191,10 @@ function getPendingByParent(manager: BackgroundManager): Map return (manager as unknown as { pendingByParent: Map> }).pendingByParent } +function getPendingNotifications(manager: BackgroundManager): Map { + return (manager as unknown as { pendingNotifications: Map }).pendingNotifications +} + function getCompletionTimers(manager: BackgroundManager): Map> { return (manager as unknown as { completionTimers: Map> }).completionTimers } @@ -1057,6 +1061,49 @@ describe("BackgroundManager.notifyParentSession - aborted parent", () => { manager.shutdown() }) + + test("should queue notification when promptAsync aborts while parent is idle", async () => { + //#given + const promptMock = async () => { + const error = new Error("Request aborted while waiting for input") + error.name = "MessageAbortedError" + throw error + } + const client = { + session: { + prompt: promptMock, + promptAsync: promptMock, + abort: async () => ({}), + messages: async () => ({ data: [] }), + }, + } + const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as 
PluginInput) + const task: BackgroundTask = { + id: "task-aborted-idle-queue", + sessionID: "session-child", + parentSessionID: "session-parent", + parentMessageID: "msg-parent", + description: "task idle queue", + prompt: "test", + agent: "explore", + status: "completed", + startedAt: new Date(), + completedAt: new Date(), + } + getPendingByParent(manager).set("session-parent", new Set([task.id])) + + //#when + await (manager as unknown as { notifyParentSession: (task: BackgroundTask) => Promise }) + .notifyParentSession(task) + + //#then + const queuedNotifications = getPendingNotifications(manager).get("session-parent") ?? [] + expect(queuedNotifications).toHaveLength(1) + expect(queuedNotifications[0]).toContain("") + expect(queuedNotifications[0]).toContain("[ALL BACKGROUND TASKS COMPLETE]") + + manager.shutdown() + }) }) describe("BackgroundManager.notifyParentSession - notifications toggle", () => { @@ -1105,6 +1152,29 @@ describe("BackgroundManager.notifyParentSession - notifications toggle", () => { }) }) +describe("BackgroundManager.injectPendingNotificationsIntoChatMessage", () => { + test("should prepend queued notifications to first text part and clear queue", () => { + // given + const manager = createBackgroundManager() + manager.queuePendingNotification("session-parent", "queued-one") + manager.queuePendingNotification("session-parent", "queued-two") + const output = { + parts: [{ type: "text", text: "User prompt" }], + } + + // when + manager.injectPendingNotificationsIntoChatMessage(output, "session-parent") + + // then + expect(output.parts[0].text).toContain("queued-one") + expect(output.parts[0].text).toContain("queued-two") + expect(output.parts[0].text).toContain("User prompt") + expect(getPendingNotifications(manager).get("session-parent")).toBeUndefined() + + manager.shutdown() + }) +}) + function buildNotificationPromptBody( task: BackgroundTask, currentMessage: CurrentMessage | null diff --git a/src/features/background-agent/manager.ts 
b/src/features/background-agent/manager.ts index 61e5d8434..1bc9e2b4b 100644 --- a/src/features/background-agent/manager.ts +++ b/src/features/background-agent/manager.ts @@ -93,6 +93,7 @@ export class BackgroundManager { private tasks: Map private notifications: Map + private pendingNotifications: Map private pendingByParent: Map> // Track pending tasks per parent for batching private client: OpencodeClient private directory: string @@ -125,6 +126,7 @@ export class BackgroundManager { ) { this.tasks = new Map() this.notifications = new Map() + this.pendingNotifications = new Map() this.pendingByParent = new Map() this.client = ctx.client this.directory = ctx.directory @@ -917,6 +919,32 @@ export class BackgroundManager { this.notifications.delete(sessionID) } + queuePendingNotification(sessionID: string | undefined, notification: string): void { + if (!sessionID) return + const existingNotifications = this.pendingNotifications.get(sessionID) ?? [] + existingNotifications.push(notification) + this.pendingNotifications.set(sessionID, existingNotifications) + } + + injectPendingNotificationsIntoChatMessage(output: { parts: Array<{ type: string; text?: string; [key: string]: unknown }> }, sessionID: string): void { + const pendingNotifications = this.pendingNotifications.get(sessionID) + if (!pendingNotifications || pendingNotifications.length === 0) { + return + } + + this.pendingNotifications.delete(sessionID) + const notificationContent = pendingNotifications.join("\n\n") + const firstTextPartIndex = output.parts.findIndex((part) => part.type === "text") + + if (firstTextPartIndex === -1) { + output.parts.unshift(createInternalAgentTextPart(notificationContent)) + return + } + + const originalText = output.parts[firstTextPartIndex].text ?? "" + output.parts[firstTextPartIndex].text = `${notificationContent}\n\n---\n\n${originalText}` + } + /** * Validates that a session has actual assistant/tool output before marking complete. 
* Prevents premature completion when session.idle fires before agent responds. @@ -1340,6 +1368,7 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea taskId: task.id, parentSessionID: task.parentSessionID, }) + this.queuePendingNotification(task.parentSessionID, notification) } else { log("[background-agent] Failed to send notification:", error) } @@ -1568,6 +1597,7 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea this.concurrencyManager.clear() this.tasks.clear() this.notifications.clear() + this.pendingNotifications.clear() this.pendingByParent.clear() this.notificationQueueByParent.clear() this.queuesByKey.clear() diff --git a/src/features/boulder-state/storage.test.ts b/src/features/boulder-state/storage.test.ts index 967c090cf..e52174cef 100644 --- a/src/features/boulder-state/storage.test.ts +++ b/src/features/boulder-state/storage.test.ts @@ -269,6 +269,71 @@ describe("boulder-state", () => { expect(progress.isComplete).toBe(false) }) + test("should count space-indented unchecked checkbox", () => { + // given - plan file with a two-space indented checkbox + const planPath = join(TEST_DIR, "space-indented-plan.md") + writeFileSync(planPath, `# Plan + - [ ] indented task +`) + + // when + const progress = getPlanProgress(planPath) + + // then + expect(progress.total).toBe(1) + expect(progress.completed).toBe(0) + expect(progress.isComplete).toBe(false) + }) + + test("should count tab-indented unchecked checkbox", () => { + // given - plan file with a tab-indented checkbox + const planPath = join(TEST_DIR, "tab-indented-plan.md") + writeFileSync(planPath, `# Plan + - [ ] tab-indented task +`) + + // when + const progress = getPlanProgress(planPath) + + // then + expect(progress.total).toBe(1) + expect(progress.completed).toBe(0) + expect(progress.isComplete).toBe(false) + }) + + test("should count mixed top-level checked and indented unchecked checkboxes", () => { + // given - plan file with 
checked top-level and unchecked indented task + const planPath = join(TEST_DIR, "mixed-indented-plan.md") + writeFileSync(planPath, `# Plan +- [x] top-level completed task + - [ ] nested unchecked task +`) + + // when + const progress = getPlanProgress(planPath) + + // then + expect(progress.total).toBe(2) + expect(progress.completed).toBe(1) + expect(progress.isComplete).toBe(false) + }) + + test("should count space-indented completed checkbox", () => { + // given - plan file with a two-space indented completed checkbox + const planPath = join(TEST_DIR, "indented-completed-plan.md") + writeFileSync(planPath, `# Plan + - [x] indented completed task +`) + + // when + const progress = getPlanProgress(planPath) + + // then + expect(progress.total).toBe(1) + expect(progress.completed).toBe(1) + expect(progress.isComplete).toBe(true) + }) + test("should return isComplete true when all checked", () => { // given - all tasks completed const planPath = join(TEST_DIR, "complete-plan.md") diff --git a/src/features/boulder-state/storage.ts b/src/features/boulder-state/storage.ts index 2b0d1bdec..ab84368b7 100644 --- a/src/features/boulder-state/storage.ts +++ b/src/features/boulder-state/storage.ts @@ -121,8 +121,8 @@ export function getPlanProgress(planPath: string): PlanProgress { const content = readFileSync(planPath, "utf-8") // Match markdown checkboxes: - [ ] or - [x] or - [X] - const uncheckedMatches = content.match(/^[-*]\s*\[\s*\]/gm) || [] - const checkedMatches = content.match(/^[-*]\s*\[[xX]\]/gm) || [] + const uncheckedMatches = content.match(/^\s*[-*]\s*\[\s*\]/gm) || [] + const checkedMatches = content.match(/^\s*[-*]\s*\[[xX]\]/gm) || [] const total = uncheckedMatches.length + checkedMatches.length const completed = checkedMatches.length @@ -150,7 +150,8 @@ export function getPlanName(planPath: string): string { export function createBoulderState( planPath: string, sessionId: string, - agent?: string + agent?: string, + worktreePath?: string, ): BoulderState { 
return { active_plan: planPath, @@ -158,5 +159,6 @@ export function createBoulderState( session_ids: [sessionId], plan_name: getPlanName(planPath), ...(agent !== undefined ? { agent } : {}), + ...(worktreePath !== undefined ? { worktree_path: worktreePath } : {}), } } diff --git a/src/features/boulder-state/types.ts b/src/features/boulder-state/types.ts index f56dcdaa2..b1a225380 100644 --- a/src/features/boulder-state/types.ts +++ b/src/features/boulder-state/types.ts @@ -16,6 +16,8 @@ export interface BoulderState { plan_name: string /** Agent type to use when resuming (e.g., 'atlas') */ agent?: string + /** Absolute path to the git worktree root where work happens */ + worktree_path?: string } export interface PlanProgress { diff --git a/src/features/builtin-commands/templates/start-work.ts b/src/features/builtin-commands/templates/start-work.ts index 4db39be1b..98ffd1e21 100644 --- a/src/features/builtin-commands/templates/start-work.ts +++ b/src/features/builtin-commands/templates/start-work.ts @@ -1,5 +1,14 @@ export const START_WORK_TEMPLATE = `You are starting a Sisyphus work session. +## ARGUMENTS + +- \`/start-work [plan-name] [--worktree ]\` + - \`plan-name\` (optional): name or partial match of the plan to start + - \`--worktree \` (optional): absolute path to an existing git worktree to work in + - If specified and valid: hook pre-sets worktree_path in boulder.json + - If specified but invalid: you must run \`git worktree add \` first + - If omitted: you MUST choose or create a worktree (see Worktree Setup below) + ## WHAT TO DO 1. **Find available plans**: Search for Prometheus-generated plan files at \`.sisyphus/plans/\` @@ -15,17 +24,24 @@ export const START_WORK_TEMPLATE = `You are starting a Sisyphus work session. - If ONE plan: auto-select it - If MULTIPLE plans: show list with timestamps, ask user to select -4. **Create/Update boulder.json**: +4. **Worktree Setup** (when \`worktree_path\` not already set in boulder.json): + 1. 
\`git worktree list --porcelain\` — see available worktrees + 2. Create: \`git worktree add \` + 3. Update boulder.json to add \`"worktree_path": ""\` + 4. All work happens inside that worktree directory + +5. **Create/Update boulder.json**: \`\`\`json { "active_plan": "/absolute/path/to/plan.md", "started_at": "ISO_TIMESTAMP", "session_ids": ["session_id_1", "session_id_2"], - "plan_name": "plan-name" + "plan_name": "plan-name", + "worktree_path": "/absolute/path/to/git/worktree" } \`\`\` -5. **Read the plan file** and start executing tasks according to atlas workflow +6. **Read the plan file** and start executing tasks according to atlas workflow ## OUTPUT FORMAT @@ -49,6 +65,7 @@ Resuming Work Session Active Plan: {plan-name} Progress: {completed}/{total} tasks Sessions: {count} (appending current session) +Worktree: {worktree_path} Reading plan and continuing from last incomplete task... \`\`\` @@ -60,6 +77,7 @@ Starting Work Session Plan: {plan-name} Session ID: {session_id} Started: {timestamp} +Worktree: {worktree_path} Reading plan and beginning execution... \`\`\` @@ -68,5 +86,6 @@ Reading plan and beginning execution... 
- The session_id is injected by the hook - use it directly - Always update boulder.json BEFORE starting work +- Always set worktree_path in boulder.json before executing any tasks - Read the FULL plan file before delegating any tasks - Follow atlas delegation protocols (7-section format)` diff --git a/src/features/task-toast-manager/manager.test.ts b/src/features/task-toast-manager/manager.test.ts index 323792815..a490f894b 100644 --- a/src/features/task-toast-manager/manager.test.ts +++ b/src/features/task-toast-manager/manager.test.ts @@ -162,7 +162,7 @@ describe("TaskToastManager", () => { description: "Task with category default model", agent: "sisyphus-junior", isBackground: false, - modelInfo: { model: "google/gemini-3-pro", type: "category-default" as const }, + modelInfo: { model: "google/gemini-3.1-pro", type: "category-default" as const }, } // when - addTask is called diff --git a/src/hooks/anthropic-context-window-limit-recovery/state.ts b/src/hooks/anthropic-context-window-limit-recovery/state.ts index 1ee1001fc..70fd69f53 100644 --- a/src/hooks/anthropic-context-window-limit-recovery/state.ts +++ b/src/hooks/anthropic-context-window-limit-recovery/state.ts @@ -6,7 +6,7 @@ export function getOrCreateRetryState( ): RetryState { let state = autoCompactState.retryStateBySession.get(sessionID) if (!state) { - state = { attempt: 0, lastAttemptTime: 0 } + state = { attempt: 0, lastAttemptTime: 0, firstAttemptTime: 0 } autoCompactState.retryStateBySession.set(sessionID, state) } return state diff --git a/src/hooks/anthropic-context-window-limit-recovery/summarize-retry-strategy.test.ts b/src/hooks/anthropic-context-window-limit-recovery/summarize-retry-strategy.test.ts new file mode 100644 index 000000000..fa0fb295d --- /dev/null +++ b/src/hooks/anthropic-context-window-limit-recovery/summarize-retry-strategy.test.ts @@ -0,0 +1,122 @@ +import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test" +import { runSummarizeRetryStrategy } from 
"./summarize-retry-strategy" +import type { AutoCompactState, ParsedTokenLimitError, RetryState } from "./types" +import type { OhMyOpenCodeConfig } from "../../config" + +type TimeoutCall = { + delay: number +} + +function createAutoCompactState(): AutoCompactState { + return { + pendingCompact: new Set(), + errorDataBySession: new Map(), + retryStateBySession: new Map(), + truncateStateBySession: new Map(), + emptyContentAttemptBySession: new Map(), + compactionInProgress: new Set(), + } +} + +describe("runSummarizeRetryStrategy", () => { + const sessionID = "ses_retry_timeout" + const directory = "/tmp" + let autoCompactState: AutoCompactState + + const summarizeMock = mock(() => Promise.resolve()) + const showToastMock = mock(() => Promise.resolve()) + const client = { + session: { + summarize: summarizeMock, + messages: mock(() => Promise.resolve({ data: [] })), + promptAsync: mock(() => Promise.resolve()), + revert: mock(() => Promise.resolve()), + }, + tui: { + showToast: showToastMock, + }, + } + + beforeEach(() => { + autoCompactState = createAutoCompactState() + summarizeMock.mockReset() + showToastMock.mockReset() + summarizeMock.mockResolvedValue(undefined) + showToastMock.mockResolvedValue(undefined) + }) + + afterEach(() => { + globalThis.setTimeout = originalSetTimeout + }) + + const originalSetTimeout = globalThis.setTimeout + + test("stops retries when total summarize timeout is exceeded", async () => { + //#given + autoCompactState.pendingCompact.add(sessionID) + autoCompactState.errorDataBySession.set(sessionID, { + currentTokens: 250000, + maxTokens: 200000, + errorType: "token_limit_exceeded", + }) + autoCompactState.retryStateBySession.set(sessionID, { + attempt: 1, + lastAttemptTime: Date.now(), + firstAttemptTime: Date.now() - 130000, + }) + + //#when + await runSummarizeRetryStrategy({ + sessionID, + msg: { providerID: "anthropic", modelID: "claude-sonnet-4-6" }, + autoCompactState, + client: client as never, + directory, + pluginConfig: {} 
as OhMyOpenCodeConfig, + }) + + //#then + expect(summarizeMock).not.toHaveBeenCalled() + expect(autoCompactState.pendingCompact.has(sessionID)).toBe(false) + expect(autoCompactState.errorDataBySession.has(sessionID)).toBe(false) + expect(autoCompactState.retryStateBySession.has(sessionID)).toBe(false) + expect(showToastMock).toHaveBeenCalledWith( + expect.objectContaining({ + body: expect.objectContaining({ + title: "Auto Compact Timed Out", + }), + }), + ) + }) + + test("caps retry delay by remaining total timeout window", async () => { + //#given + const timeoutCalls: TimeoutCall[] = [] + globalThis.setTimeout = ((_: (...args: unknown[]) => void, delay?: number) => { + timeoutCalls.push({ delay: delay ?? 0 }) + return 1 as unknown as ReturnType + }) as typeof setTimeout + + autoCompactState.retryStateBySession.set(sessionID, { + attempt: 1, + lastAttemptTime: Date.now(), + firstAttemptTime: Date.now() - 119700, + }) + summarizeMock.mockRejectedValueOnce(new Error("rate limited")) + + //#when + await runSummarizeRetryStrategy({ + sessionID, + msg: { providerID: "anthropic", modelID: "claude-sonnet-4-6" }, + autoCompactState, + client: client as never, + directory, + pluginConfig: {} as OhMyOpenCodeConfig, + }) + + //#then + expect(timeoutCalls.length).toBe(1) + expect(timeoutCalls[0]!.delay).toBeGreaterThan(0) + expect(timeoutCalls[0]!.delay).toBeLessThanOrEqual(500) + }) +}) diff --git a/src/hooks/anthropic-context-window-limit-recovery/summarize-retry-strategy.ts b/src/hooks/anthropic-context-window-limit-recovery/summarize-retry-strategy.ts index 1dc9e2852..008ff74a5 100644 --- a/src/hooks/anthropic-context-window-limit-recovery/summarize-retry-strategy.ts +++ b/src/hooks/anthropic-context-window-limit-recovery/summarize-retry-strategy.ts @@ -7,6 +7,8 @@ import { sanitizeEmptyMessagesBeforeSummarize } from "./message-builder" import { fixEmptyMessages } from "./empty-content-recovery" import { resolveCompactionModel } from "../shared/compaction-model-resolver" 
+ +const SUMMARIZE_RETRY_TOTAL_TIMEOUT_MS = 120_000 export async function runSummarizeRetryStrategy(params: { sessionID: string msg: Record @@ -18,6 +20,27 @@ export async function runSummarizeRetryStrategy(params: { messageIndex?: number }): Promise { const retryState = getOrCreateRetryState(params.autoCompactState, params.sessionID) + const now = Date.now() + + if (retryState.firstAttemptTime === 0) { + retryState.firstAttemptTime = now + } + + const elapsedTimeMs = now - retryState.firstAttemptTime + if (elapsedTimeMs >= SUMMARIZE_RETRY_TOTAL_TIMEOUT_MS) { + clearSessionState(params.autoCompactState, params.sessionID) + await params.client.tui + .showToast({ + body: { + title: "Auto Compact Timed Out", + message: "Compaction retries exceeded the timeout window. Please start a new session.", + variant: "error", + duration: 5000, + }, + }) + .catch(() => {}) + return + } if (params.errorType?.includes("non-empty content")) { const attempt = getEmptyContentAttempt(params.autoCompactState, params.sessionID) @@ -52,6 +75,7 @@ export async function runSummarizeRetryStrategy(params: { if (Date.now() - retryState.lastAttemptTime > 300000) { retryState.attempt = 0 + retryState.firstAttemptTime = Date.now() params.autoCompactState.truncateStateBySession.delete(params.sessionID) } @@ -92,10 +116,26 @@ export async function runSummarizeRetryStrategy(params: { }) return } catch { + const remainingTimeMs = SUMMARIZE_RETRY_TOTAL_TIMEOUT_MS - (Date.now() - retryState.firstAttemptTime) + if (remainingTimeMs <= 0) { + clearSessionState(params.autoCompactState, params.sessionID) + await params.client.tui + .showToast({ + body: { + title: "Auto Compact Timed Out", + message: "Compaction retries exceeded the timeout window. 
Please start a new session.", + variant: "error", + duration: 5000, + }, + }) + .catch(() => {}) + return + } + const delay = RETRY_CONFIG.initialDelayMs * Math.pow(RETRY_CONFIG.backoffFactor, retryState.attempt - 1) - const cappedDelay = Math.min(delay, RETRY_CONFIG.maxDelayMs) + const cappedDelay = Math.min(delay, RETRY_CONFIG.maxDelayMs, remainingTimeMs) setTimeout(() => { void runSummarizeRetryStrategy(params) diff --git a/src/hooks/anthropic-context-window-limit-recovery/types.ts b/src/hooks/anthropic-context-window-limit-recovery/types.ts index 40b31d064..5c62b81fb 100644 --- a/src/hooks/anthropic-context-window-limit-recovery/types.ts +++ b/src/hooks/anthropic-context-window-limit-recovery/types.ts @@ -11,6 +11,7 @@ export interface ParsedTokenLimitError { export interface RetryState { attempt: number lastAttemptTime: number + firstAttemptTime: number } export interface TruncateState { diff --git a/src/hooks/atlas/boulder-continuation-injector.ts b/src/hooks/atlas/boulder-continuation-injector.ts index 289668b4b..4f8e35802 100644 --- a/src/hooks/atlas/boulder-continuation-injector.ts +++ b/src/hooks/atlas/boulder-continuation-injector.ts @@ -14,6 +14,7 @@ export async function injectBoulderContinuation(input: { remaining: number total: number agent?: string + worktreePath?: string backgroundManager?: BackgroundManager sessionState: SessionState }): Promise { @@ -24,6 +25,7 @@ export async function injectBoulderContinuation(input: { remaining, total, agent, + worktreePath, backgroundManager, sessionState, } = input @@ -37,9 +39,11 @@ export async function injectBoulderContinuation(input: { return } + const worktreeContext = worktreePath ? 
`\n\n[Worktree: ${worktreePath}]` : "" const prompt = BOULDER_CONTINUATION_PROMPT.replace(/{PLAN_NAME}/g, planName) + - `\n\n[Status: ${total - remaining}/${total} completed, ${remaining} remaining]` + `\n\n[Status: ${total - remaining}/${total} completed, ${remaining} remaining]` + + worktreeContext try { log(`[${HOOK_NAME}] Injecting boulder continuation`, { sessionID, planName, remaining }) @@ -62,6 +66,7 @@ export async function injectBoulderContinuation(input: { log(`[${HOOK_NAME}] Boulder continuation injected`, { sessionID }) } catch (err) { sessionState.promptFailureCount += 1 + sessionState.lastFailureAt = Date.now() log(`[${HOOK_NAME}] Boulder continuation failed`, { sessionID, error: String(err), diff --git a/src/hooks/atlas/event-handler.ts b/src/hooks/atlas/event-handler.ts index 76a3a5004..0f7187fc4 100644 --- a/src/hooks/atlas/event-handler.ts +++ b/src/hooks/atlas/event-handler.ts @@ -10,6 +10,7 @@ import { getLastAgentFromSession } from "./session-last-agent" import type { AtlasHookOptions, SessionState } from "./types" const CONTINUATION_COOLDOWN_MS = 5000 +const FAILURE_BACKOFF_MS = 5 * 60 * 1000 export function createAtlasEventHandler(input: { ctx: PluginInput @@ -53,6 +54,7 @@ export function createAtlasEventHandler(input: { } const state = getState(sessionID) + const now = Date.now() if (state.lastEventWasAbortError) { state.lastEventWasAbortError = false @@ -61,11 +63,18 @@ export function createAtlasEventHandler(input: { } if (state.promptFailureCount >= 2) { - log(`[${HOOK_NAME}] Skipped: continuation disabled after repeated prompt failures`, { - sessionID, - promptFailureCount: state.promptFailureCount, - }) - return + const timeSinceLastFailure = state.lastFailureAt !== undefined ? 
now - state.lastFailureAt : Number.POSITIVE_INFINITY + if (timeSinceLastFailure < FAILURE_BACKOFF_MS) { + log(`[${HOOK_NAME}] Skipped: continuation in backoff after repeated failures`, { + sessionID, + promptFailureCount: state.promptFailureCount, + backoffRemaining: FAILURE_BACKOFF_MS - timeSinceLastFailure, + }) + return + } + + state.promptFailureCount = 0 + state.lastFailureAt = undefined } const backgroundManager = options?.backgroundManager @@ -92,17 +101,15 @@ export function createAtlasEventHandler(input: { const lastAgentKey = getAgentConfigKey(lastAgent ?? "") const requiredAgent = getAgentConfigKey(boulderState.agent ?? "atlas") const lastAgentMatchesRequired = lastAgentKey === requiredAgent - const boulderAgentWasNotExplicitlySet = boulderState.agent === undefined const boulderAgentDefaultsToAtlas = requiredAgent === "atlas" const lastAgentIsSisyphus = lastAgentKey === "sisyphus" - const allowSisyphusWhenDefaultAtlas = boulderAgentWasNotExplicitlySet && boulderAgentDefaultsToAtlas && lastAgentIsSisyphus - const agentMatches = lastAgentMatchesRequired || allowSisyphusWhenDefaultAtlas + const allowSisyphusForAtlasBoulder = boulderAgentDefaultsToAtlas && lastAgentIsSisyphus + const agentMatches = lastAgentMatchesRequired || allowSisyphusForAtlasBoulder if (!agentMatches) { log(`[${HOOK_NAME}] Skipped: last agent does not match boulder agent`, { sessionID, lastAgent: lastAgent ?? 
"unknown", requiredAgent, - boulderAgentExplicitlySet: boulderState.agent !== undefined, }) return } @@ -113,7 +120,6 @@ export function createAtlasEventHandler(input: { return } - const now = Date.now() if (state.lastContinuationInjectedAt && now - state.lastContinuationInjectedAt < CONTINUATION_COOLDOWN_MS) { log(`[${HOOK_NAME}] Skipped: continuation cooldown active`, { sessionID, @@ -132,6 +138,7 @@ export function createAtlasEventHandler(input: { remaining, total: progress.total, agent: boulderState.agent, + worktreePath: boulderState.worktree_path, backgroundManager, sessionState: state, }) diff --git a/src/hooks/atlas/index.test.ts b/src/hooks/atlas/index.test.ts index 065f20b9e..36f308270 100644 --- a/src/hooks/atlas/index.test.ts +++ b/src/hooks/atlas/index.test.ts @@ -933,8 +933,8 @@ describe("atlas hook", () => { expect(callArgs.body.parts[0].text).toContain("2 remaining") }) - test("should not inject when last agent does not match boulder agent", async () => { - // given - boulder state with incomplete plan, but last agent does NOT match + test("should inject when last agent is sisyphus and boulder targets atlas explicitly", async () => { + // given - boulder explicitly set to atlas, but last agent is sisyphus (initial state after /start-work) const planPath = join(TEST_DIR, "test-plan.md") writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2") @@ -947,7 +947,7 @@ describe("atlas hook", () => { } writeBoulderState(TEST_DIR, state) - // given - last agent is NOT the boulder agent + // given - last agent is sisyphus (typical state right after /start-work) cleanupMessageStorage(MAIN_SESSION_ID) setupMessageStorage(MAIN_SESSION_ID, "sisyphus") @@ -962,7 +962,39 @@ describe("atlas hook", () => { }, }) - // then - should NOT call prompt because agent does not match + // then - should call prompt because sisyphus is always allowed for atlas boulders + expect(mockInput._promptMock).toHaveBeenCalled() + }) + + test("should not inject when last agent is 
non-sisyphus and does not match boulder agent", async () => { + // given - boulder explicitly set to atlas, last agent is hephaestus (unrelated agent) + const planPath = join(TEST_DIR, "test-plan.md") + writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2") + + const state: BoulderState = { + active_plan: planPath, + started_at: "2026-01-02T10:00:00Z", + session_ids: [MAIN_SESSION_ID], + plan_name: "test-plan", + agent: "atlas", + } + writeBoulderState(TEST_DIR, state) + + cleanupMessageStorage(MAIN_SESSION_ID) + setupMessageStorage(MAIN_SESSION_ID, "hephaestus") + + const mockInput = createMockPluginInput() + const hook = createAtlasHook(mockInput) + + // when + await hook.handler({ + event: { + type: "session.idle", + properties: { sessionID: MAIN_SESSION_ID }, + }, + }) + + // then - should NOT call prompt because hephaestus does not match atlas or sisyphus expect(mockInput._promptMock).not.toHaveBeenCalled() }) @@ -1122,6 +1154,144 @@ describe("atlas hook", () => { } }) + test("should keep skipping continuation during 5-minute backoff after 2 consecutive failures", async () => { + //#given - boulder state with incomplete plan and prompt always fails + const planPath = join(TEST_DIR, "test-plan.md") + writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2") + + const state: BoulderState = { + active_plan: planPath, + started_at: "2026-01-02T10:00:00Z", + session_ids: [MAIN_SESSION_ID], + plan_name: "test-plan", + } + writeBoulderState(TEST_DIR, state) + + const promptMock = mock(() => Promise.reject(new Error("Bad Request"))) + const mockInput = createMockPluginInput({ promptMock }) + const hook = createAtlasHook(mockInput) + + const originalDateNow = Date.now + let now = 0 + Date.now = () => now + + try { + //#when - third idle occurs inside 5-minute backoff window + await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } }) + await flushMicrotasks() + now += 6000 + + await hook.handler({ event: { type: 
"session.idle", properties: { sessionID: MAIN_SESSION_ID } } }) + await flushMicrotasks() + now += 60000 + + await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } }) + await flushMicrotasks() + + //#then - third attempt should still be skipped + expect(promptMock).toHaveBeenCalledTimes(2) + } finally { + Date.now = originalDateNow + } + }) + + test("should retry continuation after 5-minute backoff expires following 2 consecutive failures", async () => { + //#given - boulder state with incomplete plan and prompt always fails + const planPath = join(TEST_DIR, "test-plan.md") + writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2") + + const state: BoulderState = { + active_plan: planPath, + started_at: "2026-01-02T10:00:00Z", + session_ids: [MAIN_SESSION_ID], + plan_name: "test-plan", + } + writeBoulderState(TEST_DIR, state) + + const promptMock = mock(() => Promise.reject(new Error("Bad Request"))) + const mockInput = createMockPluginInput({ promptMock }) + const hook = createAtlasHook(mockInput) + + const originalDateNow = Date.now + let now = 0 + Date.now = () => now + + try { + //#when - third idle occurs after 5+ minutes + await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } }) + await flushMicrotasks() + now += 6000 + + await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } }) + await flushMicrotasks() + now += 300000 + + await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } }) + await flushMicrotasks() + + //#then - third attempt should run after backoff expiration + expect(promptMock).toHaveBeenCalledTimes(3) + } finally { + Date.now = originalDateNow + } + }) + + test("should reset prompt failure counter after successful retry beyond backoff window", async () => { + //#given - boulder state with incomplete plan and success on first retry after backoff + const planPath = 
join(TEST_DIR, "test-plan.md") + writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2") + + const state: BoulderState = { + active_plan: planPath, + started_at: "2026-01-02T10:00:00Z", + session_ids: [MAIN_SESSION_ID], + plan_name: "test-plan", + } + writeBoulderState(TEST_DIR, state) + + const promptMock = mock((): Promise => Promise.reject(new Error("Bad Request"))) + promptMock.mockImplementationOnce(() => Promise.reject(new Error("Bad Request"))) + promptMock.mockImplementationOnce(() => Promise.reject(new Error("Bad Request"))) + promptMock.mockImplementationOnce(() => Promise.resolve(undefined)) + const mockInput = createMockPluginInput({ promptMock }) + const hook = createAtlasHook(mockInput) + + const originalDateNow = Date.now + let now = 0 + Date.now = () => now + + try { + //#when - fail twice, recover after backoff with success, then fail twice again + await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } }) + await flushMicrotasks() + now += 6000 + + await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } }) + await flushMicrotasks() + now += 300000 + + await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } }) + await flushMicrotasks() + now += 6000 + + await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } }) + await flushMicrotasks() + now += 6000 + + await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } }) + await flushMicrotasks() + now += 6000 + + await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } }) + await flushMicrotasks() + + //#then - success retry resets counter, so two additional failures are allowed before skip + expect(promptMock).toHaveBeenCalledTimes(5) + } finally { + Date.now = originalDateNow + } + }) + test("should reset continuation failure state on session.compacted 
event", async () => { //#given - boulder state with incomplete plan and prompt always fails const planPath = join(TEST_DIR, "test-plan.md") diff --git a/src/hooks/atlas/types.ts b/src/hooks/atlas/types.ts index e1919cd2a..7302f8307 100644 --- a/src/hooks/atlas/types.ts +++ b/src/hooks/atlas/types.ts @@ -26,4 +26,5 @@ export interface SessionState { lastEventWasAbortError?: boolean lastContinuationInjectedAt?: number promptFailureCount: number + lastFailureAt?: number } diff --git a/src/hooks/background-notification/hook.ts b/src/hooks/background-notification/hook.ts index f417bdbad..3f40ffadb 100644 --- a/src/hooks/background-notification/hook.ts +++ b/src/hooks/background-notification/hook.ts @@ -9,6 +9,14 @@ interface EventInput { event: Event } +interface ChatMessageInput { + sessionID: string +} + +interface ChatMessageOutput { + parts: Array<{ type: string; text?: string; [key: string]: unknown }> +} + /** * Background notification hook - handles event routing to BackgroundManager. 
* @@ -20,7 +28,15 @@ export function createBackgroundNotificationHook(manager: BackgroundManager) { manager.handleEvent(event) } + const chatMessageHandler = async ( + input: ChatMessageInput, + output: ChatMessageOutput, + ): Promise => { + manager.injectPendingNotificationsIntoChatMessage(output, input.sessionID) + } + return { + "chat.message": chatMessageHandler, event: eventHandler, } } diff --git a/src/hooks/interactive-bash-session/interactive-bash-session-tracker.ts b/src/hooks/interactive-bash-session/interactive-bash-session-tracker.ts index 428d6bbaa..20db3906a 100644 --- a/src/hooks/interactive-bash-session/interactive-bash-session-tracker.ts +++ b/src/hooks/interactive-bash-session/interactive-bash-session-tracker.ts @@ -6,6 +6,7 @@ import { import { OMO_SESSION_PREFIX, buildSessionReminderMessage } from "./constants"; import type { InteractiveBashSessionState } from "./types"; import { subagentSessions } from "../../features/claude-code-session-state"; +import { spawnWithWindowsHide } from "../../shared/spawn-with-windows-hide"; type AbortSession = (args: { path: { id: string } }) => Promise @@ -19,7 +20,7 @@ async function killAllTrackedSessions( ): Promise { for (const sessionName of state.tmuxSessions) { try { - const proc = Bun.spawn(["tmux", "kill-session", "-t", sessionName], { + const proc = spawnWithWindowsHide(["tmux", "kill-session", "-t", sessionName], { stdout: "ignore", stderr: "ignore", }) diff --git a/src/hooks/interactive-bash-session/state-manager.ts b/src/hooks/interactive-bash-session/state-manager.ts index e655bfafd..c3a286421 100644 --- a/src/hooks/interactive-bash-session/state-manager.ts +++ b/src/hooks/interactive-bash-session/state-manager.ts @@ -1,6 +1,7 @@ import type { InteractiveBashSessionState } from "./types"; import { loadInteractiveBashSessionState } from "./storage"; import { OMO_SESSION_PREFIX } from "./constants"; +import { spawnWithWindowsHide } from "../../shared/spawn-with-windows-hide"; export function 
getOrCreateState(sessionID: string, sessionStates: Map): InteractiveBashSessionState { if (!sessionStates.has(sessionID)) { @@ -24,7 +25,7 @@ export async function killAllTrackedSessions( ): Promise { for (const sessionName of state.tmuxSessions) { try { - const proc = Bun.spawn(["tmux", "kill-session", "-t", sessionName], { + const proc = spawnWithWindowsHide(["tmux", "kill-session", "-t", sessionName], { stdout: "ignore", stderr: "ignore", }); diff --git a/src/hooks/no-hephaestus-non-gpt/hook.ts b/src/hooks/no-hephaestus-non-gpt/hook.ts index a1d08a2a1..e621c6d01 100644 --- a/src/hooks/no-hephaestus-non-gpt/hook.ts +++ b/src/hooks/no-hephaestus-non-gpt/hook.ts @@ -12,12 +12,16 @@ const TOAST_MESSAGE = [ ].join("\n") const SISYPHUS_DISPLAY = getAgentDisplayName("sisyphus") -function showToast(ctx: PluginInput, sessionID: string): void { +type NoHephaestusNonGptHookOptions = { + allowNonGptModel?: boolean +} + +function showToast(ctx: PluginInput, sessionID: string, variant: "error" | "warning"): void { ctx.client.tui.showToast({ body: { title: TOAST_TITLE, message: TOAST_MESSAGE, - variant: "error", + variant, duration: 10000, }, }).catch((error) => { @@ -28,7 +32,10 @@ function showToast(ctx: PluginInput, sessionID: string): void { }) } -export function createNoHephaestusNonGptHook(ctx: PluginInput) { +export function createNoHephaestusNonGptHook( + ctx: PluginInput, + options?: NoHephaestusNonGptHookOptions, +) { return { "chat.message": async (input: { sessionID: string @@ -40,9 +47,13 @@ export function createNoHephaestusNonGptHook(ctx: PluginInput) { const rawAgent = input.agent ?? getSessionAgent(input.sessionID) ?? "" const agentKey = getAgentConfigKey(rawAgent) const modelID = input.model?.modelID + const allowNonGptModel = options?.allowNonGptModel === true if (agentKey === "hephaestus" && modelID && !isGptModel(modelID)) { - showToast(ctx, input.sessionID) + showToast(ctx, input.sessionID, allowNonGptModel ? 
"warning" : "error") + if (allowNonGptModel) { + return + } input.agent = SISYPHUS_DISPLAY if (output?.message) { output.message.agent = SISYPHUS_DISPLAY diff --git a/src/hooks/no-hephaestus-non-gpt/index.test.ts b/src/hooks/no-hephaestus-non-gpt/index.test.ts index 51e1f3a0a..3440cccc8 100644 --- a/src/hooks/no-hephaestus-non-gpt/index.test.ts +++ b/src/hooks/no-hephaestus-non-gpt/index.test.ts @@ -1,3 +1,5 @@ +/// + import { describe, expect, spyOn, test } from "bun:test" import { _resetForTesting, updateSessionAgent } from "../../features/claude-code-session-state" import { getAgentDisplayName } from "../../shared/agent-display-names" @@ -8,7 +10,7 @@ const SISYPHUS_DISPLAY = getAgentDisplayName("sisyphus") function createOutput() { return { - message: {}, + message: {} as { agent?: string; [key: string]: unknown }, parts: [], } } @@ -16,7 +18,7 @@ function createOutput() { describe("no-hephaestus-non-gpt hook", () => { test("shows toast on every chat.message when hephaestus uses non-gpt model", async () => { // given - hephaestus with claude model - const showToast = spyOn({ fn: async () => ({}) }, "fn") + const showToast = spyOn({ fn: async (_input: unknown) => ({}) }, "fn") const hook = createNoHephaestusNonGptHook({ client: { tui: { showToast } }, } as any) @@ -49,9 +51,38 @@ describe("no-hephaestus-non-gpt hook", () => { }) }) + test("shows warning and does not switch agent when allow_non_gpt_model is enabled", async () => { + // given - hephaestus with claude model and opt-out enabled + const showToast = spyOn({ fn: async (_input: unknown) => ({}) }, "fn") + const hook = createNoHephaestusNonGptHook({ + client: { tui: { showToast } }, + } as any, { + allowNonGptModel: true, + }) + + const output = createOutput() + + // when - chat.message runs + await hook["chat.message"]?.({ + sessionID: "ses_opt_out", + agent: HEPHAESTUS_DISPLAY, + model: { providerID: "anthropic", modelID: "claude-opus-4-6" }, + }, output) + + // then - warning toast is shown but agent 
is not switched + expect(showToast).toHaveBeenCalledTimes(1) + expect(output.message.agent).toBeUndefined() + expect(showToast.mock.calls[0]?.[0]).toMatchObject({ + body: { + title: "NEVER Use Hephaestus with Non-GPT", + variant: "warning", + }, + }) + }) + test("does not show toast when hephaestus uses gpt model", async () => { // given - hephaestus with gpt model - const showToast = spyOn({ fn: async () => ({}) }, "fn") + const showToast = spyOn({ fn: async (_input: unknown) => ({}) }, "fn") const hook = createNoHephaestusNonGptHook({ client: { tui: { showToast } }, } as any) @@ -72,7 +103,7 @@ describe("no-hephaestus-non-gpt hook", () => { test("does not show toast for non-hephaestus agent", async () => { // given - sisyphus with claude model (non-gpt) - const showToast = spyOn({ fn: async () => ({}) }, "fn") + const showToast = spyOn({ fn: async (_input: unknown) => ({}) }, "fn") const hook = createNoHephaestusNonGptHook({ client: { tui: { showToast } }, } as any) @@ -95,7 +126,7 @@ describe("no-hephaestus-non-gpt hook", () => { // given - session agent saved as hephaestus _resetForTesting() updateSessionAgent("ses_4", HEPHAESTUS_DISPLAY) - const showToast = spyOn({ fn: async () => ({}) }, "fn") + const showToast = spyOn({ fn: async (_input: unknown) => ({}) }, "fn") const hook = createNoHephaestusNonGptHook({ client: { tui: { showToast } }, } as any) diff --git a/src/hooks/preemptive-compaction.test.ts b/src/hooks/preemptive-compaction.test.ts index 12912b3a8..279562aa6 100644 --- a/src/hooks/preemptive-compaction.test.ts +++ b/src/hooks/preemptive-compaction.test.ts @@ -45,6 +45,23 @@ function createMockCtx() { } } +function setupImmediateTimeouts(): () => void { + const originalSetTimeout = globalThis.setTimeout + const originalClearTimeout = globalThis.clearTimeout + + globalThis.setTimeout = ((callback: (...args: unknown[]) => void, _delay?: number, ...args: unknown[]) => { + callback(...args) + return 1 as unknown as ReturnType + }) as typeof setTimeout + 
+ globalThis.clearTimeout = (() => {}) as typeof clearTimeout + + return () => { + globalThis.setTimeout = originalSetTimeout + globalThis.clearTimeout = originalClearTimeout + } +} + describe("preemptive-compaction", () => { let ctx: ReturnType @@ -63,7 +80,7 @@ describe("preemptive-compaction", () => { // #when tool.execute.after is called // #then session.messages() should NOT be called it("should use cached token info instead of fetching session.messages()", async () => { - const hook = createPreemptiveCompactionHook(ctx as never) + const hook = createPreemptiveCompactionHook(ctx as never, {} as never) const sessionID = "ses_test1" // Simulate message.updated with token info below threshold @@ -101,7 +118,7 @@ describe("preemptive-compaction", () => { // #when tool.execute.after is called // #then should skip without fetching it("should skip gracefully when no cached token info exists", async () => { - const hook = createPreemptiveCompactionHook(ctx as never) + const hook = createPreemptiveCompactionHook(ctx as never, {} as never) const output = { title: "", output: "test", metadata: null } await hook["tool.execute.after"]( @@ -116,7 +133,7 @@ describe("preemptive-compaction", () => { // #when tool.execute.after runs // #then should trigger summarize it("should trigger compaction when usage exceeds threshold", async () => { - const hook = createPreemptiveCompactionHook(ctx as never) + const hook = createPreemptiveCompactionHook(ctx as never, {} as never) const sessionID = "ses_high" // 170K input + 10K cache = 180K → 90% of 200K @@ -153,7 +170,7 @@ describe("preemptive-compaction", () => { it("should trigger compaction for google-vertex-anthropic provider", async () => { //#given google-vertex-anthropic usage above threshold - const hook = createPreemptiveCompactionHook(ctx as never) + const hook = createPreemptiveCompactionHook(ctx as never, {} as never) const sessionID = "ses_vertex_anthropic_high" await hook.event({ @@ -191,7 +208,7 @@ 
describe("preemptive-compaction", () => { // #given session deleted // #then cache should be cleaned up it("should clean up cache on session.deleted", async () => { - const hook = createPreemptiveCompactionHook(ctx as never) + const hook = createPreemptiveCompactionHook(ctx as never, {} as never) const sessionID = "ses_del" await hook.event({ @@ -228,7 +245,7 @@ describe("preemptive-compaction", () => { it("should log summarize errors instead of swallowing them", async () => { //#given - const hook = createPreemptiveCompactionHook(ctx as never) + const hook = createPreemptiveCompactionHook(ctx as never, {} as never) const sessionID = "ses_log_error" const summarizeError = new Error("summarize failed") ctx.client.session.summarize.mockRejectedValueOnce(summarizeError) @@ -343,4 +360,58 @@ describe("preemptive-compaction", () => { //#then expect(ctx.client.session.summarize).not.toHaveBeenCalled() }) + + it("should clear in-progress lock when summarize times out", async () => { + //#given + const restoreTimeouts = setupImmediateTimeouts() + const hook = createPreemptiveCompactionHook(ctx as never, {} as never) + const sessionID = "ses_timeout" + + ctx.client.session.summarize + .mockImplementationOnce(() => new Promise(() => {})) + .mockResolvedValueOnce({}) + + try { + await hook.event({ + event: { + type: "message.updated", + properties: { + info: { + role: "assistant", + sessionID, + providerID: "anthropic", + modelID: "claude-sonnet-4-6", + finish: true, + tokens: { + input: 170000, + output: 0, + reasoning: 0, + cache: { read: 10000, write: 0 }, + }, + }, + }, + }, + }) + + //#when + await hook["tool.execute.after"]( + { tool: "bash", sessionID, callID: "call_timeout_1" }, + { title: "", output: "test", metadata: null }, + ) + + await hook["tool.execute.after"]( + { tool: "bash", sessionID, callID: "call_timeout_2" }, + { title: "", output: "test", metadata: null }, + ) + + //#then + expect(ctx.client.session.summarize).toHaveBeenCalledTimes(2) + 
expect(logMock).toHaveBeenCalledWith("[preemptive-compaction] Compaction failed", { + sessionID, + error: expect.stringContaining("Compaction summarize timed out"), + }) + } finally { + restoreTimeouts() + } + }) }) diff --git a/src/hooks/preemptive-compaction.ts b/src/hooks/preemptive-compaction.ts index e2ac74718..d6c9bf130 100644 --- a/src/hooks/preemptive-compaction.ts +++ b/src/hooks/preemptive-compaction.ts @@ -3,6 +3,7 @@ import type { OhMyOpenCodeConfig } from "../config" import { resolveCompactionModel } from "./shared/compaction-model-resolver" const DEFAULT_ACTUAL_LIMIT = 200_000 +const PREEMPTIVE_COMPACTION_TIMEOUT_MS = 120_000 type ModelCacheStateLike = { anthropicContext1MEnabled: boolean @@ -31,6 +32,26 @@ interface CachedCompactionState { tokens: TokenInfo } +function withTimeout( + promise: Promise, + timeoutMs: number, + errorMessage: string, +): Promise { + let timeoutID: ReturnType | undefined + + const timeoutPromise = new Promise((_, reject) => { + timeoutID = setTimeout(() => { + reject(new Error(errorMessage)) + }, timeoutMs) + }) + + return Promise.race([promise, timeoutPromise]).finally(() => { + if (timeoutID !== undefined) { + clearTimeout(timeoutID) + } + }) +} + function isAnthropicProvider(providerID: string): boolean { return providerID === "anthropic" || providerID === "google-vertex-anthropic" } @@ -94,11 +115,15 @@ export function createPreemptiveCompactionHook( modelID ) - await ctx.client.session.summarize({ - path: { id: sessionID }, - body: { providerID: targetProviderID, modelID: targetModelID, auto: true } as never, - query: { directory: ctx.directory }, - }) + await withTimeout( + ctx.client.session.summarize({ + path: { id: sessionID }, + body: { providerID: targetProviderID, modelID: targetModelID, auto: true } as never, + query: { directory: ctx.directory }, + }), + PREEMPTIVE_COMPACTION_TIMEOUT_MS, + `Compaction summarize timed out after ${PREEMPTIVE_COMPACTION_TIMEOUT_MS}ms`, + ) compactedSessions.add(sessionID) } 
catch (error) { diff --git a/src/hooks/ralph-loop/completion-promise-detector.ts b/src/hooks/ralph-loop/completion-promise-detector.ts index d2b89b10c..95a43c289 100644 --- a/src/hooks/ralph-loop/completion-promise-detector.ts +++ b/src/hooks/ralph-loop/completion-promise-detector.ts @@ -79,8 +79,8 @@ export async function detectCompletionInSessionMessages( if (assistantMessages.length === 0) return false const pattern = buildPromisePattern(options.promise) - const recentAssistants = assistantMessages.slice(-3) - for (const assistant of recentAssistants) { + for (let index = assistantMessages.length - 1; index >= 0; index -= 1) { + const assistant = assistantMessages[index] if (!assistant.parts) continue let responseText = "" diff --git a/src/hooks/ralph-loop/index.test.ts b/src/hooks/ralph-loop/index.test.ts index 994773229..8492ec6ae 100644 --- a/src/hooks/ralph-loop/index.test.ts +++ b/src/hooks/ralph-loop/index.test.ts @@ -494,6 +494,7 @@ describe("ralph-loop", () => { config: { enabled: true, default_max_iterations: 200, + default_strategy: "continue", }, }) @@ -708,6 +709,57 @@ describe("ralph-loop", () => { expect(promptCalls[0].text).toContain("CALCULATOR_DONE") }) + test("should skip concurrent idle events for same session when handler is in flight", async () => { + // given - active loop with delayed prompt injection + let releasePromptAsync: (() => void) | undefined + const promptAsyncBlocked = new Promise((resolve) => { + releasePromptAsync = resolve + }) + let firstPromptStartedResolve: (() => void) | undefined + const firstPromptStarted = new Promise((resolve) => { + firstPromptStartedResolve = resolve + }) + + const mockInput = createMockPluginInput() as { + client: { + session: { + promptAsync: (opts: { path: { id: string }; body: { parts: Array<{ type: string; text: string }> } }) => Promise + } + } + } + + const originalPromptAsync = mockInput.client.session.promptAsync + let promptAsyncCalls = 0 + mockInput.client.session.promptAsync = async 
(opts) => { + promptAsyncCalls += 1 + if (promptAsyncCalls === 1) { + firstPromptStartedResolve?.() + } + await promptAsyncBlocked + return originalPromptAsync(opts) + } + + const hook = createRalphLoopHook(mockInput as Parameters[0]) + hook.startLoop("session-123", "Build feature", { maxIterations: 10 }) + + // when - second idle arrives while first idle processing is still in flight + const firstIdle = hook.event({ + event: { type: "session.idle", properties: { sessionID: "session-123" } }, + }) + await firstPromptStarted + const secondIdle = hook.event({ + event: { type: "session.idle", properties: { sessionID: "session-123" } }, + }) + + releasePromptAsync?.() + await Promise.all([firstIdle, secondIdle]) + + // then - only one continuation should be injected + expect(promptAsyncCalls).toBe(1) + expect(promptCalls.length).toBe(1) + expect(hook.getState()?.iteration).toBe(2) + }) + test("should clear loop state on user abort (MessageAbortedError)", async () => { // given - active loop const hook = createRalphLoopHook(createMockPluginInput()) @@ -782,8 +834,8 @@ describe("ralph-loop", () => { expect(hook.getState()).toBeNull() }) - test("should NOT detect completion if promise is older than last 3 assistant messages", async () => { - // given - promise appears in an assistant message older than last 3 + test("should detect completion even when promise is older than previous narrow window", async () => { + // given - promise appears in an older assistant message with additional assistant output after it mockSessionMessages = [ { info: { role: "user" }, parts: [{ type: "text", text: "Start task" }] }, { info: { role: "assistant" }, parts: [{ type: "text", text: "Promise early DONE" }] }, @@ -801,9 +853,40 @@ describe("ralph-loop", () => { event: { type: "session.idle", properties: { sessionID: "session-123" } }, }) - // then - loop should continue (promise is older than last 3 assistant messages) - expect(promptCalls.length).toBe(1) - 
expect(hook.getState()?.iteration).toBe(2) + // then - loop should complete because all assistant messages are scanned + expect(promptCalls.length).toBe(0) + expect(toastCalls.some((t) => t.title === "Ralph Loop Complete!")).toBe(true) + expect(hook.getState()).toBeNull() + }) + + test("should detect completion when many assistant messages are emitted after promise", async () => { + // given - completion promise followed by long assistant output sequence + mockSessionMessages = [ + { info: { role: "user" }, parts: [{ type: "text", text: "Start task" }] }, + { info: { role: "assistant" }, parts: [{ type: "text", text: "Done now DONE" }] }, + ] + + for (let index = 1; index <= 25; index += 1) { + mockSessionMessages.push({ + info: { role: "assistant" }, + parts: [{ type: "text", text: `Post-completion assistant output ${index}` }], + }) + } + + const hook = createRalphLoopHook(createMockPluginInput(), { + getTranscriptPath: () => join(TEST_DIR, "nonexistent.jsonl"), + }) + hook.startLoop("session-123", "Build something", { completionPromise: "DONE" }) + + // when - session goes idle + await hook.event({ + event: { type: "session.idle", properties: { sessionID: "session-123" } }, + }) + + // then - loop should complete despite large trailing output + expect(promptCalls.length).toBe(0) + expect(toastCalls.some((t) => t.title === "Ralph Loop Complete!")).toBe(true) + expect(hook.getState()).toBeNull() }) test("should allow starting new loop while previous loop is active (different session)", async () => { diff --git a/src/hooks/ralph-loop/iteration-continuation.ts b/src/hooks/ralph-loop/iteration-continuation.ts index 15fea10a9..be067b76c 100644 --- a/src/hooks/ralph-loop/iteration-continuation.ts +++ b/src/hooks/ralph-loop/iteration-continuation.ts @@ -33,15 +33,6 @@ export async function continueIteration( return } - const boundState = options.loopState.setSessionID(newSessionID) - if (!boundState) { - log(`[${HOOK_NAME}] Failed to bind loop state to new session`, { - 
previousSessionID: options.previousSessionID, - newSessionID, - }) - return - } - await injectContinuationPrompt(ctx, { sessionID: newSessionID, inheritFromSessionID: options.previousSessionID, @@ -51,6 +42,16 @@ export async function continueIteration( }) await selectSessionInTui(ctx.client, newSessionID) + + const boundState = options.loopState.setSessionID(newSessionID) + if (!boundState) { + log(`[${HOOK_NAME}] Failed to bind loop state to new session`, { + previousSessionID: options.previousSessionID, + newSessionID, + }) + return + } + return } diff --git a/src/hooks/ralph-loop/ralph-loop-event-handler.ts b/src/hooks/ralph-loop/ralph-loop-event-handler.ts index b0fa5ed71..7d86d79eb 100644 --- a/src/hooks/ralph-loop/ralph-loop-event-handler.ts +++ b/src/hooks/ralph-loop/ralph-loop-event-handler.ts @@ -25,6 +25,8 @@ export function createRalphLoopEventHandler( ctx: PluginInput, options: RalphLoopEventHandlerOptions, ) { + const inFlightSessions = new Set() + return async ({ event }: { event: { type: string; properties?: unknown } }): Promise => { const props = event.properties as Record | undefined @@ -32,115 +34,127 @@ export function createRalphLoopEventHandler( const sessionID = props?.sessionID as string | undefined if (!sessionID) return - if (options.sessionRecovery.isRecovering(sessionID)) { - log(`[${HOOK_NAME}] Skipped: in recovery`, { sessionID }) + if (inFlightSessions.has(sessionID)) { + log(`[${HOOK_NAME}] Skipped: handler in flight`, { sessionID }) return } - const state = options.loopState.getState() - if (!state || !state.active) { - return - } - - if (state.session_id && state.session_id !== sessionID) { - if (options.checkSessionExists) { - try { - const exists = await options.checkSessionExists(state.session_id) - if (!exists) { - options.loopState.clear() - log(`[${HOOK_NAME}] Cleared orphaned state from deleted session`, { - orphanedSessionId: state.session_id, - currentSessionId: sessionID, - }) - return - } - } catch (err) { - 
log(`[${HOOK_NAME}] Failed to check session existence`, { - sessionId: state.session_id, - error: String(err), - }) - } - } - return - } - - const transcriptPath = options.getTranscriptPath(sessionID) - const completionViaTranscript = detectCompletionInTranscript(transcriptPath, state.completion_promise) - const completionViaApi = completionViaTranscript - ? false - : await detectCompletionInSessionMessages(ctx, { - sessionID, - promise: state.completion_promise, - apiTimeoutMs: options.apiTimeoutMs, - directory: options.directory, - }) - - if (completionViaTranscript || completionViaApi) { - log(`[${HOOK_NAME}] Completion detected!`, { - sessionID, - iteration: state.iteration, - promise: state.completion_promise, - detectedVia: completionViaTranscript - ? "transcript_file" - : "session_messages_api", - }) - options.loopState.clear() - - const title = state.ultrawork ? "ULTRAWORK LOOP COMPLETE!" : "Ralph Loop Complete!" - const message = state.ultrawork ? `JUST ULW ULW! Task completed after ${state.iteration} iteration(s)` : `Task completed after ${state.iteration} iteration(s)` - await ctx.client.tui?.showToast?.({ body: { title, message, variant: "success", duration: 5000 } }).catch(() => {}) - return - } - - if (state.iteration >= state.max_iterations) { - log(`[${HOOK_NAME}] Max iterations reached`, { - sessionID, - iteration: state.iteration, - max: state.max_iterations, - }) - options.loopState.clear() - - await ctx.client.tui?.showToast?.({ - body: { title: "Ralph Loop Stopped", message: `Max iterations (${state.max_iterations}) reached without completion`, variant: "warning", duration: 5000 }, - }).catch(() => {}) - return - } - - const newState = options.loopState.incrementIteration() - if (!newState) { - log(`[${HOOK_NAME}] Failed to increment iteration`, { sessionID }) - return - } - - log(`[${HOOK_NAME}] Continuing loop`, { - sessionID, - iteration: newState.iteration, - max: newState.max_iterations, - }) - - await ctx.client.tui?.showToast?.({ - body: 
{ - title: "Ralph Loop", - message: `Iteration ${newState.iteration}/${newState.max_iterations}`, - variant: "info", - duration: 2000, - }, - }).catch(() => {}) + inFlightSessions.add(sessionID) try { - await continueIteration(ctx, newState, { - previousSessionID: sessionID, - directory: options.directory, - apiTimeoutMs: options.apiTimeoutMs, - loopState: options.loopState, - }) - } catch (err) { - log(`[${HOOK_NAME}] Failed to inject continuation`, { + + if (options.sessionRecovery.isRecovering(sessionID)) { + log(`[${HOOK_NAME}] Skipped: in recovery`, { sessionID }) + return + } + + const state = options.loopState.getState() + if (!state || !state.active) { + return + } + + if (state.session_id && state.session_id !== sessionID) { + if (options.checkSessionExists) { + try { + const exists = await options.checkSessionExists(state.session_id) + if (!exists) { + options.loopState.clear() + log(`[${HOOK_NAME}] Cleared orphaned state from deleted session`, { + orphanedSessionId: state.session_id, + currentSessionId: sessionID, + }) + return + } + } catch (err) { + log(`[${HOOK_NAME}] Failed to check session existence`, { + sessionId: state.session_id, + error: String(err), + }) + } + } + return + } + + const transcriptPath = options.getTranscriptPath(sessionID) + const completionViaTranscript = detectCompletionInTranscript(transcriptPath, state.completion_promise) + const completionViaApi = completionViaTranscript + ? false + : await detectCompletionInSessionMessages(ctx, { + sessionID, + promise: state.completion_promise, + apiTimeoutMs: options.apiTimeoutMs, + directory: options.directory, + }) + + if (completionViaTranscript || completionViaApi) { + log(`[${HOOK_NAME}] Completion detected!`, { + sessionID, + iteration: state.iteration, + promise: state.completion_promise, + detectedVia: completionViaTranscript + ? "transcript_file" + : "session_messages_api", + }) + options.loopState.clear() + + const title = state.ultrawork ? "ULTRAWORK LOOP COMPLETE!" 
: "Ralph Loop Complete!" + const message = state.ultrawork ? `JUST ULW ULW! Task completed after ${state.iteration} iteration(s)` : `Task completed after ${state.iteration} iteration(s)` + await ctx.client.tui?.showToast?.({ body: { title, message, variant: "success", duration: 5000 } }).catch(() => {}) + return + } + + if (state.iteration >= state.max_iterations) { + log(`[${HOOK_NAME}] Max iterations reached`, { + sessionID, + iteration: state.iteration, + max: state.max_iterations, + }) + options.loopState.clear() + + await ctx.client.tui?.showToast?.({ + body: { title: "Ralph Loop Stopped", message: `Max iterations (${state.max_iterations}) reached without completion`, variant: "warning", duration: 5000 }, + }).catch(() => {}) + return + } + + const newState = options.loopState.incrementIteration() + if (!newState) { + log(`[${HOOK_NAME}] Failed to increment iteration`, { sessionID }) + return + } + + log(`[${HOOK_NAME}] Continuing loop`, { sessionID, - error: String(err), + iteration: newState.iteration, + max: newState.max_iterations, }) + + await ctx.client.tui?.showToast?.({ + body: { + title: "Ralph Loop", + message: `Iteration ${newState.iteration}/${newState.max_iterations}`, + variant: "info", + duration: 2000, + }, + }).catch(() => {}) + + try { + await continueIteration(ctx, newState, { + previousSessionID: sessionID, + directory: options.directory, + apiTimeoutMs: options.apiTimeoutMs, + loopState: options.loopState, + }) + } catch (err) { + log(`[${HOOK_NAME}] Failed to inject continuation`, { + sessionID, + error: String(err), + }) + } + return + } finally { + inFlightSessions.delete(sessionID) } - return } if (event.type === "session.deleted") { diff --git a/src/hooks/ralph-loop/reset-strategy-race-condition.test.ts b/src/hooks/ralph-loop/reset-strategy-race-condition.test.ts new file mode 100644 index 000000000..5fcd35a2e --- /dev/null +++ b/src/hooks/ralph-loop/reset-strategy-race-condition.test.ts @@ -0,0 +1,111 @@ +/// +import { describe, 
expect, test } from "bun:test" +import { createRalphLoopHook } from "./index" + +function createDeferred(): { + promise: Promise + resolve: () => void +} { + let resolvePromise: (() => void) | null = null + const promise = new Promise((resolve) => { + resolvePromise = resolve + }) + + return { + promise, + resolve: () => { + if (resolvePromise) { + resolvePromise() + } + }, + } +} + +async function waitUntil(condition: () => boolean): Promise { + for (let index = 0; index < 100; index++) { + if (condition()) { + return + } + + await new Promise((resolve) => { + setTimeout(resolve, 0) + }) + } + + throw new Error("Condition was not met in time") +} + +describe("ralph-loop reset strategy race condition", () => { + test("should skip duplicate idle while reset iteration handling is in flight", async () => { + // given - reset strategy loop with blocked TUI session switch + const promptCalls: Array<{ sessionID: string; text: string }> = [] + const createSessionCalls: Array<{ parentID?: string }> = [] + let selectSessionCalls = 0 + const selectSessionDeferred = createDeferred() + + const hook = createRalphLoopHook({ + directory: process.cwd(), + client: { + session: { + prompt: async (options: { + path: { id: string } + body: { parts: Array<{ type: string; text: string }> } + }) => { + promptCalls.push({ + sessionID: options.path.id, + text: options.body.parts[0].text, + }) + return {} + }, + promptAsync: async (options: { + path: { id: string } + body: { parts: Array<{ type: string; text: string }> } + }) => { + promptCalls.push({ + sessionID: options.path.id, + text: options.body.parts[0].text, + }) + return {} + }, + create: async (options: { + body: { parentID?: string; title?: string } + query?: { directory?: string } + }) => { + createSessionCalls.push({ parentID: options.body.parentID }) + return { data: { id: `new-session-${createSessionCalls.length}` } } + }, + messages: async () => ({ data: [] }), + }, + tui: { + showToast: async () => ({}), + selectSession: 
async () => { + selectSessionCalls += 1 + await selectSessionDeferred.promise + return {} + }, + }, + }, + } as unknown as Parameters[0]) + + hook.startLoop("session-old", "Build feature", { strategy: "reset" }) + + // when - first idle is in-flight and old session fires idle again before TUI switch resolves + const firstIdleEvent = hook.event({ + event: { type: "session.idle", properties: { sessionID: "session-old" } }, + }) + + await waitUntil(() => selectSessionCalls > 0) + + const secondIdleEvent = hook.event({ + event: { type: "session.idle", properties: { sessionID: "session-old" } }, + }) + + selectSessionDeferred.resolve() + await Promise.all([firstIdleEvent, secondIdleEvent]) + + // then - duplicate idle should be skipped to prevent concurrent continuation injection + expect(createSessionCalls.length).toBe(1) + expect(promptCalls.length).toBe(1) + expect(hook.getState()?.iteration).toBe(2) + }) +}) diff --git a/src/hooks/runtime-fallback/index.test.ts b/src/hooks/runtime-fallback/index.test.ts index 7660f1954..d9c873f16 100644 --- a/src/hooks/runtime-fallback/index.test.ts +++ b/src/hooks/runtime-fallback/index.test.ts @@ -125,7 +125,7 @@ describe("runtime-fallback", () => { await hook.event({ event: { type: "session.created", - properties: { info: { id: sessionID, model: "google/gemini-3-pro" } }, + properties: { info: { id: sessionID, model: "google/gemini-3.1-pro" } }, }, }) @@ -1841,7 +1841,7 @@ describe("runtime-fallback", () => { test("should apply fallback model on next chat.message after error", async () => { const hook = createRuntimeFallbackHook(createMockPluginInput(), { config: createMockConfig({ notify_on_fallback: false }), - pluginConfig: createMockPluginConfigWithCategoryFallback(["openai/gpt-5.2", "google/gemini-3-pro"]), + pluginConfig: createMockPluginConfigWithCategoryFallback(["openai/gpt-5.2", "google/gemini-3.1-pro"]), }) const sessionID = "test-session-switch" SessionCategoryRegistry.register(sessionID, "test") @@ -1916,7 +1916,7 @@ 
describe("runtime-fallback", () => { const input = createMockPluginInput() const hook = createRuntimeFallbackHook(input, { config: createMockConfig({ notify_on_fallback: false }), - pluginConfig: createMockPluginConfigWithAgentFallback("oracle", ["openai/gpt-5.2", "google/gemini-3-pro"]), + pluginConfig: createMockPluginConfigWithAgentFallback("oracle", ["openai/gpt-5.2", "google/gemini-3.1-pro"]), }) const sessionID = "test-agent-fallback" diff --git a/src/hooks/session-notification-input-needed.test.ts b/src/hooks/session-notification-input-needed.test.ts index 5e8552907..ee1614b88 100644 --- a/src/hooks/session-notification-input-needed.test.ts +++ b/src/hooks/session-notification-input-needed.test.ts @@ -3,6 +3,7 @@ const { describe, expect, test, beforeEach, afterEach, spyOn } = require("bun:te const { createSessionNotification } = require("./session-notification") const { setMainSession, subagentSessions, _resetForTesting } = require("../features/claude-code-session-state") const utils = require("./session-notification-utils") +const sender = require("./session-notification-sender") describe("session-notification input-needed events", () => { let notificationCalls: string[] @@ -37,6 +38,10 @@ describe("session-notification input-needed events", () => { spyOn(utils, "getNotifySendPath").mockResolvedValue("/usr/bin/notify-send") spyOn(utils, "getPowershellPath").mockResolvedValue("powershell") spyOn(utils, "startBackgroundCheck").mockImplementation(() => {}) + spyOn(sender, "detectPlatform").mockReturnValue("darwin") + spyOn(sender, "sendSessionNotification").mockImplementation(async (_ctx: unknown, _platform: unknown, _title: unknown, message: string) => { + notificationCalls.push(message) + }) }) afterEach(() => { @@ -47,7 +52,7 @@ describe("session-notification input-needed events", () => { test("sends question notification when question tool asks for input", async () => { const sessionID = "main-question" setMainSession(sessionID) - const hook = 
createSessionNotification(createMockPluginInput()) + const hook = createSessionNotification(createMockPluginInput(), { enforceMainSessionFilter: false }) await hook({ event: { @@ -74,7 +79,7 @@ describe("session-notification input-needed events", () => { test("sends permission notification for permission events", async () => { const sessionID = "main-permission" setMainSession(sessionID) - const hook = createSessionNotification(createMockPluginInput()) + const hook = createSessionNotification(createMockPluginInput(), { enforceMainSessionFilter: false }) await hook({ event: { diff --git a/src/hooks/session-notification.test.ts b/src/hooks/session-notification.test.ts index 2f0377a4c..cf895ba98 100644 --- a/src/hooks/session-notification.test.ts +++ b/src/hooks/session-notification.test.ts @@ -1,8 +1,9 @@ -import { describe, expect, test, beforeEach, afterEach, spyOn } from "bun:test" +const { describe, expect, test, beforeEach, afterEach, spyOn } = require("bun:test") import { createSessionNotification } from "./session-notification" import { setMainSession, subagentSessions, _resetForTesting } from "../features/claude-code-session-state" import * as utils from "./session-notification-utils" +import * as sender from "./session-notification-sender" describe("session-notification", () => { let notificationCalls: string[] @@ -40,6 +41,10 @@ describe("session-notification", () => { spyOn(utils, "getPaplayPath").mockResolvedValue("/usr/bin/paplay") spyOn(utils, "getAplayPath").mockResolvedValue("/usr/bin/aplay") spyOn(utils, "startBackgroundCheck").mockImplementation(() => {}) + spyOn(sender, "detectPlatform").mockReturnValue("darwin") + spyOn(sender, "sendSessionNotification").mockImplementation(async (_ctx, _platform, _title, message) => { + notificationCalls.push(message) + }) }) afterEach(() => { @@ -105,6 +110,7 @@ describe("session-notification", () => { const hook = createSessionNotification(createMockPluginInput(), { idleConfirmationDelay: 10, 
skipIfIncompleteTodos: false, + enforceMainSessionFilter: false, }) // when - main session goes idle @@ -332,6 +338,7 @@ describe("session-notification", () => { const hook = createSessionNotification(createMockPluginInput(), { idleConfirmationDelay: 10, skipIfIncompleteTodos: false, + enforceMainSessionFilter: false, }) // when - session goes idle twice diff --git a/src/hooks/session-notification.ts b/src/hooks/session-notification.ts index 48e0d288b..3b3dcc514 100644 --- a/src/hooks/session-notification.ts +++ b/src/hooks/session-notification.ts @@ -4,11 +4,9 @@ import { startBackgroundCheck, } from "./session-notification-utils" import { - detectPlatform, - getDefaultSoundPath, - playSessionNotificationSound, - sendSessionNotification, + type Platform, } from "./session-notification-sender" +import * as sessionNotificationSender from "./session-notification-sender" import { hasIncompleteTodos } from "./session-todo-status" import { createIdleNotificationScheduler } from "./session-notification-scheduler" @@ -25,13 +23,14 @@ interface SessionNotificationConfig { skipIfIncompleteTodos?: boolean /** Maximum number of sessions to track before cleanup (default: 100) */ maxTrackedSessions?: number + enforceMainSessionFilter?: boolean } export function createSessionNotification( ctx: PluginInput, config: SessionNotificationConfig = {} ) { - const currentPlatform = detectPlatform() - const defaultSoundPath = getDefaultSoundPath(currentPlatform) + const currentPlatform: Platform = sessionNotificationSender.detectPlatform() + const defaultSoundPath = sessionNotificationSender.getDefaultSoundPath(currentPlatform) startBackgroundCheck(currentPlatform) @@ -45,6 +44,7 @@ export function createSessionNotification( idleConfirmationDelay: 1500, skipIfIncompleteTodos: true, maxTrackedSessions: 100, + enforceMainSessionFilter: true, ...config, } @@ -53,8 +53,8 @@ export function createSessionNotification( platform: currentPlatform, config: mergedConfig, hasIncompleteTodos, - send: 
sendSessionNotification, - playSound: playSessionNotificationSound, + send: sessionNotificationSender.sendSessionNotification, + playSound: sessionNotificationSender.playSessionNotificationSound, }) const QUESTION_TOOLS = new Set(["question", "ask_user_question", "askuserquestion"]) @@ -81,8 +81,10 @@ export function createSessionNotification( const shouldNotifyForSession = (sessionID: string): boolean => { if (subagentSessions.has(sessionID)) return false - const mainSessionID = getMainSessionID() - if (mainSessionID && sessionID !== mainSessionID) return false + if (mergedConfig.enforceMainSessionFilter) { + const mainSessionID = getMainSessionID() + if (mainSessionID && sessionID !== mainSessionID) return false + } return true } @@ -146,9 +148,14 @@ export function createSessionNotification( if (!shouldNotifyForSession(sessionID)) return scheduler.markSessionActivity(sessionID) - await sendSessionNotification(ctx, currentPlatform, mergedConfig.title, mergedConfig.permissionMessage) + await sessionNotificationSender.sendSessionNotification( + ctx, + currentPlatform, + mergedConfig.title, + mergedConfig.permissionMessage, + ) if (mergedConfig.playSound && mergedConfig.soundPath) { - await playSessionNotificationSound(ctx, currentPlatform, mergedConfig.soundPath) + await sessionNotificationSender.playSessionNotificationSound(ctx, currentPlatform, mergedConfig.soundPath) } return } @@ -168,9 +175,9 @@ export function createSessionNotification( ? 
mergedConfig.permissionMessage : mergedConfig.questionMessage - await sendSessionNotification(ctx, currentPlatform, mergedConfig.title, message) + await sessionNotificationSender.sendSessionNotification(ctx, currentPlatform, mergedConfig.title, message) if (mergedConfig.playSound && mergedConfig.soundPath) { - await playSessionNotificationSound(ctx, currentPlatform, mergedConfig.soundPath) + await sessionNotificationSender.playSessionNotificationSound(ctx, currentPlatform, mergedConfig.soundPath) } } } diff --git a/src/hooks/start-work/index.test.ts b/src/hooks/start-work/index.test.ts index e633e85a9..26b87eba4 100644 --- a/src/hooks/start-work/index.test.ts +++ b/src/hooks/start-work/index.test.ts @@ -7,9 +7,11 @@ import { createStartWorkHook } from "./index" import { writeBoulderState, clearBoulderState, + readBoulderState, } from "../../features/boulder-state" import type { BoulderState } from "../../features/boulder-state" import * as sessionState from "../../features/claude-code-session-state" +import * as worktreeDetector from "./worktree-detector" describe("start-work hook", () => { let testDir: string @@ -402,4 +404,152 @@ describe("start-work hook", () => { updateSpy.mockRestore() }) }) + + describe("worktree support", () => { + let detectSpy: ReturnType + + beforeEach(() => { + detectSpy = spyOn(worktreeDetector, "detectWorktreePath").mockReturnValue(null) + }) + + afterEach(() => { + detectSpy.mockRestore() + }) + + test("should inject model-decides instructions when no --worktree flag", async () => { + // given - single plan, no worktree flag + const plansDir = join(testDir, ".sisyphus", "plans") + mkdirSync(plansDir, { recursive: true }) + writeFileSync(join(plansDir, "my-plan.md"), "# Plan\n- [ ] Task 1") + + const hook = createStartWorkHook(createMockPluginInput()) + const output = { + parts: [{ type: "text", text: "" }], + } + + // when + await hook["chat.message"]({ sessionID: "session-123"
}, output) + + // then - model-decides instructions should appear + expect(output.parts[0].text).toContain("Worktree Setup Required") + expect(output.parts[0].text).toContain("git worktree list --porcelain") + expect(output.parts[0].text).toContain("git worktree add") + }) + + test("should inject worktree path when --worktree flag is valid", async () => { + // given - single plan + valid worktree path + const plansDir = join(testDir, ".sisyphus", "plans") + mkdirSync(plansDir, { recursive: true }) + writeFileSync(join(plansDir, "my-plan.md"), "# Plan\n- [ ] Task 1") + detectSpy.mockReturnValue("/validated/worktree") + + const hook = createStartWorkHook(createMockPluginInput()) + const output = { + parts: [{ type: "text", text: "\n--worktree /validated/worktree\n" }], + } + + // when + await hook["chat.message"]({ sessionID: "session-123" }, output) + + // then - validated path shown, no model-decides instructions + expect(output.parts[0].text).toContain("**Worktree**: /validated/worktree") + expect(output.parts[0].text).not.toContain("Worktree Setup Required") + }) + + test("should store worktree_path in boulder when --worktree is valid", async () => { + // given - plan + valid worktree + const plansDir = join(testDir, ".sisyphus", "plans") + mkdirSync(plansDir, { recursive: true }) + writeFileSync(join(plansDir, "my-plan.md"), "# Plan\n- [ ] Task 1") + detectSpy.mockReturnValue("/valid/wt") + + const hook = createStartWorkHook(createMockPluginInput()) + const output = { + parts: [{ type: "text", text: "\n--worktree /valid/wt\n" }], + } + + // when + await hook["chat.message"]({ sessionID: "session-123" }, output) + + // then - boulder.json has worktree_path + const state = readBoulderState(testDir) + expect(state?.worktree_path).toBe("/valid/wt") + }) + + test("should NOT store worktree_path when --worktree path is invalid", async () => { + // given - plan + invalid worktree path (detectWorktreePath returns null) + const plansDir = join(testDir, ".sisyphus", 
"plans") + mkdirSync(plansDir, { recursive: true }) + writeFileSync(join(plansDir, "my-plan.md"), "# Plan\n- [ ] Task 1") + // detectSpy already returns null by default + + const hook = createStartWorkHook(createMockPluginInput()) + const output = { + parts: [{ type: "text", text: "\n--worktree /nonexistent/wt\n" }], + } + + // when + await hook["chat.message"]({ sessionID: "session-123" }, output) + + // then - worktree_path absent, setup instructions present + const state = readBoulderState(testDir) + expect(state?.worktree_path).toBeUndefined() + expect(output.parts[0].text).toContain("needs setup") + expect(output.parts[0].text).toContain("git worktree add /nonexistent/wt") + }) + + test("should update boulder worktree_path on resume when new --worktree given", async () => { + // given - existing boulder with old worktree, user provides new worktree + const planPath = join(testDir, "plan.md") + writeFileSync(planPath, "# Plan\n- [ ] Task 1") + const existingState: BoulderState = { + active_plan: planPath, + started_at: "2026-01-01T00:00:00Z", + session_ids: ["old-session"], + plan_name: "plan", + worktree_path: "/old/wt", + } + writeBoulderState(testDir, existingState) + detectSpy.mockReturnValue("/new/wt") + + const hook = createStartWorkHook(createMockPluginInput()) + const output = { + parts: [{ type: "text", text: "\n--worktree /new/wt\n" }], + } + + // when + await hook["chat.message"]({ sessionID: "session-456" }, output) + + // then - boulder reflects updated worktree and new session appended + const state = readBoulderState(testDir) + expect(state?.worktree_path).toBe("/new/wt") + expect(state?.session_ids).toContain("session-456") + }) + + test("should show existing worktree on resume when no --worktree flag", async () => { + // given - existing boulder already has worktree_path, no flag given + const planPath = join(testDir, "plan.md") + writeFileSync(planPath, "# Plan\n- [ ] Task 1") + const existingState: BoulderState = { + active_plan: planPath, + 
started_at: "2026-01-01T00:00:00Z", + session_ids: ["old-session"], + plan_name: "plan", + worktree_path: "/existing/wt", + } + writeBoulderState(testDir, existingState) + + const hook = createStartWorkHook(createMockPluginInput()) + const output = { + parts: [{ type: "text", text: "" }], + } + + // when + await hook["chat.message"]({ sessionID: "session-789" }, output) + + // then - shows existing worktree, no model-decides instructions + expect(output.parts[0].text).toContain("/existing/wt") + expect(output.parts[0].text).not.toContain("Worktree Setup Required") + }) + }) }) diff --git a/src/hooks/start-work/index.ts b/src/hooks/start-work/index.ts index 41cb0b1a4..ee270861a 100644 --- a/src/hooks/start-work/index.ts +++ b/src/hooks/start-work/index.ts @@ -1 +1,4 @@ export { HOOK_NAME, createStartWorkHook } from "./start-work-hook" +export { detectWorktreePath } from "./worktree-detector" +export type { ParsedUserRequest } from "./parse-user-request" +export { parseUserRequest } from "./parse-user-request" diff --git a/src/hooks/start-work/parse-user-request.test.ts b/src/hooks/start-work/parse-user-request.test.ts new file mode 100644 index 000000000..e5d61a4c5 --- /dev/null +++ b/src/hooks/start-work/parse-user-request.test.ts @@ -0,0 +1,78 @@ +/// + +import { describe, expect, test } from "bun:test" +import { parseUserRequest } from "./parse-user-request" + +describe("parseUserRequest", () => { + describe("when no user-request tag", () => { + test("#given prompt without tag #when parsing #then returns nulls", () => { + const result = parseUserRequest("Just a regular message without any tags") + expect(result.planName).toBeNull() + expect(result.explicitWorktreePath).toBeNull() + }) + }) + + describe("when user-request tag is empty", () => { + test("#given empty user-request tag #when parsing #then returns nulls", () => { + const result = parseUserRequest(" ") + expect(result.planName).toBeNull() + expect(result.explicitWorktreePath).toBeNull() + }) + }) + + 
describe("when only plan name given", () => { + test("#given plan name without worktree flag #when parsing #then returns plan name with null worktree", () => { + const result = parseUserRequest("\nmy-plan\n") + expect(result.planName).toBe("my-plan") + expect(result.explicitWorktreePath).toBeNull() + }) + }) + + describe("when only --worktree flag given", () => { + test("#given --worktree with path only #when parsing #then returns worktree path with null plan", () => { + const result = parseUserRequest("--worktree /home/user/repo-feat") + expect(result.planName).toBeNull() + expect(result.explicitWorktreePath).toBe("/home/user/repo-feat") + }) + }) + + describe("when plan name and --worktree are both given", () => { + test("#given plan name before --worktree #when parsing #then returns both", () => { + const result = parseUserRequest("my-plan --worktree /path/to/worktree") + expect(result.planName).toBe("my-plan") + expect(result.explicitWorktreePath).toBe("/path/to/worktree") + }) + + test("#given --worktree before plan name #when parsing #then returns both", () => { + const result = parseUserRequest("--worktree /path/to/worktree my-plan") + expect(result.planName).toBe("my-plan") + expect(result.explicitWorktreePath).toBe("/path/to/worktree") + }) + }) + + describe("when --worktree flag has no path", () => { + test("#given --worktree without path #when parsing #then worktree path is null", () => { + const result = parseUserRequest("--worktree") + expect(result.explicitWorktreePath).toBeNull() + }) + }) + + describe("when ultrawork keywords are present", () => { + test("#given plan name with ultrawork keyword #when parsing #then strips keyword from plan name", () => { + const result = parseUserRequest("my-plan ultrawork") + expect(result.planName).toBe("my-plan") + }) + + test("#given plan name with ulw keyword and worktree #when parsing #then strips ulw, preserves worktree", () => { + const result = parseUserRequest("my-plan ulw --worktree /path/to/wt") + 
expect(result.planName).toBe("my-plan") + expect(result.explicitWorktreePath).toBe("/path/to/wt") + }) + + test("#given only ultrawork keyword with worktree #when parsing #then plan name is null, worktree preserved", () => { + const result = parseUserRequest("ultrawork --worktree /wt") + expect(result.planName).toBeNull() + expect(result.explicitWorktreePath).toBe("/wt") + }) + }) +}) diff --git a/src/hooks/start-work/parse-user-request.ts b/src/hooks/start-work/parse-user-request.ts new file mode 100644 index 000000000..627deb67a --- /dev/null +++ b/src/hooks/start-work/parse-user-request.ts @@ -0,0 +1,29 @@ +const KEYWORD_PATTERN = /\b(ultrawork|ulw)\b/gi +const WORKTREE_FLAG_PATTERN = /--worktree(?:\s+(\S+))?/ + +export interface ParsedUserRequest { + planName: string | null + explicitWorktreePath: string | null +} + +export function parseUserRequest(promptText: string): ParsedUserRequest { + const match = promptText.match(/\s*([\s\S]*?)\s*<\/user-request>/i) + if (!match) return { planName: null, explicitWorktreePath: null } + + let rawArg = match[1].trim() + if (!rawArg) return { planName: null, explicitWorktreePath: null } + + const worktreeMatch = rawArg.match(WORKTREE_FLAG_PATTERN) + const explicitWorktreePath = worktreeMatch ? (worktreeMatch[1] ?? 
null) : null + + if (worktreeMatch) { + rawArg = rawArg.replace(worktreeMatch[0], "").trim() + } + + const cleanedArg = rawArg.replace(KEYWORD_PATTERN, "").trim() + + return { + planName: cleanedArg || null, + explicitWorktreePath, + } +} diff --git a/src/hooks/start-work/start-work-hook.ts b/src/hooks/start-work/start-work-hook.ts index 77c76d240..03cdb540e 100644 --- a/src/hooks/start-work/start-work-hook.ts +++ b/src/hooks/start-work/start-work-hook.ts @@ -1,3 +1,4 @@ +import { statSync } from "node:fs" import type { PluginInput } from "@opencode-ai/plugin" import { readBoulderState, @@ -11,11 +12,11 @@ import { } from "../../features/boulder-state" import { log } from "../../shared/logger" import { updateSessionAgent } from "../../features/claude-code-session-state" +import { detectWorktreePath } from "./worktree-detector" +import { parseUserRequest } from "./parse-user-request" export const HOOK_NAME = "start-work" as const -const KEYWORD_PATTERN = /\b(ultrawork|ulw)\b/gi - interface StartWorkHookInput { sessionID: string messageID?: string @@ -25,73 +26,76 @@ interface StartWorkHookOutput { parts: Array<{ type: string; text?: string }> } -function extractUserRequestPlanName(promptText: string): string | null { - const userRequestMatch = promptText.match(/\s*([\s\S]*?)\s*<\/user-request>/i) - if (!userRequestMatch) return null - - const rawArg = userRequestMatch[1].trim() - if (!rawArg) return null - - const cleanedArg = rawArg.replace(KEYWORD_PATTERN, "").trim() - return cleanedArg || null -} - function findPlanByName(plans: string[], requestedName: string): string | null { const lowerName = requestedName.toLowerCase() - - const exactMatch = plans.find(p => getPlanName(p).toLowerCase() === lowerName) + const exactMatch = plans.find((p) => getPlanName(p).toLowerCase() === lowerName) if (exactMatch) return exactMatch - - const partialMatch = plans.find(p => getPlanName(p).toLowerCase().includes(lowerName)) + const partialMatch = plans.find((p) => 
getPlanName(p).toLowerCase().includes(lowerName)) return partialMatch || null } +const MODEL_DECIDES_WORKTREE_BLOCK = ` +## Worktree Setup Required + +No worktree specified. Before starting work, you MUST choose or create one: + +1. \`git worktree list --porcelain\` — list existing worktrees +2. Create if needed: \`git worktree add \` +3. Update \`.sisyphus/boulder.json\` — add \`"worktree_path": ""\` +4. Work exclusively inside that worktree directory` + +function resolveWorktreeContext( + explicitWorktreePath: string | null, +): { worktreePath: string | undefined; block: string } { + if (explicitWorktreePath === null) { + return { worktreePath: undefined, block: MODEL_DECIDES_WORKTREE_BLOCK } + } + + const validatedPath = detectWorktreePath(explicitWorktreePath) + if (validatedPath) { + return { worktreePath: validatedPath, block: `\n**Worktree**: ${validatedPath}` } + } + + return { + worktreePath: undefined, + block: `\n**Worktree** (needs setup): \`git worktree add ${explicitWorktreePath} \`, then add \`"worktree_path"\` to boulder.json`, + } +} + export function createStartWorkHook(ctx: PluginInput) { return { - "chat.message": async ( - input: StartWorkHookInput, - output: StartWorkHookOutput - ): Promise => { + "chat.message": async (input: StartWorkHookInput, output: StartWorkHookOutput): Promise => { const parts = output.parts - const promptText = parts - ?.filter((p) => p.type === "text" && p.text) - .map((p) => p.text) - .join("\n") - .trim() || "" + const promptText = + parts + ?.filter((p) => p.type === "text" && p.text) + .map((p) => p.text) + .join("\n") + .trim() || "" - // Only trigger on actual command execution (contains tag) - // NOT on description text like "Start Sisyphus work session from Prometheus plan" - const isStartWorkCommand = promptText.includes("") + if (!promptText.includes("")) return - if (!isStartWorkCommand) { - return - } - - log(`[${HOOK_NAME}] Processing start-work command`, { - sessionID: input.sessionID, - }) - - 
updateSessionAgent(input.sessionID, "atlas") // Always switch: fixes #1298 + log(`[${HOOK_NAME}] Processing start-work command`, { sessionID: input.sessionID }) + updateSessionAgent(input.sessionID, "atlas") const existingState = readBoulderState(ctx.directory) const sessionId = input.sessionID const timestamp = new Date().toISOString() + const { planName: explicitPlanName, explicitWorktreePath } = parseUserRequest(promptText) + const { worktreePath, block: worktreeBlock } = resolveWorktreeContext(explicitWorktreePath) + let contextInfo = "" - - const explicitPlanName = extractUserRequestPlanName(promptText) - + if (explicitPlanName) { - log(`[${HOOK_NAME}] Explicit plan name requested: ${explicitPlanName}`, { - sessionID: input.sessionID, - }) - + log(`[${HOOK_NAME}] Explicit plan name requested: ${explicitPlanName}`, { sessionID: input.sessionID }) + const allPlans = findPrometheusPlans(ctx.directory) const matchedPlan = findPlanByName(allPlans, explicitPlanName) - + if (matchedPlan) { const progress = getPlanProgress(matchedPlan) - + if (progress.isComplete) { contextInfo = ` ## Plan Already Complete @@ -99,12 +103,10 @@ export function createStartWorkHook(ctx: PluginInput) { The requested plan "${getPlanName(matchedPlan)}" has been completed. All ${progress.total} tasks are done. Create a new plan with: /plan "your task"` } else { - if (existingState) { - clearBoulderState(ctx.directory) - } - const newState = createBoulderState(matchedPlan, sessionId, "atlas") + if (existingState) clearBoulderState(ctx.directory) + const newState = createBoulderState(matchedPlan, sessionId, "atlas", worktreePath) writeBoulderState(ctx.directory, newState) - + contextInfo = ` ## Auto-Selected Plan @@ -113,17 +115,20 @@ All ${progress.total} tasks are done. Create a new plan with: /plan "your task"` **Progress**: ${progress.completed}/${progress.total} tasks **Session ID**: ${sessionId} **Started**: ${timestamp} +${worktreeBlock} boulder.json has been created. 
Read the plan and begin execution.` } } else { - const incompletePlans = allPlans.filter(p => !getPlanProgress(p).isComplete) + const incompletePlans = allPlans.filter((p) => !getPlanProgress(p).isComplete) if (incompletePlans.length > 0) { - const planList = incompletePlans.map((p, i) => { - const prog = getPlanProgress(p) - return `${i + 1}. [${getPlanName(p)}] - Progress: ${prog.completed}/${prog.total}` - }).join("\n") - + const planList = incompletePlans + .map((p, i) => { + const prog = getPlanProgress(p) + return `${i + 1}. [${getPlanName(p)}] - Progress: ${prog.completed}/${prog.total}` + }) + .join("\n") + contextInfo = ` ## Plan Not Found @@ -143,9 +148,25 @@ No incomplete plans available. Create a new plan with: /plan "your task"` } } else if (existingState) { const progress = getPlanProgress(existingState.active_plan) - + if (!progress.isComplete) { - appendSessionId(ctx.directory, sessionId) + const effectiveWorktree = worktreePath ?? existingState.worktree_path + + if (worktreePath !== undefined) { + const updatedSessions = existingState.session_ids.includes(sessionId) + ? existingState.session_ids + : [...existingState.session_ids, sessionId] + writeBoulderState(ctx.directory, { + ...existingState, + worktree_path: worktreePath, + session_ids: updatedSessions, + }) + } else { + appendSessionId(ctx.directory, sessionId) + } + + const worktreeDisplay = effectiveWorktree ? `\n**Worktree**: ${effectiveWorktree}` : worktreeBlock + contextInfo = ` ## Active Work Session Found @@ -155,6 +176,7 @@ No incomplete plans available. Create a new plan with: /plan "your task"` **Progress**: ${progress.completed}/${progress.total} tasks completed **Sessions**: ${existingState.session_ids.length + 1} (current session appended) **Started**: ${existingState.started_at} +${worktreeDisplay} The current session (${sessionId}) has been added to session_ids. 
Read the plan file and continue from the first unchecked task.` @@ -167,13 +189,15 @@ Looking for new plans...` } } - if ((!existingState && !explicitPlanName) || (existingState && !explicitPlanName && getPlanProgress(existingState.active_plan).isComplete)) { + if ( + (!existingState && !explicitPlanName) || + (existingState && !explicitPlanName && getPlanProgress(existingState.active_plan).isComplete) + ) { const plans = findPrometheusPlans(ctx.directory) - const incompletePlans = plans.filter(p => !getPlanProgress(p).isComplete) - + const incompletePlans = plans.filter((p) => !getPlanProgress(p).isComplete) + if (plans.length === 0) { contextInfo += ` - ## No Plans Found No Prometheus plan files found at .sisyphus/plans/ @@ -187,7 +211,7 @@ All ${plans.length} plan(s) are complete. Create a new plan with: /plan "your ta } else if (incompletePlans.length === 1) { const planPath = incompletePlans[0] const progress = getPlanProgress(planPath) - const newState = createBoulderState(planPath, sessionId, "atlas") + const newState = createBoulderState(planPath, sessionId, "atlas", worktreePath) writeBoulderState(ctx.directory, newState) contextInfo += ` @@ -199,15 +223,17 @@ All ${plans.length} plan(s) are complete. Create a new plan with: /plan "your ta **Progress**: ${progress.completed}/${progress.total} tasks **Session ID**: ${sessionId} **Started**: ${timestamp} +${worktreeBlock} boulder.json has been created. Read the plan and begin execution.` } else { - const planList = incompletePlans.map((p, i) => { - const progress = getPlanProgress(p) - const stat = require("node:fs").statSync(p) - const modified = new Date(stat.mtimeMs).toISOString() - return `${i + 1}. [${getPlanName(p)}] - Modified: ${modified} - Progress: ${progress.completed}/${progress.total}` - }).join("\n") + const planList = incompletePlans + .map((p, i) => { + const progress = getPlanProgress(p) + const modified = new Date(statSync(p).mtimeMs).toISOString() + return `${i + 1}. 
[${getPlanName(p)}] - Modified: ${modified} - Progress: ${progress.completed}/${progress.total}` + }) + .join("\n") contextInfo += ` @@ -220,6 +246,7 @@ Session ID: ${sessionId} ${planList} Ask the user which plan to work on. Present the options above and wait for their response. +${worktreeBlock} ` } } @@ -229,13 +256,14 @@ Ask the user which plan to work on. Present the options above and wait for their output.parts[idx].text = output.parts[idx].text .replace(/\$SESSION_ID/g, sessionId) .replace(/\$TIMESTAMP/g, timestamp) - + output.parts[idx].text += `\n\n---\n${contextInfo}` } log(`[${HOOK_NAME}] Context injected`, { sessionID: input.sessionID, hasExistingState: !!existingState, + worktreePath, }) }, } diff --git a/src/hooks/start-work/worktree-detector.test.ts b/src/hooks/start-work/worktree-detector.test.ts new file mode 100644 index 000000000..b02d5af1b --- /dev/null +++ b/src/hooks/start-work/worktree-detector.test.ts @@ -0,0 +1,79 @@ +/// + +import { describe, expect, test, spyOn, beforeEach, afterEach } from "bun:test" +import * as childProcess from "node:child_process" +import { detectWorktreePath } from "./worktree-detector" + +describe("detectWorktreePath", () => { + let execFileSyncSpy: ReturnType + + beforeEach(() => { + execFileSyncSpy = spyOn(childProcess, "execFileSync").mockImplementation( + ((_file: string, _args: string[]) => "") as typeof childProcess.execFileSync, + ) + }) + + afterEach(() => { + execFileSyncSpy.mockRestore() + }) + + describe("when directory is a valid git worktree", () => { + test("#given valid git dir #when detecting #then returns worktree root path", () => { + execFileSyncSpy.mockImplementation( + ((_file: string, _args: string[]) => "/home/user/my-repo\n") as typeof childProcess.execFileSync, + ) + + // when + const result = detectWorktreePath("/home/user/my-repo/src") + + // then + expect(result).toBe("/home/user/my-repo") + }) + + test("#given git output with trailing newline #when detecting #then trims output", () => { 
+ execFileSyncSpy.mockImplementation( + ((_file: string, _args: string[]) => "/projects/worktree-a\n\n") as typeof childProcess.execFileSync, + ) + + const result = detectWorktreePath("/projects/worktree-a") + + expect(result).toBe("/projects/worktree-a") + }) + + test("#given valid dir #when detecting #then calls git rev-parse with cwd", () => { + execFileSyncSpy.mockImplementation( + ((_file: string, _args: string[]) => "/repo\n") as typeof childProcess.execFileSync, + ) + + detectWorktreePath("/repo/some/subdir") + + expect(execFileSyncSpy).toHaveBeenCalledWith( + "git", + ["rev-parse", "--show-toplevel"], + expect.objectContaining({ cwd: "/repo/some/subdir" }), + ) + }) + }) + + describe("when directory is not a git worktree", () => { + test("#given non-git directory #when detecting #then returns null", () => { + execFileSyncSpy.mockImplementation((_file: string, _args: string[]) => { + throw new Error("not a git repository") + }) + + const result = detectWorktreePath("/tmp/not-a-repo") + + expect(result).toBeNull() + }) + + test("#given non-existent directory #when detecting #then returns null", () => { + execFileSyncSpy.mockImplementation((_file: string, _args: string[]) => { + throw new Error("ENOENT: no such file or directory") + }) + + const result = detectWorktreePath("/nonexistent/path") + + expect(result).toBeNull() + }) + }) +}) diff --git a/src/hooks/start-work/worktree-detector.ts b/src/hooks/start-work/worktree-detector.ts new file mode 100644 index 000000000..74c919593 --- /dev/null +++ b/src/hooks/start-work/worktree-detector.ts @@ -0,0 +1,14 @@ +import { execFileSync } from "node:child_process" + +export function detectWorktreePath(directory: string): string | null { + try { + return execFileSync("git", ["rev-parse", "--show-toplevel"], { + cwd: directory, + encoding: "utf-8", + timeout: 5000, + stdio: ["pipe", "pipe", "pipe"], + }).trim() + } catch { + return null + } +} diff --git a/src/hooks/stop-continuation-guard/hook.ts 
b/src/hooks/stop-continuation-guard/hook.ts index f7c49a563..747b7a9b6 100644 --- a/src/hooks/stop-continuation-guard/hook.ts +++ b/src/hooks/stop-continuation-guard/hook.ts @@ -1,4 +1,5 @@ import type { PluginInput } from "@opencode-ai/plugin" +import type { BackgroundManager } from "../../features/background-agent" import { clearContinuationMarker, @@ -8,6 +9,11 @@ import { log } from "../../shared/logger" const HOOK_NAME = "stop-continuation-guard" +type StopContinuationBackgroundManager = Pick< + BackgroundManager, + "getAllDescendantTasks" | "cancelTask" +> + export interface StopContinuationGuard { event: (input: { event: { type: string; properties?: unknown } }) => Promise "chat.message": (input: { sessionID?: string }) => Promise @@ -17,7 +23,10 @@ export interface StopContinuationGuard { } export function createStopContinuationGuardHook( - ctx: PluginInput + ctx: PluginInput, + options?: { + backgroundManager?: StopContinuationBackgroundManager + } ): StopContinuationGuard { const stoppedSessions = new Set() @@ -25,6 +34,38 @@ export function createStopContinuationGuardHook( stoppedSessions.add(sessionID) setContinuationMarkerSource(ctx.directory, sessionID, "stop", "stopped", "continuation stopped") log(`[${HOOK_NAME}] Continuation stopped for session`, { sessionID }) + + const backgroundManager = options?.backgroundManager + if (!backgroundManager) { + return + } + + const cancellableTasks = backgroundManager + .getAllDescendantTasks(sessionID) + .filter((task) => task.status === "running" || task.status === "pending") + + if (cancellableTasks.length === 0) { + return + } + + void Promise.allSettled( + cancellableTasks.map(async (task) => { + await backgroundManager.cancelTask(task.id, { + source: "stop-continuation", + reason: "Continuation stopped via /stop-continuation", + abortSession: task.status === "running", + skipNotification: true, + }) + }) + ).then((results) => { + const cancelledCount = results.filter((result) => result.status === 
"fulfilled").length + const failedCount = results.length - cancelledCount + log(`[${HOOK_NAME}] Cancelled background tasks for stopped session`, { + sessionID, + cancelledCount, + failedCount, + }) + }) } const isStopped = (sessionID: string): boolean => { diff --git a/src/hooks/stop-continuation-guard/index.test.ts b/src/hooks/stop-continuation-guard/index.test.ts index 9547accf2..a0d08f217 100644 --- a/src/hooks/stop-continuation-guard/index.test.ts +++ b/src/hooks/stop-continuation-guard/index.test.ts @@ -2,9 +2,15 @@ import { afterEach, describe, expect, test } from "bun:test" import { mkdtempSync, rmSync } from "node:fs" import { join } from "node:path" import { tmpdir } from "node:os" +import type { BackgroundManager, BackgroundTask } from "../../features/background-agent" import { readContinuationMarker } from "../../features/run-continuation-state" import { createStopContinuationGuardHook } from "./index" +type CancelCall = { + taskId: string + options?: Parameters[1] +} + describe("stop-continuation-guard", () => { const tempDirs: string[] = [] @@ -34,6 +40,33 @@ describe("stop-continuation-guard", () => { } as any } + function createBackgroundTask(status: BackgroundTask["status"], id: string): BackgroundTask { + return { + id, + status, + description: `${id} description`, + parentSessionID: "parent-session", + parentMessageID: "parent-message", + prompt: "prompt", + agent: "sisyphus-junior", + } + } + + function createMockBackgroundManager(tasks: BackgroundTask[], cancelCalls: CancelCall[]): Pick { + return { + getAllDescendantTasks: () => tasks, + cancelTask: async (taskId: string, options?: Parameters[1]) => { + cancelCalls.push({ taskId, options }) + return true + }, + } + } + + async function flushMicrotasks(): Promise { + await Promise.resolve() + await Promise.resolve() + } + test("should mark session as stopped", () => { // given - a guard hook with no stopped sessions const input = createMockPluginInput() @@ -166,4 +199,31 @@ 
describe("stop-continuation-guard", () => { // then - should not throw and stopped session remains stopped expect(guard.isStopped("some-session")).toBe(true) }) + + test("should cancel only running and pending background tasks on stop", async () => { + // given - a background manager with mixed task statuses + const cancelCalls: CancelCall[] = [] + const backgroundManager = createMockBackgroundManager( + [ + createBackgroundTask("running", "task-running"), + createBackgroundTask("pending", "task-pending"), + createBackgroundTask("completed", "task-completed"), + ], + cancelCalls, + ) + const guard = createStopContinuationGuardHook(createMockPluginInput(), { + backgroundManager, + }) + + // when - stop continuation is triggered + guard.stop("test-session-bg") + await flushMicrotasks() + + // then - only running and pending tasks are cancelled + expect(cancelCalls).toHaveLength(2) + expect(cancelCalls[0]?.taskId).toBe("task-running") + expect(cancelCalls[0]?.options?.abortSession).toBe(true) + expect(cancelCalls[1]?.taskId).toBe("task-pending") + expect(cancelCalls[1]?.options?.abortSession).toBe(false) + }) }) diff --git a/src/hooks/think-mode/hook.ts b/src/hooks/think-mode/hook.ts index 17cd17f79..017cb616a 100644 --- a/src/hooks/think-mode/hook.ts +++ b/src/hooks/think-mode/hook.ts @@ -1,6 +1,6 @@ import { detectThinkKeyword, extractPromptText } from "./detector" -import { getHighVariant, getThinkingConfig, isAlreadyHighVariant } from "./switcher" -import type { ThinkModeInput, ThinkModeState } from "./types" +import { getHighVariant, isAlreadyHighVariant } from "./switcher" +import type { ThinkModeState } from "./types" import { log } from "../../shared" const thinkModeState = new Map() @@ -10,53 +10,24 @@ export function clearThinkModeState(sessionID: string): void { } export function createThinkModeHook() { - function isDisabledThinkingConfig(config: Record): boolean { - const thinkingConfig = config.thinking - if ( - typeof thinkingConfig === "object" && - 
thinkingConfig !== null && - "type" in thinkingConfig && - (thinkingConfig as { type?: string }).type === "disabled" - ) { - return true - } - - const providerOptions = config.providerOptions - if (typeof providerOptions !== "object" || providerOptions === null) { - return false - } - - return Object.values(providerOptions as Record).some( - (providerConfig) => { - if (typeof providerConfig !== "object" || providerConfig === null) { - return false - } - - const providerConfigMap = providerConfig as Record - const extraBody = providerConfigMap.extra_body - if (typeof extraBody !== "object" || extraBody === null) { - return false - } - - const extraBodyMap = extraBody as Record - const extraThinking = extraBodyMap.thinking - return ( - typeof extraThinking === "object" && - extraThinking !== null && - (extraThinking as { type?: string }).type === "disabled" - ) - } - ) - } - return { - "chat.params": async (output: ThinkModeInput, sessionID: string): Promise => { + "chat.message": async ( + input: { + sessionID: string + model?: { providerID: string; modelID: string } + }, + output: { + message: Record + parts: Array<{ type: string; text?: string; [key: string]: unknown }> + } + ): Promise => { const promptText = extractPromptText(output.parts) + const sessionID = input.sessionID const state: ThinkModeState = { requested: false, modelSwitched: false, - thinkingConfigInjected: false, + variantSet: false, } if (!detectThinkKeyword(promptText)) { @@ -66,7 +37,12 @@ export function createThinkModeHook() { state.requested = true - const currentModel = output.message.model + if (typeof output.message.variant === "string") { + thinkModeState.set(sessionID, state) + return + } + + const currentModel = input.model if (!currentModel) { thinkModeState.set(sessionID, state) return @@ -81,14 +57,15 @@ export function createThinkModeHook() { } const highVariant = getHighVariant(currentModel.modelID) - const thinkingConfig = getThinkingConfig(currentModel.providerID, 
currentModel.modelID) if (highVariant) { output.message.model = { providerID: currentModel.providerID, modelID: highVariant, } + output.message.variant = "high" state.modelSwitched = true + state.variantSet = true log("Think mode: model switched to high variant", { sessionID, from: currentModel.modelID, @@ -96,42 +73,6 @@ export function createThinkModeHook() { }) } - if (thinkingConfig) { - const messageData = output.message as Record - const agentThinking = messageData.thinking as { type?: string } | undefined - const agentProviderOptions = messageData.providerOptions - - const agentDisabledThinking = agentThinking?.type === "disabled" - const agentHasCustomProviderOptions = Boolean(agentProviderOptions) - - if (agentDisabledThinking) { - log("Think mode: skipping - agent has thinking disabled", { - sessionID, - provider: currentModel.providerID, - }) - } else if (agentHasCustomProviderOptions) { - log("Think mode: skipping - agent has custom providerOptions", { - sessionID, - provider: currentModel.providerID, - }) - } else if ( - !isDisabledThinkingConfig(thinkingConfig as Record) - ) { - Object.assign(output.message, thinkingConfig) - state.thinkingConfigInjected = true - log("Think mode: thinking config injected", { - sessionID, - provider: currentModel.providerID, - config: thinkingConfig, - }) - } else { - log("Think mode: skipping disabled thinking config", { - sessionID, - provider: currentModel.providerID, - }) - } - } - thinkModeState.set(sessionID, state) }, diff --git a/src/hooks/think-mode/index.test.ts b/src/hooks/think-mode/index.test.ts index 43f8003b1..b0d744738 100644 --- a/src/hooks/think-mode/index.test.ts +++ b/src/hooks/think-mode/index.test.ts @@ -1,452 +1,155 @@ -import { describe, expect, it, beforeEach } from "bun:test" -import type { ThinkModeInput } from "./types" +import { beforeEach, describe, expect, it } from "bun:test" -const { createThinkModeHook, clearThinkModeState } = await import("./index") +const { clearThinkModeState, 
createThinkModeHook } = await import("./index") + +type ThinkModeHookInput = { + sessionID: string + model?: { providerID: string; modelID: string } +} + +type ThinkModeHookOutput = { + message: Record + parts: Array<{ type: string; text?: string; [key: string]: unknown }> +} + +function createHookInput(args: { + sessionID?: string + providerID?: string + modelID?: string +}): ThinkModeHookInput { + const { sessionID = "test-session-id", providerID, modelID } = args + + if (!providerID || !modelID) { + return { sessionID } + } -/** - * Helper to create a mock ThinkModeInput for testing - */ -function createMockInput( - providerID: string, - modelID: string, - promptText: string -): ThinkModeInput { return { - parts: [{ type: "text", text: promptText }], - message: { - model: { - providerID, - modelID, - }, - }, + sessionID, + model: { providerID, modelID }, } } -/** - * Type helper for accessing dynamically injected properties on message - */ -type MessageWithInjectedProps = Record +function createHookOutput(promptText: string, variant?: string): ThinkModeHookOutput { + return { + message: variant ? 
{ variant } : {}, + parts: [{ type: "text", text: promptText }], + } +} -describe("createThinkModeHook integration", () => { +describe("createThinkModeHook", () => { const sessionID = "test-session-id" beforeEach(() => { clearThinkModeState(sessionID) }) - describe("GitHub Copilot provider integration", () => { - describe("Claude models", () => { - it("should activate thinking mode for github-copilot Claude with think keyword", async () => { - // given a github-copilot Claude model and prompt with "think" keyword - const hook = createThinkModeHook() - const input = createMockInput( - "github-copilot", - "claude-opus-4-6", - "Please think deeply about this problem" - ) - - // when the chat.params hook is called - await hook["chat.params"](input, sessionID) - - // then should upgrade to high variant and inject thinking config - const message = input.message as MessageWithInjectedProps - expect(input.message.model?.modelID).toBe("claude-opus-4-6-high") - expect(message.thinking).toBeDefined() - expect((message.thinking as Record)?.type).toBe( - "enabled" - ) - expect( - (message.thinking as Record)?.budgetTokens - ).toBe(64000) - }) - - it("should handle github-copilot Claude with dots in version", async () => { - // given a github-copilot Claude model with dot format (claude-opus-4.6) - const hook = createThinkModeHook() - const input = createMockInput( - "github-copilot", - "claude-opus-4.6", - "ultrathink mode" - ) - - // when the chat.params hook is called - await hook["chat.params"](input, sessionID) - - // then should upgrade to high variant (hyphen format) - const message = input.message as MessageWithInjectedProps - expect(input.message.model?.modelID).toBe("claude-opus-4-6-high") - expect(message.thinking).toBeDefined() - }) - - it("should handle github-copilot Claude Sonnet", async () => { - // given a github-copilot Claude Sonnet model - const hook = createThinkModeHook() - const input = createMockInput( - "github-copilot", - "claude-sonnet-4-6", - "think 
about this" - ) - - // when the chat.params hook is called - await hook["chat.params"](input, sessionID) - - // then should upgrade to high variant - const message = input.message as MessageWithInjectedProps - expect(input.message.model?.modelID).toBe("claude-sonnet-4-6-high") - expect(message.thinking).toBeDefined() - }) + it("sets high variant and switches model when think keyword is present", async () => { + // given + const hook = createThinkModeHook() + const input = createHookInput({ + sessionID, + providerID: "github-copilot", + modelID: "claude-opus-4-6", }) + const output = createHookOutput("Please think deeply about this") - describe("Gemini models", () => { - it("should activate thinking mode for github-copilot Gemini Pro", async () => { - // given a github-copilot Gemini Pro model - const hook = createThinkModeHook() - const input = createMockInput( - "github-copilot", - "gemini-3-pro", - "think about this" - ) + // when + await hook["chat.message"](input, output) - // when the chat.params hook is called - await hook["chat.params"](input, sessionID) - - // then should upgrade to high variant and inject google thinking config - const message = input.message as MessageWithInjectedProps - expect(input.message.model?.modelID).toBe("gemini-3-pro-high") - expect(message.providerOptions).toBeDefined() - const googleOptions = ( - message.providerOptions as Record - )?.google as Record - expect(googleOptions?.thinkingConfig).toBeDefined() - }) - - it("should activate thinking mode for github-copilot Gemini Flash", async () => { - // given a github-copilot Gemini Flash model - const hook = createThinkModeHook() - const input = createMockInput( - "github-copilot", - "gemini-3-flash", - "ultrathink" - ) - - // when the chat.params hook is called - await hook["chat.params"](input, sessionID) - - // then should upgrade to high variant - const message = input.message as MessageWithInjectedProps - expect(input.message.model?.modelID).toBe("gemini-3-flash-high") - 
expect(message.providerOptions).toBeDefined() - }) - }) - - describe("GPT models", () => { - it("should activate thinking mode for github-copilot GPT-5.2", async () => { - // given a github-copilot GPT-5.2 model - const hook = createThinkModeHook() - const input = createMockInput( - "github-copilot", - "gpt-5.2", - "please think" - ) - - // when the chat.params hook is called - await hook["chat.params"](input, sessionID) - - // then should upgrade to high variant and inject openai thinking config - const message = input.message as MessageWithInjectedProps - expect(input.message.model?.modelID).toBe("gpt-5-2-high") - expect(message.reasoning_effort).toBe("high") - }) - - it("should activate thinking mode for github-copilot GPT-5", async () => { - // given a github-copilot GPT-5 model - const hook = createThinkModeHook() - const input = createMockInput("github-copilot", "gpt-5", "think deeply") - - // when the chat.params hook is called - await hook["chat.params"](input, sessionID) - - // then should upgrade to high variant - const message = input.message as MessageWithInjectedProps - expect(input.message.model?.modelID).toBe("gpt-5-high") - expect(message.reasoning_effort).toBe("high") - }) - }) - - describe("No think keyword", () => { - it("should NOT activate for github-copilot without think keyword", async () => { - // given a prompt without any think keyword - const hook = createThinkModeHook() - const input = createMockInput( - "github-copilot", - "claude-opus-4-6", - "Just do this task" - ) - const originalModelID = input.message.model?.modelID - - // when the chat.params hook is called - await hook["chat.params"](input, sessionID) - - // then should NOT change model or inject config - const message = input.message as MessageWithInjectedProps - expect(input.message.model?.modelID).toBe(originalModelID) - expect(message.thinking).toBeUndefined() - }) + // then + expect(output.message.variant).toBe("high") + expect(output.message.model).toEqual({ + providerID: 
"github-copilot", + modelID: "claude-opus-4-6-high", }) }) - describe("Backwards compatibility with direct providers", () => { - it("should still work for direct anthropic provider", async () => { - // given direct anthropic provider - const hook = createThinkModeHook() - const input = createMockInput( - "anthropic", - "claude-sonnet-4-6", - "think about this" - ) - - // when the chat.params hook is called - await hook["chat.params"](input, sessionID) - - // then should work as before - const message = input.message as MessageWithInjectedProps - expect(input.message.model?.modelID).toBe("claude-sonnet-4-6-high") - expect(message.thinking).toBeDefined() + it("supports dotted model IDs by switching to normalized high variant", async () => { + // given + const hook = createThinkModeHook() + const input = createHookInput({ + sessionID, + providerID: "github-copilot", + modelID: "gpt-5.2", }) + const output = createHookOutput("ultrathink about this") - it("should work for direct google-vertex-anthropic provider", async () => { - //#given direct google-vertex-anthropic provider - const hook = createThinkModeHook() - const input = createMockInput( - "google-vertex-anthropic", - "claude-opus-4-6", - "think deeply" - ) + // when + await hook["chat.message"](input, output) - //#when the chat.params hook is called - await hook["chat.params"](input, sessionID) - - //#then should upgrade model and inject Claude thinking config - const message = input.message as MessageWithInjectedProps - expect(input.message.model?.modelID).toBe("claude-opus-4-6-high") - expect(message.thinking).toBeDefined() - expect((message.thinking as Record)?.budgetTokens).toBe( - 64000 - ) - }) - - it("should still work for direct google provider", async () => { - // given direct google provider - const hook = createThinkModeHook() - const input = createMockInput( - "google", - "gemini-3-pro", - "think about this" - ) - - // when the chat.params hook is called - await hook["chat.params"](input, sessionID) 
- - // then should work as before - const message = input.message as MessageWithInjectedProps - expect(input.message.model?.modelID).toBe("gemini-3-pro-high") - expect(message.providerOptions).toBeDefined() - }) - - it("should still work for direct openai provider", async () => { - // given direct openai provider - const hook = createThinkModeHook() - const input = createMockInput("openai", "gpt-5", "think about this") - - // when the chat.params hook is called - await hook["chat.params"](input, sessionID) - - // then should work - const message = input.message as MessageWithInjectedProps - expect(input.message.model?.modelID).toBe("gpt-5-high") - expect(message.reasoning_effort).toBe("high") - }) - - it("should still work for amazon-bedrock provider", async () => { - // given amazon-bedrock provider - const hook = createThinkModeHook() - const input = createMockInput( - "amazon-bedrock", - "claude-sonnet-4-6", - "think" - ) - - // when the chat.params hook is called - await hook["chat.params"](input, sessionID) - - // then should inject bedrock thinking config - const message = input.message as MessageWithInjectedProps - expect(input.message.model?.modelID).toBe("claude-sonnet-4-6-high") - expect(message.reasoningConfig).toBeDefined() + // then + expect(output.message.variant).toBe("high") + expect(output.message.model).toEqual({ + providerID: "github-copilot", + modelID: "gpt-5-2-high", }) }) - describe("Already-high variants", () => { - it("should NOT re-upgrade already-high variants", async () => { - // given an already-high variant model - const hook = createThinkModeHook() - const input = createMockInput( - "github-copilot", - "claude-opus-4-6-high", - "think deeply" - ) - - // when the chat.params hook is called - await hook["chat.params"](input, sessionID) - - // then should NOT modify the model (already high) - const message = input.message as MessageWithInjectedProps - expect(input.message.model?.modelID).toBe("claude-opus-4-6-high") - // No additional 
thinking config should be injected - expect(message.thinking).toBeUndefined() + it("skips when message variant is already set", async () => { + // given + const hook = createThinkModeHook() + const input = createHookInput({ + sessionID, + providerID: "github-copilot", + modelID: "claude-sonnet-4-6", }) + const output = createHookOutput("think through this", "max") - it("should NOT re-upgrade already-high GPT variants", async () => { - // given an already-high GPT variant - const hook = createThinkModeHook() - const input = createMockInput( - "github-copilot", - "gpt-5.2-high", - "ultrathink" - ) + // when + await hook["chat.message"](input, output) - // when the chat.params hook is called - await hook["chat.params"](input, sessionID) - - // then should NOT modify the model - const message = input.message as MessageWithInjectedProps - expect(input.message.model?.modelID).toBe("gpt-5.2-high") - expect(message.reasoning_effort).toBeUndefined() - }) + // then + expect(output.message.variant).toBe("max") + expect(output.message.model).toBeUndefined() }) - describe("Unknown models", () => { - it("should not crash for unknown models via github-copilot", async () => { - // given an unknown model type - const hook = createThinkModeHook() - const input = createMockInput( - "github-copilot", - "llama-3-70b", - "think about this" - ) - - // when the chat.params hook is called - await hook["chat.params"](input, sessionID) - - // then should not crash and model should remain unchanged - expect(input.message.model?.modelID).toBe("llama-3-70b") + it("does nothing when think keyword is absent", async () => { + // given + const hook = createThinkModeHook() + const input = createHookInput({ + sessionID, + providerID: "google", + modelID: "gemini-3.1-pro", }) + const output = createHookOutput("Please solve this directly") + + // when + await hook["chat.message"](input, output) + + // then + expect(output.message.variant).toBeUndefined() + expect(output.message.model).toBeUndefined() 
}) - describe("Edge cases", () => { - it("should handle missing model gracefully", async () => { - // given input without a model - const hook = createThinkModeHook() - const input: ThinkModeInput = { - parts: [{ type: "text", text: "think about this" }], - message: {}, - } - - // when the chat.params hook is called - // then should not crash - await expect( - hook["chat.params"](input, sessionID) - ).resolves.toBeUndefined() + it("does not modify already-high models", async () => { + // given + const hook = createThinkModeHook() + const input = createHookInput({ + sessionID, + providerID: "openai", + modelID: "gpt-5-high", }) + const output = createHookOutput("think deeply") - it("should handle empty prompt gracefully", async () => { - // given empty prompt - const hook = createThinkModeHook() - const input = createMockInput("github-copilot", "claude-opus-4-6", "") + // when + await hook["chat.message"](input, output) - // when the chat.params hook is called - await hook["chat.params"](input, sessionID) - - // then should not upgrade (no think keyword) - expect(input.message.model?.modelID).toBe("claude-opus-4-6") - }) + // then + expect(output.message.variant).toBeUndefined() + expect(output.message.model).toBeUndefined() }) - describe("Agent-level thinking configuration respect", () => { - it("should omit Z.ai GLM disabled thinking config", async () => { - //#given a Z.ai GLM model with think prompt - const hook = createThinkModeHook() - const input = createMockInput( - "zai-coding-plan", - "glm-5", - "ultrathink mode" - ) + it("handles missing input model without crashing", async () => { + // given + const hook = createThinkModeHook() + const input = createHookInput({ sessionID }) + const output = createHookOutput("think about this") - //#when think mode resolves Z.ai thinking configuration - await hook["chat.params"](input, sessionID) + // when + await expect(hook["chat.message"](input, output)).resolves.toBeUndefined() - //#then thinking config should be 
omitted from request - const message = input.message as MessageWithInjectedProps - expect(input.message.model?.modelID).toBe("glm-5") - expect(message.thinking).toBeUndefined() - expect(message.providerOptions).toBeUndefined() - }) - - it("should NOT inject thinking config when agent has thinking disabled", async () => { - // given agent with thinking explicitly disabled - const hook = createThinkModeHook() - const input: ThinkModeInput = { - parts: [{ type: "text", text: "ultrathink deeply" }], - message: { - model: { providerID: "google", modelID: "gemini-3-pro" }, - thinking: { type: "disabled" }, - } as ThinkModeInput["message"], - } - - // when the chat.params hook is called - await hook["chat.params"](input, sessionID) - - // then should NOT override agent's thinking disabled setting - const message = input.message as MessageWithInjectedProps - expect((message.thinking as { type: string }).type).toBe("disabled") - expect(message.providerOptions).toBeUndefined() - }) - - it("should NOT inject thinking config when agent has custom providerOptions", async () => { - // given agent with custom providerOptions - const hook = createThinkModeHook() - const input: ThinkModeInput = { - parts: [{ type: "text", text: "ultrathink" }], - message: { - model: { providerID: "google", modelID: "gemini-3-flash" }, - providerOptions: { - google: { thinkingConfig: { thinkingBudget: 0 } }, - }, - } as ThinkModeInput["message"], - } - - // when the chat.params hook is called - await hook["chat.params"](input, sessionID) - - // then should NOT override agent's providerOptions - const message = input.message as MessageWithInjectedProps - const providerOpts = message.providerOptions as Record - expect((providerOpts.google as Record).thinkingConfig).toEqual({ - thinkingBudget: 0, - }) - }) - - it("should still inject thinking config when agent has no thinking override", async () => { - // given agent without thinking override - const hook = createThinkModeHook() - const input = 
createMockInput("google", "gemini-3-pro", "ultrathink") - - // when the chat.params hook is called - await hook["chat.params"](input, sessionID) - - // then should inject thinking config as normal - const message = input.message as MessageWithInjectedProps - expect(message.providerOptions).toBeDefined() - }) + // then + expect(output.message.variant).toBeUndefined() + expect(output.message.model).toBeUndefined() }) }) diff --git a/src/hooks/think-mode/switcher.test.ts b/src/hooks/think-mode/switcher.test.ts index 0abc4756d..bf20122fd 100644 --- a/src/hooks/think-mode/switcher.test.ts +++ b/src/hooks/think-mode/switcher.test.ts @@ -1,128 +1,10 @@ import { describe, expect, it } from "bun:test" import { getHighVariant, - getThinkingConfig, isAlreadyHighVariant, - THINKING_CONFIGS, } from "./switcher" describe("think-mode switcher", () => { - describe("GitHub Copilot provider support", () => { - describe("Claude models via github-copilot", () => { - it("should resolve github-copilot Claude Opus to anthropic config", () => { - // given a github-copilot provider with Claude Opus model - const providerID = "github-copilot" - const modelID = "claude-opus-4-6" - - // when getting thinking config - const config = getThinkingConfig(providerID, modelID) - - // then should return anthropic thinking config - expect(config).not.toBeNull() - expect(config?.thinking).toBeDefined() - expect((config?.thinking as Record)?.type).toBe( - "enabled" - ) - expect((config?.thinking as Record)?.budgetTokens).toBe( - 64000 - ) - }) - - it("should resolve github-copilot Claude Sonnet to anthropic config", () => { - // given a github-copilot provider with Claude Sonnet model - const config = getThinkingConfig("github-copilot", "claude-sonnet-4-6") - - // then should return anthropic thinking config - expect(config).not.toBeNull() - expect(config?.thinking).toBeDefined() - }) - - it("should handle Claude with dots in version number", () => { - // given a model ID with dots (claude-opus-4.6) - 
const config = getThinkingConfig("github-copilot", "claude-opus-4.6") - - // then should still return anthropic thinking config - expect(config).not.toBeNull() - expect(config?.thinking).toBeDefined() - }) - }) - - describe("Gemini models via github-copilot", () => { - it("should resolve github-copilot Gemini Pro to google config", () => { - // given a github-copilot provider with Gemini Pro model - const config = getThinkingConfig("github-copilot", "gemini-3-pro") - - // then should return google thinking config - expect(config).not.toBeNull() - expect(config?.providerOptions).toBeDefined() - const googleOptions = ( - config?.providerOptions as Record - )?.google as Record - expect(googleOptions?.thinkingConfig).toBeDefined() - }) - - it("should resolve github-copilot Gemini Flash to google config", () => { - // given a github-copilot provider with Gemini Flash model - const config = getThinkingConfig( - "github-copilot", - "gemini-3-flash" - ) - - // then should return google thinking config - expect(config).not.toBeNull() - expect(config?.providerOptions).toBeDefined() - }) - }) - - describe("GPT models via github-copilot", () => { - it("should resolve github-copilot GPT-5.2 to openai config", () => { - // given a github-copilot provider with GPT-5.2 model - const config = getThinkingConfig("github-copilot", "gpt-5.2") - - // then should return openai thinking config - expect(config).not.toBeNull() - expect(config?.reasoning_effort).toBe("high") - }) - - it("should resolve github-copilot GPT-5 to openai config", () => { - // given a github-copilot provider with GPT-5 model - const config = getThinkingConfig("github-copilot", "gpt-5") - - // then should return openai thinking config - expect(config).not.toBeNull() - expect(config?.reasoning_effort).toBe("high") - }) - - it("should resolve github-copilot o1 to openai config", () => { - // given a github-copilot provider with o1 model - const config = getThinkingConfig("github-copilot", "o1-preview") - - // then 
should return openai thinking config - expect(config).not.toBeNull() - expect(config?.reasoning_effort).toBe("high") - }) - - it("should resolve github-copilot o3 to openai config", () => { - // given a github-copilot provider with o3 model - const config = getThinkingConfig("github-copilot", "o3-mini") - - // then should return openai thinking config - expect(config).not.toBeNull() - expect(config?.reasoning_effort).toBe("high") - }) - }) - - describe("Unknown models via github-copilot", () => { - it("should return null for unknown model types", () => { - // given a github-copilot provider with unknown model - const config = getThinkingConfig("github-copilot", "llama-3-70b") - - // then should return null (no matching provider) - expect(config).toBeNull() - }) - }) - }) - describe("Model ID normalization", () => { describe("getHighVariant with dots vs hyphens", () => { it("should handle dots in Claude version numbers", () => { @@ -167,8 +49,8 @@ describe("think-mode switcher", () => { it("should handle Gemini preview variants", () => { // given Gemini preview model IDs - expect(getHighVariant("gemini-3-pro")).toBe( - "gemini-3-pro-high" + expect(getHighVariant("gemini-3.1-pro")).toBe( + "gemini-3-1-pro-high" ) expect(getHighVariant("gemini-3-flash")).toBe( "gemini-3-flash-high" @@ -179,7 +61,7 @@ describe("think-mode switcher", () => { // given model IDs that are already high variants expect(getHighVariant("claude-opus-4-6-high")).toBeNull() expect(getHighVariant("gpt-5-2-high")).toBeNull() - expect(getHighVariant("gemini-3-pro-high")).toBeNull() + expect(getHighVariant("gemini-3-1-pro-high")).toBeNull() }) it("should return null for unknown models", () => { @@ -195,7 +77,7 @@ describe("think-mode switcher", () => { // given model IDs with -high suffix expect(isAlreadyHighVariant("claude-opus-4-6-high")).toBe(true) expect(isAlreadyHighVariant("gpt-5-2-high")).toBe(true) - expect(isAlreadyHighVariant("gemini-3-pro-high")).toBe(true) + 
expect(isAlreadyHighVariant("gemini-3.1-pro-high")).toBe(true) }) it("should detect -high suffix after normalization", () => { @@ -208,7 +90,7 @@ describe("think-mode switcher", () => { expect(isAlreadyHighVariant("claude-opus-4-6")).toBe(false) expect(isAlreadyHighVariant("claude-opus-4.6")).toBe(false) expect(isAlreadyHighVariant("gpt-5.2")).toBe(false) - expect(isAlreadyHighVariant("gemini-3-pro")).toBe(false) + expect(isAlreadyHighVariant("gemini-3.1-pro")).toBe(false) }) it("should return false for models with 'high' in name but not suffix", () => { @@ -217,149 +99,6 @@ describe("think-mode switcher", () => { }) }) - describe("getThinkingConfig", () => { - describe("Already high variants", () => { - it("should return null for already-high variants", () => { - // given already-high model variants - expect( - getThinkingConfig("anthropic", "claude-opus-4-6-high") - ).toBeNull() - expect(getThinkingConfig("openai", "gpt-5-2-high")).toBeNull() - expect(getThinkingConfig("google", "gemini-3-pro-high")).toBeNull() - }) - - it("should return null for already-high variants via github-copilot", () => { - // given already-high model variants via github-copilot - expect( - getThinkingConfig("github-copilot", "claude-opus-4-6-high") - ).toBeNull() - expect(getThinkingConfig("github-copilot", "gpt-5.2-high")).toBeNull() - }) - }) - - describe("Non-thinking-capable models", () => { - it("should return null for non-thinking-capable models", () => { - // given models that don't support thinking mode - expect(getThinkingConfig("anthropic", "claude-2")).toBeNull() - expect(getThinkingConfig("openai", "gpt-4")).toBeNull() - expect(getThinkingConfig("google", "gemini-1")).toBeNull() - }) - }) - - describe("Unknown providers", () => { - it("should return null for unknown providers", () => { - // given unknown provider IDs - expect(getThinkingConfig("unknown-provider", "some-model")).toBeNull() - expect(getThinkingConfig("azure", "gpt-5")).toBeNull() - }) - }) - }) - - 
describe("Direct provider configs (backwards compatibility)", () => { - it("should still work for direct anthropic provider", () => { - // given direct anthropic provider - const config = getThinkingConfig("anthropic", "claude-opus-4-6") - - // then should return anthropic thinking config - expect(config).not.toBeNull() - expect(config?.thinking).toBeDefined() - expect((config?.thinking as Record)?.type).toBe("enabled") - }) - - it("should work for direct google-vertex-anthropic provider", () => { - //#given direct google-vertex-anthropic provider - const config = getThinkingConfig( - "google-vertex-anthropic", - "claude-opus-4-6" - ) - - //#when thinking config is resolved - - //#then it should return anthropic-style thinking config - expect(config).not.toBeNull() - expect(config?.thinking).toBeDefined() - expect((config?.thinking as Record)?.type).toBe("enabled") - expect((config?.thinking as Record)?.budgetTokens).toBe( - 64000 - ) - }) - - it("should still work for direct google provider", () => { - // given direct google provider - const config = getThinkingConfig("google", "gemini-3-pro") - - // then should return google thinking config - expect(config).not.toBeNull() - expect(config?.providerOptions).toBeDefined() - }) - - it("should still work for amazon-bedrock provider", () => { - // given amazon-bedrock provider with claude model - const config = getThinkingConfig("amazon-bedrock", "claude-sonnet-4-6") - - // then should return bedrock thinking config - expect(config).not.toBeNull() - expect(config?.reasoningConfig).toBeDefined() - }) - - it("should still work for google-vertex provider", () => { - // given google-vertex provider - const config = getThinkingConfig("google-vertex", "gemini-3-pro") - - // then should return google-vertex thinking config - expect(config).not.toBeNull() - expect(config?.providerOptions).toBeDefined() - const vertexOptions = (config?.providerOptions as Record)?.[ - "google-vertex" - ] as Record - 
expect(vertexOptions?.thinkingConfig).toBeDefined() - }) - - it("should work for direct openai provider", () => { - // given direct openai provider - const config = getThinkingConfig("openai", "gpt-5") - - // then should return openai thinking config - expect(config).not.toBeNull() - expect(config?.reasoning_effort).toBe("high") - }) - }) - - describe("THINKING_CONFIGS structure", () => { - it("should have correct structure for anthropic", () => { - const config = THINKING_CONFIGS.anthropic - expect(config.thinking).toBeDefined() - expect(config.maxTokens).toBe(128000) - }) - - it("should have correct structure for google-vertex-anthropic", () => { - //#given google-vertex-anthropic config entry - const config = THINKING_CONFIGS["google-vertex-anthropic"] - - //#when structure is validated - - //#then it should match anthropic style structure - expect(config.thinking).toBeDefined() - expect(config.maxTokens).toBe(128000) - }) - - it("should have correct structure for google", () => { - const config = THINKING_CONFIGS.google - expect(config.providerOptions).toBeDefined() - }) - - it("should have correct structure for openai", () => { - const config = THINKING_CONFIGS.openai - expect(config.reasoning_effort).toBe("high") - }) - - it("should have correct structure for amazon-bedrock", () => { - const config = THINKING_CONFIGS["amazon-bedrock"] - expect(config.reasoningConfig).toBeDefined() - expect(config.maxTokens).toBe(64000) - }) - }) - describe("Custom provider prefixes support", () => { describe("getHighVariant with prefixes", () => { it("should preserve vertex_ai/ prefix when getting high variant", () => { @@ -390,7 +129,7 @@ describe("think-mode switcher", () => { // given various custom prefixes expect(getHighVariant("azure/gpt-5")).toBe("azure/gpt-5-high") expect(getHighVariant("bedrock/claude-sonnet-4-6")).toBe("bedrock/claude-sonnet-4-6-high") - expect(getHighVariant("custom-llm/gemini-3-pro")).toBe("custom-llm/gemini-3-pro-high") + 
expect(getHighVariant("custom-llm/gemini-3.1-pro")).toBe("custom-llm/gemini-3-1-pro-high") }) it("should return null for prefixed models without high variant mapping", () => { @@ -411,7 +150,7 @@ describe("think-mode switcher", () => { // given prefixed model IDs with -high suffix expect(isAlreadyHighVariant("vertex_ai/claude-opus-4-6-high")).toBe(true) expect(isAlreadyHighVariant("openai/gpt-5-2-high")).toBe(true) - expect(isAlreadyHighVariant("custom/gemini-3-pro-high")).toBe(true) + expect(isAlreadyHighVariant("custom/gemini-3.1-pro-high")).toBe(true) }) it("should return false for prefixed base models", () => { @@ -426,141 +165,5 @@ describe("think-mode switcher", () => { expect(isAlreadyHighVariant("vertex_ai/gpt-5.2-high")).toBe(true) }) }) - - describe("getThinkingConfig with prefixes", () => { - it("should return null for custom providers (not in THINKING_CONFIGS)", () => { - // given custom provider with prefixed Claude model - const config = getThinkingConfig("dia-llm", "vertex_ai/claude-sonnet-4-6") - - // then should return null (custom provider not in THINKING_CONFIGS) - expect(config).toBeNull() - }) - - it("should work with prefixed models on known providers", () => { - // given known provider (anthropic) with prefixed model - // This tests that the base model name is correctly extracted for capability check - const config = getThinkingConfig("anthropic", "custom-prefix/claude-opus-4-6") - - // then should return thinking config (base model is capable) - expect(config).not.toBeNull() - expect(config?.thinking).toBeDefined() - }) - - it("should return null for prefixed models that are already high", () => { - // given prefixed already-high model - const config = getThinkingConfig("anthropic", "vertex_ai/claude-opus-4-6-high") - - // then should return null - expect(config).toBeNull() - }) - }) - - describe("Real-world custom provider scenario", () => { - it("should handle LLM proxy with vertex_ai prefix correctly", () => { - // given a custom LLM 
proxy provider using vertex_ai/ prefix - const providerID = "dia-llm" - const modelID = "vertex_ai/claude-sonnet-4-6" - - // when getting high variant - const highVariant = getHighVariant(modelID) - - // then should preserve the prefix - expect(highVariant).toBe("vertex_ai/claude-sonnet-4-6-high") - - // #and when checking if already high - expect(isAlreadyHighVariant(modelID)).toBe(false) - expect(isAlreadyHighVariant(highVariant!)).toBe(true) - - // #and when getting thinking config for custom provider - const config = getThinkingConfig(providerID, modelID) - - // then should return null (custom provider, not anthropic) - // This prevents applying incompatible thinking configs to custom providers - expect(config).toBeNull() - }) - - it("should not break when switching to high variant in think mode", () => { - // given think mode switching vertex_ai/claude model to high variant - const original = "vertex_ai/claude-opus-4-6" - const high = getHighVariant(original) - - // then the high variant should be valid - expect(high).toBe("vertex_ai/claude-opus-4-6-high") - - // #and should be recognized as already high - expect(isAlreadyHighVariant(high!)).toBe(true) - - // #and switching again should return null (already high) - expect(getHighVariant(high!)).toBeNull() - }) - }) - }) - - describe("Z.AI GLM-4.7 provider support", () => { - describe("getThinkingConfig for zai-coding-plan", () => { - it("should return thinking config for glm-5", () => { - //#given a Z.ai GLM model - const config = getThinkingConfig("zai-coding-plan", "glm-5") - - //#when thinking config is resolved - - //#then thinking type is "disabled" - expect(config).not.toBeNull() - expect(config?.providerOptions).toBeDefined() - const zaiOptions = (config?.providerOptions as Record)?.[ - "zai-coding-plan" - ] as Record - expect(zaiOptions?.extra_body).toBeDefined() - const extraBody = zaiOptions?.extra_body as Record - expect(extraBody?.thinking).toBeDefined() - expect((extraBody?.thinking as 
Record)?.type).toBe("disabled") - }) - - it("should return thinking config for glm-4.6v (multimodal)", () => { - // given zai-coding-plan provider with glm-4.6v model - const config = getThinkingConfig("zai-coding-plan", "glm-4.6v") - - // then should return zai-coding-plan thinking config - expect(config).not.toBeNull() - expect(config?.providerOptions).toBeDefined() - }) - - it("should return null for non-GLM models on zai-coding-plan", () => { - // given zai-coding-plan provider with unknown model - const config = getThinkingConfig("zai-coding-plan", "some-other-model") - - // then should return null - expect(config).toBeNull() - }) - }) - - describe("HIGH_VARIANT_MAP for GLM", () => { - it("should NOT have high variant for glm-5", () => { - // given glm-5 model - const variant = getHighVariant("glm-5") - - // then should return null (no high variant needed) - expect(variant).toBeNull() - }) - - it("should NOT have high variant for glm-4.6v", () => { - // given glm-4.6v model - const variant = getHighVariant("glm-4.6v") - - // then should return null - expect(variant).toBeNull() - }) - }) - }) - - describe("THINKING_CONFIGS structure for zai-coding-plan", () => { - it("should have correct structure for zai-coding-plan", () => { - const config = THINKING_CONFIGS["zai-coding-plan"] - expect(config.providerOptions).toBeDefined() - const zaiOptions = (config.providerOptions as Record)?.[ - "zai-coding-plan" - ] as Record - expect(zaiOptions?.extra_body).toBeDefined() - }) - }) +}) }) diff --git a/src/hooks/think-mode/switcher.ts b/src/hooks/think-mode/switcher.ts index 8d88506d6..0a1a1dd38 100644 --- a/src/hooks/think-mode/switcher.ts +++ b/src/hooks/think-mode/switcher.ts @@ -53,35 +53,7 @@ function normalizeModelID(modelID: string): string { return modelID.replace(/\.(\d+)/g, "-$1") } -/** - * Resolves proxy providers (like github-copilot) to their underlying provider. 
- * This allows GitHub Copilot to inherit thinking configurations from the actual - * model provider (Anthropic, Google, OpenAI). - * - * @example - * resolveProvider("github-copilot", "claude-opus-4-6") // "anthropic" - * resolveProvider("github-copilot", "gemini-3-pro") // "google" - * resolveProvider("github-copilot", "gpt-5.2") // "openai" - * resolveProvider("anthropic", "claude-opus-4-6") // "anthropic" (unchanged) - */ -function resolveProvider(providerID: string, modelID: string): string { - // GitHub Copilot is a proxy - infer actual provider from model name - if (providerID === "github-copilot") { - const modelLower = modelID.toLowerCase() - if (modelLower.includes("claude")) return "anthropic" - if (modelLower.includes("gemini")) return "google" - if ( - modelLower.includes("gpt") || - modelLower.includes("o1") || - modelLower.includes("o3") - ) { - return "openai" - } - } - // Direct providers or unknown - return as-is - return providerID -} // Maps model IDs to their "high reasoning" variant (internal convention) // For OpenAI models, this signals that reasoning_effort should be set to "high" @@ -90,8 +62,8 @@ const HIGH_VARIANT_MAP: Record = { "claude-sonnet-4-6": "claude-sonnet-4-6-high", "claude-opus-4-6": "claude-opus-4-6-high", // Gemini - "gemini-3-pro": "gemini-3-pro-high", - "gemini-3-pro-low": "gemini-3-pro-high", + "gemini-3-1-pro": "gemini-3-1-pro-high", + "gemini-3-1-pro-low": "gemini-3-1-pro-high", "gemini-3-flash": "gemini-3-flash-high", // GPT-5 "gpt-5": "gpt-5-high", @@ -110,77 +82,12 @@ const HIGH_VARIANT_MAP: Record = { "gpt-5-2-chat-latest": "gpt-5-2-chat-latest-high", "gpt-5-2-pro": "gpt-5-2-pro-high", // Antigravity (Google) - "antigravity-gemini-3-pro": "antigravity-gemini-3-pro-high", + "antigravity-gemini-3-1-pro": "antigravity-gemini-3-1-pro-high", "antigravity-gemini-3-flash": "antigravity-gemini-3-flash-high", } const ALREADY_HIGH: Set = new Set(Object.values(HIGH_VARIANT_MAP)) -export const THINKING_CONFIGS = { - anthropic: 
{ - thinking: { - type: "enabled", - budgetTokens: 64000, - }, - maxTokens: 128000, - }, - "google-vertex-anthropic": { - thinking: { - type: "enabled", - budgetTokens: 64000, - }, - maxTokens: 128000, - }, - "amazon-bedrock": { - reasoningConfig: { - type: "enabled", - budgetTokens: 32000, - }, - maxTokens: 64000, - }, - google: { - providerOptions: { - google: { - thinkingConfig: { - thinkingLevel: "HIGH", - }, - }, - }, - }, - "google-vertex": { - providerOptions: { - "google-vertex": { - thinkingConfig: { - thinkingLevel: "HIGH", - }, - }, - }, - }, - openai: { - reasoning_effort: "high", - }, - "zai-coding-plan": { - providerOptions: { - "zai-coding-plan": { - extra_body: { - thinking: { - type: "disabled", - }, - }, - }, - }, - }, -} as const satisfies Record> - -const THINKING_CAPABLE_MODELS = { - anthropic: ["claude-sonnet-4", "claude-opus-4", "claude-3"], - "google-vertex-anthropic": ["claude-sonnet-4", "claude-opus-4", "claude-3"], - "amazon-bedrock": ["claude", "anthropic"], - google: ["gemini-2", "gemini-3"], - "google-vertex": ["gemini-2", "gemini-3"], - openai: ["gpt-5", "o1", "o3"], - "zai-coding-plan": ["glm"], -} as const satisfies Record export function getHighVariant(modelID: string): string | null { const normalized = normalizeModelID(modelID) @@ -207,37 +114,3 @@ export function isAlreadyHighVariant(modelID: string): boolean { return ALREADY_HIGH.has(base) || base.endsWith("-high") } -type ThinkingProvider = keyof typeof THINKING_CONFIGS - -function isThinkingProvider(provider: string): provider is ThinkingProvider { - return provider in THINKING_CONFIGS -} - -export function getThinkingConfig( - providerID: string, - modelID: string -): Record | null { - const normalized = normalizeModelID(modelID) - const { base } = extractModelPrefix(normalized) - - if (isAlreadyHighVariant(normalized)) { - return null - } - - const resolvedProvider = resolveProvider(providerID, modelID) - - if (!isThinkingProvider(resolvedProvider)) { - return null - } - - 
const config = THINKING_CONFIGS[resolvedProvider] - const capablePatterns = THINKING_CAPABLE_MODELS[resolvedProvider] - - // Check capability using base model name (without prefix) - const baseLower = base.toLowerCase() - const isCapable = capablePatterns.some((pattern) => - baseLower.includes(pattern.toLowerCase()) - ) - - return isCapable ? config : null -} diff --git a/src/hooks/think-mode/types.ts b/src/hooks/think-mode/types.ts index b17d654d7..a24f1ccab 100644 --- a/src/hooks/think-mode/types.ts +++ b/src/hooks/think-mode/types.ts @@ -1,21 +1,16 @@ export interface ThinkModeState { requested: boolean modelSwitched: boolean - thinkingConfigInjected: boolean + variantSet: boolean providerID?: string modelID?: string } -export interface ModelRef { +interface ModelRef { providerID: string modelID: string } -export interface MessageWithModel { +interface MessageWithModel { model?: ModelRef } - -export interface ThinkModeInput { - parts: Array<{ type: string; text?: string }> - message: MessageWithModel -} diff --git a/src/hooks/todo-continuation-enforcer/constants.ts b/src/hooks/todo-continuation-enforcer/constants.ts index db4d7b1cc..39799c531 100644 --- a/src/hooks/todo-continuation-enforcer/constants.ts +++ b/src/hooks/todo-continuation-enforcer/constants.ts @@ -17,6 +17,6 @@ export const TOAST_DURATION_MS = 900 export const COUNTDOWN_GRACE_PERIOD_MS = 500 export const ABORT_WINDOW_MS = 3000 -export const CONTINUATION_COOLDOWN_MS = 30_000 +export const CONTINUATION_COOLDOWN_MS = 5_000 export const MAX_CONSECUTIVE_FAILURES = 5 export const FAILURE_RESET_WINDOW_MS = 5 * 60 * 1000 diff --git a/src/hooks/todo-continuation-enforcer/idle-event.ts b/src/hooks/todo-continuation-enforcer/idle-event.ts index 10708d1a3..1f944db59 100644 --- a/src/hooks/todo-continuation-enforcer/idle-event.ts +++ b/src/hooks/todo-continuation-enforcer/idle-event.ts @@ -15,6 +15,7 @@ import { MAX_CONSECUTIVE_FAILURES, } from "./constants" import { isLastAssistantMessageAborted } from 
"./abort-detection" +import { hasUnansweredQuestion } from "./pending-question-detection" import { getIncompleteCount } from "./todo" import type { MessageInfo, ResolvedMessageInfo, Todo } from "./types" import type { SessionStateStore } from "./session-state" @@ -74,6 +75,10 @@ export async function handleSessionIdle(args: { log(`[${HOOK_NAME}] Skipped: last assistant message was aborted (API fallback)`, { sessionID }) return } + if (hasUnansweredQuestion(messages)) { + log(`[${HOOK_NAME}] Skipped: pending question awaiting user response`, { sessionID }) + return + } } catch (error) { log(`[${HOOK_NAME}] Messages fetch failed, continuing`, { sessionID, error: String(error) }) } diff --git a/src/hooks/todo-continuation-enforcer/pending-question-detection.test.ts b/src/hooks/todo-continuation-enforcer/pending-question-detection.test.ts new file mode 100644 index 000000000..5ea4b214c --- /dev/null +++ b/src/hooks/todo-continuation-enforcer/pending-question-detection.test.ts @@ -0,0 +1,100 @@ +/// +import { describe, expect, test } from "bun:test" + +import { hasUnansweredQuestion } from "./pending-question-detection" + +describe("hasUnansweredQuestion", () => { + test("given empty messages, returns false", () => { + expect(hasUnansweredQuestion([])).toBe(false) + }) + + test("given null-ish input, returns false", () => { + expect(hasUnansweredQuestion(undefined as never)).toBe(false) + }) + + test("given last assistant message with question tool_use, returns true", () => { + const messages = [ + { info: { role: "user" } }, + { + info: { role: "assistant" }, + parts: [ + { type: "tool_use", name: "question" }, + ], + }, + ] + expect(hasUnansweredQuestion(messages)).toBe(true) + }) + + test("given last assistant message with question tool-invocation, returns true", () => { + const messages = [ + { info: { role: "user" } }, + { + info: { role: "assistant" }, + parts: [ + { type: "tool-invocation", toolName: "question" }, + ], + }, + ] + 
expect(hasUnansweredQuestion(messages)).toBe(true) + }) + + test("given user message after question (answered), returns false", () => { + const messages = [ + { + info: { role: "assistant" }, + parts: [ + { type: "tool_use", name: "question" }, + ], + }, + { info: { role: "user" } }, + ] + expect(hasUnansweredQuestion(messages)).toBe(false) + }) + + test("given assistant message with non-question tool, returns false", () => { + const messages = [ + { info: { role: "user" } }, + { + info: { role: "assistant" }, + parts: [ + { type: "tool_use", name: "bash" }, + ], + }, + ] + expect(hasUnansweredQuestion(messages)).toBe(false) + }) + + test("given assistant message with no parts, returns false", () => { + const messages = [ + { info: { role: "user" } }, + { info: { role: "assistant" } }, + ] + expect(hasUnansweredQuestion(messages)).toBe(false) + }) + + test("given role on message directly (not in info), returns true for question", () => { + const messages = [ + { role: "user" }, + { + role: "assistant", + parts: [ + { type: "tool_use", name: "question" }, + ], + }, + ] + expect(hasUnansweredQuestion(messages)).toBe(true) + }) + + test("given mixed tools including question, returns true", () => { + const messages = [ + { + info: { role: "assistant" }, + parts: [ + { type: "tool_use", name: "bash" }, + { type: "tool_use", name: "question" }, + ], + }, + ] + expect(hasUnansweredQuestion(messages)).toBe(true) + }) +}) diff --git a/src/hooks/todo-continuation-enforcer/pending-question-detection.ts b/src/hooks/todo-continuation-enforcer/pending-question-detection.ts new file mode 100644 index 000000000..fd97b6c35 --- /dev/null +++ b/src/hooks/todo-continuation-enforcer/pending-question-detection.ts @@ -0,0 +1,40 @@ +import { log } from "../../shared/logger" +import { HOOK_NAME } from "./constants" + +interface MessagePart { + type: string + name?: string + toolName?: string +} + +interface Message { + info?: { role?: string } + role?: string + parts?: MessagePart[] +} + 
+export function hasUnansweredQuestion(messages: Message[]): boolean { + if (!messages || messages.length === 0) return false + + for (let i = messages.length - 1; i >= 0; i--) { + const msg = messages[i] + const role = msg.info?.role ?? msg.role + + if (role === "user") return false + + if (role === "assistant" && msg.parts) { + const hasQuestion = msg.parts.some( + (part) => + (part.type === "tool_use" || part.type === "tool-invocation") && + (part.name === "question" || part.toolName === "question"), + ) + if (hasQuestion) { + log(`[${HOOK_NAME}] Detected pending question tool in last assistant message`) + return true + } + return false + } + } + + return false +} diff --git a/src/hooks/todo-continuation-enforcer/todo-continuation-enforcer.test.ts b/src/hooks/todo-continuation-enforcer/todo-continuation-enforcer.test.ts index 19d2222f6..f8e7be079 100644 --- a/src/hooks/todo-continuation-enforcer/todo-continuation-enforcer.test.ts +++ b/src/hooks/todo-continuation-enforcer/todo-continuation-enforcer.test.ts @@ -297,6 +297,31 @@ describe("todo-continuation-enforcer", () => { expect(promptCalls).toHaveLength(0) }) + test("should not inject when remaining todos are blocked or deleted", async () => { + // given - session where non-completed todos are only blocked/deleted + const sessionID = "main-blocked-deleted" + setMainSession(sessionID) + + const mockInput = createMockPluginInput() + mockInput.client.session.todo = async () => ({ data: [ + { id: "1", content: "Blocked task", status: "blocked", priority: "high" }, + { id: "2", content: "Deleted task", status: "deleted", priority: "medium" }, + { id: "3", content: "Done task", status: "completed", priority: "low" }, + ]}) + + const hook = createTodoContinuationEnforcer(mockInput, {}) + + // when - session goes idle + await hook.handler({ + event: { type: "session.idle", properties: { sessionID } }, + }) + + await fakeTimers.advanceBy(3000) + + // then - no continuation injected + expect(promptCalls).toHaveLength(0) 
+ }) + test("should not inject when background tasks are running", async () => { // given - session with running background tasks const sessionID = "main-789" @@ -1663,7 +1688,6 @@ describe("todo-continuation-enforcer", () => { test("should cancel all countdowns via cancelAllCountdowns", async () => { // given - multiple sessions with running countdowns const session1 = "main-cancel-all-1" - const session2 = "main-cancel-all-2" setMainSession(session1) const hook = createTodoContinuationEnforcer(createMockPluginInput(), {}) diff --git a/src/hooks/todo-continuation-enforcer/todo.ts b/src/hooks/todo-continuation-enforcer/todo.ts index dbc6f5b61..1847cb527 100644 --- a/src/hooks/todo-continuation-enforcer/todo.ts +++ b/src/hooks/todo-continuation-enforcer/todo.ts @@ -1,5 +1,11 @@ import type { Todo } from "./types" export function getIncompleteCount(todos: Todo[]): number { - return todos.filter((todo) => todo.status !== "completed" && todo.status !== "cancelled").length + return todos.filter( + (todo) => + todo.status !== "completed" + && todo.status !== "cancelled" + && todo.status !== "blocked" + && todo.status !== "deleted", + ).length } diff --git a/src/plugin-config.test.ts b/src/plugin-config.test.ts index 549c5f1e8..5e2cd08aa 100644 --- a/src/plugin-config.test.ts +++ b/src/plugin-config.test.ts @@ -32,7 +32,7 @@ describe("mergeConfigs", () => { temperature: 0.3, }, visual: { - model: "google/gemini-3-pro", + model: "google/gemini-3.1-pro", }, }, } as unknown as OhMyOpenCodeConfig; @@ -46,7 +46,7 @@ describe("mergeConfigs", () => { // then quick should be preserved from base expect(result.categories?.quick?.model).toBe("anthropic/claude-haiku-4-5"); // then visual should be added from override - expect(result.categories?.visual?.model).toBe("google/gemini-3-pro"); + expect(result.categories?.visual?.model).toBe("google/gemini-3.1-pro"); }); it("should preserve base categories when override has no categories", () => { diff --git 
a/src/plugin-handlers/agent-config-handler.ts b/src/plugin-handlers/agent-config-handler.ts index 088bb1d06..7d8893be8 100644 --- a/src/plugin-handlers/agent-config-handler.ts +++ b/src/plugin-handlers/agent-config-handler.ts @@ -135,7 +135,14 @@ export async function applyAgentConfig(params: { useTaskSystem, disableOmoEnv, ); + const disabledAgentNames = new Set( + (migratedDisabledAgents ?? []).map(a => a.toLowerCase()) + ); + const filterDisabledAgents = (agents: Record) => + Object.fromEntries( + Object.entries(agents).filter(([name]) => !disabledAgentNames.has(name.toLowerCase())) + ); const isSisyphusEnabled = params.pluginConfig.sisyphus_agent?.disabled !== true; const builderEnabled = params.pluginConfig.sisyphus_agent?.default_builder_enabled ?? false; @@ -223,9 +230,9 @@ export async function applyAgentConfig(params: { ...Object.fromEntries( Object.entries(builtinAgents).filter(([key]) => key !== "sisyphus"), ), - ...userAgents, - ...projectAgents, - ...pluginAgents, + ...filterDisabledAgents(userAgents), + ...filterDisabledAgents(projectAgents), + ...filterDisabledAgents(pluginAgents), ...filteredConfigAgents, build: { ...migratedBuild, mode: "subagent", hidden: true }, ...(planDemoteConfig ? 
{ plan: planDemoteConfig } : {}), @@ -233,9 +240,9 @@ export async function applyAgentConfig(params: { } else { params.config.agent = { ...builtinAgents, - ...userAgents, - ...projectAgents, - ...pluginAgents, + ...filterDisabledAgents(userAgents), + ...filterDisabledAgents(projectAgents), + ...filterDisabledAgents(pluginAgents), ...configAgent, }; } diff --git a/src/plugin-handlers/agent-key-remapper.test.ts b/src/plugin-handlers/agent-key-remapper.test.ts index fe78ea739..179f54d10 100644 --- a/src/plugin-handlers/agent-key-remapper.test.ts +++ b/src/plugin-handlers/agent-key-remapper.test.ts @@ -2,7 +2,7 @@ import { describe, it, expect } from "bun:test" import { remapAgentKeysToDisplayNames } from "./agent-key-remapper" describe("remapAgentKeysToDisplayNames", () => { - it("remaps known agent keys to display names", () => { + it("remaps known agent keys to display names while preserving original keys", () => { // given agents with lowercase keys const agents = { sisyphus: { prompt: "test", mode: "primary" }, @@ -12,10 +12,11 @@ describe("remapAgentKeysToDisplayNames", () => { // when remapping const result = remapAgentKeysToDisplayNames(agents) - // then known agents get display name keys + // then known agents get display name keys and original keys remain accessible expect(result["Sisyphus (Ultraworker)"]).toBeDefined() expect(result["oracle"]).toBeDefined() - expect(result["sisyphus"]).toBeUndefined() + expect(result["sisyphus"]).toBeDefined() + expect(result["Sisyphus (Ultraworker)"]).toBe(result["sisyphus"]) }) it("preserves unknown agent keys unchanged", () => { @@ -31,7 +32,7 @@ describe("remapAgentKeysToDisplayNames", () => { expect(result["custom-agent"]).toBeDefined() }) - it("remaps all core agents", () => { + it("remaps all core agents while preserving original keys", () => { // given all core agents const agents = { sisyphus: {}, @@ -46,15 +47,20 @@ describe("remapAgentKeysToDisplayNames", () => { // when remapping const result = 
remapAgentKeysToDisplayNames(agents) - // then all get display name keys - expect(Object.keys(result)).toEqual([ - "Sisyphus (Ultraworker)", - "Hephaestus (Deep Agent)", - "Prometheus (Plan Builder)", - "Atlas (Plan Executor)", - "Metis (Plan Consultant)", - "Momus (Plan Critic)", - "Sisyphus-Junior", - ]) + // then all get display name keys while original keys still work + expect(result["Sisyphus (Ultraworker)"]).toBeDefined() + expect(result["sisyphus"]).toBeDefined() + expect(result["Hephaestus (Deep Agent)"]).toBeDefined() + expect(result["hephaestus"]).toBeDefined() + expect(result["Prometheus (Plan Builder)"]).toBeDefined() + expect(result["prometheus"]).toBeDefined() + expect(result["Atlas (Plan Executor)"]).toBeDefined() + expect(result["atlas"]).toBeDefined() + expect(result["Metis (Plan Consultant)"]).toBeDefined() + expect(result["metis"]).toBeDefined() + expect(result["Momus (Plan Critic)"]).toBeDefined() + expect(result["momus"]).toBeDefined() + expect(result["Sisyphus-Junior"]).toBeDefined() + expect(result["sisyphus-junior"]).toBeDefined() }) }) diff --git a/src/plugin-handlers/agent-key-remapper.ts b/src/plugin-handlers/agent-key-remapper.ts index dd2a127e0..c60bcfcb9 100644 --- a/src/plugin-handlers/agent-key-remapper.ts +++ b/src/plugin-handlers/agent-key-remapper.ts @@ -9,6 +9,7 @@ export function remapAgentKeysToDisplayNames( const displayName = AGENT_DISPLAY_NAMES[key] if (displayName && displayName !== key) { result[displayName] = value + result[key] = value } else { result[key] = value } diff --git a/src/plugin-handlers/config-handler-formatter.test.ts b/src/plugin-handlers/config-handler-formatter.test.ts new file mode 100644 index 000000000..d8fb8494f --- /dev/null +++ b/src/plugin-handlers/config-handler-formatter.test.ts @@ -0,0 +1,120 @@ +import { afterEach, beforeEach, describe, expect, spyOn, test } from "bun:test" + +import type { OhMyOpenCodeConfig } from "../config" +import { createConfigHandler } from "./config-handler" +import * 
as agentConfigHandler from "./agent-config-handler" +import * as commandConfigHandler from "./command-config-handler" +import * as mcpConfigHandler from "./mcp-config-handler" +import * as pluginComponentsLoader from "./plugin-components-loader" +import * as providerConfigHandler from "./provider-config-handler" +import * as shared from "../shared" +import * as toolConfigHandler from "./tool-config-handler" + +let logSpy: ReturnType +let loadPluginComponentsSpy: ReturnType +let applyAgentConfigSpy: ReturnType +let applyToolConfigSpy: ReturnType +let applyMcpConfigSpy: ReturnType +let applyCommandConfigSpy: ReturnType +let applyProviderConfigSpy: ReturnType + +beforeEach(() => { + logSpy = spyOn(shared, "log").mockImplementation(() => {}) + loadPluginComponentsSpy = spyOn( + pluginComponentsLoader, + "loadPluginComponents", + ).mockResolvedValue({ + commands: {}, + skills: {}, + agents: {}, + mcpServers: {}, + hooksConfigs: [], + plugins: [], + errors: [], + }) + applyAgentConfigSpy = spyOn(agentConfigHandler, "applyAgentConfig").mockResolvedValue( + {}, + ) + applyToolConfigSpy = spyOn(toolConfigHandler, "applyToolConfig").mockImplementation( + () => {}, + ) + applyMcpConfigSpy = spyOn(mcpConfigHandler, "applyMcpConfig").mockResolvedValue() + applyCommandConfigSpy = spyOn( + commandConfigHandler, + "applyCommandConfig", + ).mockResolvedValue() + applyProviderConfigSpy = spyOn( + providerConfigHandler, + "applyProviderConfig", + ).mockImplementation(() => {}) +}) + +afterEach(() => { + logSpy.mockRestore() + loadPluginComponentsSpy.mockRestore() + applyAgentConfigSpy.mockRestore() + applyToolConfigSpy.mockRestore() + applyMcpConfigSpy.mockRestore() + applyCommandConfigSpy.mockRestore() + applyProviderConfigSpy.mockRestore() +}) + +describe("createConfigHandler formatter pass-through", () => { + test("preserves formatter object configured in opencode config", async () => { + // given + const pluginConfig: OhMyOpenCodeConfig = {} + const formatterConfig = { + 
prettier: { + command: ["prettier", "--write"], + extensions: [".ts", ".tsx"], + environment: { + PRETTIERD_DEFAULT_CONFIG: ".prettierrc", + }, + }, + eslint: { + disabled: false, + command: ["eslint", "--fix"], + extensions: [".js", ".ts"], + }, + } + const config: Record = { + formatter: formatterConfig, + } + const handler = createConfigHandler({ + ctx: { directory: "/tmp" }, + pluginConfig, + modelCacheState: { + anthropicContext1MEnabled: false, + modelContextLimitsCache: new Map(), + }, + }) + + // when + await handler(config) + + // then + expect(config.formatter).toEqual(formatterConfig) + }) + + test("preserves formatter=false configured in opencode config", async () => { + // given + const pluginConfig: OhMyOpenCodeConfig = {} + const config: Record = { + formatter: false, + } + const handler = createConfigHandler({ + ctx: { directory: "/tmp" }, + pluginConfig, + modelCacheState: { + anthropicContext1MEnabled: false, + modelContextLimitsCache: new Map(), + }, + }) + + // when + await handler(config) + + // then + expect(config.formatter).toBe(false) + }) +}) diff --git a/src/plugin-handlers/config-handler.test.ts b/src/plugin-handlers/config-handler.test.ts index 264460f1d..6896898c2 100644 --- a/src/plugin-handlers/config-handler.test.ts +++ b/src/plugin-handlers/config-handler.test.ts @@ -823,7 +823,7 @@ describe("Prometheus category config resolution", () => { // then expect(config).toBeDefined() - expect(config?.model).toBe("google/gemini-3-pro") + expect(config?.model).toBe("google/gemini-3.1-pro") }) test("user categories override default categories", () => { diff --git a/src/plugin-handlers/config-handler.ts b/src/plugin-handlers/config-handler.ts index e9b814a4f..47050300f 100644 --- a/src/plugin-handlers/config-handler.ts +++ b/src/plugin-handlers/config-handler.ts @@ -20,6 +20,8 @@ export function createConfigHandler(deps: ConfigHandlerDeps) { const { ctx, pluginConfig, modelCacheState } = deps; return async (config: Record) => { + const 
formatterConfig = config.formatter; + applyProviderConfig({ config, modelCacheState }); const pluginComponents = await loadPluginComponents({ pluginConfig }); @@ -35,6 +37,8 @@ export function createConfigHandler(deps: ConfigHandlerDeps) { await applyMcpConfig({ config, pluginConfig, pluginComponents }); await applyCommandConfig({ config, pluginConfig, ctx, pluginComponents }); + config.formatter = formatterConfig; + log("[config-handler] config handler applied", { agentCount: Object.keys(agentResult).length, commandCount: Object.keys((config.command as Record) ?? {}) diff --git a/src/plugin/chat-message.test.ts b/src/plugin/chat-message.test.ts index 8cebd6b43..a10968303 100644 --- a/src/plugin/chat-message.test.ts +++ b/src/plugin/chat-message.test.ts @@ -19,6 +19,7 @@ function createMockHandlerArgs(overrides?: { }, hooks: { stopContinuationGuard: null, + backgroundNotificationHook: null, keywordDetector: null, claudeCodeHooks: null, autoSlashCommand: null, @@ -115,4 +116,30 @@ describe("createChatMessageHandler - TUI variant passthrough", () => { //#then - gate should still be marked as applied expect(args._appliedSessions).toContain("test-session") }) + + test("injects queued background notifications through chat.message hook", async () => { + //#given + const args = createMockHandlerArgs() + args.hooks.backgroundNotificationHook = { + "chat.message": async ( + _input: { sessionID: string }, + output: ChatMessageHandlerOutput, + ): Promise => { + output.parts.push({ + type: "text", + text: "[BACKGROUND TASK COMPLETED]", + }) + }, + } + const handler = createChatMessageHandler(args) + const input = createMockInput("hephaestus", { providerID: "openai", modelID: "gpt-5.3-codex" }) + const output = createMockOutput() + + //#when + await handler(input, output) + + //#then + expect(output.parts).toHaveLength(1) + expect(output.parts[0].text).toContain("[BACKGROUND TASK COMPLETED]") + }) }) diff --git a/src/plugin/chat-message.ts b/src/plugin/chat-message.ts index 
f3c02297f..2cc55c892 100644 --- a/src/plugin/chat-message.ts +++ b/src/plugin/chat-message.ts @@ -97,8 +97,10 @@ export function createChatMessageHandler(args: { setSessionModel(input.sessionID, input.model) } await hooks.stopContinuationGuard?.["chat.message"]?.(input) + await hooks.backgroundNotificationHook?.["chat.message"]?.(input, output) await hooks.runtimeFallback?.["chat.message"]?.(input, output) await hooks.keywordDetector?.["chat.message"]?.(input, output) + await hooks.thinkMode?.["chat.message"]?.(input, output) await hooks.claudeCodeHooks?.["chat.message"]?.(input, output) await hooks.autoSlashCommand?.["chat.message"]?.(input, output) await hooks.noSisyphusGpt?.["chat.message"]?.(input, output) diff --git a/src/plugin/hooks/create-continuation-hooks.ts b/src/plugin/hooks/create-continuation-hooks.ts index 96bf5de0c..da453f58d 100644 --- a/src/plugin/hooks/create-continuation-hooks.ts +++ b/src/plugin/hooks/create-continuation-hooks.ts @@ -49,7 +49,10 @@ export function createContinuationHooks(args: { safeCreateHook(hookName, factory, { enabled: safeHookEnabled }) const stopContinuationGuard = isHookEnabled("stop-continuation-guard") - ? safeHook("stop-continuation-guard", () => createStopContinuationGuardHook(ctx)) + ? safeHook("stop-continuation-guard", () => + createStopContinuationGuardHook(ctx, { + backgroundManager, + })) : null const compactionContextInjector = isHookEnabled("compaction-context-injector") diff --git a/src/plugin/hooks/create-session-hooks.ts b/src/plugin/hooks/create-session-hooks.ts index daa4e12e0..daa5e4ff5 100644 --- a/src/plugin/hooks/create-session-hooks.ts +++ b/src/plugin/hooks/create-session-hooks.ts @@ -232,7 +232,10 @@ export function createSessionHooks(args: { : null const noHephaestusNonGpt = isHookEnabled("no-hephaestus-non-gpt") - ? safeHook("no-hephaestus-non-gpt", () => createNoHephaestusNonGptHook(ctx)) + ? 
safeHook("no-hephaestus-non-gpt", () => + createNoHephaestusNonGptHook(ctx, { + allowNonGptModel: pluginConfig.agents?.hephaestus?.allow_non_gpt_model, + })) : null const questionLabelTruncator = isHookEnabled("question-label-truncator") diff --git a/src/plugin/ultrawork-db-model-override.ts b/src/plugin/ultrawork-db-model-override.ts index 17d84a928..9009fb066 100644 --- a/src/plugin/ultrawork-db-model-override.ts +++ b/src/plugin/ultrawork-db-model-override.ts @@ -21,11 +21,10 @@ function tryUpdateMessageModel( ) const result = stmt.run(targetModel.providerID, targetModel.modelID, messageId) if (result.changes === 0) return false - if (variant) { db.prepare( - `UPDATE message SET data = json_set(data, '$.variant', ?, '$.thinking', ?) WHERE id = ?`, - ).run(variant, variant, messageId) + `UPDATE message SET data = json_set(data, '$.variant', ?) WHERE id = ?`, + ).run(variant, messageId) } return true } diff --git a/src/plugin/ultrawork-model-override.test.ts b/src/plugin/ultrawork-model-override.test.ts index 4f167e963..26dae2415 100644 --- a/src/plugin/ultrawork-model-override.test.ts +++ b/src/plugin/ultrawork-model-override.test.ts @@ -279,6 +279,30 @@ describe("applyUltraworkModelOverrideOnMessage", () => { ) }) + test("should override keyword-detector variant with configured ultrawork variant on deferred path", () => { + //#given + const config = createConfig("sisyphus", { + model: "anthropic/claude-opus-4-6", + variant: "extended", + }) + const output = createOutput("ultrawork do something", { messageId: "msg_123" }) + output.message["variant"] = "max" + output.message["thinking"] = "max" + const tui = createMockTui() + + //#when + applyUltraworkModelOverrideOnMessage(config, "sisyphus", output, tui) + + //#then + expect(dbOverrideSpy).toHaveBeenCalledWith( + "msg_123", + { providerID: "anthropic", modelID: "claude-opus-4-6" }, + "extended", + ) + expect(output.message["variant"]).toBe("extended") + expect(output.message["thinking"]).toBe("extended") + }) + 
test("should NOT mutate output.message.model when message ID present", () => { //#given const sonnetModel = { providerID: "anthropic", modelID: "claude-sonnet-4-6" } @@ -308,7 +332,6 @@ describe("applyUltraworkModelOverrideOnMessage", () => { //#then expect(output.message.model).toEqual({ providerID: "anthropic", modelID: "claude-opus-4-6" }) expect(output.message["variant"]).toBe("max") - expect(output.message["thinking"]).toBe("max") expect(dbOverrideSpy).not.toHaveBeenCalled() }) @@ -324,7 +347,6 @@ describe("applyUltraworkModelOverrideOnMessage", () => { //#then expect(output.message.model).toBeUndefined() expect(output.message["variant"]).toBe("high") - expect(output.message["thinking"]).toBe("high") expect(dbOverrideSpy).not.toHaveBeenCalled() }) diff --git a/src/plugin/ultrawork-model-override.ts b/src/plugin/ultrawork-model-override.ts index f6aa87bd2..736926bf6 100644 --- a/src/plugin/ultrawork-model-override.ts +++ b/src/plugin/ultrawork-model-override.ts @@ -114,11 +114,12 @@ export function applyUltraworkModelOverrideOnMessage( const override = resolveUltraworkOverride(pluginConfig, inputAgentName, output, sessionID) if (!override) return + if (override.variant) { + output.message["variant"] = override.variant + output.message["thinking"] = override.variant + } + if (!override.providerID || !override.modelID) { - if (override.variant) { - output.message["variant"] = override.variant - output.message["thinking"] = override.variant - } return } @@ -132,11 +133,8 @@ export function applyUltraworkModelOverrideOnMessage( if (!messageId) { log("[ultrawork-model-override] No message ID found, falling back to direct mutation") output.message.model = targetModel - if (override.variant) { - output.message["variant"] = override.variant - output.message["thinking"] = override.variant - } return + } const fromModel = (output.message.model as { modelID?: string } | undefined)?.modelID ?? 
"unknown" diff --git a/src/shared/migration.test.ts b/src/shared/migration.test.ts index 7846cc725..eb3d1d101 100644 --- a/src/shared/migration.test.ts +++ b/src/shared/migration.test.ts @@ -774,7 +774,7 @@ describe("migrateAgentConfigToCategory", () => { test("migrates model to category when mapping exists", () => { // given: Config with a model that has a category mapping const config = { - model: "google/gemini-3-pro", + model: "google/gemini-3.1-pro", temperature: 0.5, top_p: 0.9, } @@ -823,7 +823,7 @@ describe("migrateAgentConfigToCategory", () => { test("handles all mapped models correctly", () => { // given: Configs for each mapped model const configs = [ - { model: "google/gemini-3-pro" }, + { model: "google/gemini-3.1-pro" }, { model: "google/gemini-3-flash" }, { model: "openai/gpt-5.2" }, { model: "anthropic/claude-haiku-4-5" }, @@ -893,7 +893,7 @@ describe("shouldDeleteAgentConfig", () => { // given: Config with fields matching category defaults const config = { category: "visual-engineering", - model: "google/gemini-3-pro", + model: "google/gemini-3.1-pro", } // when: Check if config should be deleted @@ -1021,7 +1021,7 @@ describe("migrateConfigFile with backup", () => { agents: { "multimodal-looker": { model: "anthropic/claude-haiku-4-5" }, oracle: { model: "openai/gpt-5.2" }, - "my-custom-agent": { model: "google/gemini-3-pro" }, + "my-custom-agent": { model: "google/gemini-3.1-pro" }, }, } @@ -1037,7 +1037,7 @@ describe("migrateConfigFile with backup", () => { const agents = rawConfig.agents as Record> expect(agents["multimodal-looker"].model).toBe("anthropic/claude-haiku-4-5") expect(agents.oracle.model).toBe("openai/gpt-5.2") - expect(agents["my-custom-agent"].model).toBe("google/gemini-3-pro") + expect(agents["my-custom-agent"].model).toBe("google/gemini-3.1-pro") }) test("preserves category setting when explicitly set", () => { diff --git a/src/shared/migration/agent-category.ts b/src/shared/migration/agent-category.ts index 51aac23d7..8b7df75a4 
100644 --- a/src/shared/migration/agent-category.ts +++ b/src/shared/migration/agent-category.ts @@ -12,7 +12,7 @@ * This map will be removed in a future major version once migration period ends. */ export const MODEL_TO_CATEGORY_MAP: Record = { - "google/gemini-3-pro": "visual-engineering", + "google/gemini-3.1-pro": "visual-engineering", "google/gemini-3-flash": "writing", "openai/gpt-5.2": "ultrabrain", "anthropic/claude-haiku-4-5": "quick", diff --git a/src/shared/model-availability.test.ts b/src/shared/model-availability.test.ts index 23a3f00f6..cb469b960 100644 --- a/src/shared/model-availability.test.ts +++ b/src/shared/model-availability.test.ts @@ -63,7 +63,7 @@ describe("fetchAvailableModels", () => { writeModelsCache({ openai: { id: "openai", models: { "gpt-5.2": { id: "gpt-5.2" } } }, anthropic: { id: "anthropic", models: { "claude-opus-4-6": { id: "claude-opus-4-6" } } }, - google: { id: "google", models: { "gemini-3-pro": { id: "gemini-3-pro" } } }, + google: { id: "google", models: { "gemini-3.1-pro": { id: "gemini-3.1-pro" } } }, }) const result = await fetchAvailableModels(undefined, { @@ -74,7 +74,7 @@ describe("fetchAvailableModels", () => { expect(result.size).toBe(3) expect(result.has("openai/gpt-5.2")).toBe(true) expect(result.has("anthropic/claude-opus-4-6")).toBe(true) - expect(result.has("google/gemini-3-pro")).toBe(true) + expect(result.has("google/gemini-3.1-pro")).toBe(true) }) it("#given connectedProviders unknown #when fetchAvailableModels called without options #then returns empty Set", async () => { @@ -97,7 +97,7 @@ describe("fetchAvailableModels", () => { list: async () => ({ data: [ { id: "gpt-5.3-codex", provider: "openai" }, - { id: "gemini-3-pro", provider: "google" }, + { id: "gemini-3.1-pro", provider: "google" }, ], }), }, @@ -107,7 +107,7 @@ describe("fetchAvailableModels", () => { expect(result).toBeInstanceOf(Set) expect(result.has("openai/gpt-5.3-codex")).toBe(true) - expect(result.has("google/gemini-3-pro")).toBe(false) 
+ expect(result.has("google/gemini-3.1-pro")).toBe(false) }) it("#given cache file not found #when fetchAvailableModels called with connectedProviders #then returns empty Set", async () => { @@ -126,7 +126,7 @@ describe("fetchAvailableModels", () => { list: async () => ({ data: [ { id: "gpt-5.3-codex", provider: "openai" }, - { id: "gemini-3-pro", provider: "google" }, + { id: "gemini-3.1-pro", provider: "google" }, ], }), }, @@ -136,7 +136,7 @@ describe("fetchAvailableModels", () => { expect(result).toBeInstanceOf(Set) expect(result.has("openai/gpt-5.3-codex")).toBe(true) - expect(result.has("google/gemini-3-pro")).toBe(true) + expect(result.has("google/gemini-3.1-pro")).toBe(true) }) it("#given cache read twice #when second call made with same providers #then reads fresh each time", async () => { @@ -515,7 +515,7 @@ describe("fetchAvailableModels with connected providers filtering", () => { writeModelsCache({ openai: { models: { "gpt-5.2": { id: "gpt-5.2" } } }, anthropic: { models: { "claude-opus-4-6": { id: "claude-opus-4-6" } } }, - google: { models: { "gemini-3-pro": { id: "gemini-3-pro" } } }, + google: { models: { "gemini-3.1-pro": { id: "gemini-3.1-pro" } } }, }) const result = await fetchAvailableModels(undefined, { @@ -525,7 +525,7 @@ describe("fetchAvailableModels with connected providers filtering", () => { expect(result.size).toBe(1) expect(result.has("anthropic/claude-opus-4-6")).toBe(true) expect(result.has("openai/gpt-5.2")).toBe(false) - expect(result.has("google/gemini-3-pro")).toBe(false) + expect(result.has("google/gemini-3.1-pro")).toBe(false) }) // given cache with multiple providers @@ -535,7 +535,7 @@ describe("fetchAvailableModels with connected providers filtering", () => { writeModelsCache({ openai: { models: { "gpt-5.2": { id: "gpt-5.2" } } }, anthropic: { models: { "claude-opus-4-6": { id: "claude-opus-4-6" } } }, - google: { models: { "gemini-3-pro": { id: "gemini-3-pro" } } }, + google: { models: { "gemini-3.1-pro": { id: 
"gemini-3.1-pro" } } }, }) const result = await fetchAvailableModels(undefined, { @@ -544,7 +544,7 @@ describe("fetchAvailableModels with connected providers filtering", () => { expect(result.size).toBe(2) expect(result.has("anthropic/claude-opus-4-6")).toBe(true) - expect(result.has("google/gemini-3-pro")).toBe(true) + expect(result.has("google/gemini-3.1-pro")).toBe(true) expect(result.has("openai/gpt-5.2")).toBe(false) }) @@ -759,7 +759,7 @@ describe("fetchAvailableModels with provider-models cache (whitelist-filtered)", models: { opencode: ["big-pickle"], anthropic: ["claude-opus-4-6"], - google: ["gemini-3-pro"] + google: ["gemini-3.1-pro"] }, connected: ["opencode", "anthropic", "google"] }) @@ -771,7 +771,7 @@ describe("fetchAvailableModels with provider-models cache (whitelist-filtered)", expect(result.size).toBe(1) expect(result.has("opencode/big-pickle")).toBe(true) expect(result.has("anthropic/claude-opus-4-6")).toBe(false) - expect(result.has("google/gemini-3-pro")).toBe(false) + expect(result.has("google/gemini-3.1-pro")).toBe(false) }) it("should handle object[] format with metadata (Ollama-style)", async () => { @@ -953,7 +953,7 @@ describe("fallback model availability", () => { { providers: ["openai"], model: "gpt-5.2" }, { providers: ["anthropic"], model: "claude-opus-4-6" }, ] - const availableModels = new Set(["google/gemini-3-pro"]) + const availableModels = new Set(["google/gemini-3.1-pro"]) // when const result = resolveFirstAvailableFallback(fallbackChain, availableModels) diff --git a/src/shared/model-requirements.test.ts b/src/shared/model-requirements.test.ts index 2991775eb..df4cd696a 100644 --- a/src/shared/model-requirements.test.ts +++ b/src/shared/model-requirements.test.ts @@ -168,14 +168,14 @@ describe("AGENT_MODEL_REQUIREMENTS", () => { expect(primary.providers[0]).toBe("opencode") }) - test("hephaestus requires openai/opencode provider (not github-copilot since gpt-5.3-codex unavailable there)", () => { + test("hephaestus supports 
openai, github-copilot, venice, and opencode providers", () => { // #given - hephaestus agent requirement const hephaestus = AGENT_MODEL_REQUIREMENTS["hephaestus"] // #when - accessing hephaestus requirement - // #then - requiresProvider is set to openai and opencode only (github-copilot removed) + // #then - requiresProvider includes openai, github-copilot, venice, and opencode expect(hephaestus).toBeDefined() - expect(hephaestus.requiresProvider).toEqual(["openai", "opencode"]) + expect(hephaestus.requiresProvider).toEqual(["openai", "github-copilot", "venice", "opencode"]) expect(hephaestus.requiresModel).toBeUndefined() }) @@ -248,19 +248,19 @@ describe("CATEGORY_MODEL_REQUIREMENTS", () => { expect(primary.providers[0]).toBe("openai") }) - test("visual-engineering has valid fallbackChain with gemini-3-pro high as primary", () => { + test("visual-engineering has valid fallbackChain with gemini-3.1-pro high as primary", () => { // given - visual-engineering category requirement const visualEngineering = CATEGORY_MODEL_REQUIREMENTS["visual-engineering"] // when - accessing visual-engineering requirement - // then - fallbackChain: gemini-3-pro(high) → glm-5 → opus-4-6(max) + // then - fallbackChain: gemini-3.1-pro(high) → glm-5 → opus-4-6(max) expect(visualEngineering).toBeDefined() expect(visualEngineering.fallbackChain).toBeArray() expect(visualEngineering.fallbackChain).toHaveLength(3) const primary = visualEngineering.fallbackChain[0] expect(primary.providers[0]).toBe("google") - expect(primary.model).toBe("gemini-3-pro") + expect(primary.model).toBe("gemini-3.1-pro") expect(primary.variant).toBe("high") const second = visualEngineering.fallbackChain[1] @@ -319,39 +319,43 @@ describe("CATEGORY_MODEL_REQUIREMENTS", () => { expect(primary.providers).toEqual(["anthropic", "github-copilot", "opencode"]) }) - test("artistry has valid fallbackChain with gemini-3-pro as primary", () => { + test("artistry has valid fallbackChain with gemini-3.1-pro as primary", () => { 
// given - artistry category requirement const artistry = CATEGORY_MODEL_REQUIREMENTS["artistry"] // when - accessing artistry requirement - // then - fallbackChain exists with gemini-3-pro as first entry + // then - fallbackChain exists with gemini-3.1-pro as first entry expect(artistry).toBeDefined() expect(artistry.fallbackChain).toBeArray() expect(artistry.fallbackChain.length).toBeGreaterThan(0) const primary = artistry.fallbackChain[0] - expect(primary.model).toBe("gemini-3-pro") + expect(primary.model).toBe("gemini-3.1-pro") expect(primary.variant).toBe("high") expect(primary.providers[0]).toBe("google") }) - test("writing has valid fallbackChain with gemini-3-flash as primary", () => { + test("writing has valid fallbackChain with kimi-k2.5-free as primary", () => { // given - writing category requirement const writing = CATEGORY_MODEL_REQUIREMENTS["writing"] // when - accessing writing requirement - // then - fallbackChain: gemini-3-flash → claude-sonnet-4-6 + // then - fallbackChain: kimi-k2.5-free -> gemini-3-flash -> claude-sonnet-4-6 expect(writing).toBeDefined() expect(writing.fallbackChain).toBeArray() - expect(writing.fallbackChain).toHaveLength(2) + expect(writing.fallbackChain).toHaveLength(3) const primary = writing.fallbackChain[0] - expect(primary.model).toBe("gemini-3-flash") - expect(primary.providers[0]).toBe("google") + expect(primary.model).toBe("kimi-k2.5-free") + expect(primary.providers[0]).toBe("opencode") const second = writing.fallbackChain[1] - expect(second.model).toBe("claude-sonnet-4-6") - expect(second.providers[0]).toBe("anthropic") + expect(second.model).toBe("gemini-3-flash") + expect(second.providers[0]).toBe("google") + + const third = writing.fallbackChain[2] + expect(third.model).toBe("claude-sonnet-4-6") + expect(third.providers[0]).toBe("anthropic") }) test("all 8 categories have valid fallbackChain arrays", () => { @@ -489,12 +493,12 @@ describe("requiresModel field in categories", () => { 
expect(deep.requiresModel).toBe("gpt-5.3-codex") }) - test("artistry category has requiresModel set to gemini-3-pro", () => { + test("artistry category has requiresModel set to gemini-3.1-pro", () => { // given const artistry = CATEGORY_MODEL_REQUIREMENTS["artistry"] // when / #then - expect(artistry.requiresModel).toBe("gemini-3-pro") + expect(artistry.requiresModel).toBe("gemini-3.1-pro") }) }) diff --git a/src/shared/model-requirements.ts b/src/shared/model-requirements.ts index 9a795ba76..f8e197ea8 100644 --- a/src/shared/model-requirements.ts +++ b/src/shared/model-requirements.ts @@ -24,14 +24,15 @@ export const AGENT_MODEL_REQUIREMENTS: Record = { }, hephaestus: { fallbackChain: [ - { providers: ["openai", "opencode"], model: "gpt-5.3-codex", variant: "medium" }, + { providers: ["openai", "venice", "opencode"], model: "gpt-5.3-codex", variant: "medium" }, + { providers: ["github-copilot"], model: "gpt-5.2", variant: "medium" }, ], - requiresProvider: ["openai", "opencode"], + requiresProvider: ["openai", "github-copilot", "venice", "opencode"], }, oracle: { fallbackChain: [ { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "high" }, - { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" }, + { providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro", variant: "high" }, { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" }, ], }, @@ -64,7 +65,7 @@ export const AGENT_MODEL_REQUIREMENTS: Record = { { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" }, { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "high" }, { providers: ["opencode"], model: "kimi-k2.5-free" }, - { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro" }, + { providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro" }, ], }, metis: { @@ 
-72,14 +73,14 @@ export const AGENT_MODEL_REQUIREMENTS: Record = { { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" }, { providers: ["opencode"], model: "kimi-k2.5-free" }, { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "high" }, - { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" }, + { providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro", variant: "high" }, ], }, momus: { fallbackChain: [ { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "medium" }, { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" }, - { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" }, + { providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro", variant: "high" }, ], }, atlas: { @@ -94,7 +95,7 @@ export const AGENT_MODEL_REQUIREMENTS: Record = { export const CATEGORY_MODEL_REQUIREMENTS: Record = { "visual-engineering": { fallbackChain: [ - { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" }, + { providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro", variant: "high" }, { providers: ["zai-coding-plan", "opencode"], model: "glm-5" }, { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" }, ], @@ -102,7 +103,7 @@ export const CATEGORY_MODEL_REQUIREMENTS: Record = { ultrabrain: { fallbackChain: [ { providers: ["openai", "opencode"], model: "gpt-5.3-codex", variant: "xhigh" }, - { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" }, + { providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro", variant: "high" }, { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" }, ], }, @@ -110,17 
+111,17 @@ export const CATEGORY_MODEL_REQUIREMENTS: Record = { fallbackChain: [ { providers: ["openai", "opencode"], model: "gpt-5.3-codex", variant: "medium" }, { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" }, - { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" }, + { providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro", variant: "high" }, ], requiresModel: "gpt-5.3-codex", }, artistry: { fallbackChain: [ - { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" }, + { providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro", variant: "high" }, { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" }, { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2" }, ], - requiresModel: "gemini-3-pro", + requiresModel: "gemini-3.1-pro", }, quick: { fallbackChain: [ @@ -140,11 +141,12 @@ export const CATEGORY_MODEL_REQUIREMENTS: Record = { fallbackChain: [ { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" }, { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "high" }, - { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro" }, + { providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro" }, ], }, writing: { fallbackChain: [ + { providers: ["opencode"], model: "kimi-k2.5-free" }, { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-flash" }, { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-sonnet-4-6" }, ], diff --git a/src/shared/model-resolver.test.ts b/src/shared/model-resolver.test.ts index fc828f340..d18b46f5a 100644 --- a/src/shared/model-resolver.test.ts +++ b/src/shared/model-resolver.test.ts @@ -10,7 +10,7 @@ describe("resolveModel", () => { const input: 
ModelResolutionInput = { userModel: "anthropic/claude-opus-4-6", inheritedModel: "openai/gpt-5.2", - systemDefault: "google/gemini-3-pro", + systemDefault: "google/gemini-3.1-pro", } // when @@ -25,7 +25,7 @@ describe("resolveModel", () => { const input: ModelResolutionInput = { userModel: undefined, inheritedModel: "openai/gpt-5.2", - systemDefault: "google/gemini-3-pro", + systemDefault: "google/gemini-3.1-pro", } // when @@ -40,14 +40,14 @@ describe("resolveModel", () => { const input: ModelResolutionInput = { userModel: undefined, inheritedModel: undefined, - systemDefault: "google/gemini-3-pro", + systemDefault: "google/gemini-3.1-pro", } // when const result = resolveModel(input) // then - expect(result).toBe("google/gemini-3-pro") + expect(result).toBe("google/gemini-3.1-pro") }) }) @@ -57,7 +57,7 @@ describe("resolveModel", () => { const input: ModelResolutionInput = { userModel: "", inheritedModel: "openai/gpt-5.2", - systemDefault: "google/gemini-3-pro", + systemDefault: "google/gemini-3.1-pro", } // when @@ -72,14 +72,14 @@ describe("resolveModel", () => { const input: ModelResolutionInput = { userModel: " ", inheritedModel: "", - systemDefault: "google/gemini-3-pro", + systemDefault: "google/gemini-3.1-pro", } // when const result = resolveModel(input) // then - expect(result).toBe("google/gemini-3-pro") + expect(result).toBe("google/gemini-3.1-pro") }) }) @@ -89,7 +89,7 @@ describe("resolveModel", () => { const input: ModelResolutionInput = { userModel: "anthropic/claude-opus-4-6", inheritedModel: "openai/gpt-5.2", - systemDefault: "google/gemini-3-pro", + systemDefault: "google/gemini-3.1-pro", } // when @@ -123,7 +123,7 @@ describe("resolveModelWithFallback", () => { { providers: ["anthropic", "github-copilot"], model: "claude-opus-4-6" }, ], availableModels: new Set(["anthropic/claude-opus-4-6", "github-copilot/claude-opus-4-6-preview"]), - systemDefaultModel: "google/gemini-3-pro", + systemDefaultModel: "google/gemini-3.1-pro", } // when @@ -141,7 
+141,7 @@ describe("resolveModelWithFallback", () => { uiSelectedModel: "opencode/big-pickle", userModel: "anthropic/claude-opus-4-6", availableModels: new Set(["anthropic/claude-opus-4-6"]), - systemDefaultModel: "google/gemini-3-pro", + systemDefaultModel: "google/gemini-3.1-pro", } // when @@ -158,7 +158,7 @@ describe("resolveModelWithFallback", () => { uiSelectedModel: " ", userModel: "anthropic/claude-opus-4-6", availableModels: new Set(["anthropic/claude-opus-4-6"]), - systemDefaultModel: "google/gemini-3-pro", + systemDefaultModel: "google/gemini-3.1-pro", } // when @@ -175,7 +175,7 @@ describe("resolveModelWithFallback", () => { uiSelectedModel: "", userModel: "anthropic/claude-opus-4-6", availableModels: new Set(["anthropic/claude-opus-4-6"]), - systemDefaultModel: "google/gemini-3-pro", + systemDefaultModel: "google/gemini-3.1-pro", } // when @@ -195,7 +195,7 @@ describe("resolveModelWithFallback", () => { { providers: ["anthropic", "github-copilot"], model: "claude-opus-4-6" }, ], availableModels: new Set(["anthropic/claude-opus-4-6", "github-copilot/claude-opus-4-6-preview"]), - systemDefaultModel: "google/gemini-3-pro", + systemDefaultModel: "google/gemini-3.1-pro", } // when @@ -215,7 +215,7 @@ describe("resolveModelWithFallback", () => { { providers: ["anthropic"], model: "claude-opus-4-6" }, ], availableModels: new Set(["anthropic/claude-opus-4-6"]), - systemDefaultModel: "google/gemini-3-pro", + systemDefaultModel: "google/gemini-3.1-pro", } // when @@ -234,7 +234,7 @@ describe("resolveModelWithFallback", () => { { providers: ["anthropic"], model: "claude-opus-4-6" }, ], availableModels: new Set(["anthropic/claude-opus-4-6"]), - systemDefaultModel: "google/gemini-3-pro", + systemDefaultModel: "google/gemini-3.1-pro", } // when @@ -252,7 +252,7 @@ describe("resolveModelWithFallback", () => { { providers: ["anthropic"], model: "claude-opus-4-6" }, ], availableModels: new Set(["anthropic/claude-opus-4-6"]), - systemDefaultModel: "google/gemini-3-pro", 
+ systemDefaultModel: "google/gemini-3.1-pro", } // when @@ -271,7 +271,7 @@ describe("resolveModelWithFallback", () => { { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6" }, ], availableModels: new Set(["github-copilot/claude-opus-4-6-preview", "opencode/claude-opus-4-7"]), - systemDefaultModel: "google/gemini-3-pro", + systemDefaultModel: "google/gemini-3.1-pro", } // when @@ -294,8 +294,8 @@ describe("resolveModelWithFallback", () => { fallbackChain: [ { providers: ["openai", "anthropic", "google"], model: "gpt-5.2" }, ], - availableModels: new Set(["openai/gpt-5.2", "anthropic/claude-opus-4-6", "google/gemini-3-pro"]), - systemDefaultModel: "google/gemini-3-pro", + availableModels: new Set(["openai/gpt-5.2", "anthropic/claude-opus-4-6", "google/gemini-3.1-pro"]), + systemDefaultModel: "google/gemini-3.1-pro", } // when @@ -313,7 +313,7 @@ describe("resolveModelWithFallback", () => { { providers: ["anthropic", "opencode"], model: "gpt-5-nano" }, ], availableModels: new Set(["opencode/gpt-5-nano"]), - systemDefaultModel: "google/gemini-3-pro", + systemDefaultModel: "google/gemini-3.1-pro", } // when @@ -331,7 +331,7 @@ describe("resolveModelWithFallback", () => { { providers: ["anthropic", "github-copilot"], model: "claude-opus" }, ], availableModels: new Set(["anthropic/claude-opus-4-6", "github-copilot/claude-opus-4-6-preview"]), - systemDefaultModel: "google/gemini-3-pro", + systemDefaultModel: "google/gemini-3.1-pro", } // when @@ -346,7 +346,7 @@ describe("resolveModelWithFallback", () => { // given const input: ExtendedModelResolutionInput = { availableModels: new Set(["anthropic/claude-opus-4-6"]), - systemDefaultModel: "google/gemini-3-pro", + systemDefaultModel: "google/gemini-3.1-pro", } // when @@ -361,7 +361,7 @@ describe("resolveModelWithFallback", () => { const input: ExtendedModelResolutionInput = { fallbackChain: [], availableModels: new Set(["anthropic/claude-opus-4-6"]), - systemDefaultModel: 
"google/gemini-3-pro", + systemDefaultModel: "google/gemini-3.1-pro", } // when @@ -378,7 +378,7 @@ describe("resolveModelWithFallback", () => { { providers: ["anthropic"], model: "CLAUDE-OPUS" }, ], availableModels: new Set(["anthropic/claude-opus-4-6"]), - systemDefaultModel: "google/gemini-3-pro", + systemDefaultModel: "google/gemini-3.1-pro", } // when @@ -397,7 +397,7 @@ describe("resolveModelWithFallback", () => { { providers: ["anthropic"], model: "claude-sonnet-4-6" }, ], availableModels: new Set(["opencode/glm-5", "anthropic/claude-sonnet-4-6"]), - systemDefaultModel: "google/gemini-3-pro", + systemDefaultModel: "google/gemini-3.1-pro", } // when @@ -420,7 +420,7 @@ describe("resolveModelWithFallback", () => { { providers: ["zai-coding-plan"], model: "glm-5" }, ], availableModels: new Set(["zai-coding-plan/glm-5", "opencode/glm-5"]), - systemDefaultModel: "google/gemini-3-pro", + systemDefaultModel: "google/gemini-3.1-pro", } // when @@ -438,7 +438,7 @@ describe("resolveModelWithFallback", () => { { providers: ["zai-coding-plan"], model: "glm-5", variant: "high" }, ], availableModels: new Set(["opencode/glm-5"]), - systemDefaultModel: "google/gemini-3-pro", + systemDefaultModel: "google/gemini-3.1-pro", } // when @@ -457,7 +457,7 @@ describe("resolveModelWithFallback", () => { { providers: ["anthropic"], model: "claude-sonnet-4-6" }, ], availableModels: new Set(["anthropic/claude-sonnet-4-6"]), - systemDefaultModel: "google/gemini-3-pro", + systemDefaultModel: "google/gemini-3.1-pro", } // when @@ -477,14 +477,14 @@ describe("resolveModelWithFallback", () => { { providers: ["anthropic"], model: "nonexistent-model" }, ], availableModels: new Set(["openai/gpt-5.2", "anthropic/claude-opus-4-6"]), - systemDefaultModel: "google/gemini-3-pro", + systemDefaultModel: "google/gemini-3.1-pro", } // when const result = resolveModelWithFallback(input) // then - expect(result!.model).toBe("google/gemini-3-pro") + expect(result!.model).toBe("google/gemini-3.1-pro") 
expect(result!.source).toBe("system-default") expect(logSpy).toHaveBeenCalledWith("No available model found in fallback chain, falling through to system default") }) @@ -516,7 +516,7 @@ describe("resolveModelWithFallback", () => { { providers: ["anthropic", "openai"], model: "claude-opus-4-6" }, ], availableModels: new Set(), - systemDefaultModel: "google/gemini-3-pro", + systemDefaultModel: "google/gemini-3.1-pro", } // when @@ -533,7 +533,7 @@ describe("resolveModelWithFallback", () => { const cacheSpy = spyOn(connectedProvidersCache, "readConnectedProvidersCache").mockReturnValue(["github-copilot"]) const input: ExtendedModelResolutionInput = { fallbackChain: [ - { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro" }, + { providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro" }, ], availableModels: new Set(), systemDefaultModel: "anthropic/claude-sonnet-4-6", @@ -544,7 +544,7 @@ describe("resolveModelWithFallback", () => { // then - should use github-copilot (second provider) since google not connected // model name is transformed to preview variant for github-copilot provider - expect(result!.model).toBe("github-copilot/gemini-3-pro-preview") + expect(result!.model).toBe("github-copilot/gemini-3.1-pro-preview") expect(result!.source).toBe("provider-fallback") cacheSpy.mockRestore() }) @@ -577,14 +577,14 @@ describe("resolveModelWithFallback", () => { { providers: ["anthropic"], model: "claude-opus-4-6" }, ], availableModels: new Set(), - systemDefaultModel: "google/gemini-3-pro", + systemDefaultModel: "google/gemini-3.1-pro", } // when const result = resolveModelWithFallback(input) // then - should fall through to system default - expect(result!.model).toBe("google/gemini-3-pro") + expect(result!.model).toBe("google/gemini-3.1-pro") expect(result!.source).toBe("system-default") cacheSpy.mockRestore() }) @@ -593,14 +593,14 @@ describe("resolveModelWithFallback", () => { // given const input: 
ExtendedModelResolutionInput = { availableModels: new Set(["openai/gpt-5.2"]), - systemDefaultModel: "google/gemini-3-pro", + systemDefaultModel: "google/gemini-3.1-pro", } // when const result = resolveModelWithFallback(input) // then - expect(result!.model).toBe("google/gemini-3-pro") + expect(result!.model).toBe("google/gemini-3.1-pro") expect(result!.source).toBe("system-default") }) }) @@ -627,20 +627,20 @@ describe("resolveModelWithFallback", () => { test("tries all providers in first entry before moving to second entry", () => { // given - const availableModels = new Set(["google/gemini-3-pro"]) + const availableModels = new Set(["google/gemini-3.1-pro"]) // when const result = resolveModelWithFallback({ fallbackChain: [ { providers: ["openai", "anthropic"], model: "gpt-5.2" }, - { providers: ["google"], model: "gemini-3-pro" }, + { providers: ["google"], model: "gemini-3.1-pro" }, ], availableModels, systemDefaultModel: "system/default", }) // then - expect(result!.model).toBe("google/gemini-3-pro") + expect(result!.model).toBe("google/gemini-3.1-pro") expect(result!.source).toBe("provider-fallback") }) @@ -675,7 +675,7 @@ describe("resolveModelWithFallback", () => { fallbackChain: [ { providers: ["openai"], model: "gpt-5.2" }, { providers: ["anthropic"], model: "claude-opus-4-6" }, - { providers: ["google"], model: "gemini-3-pro" }, + { providers: ["google"], model: "gemini-3.1-pro" }, ], availableModels, systemDefaultModel: "system/default", @@ -693,7 +693,7 @@ describe("resolveModelWithFallback", () => { const input: ExtendedModelResolutionInput = { userModel: "anthropic/claude-opus-4-6", availableModels: new Set(), - systemDefaultModel: "google/gemini-3-pro", + systemDefaultModel: "google/gemini-3.1-pro", } // when @@ -708,32 +708,32 @@ describe("resolveModelWithFallback", () => { describe("categoryDefaultModel (fuzzy matching for category defaults)", () => { test("applies fuzzy matching to categoryDefaultModel when userModel not provided", () => { - // 
given - gemini-3-pro is the category default, but only gemini-3-pro-preview is available + // given - gemini-3.1-pro is the category default, but only gemini-3.1-pro-preview is available const input: ExtendedModelResolutionInput = { - categoryDefaultModel: "google/gemini-3-pro", + categoryDefaultModel: "google/gemini-3.1-pro", fallbackChain: [ - { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro" }, + { providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro" }, ], - availableModels: new Set(["google/gemini-3-pro-preview", "anthropic/claude-opus-4-6"]), + availableModels: new Set(["google/gemini-3.1-pro-preview", "anthropic/claude-opus-4-6"]), systemDefaultModel: "anthropic/claude-sonnet-4-6", } // when const result = resolveModelWithFallback(input) - // then - should fuzzy match gemini-3-pro → gemini-3-pro-preview - expect(result!.model).toBe("google/gemini-3-pro-preview") + // then - should fuzzy match gemini-3.1-pro → gemini-3.1-pro-preview + expect(result!.model).toBe("google/gemini-3.1-pro-preview") expect(result!.source).toBe("category-default") }) test("categoryDefaultModel uses exact match when available", () => { // given - exact match exists const input: ExtendedModelResolutionInput = { - categoryDefaultModel: "google/gemini-3-pro", + categoryDefaultModel: "google/gemini-3.1-pro", fallbackChain: [ - { providers: ["google"], model: "gemini-3-pro" }, + { providers: ["google"], model: "gemini-3.1-pro" }, ], - availableModels: new Set(["google/gemini-3-pro", "google/gemini-3-pro-preview"]), + availableModels: new Set(["google/gemini-3.1-pro", "google/gemini-3.1-pro-preview"]), systemDefaultModel: "anthropic/claude-sonnet-4-6", } @@ -741,14 +741,14 @@ describe("resolveModelWithFallback", () => { const result = resolveModelWithFallback(input) // then - should use exact match - expect(result!.model).toBe("google/gemini-3-pro") + expect(result!.model).toBe("google/gemini-3.1-pro") 
expect(result!.source).toBe("category-default") }) test("categoryDefaultModel falls through to fallbackChain when no match in availableModels", () => { // given - categoryDefaultModel has no match, but fallbackChain does const input: ExtendedModelResolutionInput = { - categoryDefaultModel: "google/gemini-3-pro", + categoryDefaultModel: "google/gemini-3.1-pro", fallbackChain: [ { providers: ["anthropic"], model: "claude-opus-4-6" }, ], @@ -768,11 +768,11 @@ describe("resolveModelWithFallback", () => { // given - both userModel and categoryDefaultModel provided const input: ExtendedModelResolutionInput = { userModel: "anthropic/claude-opus-4-6", - categoryDefaultModel: "google/gemini-3-pro", + categoryDefaultModel: "google/gemini-3.1-pro", fallbackChain: [ - { providers: ["google"], model: "gemini-3-pro" }, + { providers: ["google"], model: "gemini-3.1-pro" }, ], - availableModels: new Set(["google/gemini-3-pro-preview", "anthropic/claude-opus-4-6"]), + availableModels: new Set(["google/gemini-3.1-pro-preview", "anthropic/claude-opus-4-6"]), systemDefaultModel: "system/default", } @@ -788,7 +788,7 @@ describe("resolveModelWithFallback", () => { // given - no availableModels but connected provider cache exists const cacheSpy = spyOn(connectedProvidersCache, "readConnectedProvidersCache").mockReturnValue(["google"]) const input: ExtendedModelResolutionInput = { - categoryDefaultModel: "google/gemini-3-pro", + categoryDefaultModel: "google/gemini-3.1-pro", availableModels: new Set(), systemDefaultModel: "anthropic/claude-sonnet-4-6", } @@ -797,7 +797,7 @@ describe("resolveModelWithFallback", () => { const result = resolveModelWithFallback(input) // then - should use transformed categoryDefaultModel since google is connected - expect(result!.model).toBe("google/gemini-3-pro-preview") + expect(result!.model).toBe("google/gemini-3.1-pro-preview") expect(result!.source).toBe("category-default") cacheSpy.mockRestore() }) @@ -824,7 +824,7 @@ 
describe("resolveModelWithFallback", () => { // given - category default already has -preview suffix const cacheSpy = spyOn(connectedProvidersCache, "readConnectedProvidersCache").mockReturnValue(["google"]) const input: ExtendedModelResolutionInput = { - categoryDefaultModel: "google/gemini-3-pro-preview", + categoryDefaultModel: "google/gemini-3.1-pro-preview", availableModels: new Set(), systemDefaultModel: "anthropic/claude-sonnet-4-5", } @@ -832,18 +832,18 @@ describe("resolveModelWithFallback", () => { // when const result = resolveModelWithFallback(input) - // then - should NOT become gemini-3-pro-preview-preview - expect(result!.model).toBe("google/gemini-3-pro-preview") + // then - should NOT become gemini-3.1-pro-preview-preview + expect(result!.model).toBe("google/gemini-3.1-pro-preview") expect(result!.source).toBe("category-default") cacheSpy.mockRestore() }) - test("transforms gemini-3-pro in fallback chain for google connected provider", () => { - // given - google connected, fallback chain has gemini-3-pro + test("transforms gemini-3.1-pro in fallback chain for google connected provider", () => { + // given - google connected, fallback chain has gemini-3.1-pro const cacheSpy = spyOn(connectedProvidersCache, "readConnectedProvidersCache").mockReturnValue(["google"]) const input: ExtendedModelResolutionInput = { fallbackChain: [ - { providers: ["google", "github-copilot"], model: "gemini-3-pro" }, + { providers: ["google", "github-copilot"], model: "gemini-3.1-pro" }, ], availableModels: new Set(), systemDefaultModel: "anthropic/claude-sonnet-4-5", @@ -853,7 +853,7 @@ describe("resolveModelWithFallback", () => { const result = resolveModelWithFallback(input) // then - should transform to preview variant for google provider - expect(result!.model).toBe("google/gemini-3-pro-preview") + expect(result!.model).toBe("google/gemini-3.1-pro-preview") expect(result!.source).toBe("provider-fallback") cacheSpy.mockRestore() }) diff --git 
a/src/shared/model-suggestion-retry.test.ts b/src/shared/model-suggestion-retry.test.ts index 52edba3aa..9732367de 100644 --- a/src/shared/model-suggestion-retry.test.ts +++ b/src/shared/model-suggestion-retry.test.ts @@ -399,6 +399,43 @@ describe("promptSyncWithModelSuggestionRetry", () => { expect(promptAsyncMock).toHaveBeenCalledTimes(0) }) + it("should abort and throw timeout error when sync prompt hangs", async () => { + // given a client where sync prompt never resolves unless aborted + let receivedSignal: AbortSignal | undefined + const promptMock = mock((input: { signal?: AbortSignal }) => { + receivedSignal = input.signal + return new Promise((_, reject) => { + const signal = input.signal + if (!signal) { + return + } + signal.addEventListener("abort", () => { + reject(signal.reason) + }) + }) + }) + const client = { + session: { + prompt: promptMock, + promptAsync: mock(() => Promise.resolve()), + }, + } + + // when calling with short timeout + // then should abort the request and throw timeout error + await expect( + promptSyncWithModelSuggestionRetry(client as any, { + path: { id: "session-1" }, + body: { + parts: [{ type: "text", text: "hello" }], + model: { providerID: "anthropic", modelID: "claude-sonnet-4" }, + }, + }, { timeoutMs: 1 }) + ).rejects.toThrow("prompt timed out after 1ms") + + expect(receivedSignal?.aborted).toBe(true) + }) + it("should retry with suggested model on ProviderModelNotFoundError", async () => { // given a client that fails first with model-not-found, then succeeds const promptMock = mock() diff --git a/src/shared/model-suggestion-retry.ts b/src/shared/model-suggestion-retry.ts index 6a34deacb..0ff9ca86e 100644 --- a/src/shared/model-suggestion-retry.ts +++ b/src/shared/model-suggestion-retry.ts @@ -1,5 +1,10 @@ import type { createOpencodeClient } from "@opencode-ai/sdk" import { log } from "./logger" +import { + createPromptTimeoutContext, + PROMPT_TIMEOUT_MS, + type PromptRetryOptions, +} from "./prompt-timeout-context" 
type Client = ReturnType @@ -77,30 +82,36 @@ interface PromptBody { interface PromptArgs { path: { id: string } body: PromptBody + signal?: AbortSignal [key: string]: unknown } export async function promptWithModelSuggestionRetry( client: Client, args: PromptArgs, + options: PromptRetryOptions = {}, ): Promise { + const timeoutMs = options.timeoutMs ?? PROMPT_TIMEOUT_MS + const timeoutContext = createPromptTimeoutContext(args, timeoutMs) // NOTE: Model suggestion retry removed — promptAsync returns 204 immediately, // model errors happen asynchronously server-side and cannot be caught here - const promptPromise = client.session.promptAsync( - args as Parameters[0], - ) - - let timeoutID: ReturnType | null = null - const timeoutPromise = new Promise((_, reject) => { - timeoutID = setTimeout(() => { - reject(new Error("promptAsync timed out after 120000ms")) - }, 120000) - }) + const promptPromise = client.session.promptAsync({ + ...args, + signal: timeoutContext.signal, + } as Parameters[0]) try { - await Promise.race([promptPromise, timeoutPromise]) + await promptPromise + if (timeoutContext.wasTimedOut()) { + throw new Error(`promptAsync timed out after ${timeoutMs}ms`) + } + } catch (error) { + if (timeoutContext.wasTimedOut()) { + throw new Error(`promptAsync timed out after ${timeoutMs}ms`) + } + throw error } finally { - if (timeoutID !== null) clearTimeout(timeoutID) + timeoutContext.cleanup() } } @@ -116,9 +127,28 @@ export async function promptWithModelSuggestionRetry( export async function promptSyncWithModelSuggestionRetry( client: Client, args: PromptArgs, + options: PromptRetryOptions = {}, ): Promise { + const timeoutMs = options.timeoutMs ?? 
PROMPT_TIMEOUT_MS + try { - await client.session.prompt(args as Parameters[0]) + const timeoutContext = createPromptTimeoutContext(args, timeoutMs) + try { + await client.session.prompt({ + ...args, + signal: timeoutContext.signal, + } as Parameters[0]) + if (timeoutContext.wasTimedOut()) { + throw new Error(`prompt timed out after ${timeoutMs}ms`) + } + } catch (error) { + if (timeoutContext.wasTimedOut()) { + throw new Error(`prompt timed out after ${timeoutMs}ms`) + } + throw error + } finally { + timeoutContext.cleanup() + } } catch (error) { const suggestion = parseModelSuggestion(error) if (!suggestion || !args.body.model) { @@ -130,7 +160,7 @@ export async function promptSyncWithModelSuggestionRetry( suggested: suggestion.suggestion, }) - await client.session.prompt({ + const retryArgs: PromptArgs = { ...args, body: { ...args.body, @@ -139,6 +169,24 @@ export async function promptSyncWithModelSuggestionRetry( modelID: suggestion.suggestion, }, }, - } as Parameters[0]) + } + + const timeoutContext = createPromptTimeoutContext(retryArgs, timeoutMs) + try { + await client.session.prompt({ + ...retryArgs, + signal: timeoutContext.signal, + } as Parameters[0]) + if (timeoutContext.wasTimedOut()) { + throw new Error(`prompt timed out after ${timeoutMs}ms`) + } + } catch (retryError) { + if (timeoutContext.wasTimedOut()) { + throw new Error(`prompt timed out after ${timeoutMs}ms`) + } + throw retryError + } finally { + timeoutContext.cleanup() + } } } diff --git a/src/shared/prompt-timeout-context.ts b/src/shared/prompt-timeout-context.ts new file mode 100644 index 000000000..99f081278 --- /dev/null +++ b/src/shared/prompt-timeout-context.ts @@ -0,0 +1,49 @@ +export interface PromptTimeoutArgs { + signal?: AbortSignal +} + +export interface PromptRetryOptions { + timeoutMs?: number +} + +export const PROMPT_TIMEOUT_MS = 120000 + +export function createPromptTimeoutContext(args: PromptTimeoutArgs, timeoutMs: number): { + signal: AbortSignal + wasTimedOut: () => 
boolean + cleanup: () => void +} { + const timeoutController = new AbortController() + let timeoutID: ReturnType | null = null + let timedOut = false + + const abortOnUpstreamSignal = (): void => { + timeoutController.abort(args.signal?.reason) + } + + if (args.signal) { + if (args.signal.aborted) { + timeoutController.abort(args.signal.reason) + } else { + args.signal.addEventListener("abort", abortOnUpstreamSignal, { once: true }) + } + } + + timeoutID = setTimeout(() => { + timedOut = true + timeoutController.abort(new Error(`prompt timed out after ${timeoutMs}ms`)) + }, timeoutMs) + + return { + signal: timeoutController.signal, + wasTimedOut: () => timedOut, + cleanup: () => { + if (timeoutID !== null) { + clearTimeout(timeoutID) + } + if (args.signal) { + args.signal.removeEventListener("abort", abortOnUpstreamSignal) + } + }, + } +} diff --git a/src/shared/provider-model-id-transform.ts b/src/shared/provider-model-id-transform.ts index 5b8c810bb..0cf8eb801 100644 --- a/src/shared/provider-model-id-transform.ts +++ b/src/shared/provider-model-id-transform.ts @@ -6,12 +6,12 @@ export function transformModelForProvider(provider: string, model: string): stri .replace("claude-sonnet-4-5", "claude-sonnet-4.5") .replace("claude-haiku-4-5", "claude-haiku-4.5") .replace("claude-sonnet-4", "claude-sonnet-4") - .replace(/gemini-3-pro(?!-)/g, "gemini-3-pro-preview") + .replace(/gemini-3\.1-pro(?!-)/g, "gemini-3.1-pro-preview") .replace(/gemini-3-flash(?!-)/g, "gemini-3-flash-preview") } if (provider === "google") { return model - .replace(/gemini-3-pro(?!-)/g, "gemini-3-pro-preview") + .replace(/gemini-3\.1-pro(?!-)/g, "gemini-3.1-pro-preview") .replace(/gemini-3-flash(?!-)/g, "gemini-3-flash-preview") } return model diff --git a/src/shared/spawn-with-windows-hide.ts b/src/shared/spawn-with-windows-hide.ts new file mode 100644 index 000000000..7da9ed086 --- /dev/null +++ b/src/shared/spawn-with-windows-hide.ts @@ -0,0 +1,84 @@ +import { spawn as bunSpawn } from "bun" 
+import { spawn as nodeSpawn, type ChildProcess } from "node:child_process" +import { Readable } from "node:stream" + +export interface SpawnOptions { + cwd?: string + env?: Record + stdin?: "pipe" | "inherit" | "ignore" + stdout?: "pipe" | "inherit" | "ignore" + stderr?: "pipe" | "inherit" | "ignore" +} + +export interface SpawnedProcess { + readonly exitCode: number | null + readonly exited: Promise + readonly stdout: ReadableStream | undefined + readonly stderr: ReadableStream | undefined + kill(signal?: NodeJS.Signals): void +} + +function toReadableStream(stream: NodeJS.ReadableStream | null): ReadableStream | undefined { + if (!stream) { + return undefined + } + + return Readable.toWeb(stream as Readable) as ReadableStream +} + +function wrapNodeProcess(proc: ChildProcess): SpawnedProcess { + let resolveExited: (exitCode: number) => void + let exitCode: number | null = null + + const exited = new Promise((resolve) => { + resolveExited = resolve + }) + + proc.on("exit", (code) => { + exitCode = code ?? 1 + resolveExited(exitCode) + }) + + proc.on("error", () => { + if (exitCode === null) { + exitCode = 1 + resolveExited(1) + } + }) + + return { + get exitCode() { + return exitCode + }, + exited, + stdout: toReadableStream(proc.stdout), + stderr: toReadableStream(proc.stderr), + kill(signal?: NodeJS.Signals): void { + try { + if (!signal) { + proc.kill() + return + } + + proc.kill(signal) + } catch {} + }, + } +} + +export function spawnWithWindowsHide(command: string[], options: SpawnOptions): SpawnedProcess { + if (process.platform !== "win32") { + return bunSpawn(command, options) + } + + const [cmd, ...args] = command + const proc = nodeSpawn(cmd, args, { + cwd: options.cwd, + env: options.env, + stdio: [options.stdin ?? "pipe", options.stdout ?? "pipe", options.stderr ?? 
"pipe"], + windowsHide: true, + shell: true, + }) + + return wrapNodeProcess(proc) +} diff --git a/src/tools/background-task/create-background-output.blocking.test.ts b/src/tools/background-task/create-background-output.blocking.test.ts new file mode 100644 index 000000000..82de143e9 --- /dev/null +++ b/src/tools/background-task/create-background-output.blocking.test.ts @@ -0,0 +1,112 @@ +/// + +import { describe, expect, test } from "bun:test" +import type { ToolContext } from "@opencode-ai/plugin/tool" +import type { BackgroundTask } from "../../features/background-agent" +import type { BackgroundOutputClient, BackgroundOutputManager } from "./clients" +import { createBackgroundOutput } from "./create-background-output" + +const projectDir = "/Users/yeongyu/local-workspaces/oh-my-opencode" + +const mockContext = { + sessionID: "test-session", + messageID: "test-message", + agent: "test-agent", + directory: projectDir, + worktree: projectDir, + abort: new AbortController().signal, + metadata: () => {}, + ask: async () => {}, +} as unknown as ToolContext + +function createTask(overrides: Partial = {}): BackgroundTask { + return { + id: "task-1", + sessionID: "ses-1", + parentSessionID: "main-1", + parentMessageID: "msg-1", + description: "background task", + prompt: "do work", + agent: "test-agent", + status: "running", + ...overrides, + } +} + +function createMockClient(): BackgroundOutputClient { + return { + session: { + messages: async () => ({ data: [] }), + }, + } +} + +describe("createBackgroundOutput block=true polling", () => { + test("returns terminal error output when task fails during blocking wait", async () => { + // #given + let pollCount = 0 + const task = createTask({ status: "running" }) + const manager: BackgroundOutputManager = { + getTask: (id: string) => { + if (id !== task.id) return undefined + + pollCount += 1 + if (pollCount >= 2) { + task.status = "error" + task.error = "task failed" + } + + return task + }, + } + + const tool = 
createBackgroundOutput(manager, createMockClient()) + + // #when + const output = await tool.execute( + { + task_id: task.id, + block: true, + timeout: 3000, + full_session: false, + }, + mockContext + ) + + // #then + expect(pollCount).toBeGreaterThanOrEqual(2) + expect(output).toContain("Status | **error**") + expect(output).not.toContain("Timed out waiting") + }) + + test("returns latest output with timeout note when task stays running", async () => { + // #given + let pollCount = 0 + const task = createTask({ status: "running" }) + const manager: BackgroundOutputManager = { + getTask: (id: string) => { + if (id !== task.id) return undefined + pollCount += 1 + return task + }, + } + + const tool = createBackgroundOutput(manager, createMockClient()) + + // #when + const output = await tool.execute( + { + task_id: task.id, + block: true, + timeout: 10, + }, + mockContext + ) + + // #then + expect(pollCount).toBeGreaterThanOrEqual(2) + expect(output).toContain("# Full Session Output") + expect(output).toContain("Timed out waiting") + expect(output).toContain("still running") + }) +}) diff --git a/src/tools/background-task/create-background-output.ts b/src/tools/background-task/create-background-output.ts index 78593a884..e12cfa9aa 100644 --- a/src/tools/background-task/create-background-output.ts +++ b/src/tools/background-task/create-background-output.ts @@ -33,6 +33,14 @@ function formatResolvedTitle(task: BackgroundTask): string { return `${label} - ${task.description}` } +function isTaskActiveStatus(status: BackgroundTask["status"]): boolean { + return status === "pending" || status === "running" +} + +function appendTimeoutNote(output: string, timeoutMs: number): string { + return `${output}\n\n> **Timed out waiting** after ${timeoutMs}ms. 
Task is still running; showing latest available output.` +} + export function createBackgroundOutput(manager: BackgroundOutputManager, client: BackgroundOutputClient): ToolDefinition { return tool({ description: BACKGROUND_OUTPUT_DESCRIPTION, @@ -83,7 +91,9 @@ export function createBackgroundOutput(manager: BackgroundOutputManager, client: let resolvedTask = task - if (shouldBlock && (task.status === "pending" || task.status === "running")) { + let didTimeoutWhileActive = false + + if (shouldBlock && isTaskActiveStatus(task.status)) { const startTime = Date.now() while (Date.now() - startTime < timeoutMs) { await delay(1000) @@ -93,30 +103,39 @@ export function createBackgroundOutput(manager: BackgroundOutputManager, client: return `Task was deleted: ${args.task_id}` } - if (currentTask.status !== "pending" && currentTask.status !== "running") { - resolvedTask = currentTask + resolvedTask = currentTask + + if (!isTaskActiveStatus(currentTask.status)) { break } } - const finalCheck = manager.getTask(args.task_id) - if (finalCheck) { - resolvedTask = finalCheck + if (isTaskActiveStatus(resolvedTask.status)) { + const finalCheck = manager.getTask(args.task_id) + if (finalCheck) { + resolvedTask = finalCheck + } + } + + if (isTaskActiveStatus(resolvedTask.status)) { + didTimeoutWhileActive = true } } - const isActive = resolvedTask.status === "pending" || resolvedTask.status === "running" + const isActive = isTaskActiveStatus(resolvedTask.status) const includeThinking = isActive || (args.include_thinking ?? false) const includeToolResults = isActive || (args.include_tool_results ?? false) if (fullSession) { - return await formatFullSession(resolvedTask, client, { + const output = await formatFullSession(resolvedTask, client, { includeThinking, messageLimit: args.message_limit, sinceMessageId: args.since_message_id, includeToolResults, thinkingMaxChars: args.thinking_max_chars, }) + + return didTimeoutWhileActive ? 
appendTimeoutNote(output, timeoutMs) : output } if (resolvedTask.status === "completed") { @@ -127,7 +146,8 @@ export function createBackgroundOutput(manager: BackgroundOutputManager, client: return formatTaskStatus(resolvedTask) } - return formatTaskStatus(resolvedTask) + const statusOutput = formatTaskStatus(resolvedTask) + return didTimeoutWhileActive ? appendTimeoutNote(statusOutput, timeoutMs) : statusOutput } catch (error) { return `Error getting output: ${error instanceof Error ? error.message : String(error)}` } diff --git a/src/tools/delegate-task/category-resolver.ts b/src/tools/delegate-task/category-resolver.ts index a2f5bbd36..bc516dce7 100644 --- a/src/tools/delegate-task/category-resolver.ts +++ b/src/tools/delegate-task/category-resolver.ts @@ -14,6 +14,7 @@ export interface CategoryResolutionResult { agentToUse: string categoryModel: { providerID: string; modelID: string; variant?: string } | undefined categoryPromptAppend: string | undefined + maxPromptTokens?: number modelInfo: ModelFallbackInfo | undefined actualModel: string | undefined isUnstableAgent: boolean @@ -51,6 +52,7 @@ export async function resolveCategoryExecution( agentToUse: "", categoryModel: undefined, categoryPromptAppend: undefined, + maxPromptTokens: undefined, modelInfo: undefined, actualModel: undefined, isUnstableAgent: false, @@ -68,6 +70,7 @@ Available categories: ${allCategoryNames}`, agentToUse: "", categoryModel: undefined, categoryPromptAppend: undefined, + maxPromptTokens: undefined, modelInfo: undefined, actualModel: undefined, isUnstableAgent: false, @@ -111,6 +114,7 @@ Available categories: ${allCategoryNames}`, agentToUse: "", categoryModel: undefined, categoryPromptAppend: undefined, + maxPromptTokens: undefined, modelInfo: undefined, actualModel: undefined, isUnstableAgent: false, @@ -154,6 +158,7 @@ Available categories: ${allCategoryNames}`, agentToUse: "", categoryModel: undefined, categoryPromptAppend: undefined, + maxPromptTokens: undefined, modelInfo: 
undefined, actualModel: undefined, isUnstableAgent: false, @@ -177,6 +182,7 @@ Available categories: ${categoryNames.join(", ")}`, agentToUse: SISYPHUS_JUNIOR_AGENT, categoryModel, categoryPromptAppend, + maxPromptTokens: resolved.config.max_prompt_tokens, modelInfo, actualModel, isUnstableAgent, diff --git a/src/tools/delegate-task/constants.ts b/src/tools/delegate-task/constants.ts index 146838a42..ecf37135f 100644 --- a/src/tools/delegate-task/constants.ts +++ b/src/tools/delegate-task/constants.ts @@ -208,10 +208,10 @@ You are NOT an interactive assistant. You are an autonomous problem-solver. export const DEFAULT_CATEGORIES: Record = { - "visual-engineering": { model: "google/gemini-3-pro", variant: "high" }, + "visual-engineering": { model: "google/gemini-3.1-pro", variant: "high" }, ultrabrain: { model: "openai/gpt-5.3-codex", variant: "xhigh" }, deep: { model: "openai/gpt-5.3-codex", variant: "medium" }, - artistry: { model: "google/gemini-3-pro", variant: "high" }, + artistry: { model: "google/gemini-3.1-pro", variant: "high" }, quick: { model: "anthropic/claude-haiku-4-5" }, "unspecified-low": { model: "anthropic/claude-sonnet-4-6" }, "unspecified-high": { model: "anthropic/claude-opus-4-6", variant: "max" }, diff --git a/src/tools/delegate-task/prompt-builder.ts b/src/tools/delegate-task/prompt-builder.ts index 51d32366a..8230fed78 100644 --- a/src/tools/delegate-task/prompt-builder.ts +++ b/src/tools/delegate-task/prompt-builder.ts @@ -1,5 +1,21 @@ import type { BuildSystemContentInput } from "./types" import { buildPlanAgentSystemPrepend, isPlanAgent } from "./constants" +import { buildSystemContentWithTokenLimit } from "./token-limiter" + +const FREE_OR_LOCAL_PROMPT_TOKEN_LIMIT = 24000 + +function usesFreeOrLocalModel(model: { providerID: string; modelID: string; variant?: string } | undefined): boolean { + if (!model) { + return false + } + + const provider = model.providerID.toLowerCase() + const modelId = model.modelID.toLowerCase() + return 
provider.includes("local") + || provider === "ollama" + || provider === "lmstudio" + || modelId.includes("free") +} /** * Build the system content to inject into the agent prompt. @@ -8,7 +24,11 @@ import { buildPlanAgentSystemPrepend, isPlanAgent } from "./constants" export function buildSystemContent(input: BuildSystemContentInput): string | undefined { const { skillContent, + skillContents, categoryPromptAppend, + agentsContext, + maxPromptTokens, + model, agentName, availableCategories, availableSkills, @@ -18,23 +38,17 @@ export function buildSystemContent(input: BuildSystemContentInput): string | und ? buildPlanAgentSystemPrepend(availableCategories, availableSkills) : "" - if (!skillContent && !categoryPromptAppend && !planAgentPrepend) { - return undefined - } + const effectiveMaxPromptTokens = maxPromptTokens + ?? (usesFreeOrLocalModel(model) ? FREE_OR_LOCAL_PROMPT_TOKEN_LIMIT : undefined) - const parts: string[] = [] - - if (planAgentPrepend) { - parts.push(planAgentPrepend) - } - - if (skillContent) { - parts.push(skillContent) - } - - if (categoryPromptAppend) { - parts.push(categoryPromptAppend) - } - - return parts.join("\n\n") || undefined + return buildSystemContentWithTokenLimit( + { + skillContent, + skillContents, + categoryPromptAppend, + agentsContext: agentsContext ?? 
planAgentPrepend, + planAgentPrepend, + }, + effectiveMaxPromptTokens + ) } diff --git a/src/tools/delegate-task/skill-resolver.ts b/src/tools/delegate-task/skill-resolver.ts index bfd58e17b..e3bb89a50 100644 --- a/src/tools/delegate-task/skill-resolver.ts +++ b/src/tools/delegate-task/skill-resolver.ts @@ -5,17 +5,18 @@ import { discoverSkills } from "../../features/opencode-skill-loader" export async function resolveSkillContent( skills: string[], options: { gitMasterConfig?: GitMasterConfig; browserProvider?: BrowserAutomationProvider, disabledSkills?: Set, directory?: string } -): Promise<{ content: string | undefined; error: string | null }> { +): Promise<{ content: string | undefined; contents: string[]; error: string | null }> { if (skills.length === 0) { - return { content: undefined, error: null } + return { content: undefined, contents: [], error: null } } const { resolved, notFound } = await resolveMultipleSkillsAsync(skills, options) if (notFound.length > 0) { const allSkills = await discoverSkills({ includeClaudeCodePaths: true, directory: options?.directory }) const available = allSkills.map(s => s.name).join(", ") - return { content: undefined, error: `Skills not found: ${notFound.join(", ")}. Available: ${available}` } + return { content: undefined, contents: [], error: `Skills not found: ${notFound.join(", ")}. 
Available: ${available}` } } - return { content: Array.from(resolved.values()).join("\n\n"), error: null } + const contents = Array.from(resolved.values()) + return { content: contents.join("\n\n"), contents, error: null } } diff --git a/src/tools/delegate-task/token-limiter.test.ts b/src/tools/delegate-task/token-limiter.test.ts new file mode 100644 index 000000000..57ba081cd --- /dev/null +++ b/src/tools/delegate-task/token-limiter.test.ts @@ -0,0 +1,121 @@ +declare const require: (name: string) => unknown +const { describe, test, expect } = require("bun:test") as { + describe: (name: string, fn: () => void) => void + test: (name: string, fn: () => void) => void + expect: (value: unknown) => { + toBe: (expected: unknown) => void + toContain: (expected: string) => void + not: { + toContain: (expected: string) => void + } + toBeLessThanOrEqual: (expected: number) => void + toBeUndefined: () => void + } +} + +import { + buildSystemContentWithTokenLimit, + estimateTokenCount, + truncateToTokenBudget, +} from "./token-limiter" + +describe("token-limiter", () => { + test("estimateTokenCount uses 1 token per 4 chars approximation", () => { + // given + const text = "12345678" + + // when + const result = estimateTokenCount(text) + + // then + expect(result).toBe(2) + }) + + test("truncateToTokenBudget keeps text within requested token budget", () => { + // given + const content = "A".repeat(120) + const maxTokens = 10 + + // when + const result = truncateToTokenBudget(content, maxTokens) + + // then + expect(estimateTokenCount(result)).toBeLessThanOrEqual(maxTokens) + }) + + test("buildSystemContentWithTokenLimit returns undefined when there is no content", () => { + // given + const input = { + skillContent: undefined, + skillContents: [], + categoryPromptAppend: undefined, + agentsContext: undefined, + planAgentPrepend: "", + } + + // when + const result = buildSystemContentWithTokenLimit(input, 20) + + // then + expect(result).toBeUndefined() + }) + + 
test("buildSystemContentWithTokenLimit truncates skills before category and agents context", () => { + // given + const input = { + skillContents: [ + "SKILL_ALPHA:" + "a".repeat(180), + "SKILL_BETA:" + "b".repeat(180), + ], + categoryPromptAppend: "CATEGORY_APPEND:keep", + agentsContext: "AGENTS_CONTEXT:keep", + planAgentPrepend: "", + } + + // when + const result = buildSystemContentWithTokenLimit(input, 80) + + // then + expect(result).toContain("AGENTS_CONTEXT:keep") + expect(result).toContain("CATEGORY_APPEND:keep") + expect(result).toContain("SKILL_ALPHA:") + expect(estimateTokenCount(result as string)).toBeLessThanOrEqual(80) + }) + + test("buildSystemContentWithTokenLimit truncates category after skills are exhausted", () => { + // given + const input = { + skillContents: ["SKILL_ALPHA:" + "a".repeat(220)], + categoryPromptAppend: "CATEGORY_APPEND:" + "c".repeat(220), + agentsContext: "AGENTS_CONTEXT:keep", + planAgentPrepend: "", + } + + // when + const result = buildSystemContentWithTokenLimit(input, 30) + + // then + expect(result).toContain("AGENTS_CONTEXT:keep") + expect(result).not.toContain("SKILL_ALPHA:" + "a".repeat(80)) + expect(estimateTokenCount(result as string)).toBeLessThanOrEqual(30) + }) + + test("buildSystemContentWithTokenLimit truncates agents context last", () => { + // given + const input = { + skillContents: ["SKILL_ALPHA:" + "a".repeat(220)], + categoryPromptAppend: "CATEGORY_APPEND:" + "c".repeat(220), + agentsContext: "AGENTS_CONTEXT:" + "g".repeat(220), + planAgentPrepend: "", + } + + // when + const result = buildSystemContentWithTokenLimit(input, 10) + + // then + expect(result).toContain("AGENTS_CONTEXT:") + expect(result).not.toContain("SKILL_ALPHA:") + expect(result).not.toContain("CATEGORY_APPEND:") + expect(estimateTokenCount(result as string)).toBeLessThanOrEqual(10) + }) +}) diff --git a/src/tools/delegate-task/token-limiter.ts b/src/tools/delegate-task/token-limiter.ts new file mode 100644 index 000000000..2ed6543c1 --- 
/dev/null +++ b/src/tools/delegate-task/token-limiter.ts @@ -0,0 +1,117 @@ +import type { BuildSystemContentInput } from "./types" + +const CHARACTERS_PER_TOKEN = 4 + +export function estimateTokenCount(text: string): number { + if (!text) { + return 0 + } + + return Math.ceil(text.length / CHARACTERS_PER_TOKEN) +} + +export function truncateToTokenBudget(content: string, maxTokens: number): string { + if (!content || maxTokens <= 0) { + return "" + } + + const maxCharacters = maxTokens * CHARACTERS_PER_TOKEN + if (content.length <= maxCharacters) { + return content + } + + return content.slice(0, maxCharacters) +} + +function joinSystemParts(parts: string[]): string | undefined { + const filtered = parts.filter((part) => part.trim().length > 0) + if (filtered.length === 0) { + return undefined + } + + return filtered.join("\n\n") +} + +function reduceSegmentToFitBudget(content: string, overflowTokens: number): string { + if (overflowTokens <= 0 || !content) { + return content + } + + const currentTokens = estimateTokenCount(content) + const nextBudget = Math.max(0, currentTokens - overflowTokens) + return truncateToTokenBudget(content, nextBudget) +} + +export function buildSystemContentWithTokenLimit( + input: BuildSystemContentInput, + maxTokens: number | undefined +): string | undefined { + const skillParts = input.skillContents?.length + ? [...input.skillContents] + : input.skillContent + ? [input.skillContent] + : [] + const categoryPromptAppend = input.categoryPromptAppend ?? "" + const agentsContext = input.agentsContext ?? input.planAgentPrepend ?? 
"" + + if (maxTokens === undefined) { + return joinSystemParts([agentsContext, ...skillParts, categoryPromptAppend]) + } + + let nextSkills = [...skillParts] + let nextCategoryPromptAppend = categoryPromptAppend + let nextAgentsContext = agentsContext + + const buildCurrentContent = (): string | undefined => + joinSystemParts([nextAgentsContext, ...nextSkills, nextCategoryPromptAppend]) + + let systemContent = buildCurrentContent() + if (!systemContent) { + return undefined + } + + let overflowTokens = estimateTokenCount(systemContent) - maxTokens + + if (overflowTokens > 0) { + for (let index = 0; index < nextSkills.length && overflowTokens > 0; index += 1) { + const skill = nextSkills[index] + const reducedSkill = reduceSegmentToFitBudget(skill, overflowTokens) + nextSkills[index] = reducedSkill + systemContent = buildCurrentContent() + if (!systemContent) { + return undefined + } + overflowTokens = estimateTokenCount(systemContent) - maxTokens + } + + nextSkills = nextSkills.filter((skill) => skill.trim().length > 0) + systemContent = buildCurrentContent() + if (!systemContent) { + return undefined + } + overflowTokens = estimateTokenCount(systemContent) - maxTokens + } + + if (overflowTokens > 0 && nextCategoryPromptAppend) { + nextCategoryPromptAppend = reduceSegmentToFitBudget(nextCategoryPromptAppend, overflowTokens) + systemContent = buildCurrentContent() + if (!systemContent) { + return undefined + } + overflowTokens = estimateTokenCount(systemContent) - maxTokens + } + + if (overflowTokens > 0 && nextAgentsContext) { + nextAgentsContext = reduceSegmentToFitBudget(nextAgentsContext, overflowTokens) + systemContent = buildCurrentContent() + if (!systemContent) { + return undefined + } + } + + if (!systemContent) { + return undefined + } + + return truncateToTokenBudget(systemContent, maxTokens) +} diff --git a/src/tools/delegate-task/tools.test.ts b/src/tools/delegate-task/tools.test.ts index b9b1274bc..8c0b01acf 100644 --- 
a/src/tools/delegate-task/tools.test.ts +++ b/src/tools/delegate-task/tools.test.ts @@ -17,7 +17,7 @@ const TEST_AVAILABLE_MODELS = new Set([ "anthropic/claude-opus-4-6", "anthropic/claude-sonnet-4-6", "anthropic/claude-haiku-4-5", - "google/gemini-3-pro", + "google/gemini-3.1-pro", "google/gemini-3-flash", "openai/gpt-5.2", "openai/gpt-5.3-codex", @@ -52,7 +52,7 @@ describe("sisyphus-task", () => { providerModelsSpy = spyOn(connectedProvidersCache, "readProviderModelsCache").mockReturnValue({ models: { anthropic: ["claude-opus-4-6", "claude-sonnet-4-6", "claude-haiku-4-5"], - google: ["gemini-3-pro", "gemini-3-flash"], + google: ["gemini-3.1-pro", "gemini-3-flash"], openai: ["gpt-5.2", "gpt-5.3-codex"], }, connected: ["anthropic", "google", "openai"], @@ -73,7 +73,7 @@ describe("sisyphus-task", () => { // when / #then expect(category).toBeDefined() - expect(category.model).toBe("google/gemini-3-pro") + expect(category.model).toBe("google/gemini-3.1-pro") expect(category.variant).toBe("high") }) @@ -781,7 +781,7 @@ describe("sisyphus-task", () => { // then expect(result).not.toBeNull() - expect(result!.config.model).toBe("google/gemini-3-pro") + expect(result!.config.model).toBe("google/gemini-3.1-pro") expect(result!.promptAppend).toContain("VISUAL/UI") }) @@ -805,7 +805,7 @@ describe("sisyphus-task", () => { const categoryName = "visual-engineering" const userCategories = { "visual-engineering": { - model: "google/gemini-3-pro", + model: "google/gemini-3.1-pro", prompt_append: "Custom instructions here", }, } @@ -845,7 +845,7 @@ describe("sisyphus-task", () => { const categoryName = "visual-engineering" const userCategories = { "visual-engineering": { - model: "google/gemini-3-pro", + model: "google/gemini-3.1-pro", temperature: 0.3, }, } @@ -868,7 +868,7 @@ describe("sisyphus-task", () => { // then - category's built-in model wins over inheritedModel expect(result).not.toBeNull() - expect(result!.config.model).toBe("google/gemini-3-pro") + 
expect(result!.config.model).toBe("google/gemini-3.1-pro") }) test("systemDefaultModel is used as fallback when custom category has no model", () => { @@ -910,7 +910,7 @@ describe("sisyphus-task", () => { // then expect(result).not.toBeNull() - expect(result!.config.model).toBe("google/gemini-3-pro") + expect(result!.config.model).toBe("google/gemini-3.1-pro") }) }) @@ -1738,7 +1738,7 @@ describe("sisyphus-task", () => { const mockClient = { app: { agents: async () => ({ data: [] }) }, config: { get: async () => ({ data: { model: SYSTEM_DEFAULT_MODEL } }) }, - model: { list: async () => [{ provider: "google", id: "gemini-3-pro" }] }, + model: { list: async () => [{ provider: "google", id: "gemini-3.1-pro" }] }, session: { get: async () => ({ data: { directory: "/project" } }), create: async () => ({ data: { id: "ses_unstable_gemini" } }), @@ -2001,7 +2001,7 @@ describe("sisyphus-task", () => { const mockClient = { app: { agents: async () => ({ data: [] }) }, config: { get: async () => ({ data: { model: SYSTEM_DEFAULT_MODEL } }) }, - model: { list: async () => [{ provider: "google", id: "gemini-3-pro" }] }, + model: { list: async () => [{ provider: "google", id: "gemini-3.1-pro" }] }, session: { get: async () => ({ data: { directory: "/project" } }), create: async () => ({ data: { id: "ses_artistry_gemini" } }), @@ -2028,7 +2028,7 @@ describe("sisyphus-task", () => { abort: new AbortController().signal, } - // when - artistry category (gemini-3-pro with high variant) + // when - artistry category (gemini-3.1-pro with high variant) const result = await tool.execute( { description: "Test artistry forced background", @@ -3026,9 +3026,9 @@ describe("sisyphus-task", () => { // when resolveCategoryConfig is called const resolved = resolveCategoryConfig(categoryName, { userCategories, inheritedModel, systemDefaultModel: SYSTEM_DEFAULT_MODEL }) - // then should use category's built-in model (gemini-3-pro for visual-engineering) + // then should use category's built-in model 
(gemini-3.1-pro for visual-engineering) expect(resolved).not.toBeNull() - expect(resolved!.model).toBe("google/gemini-3-pro") + expect(resolved!.model).toBe("google/gemini-3.1-pro") }) test("systemDefaultModel is used when no other model is available", () => { @@ -3522,7 +3522,7 @@ describe("sisyphus-task", () => { ) // then - should resolve via AGENT_MODEL_REQUIREMENTS fallback chain for oracle - // oracle fallback chain: gpt-5.2 (openai) > gemini-3-pro (google) > claude-opus-4-6 (anthropic) + // oracle fallback chain: gpt-5.2 (openai) > gemini-3.1-pro (google) > claude-opus-4-6 (anthropic) // Since openai is in connectedProviders, should resolve to openai/gpt-5.2 expect(promptBody.model).toBeDefined() expect(promptBody.model.providerID).toBe("openai") diff --git a/src/tools/delegate-task/tools.ts b/src/tools/delegate-task/tools.ts index 0ab4c1baa..9b0915330 100644 --- a/src/tools/delegate-task/tools.ts +++ b/src/tools/delegate-task/tools.ts @@ -142,7 +142,7 @@ export function createDelegateTask(options: DelegateTaskToolOptions): ToolDefini const runInBackground = args.run_in_background === true - const { content: skillContent, error: skillError } = await resolveSkillContent(args.load_skills, { + const { content: skillContent, contents: skillContents, error: skillError } = await resolveSkillContent(args.load_skills, { gitMasterConfig: options.gitMasterConfig, browserProvider: options.browserProvider, disabledSkills: options.disabledSkills, @@ -184,6 +184,7 @@ export function createDelegateTask(options: DelegateTaskToolOptions): ToolDefini let actualModel: string | undefined let isUnstableAgent = false let fallbackChain: import("../../shared/model-requirements").FallbackEntry[] | undefined + let maxPromptTokens: number | undefined if (args.category) { const resolution = await resolveCategoryExecution(args, options, inheritedModel, systemDefaultModel) @@ -197,6 +198,7 @@ export function createDelegateTask(options: DelegateTaskToolOptions): ToolDefini actualModel = 
resolution.actualModel isUnstableAgent = resolution.isUnstableAgent fallbackChain = resolution.fallbackChain + maxPromptTokens = resolution.maxPromptTokens const isRunInBackgroundExplicitlyFalse = args.run_in_background === false || args.run_in_background === "false" as unknown as boolean @@ -213,8 +215,11 @@ export function createDelegateTask(options: DelegateTaskToolOptions): ToolDefini if (isUnstableAgent && isRunInBackgroundExplicitlyFalse) { const systemContent = buildSystemContent({ skillContent, + skillContents, categoryPromptAppend, agentName: agentToUse, + maxPromptTokens, + model: categoryModel, availableCategories, availableSkills, }) @@ -239,8 +244,11 @@ export function createDelegateTask(options: DelegateTaskToolOptions): ToolDefini const systemContent = buildSystemContent({ skillContent, + skillContents, categoryPromptAppend, agentName: agentToUse, + maxPromptTokens, + model: categoryModel, availableCategories, availableSkills, }) diff --git a/src/tools/delegate-task/types.ts b/src/tools/delegate-task/types.ts index 13d1973a4..7c749d208 100644 --- a/src/tools/delegate-task/types.ts +++ b/src/tools/delegate-task/types.ts @@ -72,7 +72,12 @@ export interface DelegateTaskToolOptions { export interface BuildSystemContentInput { skillContent?: string + skillContents?: string[] categoryPromptAppend?: string + agentsContext?: string + planAgentPrepend?: string + maxPromptTokens?: number + model?: { providerID: string; modelID: string; variant?: string } agentName?: string availableCategories?: AvailableCategory[] availableSkills?: AvailableSkill[] diff --git a/src/tools/hashline-edit/diff-utils.test.ts b/src/tools/hashline-edit/diff-utils.test.ts index c3373d995..c7d218728 100644 --- a/src/tools/hashline-edit/diff-utils.test.ts +++ b/src/tools/hashline-edit/diff-utils.test.ts @@ -41,6 +41,23 @@ describe("generateUnifiedDiff", () => { expect(diff).toContain(" line 13") }) + it("limits each hunk to three context lines", () => { + //#given + const oldContent = 
createNumberedLines(20) + const newLines = oldContent.split("\n") + newLines[9] = "line 10 updated" + const newContent = newLines.join("\n") + + //#when + const diff = generateUnifiedDiff(oldContent, newContent, "sample.txt") + + //#then + expect(diff).toContain(" line 7") + expect(diff).toContain(" line 13") + expect(diff).not.toContain(" line 6") + expect(diff).not.toContain(" line 14") + }) + it("returns a diff string for identical content", () => { //#given const oldContent = "alpha\nbeta\ngamma" diff --git a/src/tools/hashline-edit/diff-utils.ts b/src/tools/hashline-edit/diff-utils.ts index 975438d27..10c3dfc9d 100644 --- a/src/tools/hashline-edit/diff-utils.ts +++ b/src/tools/hashline-edit/diff-utils.ts @@ -16,7 +16,7 @@ export function toHashlineContent(content: string): string { } export function generateUnifiedDiff(oldContent: string, newContent: string, filePath: string): string { - return createTwoFilesPatch(filePath, filePath, oldContent, newContent) + return createTwoFilesPatch(filePath, filePath, oldContent, newContent, undefined, undefined, { context: 3 }) } export function countLineDiffs(oldContent: string, newContent: string): { additions: number; deletions: number } { diff --git a/src/tools/hashline-edit/edit-deduplication.ts b/src/tools/hashline-edit/edit-deduplication.ts index e689bb53a..8818b61ae 100644 --- a/src/tools/hashline-edit/edit-deduplication.ts +++ b/src/tools/hashline-edit/edit-deduplication.ts @@ -1,18 +1,24 @@ import type { HashlineEdit } from "./types" import { toNewLines } from "./edit-text-normalization" +import { normalizeLineRef } from "./validation" function normalizeEditPayload(payload: string | string[]): string { return toNewLines(payload).join("\n") } +function canonicalAnchor(anchor: string | undefined): string { + if (!anchor) return "" + return normalizeLineRef(anchor) +} + function buildDedupeKey(edit: HashlineEdit): string { switch (edit.op) { case "replace": - return `replace|${edit.pos}|${edit.end ?? 
""}|${normalizeEditPayload(edit.lines)}` + return `replace|${canonicalAnchor(edit.pos)}|${edit.end ? canonicalAnchor(edit.end) : ""}|${normalizeEditPayload(edit.lines)}` case "append": - return `append|${edit.pos ?? ""}|${normalizeEditPayload(edit.lines)}` + return `append|${canonicalAnchor(edit.pos)}|${normalizeEditPayload(edit.lines)}` case "prepend": - return `prepend|${edit.pos ?? ""}|${normalizeEditPayload(edit.lines)}` + return `prepend|${canonicalAnchor(edit.pos)}|${normalizeEditPayload(edit.lines)}` default: return JSON.stringify(edit) } diff --git a/src/tools/hashline-edit/edit-operations.test.ts b/src/tools/hashline-edit/edit-operations.test.ts index 5d8ad08ba..40585210f 100644 --- a/src/tools/hashline-edit/edit-operations.test.ts +++ b/src/tools/hashline-edit/edit-operations.test.ts @@ -1,5 +1,5 @@ import { describe, expect, it } from "bun:test" -import { applyHashlineEdits } from "./edit-operations" +import { applyHashlineEdits, applyHashlineEditsWithReport } from "./edit-operations" import { applyAppend, applyInsertAfter, applyPrepend, applyReplaceLines, applySetLine } from "./edit-operation-primitives" import { computeLineHash } from "./hash-computation" import type { HashlineEdit } from "./types" @@ -389,3 +389,23 @@ describe("hashline edit operations", () => { expect(result).toEqual("replaced A\nline 3\nreplaced B") }) }) + +describe("dedupe anchor canonicalization", () => { + it("deduplicates edits with whitespace-variant anchors", () => { + //#given + const content = "line 1\nline 2" + const lines = content.split("\n") + const canonical = `1#${computeLineHash(1, lines[0])}` + const spaced = ` 1 # ${computeLineHash(1, lines[0])} ` + + //#when + const report = applyHashlineEditsWithReport(content, [ + { op: "append", pos: canonical, lines: ["inserted"] }, + { op: "append", pos: spaced, lines: ["inserted"] }, + ]) + + //#then + expect(report.deduplicatedEdits).toBe(1) + expect(report.content).toBe("line 1\ninserted\nline 2") + }) +}) diff --git 
a/src/tools/hashline-edit/hashline-edit-executor.ts b/src/tools/hashline-edit/hashline-edit-executor.ts index e20ebbf96..d316307db 100644 --- a/src/tools/hashline-edit/hashline-edit-executor.ts +++ b/src/tools/hashline-edit/hashline-edit-executor.ts @@ -33,7 +33,7 @@ function resolveToolCallID(ctx: ToolContextWithCallID): string | undefined { function canCreateFromMissingFile(edits: HashlineEdit[]): boolean { if (edits.length === 0) return false - return edits.every((edit) => edit.op === "append" || edit.op === "prepend") + return edits.every((edit) => (edit.op === "append" || edit.op === "prepend") && !edit.pos) } function buildSuccessMeta( @@ -86,19 +86,19 @@ export async function executeHashlineEditTool(args: HashlineEditArgs, context: T const filePath = args.filePath const { delete: deleteMode, rename } = args + if (deleteMode && rename) { + return "Error: delete and rename cannot be used together" + } + if (deleteMode && args.edits.length > 0) { + return "Error: delete mode requires edits to be an empty array" + } + if (!deleteMode && (!args.edits || !Array.isArray(args.edits) || args.edits.length === 0)) { return "Error: edits parameter must be a non-empty array" } const edits = deleteMode ? 
[] : normalizeHashlineEdits(args.edits) - if (deleteMode && rename) { - return "Error: delete and rename cannot be used together" - } - if (deleteMode && edits.length > 0) { - return "Error: delete mode requires edits to be an empty array" - } - const file = Bun.file(filePath) const exists = await file.exists() if (!exists && !deleteMode && !canCreateFromMissingFile(edits)) { diff --git a/src/tools/hashline-edit/tool-description.ts b/src/tools/hashline-edit/tool-description.ts index 0b0ee00fa..2d452ccfa 100644 --- a/src/tools/hashline-edit/tool-description.ts +++ b/src/tools/hashline-edit/tool-description.ts @@ -10,7 +10,7 @@ WORKFLOW: VALIDATION: Payload shape: { "filePath": string, "edits": [...], "delete"?: boolean, "rename"?: string } Each edit must be one of: replace, append, prepend - Edit shape: { "op": "replace"|"append"|"prepend", "pos"?: "LINE#ID", "end"?: "LINE#ID", "lines"?: string|string[]|null } + Edit shape: { "op": "replace"|"append"|"prepend", "pos"?: "LINE#ID", "end"?: "LINE#ID", "lines": string|string[]|null } lines must contain plain replacement text only (no LINE#ID prefixes, no diff + markers) CRITICAL: all operations validate against the same pre-edit file snapshot and apply bottom-up. Refs/tags are interpreted against the last-read version of the file. 
diff --git a/src/tools/hashline-edit/tools.test.ts b/src/tools/hashline-edit/tools.test.ts index cb76b834b..1158ca3d2 100644 --- a/src/tools/hashline-edit/tools.test.ts +++ b/src/tools/hashline-edit/tools.test.ts @@ -341,4 +341,81 @@ describe("createHashlineEditTool", () => { //#then expect(envelope.lineEnding).toBe("\r\n") }) + + it("rejects delete=true with non-empty edits before normalization", async () => { + //#given + const filePath = path.join(tempDir, "delete-reject.txt") + fs.writeFileSync(filePath, "line1") + + //#when + const result = await tool.execute( + { + filePath, + delete: true, + edits: [{ op: "replace", pos: "1#ZZ", lines: "bad" }], + }, + createMockContext(), + ) + + //#then + expect(result).toContain("delete mode requires edits to be an empty array") + expect(fs.existsSync(filePath)).toBe(true) + }) + + it("rejects delete=true combined with rename", async () => { + //#given + const filePath = path.join(tempDir, "delete-rename.txt") + fs.writeFileSync(filePath, "line1") + + //#when + const result = await tool.execute( + { + filePath, + delete: true, + rename: path.join(tempDir, "new-name.txt"), + edits: [], + }, + createMockContext(), + ) + + //#then + expect(result).toContain("delete and rename cannot be used together") + expect(fs.existsSync(filePath)).toBe(true) + }) + + it("rejects missing file creation with anchored append", async () => { + //#given + const filePath = path.join(tempDir, "nonexistent.txt") + + //#when + const result = await tool.execute( + { + filePath, + edits: [{ op: "append", pos: "1#ZZ", lines: ["bad"] }], + }, + createMockContext(), + ) + + //#then + expect(result).toContain("File not found") + }) + + it("allows missing file creation with unanchored append", async () => { + //#given + const filePath = path.join(tempDir, "newfile.txt") + + //#when + const result = await tool.execute( + { + filePath, + edits: [{ op: "append", lines: ["created"] }], + }, + createMockContext(), + ) + + //#then + 
expect(fs.existsSync(filePath)).toBe(true) + expect(fs.readFileSync(filePath, "utf-8")).toBe("created") + expect(result).toBe(`Updated ${filePath}`) + }) }) diff --git a/src/tools/hashline-edit/tools.ts b/src/tools/hashline-edit/tools.ts index 132650297..bd2bf1f90 100644 --- a/src/tools/hashline-edit/tools.ts +++ b/src/tools/hashline-edit/tools.ts @@ -31,7 +31,6 @@ export function createHashlineEditTool(): ToolDefinition { end: tool.schema.string().optional().describe("Range end anchor in LINE#ID format"), lines: tool.schema .union([tool.schema.string(), tool.schema.array(tool.schema.string()), tool.schema.null()]) - .optional() .describe("Replacement or inserted lines. null/[] deletes with replace"), }) ) diff --git a/src/tools/hashline-edit/validation.ts b/src/tools/hashline-edit/validation.ts index fc5b395a1..ed6061557 100644 --- a/src/tools/hashline-edit/validation.ts +++ b/src/tools/hashline-edit/validation.ts @@ -15,7 +15,7 @@ const MISMATCH_CONTEXT = 2 const LINE_REF_EXTRACT_PATTERN = /([0-9]+#[ZPMQVRWSNKTXJBYH]{2})/ -function normalizeLineRef(ref: string): string { +export function normalizeLineRef(ref: string): string { const originalTrimmed = ref.trim() let trimmed = originalTrimmed trimmed = trimmed.replace(/^(?:>>>|[+-])\s*/, "") diff --git a/src/tools/interactive-bash/tools.ts b/src/tools/interactive-bash/tools.ts index dac46bd60..a0795ee36 100644 --- a/src/tools/interactive-bash/tools.ts +++ b/src/tools/interactive-bash/tools.ts @@ -1,4 +1,5 @@ import { tool, type ToolDefinition } from "@opencode-ai/plugin/tool" +import { spawnWithWindowsHide } from "../../shared/spawn-with-windows-hide" import { BLOCKED_TMUX_SUBCOMMANDS, DEFAULT_TIMEOUT_MS, INTERACTIVE_BASH_DESCRIPTION } from "./constants" import { getCachedTmuxPath } from "./tmux-path-resolver" @@ -89,7 +90,7 @@ tmux capture-pane -p -t ${sessionName} -S -1000 The Bash tool can execute these commands directly. 
Do NOT retry with interactive_bash.` } - const proc = Bun.spawn([tmuxPath, ...parts], { + const proc = spawnWithWindowsHide([tmuxPath, ...parts], { stdout: "pipe", stderr: "pipe", })