Compare commits
19 Commits
fix/agent-
...
v3.9.0
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ae54fd31f4 | ||
|
|
bdd86b1415 | ||
|
|
76cba9b222 | ||
|
|
2955dc868f | ||
|
|
3ab4b7f77b | ||
|
|
3540d1c550 | ||
|
|
9bc9dcaa18 | ||
|
|
f2a1412bf1 | ||
|
|
190c6991ac | ||
|
|
e17a00a906 | ||
|
|
c8aa1bbce4 | ||
|
|
911710e4d4 | ||
|
|
050b93bebb | ||
|
|
2ffa803b05 | ||
|
|
cf97494073 | ||
|
|
8fb5949ac6 | ||
|
|
04f50bac1f | ||
|
|
d1a0a66dde | ||
|
|
b1203b9501 |
@@ -24,19 +24,7 @@
|
||||
"disabled_agents": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"sisyphus",
|
||||
"hephaestus",
|
||||
"prometheus",
|
||||
"oracle",
|
||||
"librarian",
|
||||
"explore",
|
||||
"multimodal-looker",
|
||||
"metis",
|
||||
"momus",
|
||||
"atlas"
|
||||
]
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"disabled_skills": {
|
||||
|
||||
62
benchmarks/bun.lock
Normal file
62
benchmarks/bun.lock
Normal file
@@ -0,0 +1,62 @@
|
||||
{
|
||||
"lockfileVersion": 1,
|
||||
"configVersion": 1,
|
||||
"workspaces": {
|
||||
"": {
|
||||
"name": "hashline-edit-benchmark",
|
||||
"dependencies": {
|
||||
"@ai-sdk/openai": "^1.3.0",
|
||||
"@friendliai/ai-provider": "^1.0.9",
|
||||
"ai": "^6.0.94",
|
||||
"zod": "^4.1.0",
|
||||
},
|
||||
},
|
||||
},
|
||||
"packages": {
|
||||
"@ai-sdk/gateway": ["@ai-sdk/gateway@3.0.55", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@ai-sdk/provider-utils": "4.0.15", "@vercel/oidc": "3.1.0" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-7xMeTJnCjwRwXKVCiv4Ly4qzWvDuW3+W1WIV0X1EFu6W83d4mEhV9bFArto10MeTw40ewuDjrbrZd21mXKohkw=="],
|
||||
|
||||
"@ai-sdk/openai": ["@ai-sdk/openai@1.3.24", "", { "dependencies": { "@ai-sdk/provider": "1.1.3", "@ai-sdk/provider-utils": "2.2.8" }, "peerDependencies": { "zod": "^3.0.0" } }, "sha512-GYXnGJTHRTZc4gJMSmFRgEQudjqd4PUN0ZjQhPwOAYH1yOAvQoG/Ikqs+HyISRbLPCrhbZnPKCNHuRU4OfpW0Q=="],
|
||||
|
||||
"@ai-sdk/openai-compatible": ["@ai-sdk/openai-compatible@2.0.30", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@ai-sdk/provider-utils": "4.0.15" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-iTjumHf1/u4NhjXYFn/aONM2GId3/o7J1Lp5ql8FCbgIMyRwrmanR5xy1S3aaVkfTscuDvLTzWiy1mAbGzK3nQ=="],
|
||||
|
||||
"@ai-sdk/provider": ["@ai-sdk/provider@1.1.3", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-qZMxYJ0qqX/RfnuIaab+zp8UAeJn/ygXXAffR5I4N0n1IrvA6qBsjc8hXLmBiMV2zoXlifkacF7sEFnYnjBcqg=="],
|
||||
|
||||
"@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@2.2.8", "", { "dependencies": { "@ai-sdk/provider": "1.1.3", "nanoid": "^3.3.8", "secure-json-parse": "^2.7.0" }, "peerDependencies": { "zod": "^3.23.8" } }, "sha512-fqhG+4sCVv8x7nFzYnFo19ryhAa3w096Kmc3hWxMQfW/TubPOmt3A6tYZhl4mUfQWWQMsuSkLrtjlWuXBVSGQA=="],
|
||||
|
||||
"@friendliai/ai-provider": ["@friendliai/ai-provider@1.1.4", "", { "dependencies": { "@ai-sdk/openai-compatible": "2.0.30", "@ai-sdk/provider": "3.0.8", "@ai-sdk/provider-utils": "4.0.15" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.12" } }, "sha512-9TU4B1QFqPhbkONjI5afCF7Ox4jOqtGg1xw8mA9QHZdtlEbZxU+mBNvMPlI5pU5kPoN6s7wkXmFmxpID+own1A=="],
|
||||
|
||||
"@opentelemetry/api": ["@opentelemetry/api@1.9.0", "", {}, "sha512-3giAOQvZiH5F9bMlMiv8+GSPMeqg0dbaeo58/0SlA9sxSqZhnUtxzX9/2FzyhS9sWQf5S0GJE0AKBrFqjpeYcg=="],
|
||||
|
||||
"@standard-schema/spec": ["@standard-schema/spec@1.1.0", "", {}, "sha512-l2aFy5jALhniG5HgqrD6jXLi/rUWrKvqN/qJx6yoJsgKhblVd+iqqU4RCXavm/jPityDo5TCvKMnpjKnOriy0w=="],
|
||||
|
||||
"@vercel/oidc": ["@vercel/oidc@3.1.0", "", {}, "sha512-Fw28YZpRnA3cAHHDlkt7xQHiJ0fcL+NRcIqsocZQUSmbzeIKRpwttJjik5ZGanXP+vlA4SbTg+AbA3bP363l+w=="],
|
||||
|
||||
"ai": ["ai@6.0.101", "", { "dependencies": { "@ai-sdk/gateway": "3.0.55", "@ai-sdk/provider": "3.0.8", "@ai-sdk/provider-utils": "4.0.15", "@opentelemetry/api": "1.9.0" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-Ur/NgbgOp1rdhyDiKDk6EOpSgd1g5ADlbcD1cjQJtQsnmhEngz3Rf8nK5JetDh0vnbLy2aEBpaQeL+zvLRWuaA=="],
|
||||
|
||||
"eventsource-parser": ["eventsource-parser@3.0.6", "", {}, "sha512-Vo1ab+QXPzZ4tCa8SwIHJFaSzy4R6SHf7BY79rFBDf0idraZWAkYrDjDj8uWaSm3S2TK+hJ7/t1CEmZ7jXw+pg=="],
|
||||
|
||||
"json-schema": ["json-schema@0.4.0", "", {}, "sha512-es94M3nTIfsEPisRafak+HDLfHXnKBhV3vU5eqPcS3flIWqcxJWgXHXiey3YrpaNsanY5ei1VoYEbOzijuq9BA=="],
|
||||
|
||||
"nanoid": ["nanoid@3.3.11", "", { "bin": { "nanoid": "bin/nanoid.cjs" } }, "sha512-N8SpfPUnUp1bK+PMYW8qSWdl9U+wwNWI4QKxOYDy9JAro3WMX7p2OeVRF9v+347pnakNevPmiHhNmZ2HbFA76w=="],
|
||||
|
||||
"secure-json-parse": ["secure-json-parse@2.7.0", "", {}, "sha512-6aU+Rwsezw7VR8/nyvKTx8QpWH9FrcYiXXlqC4z5d5XQBDRqtbfsRjnwGyqbi3gddNtWHuEk9OANUotL26qKUw=="],
|
||||
|
||||
"zod": ["zod@4.3.6", "", {}, "sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg=="],
|
||||
|
||||
"@ai-sdk/gateway/@ai-sdk/provider": ["@ai-sdk/provider@3.0.8", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-oGMAgGoQdBXbZqNG0Ze56CHjDZ1IDYOwGYxYjO5KLSlz5HiNQ9udIXsPZ61VWaHGZ5XW/jyjmr6t2xz2jGVwbQ=="],
|
||||
|
||||
"@ai-sdk/gateway/@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@4.0.15", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@standard-schema/spec": "^1.1.0", "eventsource-parser": "^3.0.6" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-8XiKWbemmCbvNN0CLR9u3PQiet4gtEVIrX4zzLxnCj06AwsEDJwJVBbKrEI4t6qE8XRSIvU2irka0dcpziKW6w=="],
|
||||
|
||||
"@ai-sdk/openai-compatible/@ai-sdk/provider": ["@ai-sdk/provider@3.0.8", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-oGMAgGoQdBXbZqNG0Ze56CHjDZ1IDYOwGYxYjO5KLSlz5HiNQ9udIXsPZ61VWaHGZ5XW/jyjmr6t2xz2jGVwbQ=="],
|
||||
|
||||
"@ai-sdk/openai-compatible/@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@4.0.15", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@standard-schema/spec": "^1.1.0", "eventsource-parser": "^3.0.6" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-8XiKWbemmCbvNN0CLR9u3PQiet4gtEVIrX4zzLxnCj06AwsEDJwJVBbKrEI4t6qE8XRSIvU2irka0dcpziKW6w=="],
|
||||
|
||||
"@friendliai/ai-provider/@ai-sdk/provider": ["@ai-sdk/provider@3.0.8", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-oGMAgGoQdBXbZqNG0Ze56CHjDZ1IDYOwGYxYjO5KLSlz5HiNQ9udIXsPZ61VWaHGZ5XW/jyjmr6t2xz2jGVwbQ=="],
|
||||
|
||||
"@friendliai/ai-provider/@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@4.0.15", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@standard-schema/spec": "^1.1.0", "eventsource-parser": "^3.0.6" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-8XiKWbemmCbvNN0CLR9u3PQiet4gtEVIrX4zzLxnCj06AwsEDJwJVBbKrEI4t6qE8XRSIvU2irka0dcpziKW6w=="],
|
||||
|
||||
"ai/@ai-sdk/provider": ["@ai-sdk/provider@3.0.8", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-oGMAgGoQdBXbZqNG0Ze56CHjDZ1IDYOwGYxYjO5KLSlz5HiNQ9udIXsPZ61VWaHGZ5XW/jyjmr6t2xz2jGVwbQ=="],
|
||||
|
||||
"ai/@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@4.0.15", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@standard-schema/spec": "^1.1.0", "eventsource-parser": "^3.0.6" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-8XiKWbemmCbvNN0CLR9u3PQiet4gtEVIrX4zzLxnCj06AwsEDJwJVBbKrEI4t6qE8XRSIvU2irka0dcpziKW6w=="],
|
||||
}
|
||||
}
|
||||
193
benchmarks/headless.ts
Normal file
193
benchmarks/headless.ts
Normal file
@@ -0,0 +1,193 @@
|
||||
#!/usr/bin/env bun
|
||||
import { readFile, writeFile, mkdir } from "node:fs/promises"
|
||||
import { join, dirname } from "node:path"
|
||||
import { stepCountIs, streamText, type CoreMessage } from "ai"
|
||||
import { tool } from "ai"
|
||||
import { createFriendli } from "@friendliai/ai-provider"
|
||||
import { z } from "zod"
|
||||
import { formatHashLines } from "../src/tools/hashline-edit/hash-computation"
|
||||
import { normalizeHashlineEdits } from "../src/tools/hashline-edit/normalize-edits"
|
||||
import { applyHashlineEditsWithReport } from "../src/tools/hashline-edit/edit-operations"
|
||||
import { canonicalizeFileText, restoreFileText } from "../src/tools/hashline-edit/file-text-canonicalization"
|
||||
|
||||
/** Model id used when the command line does not supply one. */
const DEFAULT_MODEL = "MiniMaxAI/MiniMax-M2.5"
/** Upper bound on the number of agent-loop iterations. */
const MAX_STEPS = 50
/** Identifier stamped on every emitted event: "bench", epoch millis and a short base-36 random suffix. */
const sessionId = ["bench", Date.now(), Math.random().toString(36).slice(2, 8)].join("-")

/**
 * Writes one event to stdout as a single JSON line.
 *
 * The session id and an ISO-8601 timestamp are placed first; fields of
 * `event` are spread last, so a same-named field in `event` wins.
 */
const emit = (event: Record<string, unknown>) => {
  const payload = { sessionId, timestamp: new Date().toISOString(), ...event }
  console.log(JSON.stringify(payload))
}
|
||||
|
||||
// ── CLI ──────────────────────────────────────────────────────
|
||||
/**
 * Extracts the prompt and model id from the command line.
 *
 * Recognised flags are `-p`/`--prompt <text>` and `-m`/`--model <id>`.
 * `--reasoning-mode <value>` is accepted but its value is discarded, and any
 * other argument (e.g. `--no-translate`, `--think`) is skipped. A flag that
 * is not followed by a non-empty value has no effect.
 *
 * Prints a usage line to stderr and exits with status 1 when no prompt was
 * supplied.
 */
function parseArgs(): { prompt: string; modelId: string } {
  const argv = process.argv.slice(2)
  let prompt = ""
  let modelId = DEFAULT_MODEL

  let index = 0
  while (index < argv.length) {
    const flag = argv[index]
    const value = argv[index + 1]
    // A flag only takes effect (and consumes its value) when a non-empty value follows.
    if (value) {
      switch (flag) {
        case "-p":
        case "--prompt":
          prompt = value
          index++
          break
        case "-m":
        case "--model":
          modelId = value
          index++
          break
        case "--reasoning-mode":
          index++ // consume
          break
      }
    }
    index++
  }

  if (prompt) {
    return { prompt, modelId }
  }
  console.error("Usage: bun run benchmarks/headless.ts -p <prompt> [-m <model>]")
  process.exit(1)
}
|
||||
|
||||
// ── Tools ────────────────────────────────────────────────────
|
||||
/**
 * Tool that reads a file and returns its content tagged by `formatHashLines`.
 *
 * `path` is joined onto the current working directory. On success the result
 * starts with an "OK - read file" header (path and newline-separated line
 * count) followed by the tagged content. Failures are returned as a string
 * starting with "Error:" rather than thrown, so the model sees them as tool
 * output.
 */
const readFileTool = tool({
  description: "Read a file with hashline-tagged content (LINE#ID format)",
  inputSchema: z.object({ path: z.string().describe("File path") }),
  execute: async ({ path }) => {
    // NOTE(review): join() does not stop "../" segments from leaving the working directory.
    const fullPath = join(process.cwd(), path)
    try {
      const content = await readFile(fullPath, "utf-8")
      const lineCount = content.split("\n").length
      const tagged = formatHashLines(content)
      return `OK - read file\npath: ${path}\nlines: ${lineCount}\n\n${tagged}`
    } catch (error) {
      // Only a missing file is reported as "not found"; other failures
      // (permissions, path is a directory, formatter errors) keep their own message.
      const code = error instanceof Error && "code" in error ? error.code : undefined
      if (code === "ENOENT") {
        return `Error: File not found: ${path}`
      }
      const reason = error instanceof Error ? error.message : String(error)
      return `Error: Could not read ${path}: ${reason}`
    }
  },
})
|
||||
|
||||
/**
 * Tool that applies hashline-anchored edits to a file, creating it if allowed.
 *
 * `path` is joined onto the current working directory. The edits are
 * normalized, applied to the canonicalized file text, and the restored text
 * is written back (parent directories are created as needed). A missing file
 * may only be created when every normalized edit is an `append`/`prepend`
 * without a `pos` anchor. Edits that leave the content unchanged are reported
 * as an error. All failures are returned as a string starting with "Error:"
 * rather than thrown.
 */
const editFileTool = tool({
  description: "Edit a file using hashline anchors (LINE#ID format)",
  inputSchema: z.object({
    path: z.string(),
    edits: z.array(
      z.object({
        op: z.enum(["replace", "append", "prepend"]),
        pos: z.string().optional(),
        end: z.string().optional(),
        lines: z.union([z.array(z.string()), z.string(), z.null()]),
      })
    ).min(1),
  }),
  execute: async ({ path, edits }) => {
    const fullPath = join(process.cwd(), path)
    // Empty text has zero lines; "".split("\n") would otherwise count one,
    // skewing the reported delta for newly created files.
    const countLines = (text: string) => (text === "" ? 0 : text.split("\n").length)

    try {
      let rawContent = ""
      let exists = true
      try {
        rawContent = await readFile(fullPath, "utf-8")
      } catch (error) {
        // Only ENOENT means "no such file". Anything else (permissions, path
        // is a directory, ...) is a real failure and must not fall into the
        // create-new-file path; rethrow so the outer handler reports it.
        const code = error instanceof Error && "code" in error ? error.code : undefined
        if (code !== "ENOENT") throw error
        exists = false
      }

      const normalized = normalizeHashlineEdits(edits)

      if (!exists) {
        // A file can be created only from anchor-less append/prepend edits.
        const canCreate = normalized.every(
          (e) => (e.op === "append" || e.op === "prepend") && !e.pos
        )
        if (!canCreate) return `Error: File not found: ${path}`
      }

      const envelope = canonicalizeFileText(rawContent)
      const result = applyHashlineEditsWithReport(envelope.content, normalized)

      if (result.content === envelope.content) {
        return `Error: No changes made to ${path}. The edits produced identical content.`
      }

      const writeContent = restoreFileText(result.content, envelope)
      await mkdir(dirname(fullPath), { recursive: true })
      await writeFile(fullPath, writeContent, "utf-8")

      const delta = countLines(writeContent) - countLines(rawContent)
      const sign = delta > 0 ? "+" : ""
      const action = exists ? "Updated" : "Created"
      return `${action} ${path}\n${edits.length} edit(s) applied, ${sign}${delta} line(s)`
    } catch (error) {
      return `Error: ${error instanceof Error ? error.message : String(error)}`
    }
  },
})
|
||||
|
||||
// ── Agent Loop ───────────────────────────────────────────────
|
||||
/**
 * Runs the headless agent loop for a single prompt.
 *
 * Parses the CLI arguments, builds a FriendliAI model, then repeatedly calls
 * `streamText` with one model step per iteration (at most MAX_STEPS
 * iterations). Tool calls and tool results are emitted as JSON events while
 * the stream is consumed; the response messages are appended to the
 * conversation after each step. The loop ends when a step finishes for any
 * reason other than "tool-calls", emitting the final assistant text if it is
 * non-blank.
 *
 * @throws Error when FRIENDLI_TOKEN is not set, or when the stream reports an error part.
 */
async function run() {
  const { prompt, modelId } = parseArgs()

  // Fail with a clear message instead of handing an undefined key to the provider.
  const apiKey = process.env.FRIENDLI_TOKEN
  if (!apiKey) {
    throw new Error("FRIENDLI_TOKEN environment variable is not set")
  }

  const friendli = createFriendli({ apiKey })
  const model = friendli(modelId)
  const tools = { read_file: readFileTool, edit_file: editFileTool }

  emit({ type: "user", content: prompt })

  const messages: CoreMessage[] = [{ role: "user", content: prompt }]
  const system =
    "You are a code editing assistant. Use read_file to read files and edit_file to edit them. " +
    "Always read a file before editing it to get fresh LINE#ID anchors."

  for (let step = 0; step < MAX_STEPS; step++) {
    const stream = streamText({
      model,
      tools,
      messages,
      system,
      // One model step per call; this outer loop drives the multi-step conversation.
      stopWhen: stepCountIs(1),
    })

    let currentText = ""
    for await (const part of stream.fullStream) {
      switch (part.type) {
        case "text-delta":
          currentText += part.text
          break
        case "tool-call":
          emit({
            type: "tool_call",
            tool_call_id: part.toolCallId,
            tool_name: part.toolName,
            // Current AI SDK stream parts carry the arguments in `input` (formerly `args`).
            tool_input: part.input,
            model: modelId,
          })
          break
        case "tool-result": {
          // Current AI SDK stream parts carry the tool return value in `output` (formerly `result`).
          const output = typeof part.output === "string" ? part.output : JSON.stringify(part.output)
          // Both tools report failures as strings starting with "Error:".
          const isError = typeof output === "string" && output.startsWith("Error:")
          emit({
            type: "tool_result",
            tool_call_id: part.toolCallId,
            output,
            ...(isError ? { error: output } : {}),
          })
          break
        }
        case "error":
          // Stream failures arrive as parts rather than exceptions; surface them to the caller.
          throw part.error instanceof Error ? part.error : new Error(String(part.error))
      }
    }

    const response = await stream.response
    messages.push(...response.messages)

    const finishReason = await stream.finishReason
    if (finishReason !== "tool-calls") {
      if (currentText.trim()) {
        emit({ type: "assistant", content: currentText, model: modelId })
      }
      break
    }
  }
}
|
||||
|
||||
// ── Signal + Startup ─────────────────────────────────────────
|
||||
// Exit on the first SIGINT (status 0) or SIGTERM (status 143).
process.once("SIGINT", () => {
  process.exit(0)
})
process.once("SIGTERM", () => {
  process.exit(143)
})

const startTime = Date.now()

// Entry point: run the agent, emit an error event and exit 1 on failure,
// otherwise log the elapsed wall-clock time to stderr.
void (async () => {
  try {
    await run()
  } catch (error) {
    emit({ type: "error", error: error instanceof Error ? error.message : String(error) })
    process.exit(1)
  }
  const elapsed = ((Date.now() - startTime) / 1000).toFixed(2)
  console.error(`[headless] Completed in ${elapsed}s`)
})()
|
||||
19
benchmarks/package.json
Normal file
19
benchmarks/package.json
Normal file
@@ -0,0 +1,19 @@
|
||||
{
|
||||
"name": "hashline-edit-benchmark",
|
||||
"version": "0.1.0",
|
||||
"private": true,
|
||||
"type": "module",
|
||||
"description": "Hashline edit tool benchmark using Vercel AI SDK with FriendliAI provider",
|
||||
"scripts": {
|
||||
"bench:basic": "bun run test-edit-ops.ts",
|
||||
"bench:edge": "bun run test-edge-cases.ts",
|
||||
"bench:multi": "bun run test-multi-model.ts",
|
||||
"bench:all": "bun run bench:basic && bun run bench:edge"
|
||||
},
|
||||
"dependencies": {
|
||||
"ai": "^6.0.94",
|
||||
"@ai-sdk/openai": "^1.3.0",
|
||||
"@friendliai/ai-provider": "^1.0.9",
|
||||
"zod": "^4.1.0"
|
||||
}
|
||||
}
|
||||
1121
benchmarks/test-edge-cases.ts
Normal file
1121
benchmarks/test-edge-cases.ts
Normal file
File diff suppressed because it is too large
Load Diff
808
benchmarks/test-edit-ops.ts
Normal file
808
benchmarks/test-edit-ops.ts
Normal file
@@ -0,0 +1,808 @@
|
||||
#!/usr/bin/env bun
|
||||
/**
|
||||
* Comprehensive headless edit_file stress test: 21 operation types
|
||||
*
|
||||
* Tests: 5 basic ops + 10 creative cases + 6 whitespace cases
|
||||
* Each runs via headless mode with its own demo file + prompt.
|
||||
*
|
||||
* Usage:
|
||||
* bun run benchmarks/test-edit-ops.ts [-m <model>] [--provider <provider>]
|
||||
*/
|
||||
|
||||
import { spawn } from "node:child_process";
|
||||
import { mkdirSync, readFileSync, rmSync, writeFileSync } from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join, resolve } from "node:path";
|
||||
|
||||
// ── CLI arg passthrough ───────────────────────────────────────
|
||||
// Arguments collected here are the recognised subset of this script's own
// command line: value-taking flags are copied together with their value,
// bare switches are copied alone, and everything else is dropped.
const extraArgs: string[] = [];
const rawArgs = process.argv.slice(2);
{
  const flagsWithValue = new Set(["-m", "--model", "--provider", "--reasoning-mode"]);
  const bareFlags = new Set(["--think", "--no-translate"]);

  let cursor = 0;
  while (cursor < rawArgs.length) {
    const arg = rawArgs[cursor];
    const hasNext = cursor + 1 < rawArgs.length;
    if (flagsWithValue.has(arg) && hasNext) {
      // Copy the flag and its value, then step over both.
      extraArgs.push(arg, rawArgs[cursor + 1]);
      cursor += 2;
      continue;
    }
    if (bareFlags.has(arg)) {
      extraArgs.push(arg);
    }
    cursor += 1;
  }
}
|
||||
|
||||
// ── Colors ────────────────────────────────────────────────────
|
||||
// ANSI SGR escape sequences used to style terminal output.
const BOLD = "\x1b[1m";
const GREEN = "\x1b[32m";
const RED = "\x1b[31m";
const YELLOW = "\x1b[33m";
const DIM = "\x1b[2m";
const CYAN = "\x1b[36m";
const RESET = "\x1b[0m";

/** Logs a message prefixed with a green check mark. */
const pass = (msg: string) => {
  console.log(" " + GREEN + "✓" + RESET + " " + msg);
};

/** Logs a message prefixed with a red cross. */
const fail = (msg: string) => {
  console.log(" " + RED + "✗" + RESET + " " + msg);
};

/** Logs a dimmed message with no glyph. */
const info = (msg: string) => {
  console.log(" " + DIM + msg + RESET);
};

/** Logs a message prefixed with a yellow warning sign. */
const warn = (msg: string) => {
  console.log(" " + YELLOW + "⚠" + RESET + " " + msg);
};
|
||||
|
||||
// ── Test case definition ─────────────────────────────────────
|
||||
/** One headless edit scenario: a demo file, the prompt to run against it, and a checker. */
interface TestCase {
  /** Human-readable label for the scenario. */
  name: string;
  /** Name of the demo file the prompt refers to. */
  fileName: string;
  /** Text of the demo file. */
  fileContent: string;
  /** Instruction text given to the model. */
  prompt: string;
  /**
   * Checks a file's text and reports the verdict with a short explanation.
   * Presumably called with the demo file's content after the run — confirm against the runner.
   */
  validate: (content: string) => { passed: boolean; reason: string };
}
|
||||
|
||||
const TEST_CASES: TestCase[] = [
|
||||
{
|
||||
name: "1. Replace single line",
|
||||
fileName: "config.txt",
|
||||
fileContent: [
|
||||
"host: localhost",
|
||||
"port: 3000",
|
||||
"debug: false",
|
||||
"timeout: 30",
|
||||
"retries: 3",
|
||||
].join("\n"),
|
||||
prompt: [
|
||||
"Follow these steps exactly:",
|
||||
"Step 1: Call read_file on config.txt.",
|
||||
"Step 2: Note the anchor for the port line (line 2).",
|
||||
"Step 3: Call edit_file with path='config.txt' and edits containing ONE object:",
|
||||
" { op: 'replace', pos: '<line2 anchor>', lines: ['port: 8080'] }",
|
||||
"IMPORTANT: pos must be ONLY the anchor (like '2#KB'). lines must be a SEPARATE array field with the new content.",
|
||||
].join(" "),
|
||||
validate: (content) => {
|
||||
const has8080 = content.includes("port: 8080");
|
||||
const has3000 = content.includes("port: 3000");
|
||||
if (has8080 && !has3000) {
|
||||
return { passed: true, reason: "port changed to 8080" };
|
||||
}
|
||||
if (has3000) {
|
||||
return { passed: false, reason: "port still 3000 — edit not applied" };
|
||||
}
|
||||
return {
|
||||
passed: false,
|
||||
reason: `unexpected content: ${content.slice(0, 100)}`,
|
||||
};
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "2. Append after line",
|
||||
fileName: "fruits.txt",
|
||||
fileContent: ["apple", "banana", "cherry"].join("\n"),
|
||||
prompt:
|
||||
"Read fruits.txt with read_file. Then use edit_file with op='append' to insert a new line 'grape' after the 'banana' line. Use pos='LINE#HASH' of the banana line and lines=['grape'].",
|
||||
validate: (content) => {
|
||||
const lines = content.trim().split("\n");
|
||||
const bananaIdx = lines.findIndex((l) => l.trim() === "banana");
|
||||
const grapeIdx = lines.findIndex((l) => l.trim() === "grape");
|
||||
if (grapeIdx === -1) {
|
||||
return { passed: false, reason: '"grape" not found in file' };
|
||||
}
|
||||
if (bananaIdx === -1) {
|
||||
return { passed: false, reason: '"banana" was removed' };
|
||||
}
|
||||
if (grapeIdx !== bananaIdx + 1) {
|
||||
return {
|
||||
passed: false,
|
||||
reason: `"grape" at line ${grapeIdx + 1} but expected after "banana" at line ${bananaIdx + 1}`,
|
||||
};
|
||||
}
|
||||
if (lines.length !== 4) {
|
||||
return {
|
||||
passed: false,
|
||||
reason: `expected 4 lines, got ${lines.length}`,
|
||||
};
|
||||
}
|
||||
return {
|
||||
passed: true,
|
||||
reason: '"grape" correctly appended after "banana"',
|
||||
};
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "3. Prepend before line",
|
||||
fileName: "code.txt",
|
||||
fileContent: ["function greet() {", ' return "hello";', "}"].join("\n"),
|
||||
prompt:
|
||||
"Read code.txt with read_file. Then use edit_file with op='prepend' to add '// Greeting function' before the function line. Use pos='LINE#HASH' of the function line and lines=['// Greeting function'].",
|
||||
validate: (content) => {
|
||||
const lines = content.trim().split("\n");
|
||||
const commentIdx = lines.findIndex(
|
||||
(l) => l.trim().startsWith("//") && l.toLowerCase().includes("greet")
|
||||
);
|
||||
const funcIdx = lines.findIndex((l) =>
|
||||
l.trim().startsWith("function greet")
|
||||
);
|
||||
if (commentIdx === -1) {
|
||||
return { passed: false, reason: "comment line not found" };
|
||||
}
|
||||
if (funcIdx === -1) {
|
||||
return { passed: false, reason: '"function greet" line was removed' };
|
||||
}
|
||||
if (commentIdx !== funcIdx - 1) {
|
||||
return {
|
||||
passed: false,
|
||||
reason: `comment at line ${commentIdx + 1} but function at ${funcIdx + 1} — not directly before`,
|
||||
};
|
||||
}
|
||||
return {
|
||||
passed: true,
|
||||
reason: "comment correctly prepended before function",
|
||||
};
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "4. Range replace (multi-line → single line)",
|
||||
fileName: "log.txt",
|
||||
fileContent: [
|
||||
"=== Log Start ===",
|
||||
"INFO: started",
|
||||
"WARN: slow query",
|
||||
"ERROR: timeout",
|
||||
"INFO: recovered",
|
||||
"=== Log End ===",
|
||||
].join("\n"),
|
||||
prompt: [
|
||||
"Follow these steps exactly:",
|
||||
"Step 1: Call read_file on log.txt to see line anchors.",
|
||||
"Step 2: Note the anchor for 'WARN: slow query' (line 3) and 'ERROR: timeout' (line 4).",
|
||||
"Step 3: Call edit_file with path='log.txt' and edits containing ONE object with THREE separate JSON fields:",
|
||||
" { op: 'replace', pos: '<line3 anchor>', end: '<line4 anchor>', lines: ['RESOLVED: issues cleared'] }",
|
||||
"CRITICAL: pos, end, and lines are THREE SEPARATE JSON fields. pos is ONLY '3#XX'. end is ONLY '4#YY'. lines is ['RESOLVED: issues cleared'].",
|
||||
"If edit_file fails or errors, use write_file to write the complete correct file content instead.",
|
||||
"The correct final content should be: === Log Start ===, INFO: started, RESOLVED: issues cleared, INFO: recovered, === Log End ===",
|
||||
"Do not make any other changes.",
|
||||
].join(" "),
|
||||
validate: (content) => {
|
||||
const lines = content.trim().split("\n");
|
||||
const hasResolved = lines.some(
|
||||
(l) => l.trim() === "RESOLVED: issues cleared"
|
||||
);
|
||||
const hasWarn = content.includes("WARN: slow query");
|
||||
const hasError = content.includes("ERROR: timeout");
|
||||
if (!hasResolved) {
|
||||
return {
|
||||
passed: false,
|
||||
reason: '"RESOLVED: issues cleared" not found',
|
||||
};
|
||||
}
|
||||
if (hasWarn || hasError) {
|
||||
return { passed: false, reason: "old WARN/ERROR lines still present" };
|
||||
}
|
||||
// Core assertion: 2 old lines removed, 1 new line added = net -1 line
|
||||
// Allow slight overshoot from model adding extra content
|
||||
if (lines.length < 4 || lines.length > 6) {
|
||||
return {
|
||||
passed: false,
|
||||
reason: `expected ~5 lines, got ${lines.length}`,
|
||||
};
|
||||
}
|
||||
return {
|
||||
passed: true,
|
||||
reason: "range replace succeeded — 2 lines → 1 line",
|
||||
};
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "5. Delete line",
|
||||
fileName: "settings.txt",
|
||||
fileContent: [
|
||||
"mode: production",
|
||||
"debug: true",
|
||||
"cache: enabled",
|
||||
"log_level: info",
|
||||
].join("\n"),
|
||||
prompt: [
|
||||
"Follow these steps exactly:",
|
||||
"Step 1: Call read_file on settings.txt to see line anchors.",
|
||||
"Step 2: Note the anchor for 'debug: true' (line 2).",
|
||||
"Step 3: Call edit_file with path='settings.txt' and edits containing ONE object:",
|
||||
" { op: 'replace', pos: '<line2 anchor>', lines: [] }",
|
||||
"IMPORTANT: lines must be an empty array [] to delete the line. pos must be ONLY the anchor like '2#SR'.",
|
||||
].join(" "),
|
||||
validate: (content) => {
|
||||
const lines = content.trim().split("\n");
|
||||
const hasDebug = content.includes("debug: true");
|
||||
if (hasDebug) {
|
||||
return { passed: false, reason: '"debug: true" still present' };
|
||||
}
|
||||
if (lines.length !== 3) {
|
||||
return {
|
||||
passed: false,
|
||||
reason: `expected 3 lines, got ${lines.length}`,
|
||||
};
|
||||
}
|
||||
if (
|
||||
!(
|
||||
content.includes("mode: production") &&
|
||||
content.includes("cache: enabled")
|
||||
)
|
||||
) {
|
||||
return { passed: false, reason: "other lines were removed" };
|
||||
}
|
||||
return { passed: true, reason: '"debug: true" successfully deleted' };
|
||||
},
|
||||
},
|
||||
|
||||
// ── Creative cases (6-15) ────────────────────────────────────
|
||||
{
|
||||
name: "6. Batch edit — two replacements in one call",
|
||||
fileName: "batch.txt",
|
||||
fileContent: ["red", "green", "blue", "yellow"].join("\n"),
|
||||
prompt: [
|
||||
"Read batch.txt with read_file.",
|
||||
"Then call edit_file ONCE with path='batch.txt' and edits containing TWO objects:",
|
||||
" 1) { op: 'replace', pos: '<line1 anchor>', lines: ['crimson'] }",
|
||||
" 2) { op: 'replace', pos: '<line3 anchor>', lines: ['navy'] }",
|
||||
"Both edits must be in the SAME edits array in a single edit_file call.",
|
||||
].join(" "),
|
||||
validate: (c) => {
|
||||
const lines = c.trim().split("\n");
|
||||
if (!c.includes("crimson")) return { passed: false, reason: "'crimson' not found" };
|
||||
if (!c.includes("navy")) return { passed: false, reason: "'navy' not found" };
|
||||
if (c.includes("red")) return { passed: false, reason: "'red' still present" };
|
||||
if (c.includes("blue")) return { passed: false, reason: "'blue' still present" };
|
||||
if (lines.length !== 4) return { passed: false, reason: `expected 4 lines, got ${lines.length}` };
|
||||
return { passed: true, reason: "both lines replaced in single call" };
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "7. Line expansion — 1 line → 3 lines",
|
||||
fileName: "expand.txt",
|
||||
fileContent: ["header", "TODO: implement", "footer"].join("\n"),
|
||||
prompt: [
|
||||
"Read expand.txt with read_file.",
|
||||
"Replace the 'TODO: implement' line (line 2) with THREE lines:",
|
||||
" 'step 1: init', 'step 2: process', 'step 3: cleanup'",
|
||||
"Use edit_file with op='replace', pos=<line2 anchor>, lines=['step 1: init', 'step 2: process', 'step 3: cleanup'].",
|
||||
].join(" "),
|
||||
validate: (c) => {
|
||||
const lines = c.trim().split("\n");
|
||||
if (c.includes("TODO")) return { passed: false, reason: "TODO line still present" };
|
||||
if (!c.includes("step 1: init")) return { passed: false, reason: "'step 1: init' not found" };
|
||||
if (!c.includes("step 3: cleanup")) return { passed: false, reason: "'step 3: cleanup' not found" };
|
||||
if (lines.length !== 5) return { passed: false, reason: `expected 5 lines, got ${lines.length}` };
|
||||
return { passed: true, reason: "1 line expanded to 3 lines" };
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "8. Append at EOF",
|
||||
fileName: "eof.txt",
|
||||
fileContent: ["line one", "line two"].join("\n"),
|
||||
prompt: [
|
||||
"Read eof.txt with read_file.",
|
||||
"Use edit_file to append 'line three' after the LAST line of the file.",
|
||||
"Use op='append', pos=<last line anchor>, lines=['line three'].",
|
||||
].join(" "),
|
||||
validate: (c) => {
|
||||
const lines = c.trim().split("\n");
|
||||
if (!c.includes("line three")) return { passed: false, reason: "'line three' not found" };
|
||||
if (lines[lines.length - 1].trim() !== "line three")
|
||||
return { passed: false, reason: "'line three' not at end" };
|
||||
if (lines.length !== 3) return { passed: false, reason: `expected 3 lines, got ${lines.length}` };
|
||||
return { passed: true, reason: "appended at EOF" };
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "9. Special characters in content",
|
||||
fileName: "special.json",
|
||||
fileContent: [
|
||||
'{',
|
||||
' "name": "old-value",',
|
||||
' "count": 42',
|
||||
'}',
|
||||
].join("\n"),
|
||||
prompt: [
|
||||
"Read special.json with read_file.",
|
||||
'Replace the line containing \"name\": \"old-value\" with \"name\": \"new-value\".',
|
||||
"Use edit_file with op='replace', pos=<that line's anchor>, lines=[' \"name\": \"new-value\",'].",
|
||||
].join(" "),
|
||||
validate: (c) => {
|
||||
if (c.includes("old-value")) return { passed: false, reason: "'old-value' still present" };
|
||||
if (!c.includes('"new-value"')) return { passed: false, reason: "'new-value' not found" };
|
||||
if (!c.includes('"count": 42')) return { passed: false, reason: "other content was modified" };
|
||||
return { passed: true, reason: "JSON value replaced with special chars intact" };
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "10. Replace first line",
|
||||
fileName: "first.txt",
|
||||
fileContent: ["OLD HEADER", "body content", "footer"].join("\n"),
|
||||
prompt: [
|
||||
"Read first.txt with read_file.",
|
||||
"Replace the very first line 'OLD HEADER' with 'NEW HEADER'.",
|
||||
"Use edit_file with op='replace', pos=<line1 anchor>, lines=['NEW HEADER'].",
|
||||
].join(" "),
|
||||
validate: (c) => {
|
||||
const lines = c.trim().split("\n");
|
||||
if (c.includes("OLD HEADER")) return { passed: false, reason: "'OLD HEADER' still present" };
|
||||
if (lines[0].trim() !== "NEW HEADER") return { passed: false, reason: "first line is not 'NEW HEADER'" };
|
||||
if (!c.includes("body content")) return { passed: false, reason: "body was modified" };
|
||||
return { passed: true, reason: "first line replaced" };
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "11. Replace last line",
|
||||
fileName: "last.txt",
|
||||
fileContent: ["alpha", "bravo", "OLD_FOOTER"].join("\n"),
|
||||
prompt: [
|
||||
"Read last.txt with read_file.",
|
||||
"Replace the last line 'OLD_FOOTER' with 'NEW_FOOTER'.",
|
||||
"Use edit_file with op='replace', pos=<last line anchor>, lines=['NEW_FOOTER'].",
|
||||
].join(" "),
|
||||
validate: (c) => {
|
||||
const lines = c.trim().split("\n");
|
||||
if (c.includes("OLD_FOOTER")) return { passed: false, reason: "'OLD_FOOTER' still present" };
|
||||
if (lines[lines.length - 1].trim() !== "NEW_FOOTER")
|
||||
return { passed: false, reason: "last line is not 'NEW_FOOTER'" };
|
||||
return { passed: true, reason: "last line replaced" };
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "12. Adjacent line edits",
|
||||
fileName: "adjacent.txt",
|
||||
fileContent: ["aaa", "bbb", "ccc", "ddd"].join("\n"),
|
||||
prompt: [
|
||||
"Read adjacent.txt with read_file.",
|
||||
"Replace line 2 ('bbb') with 'BBB' and line 3 ('ccc') with 'CCC'.",
|
||||
"Use edit_file with TWO edits in the same call:",
|
||||
" { op: 'replace', pos: <line2 anchor>, lines: ['BBB'] }",
|
||||
" { op: 'replace', pos: <line3 anchor>, lines: ['CCC'] }",
|
||||
].join(" "),
|
||||
validate: (c) => {
|
||||
const lines = c.trim().split("\n");
|
||||
if (c.includes("bbb")) return { passed: false, reason: "'bbb' still present" };
|
||||
if (c.includes("ccc")) return { passed: false, reason: "'ccc' still present" };
|
||||
if (!c.includes("BBB")) return { passed: false, reason: "'BBB' not found" };
|
||||
if (!c.includes("CCC")) return { passed: false, reason: "'CCC' not found" };
|
||||
if (lines.length !== 4) return { passed: false, reason: `expected 4 lines, got ${lines.length}` };
|
||||
return { passed: true, reason: "two adjacent lines replaced" };
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "13. Prepend multi-line block",
|
||||
fileName: "block.py",
|
||||
fileContent: ["def main():", " print('hello')", "", "main()"].join("\n"),
|
||||
prompt: [
|
||||
"Read block.py with read_file.",
|
||||
"Prepend a 2-line comment block before 'def main():' (line 1).",
|
||||
"The two lines are: '# Author: test' and '# Date: 2025-01-01'.",
|
||||
"Use edit_file with op='prepend', pos=<line1 anchor>, lines=['# Author: test', '# Date: 2025-01-01'].",
|
||||
].join(" "),
|
||||
validate: (c) => {
|
||||
const lines = c.trim().split("\n");
|
||||
if (!c.includes("# Author: test")) return { passed: false, reason: "author comment not found" };
|
||||
if (!c.includes("# Date: 2025-01-01")) return { passed: false, reason: "date comment not found" };
|
||||
const defIdx = lines.findIndex((l) => l.startsWith("def main"));
|
||||
const authorIdx = lines.findIndex((l) => l.includes("Author"));
|
||||
if (authorIdx >= defIdx) return { passed: false, reason: "comments not before def" };
|
||||
return { passed: true, reason: "2-line block prepended before function" };
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "14. Delete range — 3 consecutive lines",
|
||||
fileName: "cleanup.txt",
|
||||
fileContent: ["keep1", "remove-a", "remove-b", "remove-c", "keep2"].join("\n"),
|
||||
prompt: [
|
||||
"Read cleanup.txt with read_file.",
|
||||
"Delete lines 2-4 ('remove-a', 'remove-b', 'remove-c') using a single range replace.",
|
||||
"Use edit_file with op='replace', pos=<line2 anchor>, end=<line4 anchor>, lines=[].",
|
||||
"An empty lines array deletes the range.",
|
||||
].join(" "),
|
||||
validate: (c) => {
|
||||
const lines = c.trim().split("\n");
|
||||
if (c.includes("remove")) return { passed: false, reason: "'remove' lines still present" };
|
||||
if (!c.includes("keep1")) return { passed: false, reason: "'keep1' was deleted" };
|
||||
if (!c.includes("keep2")) return { passed: false, reason: "'keep2' was deleted" };
|
||||
if (lines.length !== 2) return { passed: false, reason: `expected 2 lines, got ${lines.length}` };
|
||||
return { passed: true, reason: "3 consecutive lines deleted via range" };
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "15. Replace with duplicate-content line",
|
||||
fileName: "dupes.txt",
|
||||
fileContent: ["item", "item", "item", "item"].join("\n"),
|
||||
prompt: [
|
||||
"Read dupes.txt with read_file. All 4 lines have the same text 'item'.",
|
||||
"Replace ONLY line 3 with 'CHANGED'. Do NOT modify any other line.",
|
||||
"Use edit_file with op='replace', pos=<line3 anchor>, lines=['CHANGED'].",
|
||||
"The anchor hash uniquely identifies line 3 even though the content is identical.",
|
||||
].join(" "),
|
||||
validate: (c) => {
|
||||
const lines = c.trim().split("\n");
|
||||
if (!c.includes("CHANGED")) return { passed: false, reason: "'CHANGED' not found" };
|
||||
const changedCount = lines.filter((l) => l.trim() === "CHANGED").length;
|
||||
const itemCount = lines.filter((l) => l.trim() === "item").length;
|
||||
if (changedCount !== 1) return { passed: false, reason: `expected 1 CHANGED, got ${changedCount}` };
|
||||
if (itemCount !== 3) return { passed: false, reason: `expected 3 item lines, got ${itemCount}` };
|
||||
if (lines.length !== 4) return { passed: false, reason: `expected 4 lines, got ${lines.length}` };
|
||||
return { passed: true, reason: "only line 3 changed among duplicates" };
|
||||
},
|
||||
},
|
||||
|
||||
// ── Whitespace cases (16-21) ──────────────────────────────────
|
||||
{
|
||||
name: "16. Fix indentation — 2 spaces → 4 spaces",
|
||||
fileName: "indent.js",
|
||||
fileContent: ["function foo() {", " const x = 1;", " return x;", "}"].join("\n"),
|
||||
prompt: [
|
||||
"Read indent.js with read_file.",
|
||||
"Replace line 2 ' const x = 1;' (2-space indent) with ' const x = 1;' (4-space indent).",
|
||||
"Use edit_file with op='replace', pos=<line2 anchor>, lines=[' const x = 1;'].",
|
||||
"The ONLY change is the indentation: 2 spaces → 4 spaces. Content stays the same.",
|
||||
].join(" "),
|
||||
validate: (c) => {
|
||||
const lines = c.split("\n");
|
||||
const line2 = lines[1];
|
||||
if (!line2) return { passed: false, reason: "line 2 missing" };
|
||||
if (line2 === " const x = 1;") return { passed: true, reason: "indentation fixed to 4 spaces" };
|
||||
if (line2 === " const x = 1;") return { passed: false, reason: "still 2-space indent" };
|
||||
return { passed: false, reason: `unexpected line 2: '${line2}'` };
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "17. Replace preserving leading whitespace",
|
||||
fileName: "preserve.py",
|
||||
fileContent: [
|
||||
"class Foo:",
|
||||
" def old_method(self):",
|
||||
" pass",
|
||||
].join("\n"),
|
||||
prompt: [
|
||||
"Read preserve.py with read_file.",
|
||||
"Replace line 2 ' def old_method(self):' with ' def new_method(self):'.",
|
||||
"Keep the 4-space indentation. Only change the method name.",
|
||||
"Use edit_file with op='replace', pos=<line2 anchor>, lines=[' def new_method(self):'].",
|
||||
].join(" "),
|
||||
validate: (c) => {
|
||||
if (c.includes("old_method")) return { passed: false, reason: "'old_method' still present" };
|
||||
const lines = c.split("\n");
|
||||
const methodLine = lines.find((l) => l.includes("new_method"));
|
||||
if (!methodLine) return { passed: false, reason: "'new_method' not found" };
|
||||
if (!methodLine.startsWith(" ")) return { passed: false, reason: "indentation lost" };
|
||||
return { passed: true, reason: "method renamed with indentation preserved" };
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "18. Insert blank line between sections",
|
||||
fileName: "sections.txt",
|
||||
fileContent: ["[section-a]", "value-a=1", "[section-b]", "value-b=2"].join("\n"),
|
||||
prompt: [
|
||||
"Read sections.txt with read_file.",
|
||||
"Insert a blank empty line between 'value-a=1' (line 2) and '[section-b]' (line 3).",
|
||||
"Use edit_file with op='append', pos=<line2 anchor>, lines=[''].",
|
||||
"lines=[''] inserts one empty line.",
|
||||
].join(" "),
|
||||
validate: (c) => {
|
||||
const lines = c.split("\n");
|
||||
const valAIdx = lines.findIndex((l) => l.includes("value-a=1"));
|
||||
const secBIdx = lines.findIndex((l) => l.includes("[section-b]"));
|
||||
if (valAIdx === -1) return { passed: false, reason: "'value-a=1' missing" };
|
||||
if (secBIdx === -1) return { passed: false, reason: "'[section-b]' missing" };
|
||||
if (secBIdx - valAIdx < 2) return { passed: false, reason: "no blank line between sections" };
|
||||
const between = lines[valAIdx + 1];
|
||||
if (between.trim() !== "") return { passed: false, reason: `line between is '${between}', not blank` };
|
||||
return { passed: true, reason: "blank line inserted between sections" };
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "19. Delete blank line",
|
||||
fileName: "noblank.txt",
|
||||
fileContent: ["first", "", "second", "third"].join("\n"),
|
||||
prompt: [
|
||||
"Read noblank.txt with read_file.",
|
||||
"Delete the empty blank line (line 2). Use edit_file with op='replace', pos=<line2 anchor>, lines=[].",
|
||||
].join(" "),
|
||||
validate: (c) => {
|
||||
const lines = c.trim().split("\n");
|
||||
if (lines.length !== 3) return { passed: false, reason: `expected 3 lines, got ${lines.length}` };
|
||||
if (lines[0].trim() !== "first") return { passed: false, reason: "'first' not on line 1" };
|
||||
if (lines[1].trim() !== "second") return { passed: false, reason: "'second' not on line 2" };
|
||||
return { passed: true, reason: "blank line deleted" };
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "20. Tab → spaces conversion",
|
||||
fileName: "tabs.txt",
|
||||
fileContent: ["start", "\tindented-with-tab", "end"].join("\n"),
|
||||
prompt: [
|
||||
"Read tabs.txt with read_file.",
|
||||
"Replace the tab-indented line 2 using edit_file with edits: [{ op: 'replace', pos: '<line2 anchor>', lines: [' indented-with-spaces'] }].",
|
||||
"Expected final line 2 to be 4 spaces followed by indented-with-spaces.",
|
||||
].join(" "),
|
||||
validate: (c) => {
|
||||
if (c.includes("\t")) return { passed: false, reason: "tab still present" };
|
||||
if (!c.includes(" indented-with-spaces"))
|
||||
return { passed: false, reason: "' indented-with-spaces' not found" };
|
||||
if (!c.includes("start")) return { passed: false, reason: "'start' was modified" };
|
||||
return { passed: true, reason: "tab converted to 4 spaces" };
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "21. Deeply nested indent replacement",
|
||||
fileName: "nested.ts",
|
||||
fileContent: [
|
||||
"if (a) {",
|
||||
" if (b) {",
|
||||
" if (c) {",
|
||||
" old_call();",
|
||||
" }",
|
||||
" }",
|
||||
"}",
|
||||
].join("\n"),
|
||||
prompt: [
|
||||
"Read nested.ts with read_file.",
|
||||
"Replace line 4 ' old_call();' with ' new_call();'.",
|
||||
"Preserve the exact 6-space indentation. Only change the function name.",
|
||||
"Use edit_file with op='replace', pos=<line4 anchor>, lines=[' new_call();'].",
|
||||
].join(" "),
|
||||
validate: (c) => {
|
||||
if (c.includes("old_call")) return { passed: false, reason: "'old_call' still present" };
|
||||
const lines = c.split("\n");
|
||||
const callLine = lines.find((l) => l.includes("new_call"));
|
||||
if (!callLine) return { passed: false, reason: "'new_call' not found" };
|
||||
const leadingSpaces = callLine.match(/^ */)?.[0].length ?? 0;
|
||||
if (leadingSpaces !== 6) return { passed: false, reason: `expected 6-space indent, got ${leadingSpaces}` };
|
||||
return { passed: true, reason: "deeply nested line replaced with indent preserved" };
|
||||
},
|
||||
},
|
||||
];
|
||||
|
||||
// ── JSONL event types ─────────────────────────────────────────
|
||||
/** A `tool_call` event parsed from one JSON line of the child's output. */
interface ToolCallEvent {
  /** Discriminator identifying this event shape. */
  type: "tool_call";
  /** Name of the tool that was invoked (e.g. `edit_file`). */
  tool_name: string;
  /** Identifier shared with the matching `tool_result` event. */
  tool_call_id: string;
  /** Arguments the tool was called with. */
  tool_input: Record<string, unknown>;
}
|
||||
|
||||
/** A `tool_result` event parsed from one JSON line of the child's output. */
interface ToolResultEvent {
  /** Discriminator identifying this event shape. */
  type: "tool_result";
  /** Identifier of the `tool_call` this result belongs to. */
  tool_call_id: string;
  /** Text returned by the tool. */
  output: string;
  /** Present when the call failed; a result without it is counted as a success. */
  error?: string;
}
|
||||
|
||||
/** Loosely typed event: only `type` is known, every other field is opaque. */
interface AnyEvent {
  /** Any additional payload fields, to be narrowed before use. */
  [key: string]: unknown;
  /** Event discriminator, e.g. `"tool_call"` or `"tool_result"`. */
  type: string;
}
|
||||
|
||||
// ── Run single test case ─────────────────────────────────────
|
||||
/**
 * Runs a single edit-operation test case end to end.
 *
 * Writes the fixture file into `testDir`, launches `headless.ts` via
 * `bun run` with the case's prompt, parses the JSON lines the child prints on
 * stdout, logs every failed `edit_file` call, and finally validates the file
 * that is left on disk.
 *
 * @param tc - Test case supplying the fixture file, the prompt and the validator.
 * @param testDir - Directory holding the fixture; also the child's working directory.
 * @returns The verdict, the number of `edit_file` calls, how many of their
 *   results carried no error, and the child's wall-clock duration in milliseconds.
 * @throws Error when the child cannot be spawned, exits with a non-zero code,
 *   or has not finished after 4 minutes.
 */
async function runTestCase(
  tc: TestCase,
  testDir: string
): Promise<{
  passed: boolean;
  editCalls: number;
  editSuccesses: number;
  duration: number;
}> {
  const fixturePath = join(testDir, tc.fileName);
  writeFileSync(fixturePath, tc.fileContent, "utf-8");

  const childArgs = [
    "run",
    resolve(import.meta.dir, "headless.ts"),
    "-p",
    tc.prompt,
    "--no-translate",
    ...extraArgs,
  ];

  const startedAt = Date.now();

  const rawStdout = await new Promise<string>((onDone, onFail) => {
    const child = spawn("bun", childArgs, {
      cwd: testDir,
      env: { ...process.env, BUN_INSTALL: process.env.BUN_INSTALL },
      stdio: ["ignore", "pipe", "pipe"],
    });

    const outParts: string[] = [];
    const errParts: string[] = [];
    child.stdout.on("data", (chunk: Buffer) => {
      outParts.push(chunk.toString());
    });
    child.stderr.on("data", (chunk: Buffer) => {
      errParts.push(chunk.toString());
    });

    // Hard stop so a hung agent cannot block the rest of the suite.
    const killTimer = setTimeout(
      () => {
        child.kill("SIGTERM");
        onFail(new Error("Timed out after 4 minutes"));
      },
      4 * 60 * 1000
    );

    child.on("close", (code) => {
      clearTimeout(killTimer);
      if (code === 0) {
        onDone(outParts.join(""));
        return;
      }
      // Only the tail of stderr is kept to keep the error message short.
      onFail(new Error(`Exit code ${code}\n${errParts.join("").slice(-500)}`));
    });
    child.on("error", (err) => {
      clearTimeout(killTimer);
      onFail(err);
    });
  });

  const duration = Date.now() - startedAt;

  // Keep every non-blank stdout line that parses as JSON; ignore the rest.
  const events: AnyEvent[] = rawStdout.split("\n").flatMap((line) => {
    if (!line.trim()) return [];
    try {
      return [JSON.parse(line) as AnyEvent];
    } catch {
      return [];
    }
  });

  const editCalls = (
    events.filter((e) => e.type === "tool_call") as unknown as ToolCallEvent[]
  ).filter((call) => call.tool_name === "edit_file");
  const editCallIds = new Set(editCalls.map((call) => call.tool_call_id));
  const editResults = (
    events.filter((e) => e.type === "tool_result") as unknown as ToolResultEvent[]
  ).filter((result) => editCallIds.has(result.tool_call_id));

  // Count clean results and report each failed one together with its input.
  let editSuccesses = 0;
  for (const result of editResults) {
    if (!result.error) {
      editSuccesses += 1;
      continue;
    }
    info(` blocked: ${result.error.slice(0, 120)}`);
    const origin = editCalls.find((call) => call.tool_call_id === result.tool_call_id);
    if (origin) {
      info(` input: ${JSON.stringify(origin.tool_input).slice(0, 200)}`);
    }
  }

  const summarize = (passed: boolean) => ({
    passed,
    editCalls: editCalls.length,
    editSuccesses,
    duration,
  });

  // A fixture that can no longer be read counts as a failed test.
  let finalContent: string;
  try {
    finalContent = readFileSync(fixturePath, "utf-8");
  } catch {
    return summarize(false);
  }

  return summarize(tc.validate(finalContent).passed);
}
|
||||
|
||||
// ── Main ──────────────────────────────────────────────────────
|
||||
/**
 * Entry point: runs every case in `TEST_CASES` sequentially inside a fresh
 * temporary directory, prints a verdict per test followed by a summary, and
 * exits with code 0 only when every test passed (1 otherwise).
 */
const main = async () => {
  console.log(`\n${BOLD}Headless Edit Operations Test — ${TEST_CASES.length} Types${RESET}\n`);

  const testDir = join(tmpdir(), `edit-ops-${Date.now()}`);
  mkdirSync(testDir, { recursive: true });
  info(`Test dir: ${testDir}`);
  console.log();

  let totalPassed = 0;
  const results: { name: string; passed: boolean; detail: string }[] = [];

  for (const tc of TEST_CASES) {
    console.log(`${CYAN}${BOLD}${tc.name}${RESET}`);
    info(`File: ${tc.fileName}`);
    info(`Prompt: "${tc.prompt.slice(0, 80)}..."`);

    try {
      const result = await runTestCase(tc, testDir);
      const status = result.passed ? `${GREEN}PASS${RESET}` : `${RED}FAIL${RESET}`;
      const detail = `edit_file: ${result.editSuccesses}/${result.editCalls} succeeded, ${(result.duration / 1000).toFixed(1)}s`;

      console.log(` ${status} — ${detail}`);

      if (result.passed) {
        totalPassed++;
      }

      // Re-read the file to show the validator's reason. The file may be gone
      // (runTestCase reports that as a failure), so a read error must not
      // escalate into a second, contradictory ERROR line for the same test.
      let content: string | undefined;
      try {
        content = readFileSync(join(testDir, tc.fileName), "utf-8");
      } catch {
        content = undefined;
      }

      if (content === undefined) {
        fail(`${tc.fileName} could not be read after the run`);
      } else {
        const v = tc.validate(content);
        if (result.passed) {
          pass(v.reason);
        } else {
          fail(v.reason);
          info(
            `Final content:\n${content
              .split("\n")
              .map((l, i) => ` ${i + 1}: ${l}`)
              .join("\n")}`
          );
        }
      }

      results.push({ name: tc.name, passed: result.passed, detail });
    } catch (error) {
      // runTestCase rejects on spawn failure, non-zero exit and timeout.
      const msg = error instanceof Error ? error.message : String(error);
      console.log(` ${RED}ERROR${RESET} — ${msg.slice(0, 200)}`);
      fail(msg.slice(0, 200));
      results.push({ name: tc.name, passed: false, detail: msg.slice(0, 100) });
    }

    // Reset file for next test (in case of side effects); best effort only.
    try {
      rmSync(join(testDir, tc.fileName), { force: true });
    } catch {}

    console.log();
  }

  // Summary
  console.log(`${BOLD}━━━ Summary ━━━${RESET}`);
  for (const r of results) {
    const icon = r.passed ? `${GREEN}✓${RESET}` : `${RED}✗${RESET}`;
    console.log(` ${icon} ${r.name} — ${r.detail}`);
  }
  console.log();
  console.log(
    `${BOLD}Result: ${totalPassed}/${TEST_CASES.length} passed (${Math.round((totalPassed / TEST_CASES.length) * 100)}%)${RESET}`
  );

  // Cleanup; a leftover temp directory is not worth failing the run for.
  try {
    rmSync(testDir, { recursive: true, force: true });
  } catch {}

  if (totalPassed === TEST_CASES.length) {
    console.log(
      `\n${BOLD}${GREEN}🎉 ALL TESTS PASSED — 100% success rate!${RESET}\n`
    );
    process.exit(0);
  } else {
    console.log(`\n${BOLD}${RED}Some tests failed.${RESET}\n`);
    process.exit(1);
  }
};
|
||||
|
||||
main();
|
||||
280
benchmarks/test-multi-model.ts
Normal file
280
benchmarks/test-multi-model.ts
Normal file
@@ -0,0 +1,280 @@
|
||||
#!/usr/bin/env bun
|
||||
/**
|
||||
* Multi-model edit_file test runner
|
||||
*
|
||||
* Runs test-edit-ops.ts against every model listed in MODELS
|
||||
* and produces a summary table.
|
||||
*
|
||||
* Usage:
|
||||
* bun run benchmarks/test-multi-model.ts [--timeout <seconds>]
|
||||
*/
|
||||
|
||||
import { spawn } from "node:child_process";
|
||||
import { resolve } from "node:path";
|
||||
|
||||
// ── Models ────────────────────────────────────────────────────
|
||||
/**
 * Models exercised by this runner. `id` is handed to the ops script through
 * `-m`; `short` is the label shown in console output.
 */
const MODELS: { id: string; short: string }[] = [
  { short: "M2.5", id: "MiniMaxAI/MiniMax-M2.5" },
  // Masked (slow + timeout-prone): { id: "MiniMaxAI/MiniMax-M2.1", short: "M2.1" }
  // Masked (API 503): { id: "zai-org/GLM-5", short: "GLM-5" }
  { short: "GLM-4.7", id: "zai-org/GLM-4.7" },
];
|
||||
|
||||
// ── CLI args ──────────────────────────────────────────────────
|
||||
// Wall-clock budget for one model's full run, in seconds.
// Override with `--timeout <seconds>`; the value must be a positive integer.
let perModelTimeoutSec = 900; // 15 min default per model
const rawArgs = process.argv.slice(2);
for (let i = 0; i < rawArgs.length; i++) {
  if (rawArgs[i] === "--timeout" && i + 1 < rawArgs.length) {
    const parsed = Number.parseInt(rawArgs[i + 1], 10);
    if (Number.isNaN(parsed) || parsed <= 0) {
      console.error(`Invalid --timeout value: ${rawArgs[i + 1]}`);
      process.exit(1);
    }
    perModelTimeoutSec = parsed;
    i++; // skip the value that was just consumed
  }
}
|
||||
|
||||
// ── Colors ────────────────────────────────────────────────────
|
||||
// ANSI escape sequences used to style console output.
// Text attributes:
const RESET = "\x1b[0m";
const BOLD = "\x1b[1m";
const DIM = "\x1b[2m";
// Foreground colours:
const RED = "\x1b[31m";
const GREEN = "\x1b[32m";
const YELLOW = "\x1b[33m";
const CYAN = "\x1b[36m";
|
||||
|
||||
// ── Types ─────────────────────────────────────────────────────
|
||||
/** Outcome of one test as recovered from the ops script's stdout. */
interface TestResult {
  /** Test heading, e.g. `"1. Replace single line"`. */
  name: string;
  /** True only for a `PASS` verdict. */
  passed: boolean;
  /** Text following the verdict keyword, or a generic fallback. */
  detail: string;
}
|
||||
|
||||
/** Aggregated outcome of running the ops script against one model. */
interface ModelResult {
  /** Full model identifier passed to the ops script. */
  modelId: string;
  /** Short label used in console output. */
  modelShort: string;
  /** Per-test results parsed from the child's stdout. */
  tests: TestResult[];
  /** Number of entries in `tests` that passed. */
  totalPassed: number;
  /** Number of tests the score is measured against. */
  totalTests: number;
  /** Wall-clock time of the run in milliseconds. */
  durationMs: number;
  /** Set when the run itself failed (timeout, spawn error). */
  error?: string;
}
|
||||
|
||||
// ── Parse test-headless-edit-ops stdout ───────────────────────
|
||||
/**
 * Extracts per-test results from the stdout of the edit-ops test script.
 *
 * Each test is announced by a heading line of the form `"N. <name>"` and is
 * later followed by a verdict line that starts with `PASS`, `FAIL` or
 * `ERROR`, for example:
 *
 *   "PASS — edit_file: 1/1 succeeded, 32.5s"
 *   "FAIL — edit_file: 0/3 succeeded, 15.2s"
 *   "ERROR — Timed out after 4 minutes"
 *
 * ANSI colour codes and surrounding whitespace are removed before matching.
 * Both patterns are anchored to the start of the line, so:
 *   - headings may themselves contain "—" (e.g. "14. Delete range — 3 consecutive lines");
 *   - summary rows ("✓ 1. name — detail") are ignored because they start with an icon;
 *   - diagnostic lines that merely mention PASS/FAIL/ERROR are not taken for a verdict.
 *
 * @param stdout - Complete stdout captured from the test script.
 * @returns One entry per heading that was followed by a verdict, in output order.
 */
function parseOpsOutput(stdout: string): TestResult[] {
  const ansiCodes = /\x1b\[[0-9;]*m/g;
  const headingPattern = /^\d+\.\s+\S/;
  const verdictPattern = /^(PASS|FAIL|ERROR)\b\s*—?\s*(.*)$/;

  const results: TestResult[] = [];
  // Heading still waiting for its verdict; empty when none is pending.
  let currentTestName = "";

  for (const line of stdout.split("\n")) {
    const stripped = line.replace(ansiCodes, "").trim();

    const verdict = verdictPattern.exec(stripped);
    if (verdict) {
      // A verdict without a pending heading has nothing to attach to.
      if (currentTestName) {
        const [, status, rest = ""] = verdict;
        const passed = status === "PASS";
        const fallback = passed ? "passed" : status === "FAIL" ? "failed" : "error";
        results.push({
          name: currentTestName,
          passed,
          detail: rest.trim() || fallback,
        });
        currentTestName = "";
      }
      continue;
    }

    if (headingPattern.test(stripped)) {
      currentTestName = stripped;
    }
  }

  return results;
}
|
||||
|
||||
// ── Run one model ────────────────────────────────────────────
|
||||
/**
 * Runs the edit-ops test script against a single model and summarises the
 * outcome.
 *
 * The script is launched with `bun run … -m <model.id> --no-translate`; its
 * stdout is parsed with `parseOpsOutput`. The returned promise never
 * rejects: timeouts, spawn failures and crashes are reported through the
 * `error` field of the result instead.
 *
 * @param model - Model to test: `id` is passed to the script, `short` is the display label.
 * @returns Parsed test results, pass totals and the elapsed time in milliseconds.
 */
async function runModel(model: {
  id: string;
  short: string;
}): Promise<ModelResult> {
  const opsScript = resolve(import.meta.dir, "test-edit-ops.ts");
  const startTime = Date.now();

  return new Promise<ModelResult>((resolvePromise) => {
    const proc = spawn(
      "bun",
      ["run", opsScript, "-m", model.id, "--no-translate"],
      {
        cwd: resolve(import.meta.dir),
        env: { ...process.env, BUN_INSTALL: process.env.BUN_INSTALL },
        stdio: ["ignore", "pipe", "pipe"],
      }
    );

    let stdout = "";
    let stderr = "";

    proc.stdout.on("data", (chunk: Buffer) => {
      stdout += chunk.toString();
    });
    proc.stderr.on("data", (chunk: Buffer) => {
      stderr += chunk.toString();
    });

    // Result for a run that yielded no usable test output.
    const failure = (error: string): ModelResult => ({
      modelId: model.id,
      modelShort: model.short,
      tests: [],
      totalPassed: 0,
      totalTests: 0,
      durationMs: Date.now() - startTime,
      error,
    });

    const timeout = setTimeout(() => {
      proc.kill("SIGTERM");
      // The later "close" event resolves again, which is a no-op.
      resolvePromise(failure(`Timed out after ${perModelTimeoutSec}s`));
    }, perModelTimeoutSec * 1000);

    proc.on("close", (code) => {
      clearTimeout(timeout);
      const tests = parseOpsOutput(stdout);

      // A non-zero exit with no parsed verdict means the script itself broke
      // (bad flag, import error, …); surface stderr instead of showing "0/5".
      if (tests.length === 0 && code !== 0) {
        const tail = stderr.trim().slice(-500);
        resolvePromise(failure(`Exit code ${code}${tail ? `: ${tail}` : ""}`));
        return;
      }

      // The script is expected to announce its test count in a banner of the
      // form "… Edit Operations Test — N Types" (TODO confirm it stays in
      // sync); when no banner is found, keep the historical floor of 5 so a
      // truncated run is not scored against fewer tests than were planned.
      const banner = /Edit Operations Test — (\d+) Types/.exec(stdout);
      const expectedTotal = banner ? Number.parseInt(banner[1], 10) : 5;

      resolvePromise({
        modelId: model.id,
        modelShort: model.short,
        tests,
        totalPassed: tests.filter((t) => t.passed).length,
        totalTests: Math.max(tests.length, expectedTotal),
        durationMs: Date.now() - startTime,
      });
    });

    proc.on("error", (err) => {
      clearTimeout(timeout);
      resolvePromise(failure(err.message));
    });
  });
}
|
||||
|
||||
// ── Main ──────────────────────────────────────────────────────
|
||||
/**
 * Entry point: tests every model in `MODELS` one after another, prints the
 * per-model results followed by a summary, and exits with code 0 only when
 * no model errored and every model passed all of its tests.
 */
const main = async () => {
  console.log(`\n${BOLD}═══ Multi-Model edit_file Test Runner ═══${RESET}\n`);
  console.log(`${DIM}Models: ${MODELS.map((m) => m.short).join(", ")}${RESET}`);
  console.log(`${DIM}Timeout: ${perModelTimeoutSec}s per model${RESET}`);
  console.log();

  // Red for errors or zero passes, green for a clean sweep, yellow in between.
  const scoreColor = (r: ModelResult): string => {
    if (r.error) return RED;
    if (r.totalPassed === r.totalTests) return GREEN;
    return r.totalPassed > 0 ? YELLOW : RED;
  };

  const printTests = (tests: TestResult[]): void => {
    for (const t of tests) {
      const icon = t.passed ? `${GREEN}✓${RESET}` : `${RED}✗${RESET}`;
      console.log(` ${icon} ${t.name}`);
    }
  };

  const allResults: ModelResult[] = [];

  // Models run sequentially on purpose: one child process at a time.
  for (const model of MODELS) {
    console.log(`${CYAN}${BOLD}▶ Testing ${model.short} (${model.id})${RESET}`);
    const result = await runModel(model);
    allResults.push(result);

    const timeStr = `${(result.durationMs / 1000).toFixed(1)}s`;
    if (result.error) {
      console.log(` ${RED}ERROR${RESET}: ${result.error} (${timeStr})`);
    } else {
      console.log(
        ` ${scoreColor(result)}${result.totalPassed}/${result.totalTests} passed${RESET} (${timeStr})`
      );
      printTests(result.tests);
    }
    console.log();
  }

  // ── Summary Table ──────────────────────────────────────────
  console.log(`${BOLD}═══ Summary ═══${RESET}\n`);

  // Per-model results
  for (const r of allResults) {
    const timeStr = `${(r.durationMs / 1000).toFixed(0)}s`;
    const label = r.error ? `ERROR: ${r.error}` : `${r.totalPassed}/${r.totalTests}`;
    console.log(` ${r.modelShort.padEnd(8)} ${scoreColor(r)}${label}${RESET} (${timeStr})`);
    printTests(r.tests);
  }

  console.log();

  // Overall
  const totalModels = allResults.length;
  const erroredModels = allResults.filter((r) => r.error).length;
  const perfectModels = allResults.filter(
    (r) => !r.error && r.totalPassed === r.totalTests && r.totalTests > 0
  ).length;
  console.log(
    `${BOLD}Models with 100%: ${perfectModels}/${totalModels}${RESET}`
  );

  const overallPassed = allResults.reduce((sum, r) => sum + r.totalPassed, 0);
  const overallTotal = allResults.reduce((sum, r) => sum + r.totalTests, 0);
  // Errored models contribute 0 tests; avoid printing "NaN%" when all errored.
  const overallPct = overallTotal > 0 ? Math.round((overallPassed / overallTotal) * 100) : 0;
  console.log(
    `${BOLD}Overall: ${overallPassed}/${overallTotal} (${overallPct}%)${RESET}`
  );

  console.log();

  if (erroredModels > 0) {
    console.log(
      `${BOLD}${RED}${erroredModels} model(s) errored. See details above.${RESET}\n`
    );
    process.exit(1);
  } else if (perfectModels === totalModels) {
    console.log(`${BOLD}${GREEN}🎉 ALL MODELS PASSED ALL TESTS!${RESET}\n`);
    process.exit(0);
  } else {
    console.log(
      `${BOLD}${YELLOW}Some models have failures. See details above.${RESET}\n`
    );
    process.exit(1);
  }
};
|
||||
|
||||
main();
|
||||
24
package.json
24
package.json
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "oh-my-opencode",
|
||||
"version": "3.8.5",
|
||||
"version": "3.9.0",
|
||||
"description": "The Best AI Agent Harness - Batteries-Included OpenCode Plugin with Multi-Model Orchestration, Parallel Background Agents, and Crafted LSP/AST Tools",
|
||||
"main": "dist/index.js",
|
||||
"types": "dist/index.d.ts",
|
||||
@@ -75,17 +75,17 @@
|
||||
"typescript": "^5.7.3"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"oh-my-opencode-darwin-arm64": "3.8.5",
|
||||
"oh-my-opencode-darwin-x64": "3.8.5",
|
||||
"oh-my-opencode-darwin-x64-baseline": "3.8.5",
|
||||
"oh-my-opencode-linux-arm64": "3.8.5",
|
||||
"oh-my-opencode-linux-arm64-musl": "3.8.5",
|
||||
"oh-my-opencode-linux-x64": "3.8.5",
|
||||
"oh-my-opencode-linux-x64-baseline": "3.8.5",
|
||||
"oh-my-opencode-linux-x64-musl": "3.8.5",
|
||||
"oh-my-opencode-linux-x64-musl-baseline": "3.8.5",
|
||||
"oh-my-opencode-windows-x64": "3.8.5",
|
||||
"oh-my-opencode-windows-x64-baseline": "3.8.5"
|
||||
"oh-my-opencode-darwin-arm64": "3.9.0",
|
||||
"oh-my-opencode-darwin-x64": "3.9.0",
|
||||
"oh-my-opencode-darwin-x64-baseline": "3.9.0",
|
||||
"oh-my-opencode-linux-arm64": "3.9.0",
|
||||
"oh-my-opencode-linux-arm64-musl": "3.9.0",
|
||||
"oh-my-opencode-linux-x64": "3.9.0",
|
||||
"oh-my-opencode-linux-x64-baseline": "3.9.0",
|
||||
"oh-my-opencode-linux-x64-musl": "3.9.0",
|
||||
"oh-my-opencode-linux-x64-musl-baseline": "3.9.0",
|
||||
"oh-my-opencode-windows-x64": "3.9.0",
|
||||
"oh-my-opencode-windows-x64-baseline": "3.9.0"
|
||||
},
|
||||
"trustedDependencies": [
|
||||
"@ast-grep/cli",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "oh-my-opencode-darwin-arm64",
|
||||
"version": "3.8.5",
|
||||
"version": "3.9.0",
|
||||
"description": "Platform-specific binary for oh-my-opencode (darwin-arm64)",
|
||||
"license": "MIT",
|
||||
"repository": {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "oh-my-opencode-darwin-x64-baseline",
|
||||
"version": "3.1.1",
|
||||
"version": "3.9.0",
|
||||
"description": "Platform-specific binary for oh-my-opencode (darwin-x64-baseline, no AVX2)",
|
||||
"license": "MIT",
|
||||
"repository": {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "oh-my-opencode-darwin-x64",
|
||||
"version": "3.8.5",
|
||||
"version": "3.9.0",
|
||||
"description": "Platform-specific binary for oh-my-opencode (darwin-x64)",
|
||||
"license": "MIT",
|
||||
"repository": {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "oh-my-opencode-linux-arm64-musl",
|
||||
"version": "3.8.5",
|
||||
"version": "3.9.0",
|
||||
"description": "Platform-specific binary for oh-my-opencode (linux-arm64-musl)",
|
||||
"license": "MIT",
|
||||
"repository": {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "oh-my-opencode-linux-arm64",
|
||||
"version": "3.8.5",
|
||||
"version": "3.9.0",
|
||||
"description": "Platform-specific binary for oh-my-opencode (linux-arm64)",
|
||||
"license": "MIT",
|
||||
"repository": {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "oh-my-opencode-linux-x64-baseline",
|
||||
"version": "3.1.1",
|
||||
"version": "3.9.0",
|
||||
"description": "Platform-specific binary for oh-my-opencode (linux-x64-baseline, no AVX2)",
|
||||
"license": "MIT",
|
||||
"repository": {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "oh-my-opencode-linux-x64-musl-baseline",
|
||||
"version": "3.1.1",
|
||||
"version": "3.9.0",
|
||||
"description": "Platform-specific binary for oh-my-opencode (linux-x64-musl-baseline, no AVX2)",
|
||||
"license": "MIT",
|
||||
"repository": {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "oh-my-opencode-linux-x64-musl",
|
||||
"version": "3.8.5",
|
||||
"version": "3.9.0",
|
||||
"description": "Platform-specific binary for oh-my-opencode (linux-x64-musl)",
|
||||
"license": "MIT",
|
||||
"repository": {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "oh-my-opencode-linux-x64",
|
||||
"version": "3.8.5",
|
||||
"version": "3.9.0",
|
||||
"description": "Platform-specific binary for oh-my-opencode (linux-x64)",
|
||||
"license": "MIT",
|
||||
"repository": {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "oh-my-opencode-windows-x64-baseline",
|
||||
"version": "3.1.1",
|
||||
"version": "3.9.0",
|
||||
"description": "Platform-specific binary for oh-my-opencode (windows-x64-baseline, no AVX2)",
|
||||
"license": "MIT",
|
||||
"repository": {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "oh-my-opencode-windows-x64",
|
||||
"version": "3.8.5",
|
||||
"version": "3.9.0",
|
||||
"description": "Platform-specific binary for oh-my-opencode (windows-x64)",
|
||||
"license": "MIT",
|
||||
"repository": {
|
||||
|
||||
@@ -448,21 +448,6 @@ ${oracleSection}
|
||||
4. **Run build** if applicable — exit code 0 required
|
||||
5. **Tell user** what you verified and the results — keep it clear and helpful
|
||||
|
||||
### Auto-Commit Policy (MANDATORY for implementation/fix work)
|
||||
|
||||
1. **Auto-commit after implementation is complete** when the task includes feature/fix code changes
|
||||
2. **Commit ONLY after verification gates pass**:
|
||||
- \`lsp_diagnostics\` clean on all modified files
|
||||
- Related tests pass
|
||||
- Typecheck/build pass when applicable
|
||||
3. **If any gate fails, DO NOT commit** — fix issues first, re-run verification, then commit
|
||||
4. **Use Conventional Commits format** with meaningful intent-focused messages:
|
||||
- \`feat(scope): add ...\` for new functionality
|
||||
- \`fix(scope): resolve ...\` for bug fixes
|
||||
- \`refactor(scope): simplify ...\` for internal restructuring
|
||||
5. **Do not make placeholder commits** (\`wip\`, \`temp\`, \`update\`) or commit unverified code
|
||||
6. **If user explicitly says not to commit**, skip commit and report that changes are left uncommitted
|
||||
|
||||
- **File edit** — \`lsp_diagnostics\` clean
|
||||
- **Build** — Exit code 0
|
||||
- **Tests** — Pass (or pre-existing failures noted)
|
||||
|
||||
@@ -2987,6 +2987,28 @@ describe("BackgroundManager.handleEvent - session.deleted cascade", () => {
|
||||
manager.shutdown()
|
||||
resetToastManager()
|
||||
})
|
||||
|
||||
test("should clean pending notifications for deleted sessions", () => {
|
||||
//#given
|
||||
const manager = createBackgroundManager()
|
||||
const sessionID = "session-pending-notifications"
|
||||
|
||||
manager.queuePendingNotification(sessionID, "<system-reminder>queued</system-reminder>")
|
||||
expect(getPendingNotifications(manager).get(sessionID)).toEqual([
|
||||
"<system-reminder>queued</system-reminder>",
|
||||
])
|
||||
|
||||
//#when
|
||||
manager.handleEvent({
|
||||
type: "session.deleted",
|
||||
properties: { info: { id: sessionID } },
|
||||
})
|
||||
|
||||
//#then
|
||||
expect(getPendingNotifications(manager).has(sessionID)).toBe(false)
|
||||
|
||||
manager.shutdown()
|
||||
})
|
||||
})
|
||||
|
||||
describe("BackgroundManager.handleEvent - session.error", () => {
|
||||
|
||||
@@ -830,6 +830,8 @@ export class BackgroundManager {
|
||||
tasksToCancel.set(descendant.id, descendant)
|
||||
}
|
||||
|
||||
this.pendingNotifications.delete(sessionID)
|
||||
|
||||
if (tasksToCancel.size === 0) return
|
||||
|
||||
for (const task of tasksToCancel.values()) {
|
||||
@@ -866,6 +868,13 @@ export class BackgroundManager {
|
||||
subagentSessions.delete(task.sessionID)
|
||||
}
|
||||
}
|
||||
|
||||
for (const task of tasksToCancel.values()) {
|
||||
if (task.parentSessionID) {
|
||||
this.pendingNotifications.delete(task.parentSessionID)
|
||||
}
|
||||
}
|
||||
|
||||
SessionCategoryRegistry.remove(sessionID)
|
||||
}
|
||||
|
||||
|
||||
111
src/hooks/ralph-loop/completion-promise-detector.test.ts
Normal file
111
src/hooks/ralph-loop/completion-promise-detector.test.ts
Normal file
@@ -0,0 +1,111 @@
|
||||
/// <reference types="bun-types" />
|
||||
import { describe, expect, test } from "bun:test"
|
||||
import type { PluginInput } from "@opencode-ai/plugin"
|
||||
import { detectCompletionInSessionMessages } from "./completion-promise-detector"
|
||||
|
||||
type SessionMessage = {
|
||||
info?: { role?: string }
|
||||
parts?: Array<{ type: string; text?: string }>
|
||||
}
|
||||
|
||||
function createPluginInput(messages: SessionMessage[]): PluginInput {
|
||||
const pluginInput = {
|
||||
client: { session: {} } as PluginInput["client"],
|
||||
project: {} as PluginInput["project"],
|
||||
directory: "/tmp",
|
||||
worktree: "/tmp",
|
||||
serverUrl: new URL("http://localhost"),
|
||||
$: {} as PluginInput["$"],
|
||||
} as PluginInput
|
||||
|
||||
pluginInput.client.session.messages =
|
||||
(async () => ({ data: messages })) as unknown as PluginInput["client"]["session"]["messages"]
|
||||
|
||||
return pluginInput
|
||||
}
|
||||
|
||||
describe("detectCompletionInSessionMessages", () => {
|
||||
describe("#given session with prior DONE and new messages", () => {
|
||||
test("#when sinceMessageIndex excludes prior DONE #then should NOT detect completion", async () => {
|
||||
// #given
|
||||
const messages: SessionMessage[] = [
|
||||
{
|
||||
info: { role: "assistant" },
|
||||
parts: [{ type: "text", text: "Old completion <promise>DONE</promise>" }],
|
||||
},
|
||||
{
|
||||
info: { role: "assistant" },
|
||||
parts: [{ type: "text", text: "Working on the new task" }],
|
||||
},
|
||||
]
|
||||
const ctx = createPluginInput(messages)
|
||||
|
||||
// #when
|
||||
const detected = await detectCompletionInSessionMessages(ctx, {
|
||||
sessionID: "session-123",
|
||||
promise: "DONE",
|
||||
apiTimeoutMs: 1000,
|
||||
directory: "/tmp",
|
||||
sinceMessageIndex: 1,
|
||||
})
|
||||
|
||||
// #then
|
||||
expect(detected).toBe(false)
|
||||
})
|
||||
|
||||
test("#when sinceMessageIndex includes current DONE #then should detect completion", async () => {
|
||||
// #given
|
||||
const messages: SessionMessage[] = [
|
||||
{
|
||||
info: { role: "assistant" },
|
||||
parts: [{ type: "text", text: "Old completion <promise>DONE</promise>" }],
|
||||
},
|
||||
{
|
||||
info: { role: "assistant" },
|
||||
parts: [{ type: "text", text: "Current completion <promise>DONE</promise>" }],
|
||||
},
|
||||
]
|
||||
const ctx = createPluginInput(messages)
|
||||
|
||||
// #when
|
||||
const detected = await detectCompletionInSessionMessages(ctx, {
|
||||
sessionID: "session-123",
|
||||
promise: "DONE",
|
||||
apiTimeoutMs: 1000,
|
||||
directory: "/tmp",
|
||||
sinceMessageIndex: 1,
|
||||
})
|
||||
|
||||
// #then
|
||||
expect(detected).toBe(true)
|
||||
})
|
||||
})
|
||||
|
||||
describe("#given no sinceMessageIndex (backward compat)", () => {
|
||||
test("#then should scan all messages", async () => {
|
||||
// #given
|
||||
const messages: SessionMessage[] = [
|
||||
{
|
||||
info: { role: "assistant" },
|
||||
parts: [{ type: "text", text: "Old completion <promise>DONE</promise>" }],
|
||||
},
|
||||
{
|
||||
info: { role: "assistant" },
|
||||
parts: [{ type: "text", text: "No completion in latest message" }],
|
||||
},
|
||||
]
|
||||
const ctx = createPluginInput(messages)
|
||||
|
||||
// #when
|
||||
const detected = await detectCompletionInSessionMessages(ctx, {
|
||||
sessionID: "session-123",
|
||||
promise: "DONE",
|
||||
apiTimeoutMs: 1000,
|
||||
directory: "/tmp",
|
||||
})
|
||||
|
||||
// #then
|
||||
expect(detected).toBe(true)
|
||||
})
|
||||
})
|
||||
})
|
||||
@@ -52,6 +52,7 @@ export async function detectCompletionInSessionMessages(
|
||||
promise: string
|
||||
apiTimeoutMs: number
|
||||
directory: string
|
||||
sinceMessageIndex?: number
|
||||
},
|
||||
): Promise<boolean> {
|
||||
try {
|
||||
@@ -75,7 +76,12 @@ export async function detectCompletionInSessionMessages(
|
||||
? responseData
|
||||
: []
|
||||
|
||||
const assistantMessages = (messageArray as OpenCodeSessionMessage[]).filter((msg) => msg.info?.role === "assistant")
|
||||
const scopedMessages =
|
||||
typeof options.sinceMessageIndex === "number" && options.sinceMessageIndex >= 0 && options.sinceMessageIndex < messageArray.length
|
||||
? messageArray.slice(options.sinceMessageIndex)
|
||||
: messageArray
|
||||
|
||||
const assistantMessages = (scopedMessages as OpenCodeSessionMessage[]).filter((msg) => msg.info?.role === "assistant")
|
||||
if (assistantMessages.length === 0) return false
|
||||
|
||||
const pattern = buildPromisePattern(options.promise)
|
||||
|
||||
@@ -603,7 +603,7 @@ describe("ralph-loop", () => {
|
||||
expect(hook.getState()).toBeNull()
|
||||
|
||||
// then - messages API was called with correct session ID
|
||||
expect(messagesCalls.length).toBe(1)
|
||||
expect(messagesCalls.length).toBe(2)
|
||||
expect(messagesCalls[0].sessionID).toBe("session-123")
|
||||
})
|
||||
|
||||
@@ -633,7 +633,7 @@ describe("ralph-loop", () => {
|
||||
expect(hook.getState()).toBeNull()
|
||||
|
||||
// then - messages API was called with correct session ID
|
||||
expect(messagesCalls.length).toBe(1)
|
||||
expect(messagesCalls.length).toBe(2)
|
||||
expect(messagesCalls[0].sessionID).toBe("session-123")
|
||||
})
|
||||
|
||||
@@ -1075,7 +1075,7 @@ Original task: Build something`
|
||||
expect(promptCalls.length).toBe(0)
|
||||
expect(hook.getState()).toBeNull()
|
||||
// API should NOT be called since transcript found completion
|
||||
expect(messagesCalls.length).toBe(0)
|
||||
expect(messagesCalls.length).toBe(1)
|
||||
})
|
||||
|
||||
test("should show ultrawork completion toast", async () => {
|
||||
|
||||
@@ -23,6 +23,7 @@ export function createLoopStateController(options: {
|
||||
loopOptions?: {
|
||||
maxIterations?: number
|
||||
completionPromise?: string
|
||||
messageCountAtStart?: number
|
||||
ultrawork?: boolean
|
||||
strategy?: "reset" | "continue"
|
||||
},
|
||||
@@ -34,6 +35,7 @@ export function createLoopStateController(options: {
|
||||
loopOptions?.maxIterations ??
|
||||
config?.default_max_iterations ??
|
||||
DEFAULT_MAX_ITERATIONS,
|
||||
message_count_at_start: loopOptions?.messageCountAtStart,
|
||||
completion_promise:
|
||||
loopOptions?.completionPromise ??
|
||||
DEFAULT_COMPLETION_PROMISE,
|
||||
@@ -93,5 +95,19 @@ export function createLoopStateController(options: {
|
||||
|
||||
return state
|
||||
},
|
||||
|
||||
setMessageCountAtStart(sessionID: string, messageCountAtStart: number): RalphLoopState | null {
|
||||
const state = readState(directory, stateDir)
|
||||
if (!state || state.session_id !== sessionID) {
|
||||
return null
|
||||
}
|
||||
|
||||
state.message_count_at_start = messageCountAtStart
|
||||
if (!writeState(directory, state, stateDir)) {
|
||||
return null
|
||||
}
|
||||
|
||||
return state
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
@@ -84,6 +84,7 @@ export function createRalphLoopEventHandler(
|
||||
promise: state.completion_promise,
|
||||
apiTimeoutMs: options.apiTimeoutMs,
|
||||
directory: options.directory,
|
||||
sinceMessageIndex: state.message_count_at_start,
|
||||
})
|
||||
|
||||
if (completionViaTranscript || completionViaApi) {
|
||||
|
||||
@@ -13,6 +13,7 @@ export interface RalphLoopHook {
|
||||
options?: {
|
||||
maxIterations?: number
|
||||
completionPromise?: string
|
||||
messageCountAtStart?: number
|
||||
ultrawork?: boolean
|
||||
strategy?: "reset" | "continue"
|
||||
}
|
||||
@@ -23,6 +24,19 @@ export interface RalphLoopHook {
|
||||
|
||||
const DEFAULT_API_TIMEOUT = 5000 as const
|
||||
|
||||
function getMessageCountFromResponse(messagesResponse: unknown): number {
|
||||
if (Array.isArray(messagesResponse)) {
|
||||
return messagesResponse.length
|
||||
}
|
||||
|
||||
if (typeof messagesResponse === "object" && messagesResponse !== null && "data" in messagesResponse) {
|
||||
const data = (messagesResponse as { data?: unknown }).data
|
||||
return Array.isArray(data) ? data.length : 0
|
||||
}
|
||||
|
||||
return 0
|
||||
}
|
||||
|
||||
export function createRalphLoopHook(
|
||||
ctx: PluginInput,
|
||||
options?: RalphLoopOptions
|
||||
@@ -51,7 +65,25 @@ export function createRalphLoopHook(
|
||||
|
||||
return {
|
||||
event,
|
||||
startLoop: loopState.startLoop,
|
||||
startLoop: (sessionID, prompt, loopOptions): boolean => {
|
||||
const startSuccess = loopState.startLoop(sessionID, prompt, loopOptions)
|
||||
if (!startSuccess || typeof loopOptions?.messageCountAtStart === "number") {
|
||||
return startSuccess
|
||||
}
|
||||
|
||||
ctx.client.session
|
||||
.messages({
|
||||
path: { id: sessionID },
|
||||
query: { directory: ctx.directory },
|
||||
})
|
||||
.then((messagesResponse: unknown) => {
|
||||
const messageCountAtStart = getMessageCountFromResponse(messagesResponse)
|
||||
loopState.setMessageCountAtStart(sessionID, messageCountAtStart)
|
||||
})
|
||||
.catch(() => {})
|
||||
|
||||
return startSuccess
|
||||
},
|
||||
cancelLoop: loopState.cancelLoop,
|
||||
getState: loopState.getState as () => RalphLoopState | null,
|
||||
}
|
||||
|
||||
@@ -44,6 +44,12 @@ export function readState(directory: string, customPath?: string): RalphLoopStat
|
||||
active: isActive,
|
||||
iteration: iterationNum,
|
||||
max_iterations: Number(data.max_iterations) || DEFAULT_MAX_ITERATIONS,
|
||||
message_count_at_start:
|
||||
typeof data.message_count_at_start === "number"
|
||||
? data.message_count_at_start
|
||||
: typeof data.message_count_at_start === "string" && data.message_count_at_start.trim() !== ""
|
||||
? Number(data.message_count_at_start)
|
||||
: undefined,
|
||||
completion_promise: stripQuotes(data.completion_promise) || DEFAULT_COMPLETION_PROMISE,
|
||||
started_at: stripQuotes(data.started_at) || new Date().toISOString(),
|
||||
prompt: body.trim(),
|
||||
@@ -72,13 +78,17 @@ export function writeState(
|
||||
const sessionIdLine = state.session_id ? `session_id: "${state.session_id}"\n` : ""
|
||||
const ultraworkLine = state.ultrawork !== undefined ? `ultrawork: ${state.ultrawork}\n` : ""
|
||||
const strategyLine = state.strategy ? `strategy: "${state.strategy}"\n` : ""
|
||||
const messageCountAtStartLine =
|
||||
typeof state.message_count_at_start === "number"
|
||||
? `message_count_at_start: ${state.message_count_at_start}\n`
|
||||
: ""
|
||||
const content = `---
|
||||
active: ${state.active}
|
||||
iteration: ${state.iteration}
|
||||
max_iterations: ${state.max_iterations}
|
||||
completion_promise: "${state.completion_promise}"
|
||||
started_at: "${state.started_at}"
|
||||
${sessionIdLine}${ultraworkLine}${strategyLine}---
|
||||
${sessionIdLine}${ultraworkLine}${strategyLine}${messageCountAtStartLine}---
|
||||
${state.prompt}
|
||||
`
|
||||
|
||||
|
||||
@@ -4,6 +4,7 @@ export interface RalphLoopState {
|
||||
active: boolean
|
||||
iteration: number
|
||||
max_iterations: number
|
||||
message_count_at_start?: number
|
||||
completion_promise: string
|
||||
started_at: string
|
||||
prompt: string
|
||||
|
||||
@@ -23,8 +23,8 @@ function tryUpdateMessageModel(
|
||||
if (result.changes === 0) return false
|
||||
if (variant) {
|
||||
db.prepare(
|
||||
`UPDATE message SET data = json_set(data, '$.variant', ?) WHERE id = ?`,
|
||||
).run(variant, messageId)
|
||||
`UPDATE message SET data = json_set(data, '$.variant', ?, '$.thinking', ?) WHERE id = ?`,
|
||||
).run(variant, variant, messageId)
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
@@ -19,6 +19,8 @@ import {
|
||||
truncateToTokenBudget,
|
||||
} from "./token-limiter"
|
||||
|
||||
const TRUNCATION_MARKER_TOKEN_OVERHEAD = estimateTokenCount("\n[TRUNCATED]")
|
||||
|
||||
describe("token-limiter", () => {
|
||||
test("estimateTokenCount uses 1 token per 4 chars approximation", () => {
|
||||
// given
|
||||
@@ -40,7 +42,62 @@ describe("token-limiter", () => {
|
||||
const result = truncateToTokenBudget(content, maxTokens)
|
||||
|
||||
// then
|
||||
expect(estimateTokenCount(result)).toBeLessThanOrEqual(maxTokens)
|
||||
expect(estimateTokenCount(result)).toBeLessThanOrEqual(maxTokens + TRUNCATION_MARKER_TOKEN_OVERHEAD)
|
||||
})
|
||||
|
||||
describe("truncateToTokenBudget", () => {
|
||||
describe("#given content that exceeds budget", () => {
|
||||
describe("#when content has newlines", () => {
|
||||
test("#then should truncate at last newline boundary", () => {
|
||||
// #given
|
||||
const content = "line-1\nline-2\nline-3"
|
||||
|
||||
// #when
|
||||
const result = truncateToTokenBudget(content, 2)
|
||||
|
||||
// #then
|
||||
expect(result).toBe("line-1\n[TRUNCATED]")
|
||||
})
|
||||
|
||||
test("#then should append [TRUNCATED] marker", () => {
|
||||
// #given
|
||||
const content = "line-1\nline-2\nline-3"
|
||||
|
||||
// #when
|
||||
const result = truncateToTokenBudget(content, 2)
|
||||
|
||||
// #then
|
||||
expect(result).toContain("[TRUNCATED]")
|
||||
})
|
||||
})
|
||||
|
||||
describe("#when content is single long line with no newlines", () => {
|
||||
test("#then should slice and append [TRUNCATED] marker", () => {
|
||||
// #given
|
||||
const content = "A".repeat(30)
|
||||
|
||||
// #when
|
||||
const result = truncateToTokenBudget(content, 2)
|
||||
|
||||
// #then
|
||||
expect(result).toBe("AAAAAAAA\n[TRUNCATED]")
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
describe("#given content within budget", () => {
|
||||
test("#then should return content unchanged without marker", () => {
|
||||
// #given
|
||||
const content = "line-1\nline-2"
|
||||
|
||||
// #when
|
||||
const result = truncateToTokenBudget(content, 20)
|
||||
|
||||
// #then
|
||||
expect(result).toBe(content)
|
||||
expect(result).not.toContain("[TRUNCATED]")
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
test("buildSystemContentWithTokenLimit returns undefined when there is no content", () => {
|
||||
@@ -76,10 +133,10 @@ describe("token-limiter", () => {
|
||||
const result = buildSystemContentWithTokenLimit(input, 80)
|
||||
|
||||
// then
|
||||
expect(result).toContain("AGENTS_CONTEXT:keep")
|
||||
expect(result).toContain("CATEGORY_APPEND:keep")
|
||||
expect(result).toContain("AGENTS_C")
|
||||
expect(result).toContain("CATE")
|
||||
expect(result).toContain("SKILL_ALPHA:")
|
||||
expect(estimateTokenCount(result as string)).toBeLessThanOrEqual(80)
|
||||
expect(estimateTokenCount(result as string)).toBeLessThanOrEqual(80 + TRUNCATION_MARKER_TOKEN_OVERHEAD)
|
||||
})
|
||||
|
||||
test("buildSystemContentWithTokenLimit truncates category after skills are exhausted", () => {
|
||||
@@ -95,9 +152,9 @@ describe("token-limiter", () => {
|
||||
const result = buildSystemContentWithTokenLimit(input, 30)
|
||||
|
||||
// then
|
||||
expect(result).toContain("AGENTS_CONTEXT:keep")
|
||||
expect(result).toContain("AGENTS_C")
|
||||
expect(result).not.toContain("SKILL_ALPHA:" + "a".repeat(80))
|
||||
expect(estimateTokenCount(result as string)).toBeLessThanOrEqual(30)
|
||||
expect(estimateTokenCount(result as string)).toBeLessThanOrEqual(30 + TRUNCATION_MARKER_TOKEN_OVERHEAD)
|
||||
})
|
||||
|
||||
test("buildSystemContentWithTokenLimit truncates agents context last", () => {
|
||||
@@ -116,6 +173,6 @@ describe("token-limiter", () => {
|
||||
expect(result).toContain("AGENTS_CONTEXT:")
|
||||
expect(result).not.toContain("SKILL_ALPHA:")
|
||||
expect(result).not.toContain("CATEGORY_APPEND:")
|
||||
expect(estimateTokenCount(result as string)).toBeLessThanOrEqual(10)
|
||||
expect(estimateTokenCount(result as string)).toBeLessThanOrEqual(10 + TRUNCATION_MARKER_TOKEN_OVERHEAD)
|
||||
})
|
||||
})
|
||||
|
||||
@@ -20,7 +20,13 @@ export function truncateToTokenBudget(content: string, maxTokens: number): strin
|
||||
return content
|
||||
}
|
||||
|
||||
return content.slice(0, maxCharacters)
|
||||
const sliced = content.slice(0, maxCharacters)
|
||||
const lastNewline = sliced.lastIndexOf("\n")
|
||||
if (lastNewline > 0) {
|
||||
return `${sliced.slice(0, lastNewline)}\n[TRUNCATED]`
|
||||
}
|
||||
|
||||
return `${sliced}\n[TRUNCATED]`
|
||||
}
|
||||
|
||||
function joinSystemParts(parts: string[]): string | undefined {
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
/// <reference types="bun-types" />
|
||||
import { describe, expect, it } from "bun:test"
|
||||
import { parsePatch } from "diff"
|
||||
import { generateUnifiedDiff } from "./diff-utils"
|
||||
|
||||
function createNumberedLines(totalLineCount: number): string {
|
||||
@@ -7,6 +8,66 @@ function createNumberedLines(totalLineCount: number): string {
|
||||
}
|
||||
|
||||
describe("generateUnifiedDiff", () => {
|
||||
describe("#given OpenCode compatibility format", () => {
|
||||
it("#then includes the Index header emitted by diff library", () => {
|
||||
//#given
|
||||
const oldContent = "a\n"
|
||||
const newContent = "b\n"
|
||||
|
||||
//#when
|
||||
const diff = generateUnifiedDiff(oldContent, newContent, "test.ts")
|
||||
|
||||
//#then
|
||||
expect(diff).toContain("Index: test.ts")
|
||||
})
|
||||
|
||||
it("#then includes unified --- and +++ file headers", () => {
|
||||
//#given
|
||||
const oldContent = "a\n"
|
||||
const newContent = "b\n"
|
||||
|
||||
//#when
|
||||
const diff = generateUnifiedDiff(oldContent, newContent, "test.ts")
|
||||
|
||||
//#then
|
||||
expect(diff).toContain("--- test.ts")
|
||||
expect(diff).toContain("+++ test.ts")
|
||||
})
|
||||
|
||||
it("#then remains parseable by OpenCode parsePatch flow", () => {
|
||||
//#given
|
||||
const oldContent = "line1\nline2\n"
|
||||
const newContent = "line1\nline2-updated\n"
|
||||
|
||||
//#when
|
||||
const diff = generateUnifiedDiff(oldContent, newContent, "test.ts")
|
||||
const patches = parsePatch(diff)
|
||||
|
||||
//#then
|
||||
expect(patches).toHaveLength(1)
|
||||
expect(patches[0]?.oldFileName).toBe("test.ts")
|
||||
expect(patches[0]?.newFileName).toBe("test.ts")
|
||||
expect(patches[0]?.hunks).toHaveLength(1)
|
||||
})
|
||||
})
|
||||
|
||||
describe("#given content without trailing newline", () => {
|
||||
it("#then keeps no-newline markers parseable", () => {
|
||||
//#given
|
||||
const oldContent = "a"
|
||||
const newContent = "b"
|
||||
|
||||
//#when
|
||||
const diff = generateUnifiedDiff(oldContent, newContent, "test.ts")
|
||||
const patches = parsePatch(diff)
|
||||
const hunkLines = patches[0]?.hunks[0]?.lines ?? []
|
||||
|
||||
//#then
|
||||
expect(diff).toContain("\\ No newline at end of file")
|
||||
expect(hunkLines).toEqual(["-a", "\\ No newline at end of file", "+b", "\\ No newline at end of file"])
|
||||
})
|
||||
})
|
||||
|
||||
it("creates separate hunks for distant changes", () => {
|
||||
//#given
|
||||
const oldContent = createNumberedLines(60)
|
||||
|
||||
@@ -1,18 +1,24 @@
|
||||
import type { HashlineEdit } from "./types"
|
||||
import { toNewLines } from "./edit-text-normalization"
|
||||
import { normalizeLineRef } from "./validation"
|
||||
|
||||
function normalizeEditPayload(payload: string | string[]): string {
|
||||
return toNewLines(payload).join("\n")
|
||||
}
|
||||
|
||||
function canonicalAnchor(anchor: string | undefined): string {
|
||||
if (!anchor) return ""
|
||||
return normalizeLineRef(anchor)
|
||||
}
|
||||
|
||||
function buildDedupeKey(edit: HashlineEdit): string {
|
||||
switch (edit.op) {
|
||||
case "replace":
|
||||
return `replace|${edit.pos}|${edit.end ?? ""}|${normalizeEditPayload(edit.lines)}`
|
||||
return `replace|${canonicalAnchor(edit.pos)}|${edit.end ? canonicalAnchor(edit.end) : ""}|${normalizeEditPayload(edit.lines)}`
|
||||
case "append":
|
||||
return `append|${edit.pos ?? ""}|${normalizeEditPayload(edit.lines)}`
|
||||
return `append|${canonicalAnchor(edit.pos)}|${normalizeEditPayload(edit.lines)}`
|
||||
case "prepend":
|
||||
return `prepend|${edit.pos ?? ""}|${normalizeEditPayload(edit.lines)}`
|
||||
return `prepend|${canonicalAnchor(edit.pos)}|${normalizeEditPayload(edit.lines)}`
|
||||
default:
|
||||
return JSON.stringify(edit)
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import { describe, expect, it } from "bun:test"
|
||||
import { applyHashlineEdits } from "./edit-operations"
|
||||
import { applyHashlineEdits, applyHashlineEditsWithReport } from "./edit-operations"
|
||||
import { applyAppend, applyInsertAfter, applyPrepend, applyReplaceLines, applySetLine } from "./edit-operation-primitives"
|
||||
import { computeLineHash } from "./hash-computation"
|
||||
import type { HashlineEdit } from "./types"
|
||||
@@ -389,3 +389,23 @@ describe("hashline edit operations", () => {
|
||||
expect(result).toEqual("replaced A\nline 3\nreplaced B")
|
||||
})
|
||||
})
|
||||
|
||||
describe("dedupe anchor canonicalization", () => {
|
||||
it("deduplicates edits with whitespace-variant anchors", () => {
|
||||
//#given
|
||||
const content = "line 1\nline 2"
|
||||
const lines = content.split("\n")
|
||||
const canonical = `1#${computeLineHash(1, lines[0])}`
|
||||
const spaced = ` 1 # ${computeLineHash(1, lines[0])} `
|
||||
|
||||
//#when
|
||||
const report = applyHashlineEditsWithReport(content, [
|
||||
{ op: "append", pos: canonical, lines: ["inserted"] },
|
||||
{ op: "append", pos: spaced, lines: ["inserted"] },
|
||||
])
|
||||
|
||||
//#then
|
||||
expect(report.deduplicatedEdits).toBe(1)
|
||||
expect(report.content).toBe("line 1\ninserted\nline 2")
|
||||
})
|
||||
})
|
||||
|
||||
@@ -33,7 +33,7 @@ function resolveToolCallID(ctx: ToolContextWithCallID): string | undefined {
|
||||
|
||||
function canCreateFromMissingFile(edits: HashlineEdit[]): boolean {
|
||||
if (edits.length === 0) return false
|
||||
return edits.every((edit) => edit.op === "append" || edit.op === "prepend")
|
||||
return edits.every((edit) => (edit.op === "append" || edit.op === "prepend") && !edit.pos)
|
||||
}
|
||||
|
||||
function buildSuccessMeta(
|
||||
@@ -86,19 +86,19 @@ export async function executeHashlineEditTool(args: HashlineEditArgs, context: T
|
||||
const filePath = args.filePath
|
||||
const { delete: deleteMode, rename } = args
|
||||
|
||||
if (deleteMode && rename) {
|
||||
return "Error: delete and rename cannot be used together"
|
||||
}
|
||||
if (deleteMode && args.edits.length > 0) {
|
||||
return "Error: delete mode requires edits to be an empty array"
|
||||
}
|
||||
|
||||
if (!deleteMode && (!args.edits || !Array.isArray(args.edits) || args.edits.length === 0)) {
|
||||
return "Error: edits parameter must be a non-empty array"
|
||||
}
|
||||
|
||||
const edits = deleteMode ? [] : normalizeHashlineEdits(args.edits)
|
||||
|
||||
if (deleteMode && rename) {
|
||||
return "Error: delete and rename cannot be used together"
|
||||
}
|
||||
if (deleteMode && edits.length > 0) {
|
||||
return "Error: delete mode requires edits to be an empty array"
|
||||
}
|
||||
|
||||
const file = Bun.file(filePath)
|
||||
const exists = await file.exists()
|
||||
if (!exists && !deleteMode && !canCreateFromMissingFile(edits)) {
|
||||
|
||||
@@ -10,7 +10,7 @@ WORKFLOW:
|
||||
VALIDATION:
|
||||
Payload shape: { "filePath": string, "edits": [...], "delete"?: boolean, "rename"?: string }
|
||||
Each edit must be one of: replace, append, prepend
|
||||
Edit shape: { "op": "replace"|"append"|"prepend", "pos"?: "LINE#ID", "end"?: "LINE#ID", "lines"?: string|string[]|null }
|
||||
Edit shape: { "op": "replace"|"append"|"prepend", "pos"?: "LINE#ID", "end"?: "LINE#ID", "lines": string|string[]|null }
|
||||
lines must contain plain replacement text only (no LINE#ID prefixes, no diff + markers)
|
||||
CRITICAL: all operations validate against the same pre-edit file snapshot and apply bottom-up. Refs/tags are interpreted against the last-read version of the file.
|
||||
|
||||
|
||||
@@ -341,4 +341,81 @@ describe("createHashlineEditTool", () => {
|
||||
//#then
|
||||
expect(envelope.lineEnding).toBe("\r\n")
|
||||
})
|
||||
|
||||
it("rejects delete=true with non-empty edits before normalization", async () => {
|
||||
//#given
|
||||
const filePath = path.join(tempDir, "delete-reject.txt")
|
||||
fs.writeFileSync(filePath, "line1")
|
||||
|
||||
//#when
|
||||
const result = await tool.execute(
|
||||
{
|
||||
filePath,
|
||||
delete: true,
|
||||
edits: [{ op: "replace", pos: "1#ZZ", lines: "bad" }],
|
||||
},
|
||||
createMockContext(),
|
||||
)
|
||||
|
||||
//#then
|
||||
expect(result).toContain("delete mode requires edits to be an empty array")
|
||||
expect(fs.existsSync(filePath)).toBe(true)
|
||||
})
|
||||
|
||||
it("rejects delete=true combined with rename", async () => {
|
||||
//#given
|
||||
const filePath = path.join(tempDir, "delete-rename.txt")
|
||||
fs.writeFileSync(filePath, "line1")
|
||||
|
||||
//#when
|
||||
const result = await tool.execute(
|
||||
{
|
||||
filePath,
|
||||
delete: true,
|
||||
rename: path.join(tempDir, "new-name.txt"),
|
||||
edits: [],
|
||||
},
|
||||
createMockContext(),
|
||||
)
|
||||
|
||||
//#then
|
||||
expect(result).toContain("delete and rename cannot be used together")
|
||||
expect(fs.existsSync(filePath)).toBe(true)
|
||||
})
|
||||
|
||||
it("rejects missing file creation with anchored append", async () => {
|
||||
//#given
|
||||
const filePath = path.join(tempDir, "nonexistent.txt")
|
||||
|
||||
//#when
|
||||
const result = await tool.execute(
|
||||
{
|
||||
filePath,
|
||||
edits: [{ op: "append", pos: "1#ZZ", lines: ["bad"] }],
|
||||
},
|
||||
createMockContext(),
|
||||
)
|
||||
|
||||
//#then
|
||||
expect(result).toContain("File not found")
|
||||
})
|
||||
|
||||
it("allows missing file creation with unanchored append", async () => {
|
||||
//#given
|
||||
const filePath = path.join(tempDir, "newfile.txt")
|
||||
|
||||
//#when
|
||||
const result = await tool.execute(
|
||||
{
|
||||
filePath,
|
||||
edits: [{ op: "append", lines: ["created"] }],
|
||||
},
|
||||
createMockContext(),
|
||||
)
|
||||
|
||||
//#then
|
||||
expect(fs.existsSync(filePath)).toBe(true)
|
||||
expect(fs.readFileSync(filePath, "utf-8")).toBe("created")
|
||||
expect(result).toBe(`Updated ${filePath}`)
|
||||
})
|
||||
})
|
||||
|
||||
@@ -31,7 +31,6 @@ export function createHashlineEditTool(): ToolDefinition {
|
||||
end: tool.schema.string().optional().describe("Range end anchor in LINE#ID format"),
|
||||
lines: tool.schema
|
||||
.union([tool.schema.string(), tool.schema.array(tool.schema.string()), tool.schema.null()])
|
||||
.optional()
|
||||
.describe("Replacement or inserted lines. null/[] deletes with replace"),
|
||||
})
|
||||
)
|
||||
|
||||
@@ -15,7 +15,7 @@ const MISMATCH_CONTEXT = 2
|
||||
|
||||
const LINE_REF_EXTRACT_PATTERN = /([0-9]+#[ZPMQVRWSNKTXJBYH]{2})/
|
||||
|
||||
function normalizeLineRef(ref: string): string {
|
||||
export function normalizeLineRef(ref: string): string {
|
||||
const originalTrimmed = ref.trim()
|
||||
let trimmed = originalTrimmed
|
||||
trimmed = trimmed.replace(/^(?:>>>|[+-])\s*/, "")
|
||||
|
||||
Reference in New Issue
Block a user