Compare commits
111 Commits
feat/hashl
...
v3.9.0
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ae54fd31f4 | ||
|
|
bdd86b1415 | ||
|
|
76cba9b222 | ||
|
|
2955dc868f | ||
|
|
3ab4b7f77b | ||
|
|
3540d1c550 | ||
|
|
9bc9dcaa18 | ||
|
|
f2a1412bf1 | ||
|
|
190c6991ac | ||
|
|
e17a00a906 | ||
|
|
c8aa1bbce4 | ||
|
|
911710e4d4 | ||
|
|
050b93bebb | ||
|
|
2ffa803b05 | ||
|
|
cf97494073 | ||
|
|
8fb5949ac6 | ||
|
|
04f50bac1f | ||
|
|
d1a0a66dde | ||
|
|
b1203b9501 | ||
|
|
58201220cc | ||
|
|
4efad491e7 | ||
|
|
4df69c58bf | ||
|
|
cc8ef7fe39 | ||
|
|
2ece7c3d0a | ||
|
|
decff3152a | ||
|
|
0526bac873 | ||
|
|
0c62656cc6 | ||
|
|
aff43bfc77 | ||
|
|
6865cee8ca | ||
|
|
8721ba471c | ||
|
|
96d27ff56b | ||
|
|
017c18c1b3 | ||
|
|
fb194fc944 | ||
|
|
10c25d1d47 | ||
|
|
86fcade9a4 | ||
|
|
5bc3a9e0db | ||
|
|
810ebec1cd | ||
|
|
8f7ed2988a | ||
|
|
7ff8352a0a | ||
|
|
d425f9bb80 | ||
|
|
cc5e9d1e9b | ||
|
|
269f37af1c | ||
|
|
1e060e9028 | ||
|
|
ccb789e5df | ||
|
|
a6617d93c0 | ||
|
|
2295161022 | ||
|
|
0516f2febc | ||
|
|
df02c73a54 | ||
|
|
52658ac1c4 | ||
|
|
fab820e919 | ||
|
|
6f54404a51 | ||
|
|
a3169c9287 | ||
|
|
0639ce8df7 | ||
|
|
685b8023dd | ||
|
|
07e8d965a8 | ||
|
|
c505989ad4 | ||
|
|
088984a8d4 | ||
|
|
0b69a6c507 | ||
|
|
5fe1640f2a | ||
|
|
ad01f60e99 | ||
|
|
87d6b2b519 | ||
|
|
b7b6721796 | ||
|
|
0c59d2dbe7 | ||
|
|
52d366e866 | ||
|
|
9cd6fc6135 | ||
|
|
f872f5e171 | ||
|
|
f500fb0286 | ||
|
|
9a94e12065 | ||
|
|
808a50d808 | ||
|
|
a263188abd | ||
|
|
155ed5248d | ||
|
|
ed5a2fe393 | ||
|
|
cd504a2694 | ||
|
|
e556c4a5c8 | ||
|
|
be7f408049 | ||
|
|
2ab40124ee | ||
|
|
840c612be8 | ||
|
|
235bb58779 | ||
|
|
ace1790c72 | ||
|
|
31eb7f5d28 | ||
|
|
6b5622c62f | ||
|
|
cf0d157673 | ||
|
|
adf62267aa | ||
|
|
9f64e2a869 | ||
|
|
e00f461eb1 | ||
|
|
da6c54ed93 | ||
|
|
1d99fdf843 | ||
|
|
de70c3a332 | ||
|
|
5e07dfe19b | ||
|
|
2acf6fa124 | ||
|
|
7e5872935a | ||
|
|
6458fe9fce | ||
|
|
640d9fb773 | ||
|
|
fc1b6e4917 | ||
|
|
a0e57c13c3 | ||
|
|
997db0e05b | ||
|
|
565ab8c13a | ||
|
|
15519b9580 | ||
|
|
b174513725 | ||
|
|
465f5e13a8 | ||
|
|
73453a7191 | ||
|
|
fcb90d92a4 | ||
|
|
ddf426c4b3 | ||
|
|
a882e6f027 | ||
|
|
dab2f90051 | ||
|
|
99f4c7e222 | ||
|
|
54d0dcde48 | ||
|
|
159ade05cc | ||
|
|
55b9ad60d8 | ||
|
|
e997e0071c | ||
|
|
718884210b |
18
.github/workflows/publish-platform.yml
vendored
18
.github/workflows/publish-platform.yml
vendored
@@ -35,15 +35,15 @@ jobs:
|
||||
# - Uploads compressed artifacts for the publish job
|
||||
# =============================================================================
|
||||
build:
|
||||
runs-on: ${{ matrix.platform == 'windows-x64' && 'windows-latest' || 'ubuntu-latest' }}
|
||||
runs-on: ${{ startsWith(matrix.platform, 'windows-') && 'windows-latest' || 'ubuntu-latest' }}
|
||||
defaults:
|
||||
run:
|
||||
shell: bash
|
||||
strategy:
|
||||
fail-fast: false
|
||||
max-parallel: 7
|
||||
max-parallel: 11
|
||||
matrix:
|
||||
platform: [darwin-arm64, darwin-x64, linux-x64, linux-arm64, linux-x64-musl, linux-arm64-musl, windows-x64]
|
||||
platform: [darwin-arm64, darwin-x64, darwin-x64-baseline, linux-x64, linux-x64-baseline, linux-arm64, linux-x64-musl, linux-x64-musl-baseline, linux-arm64-musl, windows-x64, windows-x64-baseline]
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
@@ -95,14 +95,18 @@ jobs:
|
||||
case "$PLATFORM" in
|
||||
darwin-arm64) TARGET="bun-darwin-arm64" ;;
|
||||
darwin-x64) TARGET="bun-darwin-x64" ;;
|
||||
darwin-x64-baseline) TARGET="bun-darwin-x64-baseline" ;;
|
||||
linux-x64) TARGET="bun-linux-x64" ;;
|
||||
linux-x64-baseline) TARGET="bun-linux-x64-baseline" ;;
|
||||
linux-arm64) TARGET="bun-linux-arm64" ;;
|
||||
linux-x64-musl) TARGET="bun-linux-x64-musl" ;;
|
||||
linux-x64-musl-baseline) TARGET="bun-linux-x64-musl-baseline" ;;
|
||||
linux-arm64-musl) TARGET="bun-linux-arm64-musl" ;;
|
||||
windows-x64) TARGET="bun-windows-x64" ;;
|
||||
windows-x64-baseline) TARGET="bun-windows-x64-baseline" ;;
|
||||
esac
|
||||
|
||||
if [ "$PLATFORM" = "windows-x64" ]; then
|
||||
if [[ "$PLATFORM" == windows-* ]]; then
|
||||
OUTPUT="packages/${PLATFORM}/bin/oh-my-opencode.exe"
|
||||
else
|
||||
OUTPUT="packages/${PLATFORM}/bin/oh-my-opencode"
|
||||
@@ -119,7 +123,7 @@ jobs:
|
||||
PLATFORM="${{ matrix.platform }}"
|
||||
cd packages/${PLATFORM}
|
||||
|
||||
if [ "$PLATFORM" = "windows-x64" ]; then
|
||||
if [[ "$PLATFORM" == windows-* ]]; then
|
||||
# Windows: use 7z (pre-installed on windows-latest)
|
||||
7z a -tzip ../../binary-${PLATFORM}.zip bin/ package.json
|
||||
else
|
||||
@@ -155,7 +159,7 @@ jobs:
|
||||
fail-fast: false
|
||||
max-parallel: 2
|
||||
matrix:
|
||||
platform: [darwin-arm64, darwin-x64, linux-x64, linux-arm64, linux-x64-musl, linux-arm64-musl, windows-x64]
|
||||
platform: [darwin-arm64, darwin-x64, darwin-x64-baseline, linux-x64, linux-x64-baseline, linux-arm64, linux-x64-musl, linux-x64-musl-baseline, linux-arm64-musl, windows-x64, windows-x64-baseline]
|
||||
steps:
|
||||
- name: Check if already published
|
||||
id: check
|
||||
@@ -184,7 +188,7 @@ jobs:
|
||||
PLATFORM="${{ matrix.platform }}"
|
||||
mkdir -p packages/${PLATFORM}
|
||||
|
||||
if [ "$PLATFORM" = "windows-x64" ]; then
|
||||
if [[ "$PLATFORM" == windows-* ]]; then
|
||||
unzip binary-${PLATFORM}.zip -d packages/${PLATFORM}/
|
||||
else
|
||||
tar -xzvf binary-${PLATFORM}.tar.gz -C packages/${PLATFORM}/
|
||||
|
||||
2
.github/workflows/publish.yml
vendored
2
.github/workflows/publish.yml
vendored
@@ -189,7 +189,7 @@ jobs:
|
||||
VERSION="${{ steps.version.outputs.version }}"
|
||||
jq --arg v "$VERSION" '.version = $v' package.json > tmp.json && mv tmp.json package.json
|
||||
|
||||
for platform in darwin-arm64 darwin-x64 linux-x64 linux-arm64 linux-x64-musl linux-arm64-musl windows-x64; do
|
||||
for platform in darwin-arm64 darwin-x64 darwin-x64-baseline linux-x64 linux-x64-baseline linux-arm64 linux-x64-musl linux-x64-musl-baseline linux-arm64-musl windows-x64 windows-x64-baseline; do
|
||||
jq --arg v "$VERSION" '.version = $v' "packages/${platform}/package.json" > tmp.json
|
||||
mv tmp.json "packages/${platform}/package.json"
|
||||
done
|
||||
|
||||
61
.issue-comment-2064.md
Normal file
61
.issue-comment-2064.md
Normal file
@@ -0,0 +1,61 @@
|
||||
[sisyphus-bot]
|
||||
|
||||
## Confirmed Bug
|
||||
|
||||
We have identified the root cause of this issue. The bug is in the config writing logic during installation.
|
||||
|
||||
### Root Cause
|
||||
|
||||
**File:** `src/cli/config-manager/write-omo-config.ts` (line 46)
|
||||
|
||||
```typescript
|
||||
const merged = deepMergeRecord(existing, newConfig)
|
||||
```
|
||||
|
||||
When a user runs `oh-my-opencode install` (even just to update settings), the installer:
|
||||
1. Reads the existing config (with user's custom model settings)
|
||||
2. Generates a **new** config based on detected provider availability
|
||||
3. Calls `deepMergeRecord(existing, newConfig)`
|
||||
4. Writes the result back
|
||||
|
||||
**The problem:** `deepMergeRecord` overwrites values in `existing` with values from `newConfig`. This means your custom `"model": "openai/gpt-5.2-codex"` gets overwritten by the generated default model (e.g., `anthropic/claude-opus-4-6` if Claude is available).
|
||||
|
||||
### Why This Happens
|
||||
|
||||
Looking at `deepMergeRecord` (line 24-25):
|
||||
```typescript
|
||||
} else if (sourceValue !== undefined) {
|
||||
result[key] = sourceValue as TTarget[keyof TTarget]
|
||||
}
|
||||
```
|
||||
|
||||
Any defined value in the source (generated config) overwrites the target (user's config).
|
||||
|
||||
### Fix Approach
|
||||
|
||||
The merge direction should be reversed to respect user overrides:
|
||||
```typescript
|
||||
const merged = deepMergeRecord(newConfig, existing)
|
||||
```
|
||||
|
||||
This ensures:
|
||||
- User's explicit settings take precedence
|
||||
- Only new/undefined keys get populated from generated defaults
|
||||
- Custom model choices are preserved
|
||||
|
||||
### SEVERITY: HIGH
|
||||
|
||||
- **Impact:** User configuration is overwritten without consent
|
||||
- **Affected Files:**
|
||||
- `src/cli/config-manager/write-omo-config.ts`
|
||||
- `src/cli/config-manager/deep-merge-record.ts`
|
||||
- **Trigger:** Running `oh-my-opencode install` (even for unrelated updates)
|
||||
|
||||
### Workaround (Until Fix)
|
||||
|
||||
Backup your config before running install:
|
||||
```bash
|
||||
cp ~/.config/opencode/oh-my-opencode.jsonc ~/.config/opencode/oh-my-opencode.jsonc.backup
|
||||
```
|
||||
|
||||
We're working on a fix that will preserve your explicit model configurations.
|
||||
12
AGENTS.md
12
AGENTS.md
@@ -1,10 +1,10 @@
|
||||
# oh-my-opencode — OpenCode Plugin
|
||||
|
||||
**Generated:** 2026-02-21 | **Commit:** 86e3c7d1 | **Branch:** dev
|
||||
**Generated:** 2026-02-24 | **Commit:** fcb90d92 | **Branch:** dev
|
||||
|
||||
## OVERVIEW
|
||||
|
||||
OpenCode plugin (npm: `oh-my-opencode`) that extends Claude Code (OpenCode fork) with multi-agent orchestration, 44 lifecycle hooks, 26 tools, skill/command/MCP systems, and Claude Code compatibility. 1208 TypeScript files, 143k LOC.
|
||||
OpenCode plugin (npm: `oh-my-opencode`) that extends Claude Code (OpenCode fork) with multi-agent orchestration, 46 lifecycle hooks, 26 tools, skill/command/MCP systems, and Claude Code compatibility. 1208 TypeScript files, 143k LOC.
|
||||
|
||||
## STRUCTURE
|
||||
|
||||
@@ -14,14 +14,14 @@ oh-my-opencode/
|
||||
│ ├── index.ts # Plugin entry: loadConfig → createManagers → createTools → createHooks → createPluginInterface
|
||||
│ ├── plugin-config.ts # JSONC multi-level config: user → project → defaults (Zod v4)
|
||||
│ ├── agents/ # 11 agents (Sisyphus, Hephaestus, Oracle, Librarian, Explore, Atlas, Prometheus, Metis, Momus, Multimodal-Looker, Sisyphus-Junior)
|
||||
│ ├── hooks/ # 44 hooks across 39 directories + 6 standalone files
|
||||
| `hooks/` # 46 hooks across 39 directories + 6 standalone files
|
||||
│ ├── tools/ # 26 tools across 15 directories
|
||||
│ ├── features/ # 19 feature modules (background-agent, skill-loader, tmux, MCP-OAuth, etc.)
|
||||
│ ├── shared/ # 100+ utility files in 13 categories
|
||||
│ ├── config/ # Zod v4 schema system (22+ files)
|
||||
│ ├── cli/ # CLI: install, run, doctor, mcp-oauth (Commander.js)
|
||||
│ ├── mcp/ # 3 built-in remote MCPs (websearch, context7, grep_app)
|
||||
│ ├── plugin/ # 8 OpenCode hook handlers + 44 hook composition
|
||||
│ ├── plugin/ # 8 OpenCode hook handlers + 46 hook composition
|
||||
│ └── plugin-handlers/ # 6-phase config loading pipeline
|
||||
├── packages/ # Monorepo: comment-checker, opencode-sdk, 10 platform binaries
|
||||
└── local-ignore/ # Dev-only test fixtures
|
||||
@@ -34,7 +34,7 @@ OhMyOpenCodePlugin(ctx)
|
||||
├─→ loadPluginConfig() # JSONC parse → project/user merge → Zod validate → migrate
|
||||
├─→ createManagers() # TmuxSessionManager, BackgroundManager, SkillMcpManager, ConfigHandler
|
||||
├─→ createTools() # SkillContext + AvailableCategories + ToolRegistry (26 tools)
|
||||
├─→ createHooks() # 3-tier: Core(35) + Continuation(7) + Skill(2) = 44 hooks
|
||||
├─→ createHooks() # 3-tier: Core(37) + Continuation(7) + Skill(2) = 46 hooks
|
||||
└─→ createPluginInterface() # 8 OpenCode hook handlers → PluginInterface
|
||||
```
|
||||
|
||||
@@ -87,7 +87,7 @@ Fields: agents (14 overridable, 21 fields each), categories (8 built-in + custom
|
||||
|
||||
- **Test pattern**: Bun test (`bun:test`), co-located `*.test.ts`, given/when/then style (nested describe with `#given`/`#when`/`#then` prefixes)
|
||||
- **Factory pattern**: `createXXX()` for all tools, hooks, agents
|
||||
- **Hook tiers**: Session (22) → Tool-Guard (10) → Transform (4) → Continuation (7) → Skill (2)
|
||||
- **Hook tiers**: Session (23) → Tool-Guard (10) → Transform (4) → Continuation (7) → Skill (2)
|
||||
- **Agent modes**: `primary` (respects UI model) vs `subagent` (own fallback chain) vs `all`
|
||||
- **Model resolution**: 3-step: override → category-default → provider-fallback → system-default
|
||||
- **Config format**: JSONC with comments, Zod v4 validation, snake_case keys
|
||||
|
||||
@@ -24,19 +24,7 @@
|
||||
"disabled_agents": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"sisyphus",
|
||||
"hephaestus",
|
||||
"prometheus",
|
||||
"oracle",
|
||||
"librarian",
|
||||
"explore",
|
||||
"multimodal-looker",
|
||||
"metis",
|
||||
"momus",
|
||||
"atlas"
|
||||
]
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"disabled_skills": {
|
||||
@@ -960,6 +948,9 @@
|
||||
}
|
||||
},
|
||||
"additionalProperties": false
|
||||
},
|
||||
"allow_non_gpt_model": {
|
||||
"type": "boolean"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false
|
||||
@@ -3248,6 +3239,11 @@
|
||||
"prompt_append": {
|
||||
"type": "string"
|
||||
},
|
||||
"max_prompt_tokens": {
|
||||
"type": "integer",
|
||||
"exclusiveMinimum": 0,
|
||||
"maximum": 9007199254740991
|
||||
},
|
||||
"is_unstable_agent": {
|
||||
"type": "boolean"
|
||||
},
|
||||
|
||||
62
benchmarks/bun.lock
Normal file
62
benchmarks/bun.lock
Normal file
@@ -0,0 +1,62 @@
|
||||
{
|
||||
"lockfileVersion": 1,
|
||||
"configVersion": 1,
|
||||
"workspaces": {
|
||||
"": {
|
||||
"name": "hashline-edit-benchmark",
|
||||
"dependencies": {
|
||||
"@ai-sdk/openai": "^1.3.0",
|
||||
"@friendliai/ai-provider": "^1.0.9",
|
||||
"ai": "^6.0.94",
|
||||
"zod": "^4.1.0",
|
||||
},
|
||||
},
|
||||
},
|
||||
"packages": {
|
||||
"@ai-sdk/gateway": ["@ai-sdk/gateway@3.0.55", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@ai-sdk/provider-utils": "4.0.15", "@vercel/oidc": "3.1.0" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-7xMeTJnCjwRwXKVCiv4Ly4qzWvDuW3+W1WIV0X1EFu6W83d4mEhV9bFArto10MeTw40ewuDjrbrZd21mXKohkw=="],
|
||||
|
||||
"@ai-sdk/openai": ["@ai-sdk/openai@1.3.24", "", { "dependencies": { "@ai-sdk/provider": "1.1.3", "@ai-sdk/provider-utils": "2.2.8" }, "peerDependencies": { "zod": "^3.0.0" } }, "sha512-GYXnGJTHRTZc4gJMSmFRgEQudjqd4PUN0ZjQhPwOAYH1yOAvQoG/Ikqs+HyISRbLPCrhbZnPKCNHuRU4OfpW0Q=="],
|
||||
|
||||
"@ai-sdk/openai-compatible": ["@ai-sdk/openai-compatible@2.0.30", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@ai-sdk/provider-utils": "4.0.15" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-iTjumHf1/u4NhjXYFn/aONM2GId3/o7J1Lp5ql8FCbgIMyRwrmanR5xy1S3aaVkfTscuDvLTzWiy1mAbGzK3nQ=="],
|
||||
|
||||
"@ai-sdk/provider": ["@ai-sdk/provider@1.1.3", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-qZMxYJ0qqX/RfnuIaab+zp8UAeJn/ygXXAffR5I4N0n1IrvA6qBsjc8hXLmBiMV2zoXlifkacF7sEFnYnjBcqg=="],
|
||||
|
||||
"@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@2.2.8", "", { "dependencies": { "@ai-sdk/provider": "1.1.3", "nanoid": "^3.3.8", "secure-json-parse": "^2.7.0" }, "peerDependencies": { "zod": "^3.23.8" } }, "sha512-fqhG+4sCVv8x7nFzYnFo19ryhAa3w096Kmc3hWxMQfW/TubPOmt3A6tYZhl4mUfQWWQMsuSkLrtjlWuXBVSGQA=="],
|
||||
|
||||
"@friendliai/ai-provider": ["@friendliai/ai-provider@1.1.4", "", { "dependencies": { "@ai-sdk/openai-compatible": "2.0.30", "@ai-sdk/provider": "3.0.8", "@ai-sdk/provider-utils": "4.0.15" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.12" } }, "sha512-9TU4B1QFqPhbkONjI5afCF7Ox4jOqtGg1xw8mA9QHZdtlEbZxU+mBNvMPlI5pU5kPoN6s7wkXmFmxpID+own1A=="],
|
||||
|
||||
"@opentelemetry/api": ["@opentelemetry/api@1.9.0", "", {}, "sha512-3giAOQvZiH5F9bMlMiv8+GSPMeqg0dbaeo58/0SlA9sxSqZhnUtxzX9/2FzyhS9sWQf5S0GJE0AKBrFqjpeYcg=="],
|
||||
|
||||
"@standard-schema/spec": ["@standard-schema/spec@1.1.0", "", {}, "sha512-l2aFy5jALhniG5HgqrD6jXLi/rUWrKvqN/qJx6yoJsgKhblVd+iqqU4RCXavm/jPityDo5TCvKMnpjKnOriy0w=="],
|
||||
|
||||
"@vercel/oidc": ["@vercel/oidc@3.1.0", "", {}, "sha512-Fw28YZpRnA3cAHHDlkt7xQHiJ0fcL+NRcIqsocZQUSmbzeIKRpwttJjik5ZGanXP+vlA4SbTg+AbA3bP363l+w=="],
|
||||
|
||||
"ai": ["ai@6.0.101", "", { "dependencies": { "@ai-sdk/gateway": "3.0.55", "@ai-sdk/provider": "3.0.8", "@ai-sdk/provider-utils": "4.0.15", "@opentelemetry/api": "1.9.0" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-Ur/NgbgOp1rdhyDiKDk6EOpSgd1g5ADlbcD1cjQJtQsnmhEngz3Rf8nK5JetDh0vnbLy2aEBpaQeL+zvLRWuaA=="],
|
||||
|
||||
"eventsource-parser": ["eventsource-parser@3.0.6", "", {}, "sha512-Vo1ab+QXPzZ4tCa8SwIHJFaSzy4R6SHf7BY79rFBDf0idraZWAkYrDjDj8uWaSm3S2TK+hJ7/t1CEmZ7jXw+pg=="],
|
||||
|
||||
"json-schema": ["json-schema@0.4.0", "", {}, "sha512-es94M3nTIfsEPisRafak+HDLfHXnKBhV3vU5eqPcS3flIWqcxJWgXHXiey3YrpaNsanY5ei1VoYEbOzijuq9BA=="],
|
||||
|
||||
"nanoid": ["nanoid@3.3.11", "", { "bin": { "nanoid": "bin/nanoid.cjs" } }, "sha512-N8SpfPUnUp1bK+PMYW8qSWdl9U+wwNWI4QKxOYDy9JAro3WMX7p2OeVRF9v+347pnakNevPmiHhNmZ2HbFA76w=="],
|
||||
|
||||
"secure-json-parse": ["secure-json-parse@2.7.0", "", {}, "sha512-6aU+Rwsezw7VR8/nyvKTx8QpWH9FrcYiXXlqC4z5d5XQBDRqtbfsRjnwGyqbi3gddNtWHuEk9OANUotL26qKUw=="],
|
||||
|
||||
"zod": ["zod@4.3.6", "", {}, "sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg=="],
|
||||
|
||||
"@ai-sdk/gateway/@ai-sdk/provider": ["@ai-sdk/provider@3.0.8", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-oGMAgGoQdBXbZqNG0Ze56CHjDZ1IDYOwGYxYjO5KLSlz5HiNQ9udIXsPZ61VWaHGZ5XW/jyjmr6t2xz2jGVwbQ=="],
|
||||
|
||||
"@ai-sdk/gateway/@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@4.0.15", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@standard-schema/spec": "^1.1.0", "eventsource-parser": "^3.0.6" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-8XiKWbemmCbvNN0CLR9u3PQiet4gtEVIrX4zzLxnCj06AwsEDJwJVBbKrEI4t6qE8XRSIvU2irka0dcpziKW6w=="],
|
||||
|
||||
"@ai-sdk/openai-compatible/@ai-sdk/provider": ["@ai-sdk/provider@3.0.8", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-oGMAgGoQdBXbZqNG0Ze56CHjDZ1IDYOwGYxYjO5KLSlz5HiNQ9udIXsPZ61VWaHGZ5XW/jyjmr6t2xz2jGVwbQ=="],
|
||||
|
||||
"@ai-sdk/openai-compatible/@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@4.0.15", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@standard-schema/spec": "^1.1.0", "eventsource-parser": "^3.0.6" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-8XiKWbemmCbvNN0CLR9u3PQiet4gtEVIrX4zzLxnCj06AwsEDJwJVBbKrEI4t6qE8XRSIvU2irka0dcpziKW6w=="],
|
||||
|
||||
"@friendliai/ai-provider/@ai-sdk/provider": ["@ai-sdk/provider@3.0.8", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-oGMAgGoQdBXbZqNG0Ze56CHjDZ1IDYOwGYxYjO5KLSlz5HiNQ9udIXsPZ61VWaHGZ5XW/jyjmr6t2xz2jGVwbQ=="],
|
||||
|
||||
"@friendliai/ai-provider/@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@4.0.15", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@standard-schema/spec": "^1.1.0", "eventsource-parser": "^3.0.6" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-8XiKWbemmCbvNN0CLR9u3PQiet4gtEVIrX4zzLxnCj06AwsEDJwJVBbKrEI4t6qE8XRSIvU2irka0dcpziKW6w=="],
|
||||
|
||||
"ai/@ai-sdk/provider": ["@ai-sdk/provider@3.0.8", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-oGMAgGoQdBXbZqNG0Ze56CHjDZ1IDYOwGYxYjO5KLSlz5HiNQ9udIXsPZ61VWaHGZ5XW/jyjmr6t2xz2jGVwbQ=="],
|
||||
|
||||
"ai/@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@4.0.15", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@standard-schema/spec": "^1.1.0", "eventsource-parser": "^3.0.6" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-8XiKWbemmCbvNN0CLR9u3PQiet4gtEVIrX4zzLxnCj06AwsEDJwJVBbKrEI4t6qE8XRSIvU2irka0dcpziKW6w=="],
|
||||
}
|
||||
}
|
||||
193
benchmarks/headless.ts
Normal file
193
benchmarks/headless.ts
Normal file
@@ -0,0 +1,193 @@
|
||||
#!/usr/bin/env bun
|
||||
import { readFile, writeFile, mkdir } from "node:fs/promises"
|
||||
import { join, dirname } from "node:path"
|
||||
import { stepCountIs, streamText, type CoreMessage } from "ai"
|
||||
import { tool } from "ai"
|
||||
import { createFriendli } from "@friendliai/ai-provider"
|
||||
import { z } from "zod"
|
||||
import { formatHashLines } from "../src/tools/hashline-edit/hash-computation"
|
||||
import { normalizeHashlineEdits } from "../src/tools/hashline-edit/normalize-edits"
|
||||
import { applyHashlineEditsWithReport } from "../src/tools/hashline-edit/edit-operations"
|
||||
import { canonicalizeFileText, restoreFileText } from "../src/tools/hashline-edit/file-text-canonicalization"
|
||||
|
||||
const DEFAULT_MODEL = "MiniMaxAI/MiniMax-M2.5"
|
||||
const MAX_STEPS = 50
|
||||
const sessionId = `bench-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`
|
||||
|
||||
const emit = (event: Record<string, unknown>) =>
|
||||
console.log(JSON.stringify({ sessionId, timestamp: new Date().toISOString(), ...event }))
|
||||
|
||||
// ── CLI ──────────────────────────────────────────────────────
|
||||
function parseArgs(): { prompt: string; modelId: string } {
|
||||
const args = process.argv.slice(2)
|
||||
let prompt = ""
|
||||
let modelId = DEFAULT_MODEL
|
||||
for (let i = 0; i < args.length; i++) {
|
||||
if ((args[i] === "-p" || args[i] === "--prompt") && args[i + 1]) {
|
||||
prompt = args[++i]
|
||||
} else if ((args[i] === "-m" || args[i] === "--model") && args[i + 1]) {
|
||||
modelId = args[++i]
|
||||
} else if (args[i] === "--reasoning-mode" && args[i + 1]) {
|
||||
i++ // consume
|
||||
}
|
||||
// --no-translate, --think consumed silently
|
||||
}
|
||||
if (!prompt) {
|
||||
console.error("Usage: bun run benchmarks/headless.ts -p <prompt> [-m <model>]")
|
||||
process.exit(1)
|
||||
}
|
||||
return { prompt, modelId }
|
||||
}
|
||||
|
||||
// ── Tools ────────────────────────────────────────────────────
|
||||
const readFileTool = tool({
|
||||
description: "Read a file with hashline-tagged content (LINE#ID format)",
|
||||
inputSchema: z.object({ path: z.string().describe("File path") }),
|
||||
execute: async ({ path }) => {
|
||||
const fullPath = join(process.cwd(), path)
|
||||
try {
|
||||
const content = await readFile(fullPath, "utf-8")
|
||||
const lines = content.split("\n")
|
||||
const tagged = formatHashLines(content)
|
||||
return `OK - read file\npath: ${path}\nlines: ${lines.length}\n\n${tagged}`
|
||||
} catch {
|
||||
return `Error: File not found: ${path}`
|
||||
}
|
||||
},
|
||||
})
|
||||
|
||||
const editFileTool = tool({
|
||||
description: "Edit a file using hashline anchors (LINE#ID format)",
|
||||
inputSchema: z.object({
|
||||
path: z.string(),
|
||||
edits: z.array(
|
||||
z.object({
|
||||
op: z.enum(["replace", "append", "prepend"]),
|
||||
pos: z.string().optional(),
|
||||
end: z.string().optional(),
|
||||
lines: z.union([z.array(z.string()), z.string(), z.null()]),
|
||||
})
|
||||
).min(1),
|
||||
}),
|
||||
execute: async ({ path, edits }) => {
|
||||
const fullPath = join(process.cwd(), path)
|
||||
try {
|
||||
let rawContent = ""
|
||||
let exists = true
|
||||
try {
|
||||
rawContent = await readFile(fullPath, "utf-8")
|
||||
} catch {
|
||||
exists = false
|
||||
}
|
||||
|
||||
const normalized = normalizeHashlineEdits(edits)
|
||||
|
||||
if (!exists) {
|
||||
const canCreate = normalized.every(
|
||||
(e) => (e.op === "append" || e.op === "prepend") && !e.pos
|
||||
)
|
||||
if (!canCreate) return `Error: File not found: ${path}`
|
||||
}
|
||||
|
||||
const envelope = canonicalizeFileText(rawContent)
|
||||
const result = applyHashlineEditsWithReport(envelope.content, normalized)
|
||||
|
||||
if (result.content === envelope.content) {
|
||||
return `Error: No changes made to ${path}. The edits produced identical content.`
|
||||
}
|
||||
|
||||
const writeContent = restoreFileText(result.content, envelope)
|
||||
await mkdir(dirname(fullPath), { recursive: true })
|
||||
await writeFile(fullPath, writeContent, "utf-8")
|
||||
|
||||
const oldLineCount = rawContent.split("\n").length
|
||||
const newLineCount = writeContent.split("\n").length
|
||||
const delta = newLineCount - oldLineCount
|
||||
const sign = delta > 0 ? "+" : ""
|
||||
const action = exists ? "Updated" : "Created"
|
||||
return `${action} ${path}\n${edits.length} edit(s) applied, ${sign}${delta} line(s)`
|
||||
} catch (error) {
|
||||
return `Error: ${error instanceof Error ? error.message : String(error)}`
|
||||
}
|
||||
},
|
||||
})
|
||||
|
||||
// ── Agent Loop ───────────────────────────────────────────────
|
||||
async function run() {
|
||||
const { prompt, modelId } = parseArgs()
|
||||
|
||||
const friendli = createFriendli({ apiKey: process.env.FRIENDLI_TOKEN! })
|
||||
const model = friendli(modelId)
|
||||
const tools = { read_file: readFileTool, edit_file: editFileTool }
|
||||
|
||||
emit({ type: "user", content: prompt })
|
||||
|
||||
const messages: CoreMessage[] = [{ role: "user", content: prompt }]
|
||||
const system =
|
||||
"You are a code editing assistant. Use read_file to read files and edit_file to edit them. " +
|
||||
"Always read a file before editing it to get fresh LINE#ID anchors."
|
||||
|
||||
for (let step = 0; step < MAX_STEPS; step++) {
|
||||
const stream = streamText({
|
||||
model,
|
||||
tools,
|
||||
messages,
|
||||
system,
|
||||
stopWhen: stepCountIs(1),
|
||||
})
|
||||
|
||||
let currentText = ""
|
||||
for await (const part of stream.fullStream) {
|
||||
switch (part.type) {
|
||||
case "text-delta":
|
||||
currentText += part.text
|
||||
break
|
||||
case "tool-call":
|
||||
emit({
|
||||
type: "tool_call",
|
||||
tool_call_id: part.toolCallId,
|
||||
tool_name: part.toolName,
|
||||
tool_input: part.args,
|
||||
model: modelId,
|
||||
})
|
||||
break
|
||||
case "tool-result": {
|
||||
const output = typeof part.result === "string" ? part.result : JSON.stringify(part.result)
|
||||
const isError = typeof output === "string" && output.startsWith("Error:")
|
||||
emit({
|
||||
type: "tool_result",
|
||||
tool_call_id: part.toolCallId,
|
||||
output,
|
||||
...(isError ? { error: output } : {}),
|
||||
})
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
const response = await stream.response
|
||||
messages.push(...response.messages)
|
||||
|
||||
const finishReason = await stream.finishReason
|
||||
if (finishReason !== "tool-calls") {
|
||||
if (currentText.trim()) {
|
||||
emit({ type: "assistant", content: currentText, model: modelId })
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ── Signal + Startup ─────────────────────────────────────────
|
||||
process.once("SIGINT", () => process.exit(0))
|
||||
process.once("SIGTERM", () => process.exit(143))
|
||||
|
||||
const startTime = Date.now()
|
||||
run()
|
||||
.catch((error) => {
|
||||
emit({ type: "error", error: error instanceof Error ? error.message : String(error) })
|
||||
process.exit(1)
|
||||
})
|
||||
.then(() => {
|
||||
const elapsed = ((Date.now() - startTime) / 1000).toFixed(2)
|
||||
console.error(`[headless] Completed in ${elapsed}s`)
|
||||
})
|
||||
19
benchmarks/package.json
Normal file
19
benchmarks/package.json
Normal file
@@ -0,0 +1,19 @@
|
||||
{
|
||||
"name": "hashline-edit-benchmark",
|
||||
"version": "0.1.0",
|
||||
"private": true,
|
||||
"type": "module",
|
||||
"description": "Hashline edit tool benchmark using Vercel AI SDK with FriendliAI provider",
|
||||
"scripts": {
|
||||
"bench:basic": "bun run test-edit-ops.ts",
|
||||
"bench:edge": "bun run test-edge-cases.ts",
|
||||
"bench:multi": "bun run test-multi-model.ts",
|
||||
"bench:all": "bun run bench:basic && bun run bench:edge"
|
||||
},
|
||||
"dependencies": {
|
||||
"ai": "^6.0.94",
|
||||
"@ai-sdk/openai": "^1.3.0",
|
||||
"@friendliai/ai-provider": "^1.0.9",
|
||||
"zod": "^4.1.0"
|
||||
}
|
||||
}
|
||||
1121
benchmarks/test-edge-cases.ts
Normal file
1121
benchmarks/test-edge-cases.ts
Normal file
File diff suppressed because it is too large
Load Diff
808
benchmarks/test-edit-ops.ts
Normal file
808
benchmarks/test-edit-ops.ts
Normal file
@@ -0,0 +1,808 @@
|
||||
#!/usr/bin/env bun
|
||||
/**
|
||||
* Comprehensive headless edit_file stress test: 21 operation types
|
||||
*
|
||||
* Tests: 5 basic ops + 10 creative cases + 6 whitespace cases
|
||||
* Each runs via headless mode with its own demo file + prompt.
|
||||
*
|
||||
* Usage:
|
||||
* bun run scripts/test-headless-edit-ops.ts [-m <model>] [--provider <provider>]
|
||||
*/
|
||||
|
||||
import { spawn } from "node:child_process";
|
||||
import { mkdirSync, readFileSync, rmSync, writeFileSync } from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join, resolve } from "node:path";
|
||||
|
||||
// ── CLI arg passthrough ───────────────────────────────────────
|
||||
const extraArgs: string[] = [];
|
||||
const rawArgs = process.argv.slice(2);
|
||||
for (let i = 0; i < rawArgs.length; i++) {
|
||||
const arg = rawArgs[i];
|
||||
if (
|
||||
(arg === "-m" || arg === "--model" || arg === "--provider") &&
|
||||
i + 1 < rawArgs.length
|
||||
) {
|
||||
extraArgs.push(arg, rawArgs[i + 1]);
|
||||
i++;
|
||||
} else if (arg === "--think" || arg === "--no-translate") {
|
||||
extraArgs.push(arg);
|
||||
} else if (arg === "--reasoning-mode" && i + 1 < rawArgs.length) {
|
||||
extraArgs.push(arg, rawArgs[i + 1]);
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
// ── Colors ────────────────────────────────────────────────────
|
||||
const BOLD = "\x1b[1m";
|
||||
const GREEN = "\x1b[32m";
|
||||
const RED = "\x1b[31m";
|
||||
const YELLOW = "\x1b[33m";
|
||||
const DIM = "\x1b[2m";
|
||||
const CYAN = "\x1b[36m";
|
||||
const RESET = "\x1b[0m";
|
||||
|
||||
const pass = (msg: string) => console.log(` ${GREEN}✓${RESET} ${msg}`);
|
||||
const fail = (msg: string) => console.log(` ${RED}✗${RESET} ${msg}`);
|
||||
const info = (msg: string) => console.log(` ${DIM}${msg}${RESET}`);
|
||||
const warn = (msg: string) => console.log(` ${YELLOW}⚠${RESET} ${msg}`);
|
||||
|
||||
// ── Test case definition ─────────────────────────────────────
|
||||
interface TestCase {
|
||||
fileContent: string;
|
||||
fileName: string;
|
||||
name: string;
|
||||
prompt: string;
|
||||
validate: (content: string) => { passed: boolean; reason: string };
|
||||
}
|
||||
|
||||
const TEST_CASES: TestCase[] = [
|
||||
{
|
||||
name: "1. Replace single line",
|
||||
fileName: "config.txt",
|
||||
fileContent: [
|
||||
"host: localhost",
|
||||
"port: 3000",
|
||||
"debug: false",
|
||||
"timeout: 30",
|
||||
"retries: 3",
|
||||
].join("\n"),
|
||||
prompt: [
|
||||
"Follow these steps exactly:",
|
||||
"Step 1: Call read_file on config.txt.",
|
||||
"Step 2: Note the anchor for the port line (line 2).",
|
||||
"Step 3: Call edit_file with path='config.txt' and edits containing ONE object:",
|
||||
" { op: 'replace', pos: '<line2 anchor>', lines: ['port: 8080'] }",
|
||||
"IMPORTANT: pos must be ONLY the anchor (like '2#KB'). lines must be a SEPARATE array field with the new content.",
|
||||
].join(" "),
|
||||
validate: (content) => {
|
||||
const has8080 = content.includes("port: 8080");
|
||||
const has3000 = content.includes("port: 3000");
|
||||
if (has8080 && !has3000) {
|
||||
return { passed: true, reason: "port changed to 8080" };
|
||||
}
|
||||
if (has3000) {
|
||||
return { passed: false, reason: "port still 3000 — edit not applied" };
|
||||
}
|
||||
return {
|
||||
passed: false,
|
||||
reason: `unexpected content: ${content.slice(0, 100)}`,
|
||||
};
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "2. Append after line",
|
||||
fileName: "fruits.txt",
|
||||
fileContent: ["apple", "banana", "cherry"].join("\n"),
|
||||
prompt:
|
||||
"Read fruits.txt with read_file. Then use edit_file with op='append' to insert a new line 'grape' after the 'banana' line. Use pos='LINE#HASH' of the banana line and lines=['grape'].",
|
||||
validate: (content) => {
|
||||
const lines = content.trim().split("\n");
|
||||
const bananaIdx = lines.findIndex((l) => l.trim() === "banana");
|
||||
const grapeIdx = lines.findIndex((l) => l.trim() === "grape");
|
||||
if (grapeIdx === -1) {
|
||||
return { passed: false, reason: '"grape" not found in file' };
|
||||
}
|
||||
if (bananaIdx === -1) {
|
||||
return { passed: false, reason: '"banana" was removed' };
|
||||
}
|
||||
if (grapeIdx !== bananaIdx + 1) {
|
||||
return {
|
||||
passed: false,
|
||||
reason: `"grape" at line ${grapeIdx + 1} but expected after "banana" at line ${bananaIdx + 1}`,
|
||||
};
|
||||
}
|
||||
if (lines.length !== 4) {
|
||||
return {
|
||||
passed: false,
|
||||
reason: `expected 4 lines, got ${lines.length}`,
|
||||
};
|
||||
}
|
||||
return {
|
||||
passed: true,
|
||||
reason: '"grape" correctly appended after "banana"',
|
||||
};
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "3. Prepend before line",
|
||||
fileName: "code.txt",
|
||||
fileContent: ["function greet() {", ' return "hello";', "}"].join("\n"),
|
||||
prompt:
|
||||
"Read code.txt with read_file. Then use edit_file with op='prepend' to add '// Greeting function' before the function line. Use pos='LINE#HASH' of the function line and lines=['// Greeting function'].",
|
||||
validate: (content) => {
|
||||
const lines = content.trim().split("\n");
|
||||
const commentIdx = lines.findIndex(
|
||||
(l) => l.trim().startsWith("//") && l.toLowerCase().includes("greet")
|
||||
);
|
||||
const funcIdx = lines.findIndex((l) =>
|
||||
l.trim().startsWith("function greet")
|
||||
);
|
||||
if (commentIdx === -1) {
|
||||
return { passed: false, reason: "comment line not found" };
|
||||
}
|
||||
if (funcIdx === -1) {
|
||||
return { passed: false, reason: '"function greet" line was removed' };
|
||||
}
|
||||
if (commentIdx !== funcIdx - 1) {
|
||||
return {
|
||||
passed: false,
|
||||
reason: `comment at line ${commentIdx + 1} but function at ${funcIdx + 1} — not directly before`,
|
||||
};
|
||||
}
|
||||
return {
|
||||
passed: true,
|
||||
reason: "comment correctly prepended before function",
|
||||
};
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "4. Range replace (multi-line → single line)",
|
||||
fileName: "log.txt",
|
||||
fileContent: [
|
||||
"=== Log Start ===",
|
||||
"INFO: started",
|
||||
"WARN: slow query",
|
||||
"ERROR: timeout",
|
||||
"INFO: recovered",
|
||||
"=== Log End ===",
|
||||
].join("\n"),
|
||||
prompt: [
|
||||
"Follow these steps exactly:",
|
||||
"Step 1: Call read_file on log.txt to see line anchors.",
|
||||
"Step 2: Note the anchor for 'WARN: slow query' (line 3) and 'ERROR: timeout' (line 4).",
|
||||
"Step 3: Call edit_file with path='log.txt' and edits containing ONE object with THREE separate JSON fields:",
|
||||
" { op: 'replace', pos: '<line3 anchor>', end: '<line4 anchor>', lines: ['RESOLVED: issues cleared'] }",
|
||||
"CRITICAL: pos, end, and lines are THREE SEPARATE JSON fields. pos is ONLY '3#XX'. end is ONLY '4#YY'. lines is ['RESOLVED: issues cleared'].",
|
||||
"If edit_file fails or errors, use write_file to write the complete correct file content instead.",
|
||||
"The correct final content should be: === Log Start ===, INFO: started, RESOLVED: issues cleared, INFO: recovered, === Log End ===",
|
||||
"Do not make any other changes.",
|
||||
].join(" "),
|
||||
validate: (content) => {
|
||||
const lines = content.trim().split("\n");
|
||||
const hasResolved = lines.some(
|
||||
(l) => l.trim() === "RESOLVED: issues cleared"
|
||||
);
|
||||
const hasWarn = content.includes("WARN: slow query");
|
||||
const hasError = content.includes("ERROR: timeout");
|
||||
if (!hasResolved) {
|
||||
return {
|
||||
passed: false,
|
||||
reason: '"RESOLVED: issues cleared" not found',
|
||||
};
|
||||
}
|
||||
if (hasWarn || hasError) {
|
||||
return { passed: false, reason: "old WARN/ERROR lines still present" };
|
||||
}
|
||||
// Core assertion: 2 old lines removed, 1 new line added = net -1 line
|
||||
// Allow slight overshoot from model adding extra content
|
||||
if (lines.length < 4 || lines.length > 6) {
|
||||
return {
|
||||
passed: false,
|
||||
reason: `expected ~5 lines, got ${lines.length}`,
|
||||
};
|
||||
}
|
||||
return {
|
||||
passed: true,
|
||||
reason: "range replace succeeded — 2 lines → 1 line",
|
||||
};
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "5. Delete line",
|
||||
fileName: "settings.txt",
|
||||
fileContent: [
|
||||
"mode: production",
|
||||
"debug: true",
|
||||
"cache: enabled",
|
||||
"log_level: info",
|
||||
].join("\n"),
|
||||
prompt: [
|
||||
"Follow these steps exactly:",
|
||||
"Step 1: Call read_file on settings.txt to see line anchors.",
|
||||
"Step 2: Note the anchor for 'debug: true' (line 2).",
|
||||
"Step 3: Call edit_file with path='settings.txt' and edits containing ONE object:",
|
||||
" { op: 'replace', pos: '<line2 anchor>', lines: [] }",
|
||||
"IMPORTANT: lines must be an empty array [] to delete the line. pos must be ONLY the anchor like '2#SR'.",
|
||||
].join(" "),
|
||||
validate: (content) => {
|
||||
const lines = content.trim().split("\n");
|
||||
const hasDebug = content.includes("debug: true");
|
||||
if (hasDebug) {
|
||||
return { passed: false, reason: '"debug: true" still present' };
|
||||
}
|
||||
if (lines.length !== 3) {
|
||||
return {
|
||||
passed: false,
|
||||
reason: `expected 3 lines, got ${lines.length}`,
|
||||
};
|
||||
}
|
||||
if (
|
||||
!(
|
||||
content.includes("mode: production") &&
|
||||
content.includes("cache: enabled")
|
||||
)
|
||||
) {
|
||||
return { passed: false, reason: "other lines were removed" };
|
||||
}
|
||||
return { passed: true, reason: '"debug: true" successfully deleted' };
|
||||
},
|
||||
},
|
||||
|
||||
// ── Creative cases (6-15) ────────────────────────────────────
|
||||
{
|
||||
name: "6. Batch edit — two replacements in one call",
|
||||
fileName: "batch.txt",
|
||||
fileContent: ["red", "green", "blue", "yellow"].join("\n"),
|
||||
prompt: [
|
||||
"Read batch.txt with read_file.",
|
||||
"Then call edit_file ONCE with path='batch.txt' and edits containing TWO objects:",
|
||||
" 1) { op: 'replace', pos: '<line1 anchor>', lines: ['crimson'] }",
|
||||
" 2) { op: 'replace', pos: '<line3 anchor>', lines: ['navy'] }",
|
||||
"Both edits must be in the SAME edits array in a single edit_file call.",
|
||||
].join(" "),
|
||||
validate: (c) => {
|
||||
const lines = c.trim().split("\n");
|
||||
if (!c.includes("crimson")) return { passed: false, reason: "'crimson' not found" };
|
||||
if (!c.includes("navy")) return { passed: false, reason: "'navy' not found" };
|
||||
if (c.includes("red")) return { passed: false, reason: "'red' still present" };
|
||||
if (c.includes("blue")) return { passed: false, reason: "'blue' still present" };
|
||||
if (lines.length !== 4) return { passed: false, reason: `expected 4 lines, got ${lines.length}` };
|
||||
return { passed: true, reason: "both lines replaced in single call" };
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "7. Line expansion — 1 line → 3 lines",
|
||||
fileName: "expand.txt",
|
||||
fileContent: ["header", "TODO: implement", "footer"].join("\n"),
|
||||
prompt: [
|
||||
"Read expand.txt with read_file.",
|
||||
"Replace the 'TODO: implement' line (line 2) with THREE lines:",
|
||||
" 'step 1: init', 'step 2: process', 'step 3: cleanup'",
|
||||
"Use edit_file with op='replace', pos=<line2 anchor>, lines=['step 1: init', 'step 2: process', 'step 3: cleanup'].",
|
||||
].join(" "),
|
||||
validate: (c) => {
|
||||
const lines = c.trim().split("\n");
|
||||
if (c.includes("TODO")) return { passed: false, reason: "TODO line still present" };
|
||||
if (!c.includes("step 1: init")) return { passed: false, reason: "'step 1: init' not found" };
|
||||
if (!c.includes("step 3: cleanup")) return { passed: false, reason: "'step 3: cleanup' not found" };
|
||||
if (lines.length !== 5) return { passed: false, reason: `expected 5 lines, got ${lines.length}` };
|
||||
return { passed: true, reason: "1 line expanded to 3 lines" };
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "8. Append at EOF",
|
||||
fileName: "eof.txt",
|
||||
fileContent: ["line one", "line two"].join("\n"),
|
||||
prompt: [
|
||||
"Read eof.txt with read_file.",
|
||||
"Use edit_file to append 'line three' after the LAST line of the file.",
|
||||
"Use op='append', pos=<last line anchor>, lines=['line three'].",
|
||||
].join(" "),
|
||||
validate: (c) => {
|
||||
const lines = c.trim().split("\n");
|
||||
if (!c.includes("line three")) return { passed: false, reason: "'line three' not found" };
|
||||
if (lines[lines.length - 1].trim() !== "line three")
|
||||
return { passed: false, reason: "'line three' not at end" };
|
||||
if (lines.length !== 3) return { passed: false, reason: `expected 3 lines, got ${lines.length}` };
|
||||
return { passed: true, reason: "appended at EOF" };
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "9. Special characters in content",
|
||||
fileName: "special.json",
|
||||
fileContent: [
|
||||
'{',
|
||||
' "name": "old-value",',
|
||||
' "count": 42',
|
||||
'}',
|
||||
].join("\n"),
|
||||
prompt: [
|
||||
"Read special.json with read_file.",
|
||||
'Replace the line containing \"name\": \"old-value\" with \"name\": \"new-value\".',
|
||||
"Use edit_file with op='replace', pos=<that line's anchor>, lines=[' \"name\": \"new-value\",'].",
|
||||
].join(" "),
|
||||
validate: (c) => {
|
||||
if (c.includes("old-value")) return { passed: false, reason: "'old-value' still present" };
|
||||
if (!c.includes('"new-value"')) return { passed: false, reason: "'new-value' not found" };
|
||||
if (!c.includes('"count": 42')) return { passed: false, reason: "other content was modified" };
|
||||
return { passed: true, reason: "JSON value replaced with special chars intact" };
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "10. Replace first line",
|
||||
fileName: "first.txt",
|
||||
fileContent: ["OLD HEADER", "body content", "footer"].join("\n"),
|
||||
prompt: [
|
||||
"Read first.txt with read_file.",
|
||||
"Replace the very first line 'OLD HEADER' with 'NEW HEADER'.",
|
||||
"Use edit_file with op='replace', pos=<line1 anchor>, lines=['NEW HEADER'].",
|
||||
].join(" "),
|
||||
validate: (c) => {
|
||||
const lines = c.trim().split("\n");
|
||||
if (c.includes("OLD HEADER")) return { passed: false, reason: "'OLD HEADER' still present" };
|
||||
if (lines[0].trim() !== "NEW HEADER") return { passed: false, reason: "first line is not 'NEW HEADER'" };
|
||||
if (!c.includes("body content")) return { passed: false, reason: "body was modified" };
|
||||
return { passed: true, reason: "first line replaced" };
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "11. Replace last line",
|
||||
fileName: "last.txt",
|
||||
fileContent: ["alpha", "bravo", "OLD_FOOTER"].join("\n"),
|
||||
prompt: [
|
||||
"Read last.txt with read_file.",
|
||||
"Replace the last line 'OLD_FOOTER' with 'NEW_FOOTER'.",
|
||||
"Use edit_file with op='replace', pos=<last line anchor>, lines=['NEW_FOOTER'].",
|
||||
].join(" "),
|
||||
validate: (c) => {
|
||||
const lines = c.trim().split("\n");
|
||||
if (c.includes("OLD_FOOTER")) return { passed: false, reason: "'OLD_FOOTER' still present" };
|
||||
if (lines[lines.length - 1].trim() !== "NEW_FOOTER")
|
||||
return { passed: false, reason: "last line is not 'NEW_FOOTER'" };
|
||||
return { passed: true, reason: "last line replaced" };
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "12. Adjacent line edits",
|
||||
fileName: "adjacent.txt",
|
||||
fileContent: ["aaa", "bbb", "ccc", "ddd"].join("\n"),
|
||||
prompt: [
|
||||
"Read adjacent.txt with read_file.",
|
||||
"Replace line 2 ('bbb') with 'BBB' and line 3 ('ccc') with 'CCC'.",
|
||||
"Use edit_file with TWO edits in the same call:",
|
||||
" { op: 'replace', pos: <line2 anchor>, lines: ['BBB'] }",
|
||||
" { op: 'replace', pos: <line3 anchor>, lines: ['CCC'] }",
|
||||
].join(" "),
|
||||
validate: (c) => {
|
||||
const lines = c.trim().split("\n");
|
||||
if (c.includes("bbb")) return { passed: false, reason: "'bbb' still present" };
|
||||
if (c.includes("ccc")) return { passed: false, reason: "'ccc' still present" };
|
||||
if (!c.includes("BBB")) return { passed: false, reason: "'BBB' not found" };
|
||||
if (!c.includes("CCC")) return { passed: false, reason: "'CCC' not found" };
|
||||
if (lines.length !== 4) return { passed: false, reason: `expected 4 lines, got ${lines.length}` };
|
||||
return { passed: true, reason: "two adjacent lines replaced" };
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "13. Prepend multi-line block",
|
||||
fileName: "block.py",
|
||||
fileContent: ["def main():", " print('hello')", "", "main()"].join("\n"),
|
||||
prompt: [
|
||||
"Read block.py with read_file.",
|
||||
"Prepend a 2-line comment block before 'def main():' (line 1).",
|
||||
"The two lines are: '# Author: test' and '# Date: 2025-01-01'.",
|
||||
"Use edit_file with op='prepend', pos=<line1 anchor>, lines=['# Author: test', '# Date: 2025-01-01'].",
|
||||
].join(" "),
|
||||
validate: (c) => {
|
||||
const lines = c.trim().split("\n");
|
||||
if (!c.includes("# Author: test")) return { passed: false, reason: "author comment not found" };
|
||||
if (!c.includes("# Date: 2025-01-01")) return { passed: false, reason: "date comment not found" };
|
||||
const defIdx = lines.findIndex((l) => l.startsWith("def main"));
|
||||
const authorIdx = lines.findIndex((l) => l.includes("Author"));
|
||||
if (authorIdx >= defIdx) return { passed: false, reason: "comments not before def" };
|
||||
return { passed: true, reason: "2-line block prepended before function" };
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "14. Delete range — 3 consecutive lines",
|
||||
fileName: "cleanup.txt",
|
||||
fileContent: ["keep1", "remove-a", "remove-b", "remove-c", "keep2"].join("\n"),
|
||||
prompt: [
|
||||
"Read cleanup.txt with read_file.",
|
||||
"Delete lines 2-4 ('remove-a', 'remove-b', 'remove-c') using a single range replace.",
|
||||
"Use edit_file with op='replace', pos=<line2 anchor>, end=<line4 anchor>, lines=[].",
|
||||
"An empty lines array deletes the range.",
|
||||
].join(" "),
|
||||
validate: (c) => {
|
||||
const lines = c.trim().split("\n");
|
||||
if (c.includes("remove")) return { passed: false, reason: "'remove' lines still present" };
|
||||
if (!c.includes("keep1")) return { passed: false, reason: "'keep1' was deleted" };
|
||||
if (!c.includes("keep2")) return { passed: false, reason: "'keep2' was deleted" };
|
||||
if (lines.length !== 2) return { passed: false, reason: `expected 2 lines, got ${lines.length}` };
|
||||
return { passed: true, reason: "3 consecutive lines deleted via range" };
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "15. Replace with duplicate-content line",
|
||||
fileName: "dupes.txt",
|
||||
fileContent: ["item", "item", "item", "item"].join("\n"),
|
||||
prompt: [
|
||||
"Read dupes.txt with read_file. All 4 lines have the same text 'item'.",
|
||||
"Replace ONLY line 3 with 'CHANGED'. Do NOT modify any other line.",
|
||||
"Use edit_file with op='replace', pos=<line3 anchor>, lines=['CHANGED'].",
|
||||
"The anchor hash uniquely identifies line 3 even though the content is identical.",
|
||||
].join(" "),
|
||||
validate: (c) => {
|
||||
const lines = c.trim().split("\n");
|
||||
if (!c.includes("CHANGED")) return { passed: false, reason: "'CHANGED' not found" };
|
||||
const changedCount = lines.filter((l) => l.trim() === "CHANGED").length;
|
||||
const itemCount = lines.filter((l) => l.trim() === "item").length;
|
||||
if (changedCount !== 1) return { passed: false, reason: `expected 1 CHANGED, got ${changedCount}` };
|
||||
if (itemCount !== 3) return { passed: false, reason: `expected 3 item lines, got ${itemCount}` };
|
||||
if (lines.length !== 4) return { passed: false, reason: `expected 4 lines, got ${lines.length}` };
|
||||
return { passed: true, reason: "only line 3 changed among duplicates" };
|
||||
},
|
||||
},
|
||||
|
||||
// ── Whitespace cases (16-21) ──────────────────────────────────
|
||||
{
|
||||
name: "16. Fix indentation — 2 spaces → 4 spaces",
|
||||
fileName: "indent.js",
|
||||
fileContent: ["function foo() {", " const x = 1;", " return x;", "}"].join("\n"),
|
||||
prompt: [
|
||||
"Read indent.js with read_file.",
|
||||
"Replace line 2 ' const x = 1;' (2-space indent) with ' const x = 1;' (4-space indent).",
|
||||
"Use edit_file with op='replace', pos=<line2 anchor>, lines=[' const x = 1;'].",
|
||||
"The ONLY change is the indentation: 2 spaces → 4 spaces. Content stays the same.",
|
||||
].join(" "),
|
||||
validate: (c) => {
|
||||
const lines = c.split("\n");
|
||||
const line2 = lines[1];
|
||||
if (!line2) return { passed: false, reason: "line 2 missing" };
|
||||
if (line2 === " const x = 1;") return { passed: true, reason: "indentation fixed to 4 spaces" };
|
||||
if (line2 === " const x = 1;") return { passed: false, reason: "still 2-space indent" };
|
||||
return { passed: false, reason: `unexpected line 2: '${line2}'` };
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "17. Replace preserving leading whitespace",
|
||||
fileName: "preserve.py",
|
||||
fileContent: [
|
||||
"class Foo:",
|
||||
" def old_method(self):",
|
||||
" pass",
|
||||
].join("\n"),
|
||||
prompt: [
|
||||
"Read preserve.py with read_file.",
|
||||
"Replace line 2 ' def old_method(self):' with ' def new_method(self):'.",
|
||||
"Keep the 4-space indentation. Only change the method name.",
|
||||
"Use edit_file with op='replace', pos=<line2 anchor>, lines=[' def new_method(self):'].",
|
||||
].join(" "),
|
||||
validate: (c) => {
|
||||
if (c.includes("old_method")) return { passed: false, reason: "'old_method' still present" };
|
||||
const lines = c.split("\n");
|
||||
const methodLine = lines.find((l) => l.includes("new_method"));
|
||||
if (!methodLine) return { passed: false, reason: "'new_method' not found" };
|
||||
if (!methodLine.startsWith(" ")) return { passed: false, reason: "indentation lost" };
|
||||
return { passed: true, reason: "method renamed with indentation preserved" };
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "18. Insert blank line between sections",
|
||||
fileName: "sections.txt",
|
||||
fileContent: ["[section-a]", "value-a=1", "[section-b]", "value-b=2"].join("\n"),
|
||||
prompt: [
|
||||
"Read sections.txt with read_file.",
|
||||
"Insert a blank empty line between 'value-a=1' (line 2) and '[section-b]' (line 3).",
|
||||
"Use edit_file with op='append', pos=<line2 anchor>, lines=[''].",
|
||||
"lines=[''] inserts one empty line.",
|
||||
].join(" "),
|
||||
validate: (c) => {
|
||||
const lines = c.split("\n");
|
||||
const valAIdx = lines.findIndex((l) => l.includes("value-a=1"));
|
||||
const secBIdx = lines.findIndex((l) => l.includes("[section-b]"));
|
||||
if (valAIdx === -1) return { passed: false, reason: "'value-a=1' missing" };
|
||||
if (secBIdx === -1) return { passed: false, reason: "'[section-b]' missing" };
|
||||
if (secBIdx - valAIdx < 2) return { passed: false, reason: "no blank line between sections" };
|
||||
const between = lines[valAIdx + 1];
|
||||
if (between.trim() !== "") return { passed: false, reason: `line between is '${between}', not blank` };
|
||||
return { passed: true, reason: "blank line inserted between sections" };
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "19. Delete blank line",
|
||||
fileName: "noblank.txt",
|
||||
fileContent: ["first", "", "second", "third"].join("\n"),
|
||||
prompt: [
|
||||
"Read noblank.txt with read_file.",
|
||||
"Delete the empty blank line (line 2). Use edit_file with op='replace', pos=<line2 anchor>, lines=[].",
|
||||
].join(" "),
|
||||
validate: (c) => {
|
||||
const lines = c.trim().split("\n");
|
||||
if (lines.length !== 3) return { passed: false, reason: `expected 3 lines, got ${lines.length}` };
|
||||
if (lines[0].trim() !== "first") return { passed: false, reason: "'first' not on line 1" };
|
||||
if (lines[1].trim() !== "second") return { passed: false, reason: "'second' not on line 2" };
|
||||
return { passed: true, reason: "blank line deleted" };
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "20. Tab → spaces conversion",
|
||||
fileName: "tabs.txt",
|
||||
fileContent: ["start", "\tindented-with-tab", "end"].join("\n"),
|
||||
prompt: [
|
||||
"Read tabs.txt with read_file.",
|
||||
"Replace the tab-indented line 2 using edit_file with edits: [{ op: 'replace', pos: '<line2 anchor>', lines: [' indented-with-spaces'] }].",
|
||||
"Expected final line 2 to be 4 spaces followed by indented-with-spaces.",
|
||||
].join(" "),
|
||||
validate: (c) => {
|
||||
if (c.includes("\t")) return { passed: false, reason: "tab still present" };
|
||||
if (!c.includes(" indented-with-spaces"))
|
||||
return { passed: false, reason: "' indented-with-spaces' not found" };
|
||||
if (!c.includes("start")) return { passed: false, reason: "'start' was modified" };
|
||||
return { passed: true, reason: "tab converted to 4 spaces" };
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "21. Deeply nested indent replacement",
|
||||
fileName: "nested.ts",
|
||||
fileContent: [
|
||||
"if (a) {",
|
||||
" if (b) {",
|
||||
" if (c) {",
|
||||
" old_call();",
|
||||
" }",
|
||||
" }",
|
||||
"}",
|
||||
].join("\n"),
|
||||
prompt: [
|
||||
"Read nested.ts with read_file.",
|
||||
"Replace line 4 ' old_call();' with ' new_call();'.",
|
||||
"Preserve the exact 6-space indentation. Only change the function name.",
|
||||
"Use edit_file with op='replace', pos=<line4 anchor>, lines=[' new_call();'].",
|
||||
].join(" "),
|
||||
validate: (c) => {
|
||||
if (c.includes("old_call")) return { passed: false, reason: "'old_call' still present" };
|
||||
const lines = c.split("\n");
|
||||
const callLine = lines.find((l) => l.includes("new_call"));
|
||||
if (!callLine) return { passed: false, reason: "'new_call' not found" };
|
||||
const leadingSpaces = callLine.match(/^ */)?.[0].length ?? 0;
|
||||
if (leadingSpaces !== 6) return { passed: false, reason: `expected 6-space indent, got ${leadingSpaces}` };
|
||||
return { passed: true, reason: "deeply nested line replaced with indent preserved" };
|
||||
},
|
||||
},
|
||||
];
|
||||
|
||||
// ── JSONL event types ─────────────────────────────────────────
|
||||
interface ToolCallEvent {
|
||||
tool_call_id: string;
|
||||
tool_input: Record<string, unknown>;
|
||||
tool_name: string;
|
||||
type: "tool_call";
|
||||
}
|
||||
|
||||
interface ToolResultEvent {
|
||||
error?: string;
|
||||
output: string;
|
||||
tool_call_id: string;
|
||||
type: "tool_result";
|
||||
}
|
||||
|
||||
interface AnyEvent {
|
||||
type: string;
|
||||
[key: string]: unknown;
|
||||
}
|
||||
|
||||
// ── Run single test case ─────────────────────────────────────
|
||||
async function runTestCase(
|
||||
tc: TestCase,
|
||||
testDir: string
|
||||
): Promise<{
|
||||
passed: boolean;
|
||||
editCalls: number;
|
||||
editSuccesses: number;
|
||||
duration: number;
|
||||
}> {
|
||||
const testFile = join(testDir, tc.fileName);
|
||||
writeFileSync(testFile, tc.fileContent, "utf-8");
|
||||
|
||||
const headlessScript = resolve(import.meta.dir, "headless.ts");
|
||||
const headlessArgs = [
|
||||
"run",
|
||||
headlessScript,
|
||||
"-p",
|
||||
tc.prompt,
|
||||
"--no-translate",
|
||||
...extraArgs,
|
||||
];
|
||||
|
||||
const startTime = Date.now();
|
||||
|
||||
const output = await new Promise<string>((res, reject) => {
|
||||
const proc = spawn("bun", headlessArgs, {
|
||||
cwd: testDir,
|
||||
env: { ...process.env, BUN_INSTALL: process.env.BUN_INSTALL },
|
||||
stdio: ["ignore", "pipe", "pipe"],
|
||||
});
|
||||
|
||||
let stdout = "";
|
||||
let stderr = "";
|
||||
|
||||
proc.stdout.on("data", (chunk: Buffer) => {
|
||||
stdout += chunk.toString();
|
||||
});
|
||||
proc.stderr.on("data", (chunk: Buffer) => {
|
||||
stderr += chunk.toString();
|
||||
});
|
||||
|
||||
const timeout = setTimeout(
|
||||
() => {
|
||||
proc.kill("SIGTERM");
|
||||
reject(new Error("Timed out after 4 minutes"));
|
||||
},
|
||||
4 * 60 * 1000
|
||||
);
|
||||
|
||||
proc.on("close", (code) => {
|
||||
clearTimeout(timeout);
|
||||
if (code !== 0) {
|
||||
reject(new Error(`Exit code ${code}\n${stderr.slice(-500)}`));
|
||||
} else {
|
||||
res(stdout);
|
||||
}
|
||||
});
|
||||
proc.on("error", (err) => {
|
||||
clearTimeout(timeout);
|
||||
reject(err);
|
||||
});
|
||||
});
|
||||
|
||||
const duration = Date.now() - startTime;
|
||||
|
||||
// Parse events
|
||||
const events: AnyEvent[] = [];
|
||||
for (const line of output.split("\n").filter((l) => l.trim())) {
|
||||
try {
|
||||
events.push(JSON.parse(line) as AnyEvent);
|
||||
} catch {
|
||||
// skip non-JSON
|
||||
}
|
||||
}
|
||||
|
||||
const toolCalls = events.filter(
|
||||
(e) => e.type === "tool_call"
|
||||
) as unknown as ToolCallEvent[];
|
||||
const toolResults = events.filter(
|
||||
(e) => e.type === "tool_result"
|
||||
) as unknown as ToolResultEvent[];
|
||||
|
||||
const editCalls = toolCalls.filter((e) => e.tool_name === "edit_file");
|
||||
const editCallIds = new Set(editCalls.map((e) => e.tool_call_id));
|
||||
const editResults = toolResults.filter((e) =>
|
||||
editCallIds.has(e.tool_call_id)
|
||||
);
|
||||
const editSuccesses = editResults.filter((e) => !e.error);
|
||||
|
||||
// Show blocked calls
|
||||
const editErrors = editResults.filter((e) => e.error);
|
||||
for (const err of editErrors) {
|
||||
const matchingCall = editCalls.find(
|
||||
(c) => c.tool_call_id === err.tool_call_id
|
||||
);
|
||||
info(` blocked: ${err.error?.slice(0, 120)}`);
|
||||
if (matchingCall) {
|
||||
info(` input: ${JSON.stringify(matchingCall.tool_input).slice(0, 200)}`);
|
||||
}
|
||||
}
|
||||
|
||||
// Validate file content
|
||||
let finalContent: string;
|
||||
try {
|
||||
finalContent = readFileSync(testFile, "utf-8");
|
||||
} catch {
|
||||
return {
|
||||
passed: false,
|
||||
editCalls: editCalls.length,
|
||||
editSuccesses: editSuccesses.length,
|
||||
duration,
|
||||
};
|
||||
}
|
||||
|
||||
const validation = tc.validate(finalContent);
|
||||
|
||||
return {
|
||||
passed: validation.passed,
|
||||
editCalls: editCalls.length,
|
||||
editSuccesses: editSuccesses.length,
|
||||
duration,
|
||||
};
|
||||
}
|
||||
|
||||
// ── Main ──────────────────────────────────────────────────────
|
||||
const main = async () => {
|
||||
console.log(`\n${BOLD}Headless Edit Operations Test — ${TEST_CASES.length} Types${RESET}\n`);
|
||||
|
||||
const testDir = join(tmpdir(), `edit-ops-${Date.now()}`);
|
||||
mkdirSync(testDir, { recursive: true });
|
||||
info(`Test dir: ${testDir}`);
|
||||
console.log();
|
||||
|
||||
let totalPassed = 0;
|
||||
const results: { name: string; passed: boolean; detail: string }[] = [];
|
||||
|
||||
for (const tc of TEST_CASES) {
|
||||
console.log(`${CYAN}${BOLD}${tc.name}${RESET}`);
|
||||
info(`File: ${tc.fileName}`);
|
||||
info(`Prompt: "${tc.prompt.slice(0, 80)}..."`);
|
||||
|
||||
try {
|
||||
const result = await runTestCase(tc, testDir);
|
||||
const status = result.passed
|
||||
? `${GREEN}PASS${RESET}`
|
||||
: `${RED}FAIL${RESET}`;
|
||||
const detail = `edit_file: ${result.editSuccesses}/${result.editCalls} succeeded, ${(result.duration / 1000).toFixed(1)}s`;
|
||||
|
||||
console.log(` ${status} — ${detail}`);
|
||||
|
||||
if (result.passed) {
|
||||
totalPassed++;
|
||||
// Validate the file to show reason
|
||||
const content = readFileSync(join(testDir, tc.fileName), "utf-8");
|
||||
const v = tc.validate(content);
|
||||
pass(v.reason);
|
||||
} else {
|
||||
const content = readFileSync(join(testDir, tc.fileName), "utf-8");
|
||||
const v = tc.validate(content);
|
||||
fail(v.reason);
|
||||
info(
|
||||
`Final content:\n${content
|
||||
.split("\n")
|
||||
.map((l, i) => ` ${i + 1}: ${l}`)
|
||||
.join("\n")}`
|
||||
);
|
||||
}
|
||||
|
||||
results.push({ name: tc.name, passed: result.passed, detail });
|
||||
} catch (error) {
|
||||
const msg = error instanceof Error ? error.message : String(error);
|
||||
console.log(` ${RED}ERROR${RESET} — ${msg.slice(0, 200)}`);
|
||||
fail(msg.slice(0, 200));
|
||||
results.push({ name: tc.name, passed: false, detail: msg.slice(0, 100) });
|
||||
}
|
||||
|
||||
// Reset file for next test (in case of side effects)
|
||||
try {
|
||||
rmSync(join(testDir, tc.fileName), { force: true });
|
||||
} catch {}
|
||||
|
||||
console.log();
|
||||
}
|
||||
|
||||
// Summary
|
||||
console.log(`${BOLD}━━━ Summary ━━━${RESET}`);
|
||||
for (const r of results) {
|
||||
const icon = r.passed ? `${GREEN}✓${RESET}` : `${RED}✗${RESET}`;
|
||||
console.log(` ${icon} ${r.name} — ${r.detail}`);
|
||||
}
|
||||
console.log();
|
||||
console.log(
|
||||
`${BOLD}Result: ${totalPassed}/${TEST_CASES.length} passed (${Math.round((totalPassed / TEST_CASES.length) * 100)}%)${RESET}`
|
||||
);
|
||||
|
||||
// Cleanup
|
||||
try {
|
||||
rmSync(testDir, { recursive: true, force: true });
|
||||
} catch {}
|
||||
|
||||
if (totalPassed === TEST_CASES.length) {
|
||||
console.log(
|
||||
`\n${BOLD}${GREEN}🎉 ALL TESTS PASSED — 100% success rate!${RESET}\n`
|
||||
);
|
||||
process.exit(0);
|
||||
} else {
|
||||
console.log(`\n${BOLD}${RED}Some tests failed.${RESET}\n`);
|
||||
process.exit(1);
|
||||
}
|
||||
};
|
||||
|
||||
main();
|
||||
280
benchmarks/test-multi-model.ts
Normal file
280
benchmarks/test-multi-model.ts
Normal file
@@ -0,0 +1,280 @@
|
||||
#!/usr/bin/env bun
|
||||
/**
|
||||
* Multi-model edit_file test runner
|
||||
*
|
||||
* Runs test-headless-edit-ops.ts against every available model
|
||||
* and produces a summary table.
|
||||
*
|
||||
* Usage:
|
||||
* bun run scripts/test-multi-model-edit.ts [--timeout <seconds>]
|
||||
*/
|
||||
|
||||
import { spawn } from "node:child_process";
|
||||
import { resolve } from "node:path";
|
||||
|
||||
// ── Models ────────────────────────────────────────────────────
|
||||
const MODELS = [
|
||||
{ id: "MiniMaxAI/MiniMax-M2.5", short: "M2.5" },
|
||||
// { id: "MiniMaxAI/MiniMax-M2.1", short: "M2.1" }, // masked: slow + timeout-prone
|
||||
// { id: "zai-org/GLM-5", short: "GLM-5" }, // masked: API 503
|
||||
{ id: "zai-org/GLM-4.7", short: "GLM-4.7" },
|
||||
];
|
||||
|
||||
// ── CLI args ──────────────────────────────────────────────────
|
||||
let perModelTimeoutSec = 900; // 15 min default per model (5 tests)
|
||||
const rawArgs = process.argv.slice(2);
|
||||
for (let i = 0; i < rawArgs.length; i++) {
|
||||
if (rawArgs[i] === "--timeout" && i + 1 < rawArgs.length) {
|
||||
const parsed = Number.parseInt(rawArgs[i + 1], 10);
|
||||
if (Number.isNaN(parsed) || parsed <= 0) {
|
||||
console.error(`Invalid --timeout value: ${rawArgs[i + 1]}`);
|
||||
process.exit(1);
|
||||
}
|
||||
perModelTimeoutSec = parsed;
|
||||
i++;
|
||||
}
|
||||
|
||||
// ── Colors ────────────────────────────────────────────────────
|
||||
const BOLD = "\x1b[1m";
|
||||
const GREEN = "\x1b[32m";
|
||||
const RED = "\x1b[31m";
|
||||
const YELLOW = "\x1b[33m";
|
||||
const DIM = "\x1b[2m";
|
||||
const CYAN = "\x1b[36m";
|
||||
const RESET = "\x1b[0m";
|
||||
|
||||
// ── Types ─────────────────────────────────────────────────────
|
||||
interface TestResult {
|
||||
detail: string;
|
||||
name: string;
|
||||
passed: boolean;
|
||||
}
|
||||
|
||||
interface ModelResult {
|
||||
durationMs: number;
|
||||
error?: string;
|
||||
modelId: string;
|
||||
modelShort: string;
|
||||
tests: TestResult[];
|
||||
totalPassed: number;
|
||||
totalTests: number;
|
||||
}
|
||||
|
||||
// ── Parse test-headless-edit-ops stdout ───────────────────────
|
||||
function parseOpsOutput(stdout: string): TestResult[] {
|
||||
const results: TestResult[] = [];
|
||||
|
||||
// Match lines like: " PASS — edit_file: 1/1 succeeded, 32.5s"
|
||||
// or " FAIL — edit_file: 0/3 succeeded, 15.2s"
|
||||
// or " ERROR — Timed out after 10 minutes"
|
||||
// Following a line like: "1. Replace single line"
|
||||
const lines = stdout.split("\n");
|
||||
|
||||
let currentTestName = "";
|
||||
for (const line of lines) {
|
||||
// Detect test name: starts with ANSI-colored bold cyan + "N. Name"
|
||||
// Strip ANSI codes for matching
|
||||
const stripped = line.replace(/\x1b\[[0-9;]*m/g, "");
|
||||
|
||||
// Test name pattern: "N. <name>"
|
||||
const testNameMatch = stripped.match(/^\s*(\d+\.\s+.+)$/);
|
||||
if (
|
||||
testNameMatch &&
|
||||
!stripped.includes("—") &&
|
||||
!stripped.includes("✓") &&
|
||||
!stripped.includes("✗")
|
||||
) {
|
||||
currentTestName = testNameMatch[1].trim();
|
||||
continue;
|
||||
}
|
||||
|
||||
// Result line: PASS/FAIL/ERROR
|
||||
if (currentTestName && stripped.includes("PASS")) {
|
||||
const detail = stripped.replace(/^\s*PASS\s*—?\s*/, "").trim();
|
||||
results.push({
|
||||
name: currentTestName,
|
||||
passed: true,
|
||||
detail: detail || "passed",
|
||||
});
|
||||
currentTestName = "";
|
||||
} else if (currentTestName && stripped.includes("FAIL")) {
|
||||
const detail = stripped.replace(/^\s*FAIL\s*—?\s*/, "").trim();
|
||||
results.push({
|
||||
name: currentTestName,
|
||||
passed: false,
|
||||
detail: detail || "failed",
|
||||
});
|
||||
currentTestName = "";
|
||||
} else if (currentTestName && stripped.includes("ERROR")) {
|
||||
const detail = stripped.replace(/^\s*ERROR\s*—?\s*/, "").trim();
|
||||
results.push({
|
||||
name: currentTestName,
|
||||
passed: false,
|
||||
detail: detail || "error",
|
||||
});
|
||||
currentTestName = "";
|
||||
}
|
||||
}
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
// ── Run one model ────────────────────────────────────────────
|
||||
async function runModel(model: {
|
||||
id: string;
|
||||
short: string;
|
||||
}): Promise<ModelResult> {
|
||||
const opsScript = resolve(import.meta.dir, "test-edit-ops.ts");
|
||||
const startTime = Date.now();
|
||||
|
||||
return new Promise<ModelResult>((resolvePromise) => {
|
||||
const proc = spawn(
|
||||
"bun",
|
||||
["run", opsScript, "-m", model.id, "--no-translate"],
|
||||
{
|
||||
cwd: resolve(import.meta.dir),
|
||||
env: { ...process.env, BUN_INSTALL: process.env.BUN_INSTALL },
|
||||
stdio: ["ignore", "pipe", "pipe"],
|
||||
}
|
||||
);
|
||||
|
||||
let stdout = "";
|
||||
let stderr = "";
|
||||
|
||||
proc.stdout.on("data", (chunk: Buffer) => {
|
||||
stdout += chunk.toString();
|
||||
});
|
||||
proc.stderr.on("data", (chunk: Buffer) => {
|
||||
stderr += chunk.toString();
|
||||
});
|
||||
|
||||
const timeout = setTimeout(() => {
|
||||
proc.kill("SIGTERM");
|
||||
resolvePromise({
|
||||
modelId: model.id,
|
||||
modelShort: model.short,
|
||||
tests: [],
|
||||
totalPassed: 0,
|
||||
totalTests: 0,
|
||||
durationMs: Date.now() - startTime,
|
||||
error: `Timed out after ${perModelTimeoutSec}s`,
|
||||
});
|
||||
}, perModelTimeoutSec * 1000);
|
||||
|
||||
proc.on("close", () => {
|
||||
clearTimeout(timeout);
|
||||
const tests = parseOpsOutput(stdout);
|
||||
const totalPassed = tests.filter((t) => t.passed).length;
|
||||
|
||||
resolvePromise({
|
||||
modelId: model.id,
|
||||
modelShort: model.short,
|
||||
tests,
|
||||
totalPassed,
|
||||
totalTests: Math.max(tests.length, 5),
|
||||
durationMs: Date.now() - startTime,
|
||||
});
|
||||
});
|
||||
|
||||
proc.on("error", (err) => {
|
||||
clearTimeout(timeout);
|
||||
resolvePromise({
|
||||
modelId: model.id,
|
||||
modelShort: model.short,
|
||||
tests: [],
|
||||
totalPassed: 0,
|
||||
totalTests: 0,
|
||||
durationMs: Date.now() - startTime,
|
||||
error: err.message,
|
||||
});
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
// ── Main ──────────────────────────────────────────────────────
|
||||
const main = async () => {
|
||||
console.log(`\n${BOLD}═══ Multi-Model edit_file Test Runner ═══${RESET}\n`);
|
||||
console.log(`${DIM}Models: ${MODELS.map((m) => m.short).join(", ")}${RESET}`);
|
||||
console.log(`${DIM}Timeout: ${perModelTimeoutSec}s per model${RESET}`);
|
||||
console.log();
|
||||
|
||||
const allResults: ModelResult[] = [];
|
||||
|
||||
for (const model of MODELS) {
|
||||
console.log(`${CYAN}${BOLD}▶ Testing ${model.short} (${model.id})${RESET}`);
|
||||
const result = await runModel(model);
|
||||
allResults.push(result);
|
||||
|
||||
const timeStr = `${(result.durationMs / 1000).toFixed(1)}s`;
|
||||
if (result.error) {
|
||||
console.log(` ${RED}ERROR${RESET}: ${result.error} (${timeStr})`);
|
||||
} else {
|
||||
const color =
|
||||
result.totalPassed === result.totalTests
|
||||
? GREEN
|
||||
: result.totalPassed > 0
|
||||
? YELLOW
|
||||
: RED;
|
||||
console.log(
|
||||
` ${color}${result.totalPassed}/${result.totalTests} passed${RESET} (${timeStr})`
|
||||
);
|
||||
for (const t of result.tests) {
|
||||
const icon = t.passed ? `${GREEN}✓${RESET}` : `${RED}✗${RESET}`;
|
||||
console.log(` ${icon} ${t.name}`);
|
||||
}
|
||||
}
|
||||
console.log();
|
||||
}
|
||||
|
||||
// ── Summary Table ──────────────────────────────────────────
|
||||
console.log(`${BOLD}═══ Summary ═══${RESET}\n`);
|
||||
|
||||
// Per-model results
|
||||
for (const r of allResults) {
|
||||
const timeStr = `${(r.durationMs / 1000).toFixed(0)}s`;
|
||||
const color = r.error ? RED : r.totalPassed === r.totalTests ? GREEN : r.totalPassed > 0 ? YELLOW : RED;
|
||||
const label = r.error ? `ERROR: ${r.error}` : `${r.totalPassed}/${r.totalTests}`;
|
||||
console.log(` ${r.modelShort.padEnd(8)} ${color}${label}${RESET} (${timeStr})`);
|
||||
for (const t of r.tests) {
|
||||
const icon = t.passed ? `${GREEN}✓${RESET}` : `${RED}✗${RESET}`;
|
||||
console.log(` ${icon} ${t.name}`);
|
||||
}
|
||||
}
|
||||
|
||||
console.log();
|
||||
|
||||
// Overall
|
||||
const totalModels = allResults.length;
|
||||
const erroredModels = allResults.filter((r) => r.error).length;
|
||||
const perfectModels = allResults.filter(
|
||||
(r) => !r.error && r.totalPassed === r.totalTests && r.totalTests > 0
|
||||
).length;
|
||||
console.log(
|
||||
`${BOLD}Models with 100%: ${perfectModels}/${totalModels}${RESET}`
|
||||
);
|
||||
|
||||
const overallPassed = allResults.reduce((sum, r) => sum + r.totalPassed, 0);
|
||||
const overallTotal = allResults.reduce((sum, r) => sum + r.totalTests, 0);
|
||||
console.log(
|
||||
`${BOLD}Overall: ${overallPassed}/${overallTotal} (${Math.round((overallPassed / overallTotal) * 100)}%)${RESET}`
|
||||
);
|
||||
|
||||
console.log();
|
||||
|
||||
if (erroredModels > 0) {
|
||||
console.log(
|
||||
`${BOLD}${RED}${erroredModels} model(s) errored. See details above.${RESET}\n`
|
||||
);
|
||||
process.exit(1);
|
||||
} else if (perfectModels === totalModels) {
|
||||
console.log(`${BOLD}${GREEN}🎉 ALL MODELS PASSED ALL TESTS!${RESET}\n`);
|
||||
process.exit(0);
|
||||
} else {
|
||||
console.log(
|
||||
`${BOLD}${YELLOW}Some models have failures. See details above.${RESET}\n`
|
||||
);
|
||||
process.exit(1);
|
||||
}
|
||||
};
|
||||
|
||||
main();
|
||||
@@ -3,8 +3,9 @@
|
||||
// Wrapper script that detects platform and spawns the correct binary
|
||||
|
||||
import { spawnSync } from "node:child_process";
|
||||
import { readFileSync } from "node:fs";
|
||||
import { createRequire } from "node:module";
|
||||
import { getPlatformPackage, getBinaryPath } from "./platform.js";
|
||||
import { getPlatformPackageCandidates, getBinaryPath } from "./platform.js";
|
||||
|
||||
const require = createRequire(import.meta.url);
|
||||
|
||||
@@ -26,55 +27,116 @@ function getLibcFamily() {
|
||||
}
|
||||
}
|
||||
|
||||
function supportsAvx2() {
|
||||
if (process.arch !== "x64") {
|
||||
return null;
|
||||
}
|
||||
|
||||
if (process.env.OH_MY_OPENCODE_FORCE_BASELINE === "1") {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (process.platform === "linux") {
|
||||
try {
|
||||
const cpuInfo = readFileSync("/proc/cpuinfo", "utf8").toLowerCase();
|
||||
return cpuInfo.includes("avx2");
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
if (process.platform === "darwin") {
|
||||
const probe = spawnSync("sysctl", ["-n", "machdep.cpu.leaf7_features"], {
|
||||
encoding: "utf8",
|
||||
});
|
||||
|
||||
if (probe.error || probe.status !== 0) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return probe.stdout.toUpperCase().includes("AVX2");
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
function getSignalExitCode(signal) {
|
||||
const signalCodeByName = {
|
||||
SIGINT: 2,
|
||||
SIGILL: 4,
|
||||
SIGKILL: 9,
|
||||
SIGTERM: 15,
|
||||
};
|
||||
|
||||
return 128 + (signalCodeByName[signal] ?? 1);
|
||||
}
|
||||
|
||||
function main() {
|
||||
const { platform, arch } = process;
|
||||
const libcFamily = getLibcFamily();
|
||||
const avx2Supported = supportsAvx2();
|
||||
|
||||
// Get platform package name
|
||||
let pkg;
|
||||
let packageCandidates;
|
||||
try {
|
||||
pkg = getPlatformPackage({ platform, arch, libcFamily });
|
||||
packageCandidates = getPlatformPackageCandidates({
|
||||
platform,
|
||||
arch,
|
||||
libcFamily,
|
||||
preferBaseline: avx2Supported === false,
|
||||
});
|
||||
} catch (error) {
|
||||
console.error(`\noh-my-opencode: ${error.message}\n`);
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
// Resolve binary path
|
||||
const binRelPath = getBinaryPath(pkg, platform);
|
||||
|
||||
let binPath;
|
||||
try {
|
||||
binPath = require.resolve(binRelPath);
|
||||
} catch {
|
||||
|
||||
const resolvedBinaries = packageCandidates
|
||||
.map((pkg) => {
|
||||
try {
|
||||
return { pkg, binPath: require.resolve(getBinaryPath(pkg, platform)) };
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
})
|
||||
.filter((entry) => entry !== null);
|
||||
|
||||
if (resolvedBinaries.length === 0) {
|
||||
console.error(`\noh-my-opencode: Platform binary not installed.`);
|
||||
console.error(`\nYour platform: ${platform}-${arch}${libcFamily === "musl" ? "-musl" : ""}`);
|
||||
console.error(`Expected package: ${pkg}`);
|
||||
console.error(`Expected packages (in order): ${packageCandidates.join(", ")}`);
|
||||
console.error(`\nTo fix, run:`);
|
||||
console.error(` npm install ${pkg}\n`);
|
||||
console.error(` npm install ${packageCandidates[0]}\n`);
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
// Spawn the binary
|
||||
const result = spawnSync(binPath, process.argv.slice(2), {
|
||||
stdio: "inherit",
|
||||
});
|
||||
|
||||
// Handle spawn errors
|
||||
if (result.error) {
|
||||
console.error(`\noh-my-opencode: Failed to execute binary.`);
|
||||
console.error(`Error: ${result.error.message}\n`);
|
||||
process.exit(2);
|
||||
}
|
||||
|
||||
// Handle signals
|
||||
if (result.signal) {
|
||||
const signalNum = result.signal === "SIGTERM" ? 15 :
|
||||
result.signal === "SIGKILL" ? 9 :
|
||||
result.signal === "SIGINT" ? 2 : 1;
|
||||
process.exit(128 + signalNum);
|
||||
|
||||
for (let index = 0; index < resolvedBinaries.length; index += 1) {
|
||||
const currentBinary = resolvedBinaries[index];
|
||||
const hasFallback = index < resolvedBinaries.length - 1;
|
||||
const result = spawnSync(currentBinary.binPath, process.argv.slice(2), {
|
||||
stdio: "inherit",
|
||||
});
|
||||
|
||||
if (result.error) {
|
||||
if (hasFallback) {
|
||||
continue;
|
||||
}
|
||||
|
||||
console.error(`\noh-my-opencode: Failed to execute binary.`);
|
||||
console.error(`Error: ${result.error.message}\n`);
|
||||
process.exit(2);
|
||||
}
|
||||
|
||||
if (result.signal === "SIGILL" && hasFallback) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (result.signal) {
|
||||
process.exit(getSignalExitCode(result.signal));
|
||||
}
|
||||
|
||||
process.exit(result.status ?? 1);
|
||||
}
|
||||
|
||||
process.exit(result.status ?? 1);
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
main();
|
||||
|
||||
14
bin/platform.d.ts
vendored
Normal file
14
bin/platform.d.ts
vendored
Normal file
@@ -0,0 +1,14 @@
|
||||
export declare function getPlatformPackage(options: {
|
||||
platform: string;
|
||||
arch: string;
|
||||
libcFamily?: string | null;
|
||||
}): string;
|
||||
|
||||
export declare function getPlatformPackageCandidates(options: {
|
||||
platform: string;
|
||||
arch: string;
|
||||
libcFamily?: string | null;
|
||||
preferBaseline?: boolean;
|
||||
}): string[];
|
||||
|
||||
export declare function getBinaryPath(pkg: string, platform: string): string;
|
||||
@@ -26,6 +26,50 @@ export function getPlatformPackage({ platform, arch, libcFamily }) {
|
||||
return `oh-my-opencode-${os}-${arch}${suffix}`;
|
||||
}
|
||||
|
||||
/** @param {{ platform: string, arch: string, libcFamily?: string | null, preferBaseline?: boolean }} options */
|
||||
export function getPlatformPackageCandidates({ platform, arch, libcFamily, preferBaseline = false }) {
|
||||
const primaryPackage = getPlatformPackage({ platform, arch, libcFamily });
|
||||
const baselinePackage = getBaselinePlatformPackage({ platform, arch, libcFamily });
|
||||
|
||||
if (!baselinePackage) {
|
||||
return [primaryPackage];
|
||||
}
|
||||
|
||||
return preferBaseline ? [baselinePackage, primaryPackage] : [primaryPackage, baselinePackage];
|
||||
}
|
||||
|
||||
/** @param {{ platform: string, arch: string, libcFamily?: string | null }} options */
|
||||
function getBaselinePlatformPackage({ platform, arch, libcFamily }) {
|
||||
if (arch !== "x64") {
|
||||
return null;
|
||||
}
|
||||
|
||||
if (platform === "darwin") {
|
||||
return "oh-my-opencode-darwin-x64-baseline";
|
||||
}
|
||||
|
||||
if (platform === "win32") {
|
||||
return "oh-my-opencode-windows-x64-baseline";
|
||||
}
|
||||
|
||||
if (platform === "linux") {
|
||||
if (libcFamily === null || libcFamily === undefined) {
|
||||
throw new Error(
|
||||
"Could not detect libc on Linux. " +
|
||||
"Please ensure detect-libc is installed or report this issue."
|
||||
);
|
||||
}
|
||||
|
||||
if (libcFamily === "musl") {
|
||||
return "oh-my-opencode-linux-x64-musl-baseline";
|
||||
}
|
||||
|
||||
return "oh-my-opencode-linux-x64-baseline";
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the path to the binary within a platform package
|
||||
* @param {string} pkg Package name
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// bin/platform.test.ts
|
||||
import { describe, expect, test } from "bun:test";
|
||||
import { getPlatformPackage, getBinaryPath } from "./platform.js";
|
||||
import { getBinaryPath, getPlatformPackage, getPlatformPackageCandidates } from "./platform.js";
|
||||
|
||||
describe("getPlatformPackage", () => {
|
||||
// #region Darwin platforms
|
||||
@@ -146,3 +146,58 @@ describe("getBinaryPath", () => {
|
||||
expect(result).toBe("oh-my-opencode-linux-x64/bin/oh-my-opencode");
|
||||
});
|
||||
});
|
||||
|
||||
describe("getPlatformPackageCandidates", () => {
|
||||
test("returns x64 and baseline candidates for Linux glibc", () => {
|
||||
// #given Linux x64 with glibc
|
||||
const input = { platform: "linux", arch: "x64", libcFamily: "glibc" };
|
||||
|
||||
// #when getting package candidates
|
||||
const result = getPlatformPackageCandidates(input);
|
||||
|
||||
// #then returns modern first then baseline fallback
|
||||
expect(result).toEqual([
|
||||
"oh-my-opencode-linux-x64",
|
||||
"oh-my-opencode-linux-x64-baseline",
|
||||
]);
|
||||
});
|
||||
|
||||
test("returns x64 musl and baseline candidates for Linux musl", () => {
|
||||
// #given Linux x64 with musl
|
||||
const input = { platform: "linux", arch: "x64", libcFamily: "musl" };
|
||||
|
||||
// #when getting package candidates
|
||||
const result = getPlatformPackageCandidates(input);
|
||||
|
||||
// #then returns musl modern first then musl baseline fallback
|
||||
expect(result).toEqual([
|
||||
"oh-my-opencode-linux-x64-musl",
|
||||
"oh-my-opencode-linux-x64-musl-baseline",
|
||||
]);
|
||||
});
|
||||
|
||||
test("returns baseline first when preferBaseline is true", () => {
|
||||
// #given Windows x64 and baseline preference
|
||||
const input = { platform: "win32", arch: "x64", preferBaseline: true };
|
||||
|
||||
// #when getting package candidates
|
||||
const result = getPlatformPackageCandidates(input);
|
||||
|
||||
// #then baseline package is preferred first
|
||||
expect(result).toEqual([
|
||||
"oh-my-opencode-windows-x64-baseline",
|
||||
"oh-my-opencode-windows-x64",
|
||||
]);
|
||||
});
|
||||
|
||||
test("returns only one candidate for ARM64", () => {
|
||||
// #given non-x64 platform
|
||||
const input = { platform: "linux", arch: "arm64", libcFamily: "glibc" };
|
||||
|
||||
// #when getting package candidates
|
||||
const result = getPlatformPackageCandidates(input);
|
||||
|
||||
// #then baseline fallback is not included
|
||||
expect(result).toEqual(["oh-my-opencode-linux-arm64"]);
|
||||
});
|
||||
});
|
||||
|
||||
31
bun.lock
31
bun.lock
@@ -14,6 +14,7 @@
|
||||
"@opencode-ai/sdk": "^1.1.19",
|
||||
"commander": "^14.0.2",
|
||||
"detect-libc": "^2.0.0",
|
||||
"diff": "^8.0.3",
|
||||
"js-yaml": "^4.1.1",
|
||||
"jsonc-parser": "^3.3.1",
|
||||
"picocolors": "^1.1.1",
|
||||
@@ -28,13 +29,13 @@
|
||||
"typescript": "^5.7.3",
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"oh-my-opencode-darwin-arm64": "3.8.1",
|
||||
"oh-my-opencode-darwin-x64": "3.8.1",
|
||||
"oh-my-opencode-linux-arm64": "3.8.1",
|
||||
"oh-my-opencode-linux-arm64-musl": "3.8.1",
|
||||
"oh-my-opencode-linux-x64": "3.8.1",
|
||||
"oh-my-opencode-linux-x64-musl": "3.8.1",
|
||||
"oh-my-opencode-windows-x64": "3.8.1",
|
||||
"oh-my-opencode-darwin-arm64": "3.8.5",
|
||||
"oh-my-opencode-darwin-x64": "3.8.5",
|
||||
"oh-my-opencode-linux-arm64": "3.8.5",
|
||||
"oh-my-opencode-linux-arm64-musl": "3.8.5",
|
||||
"oh-my-opencode-linux-x64": "3.8.5",
|
||||
"oh-my-opencode-linux-x64-musl": "3.8.5",
|
||||
"oh-my-opencode-windows-x64": "3.8.5",
|
||||
},
|
||||
},
|
||||
},
|
||||
@@ -138,6 +139,8 @@
|
||||
|
||||
"detect-libc": ["detect-libc@2.1.2", "", {}, "sha512-Btj2BOOO83o3WyH59e8MgXsxEQVcarkUOpEYrubB0urwnN10yQ364rsiByU11nZlqWYZm05i/of7io4mzihBtQ=="],
|
||||
|
||||
"diff": ["diff@8.0.3", "", {}, "sha512-qejHi7bcSD4hQAZE0tNAawRK1ZtafHDmMTMkrrIGgSLl7hTnQHmKCeB45xAcbfTqK2zowkM3j3bHt/4b/ARbYQ=="],
|
||||
|
||||
"dunder-proto": ["dunder-proto@1.0.1", "", { "dependencies": { "call-bind-apply-helpers": "^1.0.1", "es-errors": "^1.3.0", "gopd": "^1.2.0" } }, "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A=="],
|
||||
|
||||
"ee-first": ["ee-first@1.1.1", "", {}, "sha512-WMwm9LhRUo+WUaRN+vRuETqG89IgZphVSNkdFgeb6sS/E4OrDIN7t48CAewSHXc6C8lefD8KKfr5vY61brQlow=="],
|
||||
@@ -228,19 +231,19 @@
|
||||
|
||||
"object-inspect": ["object-inspect@1.13.4", "", {}, "sha512-W67iLl4J2EXEGTbfeHCffrjDfitvLANg0UlX3wFUUSTx92KXRFegMHUVgSqE+wvhAbi4WqjGg9czysTV2Epbew=="],
|
||||
|
||||
"oh-my-opencode-darwin-arm64": ["oh-my-opencode-darwin-arm64@3.8.1", "", { "os": "darwin", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-vbtS0WUFOZpufKzlX2G83fIDry3rpiXej8zNuXNCkx7hF34rK04rj0zeBH9dL+kdNV0Ys0Wl1rR1Mjto28UcAw=="],
|
||||
"oh-my-opencode-darwin-arm64": ["oh-my-opencode-darwin-arm64@3.8.5", "", { "os": "darwin", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-bbLu1We9NNhYAVp9Q/FK8dYFlYLp2PKfvdBCr+O6QjNRixdjp8Ru4RK7i9mKg0ybYBUzzCcbbC2Cc1o8orkhBA=="],
|
||||
|
||||
"oh-my-opencode-darwin-x64": ["oh-my-opencode-darwin-x64@3.8.1", "", { "os": "darwin", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-gLz6dLNg9hr7roqBjaqlxta6+XYCs032/FiE0CiwypIBtYOq5EAgDVJ95JY5DQ2M+3Un028d50yMfwsfNfGlSw=="],
|
||||
"oh-my-opencode-darwin-x64": ["oh-my-opencode-darwin-x64@3.8.5", "", { "os": "darwin", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-N9GcmzYgL87UybSaMGiHc5lwT5Mxg1tyB502el5syouN39wfeUYoj37SonENrMUTiEfn75Lwv/5cSLCesSubpA=="],
|
||||
|
||||
"oh-my-opencode-linux-arm64": ["oh-my-opencode-linux-arm64@3.8.1", "", { "os": "linux", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-teAIuHlR5xOAoUmA+e0bGzy3ikgIr+nCdyOPwHYm8jIp0aBUWAqbcdoQLeNTgenWpoM8vhHk+2xh4WcCeQzjEA=="],
|
||||
"oh-my-opencode-linux-arm64": ["oh-my-opencode-linux-arm64@3.8.5", "", { "os": "linux", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-ki4a7s1DD5z5wEKmzcchqAKOIpw0LsBvyF8ieqNLS5Xl8PWE0gAZ7rqjlXC54NTubpexVH6lO2yenFJsk2Zk9A=="],
|
||||
|
||||
"oh-my-opencode-linux-arm64-musl": ["oh-my-opencode-linux-arm64-musl@3.8.1", "", { "os": "linux", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-VzBEq1H5dllEloouIoLdbw1icNUW99qmvErFrNj66mX42DNXK+f1zTtvBG8U6eeFfUBRRJoUjdCsvO65f8BkFA=="],
|
||||
"oh-my-opencode-linux-arm64-musl": ["oh-my-opencode-linux-arm64-musl@3.8.5", "", { "os": "linux", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-9+6hU3z503fBzuV0VjxIkTKFElbKacHijFcdKAussG6gPFLWmCRWtdowzEDwUfAoIsoHHH7FBwvh5waGp/ZksA=="],
|
||||
|
||||
"oh-my-opencode-linux-x64": ["oh-my-opencode-linux-x64@3.8.1", "", { "os": "linux", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-8hDcb8s+wdQpQObSmiyaaTV0P/js2Bs9Lu+HmzrkKjuMLXXj/Gk7K0kKWMoEnMbMGfj86GfBHHIWmu9juI/SjA=="],
|
||||
"oh-my-opencode-linux-x64": ["oh-my-opencode-linux-x64@3.8.5", "", { "os": "linux", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-DmnMK/PgvdcCYL+OQE5iZWgi/vmjm0sIPQVQgSUbWn3izcUF7C5DtlxqaU2cKxNZwrhDTlJdLWxmJqgLmLqd9A=="],
|
||||
|
||||
"oh-my-opencode-linux-x64-musl": ["oh-my-opencode-linux-x64-musl@3.8.1", "", { "os": "linux", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-idyH5bdYn7wrLkIkYr83omN83E2BjA/9DUHCX2we8VXbhDVbBgmMpUg8B8nKnd5NK/SyLHgRs5QqQJw8XBC0cQ=="],
|
||||
"oh-my-opencode-linux-x64-musl": ["oh-my-opencode-linux-x64-musl@3.8.5", "", { "os": "linux", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-jhCNStljsyapVq9X7PaHSOcWxxEA4BUcIibvoPs/xc7fVP8D47p651LzIRsM6STn6Bx684mlYbxxX1P/0QPKNg=="],
|
||||
|
||||
"oh-my-opencode-windows-x64": ["oh-my-opencode-windows-x64@3.8.1", "", { "os": "win32", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode.exe" } }, "sha512-O30L1PUF9aq1vSOyadcXQOLnDFSTvYn6cGd5huh0LAK/us0hGezoahtXegMdFtDXPIIREJlkRQhyJiafza7YgA=="],
|
||||
"oh-my-opencode-windows-x64": ["oh-my-opencode-windows-x64@3.8.5", "", { "os": "win32", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode.exe" } }, "sha512-lcPBp9NCNQ6TnqzsN9p/K+xKwOzBoIPw7HncxmrXSberZ3uHy0K9uNraQ7fqnXIKWqQiK4kSwWfSHpmhbaHiNg=="],
|
||||
|
||||
"on-finished": ["on-finished@2.4.1", "", { "dependencies": { "ee-first": "1.1.1" } }, "sha512-oVlzkg3ENAhCk2zdv7IJwd/QUD4z2RxRwpkcGY8psCVcCYZNq4wYnVWALHM+brtuJjePWiYF/ClmuDr8Ch5+kg=="],
|
||||
|
||||
|
||||
21
package.json
21
package.json
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "oh-my-opencode",
|
||||
"version": "3.8.4",
|
||||
"version": "3.9.0",
|
||||
"description": "The Best AI Agent Harness - Batteries-Included OpenCode Plugin with Multi-Model Orchestration, Parallel Background Agents, and Crafted LSP/AST Tools",
|
||||
"main": "dist/index.js",
|
||||
"types": "dist/index.d.ts",
|
||||
@@ -60,6 +60,7 @@
|
||||
"@opencode-ai/sdk": "^1.1.19",
|
||||
"commander": "^14.0.2",
|
||||
"detect-libc": "^2.0.0",
|
||||
"diff": "^8.0.3",
|
||||
"js-yaml": "^4.1.1",
|
||||
"jsonc-parser": "^3.3.1",
|
||||
"picocolors": "^1.1.1",
|
||||
@@ -74,13 +75,17 @@
|
||||
"typescript": "^5.7.3"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"oh-my-opencode-darwin-arm64": "3.8.4",
|
||||
"oh-my-opencode-darwin-x64": "3.8.4",
|
||||
"oh-my-opencode-linux-arm64": "3.8.4",
|
||||
"oh-my-opencode-linux-arm64-musl": "3.8.4",
|
||||
"oh-my-opencode-linux-x64": "3.8.4",
|
||||
"oh-my-opencode-linux-x64-musl": "3.8.4",
|
||||
"oh-my-opencode-windows-x64": "3.8.4"
|
||||
"oh-my-opencode-darwin-arm64": "3.9.0",
|
||||
"oh-my-opencode-darwin-x64": "3.9.0",
|
||||
"oh-my-opencode-darwin-x64-baseline": "3.9.0",
|
||||
"oh-my-opencode-linux-arm64": "3.9.0",
|
||||
"oh-my-opencode-linux-arm64-musl": "3.9.0",
|
||||
"oh-my-opencode-linux-x64": "3.9.0",
|
||||
"oh-my-opencode-linux-x64-baseline": "3.9.0",
|
||||
"oh-my-opencode-linux-x64-musl": "3.9.0",
|
||||
"oh-my-opencode-linux-x64-musl-baseline": "3.9.0",
|
||||
"oh-my-opencode-windows-x64": "3.9.0",
|
||||
"oh-my-opencode-windows-x64-baseline": "3.9.0"
|
||||
},
|
||||
"trustedDependencies": [
|
||||
"@ast-grep/cli",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "oh-my-opencode-darwin-arm64",
|
||||
"version": "3.8.4",
|
||||
"version": "3.9.0",
|
||||
"description": "Platform-specific binary for oh-my-opencode (darwin-arm64)",
|
||||
"license": "MIT",
|
||||
"repository": {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "oh-my-opencode-darwin-x64-baseline",
|
||||
"version": "3.1.1",
|
||||
"version": "3.9.0",
|
||||
"description": "Platform-specific binary for oh-my-opencode (darwin-x64-baseline, no AVX2)",
|
||||
"license": "MIT",
|
||||
"repository": {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "oh-my-opencode-darwin-x64",
|
||||
"version": "3.8.4",
|
||||
"version": "3.9.0",
|
||||
"description": "Platform-specific binary for oh-my-opencode (darwin-x64)",
|
||||
"license": "MIT",
|
||||
"repository": {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "oh-my-opencode-linux-arm64-musl",
|
||||
"version": "3.8.4",
|
||||
"version": "3.9.0",
|
||||
"description": "Platform-specific binary for oh-my-opencode (linux-arm64-musl)",
|
||||
"license": "MIT",
|
||||
"repository": {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "oh-my-opencode-linux-arm64",
|
||||
"version": "3.8.4",
|
||||
"version": "3.9.0",
|
||||
"description": "Platform-specific binary for oh-my-opencode (linux-arm64)",
|
||||
"license": "MIT",
|
||||
"repository": {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "oh-my-opencode-linux-x64-baseline",
|
||||
"version": "3.1.1",
|
||||
"version": "3.9.0",
|
||||
"description": "Platform-specific binary for oh-my-opencode (linux-x64-baseline, no AVX2)",
|
||||
"license": "MIT",
|
||||
"repository": {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "oh-my-opencode-linux-x64-musl-baseline",
|
||||
"version": "3.1.1",
|
||||
"version": "3.9.0",
|
||||
"description": "Platform-specific binary for oh-my-opencode (linux-x64-musl-baseline, no AVX2)",
|
||||
"license": "MIT",
|
||||
"repository": {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "oh-my-opencode-linux-x64-musl",
|
||||
"version": "3.8.4",
|
||||
"version": "3.9.0",
|
||||
"description": "Platform-specific binary for oh-my-opencode (linux-x64-musl)",
|
||||
"license": "MIT",
|
||||
"repository": {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "oh-my-opencode-linux-x64",
|
||||
"version": "3.8.4",
|
||||
"version": "3.9.0",
|
||||
"description": "Platform-specific binary for oh-my-opencode (linux-x64)",
|
||||
"license": "MIT",
|
||||
"repository": {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "oh-my-opencode-windows-x64-baseline",
|
||||
"version": "3.1.1",
|
||||
"version": "3.9.0",
|
||||
"description": "Platform-specific binary for oh-my-opencode (windows-x64-baseline, no AVX2)",
|
||||
"license": "MIT",
|
||||
"repository": {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "oh-my-opencode-windows-x64",
|
||||
"version": "3.8.4",
|
||||
"version": "3.9.0",
|
||||
"description": "Platform-specific binary for oh-my-opencode (windows-x64)",
|
||||
"license": "MIT",
|
||||
"repository": {
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
// Runs after npm install to verify platform binary is available
|
||||
|
||||
import { createRequire } from "node:module";
|
||||
import { getPlatformPackage, getBinaryPath } from "./bin/platform.js";
|
||||
import { getPlatformPackageCandidates, getBinaryPath } from "./bin/platform.js";
|
||||
|
||||
const require = createRequire(import.meta.url);
|
||||
|
||||
@@ -27,12 +27,28 @@ function main() {
|
||||
const libcFamily = getLibcFamily();
|
||||
|
||||
try {
|
||||
const pkg = getPlatformPackage({ platform, arch, libcFamily });
|
||||
const binPath = getBinaryPath(pkg, platform);
|
||||
|
||||
// Try to resolve the binary
|
||||
require.resolve(binPath);
|
||||
console.log(`✓ oh-my-opencode binary installed for ${platform}-${arch}`);
|
||||
const packageCandidates = getPlatformPackageCandidates({
|
||||
platform,
|
||||
arch,
|
||||
libcFamily,
|
||||
});
|
||||
|
||||
const resolvedPackage = packageCandidates.find((pkg) => {
|
||||
try {
|
||||
require.resolve(getBinaryPath(pkg, platform));
|
||||
return true;
|
||||
} catch {
|
||||
return false;
|
||||
}
|
||||
});
|
||||
|
||||
if (!resolvedPackage) {
|
||||
throw new Error(
|
||||
`No platform binary package installed. Tried: ${packageCandidates.join(", ")}`
|
||||
);
|
||||
}
|
||||
|
||||
console.log(`✓ oh-my-opencode binary installed for ${platform}-${arch} (${resolvedPackage})`);
|
||||
} catch (error) {
|
||||
console.warn(`⚠ oh-my-opencode: ${error.message}`);
|
||||
console.warn(` The CLI may not work on this platform.`);
|
||||
|
||||
@@ -1703,6 +1703,70 @@
|
||||
"created_at": "2026-02-23T19:27:59Z",
|
||||
"repoId": 1108837393,
|
||||
"pullRequestNo": 2080
|
||||
},
|
||||
{
|
||||
"name": "PHP-Expert",
|
||||
"id": 12047666,
|
||||
"comment_id": 3951828700,
|
||||
"created_at": "2026-02-24T13:27:18Z",
|
||||
"repoId": 1108837393,
|
||||
"pullRequestNo": 2098
|
||||
},
|
||||
{
|
||||
"name": "Pantoria",
|
||||
"id": 37699442,
|
||||
"comment_id": 3953543578,
|
||||
"created_at": "2026-02-24T17:12:31Z",
|
||||
"repoId": 1108837393,
|
||||
"pullRequestNo": 1983
|
||||
},
|
||||
{
|
||||
"name": "east-shine",
|
||||
"id": 20237288,
|
||||
"comment_id": 3957576758,
|
||||
"created_at": "2026-02-25T08:19:34Z",
|
||||
"repoId": 1108837393,
|
||||
"pullRequestNo": 2113
|
||||
},
|
||||
{
|
||||
"name": "SupenBysz",
|
||||
"id": 3314033,
|
||||
"comment_id": 3962352704,
|
||||
"created_at": "2026-02-25T22:00:54Z",
|
||||
"repoId": 1108837393,
|
||||
"pullRequestNo": 2119
|
||||
},
|
||||
{
|
||||
"name": "zhzy0077",
|
||||
"id": 8717471,
|
||||
"comment_id": 3964015975,
|
||||
"created_at": "2026-02-26T04:45:23Z",
|
||||
"repoId": 1108837393,
|
||||
"pullRequestNo": 2125
|
||||
},
|
||||
{
|
||||
"name": "spacecowboy0416",
|
||||
"id": 239068998,
|
||||
"comment_id": 3964320737,
|
||||
"created_at": "2026-02-26T06:05:27Z",
|
||||
"repoId": 1108837393,
|
||||
"pullRequestNo": 2126
|
||||
},
|
||||
{
|
||||
"name": "imwxc",
|
||||
"id": 49653609,
|
||||
"comment_id": 3965127447,
|
||||
"created_at": "2026-02-26T09:00:16Z",
|
||||
"repoId": 1108837393,
|
||||
"pullRequestNo": 2129
|
||||
},
|
||||
{
|
||||
"name": "maou-shonen",
|
||||
"id": 22576780,
|
||||
"comment_id": 3965445132,
|
||||
"created_at": "2026-02-26T09:50:46Z",
|
||||
"repoId": 1108837393,
|
||||
"pullRequestNo": 2131
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -1,6 +1,6 @@
|
||||
# src/ — Plugin Source
|
||||
|
||||
**Generated:** 2026-02-21
|
||||
**Generated:** 2026-02-24
|
||||
|
||||
## OVERVIEW
|
||||
|
||||
@@ -14,7 +14,7 @@ Root source directory. Entry point `index.ts` orchestrates 4-step initialization
|
||||
| `plugin-config.ts` | JSONC parse, multi-level merge (user → project → defaults), Zod validation |
|
||||
| `create-managers.ts` | TmuxSessionManager, BackgroundManager, SkillMcpManager, ConfigHandler |
|
||||
| `create-tools.ts` | SkillContext + AvailableCategories + ToolRegistry |
|
||||
| `create-hooks.ts` | 3-tier hook composition: Core(35) + Continuation(7) + Skill(2) |
|
||||
| `create-hooks.ts` | 3-tier hook composition: Core(37) + Continuation(7) + Skill(2) |
|
||||
| `plugin-interface.ts` | Assembles 8 OpenCode hook handlers into PluginInterface |
|
||||
|
||||
## CONFIG LOADING
|
||||
@@ -32,9 +32,9 @@ loadPluginConfig(directory, ctx)
|
||||
|
||||
```
|
||||
createHooks()
|
||||
├─→ createCoreHooks() # 35 hooks
|
||||
│ ├─ createSessionHooks() # 21: contextWindowMonitor, thinkMode, ralphLoop, sessionRecovery, jsonErrorRecovery, sisyphusGptHephaestusReminder, anthropicEffort...
|
||||
│ ├─ createToolGuardHooks() # 10: commentChecker, rulesInjector, writeExistingFileGuard, hashlineEditDiffEnhancer...
|
||||
├─→ createCoreHooks() # 37 hooks
|
||||
│ ├─ createSessionHooks() # 23: contextWindowMonitor, thinkMode, ralphLoop, modelFallback, runtimeFallback, noSisyphusGpt, noHephaestusNonGpt, anthropicEffort...
|
||||
│ ├─ createToolGuardHooks() # 10: commentChecker, rulesInjector, writeExistingFileGuard, jsonErrorRecovery, hashlineReadEnhancer...
|
||||
│ └─ createTransformHooks() # 4: claudeCodeHooks, keywordDetector, contextInjector, thinkingBlockValidator
|
||||
├─→ createContinuationHooks() # 7: todoContinuationEnforcer, atlas, stopContinuationGuard...
|
||||
└─→ createSkillHooks() # 2: categorySkillReminder, autoSlashCommand
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# src/agents/ — 11 Agent Definitions
|
||||
|
||||
**Generated:** 2026-02-21
|
||||
**Generated:** 2026-02-24
|
||||
|
||||
## OVERVIEW
|
||||
|
||||
|
||||
@@ -17,7 +17,6 @@ import type { AvailableAgent, AvailableSkill, AvailableCategory } from "../dynam
|
||||
import { buildCategorySkillsDelegationGuide } from "../dynamic-agent-prompt-builder"
|
||||
import type { CategoryConfig } from "../../config/schema"
|
||||
import { mergeCategories } from "../../shared/merge-categories"
|
||||
import { createAgentToolRestrictions } from "../../shared/permission-compat"
|
||||
|
||||
import { getDefaultAtlasPrompt } from "./default"
|
||||
import { getGptAtlasPrompt } from "./gpt"
|
||||
@@ -30,7 +29,7 @@ import {
|
||||
buildDecisionMatrix,
|
||||
} from "./prompt-section-builder"
|
||||
|
||||
const MODE: AgentMode = "primary"
|
||||
const MODE: AgentMode = "all"
|
||||
|
||||
export type AtlasPromptSource = "default" | "gpt" | "gemini"
|
||||
|
||||
@@ -100,11 +99,6 @@ function buildDynamicOrchestratorPrompt(ctx?: OrchestratorContext): string {
|
||||
}
|
||||
|
||||
export function createAtlasAgent(ctx: OrchestratorContext): AgentConfig {
|
||||
const restrictions = createAgentToolRestrictions([
|
||||
"task",
|
||||
"call_omo_agent",
|
||||
])
|
||||
|
||||
const baseConfig = {
|
||||
description:
|
||||
"Orchestrates work via task() to complete ALL tasks in a todo list until fully done. (Atlas - OhMyOpenCode)",
|
||||
@@ -113,7 +107,6 @@ export function createAtlasAgent(ctx: OrchestratorContext): AgentConfig {
|
||||
temperature: 0.1,
|
||||
prompt: buildDynamicOrchestratorPrompt(ctx),
|
||||
color: "#10B981",
|
||||
...restrictions,
|
||||
}
|
||||
|
||||
return baseConfig as AgentConfig
|
||||
|
||||
41
src/agents/env-context.test.ts
Normal file
41
src/agents/env-context.test.ts
Normal file
@@ -0,0 +1,41 @@
|
||||
/// <reference types="bun-types" />
|
||||
|
||||
import { describe, test, expect } from "bun:test"
|
||||
import { createEnvContext } from "./env-context"
|
||||
|
||||
describe("createEnvContext", () => {
|
||||
test("returns omo-env block with timezone and locale", () => {
|
||||
// #given - no setup needed
|
||||
|
||||
// #when
|
||||
const result = createEnvContext()
|
||||
|
||||
// #then
|
||||
expect(result).toContain("<omo-env>")
|
||||
expect(result).toContain("</omo-env>")
|
||||
expect(result).toContain("Timezone:")
|
||||
expect(result).toContain("Locale:")
|
||||
expect(result).not.toContain("Current date:")
|
||||
})
|
||||
|
||||
test("does not include time with seconds precision to preserve token cache", () => {
|
||||
// #given - seconds-precision time changes every second, breaking cache on every request
|
||||
|
||||
// #when
|
||||
const result = createEnvContext()
|
||||
|
||||
// #then - no HH:MM:SS pattern anywhere in the output
|
||||
expect(result).not.toMatch(/\d{1,2}:\d{2}:\d{2}/)
|
||||
})
|
||||
|
||||
test("does not include date or time fields since OpenCode already provides them", () => {
|
||||
// #given - OpenCode's system.ts already injects date, platform, working directory
|
||||
|
||||
// #when
|
||||
const result = createEnvContext()
|
||||
|
||||
// #then - only timezone and locale remain; both are stable across requests
|
||||
expect(result).not.toContain("Current date:")
|
||||
expect(result).not.toContain("Current time:")
|
||||
})
|
||||
})
|
||||
@@ -1,32 +1,15 @@
|
||||
/**
|
||||
* Creates OmO-specific environment context (time, timezone, locale).
|
||||
* Creates OmO-specific environment context (timezone, locale).
|
||||
* Note: Working directory, platform, and date are already provided by OpenCode's system.ts,
|
||||
* so we only include fields that OpenCode doesn't provide to avoid duplication.
|
||||
* See: https://github.com/code-yeongyu/oh-my-opencode/issues/379
|
||||
*/
|
||||
export function createEnvContext(): string {
|
||||
const now = new Date()
|
||||
const timezone = Intl.DateTimeFormat().resolvedOptions().timeZone
|
||||
const locale = Intl.DateTimeFormat().resolvedOptions().locale
|
||||
|
||||
const dateStr = now.toLocaleDateString(locale, {
|
||||
weekday: "short",
|
||||
year: "numeric",
|
||||
month: "short",
|
||||
day: "numeric",
|
||||
})
|
||||
|
||||
const timeStr = now.toLocaleTimeString(locale, {
|
||||
hour: "2-digit",
|
||||
minute: "2-digit",
|
||||
second: "2-digit",
|
||||
hour12: true,
|
||||
})
|
||||
|
||||
return `
|
||||
<omo-env>
|
||||
Current date: ${dateStr}
|
||||
Current time: ${timeStr}
|
||||
Timezone: ${timezone}
|
||||
Locale: ${locale}
|
||||
</omo-env>`
|
||||
|
||||
@@ -19,7 +19,7 @@ import {
|
||||
categorizeTools,
|
||||
} from "./dynamic-agent-prompt-builder";
|
||||
|
||||
const MODE: AgentMode = "primary";
|
||||
const MODE: AgentMode = "all";
|
||||
|
||||
function buildTodoDisciplineSection(useTaskSystem: boolean): string {
|
||||
if (useTaskSystem) {
|
||||
|
||||
@@ -8,7 +8,7 @@ import {
|
||||
buildGeminiIntentGateEnforcement,
|
||||
} from "./sisyphus-gemini-overlays";
|
||||
|
||||
const MODE: AgentMode = "primary";
|
||||
const MODE: AgentMode = "all";
|
||||
export const SISYPHUS_PROMPT_METADATA: AgentPromptMetadata = {
|
||||
category: "utility",
|
||||
cost: "EXPENSIVE",
|
||||
|
||||
@@ -4,6 +4,7 @@ import { createLibrarianAgent } from "./librarian"
|
||||
import { createExploreAgent } from "./explore"
|
||||
import { createMomusAgent } from "./momus"
|
||||
import { createMetisAgent } from "./metis"
|
||||
import { createAtlasAgent } from "./atlas"
|
||||
|
||||
const TEST_MODEL = "anthropic/claude-sonnet-4-5"
|
||||
|
||||
@@ -96,4 +97,18 @@ describe("read-only agent tool restrictions", () => {
|
||||
}
|
||||
})
|
||||
})
|
||||
|
||||
describe("Atlas", () => {
|
||||
test("allows delegation tools for orchestration", () => {
|
||||
// given
|
||||
const agent = createAtlasAgent({ model: TEST_MODEL })
|
||||
|
||||
// when
|
||||
const permission = (agent.permission ?? {}) as Record<string, string>
|
||||
|
||||
// then
|
||||
expect(permission["task"]).toBeUndefined()
|
||||
expect(permission["call_omo_agent"]).toBeUndefined()
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
@@ -2,11 +2,17 @@ import { describe, test, expect } from "bun:test";
|
||||
import { isGptModel, isGeminiModel } from "./types";
|
||||
|
||||
describe("isGptModel", () => {
|
||||
test("standard openai provider models", () => {
|
||||
test("standard openai provider gpt models", () => {
|
||||
expect(isGptModel("openai/gpt-5.2")).toBe(true);
|
||||
expect(isGptModel("openai/gpt-4o")).toBe(true);
|
||||
expect(isGptModel("openai/o1")).toBe(true);
|
||||
expect(isGptModel("openai/o3-mini")).toBe(true);
|
||||
});
|
||||
|
||||
test("o-series models are not gpt by name", () => {
|
||||
expect(isGptModel("openai/o1")).toBe(false);
|
||||
expect(isGptModel("openai/o3-mini")).toBe(false);
|
||||
expect(isGptModel("litellm/o1")).toBe(false);
|
||||
expect(isGptModel("litellm/o3-mini")).toBe(false);
|
||||
expect(isGptModel("litellm/o4-mini")).toBe(false);
|
||||
});
|
||||
|
||||
test("github copilot gpt models", () => {
|
||||
@@ -17,9 +23,6 @@ describe("isGptModel", () => {
|
||||
test("litellm proxied gpt models", () => {
|
||||
expect(isGptModel("litellm/gpt-5.2")).toBe(true);
|
||||
expect(isGptModel("litellm/gpt-4o")).toBe(true);
|
||||
expect(isGptModel("litellm/o1")).toBe(true);
|
||||
expect(isGptModel("litellm/o3-mini")).toBe(true);
|
||||
expect(isGptModel("litellm/o4-mini")).toBe(true);
|
||||
});
|
||||
|
||||
test("other proxied gpt models", () => {
|
||||
@@ -27,6 +30,11 @@ describe("isGptModel", () => {
|
||||
expect(isGptModel("custom-provider/gpt-5.2")).toBe(true);
|
||||
});
|
||||
|
||||
test("venice provider gpt models", () => {
|
||||
expect(isGptModel("venice/gpt-5.2")).toBe(true);
|
||||
expect(isGptModel("venice/gpt-4o")).toBe(true);
|
||||
});
|
||||
|
||||
test("gpt4 prefix without hyphen (legacy naming)", () => {
|
||||
expect(isGptModel("litellm/gpt4o")).toBe(true);
|
||||
expect(isGptModel("ollama/gpt4")).toBe(true);
|
||||
@@ -39,8 +47,8 @@ describe("isGptModel", () => {
|
||||
});
|
||||
|
||||
test("gemini models are not gpt", () => {
|
||||
expect(isGptModel("google/gemini-3-pro")).toBe(false);
|
||||
expect(isGptModel("litellm/gemini-3-pro")).toBe(false);
|
||||
expect(isGptModel("google/gemini-3.1-pro")).toBe(false);
|
||||
expect(isGptModel("litellm/gemini-3.1-pro")).toBe(false);
|
||||
});
|
||||
|
||||
test("opencode provider is not gpt", () => {
|
||||
@@ -50,29 +58,29 @@ describe("isGptModel", () => {
|
||||
|
||||
describe("isGeminiModel", () => {
|
||||
test("#given google provider models #then returns true", () => {
|
||||
expect(isGeminiModel("google/gemini-3-pro")).toBe(true);
|
||||
expect(isGeminiModel("google/gemini-3.1-pro")).toBe(true);
|
||||
expect(isGeminiModel("google/gemini-3-flash")).toBe(true);
|
||||
expect(isGeminiModel("google/gemini-2.5-pro")).toBe(true);
|
||||
});
|
||||
|
||||
test("#given google-vertex provider models #then returns true", () => {
|
||||
expect(isGeminiModel("google-vertex/gemini-3-pro")).toBe(true);
|
||||
expect(isGeminiModel("google-vertex/gemini-3.1-pro")).toBe(true);
|
||||
expect(isGeminiModel("google-vertex/gemini-3-flash")).toBe(true);
|
||||
});
|
||||
|
||||
test("#given github copilot gemini models #then returns true", () => {
|
||||
expect(isGeminiModel("github-copilot/gemini-3-pro")).toBe(true);
|
||||
expect(isGeminiModel("github-copilot/gemini-3.1-pro")).toBe(true);
|
||||
expect(isGeminiModel("github-copilot/gemini-3-flash")).toBe(true);
|
||||
});
|
||||
|
||||
test("#given litellm proxied gemini models #then returns true", () => {
|
||||
expect(isGeminiModel("litellm/gemini-3-pro")).toBe(true);
|
||||
expect(isGeminiModel("litellm/gemini-3.1-pro")).toBe(true);
|
||||
expect(isGeminiModel("litellm/gemini-3-flash")).toBe(true);
|
||||
expect(isGeminiModel("litellm/gemini-2.5-pro")).toBe(true);
|
||||
});
|
||||
|
||||
test("#given other proxied gemini models #then returns true", () => {
|
||||
expect(isGeminiModel("custom-provider/gemini-3-pro")).toBe(true);
|
||||
expect(isGeminiModel("custom-provider/gemini-3.1-pro")).toBe(true);
|
||||
expect(isGeminiModel("ollama/gemini-3-flash")).toBe(true);
|
||||
});
|
||||
|
||||
|
||||
@@ -70,14 +70,9 @@ function extractModelName(model: string): string {
|
||||
return model.includes("/") ? model.split("/").pop() ?? model : model
|
||||
}
|
||||
|
||||
const GPT_MODEL_PREFIXES = ["gpt-", "gpt4", "o1", "o3", "o4"]
|
||||
|
||||
export function isGptModel(model: string): boolean {
|
||||
if (model.startsWith("openai/") || model.startsWith("github-copilot/gpt-"))
|
||||
return true
|
||||
|
||||
const modelName = extractModelName(model).toLowerCase()
|
||||
return GPT_MODEL_PREFIXES.some((prefix) => modelName.startsWith(prefix))
|
||||
return modelName.includes("gpt")
|
||||
}
|
||||
|
||||
const GEMINI_PROVIDERS = ["google/", "google-vertex/"]
|
||||
|
||||
@@ -589,20 +589,22 @@ describe("createBuiltinAgents with requiresProvider gating (hephaestus)", () =>
|
||||
}
|
||||
})
|
||||
|
||||
test("hephaestus is created when github-copilot provider is connected", async () => {
|
||||
// #given - github-copilot provider has models available
|
||||
test("hephaestus IS created when github-copilot is connected with a GPT model", async () => {
|
||||
// #given - github-copilot provider has gpt-5.3-codex available
|
||||
const fetchSpy = spyOn(shared, "fetchAvailableModels").mockResolvedValue(
|
||||
new Set(["github-copilot/gpt-5.3-codex"])
|
||||
)
|
||||
const cacheSpy = spyOn(connectedProvidersCache, "readConnectedProvidersCache").mockReturnValue(null)
|
||||
|
||||
try {
|
||||
// #when
|
||||
const agents = await createBuiltinAgents([], {}, undefined, TEST_DEFAULT_MODEL, undefined, undefined, [], {})
|
||||
|
||||
// #then
|
||||
// #then - github-copilot is now a valid provider for hephaestus
|
||||
expect(agents.hephaestus).toBeDefined()
|
||||
} finally {
|
||||
fetchSpy.mockRestore()
|
||||
cacheSpy.mockRestore()
|
||||
}
|
||||
})
|
||||
|
||||
@@ -986,7 +988,7 @@ describe("buildAgent with category and skills", () => {
|
||||
const agent = buildAgent(source["test-agent"], TEST_MODEL)
|
||||
|
||||
// #then - category's built-in model is applied
|
||||
expect(agent.model).toBe("google/gemini-3-pro")
|
||||
expect(agent.model).toBe("google/gemini-3.1-pro")
|
||||
})
|
||||
|
||||
test("agent with category and existing model keeps existing model", () => {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# src/cli/ — CLI: install, run, doctor, mcp-oauth
|
||||
|
||||
**Generated:** 2026-02-21
|
||||
**Generated:** 2026-02-24
|
||||
|
||||
## OVERVIEW
|
||||
|
||||
|
||||
@@ -325,7 +325,7 @@ exports[`generateModelConfig single native provider uses Gemini models when only
|
||||
"$schema": "https://raw.githubusercontent.com/code-yeongyu/oh-my-opencode/master/assets/oh-my-opencode.schema.json",
|
||||
"agents": {
|
||||
"atlas": {
|
||||
"model": "google/gemini-3-pro-preview",
|
||||
"model": "google/gemini-3.1-pro-preview",
|
||||
},
|
||||
"explore": {
|
||||
"model": "opencode/gpt-5-nano",
|
||||
@@ -334,34 +334,34 @@ exports[`generateModelConfig single native provider uses Gemini models when only
|
||||
"model": "opencode/glm-4.7-free",
|
||||
},
|
||||
"metis": {
|
||||
"model": "google/gemini-3-pro-preview",
|
||||
"model": "google/gemini-3.1-pro-preview",
|
||||
"variant": "high",
|
||||
},
|
||||
"momus": {
|
||||
"model": "google/gemini-3-pro-preview",
|
||||
"model": "google/gemini-3.1-pro-preview",
|
||||
"variant": "high",
|
||||
},
|
||||
"multimodal-looker": {
|
||||
"model": "google/gemini-3-flash-preview",
|
||||
},
|
||||
"oracle": {
|
||||
"model": "google/gemini-3-pro-preview",
|
||||
"model": "google/gemini-3.1-pro-preview",
|
||||
"variant": "high",
|
||||
},
|
||||
"prometheus": {
|
||||
"model": "google/gemini-3-pro-preview",
|
||||
"model": "google/gemini-3.1-pro-preview",
|
||||
},
|
||||
},
|
||||
"categories": {
|
||||
"artistry": {
|
||||
"model": "google/gemini-3-pro-preview",
|
||||
"model": "google/gemini-3.1-pro-preview",
|
||||
"variant": "high",
|
||||
},
|
||||
"quick": {
|
||||
"model": "google/gemini-3-flash-preview",
|
||||
},
|
||||
"ultrabrain": {
|
||||
"model": "google/gemini-3-pro-preview",
|
||||
"model": "google/gemini-3.1-pro-preview",
|
||||
"variant": "high",
|
||||
},
|
||||
"unspecified-high": {
|
||||
@@ -371,7 +371,7 @@ exports[`generateModelConfig single native provider uses Gemini models when only
|
||||
"model": "google/gemini-3-flash-preview",
|
||||
},
|
||||
"visual-engineering": {
|
||||
"model": "google/gemini-3-pro-preview",
|
||||
"model": "google/gemini-3.1-pro-preview",
|
||||
"variant": "high",
|
||||
},
|
||||
"writing": {
|
||||
@@ -386,7 +386,7 @@ exports[`generateModelConfig single native provider uses Gemini models with isMa
|
||||
"$schema": "https://raw.githubusercontent.com/code-yeongyu/oh-my-opencode/master/assets/oh-my-opencode.schema.json",
|
||||
"agents": {
|
||||
"atlas": {
|
||||
"model": "google/gemini-3-pro-preview",
|
||||
"model": "google/gemini-3.1-pro-preview",
|
||||
},
|
||||
"explore": {
|
||||
"model": "opencode/gpt-5-nano",
|
||||
@@ -395,44 +395,44 @@ exports[`generateModelConfig single native provider uses Gemini models with isMa
|
||||
"model": "opencode/glm-4.7-free",
|
||||
},
|
||||
"metis": {
|
||||
"model": "google/gemini-3-pro-preview",
|
||||
"model": "google/gemini-3.1-pro-preview",
|
||||
"variant": "high",
|
||||
},
|
||||
"momus": {
|
||||
"model": "google/gemini-3-pro-preview",
|
||||
"model": "google/gemini-3.1-pro-preview",
|
||||
"variant": "high",
|
||||
},
|
||||
"multimodal-looker": {
|
||||
"model": "google/gemini-3-flash-preview",
|
||||
},
|
||||
"oracle": {
|
||||
"model": "google/gemini-3-pro-preview",
|
||||
"model": "google/gemini-3.1-pro-preview",
|
||||
"variant": "high",
|
||||
},
|
||||
"prometheus": {
|
||||
"model": "google/gemini-3-pro-preview",
|
||||
"model": "google/gemini-3.1-pro-preview",
|
||||
},
|
||||
},
|
||||
"categories": {
|
||||
"artistry": {
|
||||
"model": "google/gemini-3-pro-preview",
|
||||
"model": "google/gemini-3.1-pro-preview",
|
||||
"variant": "high",
|
||||
},
|
||||
"quick": {
|
||||
"model": "google/gemini-3-flash-preview",
|
||||
},
|
||||
"ultrabrain": {
|
||||
"model": "google/gemini-3-pro-preview",
|
||||
"model": "google/gemini-3.1-pro-preview",
|
||||
"variant": "high",
|
||||
},
|
||||
"unspecified-high": {
|
||||
"model": "google/gemini-3-pro-preview",
|
||||
"model": "google/gemini-3.1-pro-preview",
|
||||
},
|
||||
"unspecified-low": {
|
||||
"model": "google/gemini-3-flash-preview",
|
||||
},
|
||||
"visual-engineering": {
|
||||
"model": "google/gemini-3-pro-preview",
|
||||
"model": "google/gemini-3.1-pro-preview",
|
||||
"variant": "high",
|
||||
},
|
||||
"writing": {
|
||||
@@ -485,7 +485,7 @@ exports[`generateModelConfig all native providers uses preferred models from fal
|
||||
},
|
||||
"categories": {
|
||||
"artistry": {
|
||||
"model": "google/gemini-3-pro-preview",
|
||||
"model": "google/gemini-3.1-pro-preview",
|
||||
"variant": "high",
|
||||
},
|
||||
"deep": {
|
||||
@@ -506,7 +506,7 @@ exports[`generateModelConfig all native providers uses preferred models from fal
|
||||
"model": "anthropic/claude-sonnet-4-5",
|
||||
},
|
||||
"visual-engineering": {
|
||||
"model": "google/gemini-3-pro-preview",
|
||||
"model": "google/gemini-3.1-pro-preview",
|
||||
"variant": "high",
|
||||
},
|
||||
"writing": {
|
||||
@@ -559,7 +559,7 @@ exports[`generateModelConfig all native providers uses preferred models with isM
|
||||
},
|
||||
"categories": {
|
||||
"artistry": {
|
||||
"model": "google/gemini-3-pro-preview",
|
||||
"model": "google/gemini-3.1-pro-preview",
|
||||
"variant": "high",
|
||||
},
|
||||
"deep": {
|
||||
@@ -581,7 +581,7 @@ exports[`generateModelConfig all native providers uses preferred models with isM
|
||||
"model": "anthropic/claude-sonnet-4-5",
|
||||
},
|
||||
"visual-engineering": {
|
||||
"model": "google/gemini-3-pro-preview",
|
||||
"model": "google/gemini-3.1-pro-preview",
|
||||
"variant": "high",
|
||||
},
|
||||
"writing": {
|
||||
@@ -634,7 +634,7 @@ exports[`generateModelConfig fallback providers uses OpenCode Zen models when on
|
||||
},
|
||||
"categories": {
|
||||
"artistry": {
|
||||
"model": "opencode/gemini-3-pro",
|
||||
"model": "opencode/gemini-3.1-pro",
|
||||
"variant": "high",
|
||||
},
|
||||
"deep": {
|
||||
@@ -655,7 +655,7 @@ exports[`generateModelConfig fallback providers uses OpenCode Zen models when on
|
||||
"model": "opencode/claude-sonnet-4-5",
|
||||
},
|
||||
"visual-engineering": {
|
||||
"model": "opencode/gemini-3-pro",
|
||||
"model": "opencode/gemini-3.1-pro",
|
||||
"variant": "high",
|
||||
},
|
||||
"writing": {
|
||||
@@ -708,7 +708,7 @@ exports[`generateModelConfig fallback providers uses OpenCode Zen models with is
|
||||
},
|
||||
"categories": {
|
||||
"artistry": {
|
||||
"model": "opencode/gemini-3-pro",
|
||||
"model": "opencode/gemini-3.1-pro",
|
||||
"variant": "high",
|
||||
},
|
||||
"deep": {
|
||||
@@ -730,7 +730,7 @@ exports[`generateModelConfig fallback providers uses OpenCode Zen models with is
|
||||
"model": "opencode/claude-sonnet-4-5",
|
||||
},
|
||||
"visual-engineering": {
|
||||
"model": "opencode/gemini-3-pro",
|
||||
"model": "opencode/gemini-3.1-pro",
|
||||
"variant": "high",
|
||||
},
|
||||
"writing": {
|
||||
@@ -750,10 +750,6 @@ exports[`generateModelConfig fallback providers uses GitHub Copilot models when
|
||||
"explore": {
|
||||
"model": "github-copilot/gpt-5-mini",
|
||||
},
|
||||
"hephaestus": {
|
||||
"model": "github-copilot/gpt-5.3-codex",
|
||||
"variant": "medium",
|
||||
},
|
||||
"librarian": {
|
||||
"model": "github-copilot/claude-sonnet-4.5",
|
||||
},
|
||||
@@ -783,19 +779,15 @@ exports[`generateModelConfig fallback providers uses GitHub Copilot models when
|
||||
},
|
||||
"categories": {
|
||||
"artistry": {
|
||||
"model": "github-copilot/gemini-3-pro-preview",
|
||||
"model": "github-copilot/gemini-3.1-pro-preview",
|
||||
"variant": "high",
|
||||
},
|
||||
"deep": {
|
||||
"model": "github-copilot/gpt-5.3-codex",
|
||||
"variant": "medium",
|
||||
},
|
||||
"quick": {
|
||||
"model": "github-copilot/claude-haiku-4.5",
|
||||
},
|
||||
"ultrabrain": {
|
||||
"model": "github-copilot/gpt-5.3-codex",
|
||||
"variant": "xhigh",
|
||||
"model": "github-copilot/gemini-3.1-pro-preview",
|
||||
"variant": "high",
|
||||
},
|
||||
"unspecified-high": {
|
||||
"model": "github-copilot/claude-sonnet-4.5",
|
||||
@@ -804,7 +796,7 @@ exports[`generateModelConfig fallback providers uses GitHub Copilot models when
|
||||
"model": "github-copilot/claude-sonnet-4.5",
|
||||
},
|
||||
"visual-engineering": {
|
||||
"model": "github-copilot/gemini-3-pro-preview",
|
||||
"model": "github-copilot/gemini-3.1-pro-preview",
|
||||
"variant": "high",
|
||||
},
|
||||
"writing": {
|
||||
@@ -824,10 +816,6 @@ exports[`generateModelConfig fallback providers uses GitHub Copilot models with
|
||||
"explore": {
|
||||
"model": "github-copilot/gpt-5-mini",
|
||||
},
|
||||
"hephaestus": {
|
||||
"model": "github-copilot/gpt-5.3-codex",
|
||||
"variant": "medium",
|
||||
},
|
||||
"librarian": {
|
||||
"model": "github-copilot/claude-sonnet-4.5",
|
||||
},
|
||||
@@ -857,19 +845,15 @@ exports[`generateModelConfig fallback providers uses GitHub Copilot models with
|
||||
},
|
||||
"categories": {
|
||||
"artistry": {
|
||||
"model": "github-copilot/gemini-3-pro-preview",
|
||||
"model": "github-copilot/gemini-3.1-pro-preview",
|
||||
"variant": "high",
|
||||
},
|
||||
"deep": {
|
||||
"model": "github-copilot/gpt-5.3-codex",
|
||||
"variant": "medium",
|
||||
},
|
||||
"quick": {
|
||||
"model": "github-copilot/claude-haiku-4.5",
|
||||
},
|
||||
"ultrabrain": {
|
||||
"model": "github-copilot/gpt-5.3-codex",
|
||||
"variant": "xhigh",
|
||||
"model": "github-copilot/gemini-3.1-pro-preview",
|
||||
"variant": "high",
|
||||
},
|
||||
"unspecified-high": {
|
||||
"model": "github-copilot/claude-opus-4.6",
|
||||
@@ -879,7 +863,7 @@ exports[`generateModelConfig fallback providers uses GitHub Copilot models with
|
||||
"model": "github-copilot/claude-sonnet-4.5",
|
||||
},
|
||||
"visual-engineering": {
|
||||
"model": "github-copilot/gemini-3-pro-preview",
|
||||
"model": "github-copilot/gemini-3.1-pro-preview",
|
||||
"variant": "high",
|
||||
},
|
||||
"writing": {
|
||||
@@ -1042,7 +1026,7 @@ exports[`generateModelConfig mixed provider scenarios uses Claude + OpenCode Zen
|
||||
},
|
||||
"categories": {
|
||||
"artistry": {
|
||||
"model": "opencode/gemini-3-pro",
|
||||
"model": "opencode/gemini-3.1-pro",
|
||||
"variant": "high",
|
||||
},
|
||||
"deep": {
|
||||
@@ -1063,7 +1047,7 @@ exports[`generateModelConfig mixed provider scenarios uses Claude + OpenCode Zen
|
||||
"model": "anthropic/claude-sonnet-4-5",
|
||||
},
|
||||
"visual-engineering": {
|
||||
"model": "opencode/gemini-3-pro",
|
||||
"model": "opencode/gemini-3.1-pro",
|
||||
"variant": "high",
|
||||
},
|
||||
"writing": {
|
||||
@@ -1116,7 +1100,7 @@ exports[`generateModelConfig mixed provider scenarios uses OpenAI + Copilot comb
|
||||
},
|
||||
"categories": {
|
||||
"artistry": {
|
||||
"model": "github-copilot/gemini-3-pro-preview",
|
||||
"model": "github-copilot/gemini-3.1-pro-preview",
|
||||
"variant": "high",
|
||||
},
|
||||
"deep": {
|
||||
@@ -1137,7 +1121,7 @@ exports[`generateModelConfig mixed provider scenarios uses OpenAI + Copilot comb
|
||||
"model": "github-copilot/claude-sonnet-4.5",
|
||||
},
|
||||
"visual-engineering": {
|
||||
"model": "github-copilot/gemini-3-pro-preview",
|
||||
"model": "github-copilot/gemini-3.1-pro-preview",
|
||||
"variant": "high",
|
||||
},
|
||||
"writing": {
|
||||
@@ -1233,7 +1217,7 @@ exports[`generateModelConfig mixed provider scenarios uses Gemini + Claude combi
|
||||
"model": "google/gemini-3-flash-preview",
|
||||
},
|
||||
"oracle": {
|
||||
"model": "google/gemini-3-pro-preview",
|
||||
"model": "google/gemini-3.1-pro-preview",
|
||||
"variant": "high",
|
||||
},
|
||||
"prometheus": {
|
||||
@@ -1247,14 +1231,14 @@ exports[`generateModelConfig mixed provider scenarios uses Gemini + Claude combi
|
||||
},
|
||||
"categories": {
|
||||
"artistry": {
|
||||
"model": "google/gemini-3-pro-preview",
|
||||
"model": "google/gemini-3.1-pro-preview",
|
||||
"variant": "high",
|
||||
},
|
||||
"quick": {
|
||||
"model": "anthropic/claude-haiku-4-5",
|
||||
},
|
||||
"ultrabrain": {
|
||||
"model": "google/gemini-3-pro-preview",
|
||||
"model": "google/gemini-3.1-pro-preview",
|
||||
"variant": "high",
|
||||
},
|
||||
"unspecified-high": {
|
||||
@@ -1264,7 +1248,7 @@ exports[`generateModelConfig mixed provider scenarios uses Gemini + Claude combi
|
||||
"model": "anthropic/claude-sonnet-4-5",
|
||||
},
|
||||
"visual-engineering": {
|
||||
"model": "google/gemini-3-pro-preview",
|
||||
"model": "google/gemini-3.1-pro-preview",
|
||||
"variant": "high",
|
||||
},
|
||||
"writing": {
|
||||
@@ -1285,7 +1269,7 @@ exports[`generateModelConfig mixed provider scenarios uses all fallback provider
|
||||
"model": "opencode/claude-haiku-4-5",
|
||||
},
|
||||
"hephaestus": {
|
||||
"model": "github-copilot/gpt-5.3-codex",
|
||||
"model": "opencode/gpt-5.3-codex",
|
||||
"variant": "medium",
|
||||
},
|
||||
"librarian": {
|
||||
@@ -1317,18 +1301,18 @@ exports[`generateModelConfig mixed provider scenarios uses all fallback provider
|
||||
},
|
||||
"categories": {
|
||||
"artistry": {
|
||||
"model": "github-copilot/gemini-3-pro-preview",
|
||||
"model": "github-copilot/gemini-3.1-pro-preview",
|
||||
"variant": "high",
|
||||
},
|
||||
"deep": {
|
||||
"model": "github-copilot/gpt-5.3-codex",
|
||||
"model": "opencode/gpt-5.3-codex",
|
||||
"variant": "medium",
|
||||
},
|
||||
"quick": {
|
||||
"model": "github-copilot/claude-haiku-4.5",
|
||||
},
|
||||
"ultrabrain": {
|
||||
"model": "github-copilot/gpt-5.3-codex",
|
||||
"model": "opencode/gpt-5.3-codex",
|
||||
"variant": "xhigh",
|
||||
},
|
||||
"unspecified-high": {
|
||||
@@ -1338,7 +1322,7 @@ exports[`generateModelConfig mixed provider scenarios uses all fallback provider
|
||||
"model": "github-copilot/claude-sonnet-4.5",
|
||||
},
|
||||
"visual-engineering": {
|
||||
"model": "github-copilot/gemini-3-pro-preview",
|
||||
"model": "github-copilot/gemini-3.1-pro-preview",
|
||||
"variant": "high",
|
||||
},
|
||||
"writing": {
|
||||
@@ -1391,7 +1375,7 @@ exports[`generateModelConfig mixed provider scenarios uses all providers togethe
|
||||
},
|
||||
"categories": {
|
||||
"artistry": {
|
||||
"model": "google/gemini-3-pro-preview",
|
||||
"model": "google/gemini-3.1-pro-preview",
|
||||
"variant": "high",
|
||||
},
|
||||
"deep": {
|
||||
@@ -1412,7 +1396,7 @@ exports[`generateModelConfig mixed provider scenarios uses all providers togethe
|
||||
"model": "anthropic/claude-sonnet-4-5",
|
||||
},
|
||||
"visual-engineering": {
|
||||
"model": "google/gemini-3-pro-preview",
|
||||
"model": "google/gemini-3.1-pro-preview",
|
||||
"variant": "high",
|
||||
},
|
||||
"writing": {
|
||||
@@ -1465,7 +1449,7 @@ exports[`generateModelConfig mixed provider scenarios uses all providers with is
|
||||
},
|
||||
"categories": {
|
||||
"artistry": {
|
||||
"model": "google/gemini-3-pro-preview",
|
||||
"model": "google/gemini-3.1-pro-preview",
|
||||
"variant": "high",
|
||||
},
|
||||
"deep": {
|
||||
@@ -1487,7 +1471,7 @@ exports[`generateModelConfig mixed provider scenarios uses all providers with is
|
||||
"model": "anthropic/claude-sonnet-4-5",
|
||||
},
|
||||
"visual-engineering": {
|
||||
"model": "google/gemini-3-pro-preview",
|
||||
"model": "google/gemini-3.1-pro-preview",
|
||||
"variant": "high",
|
||||
},
|
||||
"writing": {
|
||||
|
||||
@@ -178,7 +178,7 @@ describe("config-manager ANTIGRAVITY_PROVIDER_CONFIG", () => {
|
||||
expect(models).toBeTruthy()
|
||||
|
||||
const required = [
|
||||
"antigravity-gemini-3-pro",
|
||||
"antigravity-gemini-3.1-pro",
|
||||
"antigravity-gemini-3-flash",
|
||||
"antigravity-claude-sonnet-4-6",
|
||||
"antigravity-claude-sonnet-4-6-thinking",
|
||||
@@ -206,7 +206,7 @@ describe("config-manager ANTIGRAVITY_PROVIDER_CONFIG", () => {
|
||||
const models = (ANTIGRAVITY_PROVIDER_CONFIG as any).google.models as Record<string, any>
|
||||
|
||||
// #when checking Gemini Pro variants
|
||||
const pro = models["antigravity-gemini-3-pro"]
|
||||
const pro = models["antigravity-gemini-3.1-pro"]
|
||||
// #then should have low and high variants
|
||||
expect(pro.variants).toBeTruthy()
|
||||
expect(pro.variants.low).toBeTruthy()
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# src/cli/config-manager/ — CLI Installation Utilities
|
||||
|
||||
**Generated:** 2026-02-21
|
||||
**Generated:** 2026-02-24
|
||||
|
||||
## OVERVIEW
|
||||
|
||||
|
||||
@@ -4,10 +4,10 @@
|
||||
* IMPORTANT: Model names MUST use `antigravity-` prefix for stability.
|
||||
*
|
||||
* Since opencode-antigravity-auth v1.3.0, models use a variant system:
|
||||
* - `antigravity-gemini-3-pro` with variants: low, high
|
||||
* - `antigravity-gemini-3.1-pro` with variants: low, high
|
||||
* - `antigravity-gemini-3-flash` with variants: minimal, low, medium, high
|
||||
*
|
||||
* Legacy tier-suffixed names (e.g., `antigravity-gemini-3-pro-high`) still work
|
||||
* Legacy tier-suffixed names (e.g., `antigravity-gemini-3.1-pro-high`) still work
|
||||
* but variants are the recommended approach.
|
||||
*
|
||||
* @see https://github.com/NoeFabris/opencode-antigravity-auth#models
|
||||
@@ -16,7 +16,7 @@ export const ANTIGRAVITY_PROVIDER_CONFIG = {
|
||||
google: {
|
||||
name: "Google",
|
||||
models: {
|
||||
"antigravity-gemini-3-pro": {
|
||||
"antigravity-gemini-3.1-pro": {
|
||||
name: "Gemini 3 Pro (Antigravity)",
|
||||
limit: { context: 1048576, output: 65535 },
|
||||
modalities: { input: ["text", "image", "pdf"], output: ["text"] },
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import { getConfigDir } from "./config-context"
|
||||
import { spawnWithWindowsHide } from "../../shared/spawn-with-windows-hide"
|
||||
|
||||
const BUN_INSTALL_TIMEOUT_SECONDS = 60
|
||||
const BUN_INSTALL_TIMEOUT_MS = BUN_INSTALL_TIMEOUT_SECONDS * 1000
|
||||
@@ -16,7 +17,7 @@ export async function runBunInstall(): Promise<boolean> {
|
||||
|
||||
export async function runBunInstallWithDetails(): Promise<BunInstallResult> {
|
||||
try {
|
||||
const proc = Bun.spawn(["bun", "install"], {
|
||||
const proc = spawnWithWindowsHide(["bun", "install"], {
|
||||
cwd: getConfigDir(),
|
||||
stdout: "inherit",
|
||||
stderr: "inherit",
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import type { OpenCodeBinaryType } from "../../shared/opencode-config-dir-types"
|
||||
import { spawnWithWindowsHide } from "../../shared/spawn-with-windows-hide"
|
||||
import { initConfigContext } from "./config-context"
|
||||
|
||||
const OPENCODE_BINARIES = ["opencode", "opencode-desktop"] as const
|
||||
@@ -11,7 +12,7 @@ interface OpenCodeBinaryResult {
|
||||
async function findOpenCodeBinaryWithVersion(): Promise<OpenCodeBinaryResult | null> {
|
||||
for (const binary of OPENCODE_BINARIES) {
|
||||
try {
|
||||
const proc = Bun.spawn([binary, "--version"], {
|
||||
const proc = spawnWithWindowsHide([binary, "--version"], {
|
||||
stdout: "pipe",
|
||||
stderr: "pipe",
|
||||
})
|
||||
|
||||
80
src/cli/config-manager/write-omo-config.test.ts
Normal file
80
src/cli/config-manager/write-omo-config.test.ts
Normal file
@@ -0,0 +1,80 @@
|
||||
import { afterEach, beforeEach, describe, expect, it } from "bun:test"
|
||||
import { mkdirSync, readFileSync, rmSync, writeFileSync } from "node:fs"
|
||||
import { tmpdir } from "node:os"
|
||||
import { join } from "node:path"
|
||||
|
||||
import { parseJsonc } from "../../shared/jsonc-parser"
|
||||
import type { InstallConfig } from "../types"
|
||||
import { resetConfigContext } from "./config-context"
|
||||
import { generateOmoConfig } from "./generate-omo-config"
|
||||
import { writeOmoConfig } from "./write-omo-config"
|
||||
|
||||
const installConfig: InstallConfig = {
|
||||
hasClaude: true,
|
||||
isMax20: true,
|
||||
hasOpenAI: true,
|
||||
hasGemini: true,
|
||||
hasCopilot: false,
|
||||
hasOpencodeZen: false,
|
||||
hasZaiCodingPlan: false,
|
||||
hasKimiForCoding: false,
|
||||
}
|
||||
|
||||
function getRecord(value: unknown): Record<string, unknown> {
|
||||
if (value && typeof value === "object" && !Array.isArray(value)) {
|
||||
return value as Record<string, unknown>
|
||||
}
|
||||
|
||||
return {}
|
||||
}
|
||||
|
||||
describe("writeOmoConfig", () => {
|
||||
let testConfigDir = ""
|
||||
let testConfigPath = ""
|
||||
|
||||
beforeEach(() => {
|
||||
testConfigDir = join(tmpdir(), `omo-write-config-${Date.now()}-${Math.random().toString(36).slice(2)}`)
|
||||
testConfigPath = join(testConfigDir, "oh-my-opencode.json")
|
||||
|
||||
mkdirSync(testConfigDir, { recursive: true })
|
||||
process.env.OPENCODE_CONFIG_DIR = testConfigDir
|
||||
resetConfigContext()
|
||||
})
|
||||
|
||||
afterEach(() => {
|
||||
rmSync(testConfigDir, { recursive: true, force: true })
|
||||
resetConfigContext()
|
||||
delete process.env.OPENCODE_CONFIG_DIR
|
||||
})
|
||||
|
||||
it("preserves existing user values while adding new defaults", () => {
|
||||
// given
|
||||
const existingConfig = {
|
||||
agents: {
|
||||
sisyphus: {
|
||||
model: "custom/provider-model",
|
||||
},
|
||||
},
|
||||
disabled_hooks: ["comment-checker"],
|
||||
}
|
||||
writeFileSync(testConfigPath, JSON.stringify(existingConfig, null, 2) + "\n", "utf-8")
|
||||
|
||||
const generatedDefaults = generateOmoConfig(installConfig)
|
||||
|
||||
// when
|
||||
const result = writeOmoConfig(installConfig)
|
||||
|
||||
// then
|
||||
expect(result.success).toBe(true)
|
||||
|
||||
const savedConfig = parseJsonc<Record<string, unknown>>(readFileSync(testConfigPath, "utf-8"))
|
||||
const savedAgents = getRecord(savedConfig.agents)
|
||||
const savedSisyphus = getRecord(savedAgents.sisyphus)
|
||||
expect(savedSisyphus.model).toBe("custom/provider-model")
|
||||
expect(savedConfig.disabled_hooks).toEqual(["comment-checker"])
|
||||
|
||||
for (const defaultKey of Object.keys(generatedDefaults)) {
|
||||
expect(savedConfig).toHaveProperty(defaultKey)
|
||||
}
|
||||
})
|
||||
})
|
||||
@@ -43,7 +43,7 @@ export function writeOmoConfig(installConfig: InstallConfig): ConfigMergeResult
|
||||
return { success: true, configPath: omoConfigPath }
|
||||
}
|
||||
|
||||
const merged = deepMergeRecord(existing, newConfig)
|
||||
const merged = deepMergeRecord(newConfig, existing)
|
||||
writeFileSync(omoConfigPath, JSON.stringify(merged, null, 2) + "\n")
|
||||
} catch (parseErr) {
|
||||
if (parseErr instanceof SyntaxError) {
|
||||
|
||||
@@ -3,6 +3,7 @@ import { createRequire } from "node:module"
|
||||
import { dirname, join } from "node:path"
|
||||
|
||||
import type { DependencyInfo } from "../types"
|
||||
import { spawnWithWindowsHide } from "../../../shared/spawn-with-windows-hide"
|
||||
|
||||
async function checkBinaryExists(binary: string): Promise<{ exists: boolean; path: string | null }> {
|
||||
try {
|
||||
@@ -18,7 +19,7 @@ async function checkBinaryExists(binary: string): Promise<{ exists: boolean; pat
|
||||
|
||||
async function getBinaryVersion(binary: string): Promise<string | null> {
|
||||
try {
|
||||
const proc = Bun.spawn([binary, "--version"], { stdout: "pipe", stderr: "pipe" })
|
||||
const proc = spawnWithWindowsHide([binary, "--version"], { stdout: "pipe", stderr: "pipe" })
|
||||
const output = await new Response(proc.stdout).text()
|
||||
await proc.exited
|
||||
if (proc.exitCode === 0) {
|
||||
@@ -140,4 +141,3 @@ export async function checkCommentChecker(): Promise<DependencyInfo> {
|
||||
path: resolvedPath,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -26,7 +26,7 @@ describe("model-resolution check", () => {
|
||||
// then: Should have category entries
|
||||
const visual = info.categories.find((c) => c.name === "visual-engineering")
|
||||
expect(visual).toBeDefined()
|
||||
expect(visual!.requirement.fallbackChain[0]?.model).toBe("gemini-3-pro")
|
||||
expect(visual!.requirement.fallbackChain[0]?.model).toBe("gemini-3.1-pro")
|
||||
expect(visual!.requirement.fallbackChain[0]?.providers).toContain("google")
|
||||
})
|
||||
})
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import { existsSync } from "node:fs"
|
||||
import { homedir } from "node:os"
|
||||
import { join } from "node:path"
|
||||
import { spawnWithWindowsHide } from "../../../shared/spawn-with-windows-hide"
|
||||
|
||||
import { OPENCODE_BINARIES } from "../constants"
|
||||
|
||||
@@ -110,7 +111,7 @@ export async function getOpenCodeVersion(
|
||||
): Promise<string | null> {
|
||||
try {
|
||||
const command = buildVersionCommand(binaryPath, platform)
|
||||
const processResult = Bun.spawn(command, { stdout: "pipe", stderr: "pipe" })
|
||||
const processResult = spawnWithWindowsHide(command, { stdout: "pipe", stderr: "pipe" })
|
||||
const output = await new Response(processResult.stdout).text()
|
||||
await processResult.exited
|
||||
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
import { spawnWithWindowsHide } from "../../../shared/spawn-with-windows-hide"
|
||||
|
||||
export interface GhCliInfo {
|
||||
installed: boolean
|
||||
version: string | null
|
||||
@@ -19,7 +21,7 @@ async function checkBinaryExists(binary: string): Promise<{ exists: boolean; pat
|
||||
|
||||
async function getGhVersion(): Promise<string | null> {
|
||||
try {
|
||||
const processResult = Bun.spawn(["gh", "--version"], { stdout: "pipe", stderr: "pipe" })
|
||||
const processResult = spawnWithWindowsHide(["gh", "--version"], { stdout: "pipe", stderr: "pipe" })
|
||||
const output = await new Response(processResult.stdout).text()
|
||||
await processResult.exited
|
||||
if (processResult.exitCode !== 0) return null
|
||||
@@ -38,7 +40,7 @@ async function getGhAuthStatus(): Promise<{
|
||||
error: string | null
|
||||
}> {
|
||||
try {
|
||||
const processResult = Bun.spawn(["gh", "auth", "status"], {
|
||||
const processResult = spawnWithWindowsHide(["gh", "auth", "status"], {
|
||||
stdout: "pipe",
|
||||
stderr: "pipe",
|
||||
env: { ...process.env, GH_NO_UPDATE_NOTIFIER: "1" },
|
||||
|
||||
@@ -17,14 +17,14 @@ export const CLI_AGENT_MODEL_REQUIREMENTS: Record<string, ModelRequirement> = {
|
||||
},
|
||||
hephaestus: {
|
||||
fallbackChain: [
|
||||
{ providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.3-codex", variant: "medium" },
|
||||
{ providers: ["openai", "opencode"], model: "gpt-5.3-codex", variant: "medium" },
|
||||
],
|
||||
requiresProvider: ["openai", "github-copilot", "opencode"],
|
||||
requiresProvider: ["openai", "opencode"],
|
||||
},
|
||||
oracle: {
|
||||
fallbackChain: [
|
||||
{ providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "high" },
|
||||
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" },
|
||||
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro", variant: "high" },
|
||||
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" },
|
||||
],
|
||||
},
|
||||
@@ -59,7 +59,7 @@ export const CLI_AGENT_MODEL_REQUIREMENTS: Record<string, ModelRequirement> = {
|
||||
{ providers: ["kimi-for-coding"], model: "k2p5" },
|
||||
{ providers: ["opencode"], model: "kimi-k2.5-free" },
|
||||
{ providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "high" },
|
||||
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro" },
|
||||
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro" },
|
||||
],
|
||||
},
|
||||
metis: {
|
||||
@@ -68,14 +68,14 @@ export const CLI_AGENT_MODEL_REQUIREMENTS: Record<string, ModelRequirement> = {
|
||||
{ providers: ["kimi-for-coding"], model: "k2p5" },
|
||||
{ providers: ["opencode"], model: "kimi-k2.5-free" },
|
||||
{ providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "high" },
|
||||
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" },
|
||||
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro", variant: "high" },
|
||||
],
|
||||
},
|
||||
momus: {
|
||||
fallbackChain: [
|
||||
{ providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "medium" },
|
||||
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" },
|
||||
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" },
|
||||
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro", variant: "high" },
|
||||
],
|
||||
},
|
||||
atlas: {
|
||||
@@ -84,7 +84,7 @@ export const CLI_AGENT_MODEL_REQUIREMENTS: Record<string, ModelRequirement> = {
|
||||
{ providers: ["opencode"], model: "kimi-k2.5-free" },
|
||||
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-sonnet-4-5" },
|
||||
{ providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2" },
|
||||
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro" },
|
||||
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro" },
|
||||
],
|
||||
},
|
||||
}
|
||||
@@ -92,7 +92,7 @@ export const CLI_AGENT_MODEL_REQUIREMENTS: Record<string, ModelRequirement> = {
|
||||
export const CLI_CATEGORY_MODEL_REQUIREMENTS: Record<string, ModelRequirement> = {
|
||||
"visual-engineering": {
|
||||
fallbackChain: [
|
||||
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" },
|
||||
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro", variant: "high" },
|
||||
{ providers: ["zai-coding-plan"], model: "glm-5" },
|
||||
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" },
|
||||
{ providers: ["kimi-for-coding"], model: "k2p5" },
|
||||
@@ -100,26 +100,26 @@ export const CLI_CATEGORY_MODEL_REQUIREMENTS: Record<string, ModelRequirement> =
|
||||
},
|
||||
ultrabrain: {
|
||||
fallbackChain: [
|
||||
{ providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.3-codex", variant: "xhigh" },
|
||||
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" },
|
||||
{ providers: ["openai", "opencode"], model: "gpt-5.3-codex", variant: "xhigh" },
|
||||
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro", variant: "high" },
|
||||
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" },
|
||||
],
|
||||
},
|
||||
deep: {
|
||||
fallbackChain: [
|
||||
{ providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.3-codex", variant: "medium" },
|
||||
{ providers: ["openai", "opencode"], model: "gpt-5.3-codex", variant: "medium" },
|
||||
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" },
|
||||
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" },
|
||||
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro", variant: "high" },
|
||||
],
|
||||
requiresModel: "gpt-5.3-codex",
|
||||
},
|
||||
artistry: {
|
||||
fallbackChain: [
|
||||
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" },
|
||||
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro", variant: "high" },
|
||||
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" },
|
||||
{ providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2" },
|
||||
],
|
||||
requiresModel: "gemini-3-pro",
|
||||
requiresModel: "gemini-3.1-pro",
|
||||
},
|
||||
quick: {
|
||||
fallbackChain: [
|
||||
@@ -131,7 +131,7 @@ export const CLI_CATEGORY_MODEL_REQUIREMENTS: Record<string, ModelRequirement> =
|
||||
"unspecified-low": {
|
||||
fallbackChain: [
|
||||
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-sonnet-4-5" },
|
||||
{ providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.3-codex", variant: "medium" },
|
||||
{ providers: ["openai", "opencode"], model: "gpt-5.3-codex", variant: "medium" },
|
||||
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-flash" },
|
||||
],
|
||||
},
|
||||
@@ -139,7 +139,7 @@ export const CLI_CATEGORY_MODEL_REQUIREMENTS: Record<string, ModelRequirement> =
|
||||
fallbackChain: [
|
||||
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" },
|
||||
{ providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "high" },
|
||||
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro" },
|
||||
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro" },
|
||||
],
|
||||
},
|
||||
writing: {
|
||||
|
||||
@@ -421,16 +421,15 @@ describe("generateModelConfig", () => {
|
||||
expect(result.agents?.hephaestus?.variant).toBe("medium")
|
||||
})
|
||||
|
||||
test("Hephaestus is created when Copilot is available (github-copilot provider connected)", () => {
|
||||
test("Hephaestus is NOT created when only Copilot is available (gpt-5.3-codex unavailable on github-copilot)", () => {
|
||||
// #given
|
||||
const config = createConfig({ hasCopilot: true })
|
||||
|
||||
// #when
|
||||
const result = generateModelConfig(config)
|
||||
|
||||
// #then
|
||||
expect(result.agents?.hephaestus?.model).toBe("github-copilot/gpt-5.3-codex")
|
||||
expect(result.agents?.hephaestus?.variant).toBe("medium")
|
||||
// #then - hephaestus is omitted because gpt-5.3-codex is not available on github-copilot
|
||||
expect(result.agents?.hephaestus).toBeUndefined()
|
||||
})
|
||||
|
||||
test("Hephaestus is created when OpenCode Zen is available (opencode provider connected)", () => {
|
||||
|
||||
@@ -40,16 +40,16 @@ describe("transformModelForProvider", () => {
|
||||
expect(result).toBe("claude-haiku-4.5")
|
||||
})
|
||||
|
||||
test("transforms gemini-3-pro to gemini-3-pro-preview", () => {
|
||||
// #given github-copilot provider and gemini-3-pro model
|
||||
test("transforms gemini-3.1-pro to gemini-3.1-pro-preview", () => {
|
||||
// #given github-copilot provider and gemini-3.1-pro model
|
||||
const provider = "github-copilot"
|
||||
const model = "gemini-3-pro"
|
||||
const model = "gemini-3.1-pro"
|
||||
|
||||
// #when transformModelForProvider is called
|
||||
const result = transformModelForProvider(provider, model)
|
||||
|
||||
// #then should transform to gemini-3-pro-preview
|
||||
expect(result).toBe("gemini-3-pro-preview")
|
||||
// #then should transform to gemini-3.1-pro-preview
|
||||
expect(result).toBe("gemini-3.1-pro-preview")
|
||||
})
|
||||
|
||||
test("transforms gemini-3-flash to gemini-3-flash-preview", () => {
|
||||
@@ -64,16 +64,16 @@ describe("transformModelForProvider", () => {
|
||||
expect(result).toBe("gemini-3-flash-preview")
|
||||
})
|
||||
|
||||
test("prevents double transformation of gemini-3-pro-preview", () => {
|
||||
// #given github-copilot provider and gemini-3-pro-preview model (already transformed)
|
||||
test("prevents double transformation of gemini-3.1-pro-preview", () => {
|
||||
// #given github-copilot provider and gemini-3.1-pro-preview model (already transformed)
|
||||
const provider = "github-copilot"
|
||||
const model = "gemini-3-pro-preview"
|
||||
const model = "gemini-3.1-pro-preview"
|
||||
|
||||
// #when transformModelForProvider is called
|
||||
const result = transformModelForProvider(provider, model)
|
||||
|
||||
// #then should NOT become gemini-3-pro-preview-preview
|
||||
expect(result).toBe("gemini-3-pro-preview")
|
||||
// #then should NOT become gemini-3.1-pro-preview-preview
|
||||
expect(result).toBe("gemini-3.1-pro-preview")
|
||||
})
|
||||
|
||||
test("prevents double transformation of gemini-3-flash-preview", () => {
|
||||
@@ -102,16 +102,16 @@ describe("transformModelForProvider", () => {
|
||||
expect(result).toBe("gemini-3-flash-preview")
|
||||
})
|
||||
|
||||
test("transforms gemini-3-pro to gemini-3-pro-preview", () => {
|
||||
// #given google provider and gemini-3-pro model
|
||||
test("transforms gemini-3.1-pro to gemini-3.1-pro-preview", () => {
|
||||
// #given google provider and gemini-3.1-pro model
|
||||
const provider = "google"
|
||||
const model = "gemini-3-pro"
|
||||
const model = "gemini-3.1-pro"
|
||||
|
||||
// #when transformModelForProvider is called
|
||||
const result = transformModelForProvider(provider, model)
|
||||
|
||||
// #then should transform to gemini-3-pro-preview
|
||||
expect(result).toBe("gemini-3-pro-preview")
|
||||
// #then should transform to gemini-3.1-pro-preview
|
||||
expect(result).toBe("gemini-3.1-pro-preview")
|
||||
})
|
||||
|
||||
test("passes through other gemini models unchanged", () => {
|
||||
@@ -138,16 +138,16 @@ describe("transformModelForProvider", () => {
|
||||
expect(result).toBe("gemini-3-flash-preview")
|
||||
})
|
||||
|
||||
test("prevents double transformation of gemini-3-pro-preview", () => {
|
||||
// #given google provider and gemini-3-pro-preview model (already transformed)
|
||||
test("prevents double transformation of gemini-3.1-pro-preview", () => {
|
||||
// #given google provider and gemini-3.1-pro-preview model (already transformed)
|
||||
const provider = "google"
|
||||
const model = "gemini-3-pro-preview"
|
||||
const model = "gemini-3.1-pro-preview"
|
||||
|
||||
// #when transformModelForProvider is called
|
||||
const result = transformModelForProvider(provider, model)
|
||||
|
||||
// #then should NOT become gemini-3-pro-preview-preview
|
||||
expect(result).toBe("gemini-3-pro-preview")
|
||||
// #then should NOT become gemini-3.1-pro-preview-preview
|
||||
expect(result).toBe("gemini-3.1-pro-preview")
|
||||
})
|
||||
|
||||
test("does not transform claude models for google provider", () => {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# src/cli/run/ — Non-Interactive Session Launcher
|
||||
|
||||
**Generated:** 2026-02-21
|
||||
**Generated:** 2026-02-24
|
||||
|
||||
## OVERVIEW
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import { describe, it, expect, spyOn } from "bun:test"
|
||||
const { describe, it, expect, spyOn } = require("bun:test")
|
||||
import type { RunContext } from "./types"
|
||||
import { createEventState } from "./events"
|
||||
import { handleSessionStatus, handleMessagePartUpdated, handleMessageUpdated, handleTuiToast } from "./event-handlers"
|
||||
@@ -235,9 +235,7 @@ describe("handleMessagePartUpdated", () => {
|
||||
|
||||
it("prints completion metadata once when assistant text part is completed", () => {
|
||||
// given
|
||||
const nowSpy = spyOn(Date, "now")
|
||||
nowSpy.mockReturnValueOnce(1000)
|
||||
nowSpy.mockReturnValueOnce(3400)
|
||||
const nowSpy = spyOn(Date, "now").mockReturnValue(3400)
|
||||
|
||||
const ctx = createMockContext("ses_main")
|
||||
const state = createEventState()
|
||||
@@ -259,6 +257,7 @@ describe("handleMessagePartUpdated", () => {
|
||||
} as any,
|
||||
state,
|
||||
)
|
||||
state.messageStartedAtById["msg_1"] = 1000
|
||||
|
||||
// when
|
||||
handleMessagePartUpdated(
|
||||
|
||||
@@ -7,6 +7,8 @@ export interface EventState {
|
||||
currentTool: string | null
|
||||
/** Set to true when the main session has produced meaningful work (text, tool call, or tool result) */
|
||||
hasReceivedMeaningfulWork: boolean
|
||||
/** Timestamp of the last received event (for watchdog detection) */
|
||||
lastEventTimestamp: number
|
||||
/** Count of assistant messages for the main session */
|
||||
messageCount: number
|
||||
/** Current agent name from the latest assistant message */
|
||||
@@ -54,6 +56,7 @@ export function createEventState(): EventState {
|
||||
lastPartText: "",
|
||||
currentTool: null,
|
||||
hasReceivedMeaningfulWork: false,
|
||||
lastEventTimestamp: Date.now(),
|
||||
messageCount: 0,
|
||||
currentAgent: null,
|
||||
currentModel: null,
|
||||
|
||||
@@ -35,6 +35,9 @@ export async function processEvents(
|
||||
logEventVerbose(ctx, payload)
|
||||
}
|
||||
|
||||
// Update last event timestamp for watchdog detection
|
||||
state.lastEventTimestamp = Date.now()
|
||||
|
||||
handleSessionError(ctx, payload, state)
|
||||
handleSessionIdle(ctx, payload, state)
|
||||
handleSessionStatus(ctx, payload, state)
|
||||
|
||||
@@ -3,6 +3,7 @@ import type { RunResult } from "./types"
|
||||
import { createJsonOutputManager } from "./json-output"
|
||||
import { resolveSession } from "./session-resolver"
|
||||
import { executeOnCompleteHook } from "./on-complete-hook"
|
||||
import * as spawnWithWindowsHideModule from "../../shared/spawn-with-windows-hide"
|
||||
import type { OpencodeClient } from "./types"
|
||||
import * as originalSdk from "@opencode-ai/sdk"
|
||||
import * as originalPortUtils from "../../shared/port-utils"
|
||||
@@ -147,7 +148,7 @@ describe("integration: --session-id", () => {
|
||||
const result = resolveSession({ client: mockClient, sessionId, directory: "/test" })
|
||||
|
||||
// then
|
||||
await expect(result).rejects.toThrow(`Session not found: ${sessionId}`)
|
||||
expect(result).rejects.toThrow(`Session not found: ${sessionId}`)
|
||||
expect(mockClient.session.get).toHaveBeenCalledWith({
|
||||
path: { id: sessionId },
|
||||
query: { directory: "/test" },
|
||||
@@ -161,10 +162,13 @@ describe("integration: --on-complete", () => {
|
||||
|
||||
beforeEach(() => {
|
||||
spyOn(console, "error").mockImplementation(() => {})
|
||||
spawnSpy = spyOn(Bun, "spawn").mockReturnValue({
|
||||
spawnSpy = spyOn(spawnWithWindowsHideModule, "spawnWithWindowsHide").mockReturnValue({
|
||||
exited: Promise.resolve(0),
|
||||
exitCode: 0,
|
||||
} as unknown as ReturnType<typeof Bun.spawn>)
|
||||
stdout: undefined,
|
||||
stderr: undefined,
|
||||
kill: () => {},
|
||||
} satisfies ReturnType<typeof spawnWithWindowsHideModule.spawnWithWindowsHide>)
|
||||
})
|
||||
|
||||
afterEach(() => {
|
||||
@@ -186,7 +190,7 @@ describe("integration: --on-complete", () => {
|
||||
|
||||
// then
|
||||
expect(spawnSpy).toHaveBeenCalledTimes(1)
|
||||
const [_, options] = spawnSpy.mock.calls[0] as Parameters<typeof Bun.spawn>
|
||||
const [_, options] = spawnSpy.mock.calls[0] as Parameters<typeof spawnWithWindowsHideModule.spawnWithWindowsHide>
|
||||
expect(options?.env?.SESSION_ID).toBe("session-123")
|
||||
expect(options?.env?.EXIT_CODE).toBe("0")
|
||||
expect(options?.env?.DURATION_MS).toBe("5000")
|
||||
@@ -208,10 +212,13 @@ describe("integration: option combinations", () => {
|
||||
spyOn(console, "error").mockImplementation(() => {})
|
||||
mockStdout = createMockWriteStream()
|
||||
mockStderr = createMockWriteStream()
|
||||
spawnSpy = spyOn(Bun, "spawn").mockReturnValue({
|
||||
spawnSpy = spyOn(spawnWithWindowsHideModule, "spawnWithWindowsHide").mockReturnValue({
|
||||
exited: Promise.resolve(0),
|
||||
exitCode: 0,
|
||||
} as unknown as ReturnType<typeof Bun.spawn>)
|
||||
stdout: undefined,
|
||||
stderr: undefined,
|
||||
kill: () => {},
|
||||
} satisfies ReturnType<typeof spawnWithWindowsHideModule.spawnWithWindowsHide>)
|
||||
})
|
||||
|
||||
afterEach(() => {
|
||||
@@ -249,9 +256,9 @@ describe("integration: option combinations", () => {
|
||||
const emitted = mockStdout.writes[0]!
|
||||
expect(() => JSON.parse(emitted)).not.toThrow()
|
||||
expect(spawnSpy).toHaveBeenCalledTimes(1)
|
||||
const [args] = spawnSpy.mock.calls[0] as Parameters<typeof Bun.spawn>
|
||||
const [args] = spawnSpy.mock.calls[0] as Parameters<typeof spawnWithWindowsHideModule.spawnWithWindowsHide>
|
||||
expect(args).toEqual(["sh", "-c", "echo done"])
|
||||
const [_, options] = spawnSpy.mock.calls[0] as Parameters<typeof Bun.spawn>
|
||||
const [_, options] = spawnSpy.mock.calls[0] as Parameters<typeof spawnWithWindowsHideModule.spawnWithWindowsHide>
|
||||
expect(options?.env?.SESSION_ID).toBe("session-123")
|
||||
expect(options?.env?.EXIT_CODE).toBe("0")
|
||||
expect(options?.env?.DURATION_MS).toBe("5000")
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import { describe, it, expect, spyOn, beforeEach, afterEach } from "bun:test"
|
||||
import * as spawnWithWindowsHideModule from "../../shared/spawn-with-windows-hide"
|
||||
import { executeOnCompleteHook } from "./on-complete-hook"
|
||||
|
||||
describe("executeOnCompleteHook", () => {
|
||||
@@ -6,7 +7,10 @@ describe("executeOnCompleteHook", () => {
|
||||
return {
|
||||
exited: Promise.resolve(exitCode),
|
||||
exitCode,
|
||||
} as unknown as ReturnType<typeof Bun.spawn>
|
||||
stdout: undefined,
|
||||
stderr: undefined,
|
||||
kill: () => {},
|
||||
} satisfies ReturnType<typeof spawnWithWindowsHideModule.spawnWithWindowsHide>
|
||||
}
|
||||
|
||||
let consoleErrorSpy: ReturnType<typeof spyOn<typeof console, "error">>
|
||||
@@ -21,7 +25,7 @@ describe("executeOnCompleteHook", () => {
|
||||
|
||||
it("executes command with correct env vars", async () => {
|
||||
// given
|
||||
const spawnSpy = spyOn(Bun, "spawn").mockReturnValue(createProc(0))
|
||||
const spawnSpy = spyOn(spawnWithWindowsHideModule, "spawnWithWindowsHide").mockReturnValue(createProc(0))
|
||||
|
||||
try {
|
||||
// when
|
||||
@@ -35,7 +39,7 @@ describe("executeOnCompleteHook", () => {
|
||||
|
||||
// then
|
||||
expect(spawnSpy).toHaveBeenCalledTimes(1)
|
||||
const [args, options] = spawnSpy.mock.calls[0] as Parameters<typeof Bun.spawn>
|
||||
const [args, options] = spawnSpy.mock.calls[0] as Parameters<typeof spawnWithWindowsHideModule.spawnWithWindowsHide>
|
||||
|
||||
expect(args).toEqual(["sh", "-c", "echo test"])
|
||||
expect(options?.env?.SESSION_ID).toBe("session-123")
|
||||
@@ -51,7 +55,7 @@ describe("executeOnCompleteHook", () => {
|
||||
|
||||
it("env var values are strings", async () => {
|
||||
// given
|
||||
const spawnSpy = spyOn(Bun, "spawn").mockReturnValue(createProc(0))
|
||||
const spawnSpy = spyOn(spawnWithWindowsHideModule, "spawnWithWindowsHide").mockReturnValue(createProc(0))
|
||||
|
||||
try {
|
||||
// when
|
||||
@@ -64,7 +68,7 @@ describe("executeOnCompleteHook", () => {
|
||||
})
|
||||
|
||||
// then
|
||||
const [_, options] = spawnSpy.mock.calls[0] as Parameters<typeof Bun.spawn>
|
||||
const [_, options] = spawnSpy.mock.calls[0] as Parameters<typeof spawnWithWindowsHideModule.spawnWithWindowsHide>
|
||||
|
||||
expect(options?.env?.EXIT_CODE).toBe("1")
|
||||
expect(options?.env?.EXIT_CODE).toBeTypeOf("string")
|
||||
@@ -79,7 +83,7 @@ describe("executeOnCompleteHook", () => {
|
||||
|
||||
it("empty command string is no-op", async () => {
|
||||
// given
|
||||
const spawnSpy = spyOn(Bun, "spawn").mockReturnValue(createProc(0))
|
||||
const spawnSpy = spyOn(spawnWithWindowsHideModule, "spawnWithWindowsHide").mockReturnValue(createProc(0))
|
||||
|
||||
try {
|
||||
// when
|
||||
@@ -100,7 +104,7 @@ describe("executeOnCompleteHook", () => {
|
||||
|
||||
it("whitespace-only command is no-op", async () => {
|
||||
// given
|
||||
const spawnSpy = spyOn(Bun, "spawn").mockReturnValue(createProc(0))
|
||||
const spawnSpy = spyOn(spawnWithWindowsHideModule, "spawnWithWindowsHide").mockReturnValue(createProc(0))
|
||||
|
||||
try {
|
||||
// when
|
||||
@@ -121,11 +125,11 @@ describe("executeOnCompleteHook", () => {
|
||||
|
||||
it("command failure logs warning but does not throw", async () => {
|
||||
// given
|
||||
const spawnSpy = spyOn(Bun, "spawn").mockReturnValue(createProc(1))
|
||||
const spawnSpy = spyOn(spawnWithWindowsHideModule, "spawnWithWindowsHide").mockReturnValue(createProc(1))
|
||||
|
||||
try {
|
||||
// when
|
||||
await expect(
|
||||
expect(
|
||||
executeOnCompleteHook({
|
||||
command: "false",
|
||||
sessionId: "session-123",
|
||||
@@ -149,13 +153,13 @@ describe("executeOnCompleteHook", () => {
|
||||
it("spawn error logs warning but does not throw", async () => {
|
||||
// given
|
||||
const spawnError = new Error("Command not found")
|
||||
const spawnSpy = spyOn(Bun, "spawn").mockImplementation(() => {
|
||||
const spawnSpy = spyOn(spawnWithWindowsHideModule, "spawnWithWindowsHide").mockImplementation(() => {
|
||||
throw spawnError
|
||||
})
|
||||
|
||||
try {
|
||||
// when
|
||||
await expect(
|
||||
expect(
|
||||
executeOnCompleteHook({
|
||||
command: "nonexistent-command",
|
||||
sessionId: "session-123",
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import pc from "picocolors"
|
||||
import { spawnWithWindowsHide } from "../../shared/spawn-with-windows-hide"
|
||||
|
||||
export async function executeOnCompleteHook(options: {
|
||||
command: string
|
||||
@@ -17,7 +18,7 @@ export async function executeOnCompleteHook(options: {
|
||||
console.error(pc.dim(`Running on-complete hook: ${trimmedCommand}`))
|
||||
|
||||
try {
|
||||
const proc = Bun.spawn(["sh", "-c", trimmedCommand], {
|
||||
const proc = spawnWithWindowsHide(["sh", "-c", trimmedCommand], {
|
||||
env: {
|
||||
...process.env,
|
||||
SESSION_ID: sessionId,
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import { delimiter, dirname, join } from "node:path"
|
||||
import { spawnWithWindowsHide } from "../../shared/spawn-with-windows-hide"
|
||||
|
||||
const OPENCODE_COMMANDS = ["opencode", "opencode-desktop"] as const
|
||||
const WINDOWS_SUFFIXES = ["", ".exe", ".cmd", ".bat", ".ps1"] as const
|
||||
@@ -41,7 +42,7 @@ export function collectCandidateBinaryPaths(
|
||||
|
||||
export async function canExecuteBinary(binaryPath: string): Promise<boolean> {
|
||||
try {
|
||||
const proc = Bun.spawn([binaryPath, "--version"], {
|
||||
const proc = spawnWithWindowsHide([binaryPath, "--version"], {
|
||||
stdout: "pipe",
|
||||
stderr: "pipe",
|
||||
})
|
||||
|
||||
@@ -8,11 +8,15 @@ const DEFAULT_POLL_INTERVAL_MS = 500
|
||||
const DEFAULT_REQUIRED_CONSECUTIVE = 1
|
||||
const ERROR_GRACE_CYCLES = 3
|
||||
const MIN_STABILIZATION_MS = 1_000
|
||||
const DEFAULT_EVENT_WATCHDOG_MS = 30_000 // 30 seconds
|
||||
const DEFAULT_SECONDARY_MEANINGFUL_WORK_TIMEOUT_MS = 60_000 // 60 seconds
|
||||
|
||||
export interface PollOptions {
|
||||
pollIntervalMs?: number
|
||||
requiredConsecutive?: number
|
||||
minStabilizationMs?: number
|
||||
eventWatchdogMs?: number
|
||||
secondaryMeaningfulWorkTimeoutMs?: number
|
||||
}
|
||||
|
||||
export async function pollForCompletion(
|
||||
@@ -28,9 +32,15 @@ export async function pollForCompletion(
|
||||
options.minStabilizationMs ?? MIN_STABILIZATION_MS
|
||||
const minStabilizationMs =
|
||||
rawMinStabilizationMs > 0 ? rawMinStabilizationMs : MIN_STABILIZATION_MS
|
||||
const eventWatchdogMs =
|
||||
options.eventWatchdogMs ?? DEFAULT_EVENT_WATCHDOG_MS
|
||||
const secondaryMeaningfulWorkTimeoutMs =
|
||||
options.secondaryMeaningfulWorkTimeoutMs ??
|
||||
DEFAULT_SECONDARY_MEANINGFUL_WORK_TIMEOUT_MS
|
||||
let consecutiveCompleteChecks = 0
|
||||
let errorCycleCount = 0
|
||||
let firstWorkTimestamp: number | null = null
|
||||
let secondaryTimeoutChecked = false
|
||||
const pollStartTimestamp = Date.now()
|
||||
|
||||
while (!abortController.signal.aborted) {
|
||||
@@ -59,7 +69,37 @@ export async function pollForCompletion(
|
||||
errorCycleCount = 0
|
||||
}
|
||||
|
||||
const mainSessionStatus = await getMainSessionStatus(ctx)
|
||||
// Watchdog: if no events received for N seconds, verify session status via API
|
||||
let mainSessionStatus: "idle" | "busy" | "retry" | null = null
|
||||
if (eventState.lastEventTimestamp !== null) {
|
||||
const timeSinceLastEvent = Date.now() - eventState.lastEventTimestamp
|
||||
if (timeSinceLastEvent > eventWatchdogMs) {
|
||||
// Events stopped coming - verify actual session state
|
||||
console.log(
|
||||
pc.yellow(
|
||||
`\n No events for ${Math.round(
|
||||
timeSinceLastEvent / 1000
|
||||
)}s, verifying session status...`
|
||||
)
|
||||
)
|
||||
|
||||
// Force check session status directly
|
||||
mainSessionStatus = await getMainSessionStatus(ctx)
|
||||
if (mainSessionStatus === "idle") {
|
||||
eventState.mainSessionIdle = true
|
||||
} else if (mainSessionStatus === "busy" || mainSessionStatus === "retry") {
|
||||
eventState.mainSessionIdle = false
|
||||
}
|
||||
|
||||
// Reset timestamp to avoid repeated checks
|
||||
eventState.lastEventTimestamp = Date.now()
|
||||
}
|
||||
}
|
||||
|
||||
// Only call getMainSessionStatus if watchdog didn't already check
|
||||
if (mainSessionStatus === null) {
|
||||
mainSessionStatus = await getMainSessionStatus(ctx)
|
||||
}
|
||||
if (mainSessionStatus === "busy" || mainSessionStatus === "retry") {
|
||||
eventState.mainSessionIdle = false
|
||||
} else if (mainSessionStatus === "idle") {
|
||||
@@ -81,6 +121,50 @@ export async function pollForCompletion(
|
||||
consecutiveCompleteChecks = 0
|
||||
continue
|
||||
}
|
||||
|
||||
// Secondary timeout: if we've been polling for reasonable time but haven't
|
||||
// received meaningful work via events, check if there's active work via API
|
||||
// Only check once to avoid unnecessary API calls every poll cycle
|
||||
if (
|
||||
Date.now() - pollStartTimestamp > secondaryMeaningfulWorkTimeoutMs &&
|
||||
!secondaryTimeoutChecked
|
||||
) {
|
||||
secondaryTimeoutChecked = true
|
||||
// Check if session actually has pending work (children, todos, etc.)
|
||||
const childrenRes = await ctx.client.session.children({
|
||||
path: { id: ctx.sessionID },
|
||||
query: { directory: ctx.directory },
|
||||
})
|
||||
const children = normalizeSDKResponse(childrenRes, [] as unknown[])
|
||||
const todosRes = await ctx.client.session.todo({
|
||||
path: { id: ctx.sessionID },
|
||||
query: { directory: ctx.directory },
|
||||
})
|
||||
const todos = normalizeSDKResponse(todosRes, [] as unknown[])
|
||||
|
||||
const hasActiveChildren =
|
||||
Array.isArray(children) && children.length > 0
|
||||
const hasActiveTodos =
|
||||
Array.isArray(todos) &&
|
||||
todos.some(
|
||||
(t: unknown) =>
|
||||
(t as { status?: string })?.status !== "completed" &&
|
||||
(t as { status?: string })?.status !== "cancelled"
|
||||
)
|
||||
const hasActiveWork = hasActiveChildren || hasActiveTodos
|
||||
|
||||
if (hasActiveWork) {
|
||||
// Assume meaningful work is happening even without events
|
||||
eventState.hasReceivedMeaningfulWork = true
|
||||
console.log(
|
||||
pc.yellow(
|
||||
`\n No meaningful work events for ${Math.round(
|
||||
secondaryMeaningfulWorkTimeoutMs / 1000
|
||||
)}s but session has active work - assuming in progress`
|
||||
)
|
||||
)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Track when first meaningful work was received
|
||||
if (firstWorkTimestamp === null) {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# src/config/ — Zod v4 Schema System
|
||||
|
||||
**Generated:** 2026-02-21
|
||||
**Generated:** 2026-02-24
|
||||
|
||||
## OVERVIEW
|
||||
|
||||
|
||||
@@ -59,7 +59,9 @@ export const AgentOverridesSchema = z.object({
|
||||
build: AgentOverrideConfigSchema.optional(),
|
||||
plan: AgentOverrideConfigSchema.optional(),
|
||||
sisyphus: AgentOverrideConfigSchema.optional(),
|
||||
hephaestus: AgentOverrideConfigSchema.optional(),
|
||||
hephaestus: AgentOverrideConfigSchema.extend({
|
||||
allow_non_gpt_model: z.boolean().optional(),
|
||||
}).optional(),
|
||||
"sisyphus-junior": AgentOverrideConfigSchema.optional(),
|
||||
"OpenCode-Builder": AgentOverrideConfigSchema.optional(),
|
||||
prometheus: AgentOverrideConfigSchema.optional(),
|
||||
|
||||
@@ -20,6 +20,7 @@ export const CategoryConfigSchema = z.object({
|
||||
textVerbosity: z.enum(["low", "medium", "high"]).optional(),
|
||||
tools: z.record(z.string(), z.boolean()).optional(),
|
||||
prompt_append: z.string().optional(),
|
||||
max_prompt_tokens: z.number().int().positive().optional(),
|
||||
/** Mark agent as unstable - forces background mode for monitoring. Auto-enabled for gemini/minimax models. */
|
||||
is_unstable_agent: z.boolean().optional(),
|
||||
/** Disable this category. Disabled categories are excluded from task delegation. */
|
||||
|
||||
@@ -27,7 +27,7 @@ export const OhMyOpenCodeConfigSchema = z.object({
|
||||
/** Default agent name for `oh-my-opencode run` (env: OPENCODE_DEFAULT_AGENT) */
|
||||
default_run_agent: z.string().optional(),
|
||||
disabled_mcps: z.array(AnyMcpNameSchema).optional(),
|
||||
disabled_agents: z.array(BuiltinAgentNameSchema).optional(),
|
||||
disabled_agents: z.array(z.string()).optional(),
|
||||
disabled_skills: z.array(BuiltinSkillNameSchema).optional(),
|
||||
disabled_hooks: z.array(z.string()).optional(),
|
||||
disabled_commands: z.array(BuiltinCommandNameSchema).optional(),
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# src/features/ — 19 Feature Modules
|
||||
|
||||
**Generated:** 2026-02-21
|
||||
**Generated:** 2026-02-24
|
||||
|
||||
## OVERVIEW
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# src/features/background-agent/ — Core Orchestration Engine
|
||||
|
||||
**Generated:** 2026-02-21
|
||||
**Generated:** 2026-02-24
|
||||
|
||||
## OVERVIEW
|
||||
|
||||
|
||||
@@ -34,7 +34,7 @@ describe("ConcurrencyManager.getConcurrencyLimit", () => {
|
||||
test("should return provider limit even when modelConcurrency exists but doesn't match", () => {
|
||||
// given
|
||||
const config: BackgroundTaskConfig = {
|
||||
modelConcurrency: { "google/gemini-3-pro": 5 },
|
||||
modelConcurrency: { "google/gemini-3.1-pro": 5 },
|
||||
providerConcurrency: { anthropic: 3 }
|
||||
}
|
||||
const manager = new ConcurrencyManager(config)
|
||||
@@ -95,7 +95,7 @@ describe("ConcurrencyManager.getConcurrencyLimit", () => {
|
||||
// when
|
||||
const modelLimit = manager.getConcurrencyLimit("anthropic/claude-sonnet-4-6")
|
||||
const providerLimit = manager.getConcurrencyLimit("anthropic/claude-opus-4-6")
|
||||
const defaultLimit = manager.getConcurrencyLimit("google/gemini-3-pro")
|
||||
const defaultLimit = manager.getConcurrencyLimit("google/gemini-3.1-pro")
|
||||
|
||||
// then
|
||||
expect(modelLimit).toBe(10)
|
||||
|
||||
@@ -191,6 +191,10 @@ function getPendingByParent(manager: BackgroundManager): Map<string, Set<string>
|
||||
return (manager as unknown as { pendingByParent: Map<string, Set<string>> }).pendingByParent
|
||||
}
|
||||
|
||||
function getPendingNotifications(manager: BackgroundManager): Map<string, string[]> {
|
||||
return (manager as unknown as { pendingNotifications: Map<string, string[]> }).pendingNotifications
|
||||
}
|
||||
|
||||
function getCompletionTimers(manager: BackgroundManager): Map<string, ReturnType<typeof setTimeout>> {
|
||||
return (manager as unknown as { completionTimers: Map<string, ReturnType<typeof setTimeout>> }).completionTimers
|
||||
}
|
||||
@@ -1057,6 +1061,49 @@ describe("BackgroundManager.notifyParentSession - aborted parent", () => {
|
||||
|
||||
manager.shutdown()
|
||||
})
|
||||
|
||||
test("should queue notification when promptAsync aborts while parent is idle", async () => {
|
||||
//#given
|
||||
const promptMock = async () => {
|
||||
const error = new Error("Request aborted while waiting for input")
|
||||
error.name = "MessageAbortedError"
|
||||
throw error
|
||||
}
|
||||
const client = {
|
||||
session: {
|
||||
prompt: promptMock,
|
||||
promptAsync: promptMock,
|
||||
abort: async () => ({}),
|
||||
messages: async () => ({ data: [] }),
|
||||
},
|
||||
}
|
||||
const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput)
|
||||
const task: BackgroundTask = {
|
||||
id: "task-aborted-idle-queue",
|
||||
sessionID: "session-child",
|
||||
parentSessionID: "session-parent",
|
||||
parentMessageID: "msg-parent",
|
||||
description: "task idle queue",
|
||||
prompt: "test",
|
||||
agent: "explore",
|
||||
status: "completed",
|
||||
startedAt: new Date(),
|
||||
completedAt: new Date(),
|
||||
}
|
||||
getPendingByParent(manager).set("session-parent", new Set([task.id]))
|
||||
|
||||
//#when
|
||||
await (manager as unknown as { notifyParentSession: (task: BackgroundTask) => Promise<void> })
|
||||
.notifyParentSession(task)
|
||||
|
||||
//#then
|
||||
const queuedNotifications = getPendingNotifications(manager).get("session-parent") ?? []
|
||||
expect(queuedNotifications).toHaveLength(1)
|
||||
expect(queuedNotifications[0]).toContain("<system-reminder>")
|
||||
expect(queuedNotifications[0]).toContain("[ALL BACKGROUND TASKS COMPLETE]")
|
||||
|
||||
manager.shutdown()
|
||||
})
|
||||
})
|
||||
|
||||
describe("BackgroundManager.notifyParentSession - notifications toggle", () => {
|
||||
@@ -1105,6 +1152,29 @@ describe("BackgroundManager.notifyParentSession - notifications toggle", () => {
|
||||
})
|
||||
})
|
||||
|
||||
describe("BackgroundManager.injectPendingNotificationsIntoChatMessage", () => {
|
||||
test("should prepend queued notifications to first text part and clear queue", () => {
|
||||
// given
|
||||
const manager = createBackgroundManager()
|
||||
manager.queuePendingNotification("session-parent", "<system-reminder>queued-one</system-reminder>")
|
||||
manager.queuePendingNotification("session-parent", "<system-reminder>queued-two</system-reminder>")
|
||||
const output = {
|
||||
parts: [{ type: "text", text: "User prompt" }],
|
||||
}
|
||||
|
||||
// when
|
||||
manager.injectPendingNotificationsIntoChatMessage(output, "session-parent")
|
||||
|
||||
// then
|
||||
expect(output.parts[0].text).toContain("<system-reminder>queued-one</system-reminder>")
|
||||
expect(output.parts[0].text).toContain("<system-reminder>queued-two</system-reminder>")
|
||||
expect(output.parts[0].text).toContain("User prompt")
|
||||
expect(getPendingNotifications(manager).get("session-parent")).toBeUndefined()
|
||||
|
||||
manager.shutdown()
|
||||
})
|
||||
})
|
||||
|
||||
function buildNotificationPromptBody(
|
||||
task: BackgroundTask,
|
||||
currentMessage: CurrentMessage | null
|
||||
@@ -2917,6 +2987,28 @@ describe("BackgroundManager.handleEvent - session.deleted cascade", () => {
|
||||
manager.shutdown()
|
||||
resetToastManager()
|
||||
})
|
||||
|
||||
test("should clean pending notifications for deleted sessions", () => {
|
||||
//#given
|
||||
const manager = createBackgroundManager()
|
||||
const sessionID = "session-pending-notifications"
|
||||
|
||||
manager.queuePendingNotification(sessionID, "<system-reminder>queued</system-reminder>")
|
||||
expect(getPendingNotifications(manager).get(sessionID)).toEqual([
|
||||
"<system-reminder>queued</system-reminder>",
|
||||
])
|
||||
|
||||
//#when
|
||||
manager.handleEvent({
|
||||
type: "session.deleted",
|
||||
properties: { info: { id: sessionID } },
|
||||
})
|
||||
|
||||
//#then
|
||||
expect(getPendingNotifications(manager).has(sessionID)).toBe(false)
|
||||
|
||||
manager.shutdown()
|
||||
})
|
||||
})
|
||||
|
||||
describe("BackgroundManager.handleEvent - session.error", () => {
|
||||
|
||||
@@ -93,6 +93,7 @@ export class BackgroundManager {
|
||||
|
||||
private tasks: Map<string, BackgroundTask>
|
||||
private notifications: Map<string, BackgroundTask[]>
|
||||
private pendingNotifications: Map<string, string[]>
|
||||
private pendingByParent: Map<string, Set<string>> // Track pending tasks per parent for batching
|
||||
private client: OpencodeClient
|
||||
private directory: string
|
||||
@@ -125,6 +126,7 @@ export class BackgroundManager {
|
||||
) {
|
||||
this.tasks = new Map()
|
||||
this.notifications = new Map()
|
||||
this.pendingNotifications = new Map()
|
||||
this.pendingByParent = new Map()
|
||||
this.client = ctx.client
|
||||
this.directory = ctx.directory
|
||||
@@ -828,6 +830,8 @@ export class BackgroundManager {
|
||||
tasksToCancel.set(descendant.id, descendant)
|
||||
}
|
||||
|
||||
this.pendingNotifications.delete(sessionID)
|
||||
|
||||
if (tasksToCancel.size === 0) return
|
||||
|
||||
for (const task of tasksToCancel.values()) {
|
||||
@@ -864,6 +868,13 @@ export class BackgroundManager {
|
||||
subagentSessions.delete(task.sessionID)
|
||||
}
|
||||
}
|
||||
|
||||
for (const task of tasksToCancel.values()) {
|
||||
if (task.parentSessionID) {
|
||||
this.pendingNotifications.delete(task.parentSessionID)
|
||||
}
|
||||
}
|
||||
|
||||
SessionCategoryRegistry.remove(sessionID)
|
||||
}
|
||||
|
||||
@@ -917,6 +928,32 @@ export class BackgroundManager {
|
||||
this.notifications.delete(sessionID)
|
||||
}
|
||||
|
||||
queuePendingNotification(sessionID: string | undefined, notification: string): void {
|
||||
if (!sessionID) return
|
||||
const existingNotifications = this.pendingNotifications.get(sessionID) ?? []
|
||||
existingNotifications.push(notification)
|
||||
this.pendingNotifications.set(sessionID, existingNotifications)
|
||||
}
|
||||
|
||||
injectPendingNotificationsIntoChatMessage(output: { parts: Array<{ type: string; text?: string; [key: string]: unknown }> }, sessionID: string): void {
|
||||
const pendingNotifications = this.pendingNotifications.get(sessionID)
|
||||
if (!pendingNotifications || pendingNotifications.length === 0) {
|
||||
return
|
||||
}
|
||||
|
||||
this.pendingNotifications.delete(sessionID)
|
||||
const notificationContent = pendingNotifications.join("\n\n")
|
||||
const firstTextPartIndex = output.parts.findIndex((part) => part.type === "text")
|
||||
|
||||
if (firstTextPartIndex === -1) {
|
||||
output.parts.unshift(createInternalAgentTextPart(notificationContent))
|
||||
return
|
||||
}
|
||||
|
||||
const originalText = output.parts[firstTextPartIndex].text ?? ""
|
||||
output.parts[firstTextPartIndex].text = `${notificationContent}\n\n---\n\n${originalText}`
|
||||
}
|
||||
|
||||
/**
|
||||
* Validates that a session has actual assistant/tool output before marking complete.
|
||||
* Prevents premature completion when session.idle fires before agent responds.
|
||||
@@ -1340,6 +1377,7 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
|
||||
taskId: task.id,
|
||||
parentSessionID: task.parentSessionID,
|
||||
})
|
||||
this.queuePendingNotification(task.parentSessionID, notification)
|
||||
} else {
|
||||
log("[background-agent] Failed to send notification:", error)
|
||||
}
|
||||
@@ -1568,6 +1606,7 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
|
||||
this.concurrencyManager.clear()
|
||||
this.tasks.clear()
|
||||
this.notifications.clear()
|
||||
this.pendingNotifications.clear()
|
||||
this.pendingByParent.clear()
|
||||
this.notificationQueueByParent.clear()
|
||||
this.queuesByKey.clear()
|
||||
|
||||
@@ -269,6 +269,71 @@ describe("boulder-state", () => {
|
||||
expect(progress.isComplete).toBe(false)
|
||||
})
|
||||
|
||||
test("should count space-indented unchecked checkbox", () => {
|
||||
// given - plan file with a two-space indented checkbox
|
||||
const planPath = join(TEST_DIR, "space-indented-plan.md")
|
||||
writeFileSync(planPath, `# Plan
|
||||
- [ ] indented task
|
||||
`)
|
||||
|
||||
// when
|
||||
const progress = getPlanProgress(planPath)
|
||||
|
||||
// then
|
||||
expect(progress.total).toBe(1)
|
||||
expect(progress.completed).toBe(0)
|
||||
expect(progress.isComplete).toBe(false)
|
||||
})
|
||||
|
||||
test("should count tab-indented unchecked checkbox", () => {
|
||||
// given - plan file with a tab-indented checkbox
|
||||
const planPath = join(TEST_DIR, "tab-indented-plan.md")
|
||||
writeFileSync(planPath, `# Plan
|
||||
- [ ] tab-indented task
|
||||
`)
|
||||
|
||||
// when
|
||||
const progress = getPlanProgress(planPath)
|
||||
|
||||
// then
|
||||
expect(progress.total).toBe(1)
|
||||
expect(progress.completed).toBe(0)
|
||||
expect(progress.isComplete).toBe(false)
|
||||
})
|
||||
|
||||
test("should count mixed top-level checked and indented unchecked checkboxes", () => {
|
||||
// given - plan file with checked top-level and unchecked indented task
|
||||
const planPath = join(TEST_DIR, "mixed-indented-plan.md")
|
||||
writeFileSync(planPath, `# Plan
|
||||
- [x] top-level completed task
|
||||
- [ ] nested unchecked task
|
||||
`)
|
||||
|
||||
// when
|
||||
const progress = getPlanProgress(planPath)
|
||||
|
||||
// then
|
||||
expect(progress.total).toBe(2)
|
||||
expect(progress.completed).toBe(1)
|
||||
expect(progress.isComplete).toBe(false)
|
||||
})
|
||||
|
||||
test("should count space-indented completed checkbox", () => {
|
||||
// given - plan file with a two-space indented completed checkbox
|
||||
const planPath = join(TEST_DIR, "indented-completed-plan.md")
|
||||
writeFileSync(planPath, `# Plan
|
||||
- [x] indented completed task
|
||||
`)
|
||||
|
||||
// when
|
||||
const progress = getPlanProgress(planPath)
|
||||
|
||||
// then
|
||||
expect(progress.total).toBe(1)
|
||||
expect(progress.completed).toBe(1)
|
||||
expect(progress.isComplete).toBe(true)
|
||||
})
|
||||
|
||||
test("should return isComplete true when all checked", () => {
|
||||
// given - all tasks completed
|
||||
const planPath = join(TEST_DIR, "complete-plan.md")
|
||||
|
||||
@@ -121,8 +121,8 @@ export function getPlanProgress(planPath: string): PlanProgress {
|
||||
const content = readFileSync(planPath, "utf-8")
|
||||
|
||||
// Match markdown checkboxes: - [ ] or - [x] or - [X]
|
||||
const uncheckedMatches = content.match(/^[-*]\s*\[\s*\]/gm) || []
|
||||
const checkedMatches = content.match(/^[-*]\s*\[[xX]\]/gm) || []
|
||||
const uncheckedMatches = content.match(/^\s*[-*]\s*\[\s*\]/gm) || []
|
||||
const checkedMatches = content.match(/^\s*[-*]\s*\[[xX]\]/gm) || []
|
||||
|
||||
const total = uncheckedMatches.length + checkedMatches.length
|
||||
const completed = checkedMatches.length
|
||||
@@ -150,7 +150,8 @@ export function getPlanName(planPath: string): string {
|
||||
export function createBoulderState(
|
||||
planPath: string,
|
||||
sessionId: string,
|
||||
agent?: string
|
||||
agent?: string,
|
||||
worktreePath?: string,
|
||||
): BoulderState {
|
||||
return {
|
||||
active_plan: planPath,
|
||||
@@ -158,5 +159,6 @@ export function createBoulderState(
|
||||
session_ids: [sessionId],
|
||||
plan_name: getPlanName(planPath),
|
||||
...(agent !== undefined ? { agent } : {}),
|
||||
...(worktreePath !== undefined ? { worktree_path: worktreePath } : {}),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -16,6 +16,8 @@ export interface BoulderState {
|
||||
plan_name: string
|
||||
/** Agent type to use when resuming (e.g., 'atlas') */
|
||||
agent?: string
|
||||
/** Absolute path to the git worktree root where work happens */
|
||||
worktree_path?: string
|
||||
}
|
||||
|
||||
export interface PlanProgress {
|
||||
|
||||
@@ -1,5 +1,14 @@
|
||||
export const START_WORK_TEMPLATE = `You are starting a Sisyphus work session.
|
||||
|
||||
## ARGUMENTS
|
||||
|
||||
- \`/start-work [plan-name] [--worktree <path>]\`
|
||||
- \`plan-name\` (optional): name or partial match of the plan to start
|
||||
- \`--worktree <path>\` (optional): absolute path to an existing git worktree to work in
|
||||
- If specified and valid: hook pre-sets worktree_path in boulder.json
|
||||
- If specified but invalid: you must run \`git worktree add <path> <branch>\` first
|
||||
- If omitted: you MUST choose or create a worktree (see Worktree Setup below)
|
||||
|
||||
## WHAT TO DO
|
||||
|
||||
1. **Find available plans**: Search for Prometheus-generated plan files at \`.sisyphus/plans/\`
|
||||
@@ -15,17 +24,24 @@ export const START_WORK_TEMPLATE = `You are starting a Sisyphus work session.
|
||||
- If ONE plan: auto-select it
|
||||
- If MULTIPLE plans: show list with timestamps, ask user to select
|
||||
|
||||
4. **Create/Update boulder.json**:
|
||||
4. **Worktree Setup** (when \`worktree_path\` not already set in boulder.json):
|
||||
1. \`git worktree list --porcelain\` — see available worktrees
|
||||
2. Create: \`git worktree add <absolute-path> <branch-or-HEAD>\`
|
||||
3. Update boulder.json to add \`"worktree_path": "<absolute-path>"\`
|
||||
4. All work happens inside that worktree directory
|
||||
|
||||
5. **Create/Update boulder.json**:
|
||||
\`\`\`json
|
||||
{
|
||||
"active_plan": "/absolute/path/to/plan.md",
|
||||
"started_at": "ISO_TIMESTAMP",
|
||||
"session_ids": ["session_id_1", "session_id_2"],
|
||||
"plan_name": "plan-name"
|
||||
"plan_name": "plan-name",
|
||||
"worktree_path": "/absolute/path/to/git/worktree"
|
||||
}
|
||||
\`\`\`
|
||||
|
||||
5. **Read the plan file** and start executing tasks according to atlas workflow
|
||||
6. **Read the plan file** and start executing tasks according to atlas workflow
|
||||
|
||||
## OUTPUT FORMAT
|
||||
|
||||
@@ -49,6 +65,7 @@ Resuming Work Session
|
||||
Active Plan: {plan-name}
|
||||
Progress: {completed}/{total} tasks
|
||||
Sessions: {count} (appending current session)
|
||||
Worktree: {worktree_path}
|
||||
|
||||
Reading plan and continuing from last incomplete task...
|
||||
\`\`\`
|
||||
@@ -60,6 +77,7 @@ Starting Work Session
|
||||
Plan: {plan-name}
|
||||
Session ID: {session_id}
|
||||
Started: {timestamp}
|
||||
Worktree: {worktree_path}
|
||||
|
||||
Reading plan and beginning execution...
|
||||
\`\`\`
|
||||
@@ -68,5 +86,6 @@ Reading plan and beginning execution...
|
||||
|
||||
- The session_id is injected by the hook - use it directly
|
||||
- Always update boulder.json BEFORE starting work
|
||||
- Always set worktree_path in boulder.json before executing any tasks
|
||||
- Read the FULL plan file before delegating any tasks
|
||||
- Follow atlas delegation protocols (7-section format)`
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# src/features/claude-tasks/ — Task Schema + Storage
|
||||
|
||||
**Generated:** 2026-02-21
|
||||
**Generated:** 2026-02-24
|
||||
|
||||
## OVERVIEW
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# src/features/mcp-oauth/ — OAuth 2.0 + PKCE + DCR for MCP Servers
|
||||
|
||||
**Generated:** 2026-02-21
|
||||
**Generated:** 2026-02-24
|
||||
|
||||
## OVERVIEW
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# src/features/opencode-skill-loader/ — 4-Scope Skill Discovery
|
||||
|
||||
**Generated:** 2026-02-21
|
||||
**Generated:** 2026-02-24
|
||||
|
||||
## OVERVIEW
|
||||
|
||||
|
||||
@@ -162,7 +162,7 @@ describe("TaskToastManager", () => {
|
||||
description: "Task with category default model",
|
||||
agent: "sisyphus-junior",
|
||||
isBackground: false,
|
||||
modelInfo: { model: "google/gemini-3-pro", type: "category-default" as const },
|
||||
modelInfo: { model: "google/gemini-3.1-pro", type: "category-default" as const },
|
||||
}
|
||||
|
||||
// when - addTask is called
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# src/features/tmux-subagent/ — Tmux Pane Management
|
||||
|
||||
**Generated:** 2026-02-21
|
||||
**Generated:** 2026-02-24
|
||||
|
||||
## OVERVIEW
|
||||
|
||||
|
||||
@@ -1,14 +1,14 @@
|
||||
# src/hooks/ — 44 Lifecycle Hooks
|
||||
# src/hooks/ — 46 Lifecycle Hooks
|
||||
|
||||
**Generated:** 2026-02-21
|
||||
**Generated:** 2026-02-24
|
||||
|
||||
## OVERVIEW
|
||||
|
||||
44 hooks across 39 directories + 6 standalone files. Three-tier composition: Core(35) + Continuation(7) + Skill(2). All hooks follow `createXXXHook(deps) → HookFunction` factory pattern.
|
||||
46 hooks across 39 directories + 6 standalone files. Three-tier composition: Core(37) + Continuation(7) + Skill(2). All hooks follow `createXXXHook(deps) → HookFunction` factory pattern.
|
||||
|
||||
## HOOK TIERS
|
||||
|
||||
### Tier 1: Session Hooks (22) — `create-session-hooks.ts`
|
||||
### Tier 1: Session Hooks (23) — `create-session-hooks.ts`
|
||||
## STRUCTURE
|
||||
```
|
||||
hooks/
|
||||
@@ -70,11 +70,12 @@ hooks/
|
||||
| questionLabelTruncator | tool.execute.before | Truncate long question labels |
|
||||
| taskResumeInfo | chat.message | Inject task context on resume |
|
||||
| anthropicEffort | chat.params | Adjust reasoning effort level |
|
||||
| jsonErrorRecovery | tool.execute.after | Detect JSON parse errors, inject correction reminder |
|
||||
| sisyphusGptHephaestusReminder | chat.message | Toast warning when Sisyphus uses GPT model |
|
||||
| taskReminder | tool.execute.after | Remind about task tools after 10 turns without usage |
|
||||
| modelFallback | chat.params | Provider-level model fallback on errors |
|
||||
| noSisyphusGpt | chat.message | Block Sisyphus from using GPT models (toast warning) |
|
||||
| noHephaestusNonGpt | chat.message | Block Hephaestus from using non-GPT models |
|
||||
| runtimeFallback | event | Auto-switch models on API provider errors |
|
||||
|
||||
### Tier 2: Tool Guard Hooks (9) — `create-tool-guard-hooks.ts`
|
||||
### Tier 2: Tool Guard Hooks (10) — `create-tool-guard-hooks.ts`
|
||||
|
||||
| Hook | Event | Purpose |
|
||||
|------|-------|---------|
|
||||
@@ -87,6 +88,7 @@ hooks/
|
||||
| tasksTodowriteDisabler | tool.execute.before | Disable TodoWrite when task system active |
|
||||
| writeExistingFileGuard | tool.execute.before | Require Read before Write on existing files |
|
||||
| hashlineReadEnhancer | tool.execute.after | Enhance Read output with line hashes |
|
||||
| jsonErrorRecovery | tool.execute.after | Detect JSON parse errors, inject correction reminder |
|
||||
|
||||
### Tier 3: Transform Hooks (4) — `create-transform-hooks.ts`
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# src/hooks/anthropic-context-window-limit-recovery/ — Multi-Strategy Context Recovery
|
||||
|
||||
**Generated:** 2026-02-21
|
||||
**Generated:** 2026-02-24
|
||||
|
||||
## OVERVIEW
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@ export function getOrCreateRetryState(
|
||||
): RetryState {
|
||||
let state = autoCompactState.retryStateBySession.get(sessionID)
|
||||
if (!state) {
|
||||
state = { attempt: 0, lastAttemptTime: 0 }
|
||||
state = { attempt: 0, lastAttemptTime: 0, firstAttemptTime: 0 }
|
||||
autoCompactState.retryStateBySession.set(sessionID, state)
|
||||
}
|
||||
return state
|
||||
|
||||
@@ -0,0 +1,122 @@
|
||||
import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test"
|
||||
import { runSummarizeRetryStrategy } from "./summarize-retry-strategy"
|
||||
import type { AutoCompactState, ParsedTokenLimitError, RetryState } from "./types"
|
||||
import type { OhMyOpenCodeConfig } from "../../config"
|
||||
|
||||
type TimeoutCall = {
|
||||
delay: number
|
||||
}
|
||||
|
||||
function createAutoCompactState(): AutoCompactState {
|
||||
return {
|
||||
pendingCompact: new Set<string>(),
|
||||
errorDataBySession: new Map<string, ParsedTokenLimitError>(),
|
||||
retryStateBySession: new Map<string, RetryState>(),
|
||||
truncateStateBySession: new Map(),
|
||||
emptyContentAttemptBySession: new Map(),
|
||||
compactionInProgress: new Set<string>(),
|
||||
}
|
||||
}
|
||||
|
||||
describe("runSummarizeRetryStrategy", () => {
|
||||
const sessionID = "ses_retry_timeout"
|
||||
const directory = "/tmp"
|
||||
let autoCompactState: AutoCompactState
|
||||
|
||||
const summarizeMock = mock(() => Promise.resolve())
|
||||
const showToastMock = mock(() => Promise.resolve())
|
||||
const client = {
|
||||
session: {
|
||||
summarize: summarizeMock,
|
||||
messages: mock(() => Promise.resolve({ data: [] })),
|
||||
promptAsync: mock(() => Promise.resolve()),
|
||||
revert: mock(() => Promise.resolve()),
|
||||
},
|
||||
tui: {
|
||||
showToast: showToastMock,
|
||||
},
|
||||
}
|
||||
|
||||
beforeEach(() => {
|
||||
autoCompactState = createAutoCompactState()
|
||||
summarizeMock.mockReset()
|
||||
showToastMock.mockReset()
|
||||
summarizeMock.mockResolvedValue(undefined)
|
||||
showToastMock.mockResolvedValue(undefined)
|
||||
})
|
||||
|
||||
afterEach(() => {
|
||||
globalThis.setTimeout = originalSetTimeout
|
||||
})
|
||||
|
||||
const originalSetTimeout = globalThis.setTimeout
|
||||
|
||||
test("stops retries when total summarize timeout is exceeded", async () => {
|
||||
//#given
|
||||
autoCompactState.pendingCompact.add(sessionID)
|
||||
autoCompactState.errorDataBySession.set(sessionID, {
|
||||
currentTokens: 250000,
|
||||
maxTokens: 200000,
|
||||
errorType: "token_limit_exceeded",
|
||||
})
|
||||
autoCompactState.retryStateBySession.set(sessionID, {
|
||||
attempt: 1,
|
||||
lastAttemptTime: Date.now(),
|
||||
firstAttemptTime: Date.now() - 130000,
|
||||
})
|
||||
|
||||
//#when
|
||||
await runSummarizeRetryStrategy({
|
||||
sessionID,
|
||||
msg: { providerID: "anthropic", modelID: "claude-sonnet-4-6" },
|
||||
autoCompactState,
|
||||
client: client as never,
|
||||
directory,
|
||||
pluginConfig: {} as OhMyOpenCodeConfig,
|
||||
})
|
||||
|
||||
//#then
|
||||
expect(summarizeMock).not.toHaveBeenCalled()
|
||||
expect(autoCompactState.pendingCompact.has(sessionID)).toBe(false)
|
||||
expect(autoCompactState.errorDataBySession.has(sessionID)).toBe(false)
|
||||
expect(autoCompactState.retryStateBySession.has(sessionID)).toBe(false)
|
||||
expect(showToastMock).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
body: expect.objectContaining({
|
||||
title: "Auto Compact Timed Out",
|
||||
}),
|
||||
}),
|
||||
)
|
||||
})
|
||||
|
||||
test("caps retry delay by remaining total timeout window", async () => {
|
||||
//#given
|
||||
const timeoutCalls: TimeoutCall[] = []
|
||||
globalThis.setTimeout = ((_: (...args: unknown[]) => void, delay?: number) => {
|
||||
timeoutCalls.push({ delay: delay ?? 0 })
|
||||
return 1 as unknown as ReturnType<typeof setTimeout>
|
||||
}) as typeof setTimeout
|
||||
|
||||
autoCompactState.retryStateBySession.set(sessionID, {
|
||||
attempt: 1,
|
||||
lastAttemptTime: Date.now(),
|
||||
firstAttemptTime: Date.now() - 119700,
|
||||
})
|
||||
summarizeMock.mockRejectedValueOnce(new Error("rate limited"))
|
||||
|
||||
//#when
|
||||
await runSummarizeRetryStrategy({
|
||||
sessionID,
|
||||
msg: { providerID: "anthropic", modelID: "claude-sonnet-4-6" },
|
||||
autoCompactState,
|
||||
client: client as never,
|
||||
directory,
|
||||
pluginConfig: {} as OhMyOpenCodeConfig,
|
||||
})
|
||||
|
||||
//#then
|
||||
expect(timeoutCalls.length).toBe(1)
|
||||
expect(timeoutCalls[0]!.delay).toBeGreaterThan(0)
|
||||
expect(timeoutCalls[0]!.delay).toBeLessThanOrEqual(500)
|
||||
})
|
||||
})
|
||||
@@ -7,6 +7,8 @@ import { sanitizeEmptyMessagesBeforeSummarize } from "./message-builder"
|
||||
import { fixEmptyMessages } from "./empty-content-recovery"
|
||||
|
||||
import { resolveCompactionModel } from "../shared/compaction-model-resolver"
|
||||
|
||||
const SUMMARIZE_RETRY_TOTAL_TIMEOUT_MS = 120_000
|
||||
export async function runSummarizeRetryStrategy(params: {
|
||||
sessionID: string
|
||||
msg: Record<string, unknown>
|
||||
@@ -18,6 +20,27 @@ export async function runSummarizeRetryStrategy(params: {
|
||||
messageIndex?: number
|
||||
}): Promise<void> {
|
||||
const retryState = getOrCreateRetryState(params.autoCompactState, params.sessionID)
|
||||
const now = Date.now()
|
||||
|
||||
if (retryState.firstAttemptTime === 0) {
|
||||
retryState.firstAttemptTime = now
|
||||
}
|
||||
|
||||
const elapsedTimeMs = now - retryState.firstAttemptTime
|
||||
if (elapsedTimeMs >= SUMMARIZE_RETRY_TOTAL_TIMEOUT_MS) {
|
||||
clearSessionState(params.autoCompactState, params.sessionID)
|
||||
await params.client.tui
|
||||
.showToast({
|
||||
body: {
|
||||
title: "Auto Compact Timed Out",
|
||||
message: "Compaction retries exceeded the timeout window. Please start a new session.",
|
||||
variant: "error",
|
||||
duration: 5000,
|
||||
},
|
||||
})
|
||||
.catch(() => {})
|
||||
return
|
||||
}
|
||||
|
||||
if (params.errorType?.includes("non-empty content")) {
|
||||
const attempt = getEmptyContentAttempt(params.autoCompactState, params.sessionID)
|
||||
@@ -52,6 +75,7 @@ export async function runSummarizeRetryStrategy(params: {
|
||||
|
||||
if (Date.now() - retryState.lastAttemptTime > 300000) {
|
||||
retryState.attempt = 0
|
||||
retryState.firstAttemptTime = Date.now()
|
||||
params.autoCompactState.truncateStateBySession.delete(params.sessionID)
|
||||
}
|
||||
|
||||
@@ -92,10 +116,26 @@ export async function runSummarizeRetryStrategy(params: {
|
||||
})
|
||||
return
|
||||
} catch {
|
||||
const remainingTimeMs = SUMMARIZE_RETRY_TOTAL_TIMEOUT_MS - (Date.now() - retryState.firstAttemptTime)
|
||||
if (remainingTimeMs <= 0) {
|
||||
clearSessionState(params.autoCompactState, params.sessionID)
|
||||
await params.client.tui
|
||||
.showToast({
|
||||
body: {
|
||||
title: "Auto Compact Timed Out",
|
||||
message: "Compaction retries exceeded the timeout window. Please start a new session.",
|
||||
variant: "error",
|
||||
duration: 5000,
|
||||
},
|
||||
})
|
||||
.catch(() => {})
|
||||
return
|
||||
}
|
||||
|
||||
const delay =
|
||||
RETRY_CONFIG.initialDelayMs *
|
||||
Math.pow(RETRY_CONFIG.backoffFactor, retryState.attempt - 1)
|
||||
const cappedDelay = Math.min(delay, RETRY_CONFIG.maxDelayMs)
|
||||
const cappedDelay = Math.min(delay, RETRY_CONFIG.maxDelayMs, remainingTimeMs)
|
||||
|
||||
setTimeout(() => {
|
||||
void runSummarizeRetryStrategy(params)
|
||||
|
||||
@@ -11,6 +11,7 @@ export interface ParsedTokenLimitError {
|
||||
export interface RetryState {
|
||||
attempt: number
|
||||
lastAttemptTime: number
|
||||
firstAttemptTime: number
|
||||
}
|
||||
|
||||
export interface TruncateState {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# src/hooks/atlas/ — Master Boulder Orchestrator
|
||||
|
||||
**Generated:** 2026-02-21
|
||||
**Generated:** 2026-02-24
|
||||
|
||||
## OVERVIEW
|
||||
|
||||
|
||||
@@ -14,6 +14,7 @@ export async function injectBoulderContinuation(input: {
|
||||
remaining: number
|
||||
total: number
|
||||
agent?: string
|
||||
worktreePath?: string
|
||||
backgroundManager?: BackgroundManager
|
||||
sessionState: SessionState
|
||||
}): Promise<void> {
|
||||
@@ -24,6 +25,7 @@ export async function injectBoulderContinuation(input: {
|
||||
remaining,
|
||||
total,
|
||||
agent,
|
||||
worktreePath,
|
||||
backgroundManager,
|
||||
sessionState,
|
||||
} = input
|
||||
@@ -37,9 +39,11 @@ export async function injectBoulderContinuation(input: {
|
||||
return
|
||||
}
|
||||
|
||||
const worktreeContext = worktreePath ? `\n\n[Worktree: ${worktreePath}]` : ""
|
||||
const prompt =
|
||||
BOULDER_CONTINUATION_PROMPT.replace(/{PLAN_NAME}/g, planName) +
|
||||
`\n\n[Status: ${total - remaining}/${total} completed, ${remaining} remaining]`
|
||||
`\n\n[Status: ${total - remaining}/${total} completed, ${remaining} remaining]` +
|
||||
worktreeContext
|
||||
|
||||
try {
|
||||
log(`[${HOOK_NAME}] Injecting boulder continuation`, { sessionID, planName, remaining })
|
||||
@@ -62,6 +66,7 @@ export async function injectBoulderContinuation(input: {
|
||||
log(`[${HOOK_NAME}] Boulder continuation injected`, { sessionID })
|
||||
} catch (err) {
|
||||
sessionState.promptFailureCount += 1
|
||||
sessionState.lastFailureAt = Date.now()
|
||||
log(`[${HOOK_NAME}] Boulder continuation failed`, {
|
||||
sessionID,
|
||||
error: String(err),
|
||||
|
||||
@@ -10,6 +10,7 @@ import { getLastAgentFromSession } from "./session-last-agent"
|
||||
import type { AtlasHookOptions, SessionState } from "./types"
|
||||
|
||||
const CONTINUATION_COOLDOWN_MS = 5000
|
||||
const FAILURE_BACKOFF_MS = 5 * 60 * 1000
|
||||
|
||||
export function createAtlasEventHandler(input: {
|
||||
ctx: PluginInput
|
||||
@@ -53,6 +54,7 @@ export function createAtlasEventHandler(input: {
|
||||
}
|
||||
|
||||
const state = getState(sessionID)
|
||||
const now = Date.now()
|
||||
|
||||
if (state.lastEventWasAbortError) {
|
||||
state.lastEventWasAbortError = false
|
||||
@@ -61,11 +63,18 @@ export function createAtlasEventHandler(input: {
|
||||
}
|
||||
|
||||
if (state.promptFailureCount >= 2) {
|
||||
log(`[${HOOK_NAME}] Skipped: continuation disabled after repeated prompt failures`, {
|
||||
sessionID,
|
||||
promptFailureCount: state.promptFailureCount,
|
||||
})
|
||||
return
|
||||
const timeSinceLastFailure = state.lastFailureAt !== undefined ? now - state.lastFailureAt : Number.POSITIVE_INFINITY
|
||||
if (timeSinceLastFailure < FAILURE_BACKOFF_MS) {
|
||||
log(`[${HOOK_NAME}] Skipped: continuation in backoff after repeated failures`, {
|
||||
sessionID,
|
||||
promptFailureCount: state.promptFailureCount,
|
||||
backoffRemaining: FAILURE_BACKOFF_MS - timeSinceLastFailure,
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
state.promptFailureCount = 0
|
||||
state.lastFailureAt = undefined
|
||||
}
|
||||
|
||||
const backgroundManager = options?.backgroundManager
|
||||
@@ -92,17 +101,15 @@ export function createAtlasEventHandler(input: {
|
||||
const lastAgentKey = getAgentConfigKey(lastAgent ?? "")
|
||||
const requiredAgent = getAgentConfigKey(boulderState.agent ?? "atlas")
|
||||
const lastAgentMatchesRequired = lastAgentKey === requiredAgent
|
||||
const boulderAgentWasNotExplicitlySet = boulderState.agent === undefined
|
||||
const boulderAgentDefaultsToAtlas = requiredAgent === "atlas"
|
||||
const lastAgentIsSisyphus = lastAgentKey === "sisyphus"
|
||||
const allowSisyphusWhenDefaultAtlas = boulderAgentWasNotExplicitlySet && boulderAgentDefaultsToAtlas && lastAgentIsSisyphus
|
||||
const agentMatches = lastAgentMatchesRequired || allowSisyphusWhenDefaultAtlas
|
||||
const allowSisyphusForAtlasBoulder = boulderAgentDefaultsToAtlas && lastAgentIsSisyphus
|
||||
const agentMatches = lastAgentMatchesRequired || allowSisyphusForAtlasBoulder
|
||||
if (!agentMatches) {
|
||||
log(`[${HOOK_NAME}] Skipped: last agent does not match boulder agent`, {
|
||||
sessionID,
|
||||
lastAgent: lastAgent ?? "unknown",
|
||||
requiredAgent,
|
||||
boulderAgentExplicitlySet: boulderState.agent !== undefined,
|
||||
})
|
||||
return
|
||||
}
|
||||
@@ -113,7 +120,6 @@ export function createAtlasEventHandler(input: {
|
||||
return
|
||||
}
|
||||
|
||||
const now = Date.now()
|
||||
if (state.lastContinuationInjectedAt && now - state.lastContinuationInjectedAt < CONTINUATION_COOLDOWN_MS) {
|
||||
log(`[${HOOK_NAME}] Skipped: continuation cooldown active`, {
|
||||
sessionID,
|
||||
@@ -132,6 +138,7 @@ export function createAtlasEventHandler(input: {
|
||||
remaining,
|
||||
total: progress.total,
|
||||
agent: boulderState.agent,
|
||||
worktreePath: boulderState.worktree_path,
|
||||
backgroundManager,
|
||||
sessionState: state,
|
||||
})
|
||||
|
||||
@@ -933,8 +933,8 @@ describe("atlas hook", () => {
|
||||
expect(callArgs.body.parts[0].text).toContain("2 remaining")
|
||||
})
|
||||
|
||||
test("should not inject when last agent does not match boulder agent", async () => {
|
||||
// given - boulder state with incomplete plan, but last agent does NOT match
|
||||
test("should inject when last agent is sisyphus and boulder targets atlas explicitly", async () => {
|
||||
// given - boulder explicitly set to atlas, but last agent is sisyphus (initial state after /start-work)
|
||||
const planPath = join(TEST_DIR, "test-plan.md")
|
||||
writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2")
|
||||
|
||||
@@ -947,7 +947,7 @@ describe("atlas hook", () => {
|
||||
}
|
||||
writeBoulderState(TEST_DIR, state)
|
||||
|
||||
// given - last agent is NOT the boulder agent
|
||||
// given - last agent is sisyphus (typical state right after /start-work)
|
||||
cleanupMessageStorage(MAIN_SESSION_ID)
|
||||
setupMessageStorage(MAIN_SESSION_ID, "sisyphus")
|
||||
|
||||
@@ -962,7 +962,39 @@ describe("atlas hook", () => {
|
||||
},
|
||||
})
|
||||
|
||||
// then - should NOT call prompt because agent does not match
|
||||
// then - should call prompt because sisyphus is always allowed for atlas boulders
|
||||
expect(mockInput._promptMock).toHaveBeenCalled()
|
||||
})
|
||||
|
||||
test("should not inject when last agent is non-sisyphus and does not match boulder agent", async () => {
|
||||
// given - boulder explicitly set to atlas, last agent is hephaestus (unrelated agent)
|
||||
const planPath = join(TEST_DIR, "test-plan.md")
|
||||
writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2")
|
||||
|
||||
const state: BoulderState = {
|
||||
active_plan: planPath,
|
||||
started_at: "2026-01-02T10:00:00Z",
|
||||
session_ids: [MAIN_SESSION_ID],
|
||||
plan_name: "test-plan",
|
||||
agent: "atlas",
|
||||
}
|
||||
writeBoulderState(TEST_DIR, state)
|
||||
|
||||
cleanupMessageStorage(MAIN_SESSION_ID)
|
||||
setupMessageStorage(MAIN_SESSION_ID, "hephaestus")
|
||||
|
||||
const mockInput = createMockPluginInput()
|
||||
const hook = createAtlasHook(mockInput)
|
||||
|
||||
// when
|
||||
await hook.handler({
|
||||
event: {
|
||||
type: "session.idle",
|
||||
properties: { sessionID: MAIN_SESSION_ID },
|
||||
},
|
||||
})
|
||||
|
||||
// then - should NOT call prompt because hephaestus does not match atlas or sisyphus
|
||||
expect(mockInput._promptMock).not.toHaveBeenCalled()
|
||||
})
|
||||
|
||||
@@ -1122,6 +1154,144 @@ describe("atlas hook", () => {
|
||||
}
|
||||
})
|
||||
|
||||
test("should keep skipping continuation during 5-minute backoff after 2 consecutive failures", async () => {
|
||||
//#given - boulder state with incomplete plan and prompt always fails
|
||||
const planPath = join(TEST_DIR, "test-plan.md")
|
||||
writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2")
|
||||
|
||||
const state: BoulderState = {
|
||||
active_plan: planPath,
|
||||
started_at: "2026-01-02T10:00:00Z",
|
||||
session_ids: [MAIN_SESSION_ID],
|
||||
plan_name: "test-plan",
|
||||
}
|
||||
writeBoulderState(TEST_DIR, state)
|
||||
|
||||
const promptMock = mock(() => Promise.reject(new Error("Bad Request")))
|
||||
const mockInput = createMockPluginInput({ promptMock })
|
||||
const hook = createAtlasHook(mockInput)
|
||||
|
||||
const originalDateNow = Date.now
|
||||
let now = 0
|
||||
Date.now = () => now
|
||||
|
||||
try {
|
||||
//#when - third idle occurs inside 5-minute backoff window
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
|
||||
await flushMicrotasks()
|
||||
now += 6000
|
||||
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
|
||||
await flushMicrotasks()
|
||||
now += 60000
|
||||
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
|
||||
await flushMicrotasks()
|
||||
|
||||
//#then - third attempt should still be skipped
|
||||
expect(promptMock).toHaveBeenCalledTimes(2)
|
||||
} finally {
|
||||
Date.now = originalDateNow
|
||||
}
|
||||
})
|
||||
|
||||
test("should retry continuation after 5-minute backoff expires following 2 consecutive failures", async () => {
|
||||
//#given - boulder state with incomplete plan and prompt always fails
|
||||
const planPath = join(TEST_DIR, "test-plan.md")
|
||||
writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2")
|
||||
|
||||
const state: BoulderState = {
|
||||
active_plan: planPath,
|
||||
started_at: "2026-01-02T10:00:00Z",
|
||||
session_ids: [MAIN_SESSION_ID],
|
||||
plan_name: "test-plan",
|
||||
}
|
||||
writeBoulderState(TEST_DIR, state)
|
||||
|
||||
const promptMock = mock(() => Promise.reject(new Error("Bad Request")))
|
||||
const mockInput = createMockPluginInput({ promptMock })
|
||||
const hook = createAtlasHook(mockInput)
|
||||
|
||||
const originalDateNow = Date.now
|
||||
let now = 0
|
||||
Date.now = () => now
|
||||
|
||||
try {
|
||||
//#when - third idle occurs after 5+ minutes
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
|
||||
await flushMicrotasks()
|
||||
now += 6000
|
||||
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
|
||||
await flushMicrotasks()
|
||||
now += 300000
|
||||
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
|
||||
await flushMicrotasks()
|
||||
|
||||
//#then - third attempt should run after backoff expiration
|
||||
expect(promptMock).toHaveBeenCalledTimes(3)
|
||||
} finally {
|
||||
Date.now = originalDateNow
|
||||
}
|
||||
})
|
||||
|
||||
test("should reset prompt failure counter after successful retry beyond backoff window", async () => {
|
||||
//#given - boulder state with incomplete plan and success on first retry after backoff
|
||||
const planPath = join(TEST_DIR, "test-plan.md")
|
||||
writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2")
|
||||
|
||||
const state: BoulderState = {
|
||||
active_plan: planPath,
|
||||
started_at: "2026-01-02T10:00:00Z",
|
||||
session_ids: [MAIN_SESSION_ID],
|
||||
plan_name: "test-plan",
|
||||
}
|
||||
writeBoulderState(TEST_DIR, state)
|
||||
|
||||
const promptMock = mock((): Promise<void> => Promise.reject(new Error("Bad Request")))
|
||||
promptMock.mockImplementationOnce(() => Promise.reject(new Error("Bad Request")))
|
||||
promptMock.mockImplementationOnce(() => Promise.reject(new Error("Bad Request")))
|
||||
promptMock.mockImplementationOnce(() => Promise.resolve(undefined))
|
||||
const mockInput = createMockPluginInput({ promptMock })
|
||||
const hook = createAtlasHook(mockInput)
|
||||
|
||||
const originalDateNow = Date.now
|
||||
let now = 0
|
||||
Date.now = () => now
|
||||
|
||||
try {
|
||||
//#when - fail twice, recover after backoff with success, then fail twice again
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
|
||||
await flushMicrotasks()
|
||||
now += 6000
|
||||
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
|
||||
await flushMicrotasks()
|
||||
now += 300000
|
||||
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
|
||||
await flushMicrotasks()
|
||||
now += 6000
|
||||
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
|
||||
await flushMicrotasks()
|
||||
now += 6000
|
||||
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
|
||||
await flushMicrotasks()
|
||||
now += 6000
|
||||
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
|
||||
await flushMicrotasks()
|
||||
|
||||
//#then - success retry resets counter, so two additional failures are allowed before skip
|
||||
expect(promptMock).toHaveBeenCalledTimes(5)
|
||||
} finally {
|
||||
Date.now = originalDateNow
|
||||
}
|
||||
})
|
||||
|
||||
test("should reset continuation failure state on session.compacted event", async () => {
|
||||
//#given - boulder state with incomplete plan and prompt always fails
|
||||
const planPath = join(TEST_DIR, "test-plan.md")
|
||||
|
||||
@@ -26,4 +26,5 @@ export interface SessionState {
|
||||
lastEventWasAbortError?: boolean
|
||||
lastContinuationInjectedAt?: number
|
||||
promptFailureCount: number
|
||||
lastFailureAt?: number
|
||||
}
|
||||
|
||||
@@ -9,6 +9,14 @@ interface EventInput {
|
||||
event: Event
|
||||
}
|
||||
|
||||
interface ChatMessageInput {
|
||||
sessionID: string
|
||||
}
|
||||
|
||||
interface ChatMessageOutput {
|
||||
parts: Array<{ type: string; text?: string; [key: string]: unknown }>
|
||||
}
|
||||
|
||||
/**
|
||||
* Background notification hook - handles event routing to BackgroundManager.
|
||||
*
|
||||
@@ -20,7 +28,15 @@ export function createBackgroundNotificationHook(manager: BackgroundManager) {
|
||||
manager.handleEvent(event)
|
||||
}
|
||||
|
||||
const chatMessageHandler = async (
|
||||
input: ChatMessageInput,
|
||||
output: ChatMessageOutput,
|
||||
): Promise<void> => {
|
||||
manager.injectPendingNotificationsIntoChatMessage(output, input.sessionID)
|
||||
}
|
||||
|
||||
return {
|
||||
"chat.message": chatMessageHandler,
|
||||
event: eventHandler,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# src/hooks/claude-code-hooks/ — Claude Code Compatibility
|
||||
|
||||
**Generated:** 2026-02-21
|
||||
**Generated:** 2026-02-24
|
||||
|
||||
## OVERVIEW
|
||||
|
||||
|
||||
@@ -6,6 +6,7 @@ import {
|
||||
import { OMO_SESSION_PREFIX, buildSessionReminderMessage } from "./constants";
|
||||
import type { InteractiveBashSessionState } from "./types";
|
||||
import { subagentSessions } from "../../features/claude-code-session-state";
|
||||
import { spawnWithWindowsHide } from "../../shared/spawn-with-windows-hide";
|
||||
|
||||
type AbortSession = (args: { path: { id: string } }) => Promise<unknown>
|
||||
|
||||
@@ -19,7 +20,7 @@ async function killAllTrackedSessions(
|
||||
): Promise<void> {
|
||||
for (const sessionName of state.tmuxSessions) {
|
||||
try {
|
||||
const proc = Bun.spawn(["tmux", "kill-session", "-t", sessionName], {
|
||||
const proc = spawnWithWindowsHide(["tmux", "kill-session", "-t", sessionName], {
|
||||
stdout: "ignore",
|
||||
stderr: "ignore",
|
||||
})
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import type { InteractiveBashSessionState } from "./types";
|
||||
import { loadInteractiveBashSessionState } from "./storage";
|
||||
import { OMO_SESSION_PREFIX } from "./constants";
|
||||
import { spawnWithWindowsHide } from "../../shared/spawn-with-windows-hide";
|
||||
|
||||
export function getOrCreateState(sessionID: string, sessionStates: Map<string, InteractiveBashSessionState>): InteractiveBashSessionState {
|
||||
if (!sessionStates.has(sessionID)) {
|
||||
@@ -24,7 +25,7 @@ export async function killAllTrackedSessions(
|
||||
): Promise<void> {
|
||||
for (const sessionName of state.tmuxSessions) {
|
||||
try {
|
||||
const proc = Bun.spawn(["tmux", "kill-session", "-t", sessionName], {
|
||||
const proc = spawnWithWindowsHide(["tmux", "kill-session", "-t", sessionName], {
|
||||
stdout: "ignore",
|
||||
stderr: "ignore",
|
||||
});
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# src/hooks/keyword-detector/ — Mode Keyword Injection
|
||||
|
||||
**Generated:** 2026-02-21
|
||||
**Generated:** 2026-02-24
|
||||
|
||||
## OVERVIEW
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user