Compare commits
135 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f0b5835459 | ||
|
|
2a495c2e8d | ||
|
|
0edb87b1c1 | ||
|
|
cca057dc0f | ||
|
|
e000a3bb0d | ||
|
|
c19fc4ba22 | ||
|
|
e0de06851d | ||
|
|
26ac413dd9 | ||
|
|
81c912cf04 | ||
|
|
9c348db450 | ||
|
|
2993b3255d | ||
|
|
0b77e2def0 | ||
|
|
bfa8fa2378 | ||
|
|
6ee680af99 | ||
|
|
d327334ded | ||
|
|
07d120a78d | ||
|
|
8b7b1c843a | ||
|
|
a1786f469d | ||
|
|
da77d8addf | ||
|
|
971912e065 | ||
|
|
af301ab29a | ||
|
|
984464470c | ||
|
|
535ecee318 | ||
|
|
32035d153e | ||
|
|
a0649616bf | ||
|
|
cb12b286c8 | ||
|
|
8e239e134c | ||
|
|
733676f1a9 | ||
|
|
d2e566ba9d | ||
|
|
6da4d2dae0 | ||
|
|
3b41191980 | ||
|
|
0b614b751c | ||
|
|
c56a01c15d | ||
|
|
d2d48fc9ff | ||
|
|
41a43c62fc | ||
|
|
cea8769a7f | ||
|
|
7fa2417c42 | ||
|
|
4bba924dad | ||
|
|
e691303919 | ||
|
|
d4aee20743 | ||
|
|
bad70f5e24 | ||
|
|
b9fa2a3ebc | ||
|
|
0e7bd595f8 | ||
|
|
0732cb85f9 | ||
|
|
500784a9b9 | ||
|
|
5e856b4fde | ||
|
|
03dc903e8e | ||
|
|
69d0b23ab6 | ||
|
|
ee8735cd2c | ||
|
|
d8fe61131c | ||
|
|
935995d270 | ||
|
|
23d8b88c4a | ||
|
|
b4285ce565 | ||
|
|
f9d354b63e | ||
|
|
370eb945ee | ||
|
|
6387065e6f | ||
|
|
bebdb97c21 | ||
|
|
b5e2ead4e1 | ||
|
|
91922dae36 | ||
|
|
cb3d8af995 | ||
|
|
0fb3e2063a | ||
|
|
b37b877c45 | ||
|
|
f854246d7f | ||
|
|
f1eaa7bf9b | ||
|
|
ed9b4a6329 | ||
|
|
a00a22ac4c | ||
|
|
8879581fc1 | ||
|
|
230ce835e5 | ||
|
|
10e56badb3 | ||
|
|
cddf78434c | ||
|
|
0078b736b9 | ||
|
|
6d7f69625b | ||
|
|
fda17dd161 | ||
|
|
c41d6fd912 | ||
|
|
6e9128e060 | ||
|
|
92509d8cfb | ||
|
|
331f7ec52b | ||
|
|
4ba2da7ebb | ||
|
|
f95d3b1ef5 | ||
|
|
d5d7c7dd26 | ||
|
|
6a56c0e241 | ||
|
|
94c234c88c | ||
|
|
2ab976c511 | ||
|
|
dc66088483 | ||
|
|
67b5f46a7c | ||
|
|
0e483d27ac | ||
|
|
f5eaa648e9 | ||
|
|
4c4760a4ee | ||
|
|
7f20dd6ff5 | ||
|
|
de371be236 | ||
|
|
f3c2138ef4 | ||
|
|
0810e37240 | ||
|
|
a64e364fa6 | ||
|
|
d886ac701f | ||
|
|
30dc50d880 | ||
|
|
b17e633464 | ||
|
|
eec268ee42 | ||
|
|
363661c0d6 | ||
|
|
261bbdf4dc | ||
|
|
8aec4c5cb3 | ||
|
|
16cbc847ac | ||
|
|
436ce71dc8 | ||
|
|
d2a49428b9 | ||
|
|
c3b23bf603 | ||
|
|
50094de73e | ||
|
|
3aa2748c04 | ||
|
|
ccaf759b6b | ||
|
|
521a1f76a9 | ||
|
|
490f0f2090 | ||
|
|
caf595e727 | ||
|
|
1f64a45113 | ||
|
|
9b2dc2189c | ||
|
|
071fab1618 | ||
|
|
f6c24e42af | ||
|
|
22fd976eb9 | ||
|
|
826284f3d9 | ||
|
|
3c7e6a3940 | ||
|
|
33ef4db502 | ||
|
|
458ec06b0e | ||
|
|
6b66f69433 | ||
|
|
ce8957e1e1 | ||
|
|
a3db64b931 | ||
|
|
8859da5fef | ||
|
|
23c0ff60f2 | ||
|
|
4723319eef | ||
|
|
b8f3186d65 | ||
|
|
01e18f8773 | ||
|
|
1669c83782 | ||
|
|
8adf6a2c47 | ||
|
|
5c6194372e | ||
|
|
399796cbe4 | ||
|
|
77c3ed1a1f | ||
|
|
82e25c845b | ||
|
|
c644930753 | ||
|
|
b79df5e018 |
BIN
.github/assets/building-in-public.png
vendored
Normal file
BIN
.github/assets/building-in-public.png
vendored
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 278 KiB |
6
.github/workflows/publish.yml
vendored
6
.github/workflows/publish.yml
vendored
@@ -57,6 +57,7 @@ jobs:
|
||||
bun test src/cli/doctor/format-default.test.ts
|
||||
bun test src/tools/call-omo-agent/sync-executor.test.ts
|
||||
bun test src/tools/call-omo-agent/session-creator.test.ts
|
||||
bun test src/tools/session-manager
|
||||
bun test src/features/opencode-skill-loader/loader.test.ts
|
||||
bun test src/hooks/anthropic-context-window-limit-recovery/recovery-hook.test.ts
|
||||
bun test src/hooks/anthropic-context-window-limit-recovery/executor.test.ts
|
||||
@@ -66,9 +67,8 @@ jobs:
|
||||
# Enumerate subdirectories/files explicitly to EXCLUDE mock-heavy files
|
||||
# that were already run in isolation above.
|
||||
# Excluded from src/cli: doctor/formatter.test.ts, doctor/format-default.test.ts
|
||||
# Excluded from src/tools: call-omo-agent/sync-executor.test.ts, call-omo-agent/session-creator.test.ts
|
||||
# Excluded from src/tools: call-omo-agent/sync-executor.test.ts, call-omo-agent/session-creator.test.ts, session-manager (all)
|
||||
# Excluded from src/hooks/anthropic-context-window-limit-recovery: recovery-hook.test.ts, executor.test.ts
|
||||
# Excluded from src/tools: call-omo-agent/sync-executor.test.ts, call-omo-agent/session-creator.test.ts
|
||||
bun test bin script src/config src/mcp src/index.test.ts \
|
||||
src/agents src/shared \
|
||||
src/cli/run src/cli/config-manager src/cli/mcp-oauth \
|
||||
@@ -77,7 +77,7 @@ jobs:
|
||||
src/cli/doctor/runner.test.ts src/cli/doctor/checks \
|
||||
src/tools/ast-grep src/tools/background-task src/tools/delegate-task \
|
||||
src/tools/glob src/tools/grep src/tools/interactive-bash \
|
||||
src/tools/look-at src/tools/lsp src/tools/session-manager \
|
||||
src/tools/look-at src/tools/lsp \
|
||||
src/tools/skill src/tools/skill-mcp src/tools/slashcommand src/tools/task \
|
||||
src/tools/call-omo-agent/background-agent-executor.test.ts \
|
||||
src/tools/call-omo-agent/background-executor.test.ts \
|
||||
|
||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -36,3 +36,4 @@ test-injection/
|
||||
notepad.md
|
||||
oauth-success.html
|
||||
*.bun-build
|
||||
.omx/
|
||||
|
||||
@@ -79,47 +79,65 @@ Pass `REPO`, `REPORT_DIR`, and `COMMIT_SHA` to every subagent.
|
||||
|
||||
---
|
||||
|
||||
## Phase 1: Fetch All Open Items
|
||||
---
|
||||
|
||||
<fetch>
|
||||
Paginate if 500 results returned.
|
||||
## Phase 1: Fetch All Open Items (CORRECTED)
|
||||
|
||||
**IMPORTANT:** `body` and `comments` fields may contain control characters that break jq parsing. Fetch basic metadata first, then fetch full details per-item in subagents.
|
||||
|
||||
```bash
|
||||
ISSUES=$(gh issue list --repo $REPO --state open --limit 500 \
|
||||
--json number,title,state,createdAt,updatedAt,labels,author,body,comments)
|
||||
ISSUE_LEN=$(echo "$ISSUES" | jq length)
|
||||
if [ "$ISSUE_LEN" -eq 500 ]; then
|
||||
LAST_DATE=$(echo "$ISSUES" | jq -r '.[-1].createdAt')
|
||||
# Step 1: Fetch basic metadata (without body/comments to avoid JSON parsing issues)
|
||||
ISSUES_LIST=$(gh issue list --repo $REPO --state open --limit 500 \
|
||||
--json number,title,labels,author,createdAt)
|
||||
ISSUE_COUNT=$(echo "$ISSUES_LIST" | jq length)
|
||||
|
||||
# Paginate if needed
|
||||
if [ "$ISSUE_COUNT" -eq 500 ]; then
|
||||
LAST_DATE=$(echo "$ISSUES_LIST" | jq -r '.[-1].createdAt')
|
||||
while true; do
|
||||
PAGE=$(gh issue list --repo $REPO --state open --limit 500 \
|
||||
--search "created:<$LAST_DATE" \
|
||||
--json number,title,state,createdAt,updatedAt,labels,author,body,comments)
|
||||
PAGE_LEN=$(echo "$PAGE" | jq length)
|
||||
[ "$PAGE_LEN" -eq 0 ] && break
|
||||
ISSUES=$(echo "[$ISSUES, $PAGE]" | jq -s 'add | unique_by(.number)')
|
||||
[ "$PAGE_LEN" -lt 500 ] && break
|
||||
--json number,title,labels,author,createdAt)
|
||||
PAGE_COUNT=$(echo "$PAGE" | jq length)
|
||||
[ "$PAGE_COUNT" -eq 0 ] && break
|
||||
ISSUES_LIST=$(echo "$ISSUES_LIST" "$PAGE" | jq -s '.[0] + .[1] | unique_by(.number)')
|
||||
ISSUE_COUNT=$(echo "$ISSUES_LIST" | jq length)
|
||||
[ "$PAGE_COUNT" -lt 500 ] && break
|
||||
LAST_DATE=$(echo "$PAGE" | jq -r '.[-1].createdAt')
|
||||
done
|
||||
fi
|
||||
|
||||
PRS=$(gh pr list --repo $REPO --state open --limit 500 \
|
||||
--json number,title,state,createdAt,updatedAt,labels,author,body,headRefName,baseRefName,isDraft,mergeable,reviewDecision,statusCheckRollup)
|
||||
PR_LEN=$(echo "$PRS" | jq length)
|
||||
if [ "$PR_LEN" -eq 500 ]; then
|
||||
LAST_DATE=$(echo "$PRS" | jq -r '.[-1].createdAt')
|
||||
# Same for PRs
|
||||
PRS_LIST=$(gh pr list --repo $REPO --state open --limit 500 \
|
||||
--json number,title,labels,author,headRefName,baseRefName,isDraft,createdAt)
|
||||
PR_COUNT=$(echo "$PRS_LIST" | jq length)
|
||||
|
||||
if [ "$PR_COUNT" -eq 500 ]; then
|
||||
LAST_DATE=$(echo "$PRS_LIST" | jq -r '.[-1].createdAt')
|
||||
while true; do
|
||||
PAGE=$(gh pr list --repo $REPO --state open --limit 500 \
|
||||
--search "created:<$LAST_DATE" \
|
||||
--json number,title,state,createdAt,updatedAt,labels,author,body,headRefName,baseRefName,isDraft,mergeable,reviewDecision,statusCheckRollup)
|
||||
PAGE_LEN=$(echo "$PAGE" | jq length)
|
||||
[ "$PAGE_LEN" -eq 0 ] && break
|
||||
PRS=$(echo "[$PRS, $PAGE]" | jq -s 'add | unique_by(.number)')
|
||||
[ "$PAGE_LEN" -lt 500 ] && break
|
||||
--json number,title,labels,author,headRefName,baseRefName,isDraft,createdAt)
|
||||
PAGE_COUNT=$(echo "$PAGE" | jq length)
|
||||
[ "$PAGE_COUNT" -eq 0 ] && break
|
||||
PRS_LIST=$(echo "$PRS_LIST" "$PAGE" | jq -s '.[0] + .[1] | unique_by(.number)')
|
||||
PR_COUNT=$(echo "$PRS_LIST" | jq length)
|
||||
[ "$PAGE_COUNT" -lt 500 ] && break
|
||||
LAST_DATE=$(echo "$PAGE" | jq -r '.[-1].createdAt')
|
||||
done
|
||||
fi
|
||||
|
||||
echo "Total issues: $ISSUE_COUNT, Total PRs: $PR_COUNT"
|
||||
```
|
||||
</fetch>
|
||||
|
||||
**LARGE REPOSITORY HANDLING:**
|
||||
If total items exceeds 50, you MUST process ALL items. Use the pagination code above to fetch every single open issue and PR.
|
||||
**DO NOT** sample or limit to 50 items - process the entire backlog.
|
||||
|
||||
Example: If there are 500 open issues, spawn 500 subagents. If there are 1000 open PRs, spawn 1000 subagents.
|
||||
|
||||
**Note:** Background task system will queue excess tasks automatically.
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
10
AGENTS.md
10
AGENTS.md
@@ -4,7 +4,7 @@
|
||||
|
||||
## OVERVIEW
|
||||
|
||||
OpenCode plugin (npm: `oh-my-opencode`) that extends Claude Code (OpenCode fork) with multi-agent orchestration, 46 lifecycle hooks, 26 tools, skill/command/MCP systems, and Claude Code compatibility. 1268 TypeScript files, 160k LOC.
|
||||
OpenCode plugin (npm: `oh-my-opencode`) that extends Claude Code (OpenCode fork) with multi-agent orchestration, 48 lifecycle hooks, 26 tools, skill/command/MCP systems, and Claude Code compatibility. 1268 TypeScript files, 160k LOC.
|
||||
|
||||
## STRUCTURE
|
||||
|
||||
@@ -14,14 +14,14 @@ oh-my-opencode/
|
||||
│ ├── index.ts # Plugin entry: loadConfig → createManagers → createTools → createHooks → createPluginInterface
|
||||
│ ├── plugin-config.ts # JSONC multi-level config: user → project → defaults (Zod v4)
|
||||
│ ├── agents/ # 11 agents (Sisyphus, Hephaestus, Oracle, Librarian, Explore, Atlas, Prometheus, Metis, Momus, Multimodal-Looker, Sisyphus-Junior)
|
||||
│ ├── hooks/ # 46 hooks across 45 directories + 11 standalone files
|
||||
│ ├── hooks/ # 48 lifecycle hooks across dedicated modules and standalone files
|
||||
│ ├── tools/ # 26 tools across 15 directories
|
||||
│ ├── features/ # 19 feature modules (background-agent, skill-loader, tmux, MCP-OAuth, etc.)
|
||||
│ ├── shared/ # 95+ utility files in 13 categories
|
||||
│ ├── config/ # Zod v4 schema system (24 files)
|
||||
│ ├── cli/ # CLI: install, run, doctor, mcp-oauth (Commander.js)
|
||||
│ ├── mcp/ # 3 built-in remote MCPs (websearch, context7, grep_app)
|
||||
│ ├── plugin/ # 8 OpenCode hook handlers + 46 hook composition
|
||||
│ ├── plugin/ # 8 OpenCode hook handlers + 48 hook composition
|
||||
│ └── plugin-handlers/ # 6-phase config loading pipeline
|
||||
├── packages/ # Monorepo: cli-runner, 12 platform binaries
|
||||
└── local-ignore/ # Dev-only test fixtures
|
||||
@@ -34,7 +34,7 @@ OhMyOpenCodePlugin(ctx)
|
||||
├─→ loadPluginConfig() # JSONC parse → project/user merge → Zod validate → migrate
|
||||
├─→ createManagers() # TmuxSessionManager, BackgroundManager, SkillMcpManager, ConfigHandler
|
||||
├─→ createTools() # SkillContext + AvailableCategories + ToolRegistry (26 tools)
|
||||
├─→ createHooks() # 3-tier: Core(37) + Continuation(7) + Skill(2) = 46 hooks
|
||||
├─→ createHooks() # 3-tier: Core(39) + Continuation(7) + Skill(2) = 48 hooks
|
||||
└─→ createPluginInterface() # 8 OpenCode hook handlers → PluginInterface
|
||||
```
|
||||
|
||||
@@ -97,7 +97,7 @@ Fields: agents (14 overridable, 21 fields each), categories (8 built-in + custom
|
||||
- **Test pattern**: Bun test (`bun:test`), co-located `*.test.ts`, given/when/then style (nested describe with `#given`/`#when`/`#then` prefixes)
|
||||
- **CI test split**: mock-heavy tests run in isolation (separate `bun test` processes), rest in batch
|
||||
- **Factory pattern**: `createXXX()` for all tools, hooks, agents
|
||||
- **Hook tiers**: Session (23) → Tool-Guard (10) → Transform (4) → Continuation (7) → Skill (2)
|
||||
- **Hook tiers**: Session (23) → Tool-Guard (12) → Transform (4) → Continuation (7) → Skill (2)
|
||||
- **Agent modes**: `primary` (respects UI model) vs `subagent` (own fallback chain) vs `all`
|
||||
- **Model resolution**: 4-step: override → category-default → provider-fallback → system-default
|
||||
- **Config format**: JSONC with comments, Zod v4 validation, snake_case keys
|
||||
|
||||
11
README.ja.md
11
README.ja.md
@@ -4,6 +4,17 @@
|
||||
> コアメンテナーのQが負傷したため、今週は Issue/PR への返信とリリースが遅れる可能性があります。
|
||||
> ご理解とご支援に感謝します。
|
||||
|
||||
> [!TIP]
|
||||
> **Building in Public**
|
||||
>
|
||||
> メンテナーが Jobdori を使い、oh-my-opencode をリアルタイムで開発・メンテナンスしています。Jobdori は OpenClaw をベースに大幅カスタマイズされた AI アシスタントです。
|
||||
> すべての機能開発、修正、Issue トリアージを Discord でライブでご覧いただけます。
|
||||
>
|
||||
> [](https://discord.gg/PUwSMR9XNk)
|
||||
>
|
||||
> [**→ #building-in-public で確認する**](https://discord.gg/PUwSMR9XNk)
|
||||
|
||||
|
||||
> [!NOTE]
|
||||
>
|
||||
> [](https://sisyphuslabs.ai)
|
||||
|
||||
11
README.ko.md
11
README.ko.md
@@ -4,6 +4,17 @@
|
||||
> 핵심 메인테이너 Q가 부상을 입어, 이번 주에는 이슈/PR 응답 및 릴리스가 지연될 수 있습니다.
|
||||
> 양해와 응원에 감사드립니다.
|
||||
|
||||
> [!TIP]
|
||||
> **Building in Public**
|
||||
>
|
||||
> 메인테이너가 Jobdori를 통해 oh-my-opencode를 실시간으로 개발하고 있습니다. Jobdori는 OpenClaw를 기반으로 대폭 커스터마이징된 AI 어시스턴트입니다.
|
||||
> 모든 기능 개발, 버그 수정, 이슈 트리아지를 Discord에서 실시간으로 확인하세요.
|
||||
>
|
||||
> [](https://discord.gg/PUwSMR9XNk)
|
||||
>
|
||||
> [**→ #building-in-public에서 확인하기**](https://discord.gg/PUwSMR9XNk)
|
||||
|
||||
|
||||
> [!TIP]
|
||||
> 저희와 함께 하세요!
|
||||
>
|
||||
|
||||
14
README.md
14
README.md
@@ -1,3 +1,13 @@
|
||||
> [!TIP]
|
||||
> **Building in Public**
|
||||
>
|
||||
> The maintainer builds and maintains oh-my-opencode in real-time with Jobdori, an AI assistant built on a heavily customized fork of OpenClaw.
|
||||
> Every feature, every fix, every issue triage — live in our Discord.
|
||||
>
|
||||
> [](https://discord.gg/PUwSMR9XNk)
|
||||
>
|
||||
> [**→ Watch it happen in #building-in-public**](https://discord.gg/PUwSMR9XNk)
|
||||
|
||||
> [!NOTE]
|
||||
>
|
||||
> [](https://sisyphuslabs.ai)
|
||||
@@ -304,7 +314,7 @@ See full [Features Documentation](docs/reference/features.md).
|
||||
- **Claude Code Compatibility**: Full hook system, commands, skills, agents, MCPs
|
||||
- **Built-in MCPs**: websearch (Exa), context7 (docs), grep_app (GitHub search)
|
||||
- **Session Tools**: List, read, search, and analyze session history
|
||||
- **Productivity Features**: Ralph Loop, Todo Enforcer, GPT permission-tail continuation, Comment Checker, Think Mode, and more
|
||||
- **Productivity Features**: Ralph Loop, Todo Enforcer, Comment Checker, Think Mode, and more
|
||||
- **Model Setup**: Agent-model matching is built into the [Installation Guide](docs/guide/installation.md#step-5-understand-your-model-setup)
|
||||
|
||||
## Configuration
|
||||
@@ -321,7 +331,7 @@ See [Configuration Documentation](docs/reference/configuration.md).
|
||||
- **Sisyphus Agent**: Main orchestrator with Prometheus (Planner) and Metis (Plan Consultant)
|
||||
- **Background Tasks**: Configure concurrency limits per provider/model
|
||||
- **Categories**: Domain-specific task delegation (`visual`, `business-logic`, custom)
|
||||
- **Hooks**: 25+ built-in hooks, including `gpt-permission-continuation`, all configurable via `disabled_hooks`
|
||||
- **Hooks**: 25+ built-in hooks, all configurable via `disabled_hooks`
|
||||
- **MCPs**: Built-in websearch (Exa), context7 (docs), grep_app (GitHub search)
|
||||
- **LSP**: Full LSP support with refactoring tools
|
||||
- **Experimental**: Aggressive truncation, auto-resume, and more
|
||||
|
||||
11
README.ru.md
11
README.ru.md
@@ -4,6 +4,17 @@
|
||||
> Ключевой мейнтейнер Q получил травму, поэтому на этой неделе ответы по issue/PR и релизы могут задерживаться.
|
||||
> Спасибо за терпение и поддержку.
|
||||
|
||||
> [!TIP]
|
||||
> **Building in Public**
|
||||
>
|
||||
> Мейнтейнер разрабатывает и поддерживает oh-my-opencode в режиме реального времени с помощью Jobdori — ИИ-ассистента на базе глубоко кастомизированной версии OpenClaw.
|
||||
> Каждая фича, каждый фикс, каждый триаж issue — в прямом эфире в нашем Discord.
|
||||
>
|
||||
> [](https://discord.gg/PUwSMR9XNk)
|
||||
>
|
||||
> [**→ Смотрите в #building-in-public**](https://discord.gg/PUwSMR9XNk)
|
||||
|
||||
|
||||
> [!NOTE]
|
||||
>
|
||||
> [](https://sisyphuslabs.ai)
|
||||
|
||||
@@ -4,6 +4,17 @@
|
||||
> 核心维护者 Q 因受伤,本周 issue/PR 回复和发布可能会延迟。
|
||||
> 感谢你的耐心与支持。
|
||||
|
||||
> [!TIP]
|
||||
> **Building in Public**
|
||||
>
|
||||
> 维护者正在使用 Jobdori 实时开发和维护 oh-my-opencode。Jobdori 是基于 OpenClaw 深度定制的 AI 助手。
|
||||
> 每个功能开发、每次修复、每次 Issue 分类,都在 Discord 上实时进行。
|
||||
>
|
||||
> [](https://discord.gg/PUwSMR9XNk)
|
||||
>
|
||||
> [**→ 在 #building-in-public 频道中查看**](https://discord.gg/PUwSMR9XNk)
|
||||
|
||||
|
||||
> [!NOTE]
|
||||
>
|
||||
> [](https://sisyphuslabs.ai)
|
||||
|
||||
@@ -3736,6 +3736,147 @@
|
||||
},
|
||||
"additionalProperties": false
|
||||
},
|
||||
"openclaw": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"enabled": {
|
||||
"default": false,
|
||||
"type": "boolean"
|
||||
},
|
||||
"gateways": {
|
||||
"default": {},
|
||||
"type": "object",
|
||||
"propertyNames": {
|
||||
"type": "string"
|
||||
},
|
||||
"additionalProperties": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"default": "http",
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"http",
|
||||
"command"
|
||||
]
|
||||
},
|
||||
"url": {
|
||||
"type": "string"
|
||||
},
|
||||
"method": {
|
||||
"default": "POST",
|
||||
"type": "string"
|
||||
},
|
||||
"headers": {
|
||||
"type": "object",
|
||||
"propertyNames": {
|
||||
"type": "string"
|
||||
},
|
||||
"additionalProperties": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"command": {
|
||||
"type": "string"
|
||||
},
|
||||
"timeout": {
|
||||
"type": "number"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"type",
|
||||
"method"
|
||||
],
|
||||
"additionalProperties": false
|
||||
}
|
||||
},
|
||||
"hooks": {
|
||||
"default": {},
|
||||
"type": "object",
|
||||
"propertyNames": {
|
||||
"type": "string"
|
||||
},
|
||||
"additionalProperties": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"enabled": {
|
||||
"default": true,
|
||||
"type": "boolean"
|
||||
},
|
||||
"gateway": {
|
||||
"type": "string"
|
||||
},
|
||||
"instruction": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"enabled",
|
||||
"gateway",
|
||||
"instruction"
|
||||
],
|
||||
"additionalProperties": false
|
||||
}
|
||||
},
|
||||
"replyListener": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"discordBotToken": {
|
||||
"type": "string"
|
||||
},
|
||||
"discordChannelId": {
|
||||
"type": "string"
|
||||
},
|
||||
"discordMention": {
|
||||
"type": "string"
|
||||
},
|
||||
"authorizedDiscordUserIds": {
|
||||
"default": [],
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"telegramBotToken": {
|
||||
"type": "string"
|
||||
},
|
||||
"telegramChatId": {
|
||||
"type": "string"
|
||||
},
|
||||
"pollIntervalMs": {
|
||||
"default": 3000,
|
||||
"type": "number"
|
||||
},
|
||||
"rateLimitPerMinute": {
|
||||
"default": 10,
|
||||
"type": "number"
|
||||
},
|
||||
"maxMessageLength": {
|
||||
"default": 500,
|
||||
"type": "number"
|
||||
},
|
||||
"includePrefix": {
|
||||
"default": true,
|
||||
"type": "boolean"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"authorizedDiscordUserIds",
|
||||
"pollIntervalMs",
|
||||
"rateLimitPerMinute",
|
||||
"maxMessageLength",
|
||||
"includePrefix"
|
||||
],
|
||||
"additionalProperties": false
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"enabled",
|
||||
"gateways",
|
||||
"hooks"
|
||||
],
|
||||
"additionalProperties": false
|
||||
},
|
||||
"babysitting": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
||||
@@ -8,7 +8,7 @@ Think of AI models as developers on a team. Each has a different brain, differen
|
||||
|
||||
This isn't a bug. It's the foundation of the entire system.
|
||||
|
||||
Oh My OpenCode assigns each agent a model that matches its _working style_ — like building a team where each person is in the role that fits their personality.
|
||||
Oh My OpenAgent assigns each agent a model that matches its _working style_ — like building a team where each person is in the role that fits their personality.
|
||||
|
||||
### Sisyphus: The Sociable Lead
|
||||
|
||||
@@ -121,6 +121,7 @@ Principle-driven, explicit reasoning, deep technical capability. Best for agents
|
||||
| ----------------- | ----------------------------------------------------------------------------------------------- |
|
||||
| **GPT-5.3 Codex** | Deep coding powerhouse. Autonomous exploration. Required for Hephaestus. |
|
||||
| **GPT-5.4** | High intelligence, strategic reasoning. Default for Oracle, Momus, and a key fallback for Prometheus / Atlas. Uses xhigh variant for Momus. |
|
||||
| **GPT-5.4 Mini** | Fast + strong reasoning. Good for lightweight autonomous tasks. Default for quick category. |
|
||||
| **GPT-5-Nano** | Ultra-cheap, fast. Good for simple utility tasks. |
|
||||
|
||||
### Other Models
|
||||
@@ -170,7 +171,7 @@ When agents delegate work, they don't pick a model name — they pick a **catego
|
||||
| `ultrabrain` | Maximum reasoning needed | GPT-5.4 → Gemini 3.1 Pro → Claude Opus → opencode-go/glm-5 |
|
||||
| `deep` | Deep coding, complex logic | GPT-5.3 Codex → Claude Opus → Gemini 3.1 Pro |
|
||||
| `artistry` | Creative, novel approaches | Gemini 3.1 Pro → Claude Opus → GPT-5.4 |
|
||||
| `quick` | Simple, fast tasks | Claude Haiku → Gemini Flash → opencode-go/minimax-m2.5 → GPT-5-Nano |
|
||||
| `quick` | Simple, fast tasks | GPT-5.4 Mini → Claude Haiku → Gemini Flash → opencode-go/minimax-m2.5 → GPT-5-Nano |
|
||||
| `unspecified-high` | General complex work | Claude Opus → GPT-5.4 → GLM 5 → K2P5 → opencode-go/glm-5 → Kimi K2.5 |
|
||||
| `unspecified-low` | General standard work | Claude Sonnet → GPT-5.3 Codex → opencode-go/kimi-k2.5 → Gemini Flash |
|
||||
| `writing` | Text, docs, prose | Gemini Flash → opencode-go/kimi-k2.5 → Claude Sonnet |
|
||||
@@ -185,7 +186,7 @@ See the [Orchestration System Guide](./orchestration.md) for how agents dispatch
|
||||
|
||||
```jsonc
|
||||
{
|
||||
"$schema": "https://raw.githubusercontent.com/code-yeongyu/oh-my-openagent/dev/assets/oh-my-opencode.schema.json",
|
||||
"$schema": "https://raw.githubusercontent.com/code-yeongyu/oh-my-openagent/dev/assets/oh-my-openagent.schema.json",
|
||||
|
||||
"agents": {
|
||||
// Main orchestrator: Claude Opus or Kimi K2.5 work best
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
Paste this into your llm agent session:
|
||||
|
||||
```
|
||||
Install and configure oh-my-opencode by following the instructions here:
|
||||
Install and configure oh-my-openagent by following the instructions here:
|
||||
https://raw.githubusercontent.com/code-yeongyu/oh-my-openagent/refs/heads/dev/docs/guide/installation.md
|
||||
```
|
||||
|
||||
@@ -14,8 +14,8 @@ https://raw.githubusercontent.com/code-yeongyu/oh-my-openagent/refs/heads/dev/do
|
||||
Run the interactive installer:
|
||||
|
||||
```bash
|
||||
bunx oh-my-opencode install # recommended
|
||||
npx oh-my-opencode install # alternative
|
||||
bunx oh-my-openagent install # recommended
|
||||
npx oh-my-openagent install # alternative
|
||||
```
|
||||
|
||||
> **Note**: The CLI ships with standalone binaries for all major platforms. No runtime (Bun/Node.js) is required for CLI execution after installation.
|
||||
@@ -34,7 +34,7 @@ After you install it, you can read this [overview guide](./overview.md) to under
|
||||
> curl -fsSL https://raw.githubusercontent.com/code-yeongyu/oh-my-openagent/refs/heads/dev/docs/guide/installation.md
|
||||
> ```
|
||||
|
||||
If you're an LLM Agent helping set up oh-my-opencode, Welcome! You'll now help your user to install the best agent harness, OhMyOpenCode. Tell user this with greetings, with test 'oMoMoMoMo...'
|
||||
If you're an LLM Agent helping set up oh-my-openagent, Welcome! You'll now help your user to install the best agent harness, OhMyOpenAgent. Tell user this with greetings, with test 'oMoMoMoMo...'
|
||||
|
||||
follow these steps:
|
||||
|
||||
@@ -96,19 +96,19 @@ Spawn a subagent to handle installation and report back - to save context.
|
||||
Based on user's answers, run the CLI installer with appropriate flags:
|
||||
|
||||
```bash
|
||||
bunx oh-my-opencode install --no-tui --claude=<yes|no|max20> --gemini=<yes|no> --copilot=<yes|no> [--openai=<yes|no>] [--opencode-go=<yes|no>] [--opencode-zen=<yes|no>] [--zai-coding-plan=<yes|no>]
|
||||
bunx oh-my-openagent install --no-tui --claude=<yes|no|max20> --gemini=<yes|no> --copilot=<yes|no> [--openai=<yes|no>] [--opencode-go=<yes|no>] [--opencode-zen=<yes|no>] [--zai-coding-plan=<yes|no>]
|
||||
```
|
||||
|
||||
**Examples:**
|
||||
|
||||
- User has all native subscriptions: `bunx oh-my-opencode install --no-tui --claude=max20 --openai=yes --gemini=yes --copilot=no`
|
||||
- User has only Claude: `bunx oh-my-opencode install --no-tui --claude=yes --gemini=no --copilot=no`
|
||||
- User has Claude + OpenAI: `bunx oh-my-opencode install --no-tui --claude=yes --openai=yes --gemini=no --copilot=no`
|
||||
- User has only GitHub Copilot: `bunx oh-my-opencode install --no-tui --claude=no --gemini=no --copilot=yes`
|
||||
- User has Z.ai for Librarian: `bunx oh-my-opencode install --no-tui --claude=yes --gemini=no --copilot=no --zai-coding-plan=yes`
|
||||
- User has only OpenCode Zen: `bunx oh-my-opencode install --no-tui --claude=no --gemini=no --copilot=no --opencode-zen=yes`
|
||||
- User has OpenCode Go only: `bunx oh-my-opencode install --no-tui --claude=no --openai=no --gemini=no --copilot=no --opencode-go=yes`
|
||||
- User has no subscriptions: `bunx oh-my-opencode install --no-tui --claude=no --gemini=no --copilot=no`
|
||||
- User has all native subscriptions: `bunx oh-my-openagent install --no-tui --claude=max20 --openai=yes --gemini=yes --copilot=no`
|
||||
- User has only Claude: `bunx oh-my-openagent install --no-tui --claude=yes --gemini=no --copilot=no`
|
||||
- User has Claude + OpenAI: `bunx oh-my-openagent install --no-tui --claude=yes --openai=yes --gemini=no --copilot=no`
|
||||
- User has only GitHub Copilot: `bunx oh-my-openagent install --no-tui --claude=no --gemini=no --copilot=yes`
|
||||
- User has Z.ai for Librarian: `bunx oh-my-openagent install --no-tui --claude=yes --gemini=no --copilot=no --zai-coding-plan=yes`
|
||||
- User has only OpenCode Zen: `bunx oh-my-openagent install --no-tui --claude=no --gemini=no --copilot=no --opencode-zen=yes`
|
||||
- User has OpenCode Go only: `bunx oh-my-openagent install --no-tui --claude=no --openai=no --gemini=no --copilot=no --opencode-go=yes`
|
||||
- User has no subscriptions: `bunx oh-my-openagent install --no-tui --claude=no --gemini=no --copilot=no`
|
||||
|
||||
The CLI will:
|
||||
|
||||
@@ -120,7 +120,7 @@ The CLI will:
|
||||
|
||||
```bash
|
||||
opencode --version # Should be 1.0.150 or higher
|
||||
cat ~/.config/opencode/opencode.json # Should contain "oh-my-opencode" in plugin array
|
||||
cat ~/.config/opencode/opencode.json # Should contain "oh-my-openagent" in plugin array
|
||||
```
|
||||
|
||||
### Step 4: Configure Authentication
|
||||
@@ -145,7 +145,7 @@ First, add the opencode-antigravity-auth plugin:
|
||||
|
||||
```json
|
||||
{
|
||||
"plugin": ["oh-my-opencode", "opencode-antigravity-auth@latest"]
|
||||
"plugin": ["oh-my-openagent", "opencode-antigravity-auth@latest"]
|
||||
}
|
||||
```
|
||||
|
||||
@@ -154,9 +154,9 @@ First, add the opencode-antigravity-auth plugin:
|
||||
You'll also need full model settings in `opencode.json`.
|
||||
Read the [opencode-antigravity-auth documentation](https://github.com/NoeFabris/opencode-antigravity-auth), copy the full model configuration from the README, and merge carefully to avoid breaking the user's existing setup. The plugin now uses a **variant system** — models like `antigravity-gemini-3-pro` support `low`/`high` variants instead of separate `-low`/`-high` model entries.
|
||||
|
||||
##### oh-my-opencode Agent Model Override
|
||||
##### oh-my-openagent Agent Model Override
|
||||
|
||||
The `opencode-antigravity-auth` plugin uses different model names than the built-in Google auth. Override the agent models in `oh-my-opencode.json` (or `.opencode/oh-my-opencode.json`):
|
||||
The `opencode-antigravity-auth` plugin uses different model names than the built-in Google auth. Override the agent models in `oh-my-openagent.json` (or `.opencode/oh-my-openagent.json`):
|
||||
|
||||
```json
|
||||
{
|
||||
@@ -176,7 +176,7 @@ The `opencode-antigravity-auth` plugin uses different model names than the built
|
||||
|
||||
**Available models (Gemini CLI quota)**:
|
||||
|
||||
- `google/gemini-2.5-flash`, `google/gemini-2.5-pro`, `google/gemini-3-flash-preview`, `google/gemini-3-pro-preview`
|
||||
- `google/gemini-2.5-flash`, `google/gemini-2.5-pro`, `google/gemini-3-flash-preview`, `google/gemini-3.1-pro-preview`
|
||||
|
||||
> **Note**: Legacy tier-suffixed names like `google/antigravity-gemini-3-pro-high` still work but variants are recommended. Use `--variant=high` with the base model name instead.
|
||||
|
||||
@@ -201,7 +201,7 @@ GitHub Copilot is supported as a **fallback provider** when native providers are
|
||||
|
||||
##### Model Mappings
|
||||
|
||||
When GitHub Copilot is the best available provider, oh-my-opencode uses these model assignments:
|
||||
When GitHub Copilot is the best available provider, oh-my-openagent uses these model assignments:
|
||||
|
||||
| Agent | Model |
|
||||
| ------------- | --------------------------------- |
|
||||
@@ -243,7 +243,7 @@ When OpenCode Zen is the best available provider (no native or Copilot), these m
|
||||
Run the installer and select "Yes" for GitHub Copilot:
|
||||
|
||||
```bash
|
||||
bunx oh-my-opencode install
|
||||
bunx oh-my-openagent install
|
||||
# Select your subscriptions (Claude, ChatGPT, Gemini)
|
||||
# When prompted: "Do you have a GitHub Copilot subscription?" → Select "Yes"
|
||||
```
|
||||
@@ -251,7 +251,7 @@ bunx oh-my-opencode install
|
||||
Or use non-interactive mode:
|
||||
|
||||
```bash
|
||||
bunx oh-my-opencode install --no-tui --claude=no --openai=no --gemini=no --copilot=yes
|
||||
bunx oh-my-openagent install --no-tui --claude=no --openai=no --gemini=no --copilot=yes
|
||||
```
|
||||
|
||||
Then authenticate with GitHub:
|
||||
@@ -263,7 +263,7 @@ opencode auth login
|
||||
|
||||
### Step 5: Understand Your Model Setup
|
||||
|
||||
You've just configured oh-my-opencode. Here's what got set up and why.
|
||||
You've just configured oh-my-openagent. Here's what got set up and why.
|
||||
|
||||
#### Model Families: What You're Working With
|
||||
|
||||
@@ -287,13 +287,14 @@ Not all models behave the same way. Understanding which models are "similar" hel
|
||||
| ----------------- | -------------------------------- | ------------------------------------------------- |
|
||||
| **GPT-5.3-codex** | openai, github-copilot, opencode | Deep coding powerhouse. Required for Hephaestus. |
|
||||
| **GPT-5.4** | openai, github-copilot, opencode | High intelligence. Default for Oracle. |
|
||||
| **GPT-5.4 Mini** | openai, github-copilot, opencode | Fast + strong reasoning. Default for quick category. |
|
||||
| **GPT-5-Nano** | opencode | Ultra-cheap, fast. Good for simple utility tasks. |
|
||||
|
||||
**Different-Behavior Models**:
|
||||
|
||||
| Model | Provider(s) | Notes |
|
||||
| --------------------- | -------------------------------- | ----------------------------------------------------------- |
|
||||
| **Gemini 3 Pro** | google, github-copilot, opencode | Excels at visual/frontend tasks. Different reasoning style. |
|
||||
| **Gemini 3.1 Pro** | google, github-copilot, opencode | Excels at visual/frontend tasks. Different reasoning style. |
|
||||
| **Gemini 3 Flash** | google, github-copilot, opencode | Fast, good for doc search and light tasks. |
|
||||
| **MiniMax M2.5** | venice | Fast and smart. Good for utility tasks. |
|
||||
| **MiniMax M2.5 Free** | opencode | Free-tier MiniMax. Fast for search/retrieval. |
|
||||
@@ -305,7 +306,7 @@ Not all models behave the same way. Understanding which models are "similar" hel
|
||||
| **Grok Code Fast 1** | github-copilot, venice | Very fast | Optimized for code grep/search. Default for Explore. |
|
||||
| **Claude Haiku 4.5** | anthropic, opencode | Fast | Good balance of speed and intelligence. |
|
||||
| **MiniMax M2.5 (Free)** | opencode, venice | Fast | Smart for its speed class. |
|
||||
| **GPT-5.3-codex-spark** | openai | Extremely fast | Blazing fast but compacts so aggressively that oh-my-opencode's context management doesn't work well with it. Not recommended for omo agents. |
|
||||
| **GPT-5.3-codex-spark** | openai | Extremely fast | Blazing fast but compacts so aggressively that oh-my-openagent's context management doesn't work well with it. Not recommended for omo agents. |
|
||||
|
||||
#### What Each Agent Does and Which Model It Got
|
||||
|
||||
@@ -316,7 +317,7 @@ Based on your subscriptions, here's how the agents were configured:
|
||||
| Agent | Role | Default Chain | What It Does |
|
||||
| ------------ | ---------------- | ----------------------------------------------- | ---------------------------------------------------------------------------------------- |
|
||||
| **Sisyphus** | Main ultraworker | Opus (max) → Kimi K2.5 → GLM 5 → Big Pickle | Primary coding agent. Orchestrates everything. **Never use GPT — no GPT prompt exists.** |
|
||||
| **Metis** | Plan review | Opus (max) → Kimi K2.5 → GPT-5.4 → Gemini 3 Pro | Reviews Prometheus plans for gaps. |
|
||||
| **Metis** | Plan review | Opus (max) → Kimi K2.5 → GPT-5.4 → Gemini 3.1 Pro | Reviews Prometheus plans for gaps. |
|
||||
|
||||
**Dual-Prompt Agents** (auto-switch between Claude and GPT prompts):
|
||||
|
||||
@@ -326,7 +327,7 @@ Priority: **Claude > GPT > Claude-like models**
|
||||
|
||||
| Agent | Role | Default Chain | GPT Prompt? |
|
||||
| -------------- | ----------------- | ---------------------------------------------------------- | ---------------------------------------------------------------- |
|
||||
| **Prometheus** | Strategic planner | Opus (max) → **GPT-5.4 (high)** → Kimi K2.5 → Gemini 3 Pro | Yes — XML-tagged, principle-driven (~300 lines vs ~1,100 Claude) |
|
||||
| **Prometheus** | Strategic planner | Opus (max) → **GPT-5.4 (high)** → Kimi K2.5 → Gemini 3.1 Pro | Yes — XML-tagged, principle-driven (~300 lines vs ~1,100 Claude) |
|
||||
| **Atlas** | Todo orchestrator | **Kimi K2.5** → Sonnet → GPT-5.4 | Yes — GPT-optimized todo management |
|
||||
|
||||
**GPT-Native Agents** (built for GPT, don't override to Claude):
|
||||
@@ -334,8 +335,8 @@ Priority: **Claude > GPT > Claude-like models**
|
||||
| Agent | Role | Default Chain | Notes |
|
||||
| -------------- | ---------------------- | -------------------------------------- | ------------------------------------------------------ |
|
||||
| **Hephaestus** | Deep autonomous worker | GPT-5.3-codex (medium) only | "Codex on steroids." No fallback. Requires GPT access. |
|
||||
| **Oracle** | Architecture/debugging | GPT-5.4 (high) → Gemini 3 Pro → Opus | High-IQ strategic backup. GPT preferred. |
|
||||
| **Momus** | High-accuracy reviewer | GPT-5.4 (medium) → Opus → Gemini 3 Pro | Verification agent. GPT preferred. |
|
||||
| **Oracle** | Architecture/debugging | GPT-5.4 (high) → Gemini 3.1 Pro → Opus | High-IQ strategic backup. GPT preferred. |
|
||||
| **Momus** | High-accuracy reviewer | GPT-5.4 (medium) → Opus → Gemini 3.1 Pro | Verification agent. GPT preferred. |
|
||||
|
||||
**Utility Agents** (speed over intelligence):
|
||||
|
||||
@@ -364,7 +365,7 @@ This is why Prometheus and Atlas ship separate prompts per model family — they
|
||||
|
||||
#### Custom Model Configuration
|
||||
|
||||
If the user wants to override which model an agent uses, you can customize in `oh-my-opencode.json`:
|
||||
If the user wants to override which model an agent uses, you can customize in `oh-my-openagent.json`:
|
||||
|
||||
```jsonc
|
||||
{
|
||||
@@ -406,7 +407,7 @@ GPT (5.3-codex, 5.2) > Claude Opus (decent fallback) > Gemini (acceptable)
|
||||
|
||||
#### Provider Priority Chain
|
||||
|
||||
When multiple providers are available, oh-my-opencode uses this priority:
|
||||
When multiple providers are available, oh-my-openagent uses this priority:
|
||||
|
||||
```
|
||||
Native (anthropic/, openai/, google/) > Kimi for Coding > GitHub Copilot > Venice > OpenCode Zen > Z.ai Coding Plan
|
||||
@@ -424,7 +425,7 @@ Read this document again, think about you have done everything correctly.
|
||||
|
||||
#### Say 'Congratulations! 🎉' to the user
|
||||
|
||||
Say to user: Congratulations! 🎉 You have successfully set up OhMyOpenCode! Type `opencode` in your terminal to start using it.
|
||||
Say to user: Congratulations! 🎉 You have successfully set up OhMyOpenAgent! Type `opencode` in your terminal to start using it.
|
||||
|
||||
#### Free advertising
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# Orchestration System Guide
|
||||
|
||||
Oh My OpenCode's orchestration system transforms a simple AI agent into a coordinated development team through **separation of planning and execution**.
|
||||
Oh My OpenAgent's orchestration system transforms a simple AI agent into a coordinated development team through **separation of planning and execution**.
|
||||
|
||||
---
|
||||
|
||||
@@ -298,7 +298,7 @@ task({ category: "quick", prompt: "..." }); // "Just get it done fast"
|
||||
| `visual-engineering` | Gemini 3.1 Pro | Frontend, UI/UX, design, styling, animation |
|
||||
| `ultrabrain` | GPT-5.4 (xhigh) | Deep logical reasoning, complex architecture decisions |
|
||||
| `artistry` | Gemini 3.1 Pro (high) | Highly creative or artistic tasks, novel ideas |
|
||||
| `quick` | Claude Haiku 4.5 | Trivial tasks - single file changes, typo fixes |
|
||||
| `quick` | GPT-5.4 Mini | Trivial tasks - single file changes, typo fixes |
|
||||
| `deep` | GPT-5.3 Codex (medium) | Goal-oriented autonomous problem-solving, thorough research |
|
||||
| `unspecified-low` | Claude Sonnet 4.6 | Tasks that don't fit other categories, low effort |
|
||||
| `unspecified-high` | Claude Opus 4.6 (max) | Tasks that don't fit other categories, high effort |
|
||||
@@ -475,7 +475,7 @@ Use the `ulw` keyword in Sisyphus when:
|
||||
|
||||
## Configuration
|
||||
|
||||
You can control related features in `oh-my-opencode.json`:
|
||||
You can control related features in `oh-my-openagent.json`:
|
||||
|
||||
```jsonc
|
||||
{
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# What Is Oh My OpenCode?
|
||||
# What Is Oh My OpenAgent?
|
||||
|
||||
Oh My OpenCode is a multi-model agent orchestration harness for OpenCode. It transforms a single AI agent into a coordinated development team that actually ships code.
|
||||
Oh My OpenAgent is a multi-model agent orchestration harness for OpenCode. It transforms a single AI agent into a coordinated development team that actually ships code.
|
||||
|
||||
Not locked to Claude. Not locked to OpenAI. Not locked to anyone.
|
||||
|
||||
@@ -15,7 +15,7 @@ Just better results, cheaper models, real orchestration.
|
||||
Paste this into your LLM agent session:
|
||||
|
||||
```
|
||||
Install and configure oh-my-opencode by following the instructions here:
|
||||
Install and configure oh-my-openagent by following the instructions here:
|
||||
https://raw.githubusercontent.com/code-yeongyu/oh-my-openagent/refs/heads/dev/docs/guide/installation.md
|
||||
```
|
||||
|
||||
@@ -41,13 +41,13 @@ We used to call this "Claude Code on steroids." That was wrong.
|
||||
|
||||
This isn't about making Claude Code better. It's about breaking free from the idea that one model, one provider, one way of working is enough. Anthropic wants you locked in. OpenAI wants you locked in. Everyone wants you locked in.
|
||||
|
||||
Oh My OpenCode doesn't play that game. It orchestrates across models, picking the right brain for the right job. Claude for orchestration. GPT for deep reasoning. Gemini for frontend. Haiku for quick tasks. All working together, automatically.
|
||||
Oh My OpenAgent doesn't play that game. It orchestrates across models, picking the right brain for the right job. Claude for orchestration. GPT for deep reasoning. Gemini for frontend. GPT-5.4 Mini for quick tasks. All working together, automatically.
|
||||
|
||||
---
|
||||
|
||||
## How It Works: Agent Orchestration
|
||||
|
||||
Instead of one agent doing everything, Oh My OpenCode uses **specialized agents that delegate to each other** based on task type.
|
||||
Instead of one agent doing everything, Oh My OpenAgent uses **specialized agents that delegate to each other** based on task type.
|
||||
|
||||
**The Architecture:**
|
||||
|
||||
@@ -99,9 +99,9 @@ Use Hephaestus when you need deep architectural reasoning, complex debugging acr
|
||||
|
||||
**Why this beats vanilla Codex CLI:**
|
||||
|
||||
- **Multi-model orchestration.** Pure Codex is single-model. OmO routes different tasks to different models automatically. GPT for deep reasoning. Gemini for frontend. Haiku for speed. The right brain for the right job.
|
||||
- **Multi-model orchestration.** Pure Codex is single-model. OmO routes different tasks to different models automatically. GPT for deep reasoning. Gemini for frontend. GPT-5.4 Mini for speed. The right brain for the right job.
|
||||
- **Background agents.** Fire 5+ agents in parallel. Something Codex simply cannot do. While one agent writes code, another researches patterns, another checks documentation. Like a real dev team.
|
||||
- **Category system.** Tasks are routed by intent, not model name. `visual-engineering` gets Gemini. `ultrabrain` gets GPT-5.4. `quick` gets Haiku. No manual juggling.
|
||||
- **Category system.** Tasks are routed by intent, not model name. `visual-engineering` gets Gemini. `ultrabrain` gets GPT-5.4. `quick` gets GPT-5.4 Mini. No manual juggling.
|
||||
- **Accumulated wisdom.** Subagents learn from previous results. Conventions discovered in task 1 are passed to task 5. Mistakes made early aren't repeated. The system gets smarter as it works.
|
||||
|
||||
### Prometheus: The Strategic Planner
|
||||
@@ -154,7 +154,7 @@ Use Prometheus for multi-day projects, critical production changes, complex refa
|
||||
|
||||
## Agent Model Matching
|
||||
|
||||
Different agents work best with different models. Oh My OpenCode automatically assigns optimal models, but you can customize everything.
|
||||
Different agents work best with different models. Oh My OpenAgent automatically assigns optimal models, but you can customize everything.
|
||||
|
||||
### Default Configuration
|
||||
|
||||
@@ -168,7 +168,7 @@ You can override specific agents or categories in your config:
|
||||
|
||||
```jsonc
|
||||
{
|
||||
"$schema": "https://raw.githubusercontent.com/code-yeongyu/oh-my-openagent/dev/assets/oh-my-opencode.schema.json",
|
||||
"$schema": "https://raw.githubusercontent.com/code-yeongyu/oh-my-openagent/dev/assets/oh-my-openagent.schema.json",
|
||||
|
||||
"agents": {
|
||||
// Main orchestrator: Claude Opus or Kimi K2.5 work best
|
||||
@@ -195,8 +195,8 @@ You can override specific agents or categories in your config:
|
||||
// General high-effort work
|
||||
"unspecified-high": { "model": "anthropic/claude-opus-4-6", "variant": "max" },
|
||||
|
||||
// Quick tasks: use the cheapest models
|
||||
"quick": { "model": "anthropic/claude-haiku-4-5" },
|
||||
// Quick tasks: use GPT-5.4-mini (fast and cheap)
|
||||
"quick": { "model": "openai/gpt-5.4-mini" },
|
||||
|
||||
// Deep reasoning: GPT-5.4
|
||||
"ultrabrain": { "model": "openai/gpt-5.4", "variant": "xhigh" },
|
||||
@@ -220,7 +220,7 @@ You can override specific agents or categories in your config:
|
||||
|
||||
**Different-behavior models**:
|
||||
|
||||
- Gemini 3 Pro — excels at visual/frontend tasks
|
||||
- Gemini 3.1 Pro — excels at visual/frontend tasks
|
||||
- MiniMax M2.5 — fast and smart for utility tasks
|
||||
- Grok Code Fast 1 — optimized for code grep/search
|
||||
|
||||
@@ -232,7 +232,7 @@ See the [Agent-Model Matching Guide](./agent-model-matching.md) for complete det
|
||||
|
||||
Claude Code is good. But it's a single agent running a single model doing everything alone.
|
||||
|
||||
Oh My OpenCode turns that into a coordinated team:
|
||||
Oh My OpenAgent turns that into a coordinated team:
|
||||
|
||||
**Parallel execution.** Claude Code processes one thing at a time. OmO fires background agents in parallel — research, implementation, and verification happening simultaneously. Like having 5 engineers instead of 1.
|
||||
|
||||
@@ -246,7 +246,7 @@ Oh My OpenCode turns that into a coordinated team:
|
||||
|
||||
**Discipline enforcement.** Todo enforcer yanks idle agents back to work. Comment checker strips AI slop. Ralph Loop keeps going until 100% done. The system doesn't let the agent slack off.
|
||||
|
||||
**The fundamental advantage.** Models have different temperaments. Claude thinks deeply. GPT reasons architecturally. Gemini visualizes. Haiku moves fast. Single-model tools force you to pick one personality for all tasks. Oh My OpenCode leverages them all, routing by task type. This isn't a temporary hack — it's the only architecture that makes sense as models specialize further. The gap between multi-model orchestration and single-model limitation widens every month. We're betting on that future.
|
||||
**The fundamental advantage.** Models have different temperaments. Claude thinks deeply. GPT reasons architecturally. Gemini visualizes. Haiku moves fast. Single-model tools force you to pick one personality for all tasks. Oh My OpenAgent leverages them all, routing by task type. This isn't a temporary hack — it's the only architecture that makes sense as models specialize further. The gap between multi-model orchestration and single-model limitation widens every month. We're betting on that future.
|
||||
|
||||
---
|
||||
|
||||
@@ -256,7 +256,7 @@ Before acting on any request, Sisyphus classifies your true intent.
|
||||
|
||||
Are you asking for research? Implementation? Investigation? A fix? The Intent Gate figures out what you actually want, not just the literal words you typed. This means the agent understands context, nuance, and the real goal behind your request.
|
||||
|
||||
Claude Code doesn't have this. It takes your prompt and runs. Oh My OpenCode thinks first, then acts.
|
||||
Claude Code doesn't have this. It takes your prompt and runs. Oh My OpenAgent thinks first, then acts.
|
||||
|
||||
---
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# Manifesto
|
||||
|
||||
The principles and philosophy behind Oh My OpenCode.
|
||||
The principles and philosophy behind Oh My OpenAgent.
|
||||
|
||||
---
|
||||
|
||||
@@ -20,7 +20,7 @@ When you find yourself:
|
||||
|
||||
That's not "human-AI collaboration." That's the AI failing to do its job.
|
||||
|
||||
**Oh My OpenCode is built on this premise**: Human intervention during agentic work is fundamentally a wrong signal. If the system is designed correctly, the agent should complete the work without requiring you to babysit it.
|
||||
**Oh My OpenAgent is built on this premise**: Human intervention during agentic work is fundamentally a wrong signal. If the system is designed correctly, the agent should complete the work without requiring you to babysit it.
|
||||
|
||||
---
|
||||
|
||||
@@ -144,7 +144,7 @@ Human Intent → Agent Execution → Verified Result
|
||||
(intervention only on true failure)
|
||||
```
|
||||
|
||||
Everything in Oh My OpenCode is designed to make this loop work:
|
||||
Everything in Oh My OpenAgent is designed to make this loop work:
|
||||
|
||||
| Feature | Purpose |
|
||||
|---------|---------|
|
||||
|
||||
@@ -1,15 +1,15 @@
|
||||
# CLI Reference
|
||||
|
||||
Complete reference for the `oh-my-opencode` command-line interface.
|
||||
Complete reference for the `oh-my-openagent` command-line interface.
|
||||
|
||||
## Basic Usage
|
||||
|
||||
```bash
|
||||
# Display help
|
||||
bunx oh-my-opencode
|
||||
bunx oh-my-openagent
|
||||
|
||||
# Or with npx
|
||||
npx oh-my-opencode
|
||||
npx oh-my-openagent
|
||||
```
|
||||
|
||||
## Commands
|
||||
@@ -27,20 +27,20 @@ npx oh-my-opencode
|
||||
|
||||
## install
|
||||
|
||||
Interactive installation tool for initial Oh-My-OpenCode setup. Provides a TUI based on `@clack/prompts`.
|
||||
Interactive installation tool for initial Oh-My-OpenAgent setup. Provides a TUI based on `@clack/prompts`.
|
||||
|
||||
### Usage
|
||||
|
||||
```bash
|
||||
bunx oh-my-opencode install
|
||||
bunx oh-my-openagent install
|
||||
```
|
||||
|
||||
### Installation Process
|
||||
|
||||
1. **Provider Selection**: Choose your AI provider (Claude, ChatGPT, or Gemini)
|
||||
2. **API Key Input**: Enter the API key for your selected provider
|
||||
3. **Configuration File Creation**: Generates `opencode.json` or `oh-my-opencode.json` files
|
||||
4. **Plugin Registration**: Automatically registers the oh-my-opencode plugin in OpenCode settings
|
||||
3. **Configuration File Creation**: Generates `opencode.json` or `oh-my-openagent.json` files
|
||||
4. **Plugin Registration**: Automatically registers the oh-my-openagent plugin in OpenCode settings
|
||||
|
||||
### Options
|
||||
|
||||
@@ -53,12 +53,12 @@ bunx oh-my-opencode install
|
||||
|
||||
## doctor
|
||||
|
||||
Diagnoses your environment to ensure Oh-My-OpenCode is functioning correctly. Performs 17+ health checks.
|
||||
Diagnoses your environment to ensure Oh-My-OpenAgent is functioning correctly. Performs 17+ health checks.
|
||||
|
||||
### Usage
|
||||
|
||||
```bash
|
||||
bunx oh-my-opencode doctor
|
||||
bunx oh-my-openagent doctor
|
||||
```
|
||||
|
||||
### Diagnostic Categories
|
||||
@@ -83,10 +83,10 @@ bunx oh-my-opencode doctor
|
||||
### Example Output
|
||||
|
||||
```
|
||||
oh-my-opencode doctor
|
||||
oh-my-openagent doctor
|
||||
|
||||
┌──────────────────────────────────────────────────┐
|
||||
│ Oh-My-OpenCode Doctor │
|
||||
│ Oh-My-OpenAgent Doctor │
|
||||
└──────────────────────────────────────────────────┘
|
||||
|
||||
Installation
|
||||
@@ -94,7 +94,7 @@ Installation
|
||||
✓ Plugin registered in opencode.json
|
||||
|
||||
Configuration
|
||||
✓ oh-my-opencode.json is valid
|
||||
✓ oh-my-openagent.json is valid
|
||||
⚠ categories.visual-engineering: using default model
|
||||
|
||||
Authentication
|
||||
@@ -119,7 +119,7 @@ Executes OpenCode sessions and monitors task completion.
|
||||
### Usage
|
||||
|
||||
```bash
|
||||
bunx oh-my-opencode run [prompt]
|
||||
bunx oh-my-openagent run [prompt]
|
||||
```
|
||||
|
||||
### Options
|
||||
@@ -148,16 +148,16 @@ Manages OAuth 2.1 authentication for remote MCP servers.
|
||||
|
||||
```bash
|
||||
# Login to an OAuth-protected MCP server
|
||||
bunx oh-my-opencode mcp oauth login <server-name> --server-url https://api.example.com
|
||||
bunx oh-my-openagent mcp oauth login <server-name> --server-url https://api.example.com
|
||||
|
||||
# Login with explicit client ID and scopes
|
||||
bunx oh-my-opencode mcp oauth login my-api --server-url https://api.example.com --client-id my-client --scopes "read,write"
|
||||
bunx oh-my-openagent mcp oauth login my-api --server-url https://api.example.com --client-id my-client --scopes "read,write"
|
||||
|
||||
# Remove stored OAuth tokens
|
||||
bunx oh-my-opencode mcp oauth logout <server-name>
|
||||
bunx oh-my-openagent mcp oauth logout <server-name>
|
||||
|
||||
# Check OAuth token status
|
||||
bunx oh-my-opencode mcp oauth status [server-name]
|
||||
bunx oh-my-openagent mcp oauth status [server-name]
|
||||
```
|
||||
|
||||
### Options
|
||||
@@ -178,8 +178,8 @@ Tokens are stored in `~/.config/opencode/mcp-oauth.json` with `0600` permissions
|
||||
|
||||
The CLI searches for configuration files in the following locations (in priority order):
|
||||
|
||||
1. **Project Level**: `.opencode/oh-my-opencode.json`
|
||||
2. **User Level**: `~/.config/opencode/oh-my-opencode.json`
|
||||
1. **Project Level**: `.opencode/oh-my-openagent.json`
|
||||
2. **User Level**: `~/.config/opencode/oh-my-openagent.json`
|
||||
|
||||
### JSONC Support
|
||||
|
||||
@@ -219,17 +219,17 @@ bun install -g opencode@latest
|
||||
|
||||
```bash
|
||||
# Reinstall plugin
|
||||
bunx oh-my-opencode install
|
||||
bunx oh-my-openagent install
|
||||
```
|
||||
|
||||
### Doctor Check Failures
|
||||
|
||||
```bash
|
||||
# Diagnose with detailed information
|
||||
bunx oh-my-opencode doctor --verbose
|
||||
bunx oh-my-openagent doctor --verbose
|
||||
|
||||
# Check specific category only
|
||||
bunx oh-my-opencode doctor --category authentication
|
||||
bunx oh-my-openagent doctor --category authentication
|
||||
```
|
||||
|
||||
---
|
||||
@@ -240,10 +240,10 @@ Use the `--no-tui` option for CI/CD environments.
|
||||
|
||||
```bash
|
||||
# Run doctor in CI environment
|
||||
bunx oh-my-opencode doctor --no-tui --json
|
||||
bunx oh-my-openagent doctor --no-tui --json
|
||||
|
||||
# Save results to file
|
||||
bunx oh-my-opencode doctor --json > doctor-report.json
|
||||
bunx oh-my-openagent doctor --json > doctor-report.json
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# Configuration Reference
|
||||
|
||||
Complete reference for `oh-my-opencode.jsonc` configuration. This document covers every available option with examples.
|
||||
Complete reference for `oh-my-openagent.jsonc` configuration. This document covers every available option with examples.
|
||||
|
||||
---
|
||||
|
||||
@@ -44,13 +44,13 @@ Complete reference for `oh-my-opencode.jsonc` configuration. This document cover
|
||||
|
||||
Priority order (project overrides user):
|
||||
|
||||
1. `.opencode/oh-my-opencode.jsonc` / `.opencode/oh-my-opencode.json`
|
||||
1. `.opencode/oh-my-openagent.jsonc` / `.opencode/oh-my-openagent.json`
|
||||
2. User config (`.jsonc` preferred over `.json`):
|
||||
|
||||
| Platform | Path |
|
||||
| ----------- | ----------------------------------------- |
|
||||
| macOS/Linux | `~/.config/opencode/oh-my-opencode.jsonc` |
|
||||
| Windows | `%APPDATA%\opencode\oh-my-opencode.jsonc` |
|
||||
| macOS/Linux | `~/.config/opencode/oh-my-openagent.jsonc` |
|
||||
| Windows | `%APPDATA%\opencode\oh-my-openagent.jsonc` |
|
||||
|
||||
JSONC supports `// line comments`, `/* block comments */`, and trailing commas.
|
||||
|
||||
@@ -58,11 +58,11 @@ Enable schema autocomplete:
|
||||
|
||||
```json
|
||||
{
|
||||
"$schema": "https://raw.githubusercontent.com/code-yeongyu/oh-my-openagent/dev/assets/oh-my-opencode.schema.json"
|
||||
"$schema": "https://raw.githubusercontent.com/code-yeongyu/oh-my-openagent/dev/assets/oh-my-openagent.schema.json"
|
||||
}
|
||||
```
|
||||
|
||||
Run `bunx oh-my-opencode install` for guided setup. Run `opencode models` to list available models.
|
||||
Run `bunx oh-my-openagent install` for guided setup. Run `opencode models` to list available models.
|
||||
|
||||
### Quick Start Example
|
||||
|
||||
@@ -70,7 +70,7 @@ Here's a practical starting configuration:
|
||||
|
||||
```jsonc
|
||||
{
|
||||
"$schema": "https://raw.githubusercontent.com/code-yeongyu/oh-my-openagent/dev/assets/oh-my-opencode.schema.json",
|
||||
"$schema": "https://raw.githubusercontent.com/code-yeongyu/oh-my-openagent/dev/assets/oh-my-openagent.schema.json",
|
||||
|
||||
"agents": {
|
||||
// Main orchestrator: Claude Opus or Kimi K2.5 work best
|
||||
@@ -228,7 +228,7 @@ Domain-specific model delegation used by the `task()` tool. When Sisyphus delega
|
||||
| `ultrabrain` | `openai/gpt-5.4` (xhigh) | Deep logical reasoning, complex architecture |
|
||||
| `deep` | `openai/gpt-5.3-codex` (medium) | Autonomous problem-solving, thorough research |
|
||||
| `artistry` | `google/gemini-3.1-pro` (high) | Creative/unconventional approaches |
|
||||
| `quick` | `anthropic/claude-haiku-4-5` | Trivial tasks, typo fixes, single-file changes |
|
||||
| `quick` | `openai/gpt-5.4-mini` | Trivial tasks, typo fixes, single-file changes |
|
||||
| `unspecified-low` | `anthropic/claude-sonnet-4-6` | General tasks, low effort |
|
||||
| `unspecified-high` | `anthropic/claude-opus-4-6` (max) | General tasks, high effort |
|
||||
| `writing` | `google/gemini-3-flash` | Documentation, prose, technical writing |
|
||||
@@ -286,12 +286,12 @@ Disable categories: `{ "disabled_categories": ["ultrabrain"] }`
|
||||
| **ultrabrain** | `gpt-5.4` | `gpt-5.4` → `gemini-3.1-pro` → `claude-opus-4-6` |
|
||||
| **deep** | `gpt-5.3-codex` | `gpt-5.3-codex` → `claude-opus-4-6` → `gemini-3.1-pro` |
|
||||
| **artistry** | `gemini-3.1-pro` | `gemini-3.1-pro` → `claude-opus-4-6` → `gpt-5.4` |
|
||||
| **quick** | `claude-haiku-4-5` | `claude-haiku-4-5` → `gemini-3-flash` → `gpt-5-nano` |
|
||||
| **quick** | `gpt-5.4-mini` | `gpt-5.4-mini` → `claude-haiku-4-5` → `gemini-3-flash` → `minimax-m2.5` → `gpt-5-nano` |
|
||||
| **unspecified-low** | `claude-sonnet-4-6` | `claude-sonnet-4-6` → `gpt-5.3-codex` → `gemini-3-flash` |
|
||||
| **unspecified-high** | `claude-opus-4-6` | `claude-opus-4-6` → `gpt-5.4 (high)` → `glm-5` → `k2p5` → `kimi-k2.5` |
|
||||
| **writing** | `gemini-3-flash` | `gemini-3-flash` → `claude-sonnet-4-6` |
|
||||
|
||||
Run `bunx oh-my-opencode doctor --verbose` to see effective model resolution for your config.
|
||||
Run `bunx oh-my-openagent doctor --verbose` to see effective model resolution for your config.
|
||||
|
||||
---
|
||||
|
||||
@@ -418,15 +418,14 @@ Disable built-in skills: `{ "disabled_skills": ["playwright"] }`
|
||||
Disable built-in hooks via `disabled_hooks`:
|
||||
|
||||
```json
|
||||
{ "disabled_hooks": ["comment-checker", "gpt-permission-continuation"] }
|
||||
{ "disabled_hooks": ["comment-checker"] }
|
||||
```
|
||||
|
||||
Available hooks: `gpt-permission-continuation`, `todo-continuation-enforcer`, `context-window-monitor`, `session-recovery`, `session-notification`, `comment-checker`, `grep-output-truncator`, `tool-output-truncator`, `directory-agents-injector`, `directory-readme-injector`, `empty-task-response-detector`, `think-mode`, `anthropic-context-window-limit-recovery`, `rules-injector`, `background-notification`, `auto-update-checker`, `startup-toast`, `keyword-detector`, `agent-usage-reminder`, `non-interactive-env`, `interactive-bash-session`, `compaction-context-injector`, `thinking-block-validator`, `claude-code-hooks`, `ralph-loop`, `preemptive-compaction`, `auto-slash-command`, `sisyphus-junior-notepad`, `no-sisyphus-gpt`, `start-work`, `runtime-fallback`
|
||||
Available hooks: `todo-continuation-enforcer`, `context-window-monitor`, `session-recovery`, `session-notification`, `comment-checker`, `grep-output-truncator`, `tool-output-truncator`, `directory-agents-injector`, `directory-readme-injector`, `empty-task-response-detector`, `think-mode`, `anthropic-context-window-limit-recovery`, `rules-injector`, `background-notification`, `auto-update-checker`, `startup-toast`, `keyword-detector`, `agent-usage-reminder`, `non-interactive-env`, `interactive-bash-session`, `compaction-context-injector`, `thinking-block-validator`, `claude-code-hooks`, `ralph-loop`, `preemptive-compaction`, `auto-slash-command`, `sisyphus-junior-notepad`, `no-sisyphus-gpt`, `start-work`, `runtime-fallback`
|
||||
|
||||
**Notes:**
|
||||
|
||||
- `directory-agents-injector` — auto-disabled on OpenCode 1.1.37+ (native AGENTS.md support)
|
||||
- `gpt-permission-continuation` — resumes GPT sessions only when the last assistant reply ends with a permission-seeking tail like `If you want, ...`. Disable it if you prefer GPT sessions to wait for explicit user follow-up.
|
||||
- `no-sisyphus-gpt` — **do not disable**. It blocks incompatible GPT models for Sisyphus while allowing the dedicated GPT-5.4 prompt path.
|
||||
- `startup-toast` is a sub-feature of `auto-update-checker`. Disable just the toast by adding `startup-toast` to `disabled_hooks`.
|
||||
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
# Oh-My-OpenCode Features Reference
|
||||
# Oh-My-OpenAgent Features Reference
|
||||
|
||||
## Agents
|
||||
|
||||
Oh-My-OpenCode provides 11 specialized AI agents. Each has distinct expertise, optimized models, and tool permissions.
|
||||
Oh-My-OpenAgent provides 11 specialized AI agents. Each has distinct expertise, optimized models, and tool permissions.
|
||||
|
||||
### Core Agents
|
||||
|
||||
@@ -90,7 +90,7 @@ When running inside tmux:
|
||||
- Each pane shows agent output live
|
||||
- Auto-cleanup when agents complete
|
||||
|
||||
Customize agent models, prompts, and permissions in `oh-my-opencode.json`.
|
||||
Customize agent models, prompts, and permissions in `oh-my-openagent.json`.
|
||||
|
||||
## Category System
|
||||
|
||||
@@ -111,7 +111,7 @@ By combining these two concepts, you can generate optimal agents through `task`.
|
||||
| `ultrabrain` | `openai/gpt-5.4` (xhigh) | Deep logical reasoning, complex architecture decisions requiring extensive analysis |
|
||||
| `deep` | `openai/gpt-5.3-codex` (medium) | Goal-oriented autonomous problem-solving. Thorough research before action. For hairy problems requiring deep understanding. |
|
||||
| `artistry` | `google/gemini-3.1-pro` (high) | Highly creative/artistic tasks, novel ideas |
|
||||
| `quick` | `anthropic/claude-haiku-4-5` | Trivial tasks - single file changes, typo fixes, simple modifications |
|
||||
| `quick` | `openai/gpt-5.4-mini` | Trivial tasks - single file changes, typo fixes, simple modifications |
|
||||
| `unspecified-low` | `anthropic/claude-sonnet-4-6` | Tasks that don't fit other categories, low effort required |
|
||||
| `unspecified-high` | `anthropic/claude-opus-4-6` (max) | Tasks that don't fit other categories, high effort required |
|
||||
| `writing` | `google/gemini-3-flash` | Documentation, prose, technical writing |
|
||||
@@ -129,7 +129,7 @@ task({
|
||||
|
||||
### Custom Categories
|
||||
|
||||
You can define custom categories in `oh-my-opencode.json`.
|
||||
You can define custom categories in `oh-my-openagent.json`.
|
||||
|
||||
#### Category Configuration Schema
|
||||
|
||||
@@ -237,7 +237,7 @@ Skills provide specialized workflows with embedded MCP servers and detailed inst
|
||||
|
||||
### Browser Automation Options
|
||||
|
||||
Oh-My-OpenCode provides two browser automation providers, configurable via `browser_automation_engine.provider`.
|
||||
Oh-My-OpenAgent provides two browser automation providers, configurable via `browser_automation_engine.provider`.
|
||||
|
||||
#### Option 1: Playwright MCP (Default)
|
||||
|
||||
@@ -558,7 +558,7 @@ Requires `experimental.task_system: true` in config.
|
||||
|
||||
#### Task System Details
|
||||
|
||||
**Note on Claude Code Alignment**: This implementation follows Claude Code's internal Task tool signatures (`TaskCreate`, `TaskUpdate`, `TaskList`, `TaskGet`) and field naming conventions (`subject`, `blockedBy`, `blocks`, etc.). However, Anthropic has not published official documentation for these tools. This is Oh My OpenCode's own implementation based on observed Claude Code behavior and internal specifications.
|
||||
**Note on Claude Code Alignment**: This implementation follows Claude Code's internal Task tool signatures (`TaskCreate`, `TaskUpdate`, `TaskList`, `TaskGet`) and field naming conventions (`subject`, `blockedBy`, `blocks`, etc.). However, Anthropic has not published official documentation for these tools. This is Oh My OpenAgent's own implementation based on observed Claude Code behavior and internal specifications.
|
||||
|
||||
**Task Schema**:
|
||||
|
||||
@@ -680,7 +680,6 @@ Hooks intercept and modify behavior at key points in the agent lifecycle across
|
||||
| **ralph-loop** | Event + Message | Manages self-referential loop continuation. |
|
||||
| **start-work** | Message | Handles /start-work command execution. |
|
||||
| **auto-slash-command** | Message | Automatically executes slash commands from prompts. |
|
||||
| **gpt-permission-continuation** | Event | Auto-continues GPT sessions when the final assistant reply ends with a permission-seeking tail such as `If you want, ...`. |
|
||||
| **stop-continuation-guard** | Event + Message | Guards the stop-continuation mechanism. |
|
||||
| **category-skill-reminder** | Event + PostToolUse | Reminds agents about available category skills for delegation. |
|
||||
| **anthropic-effort** | Params | Adjusts Anthropic API effort level based on context. |
|
||||
@@ -735,7 +734,6 @@ Hooks intercept and modify behavior at key points in the agent lifecycle across
|
||||
|
||||
| Hook | Event | Description |
|
||||
| ------------------------------ | ----- | ---------------------------------------------------------- |
|
||||
| **gpt-permission-continuation** | Event | Continues GPT replies that end in a permission-seeking tail. |
|
||||
| **todo-continuation-enforcer** | Event | Enforces todo completion — yanks idle agents back to work. |
|
||||
| **compaction-todo-preserver** | Event | Preserves todo state during session compaction. |
|
||||
| **unstable-agent-babysitter** | Event | Handles unstable agent behavior with recovery strategies. |
|
||||
@@ -787,12 +785,10 @@ Disable specific hooks in config:
|
||||
|
||||
```json
|
||||
{
|
||||
"disabled_hooks": ["comment-checker", "gpt-permission-continuation"]
|
||||
"disabled_hooks": ["comment-checker"]
|
||||
}
|
||||
```
|
||||
|
||||
Use `gpt-permission-continuation` when you want GPT sessions to stop at permission-seeking endings instead of auto-resuming.
|
||||
|
||||
## MCPs
|
||||
|
||||
### Built-in MCPs
|
||||
@@ -848,7 +844,7 @@ When a skill MCP has `oauth` configured:
|
||||
Pre-authenticate via CLI:
|
||||
|
||||
```bash
|
||||
bunx oh-my-opencode mcp oauth login <server-name> --server-url https://api.example.com
|
||||
bunx oh-my-openagent mcp oauth login <server-name> --server-url https://api.example.com
|
||||
```
|
||||
|
||||
## Context Injection
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
|
||||
### Problem
|
||||
|
||||
When using Ollama as a provider with oh-my-opencode agents, you may encounter:
|
||||
When using Ollama as a provider with oh-my-openagent agents, you may encounter:
|
||||
|
||||
```
|
||||
JSON Parse error: Unexpected EOF
|
||||
@@ -26,7 +26,7 @@ Claude Code SDK expects a single JSON object, not multiple NDJSON lines, causing
|
||||
**Why this happens:**
|
||||
- **Ollama API**: Returns streaming responses as NDJSON by design
|
||||
- **Claude Code SDK**: Doesn't properly handle NDJSON responses for tool calls
|
||||
- **oh-my-opencode**: Passes through the SDK's behavior (can't fix at this layer)
|
||||
- **oh-my-openagent**: Passes through the SDK's behavior (can't fix at this layer)
|
||||
|
||||
## Solutions
|
||||
|
||||
@@ -114,7 +114,7 @@ curl -s http://localhost:11434/api/chat \
|
||||
|
||||
## Related Issues
|
||||
|
||||
- **oh-my-opencode**: https://github.com/code-yeongyu/oh-my-openagent/issues/1124
|
||||
- **oh-my-openagent**: https://github.com/code-yeongyu/oh-my-openagent/issues/1124
|
||||
- **Ollama API Docs**: https://github.com/ollama/ollama/blob/main/docs/api.md
|
||||
|
||||
## Getting Help
|
||||
|
||||
@@ -101,7 +101,9 @@ async function main() {
|
||||
console.log("\n✅ All platform binaries built successfully!\n");
|
||||
}
|
||||
|
||||
main().catch((error) => {
|
||||
console.error("Fatal error:", error);
|
||||
process.exit(1);
|
||||
});
|
||||
if (import.meta.main) {
|
||||
main().catch((error) => {
|
||||
console.error("Fatal error:", error);
|
||||
process.exit(1);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -2239,6 +2239,70 @@
|
||||
"created_at": "2026-03-17T20:42:42Z",
|
||||
"repoId": 1108837393,
|
||||
"pullRequestNo": 2656
|
||||
},
|
||||
{
|
||||
"name": "walioo",
|
||||
"id": 25835823,
|
||||
"comment_id": 4087098221,
|
||||
"created_at": "2026-03-19T02:13:02Z",
|
||||
"repoId": 1108837393,
|
||||
"pullRequestNo": 2688
|
||||
},
|
||||
{
|
||||
"name": "trafgals",
|
||||
"id": 6454757,
|
||||
"comment_id": 4087725932,
|
||||
"created_at": "2026-03-19T04:22:32Z",
|
||||
"repoId": 1108837393,
|
||||
"pullRequestNo": 2690
|
||||
},
|
||||
{
|
||||
"name": "tonymfer",
|
||||
"id": 66512584,
|
||||
"comment_id": 4091847232,
|
||||
"created_at": "2026-03-19T17:13:49Z",
|
||||
"repoId": 1108837393,
|
||||
"pullRequestNo": 2701
|
||||
},
|
||||
{
|
||||
"name": "nguyentamdat",
|
||||
"id": 16253213,
|
||||
"comment_id": 4096267323,
|
||||
"created_at": "2026-03-20T07:34:22Z",
|
||||
"repoId": 1108837393,
|
||||
"pullRequestNo": 2718
|
||||
},
|
||||
{
|
||||
"name": "whackur",
|
||||
"id": 26926041,
|
||||
"comment_id": 4102330445,
|
||||
"created_at": "2026-03-21T05:27:17Z",
|
||||
"repoId": 1108837393,
|
||||
"pullRequestNo": 2733
|
||||
},
|
||||
{
|
||||
"name": "ndaemy",
|
||||
"id": 18691542,
|
||||
"comment_id": 4103008804,
|
||||
"created_at": "2026-03-21T10:18:22Z",
|
||||
"repoId": 1108837393,
|
||||
"pullRequestNo": 2734
|
||||
},
|
||||
{
|
||||
"name": "0xYiliu",
|
||||
"id": 3838688,
|
||||
"comment_id": 4104738337,
|
||||
"created_at": "2026-03-21T22:59:33Z",
|
||||
"repoId": 1108837393,
|
||||
"pullRequestNo": 2738
|
||||
},
|
||||
{
|
||||
"name": "hunghoang3011",
|
||||
"id": 65234777,
|
||||
"comment_id": 4107900881,
|
||||
"created_at": "2026-03-23T04:28:20Z",
|
||||
"repoId": 1108837393,
|
||||
"pullRequestNo": 2758
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -14,7 +14,7 @@ Entry point `index.ts` orchestrates 5-step initialization: loadConfig → create
|
||||
| `plugin-config.ts` | JSONC parse, multi-level merge, Zod v4 validation |
|
||||
| `create-managers.ts` | TmuxSessionManager, BackgroundManager, SkillMcpManager, ConfigHandler |
|
||||
| `create-tools.ts` | SkillContext + AvailableCategories + ToolRegistry (26 tools) |
|
||||
| `create-hooks.ts` | 3-tier: Core(37) + Continuation(7) + Skill(2) = 46 hooks |
|
||||
| `create-hooks.ts` | 3-tier: Core(39) + Continuation(7) + Skill(2) = 48 hooks |
|
||||
| `plugin-interface.ts` | 8 OpenCode hook handlers: config, tool, chat.message, chat.params, chat.headers, event, tool.execute.before, tool.execute.after |
|
||||
|
||||
## CONFIG LOADING
|
||||
@@ -32,10 +32,10 @@ loadPluginConfig(directory, ctx)
|
||||
|
||||
```
|
||||
createHooks()
|
||||
├─→ createCoreHooks() # 37 hooks
|
||||
├─→ createCoreHooks() # 39 hooks
|
||||
│ ├─ createSessionHooks() # 23: contextWindowMonitor, thinkMode, ralphLoop, modelFallback, runtimeFallback, noSisyphusGpt, noHephaestusNonGpt, anthropicEffort, intentGate...
|
||||
│ ├─ createToolGuardHooks() # 10: commentChecker, rulesInjector, writeExistingFileGuard, jsonErrorRecovery, hashlineReadEnhancer...
|
||||
│ ├─ createToolGuardHooks() # 12: commentChecker, rulesInjector, writeExistingFileGuard, jsonErrorRecovery, hashlineReadEnhancer...
|
||||
│ └─ createTransformHooks() # 4: claudeCodeHooks, keywordDetector, contextInjector, thinkingBlockValidator
|
||||
├─→ createContinuationHooks() # 7: todoContinuationEnforcer, atlas, stopContinuationGuard, ralphLoopActivator...
|
||||
├─→ createContinuationHooks() # 7: todoContinuationEnforcer, atlas, stopContinuationGuard, compactionContextInjector...
|
||||
└─→ createSkillHooks() # 2: categorySkillReminder, autoSlashCommand
|
||||
```
|
||||
|
||||
@@ -39,7 +39,7 @@ export function maybeCreateAtlasConfig(input: {
|
||||
const atlasRequirement = AGENT_MODEL_REQUIREMENTS["atlas"]
|
||||
|
||||
const atlasResolution = applyModelResolution({
|
||||
uiSelectedModel: orchestratorOverride?.model ? undefined : uiSelectedModel,
|
||||
uiSelectedModel: orchestratorOverride?.model !== undefined ? undefined : uiSelectedModel,
|
||||
userModel: orchestratorOverride?.model,
|
||||
requirement: atlasRequirement,
|
||||
availableModels,
|
||||
|
||||
@@ -69,7 +69,7 @@ export function collectPendingBuiltinAgents(input: {
|
||||
const isPrimaryAgent = isFactory(source) && source.mode === "primary"
|
||||
|
||||
let resolution = applyModelResolution({
|
||||
uiSelectedModel: (isPrimaryAgent && !override?.model) ? uiSelectedModel : undefined,
|
||||
uiSelectedModel: (isPrimaryAgent && override?.model === undefined) ? uiSelectedModel : undefined,
|
||||
userModel: override?.model,
|
||||
requirement,
|
||||
availableModels,
|
||||
|
||||
@@ -52,7 +52,7 @@ export function maybeCreateSisyphusConfig(input: {
|
||||
if (disabledAgents.includes("sisyphus") || !meetsSisyphusAnyModelRequirement) return undefined
|
||||
|
||||
let sisyphusResolution = applyModelResolution({
|
||||
uiSelectedModel: sisyphusOverride?.model ? undefined : uiSelectedModel,
|
||||
uiSelectedModel: sisyphusOverride?.model !== undefined ? undefined : uiSelectedModel,
|
||||
userModel: sisyphusOverride?.model,
|
||||
requirement: sisyphusRequirement,
|
||||
availableModels,
|
||||
|
||||
@@ -181,7 +181,7 @@ describe("buildParallelDelegationSection", () => {
|
||||
|
||||
it("#given non-Claude model with deep category #when building #then returns aggressive delegation section", () => {
|
||||
//#given
|
||||
const model = "google/gemini-3-pro"
|
||||
const model = "google/gemini-3.1-pro"
|
||||
const categories = [deepCategory, otherCategory]
|
||||
|
||||
//#when
|
||||
@@ -237,7 +237,7 @@ describe("buildParallelDelegationSection", () => {
|
||||
describe("buildNonClaudePlannerSection", () => {
|
||||
it("#given non-Claude model #when building #then returns plan agent section", () => {
|
||||
//#given
|
||||
const model = "google/gemini-3-pro"
|
||||
const model = "google/gemini-3.1-pro"
|
||||
|
||||
//#when
|
||||
const result = buildNonClaudePlannerSection(model)
|
||||
@@ -272,4 +272,3 @@ describe("buildNonClaudePlannerSection", () => {
|
||||
})
|
||||
})
|
||||
|
||||
|
||||
|
||||
@@ -162,6 +162,10 @@ Asking the user is the LAST resort after exhausting creative alternatives.
|
||||
- User asks a question implying work → Answer briefly, DO the implied work in the same turn
|
||||
- You wrote a plan in your response → EXECUTE the plan before ending turn — plans are starting lines, not finish lines
|
||||
|
||||
### Task Scope Clarification
|
||||
|
||||
You handle multi-step sub-tasks of a SINGLE GOAL. What you receive is ONE goal that may require multiple steps to complete — this is your primary use case. Only reject when given MULTIPLE INDEPENDENT goals in one request.
|
||||
|
||||
## Hard Constraints
|
||||
|
||||
${hardBlocks}
|
||||
|
||||
@@ -121,6 +121,10 @@ When blocked: try a different approach → decompose the problem → challenge a
|
||||
- User asks a question implying work → Answer briefly, DO the implied work in the same turn
|
||||
- You wrote a plan in your response → EXECUTE the plan before ending turn — plans are starting lines, not finish lines
|
||||
|
||||
### Task Scope Clarification
|
||||
|
||||
You handle multi-step sub-tasks of a SINGLE GOAL. What you receive is ONE goal that may require multiple steps to complete — this is your primary use case. Only reject when given MULTIPLE INDEPENDENT goals in one request.
|
||||
|
||||
## Hard Constraints
|
||||
|
||||
${hardBlocks}
|
||||
|
||||
@@ -112,6 +112,10 @@ Asking the user is the LAST resort after exhausting creative alternatives.
|
||||
- Note assumptions in final message, not as questions mid-work
|
||||
- Need context? Fire explore/librarian in background IMMEDIATELY — continue only with non-overlapping work while they search
|
||||
|
||||
### Task Scope Clarification
|
||||
|
||||
You handle multi-step sub-tasks of a SINGLE GOAL. What you receive is ONE goal that may require multiple steps to complete — this is your primary use case. Only reject when given MULTIPLE INDEPENDENT goals in one request.
|
||||
|
||||
## Hard Constraints
|
||||
|
||||
${hardBlocks}
|
||||
|
||||
42
src/agents/prometheus/system-prompt.test.ts
Normal file
42
src/agents/prometheus/system-prompt.test.ts
Normal file
@@ -0,0 +1,42 @@
|
||||
import { describe, it, expect } from "bun:test"
|
||||
import { getPrometheusPrompt } from "./system-prompt"
|
||||
|
||||
describe("getPrometheusPrompt", () => {
|
||||
describe("#given question tool is not disabled", () => {
|
||||
describe("#when generating prompt", () => {
|
||||
it("#then should include Question tool references", () => {
|
||||
const prompt = getPrometheusPrompt(undefined, [])
|
||||
|
||||
expect(prompt).toContain("Question({")
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
describe("#given question tool is disabled via disabled_tools", () => {
|
||||
describe("#when generating prompt", () => {
|
||||
it("#then should strip Question tool code examples", () => {
|
||||
const prompt = getPrometheusPrompt(undefined, ["question"])
|
||||
|
||||
expect(prompt).not.toContain("Question({")
|
||||
})
|
||||
})
|
||||
|
||||
describe("#when disabled_tools includes question among other tools", () => {
|
||||
it("#then should strip Question tool code examples", () => {
|
||||
const prompt = getPrometheusPrompt(undefined, ["todowrite", "question", "interactive_bash"])
|
||||
|
||||
expect(prompt).not.toContain("Question({")
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
describe("#given no disabled_tools provided", () => {
|
||||
describe("#when generating prompt with undefined", () => {
|
||||
it("#then should include Question tool references", () => {
|
||||
const prompt = getPrometheusPrompt(undefined, undefined)
|
||||
|
||||
expect(prompt).toContain("Question({")
|
||||
})
|
||||
})
|
||||
})
|
||||
})
|
||||
@@ -52,16 +52,34 @@ export function getPrometheusPromptSource(model?: string): PrometheusPromptSourc
|
||||
* Gemini models → Gemini-optimized prompt (aggressive tool-call enforcement, thinking checkpoints)
|
||||
* Default (Claude, etc.) → Claude-optimized prompt (modular sections)
|
||||
*/
|
||||
export function getPrometheusPrompt(model?: string): string {
|
||||
export function getPrometheusPrompt(model?: string, disabledTools?: readonly string[]): string {
|
||||
const source = getPrometheusPromptSource(model)
|
||||
const isQuestionDisabled = disabledTools?.includes("question") ?? false
|
||||
|
||||
let prompt: string
|
||||
switch (source) {
|
||||
case "gpt":
|
||||
return getGptPrometheusPrompt()
|
||||
prompt = getGptPrometheusPrompt()
|
||||
break
|
||||
case "gemini":
|
||||
return getGeminiPrometheusPrompt()
|
||||
prompt = getGeminiPrometheusPrompt()
|
||||
break
|
||||
case "default":
|
||||
default:
|
||||
return PROMETHEUS_SYSTEM_PROMPT
|
||||
prompt = PROMETHEUS_SYSTEM_PROMPT
|
||||
}
|
||||
|
||||
if (isQuestionDisabled) {
|
||||
prompt = stripQuestionToolReferences(prompt)
|
||||
}
|
||||
|
||||
return prompt
|
||||
}
|
||||
|
||||
/**
|
||||
* Removes Question tool usage examples from prompt text when question tool is disabled.
|
||||
*/
|
||||
function stripQuestionToolReferences(prompt: string): string {
|
||||
// Remove Question({...}) code blocks (multi-line)
|
||||
return prompt.replace(/```typescript\n\s*Question\(\{[\s\S]*?\}\)\s*\n```/g, "")
|
||||
}
|
||||
|
||||
@@ -248,8 +248,7 @@ exports[`generateModelConfig single native provider uses OpenAI models when only
|
||||
"variant": "medium",
|
||||
},
|
||||
"quick": {
|
||||
"model": "openai/gpt-5.3-codex",
|
||||
"variant": "low",
|
||||
"model": "openai/gpt-5.4-mini",
|
||||
},
|
||||
"ultrabrain": {
|
||||
"model": "openai/gpt-5.4",
|
||||
@@ -334,8 +333,7 @@ exports[`generateModelConfig single native provider uses OpenAI models with isMa
|
||||
"variant": "medium",
|
||||
},
|
||||
"quick": {
|
||||
"model": "openai/gpt-5.3-codex",
|
||||
"variant": "low",
|
||||
"model": "openai/gpt-5.4-mini",
|
||||
},
|
||||
"ultrabrain": {
|
||||
"model": "openai/gpt-5.4",
|
||||
@@ -533,7 +531,7 @@ exports[`generateModelConfig all native providers uses preferred models from fal
|
||||
"variant": "medium",
|
||||
},
|
||||
"quick": {
|
||||
"model": "anthropic/claude-haiku-4-5",
|
||||
"model": "openai/gpt-5.4-mini",
|
||||
},
|
||||
"ultrabrain": {
|
||||
"model": "openai/gpt-5.4",
|
||||
@@ -608,7 +606,7 @@ exports[`generateModelConfig all native providers uses preferred models with isM
|
||||
"variant": "medium",
|
||||
},
|
||||
"quick": {
|
||||
"model": "anthropic/claude-haiku-4-5",
|
||||
"model": "openai/gpt-5.4-mini",
|
||||
},
|
||||
"ultrabrain": {
|
||||
"model": "openai/gpt-5.4",
|
||||
@@ -684,7 +682,7 @@ exports[`generateModelConfig fallback providers uses OpenCode Zen models when on
|
||||
"variant": "medium",
|
||||
},
|
||||
"quick": {
|
||||
"model": "opencode/claude-haiku-4-5",
|
||||
"model": "opencode/gpt-5.4-mini",
|
||||
},
|
||||
"ultrabrain": {
|
||||
"model": "opencode/gpt-5.4",
|
||||
@@ -759,7 +757,7 @@ exports[`generateModelConfig fallback providers uses OpenCode Zen models with is
|
||||
"variant": "medium",
|
||||
},
|
||||
"quick": {
|
||||
"model": "opencode/claude-haiku-4-5",
|
||||
"model": "opencode/gpt-5.4-mini",
|
||||
},
|
||||
"ultrabrain": {
|
||||
"model": "opencode/gpt-5.4",
|
||||
@@ -830,7 +828,7 @@ exports[`generateModelConfig fallback providers uses GitHub Copilot models when
|
||||
"variant": "high",
|
||||
},
|
||||
"quick": {
|
||||
"model": "github-copilot/claude-haiku-4.5",
|
||||
"model": "github-copilot/gpt-5.4-mini",
|
||||
},
|
||||
"ultrabrain": {
|
||||
"model": "github-copilot/gemini-3.1-pro-preview",
|
||||
@@ -900,7 +898,7 @@ exports[`generateModelConfig fallback providers uses GitHub Copilot models with
|
||||
"variant": "high",
|
||||
},
|
||||
"quick": {
|
||||
"model": "github-copilot/claude-haiku-4.5",
|
||||
"model": "github-copilot/gpt-5.4-mini",
|
||||
},
|
||||
"ultrabrain": {
|
||||
"model": "github-copilot/gemini-3.1-pro-preview",
|
||||
@@ -1092,7 +1090,7 @@ exports[`generateModelConfig mixed provider scenarios uses Claude + OpenCode Zen
|
||||
"variant": "medium",
|
||||
},
|
||||
"quick": {
|
||||
"model": "anthropic/claude-haiku-4-5",
|
||||
"model": "opencode/gpt-5.4-mini",
|
||||
},
|
||||
"ultrabrain": {
|
||||
"model": "opencode/gpt-5.4",
|
||||
@@ -1167,7 +1165,7 @@ exports[`generateModelConfig mixed provider scenarios uses OpenAI + Copilot comb
|
||||
"variant": "medium",
|
||||
},
|
||||
"quick": {
|
||||
"model": "github-copilot/claude-haiku-4.5",
|
||||
"model": "openai/gpt-5.4-mini",
|
||||
},
|
||||
"ultrabrain": {
|
||||
"model": "openai/gpt-5.4",
|
||||
@@ -1375,7 +1373,7 @@ exports[`generateModelConfig mixed provider scenarios uses all fallback provider
|
||||
"variant": "medium",
|
||||
},
|
||||
"quick": {
|
||||
"model": "github-copilot/claude-haiku-4.5",
|
||||
"model": "github-copilot/gpt-5.4-mini",
|
||||
},
|
||||
"ultrabrain": {
|
||||
"model": "opencode/gpt-5.4",
|
||||
@@ -1453,7 +1451,7 @@ exports[`generateModelConfig mixed provider scenarios uses all providers togethe
|
||||
"variant": "medium",
|
||||
},
|
||||
"quick": {
|
||||
"model": "anthropic/claude-haiku-4-5",
|
||||
"model": "openai/gpt-5.4-mini",
|
||||
},
|
||||
"ultrabrain": {
|
||||
"model": "openai/gpt-5.4",
|
||||
@@ -1531,7 +1529,7 @@ exports[`generateModelConfig mixed provider scenarios uses all providers with is
|
||||
"variant": "medium",
|
||||
},
|
||||
"quick": {
|
||||
"model": "anthropic/claude-haiku-4-5",
|
||||
"model": "openai/gpt-5.4-mini",
|
||||
},
|
||||
"ultrabrain": {
|
||||
"model": "openai/gpt-5.4",
|
||||
|
||||
@@ -42,7 +42,7 @@ Examples:
|
||||
Model Providers (Priority: Native > Copilot > OpenCode Zen > Z.ai > Kimi):
|
||||
Claude Native anthropic/ models (Opus, Sonnet, Haiku)
|
||||
OpenAI Native openai/ models (GPT-5.4 for Oracle)
|
||||
Gemini Native google/ models (Gemini 3 Pro, Flash)
|
||||
Gemini Native google/ models (Gemini 3.1 Pro, Flash)
|
||||
Copilot github-copilot/ models (fallback)
|
||||
OpenCode Zen opencode/ models (opencode/claude-opus-4-6, etc.)
|
||||
Z.ai zai-coding-plan/glm-5 (visual-engineering fallback)
|
||||
|
||||
@@ -2,15 +2,15 @@ import { readFileSync } from "node:fs"
|
||||
import { join } from "node:path"
|
||||
|
||||
import { OhMyOpenCodeConfigSchema } from "../../../config"
|
||||
import { detectConfigFile, getOpenCodeConfigDir, parseJsonc } from "../../../shared"
|
||||
import { detectPluginConfigFile, getOpenCodeConfigDir, parseJsonc } from "../../../shared"
|
||||
import { CHECK_IDS, CHECK_NAMES, PACKAGE_NAME } from "../constants"
|
||||
import type { CheckResult, DoctorIssue } from "../types"
|
||||
import { loadAvailableModelsFromCache } from "./model-resolution-cache"
|
||||
import { getModelResolutionInfoWithOverrides } from "./model-resolution"
|
||||
import type { OmoConfig } from "./model-resolution-types"
|
||||
|
||||
const USER_CONFIG_BASE = join(getOpenCodeConfigDir({ binary: "opencode" }), PACKAGE_NAME)
|
||||
const PROJECT_CONFIG_BASE = join(process.cwd(), ".opencode", PACKAGE_NAME)
|
||||
const USER_CONFIG_DIR = getOpenCodeConfigDir({ binary: "opencode" })
|
||||
const PROJECT_CONFIG_DIR = join(process.cwd(), ".opencode")
|
||||
|
||||
interface ConfigValidationResult {
|
||||
exists: boolean
|
||||
@@ -21,10 +21,10 @@ interface ConfigValidationResult {
|
||||
}
|
||||
|
||||
function findConfigPath(): string | null {
|
||||
const projectConfig = detectConfigFile(PROJECT_CONFIG_BASE)
|
||||
const projectConfig = detectPluginConfigFile(PROJECT_CONFIG_DIR)
|
||||
if (projectConfig.format !== "none") return projectConfig.path
|
||||
|
||||
const userConfig = detectConfigFile(USER_CONFIG_BASE)
|
||||
const userConfig = detectPluginConfigFile(USER_CONFIG_DIR)
|
||||
if (userConfig.format !== "none") return userConfig.path
|
||||
|
||||
return null
|
||||
|
||||
@@ -1,17 +1,13 @@
|
||||
import { readFileSync } from "node:fs"
|
||||
import { join } from "node:path"
|
||||
import { detectConfigFile, getOpenCodeConfigPaths, parseJsonc } from "../../../shared"
|
||||
import { detectPluginConfigFile, getOpenCodeConfigPaths, parseJsonc } from "../../../shared"
|
||||
import type { OmoConfig } from "./model-resolution-types"
|
||||
|
||||
const PACKAGE_NAME = "oh-my-opencode"
|
||||
const USER_CONFIG_BASE = join(
|
||||
getOpenCodeConfigPaths({ binary: "opencode", version: null }).configDir,
|
||||
PACKAGE_NAME
|
||||
)
|
||||
const PROJECT_CONFIG_BASE = join(process.cwd(), ".opencode", PACKAGE_NAME)
|
||||
const USER_CONFIG_DIR = getOpenCodeConfigPaths({ binary: "opencode", version: null }).configDir
|
||||
const PROJECT_CONFIG_DIR = join(process.cwd(), ".opencode")
|
||||
|
||||
export function loadOmoConfig(): OmoConfig | null {
|
||||
const projectDetected = detectConfigFile(PROJECT_CONFIG_BASE)
|
||||
const projectDetected = detectPluginConfigFile(PROJECT_CONFIG_DIR)
|
||||
if (projectDetected.format !== "none") {
|
||||
try {
|
||||
const content = readFileSync(projectDetected.path, "utf-8")
|
||||
@@ -21,7 +17,7 @@ export function loadOmoConfig(): OmoConfig | null {
|
||||
}
|
||||
}
|
||||
|
||||
const userDetected = detectConfigFile(USER_CONFIG_BASE)
|
||||
const userDetected = detectPluginConfigFile(USER_CONFIG_DIR)
|
||||
if (userDetected.format !== "none") {
|
||||
try {
|
||||
const content = readFileSync(userDetected.path, "utf-8")
|
||||
|
||||
@@ -53,6 +53,14 @@ describe("install CLI - binary check behavior", () => {
|
||||
isOpenCodeInstalledSpy = spyOn(configManager, "isOpenCodeInstalled").mockResolvedValue(false)
|
||||
getOpenCodeVersionSpy = spyOn(configManager, "getOpenCodeVersion").mockResolvedValue(null)
|
||||
|
||||
// given mock npm fetch
|
||||
globalThis.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: true,
|
||||
json: () => Promise.resolve({ latest: "3.0.0" }),
|
||||
} as Response)
|
||||
) as unknown as typeof fetch
|
||||
|
||||
const args: InstallArgs = {
|
||||
tui: false,
|
||||
claude: "yes",
|
||||
|
||||
@@ -40,7 +40,7 @@ describe("generateModelConfig OpenAI-only model catalog", () => {
|
||||
|
||||
// #then
|
||||
expect(result.categories?.artistry).toEqual({ model: "openai/gpt-5.4", variant: "xhigh" })
|
||||
expect(result.categories?.quick).toEqual({ model: "openai/gpt-5.3-codex", variant: "low" })
|
||||
expect(result.categories?.quick).toEqual({ model: "openai/gpt-5.4-mini" })
|
||||
expect(result.categories?.["visual-engineering"]).toEqual({ model: "openai/gpt-5.4", variant: "high" })
|
||||
expect(result.categories?.writing).toEqual({ model: "openai/gpt-5.4", variant: "medium" })
|
||||
})
|
||||
@@ -55,6 +55,6 @@ describe("generateModelConfig OpenAI-only model catalog", () => {
|
||||
// #then
|
||||
expect(result.agents?.explore).toEqual({ model: "opencode-go/minimax-m2.5" })
|
||||
expect(result.agents?.librarian).toEqual({ model: "opencode-go/minimax-m2.5" })
|
||||
expect(result.categories?.quick).toEqual({ model: "opencode-go/minimax-m2.5" })
|
||||
expect(result.categories?.quick).toEqual({ model: "openai/gpt-5.4-mini" })
|
||||
})
|
||||
})
|
||||
|
||||
@@ -7,7 +7,7 @@ const OPENAI_ONLY_AGENT_OVERRIDES: Record<string, AgentConfig> = {
|
||||
|
||||
const OPENAI_ONLY_CATEGORY_OVERRIDES: Record<string, CategoryConfig> = {
|
||||
artistry: { model: "openai/gpt-5.4", variant: "xhigh" },
|
||||
quick: { model: "openai/gpt-5.3-codex", variant: "low" },
|
||||
quick: { model: "openai/gpt-5.4-mini" },
|
||||
"visual-engineering": { model: "openai/gpt-5.4", variant: "high" },
|
||||
writing: { model: "openai/gpt-5.4", variant: "medium" },
|
||||
}
|
||||
|
||||
@@ -115,6 +115,42 @@ describe("waitForEventProcessorShutdown", () => {
|
||||
})
|
||||
})
|
||||
|
||||
describe("run environment setup", () => {
|
||||
let originalClient: string | undefined
|
||||
let originalRunMode: string | undefined
|
||||
|
||||
beforeEach(() => {
|
||||
originalClient = process.env.OPENCODE_CLIENT
|
||||
originalRunMode = process.env.OPENCODE_CLI_RUN_MODE
|
||||
})
|
||||
|
||||
afterEach(() => {
|
||||
if (originalClient === undefined) {
|
||||
delete process.env.OPENCODE_CLIENT
|
||||
} else {
|
||||
process.env.OPENCODE_CLIENT = originalClient
|
||||
}
|
||||
if (originalRunMode === undefined) {
|
||||
delete process.env.OPENCODE_CLI_RUN_MODE
|
||||
} else {
|
||||
process.env.OPENCODE_CLI_RUN_MODE = originalRunMode
|
||||
}
|
||||
})
|
||||
|
||||
it("sets OPENCODE_CLIENT to 'run' to exclude question tool from registry", async () => {
|
||||
//#given
|
||||
delete process.env.OPENCODE_CLIENT
|
||||
|
||||
//#when - run() sets env vars synchronously before any async work
|
||||
const { run } = await import(`./runner?env-setup-${Date.now()}`)
|
||||
run({ message: "test" }).catch(() => {})
|
||||
|
||||
//#then
|
||||
expect(String(process.env.OPENCODE_CLIENT)).toBe("run")
|
||||
expect(String(process.env.OPENCODE_CLI_RUN_MODE)).toBe("true")
|
||||
})
|
||||
})
|
||||
|
||||
describe("run with invalid model", () => {
|
||||
it("given invalid --model value, when run, then returns exit code 1 with error message", async () => {
|
||||
// given
|
||||
|
||||
@@ -31,6 +31,7 @@ export async function waitForEventProcessorShutdown(
|
||||
|
||||
export async function run(options: RunOptions): Promise<number> {
|
||||
process.env.OPENCODE_CLI_RUN_MODE = "true"
|
||||
process.env.OPENCODE_CLIENT = "run"
|
||||
|
||||
const startTime = Date.now()
|
||||
const {
|
||||
|
||||
@@ -54,7 +54,7 @@ export async function promptInstallConfig(detected: DetectedConfig): Promise<Ins
|
||||
message: "Will you integrate Google Gemini?",
|
||||
options: [
|
||||
{ value: "no", label: "No", hint: "Frontend/docs agents will use fallback" },
|
||||
{ value: "yes", label: "Yes", hint: "Beautiful UI generation with Gemini 3 Pro" },
|
||||
{ value: "yes", label: "Yes", hint: "Beautiful UI generation with Gemini 3.1 Pro" },
|
||||
],
|
||||
initialValue: initial.gemini,
|
||||
})
|
||||
|
||||
@@ -14,7 +14,7 @@ config/schema/
|
||||
├── agent-names.ts # BuiltinAgentNameSchema (11), OverridableAgentNameSchema (14)
|
||||
├── agent-overrides.ts # AgentOverrideConfigSchema (21 fields per agent)
|
||||
├── categories.ts # 8 built-in + custom categories
|
||||
├── hooks.ts # HookNameSchema (46 hooks)
|
||||
├── hooks.ts # HookNameSchema (48 hooks)
|
||||
├── skills.ts # SkillsConfigSchema (sources, paths, recursive)
|
||||
├── commands.ts # BuiltinCommandNameSchema
|
||||
├── experimental.ts # Feature flags (plugin_load_timeout_ms min 1000)
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
import { z } from "zod"
|
||||
|
||||
export const HookNameSchema = z.enum([
|
||||
"gpt-permission-continuation",
|
||||
"todo-continuation-enforcer",
|
||||
"context-window-monitor",
|
||||
"session-recovery",
|
||||
@@ -52,6 +51,7 @@ export const HookNameSchema = z.enum([
|
||||
"hashline-read-enhancer",
|
||||
"read-image-resizer",
|
||||
"todo-description-override",
|
||||
"webfetch-redirect-guard",
|
||||
])
|
||||
|
||||
export type HookName = z.infer<typeof HookNameSchema>
|
||||
|
||||
@@ -12,6 +12,7 @@ import { BuiltinCommandNameSchema } from "./commands"
|
||||
import { ExperimentalConfigSchema } from "./experimental"
|
||||
import { GitMasterConfigSchema } from "./git-master"
|
||||
import { NotificationConfigSchema } from "./notification"
|
||||
import { OpenClawConfigSchema } from "./openclaw"
|
||||
import { RalphLoopConfigSchema } from "./ralph-loop"
|
||||
import { RuntimeFallbackConfigSchema } from "./runtime-fallback"
|
||||
import { SkillsConfigSchema } from "./skills"
|
||||
@@ -55,6 +56,7 @@ export const OhMyOpenCodeConfigSchema = z.object({
|
||||
runtime_fallback: z.union([z.boolean(), RuntimeFallbackConfigSchema]).optional(),
|
||||
background_task: BackgroundTaskConfigSchema.optional(),
|
||||
notification: NotificationConfigSchema.optional(),
|
||||
openclaw: OpenClawConfigSchema.optional(),
|
||||
babysitting: BabysittingConfigSchema.optional(),
|
||||
git_master: GitMasterConfigSchema.optional(),
|
||||
browser_automation_engine: BrowserAutomationConfigSchema.optional(),
|
||||
|
||||
50
src/config/schema/openclaw.ts
Normal file
50
src/config/schema/openclaw.ts
Normal file
@@ -0,0 +1,50 @@
|
||||
import { z } from "zod"
|
||||
|
||||
export const OpenClawGatewaySchema = z.object({
|
||||
type: z.enum(["http", "command"]).default("http"),
|
||||
// HTTP specific
|
||||
url: z.string().optional(),
|
||||
method: z.string().default("POST"),
|
||||
headers: z.record(z.string(), z.string()).optional(),
|
||||
// Command specific
|
||||
command: z.string().optional(),
|
||||
// Shared
|
||||
timeout: z.number().optional(),
|
||||
})
|
||||
|
||||
export const OpenClawHookSchema = z.object({
|
||||
enabled: z.boolean().default(true),
|
||||
gateway: z.string(),
|
||||
instruction: z.string(),
|
||||
})
|
||||
|
||||
export const OpenClawReplyListenerConfigSchema = z.object({
|
||||
discordBotToken: z.string().optional(),
|
||||
discordChannelId: z.string().optional(),
|
||||
discordMention: z.string().optional(), // For allowed_mentions
|
||||
authorizedDiscordUserIds: z.array(z.string()).default([]),
|
||||
|
||||
telegramBotToken: z.string().optional(),
|
||||
telegramChatId: z.string().optional(),
|
||||
|
||||
pollIntervalMs: z.number().default(3000),
|
||||
rateLimitPerMinute: z.number().default(10),
|
||||
maxMessageLength: z.number().default(500),
|
||||
includePrefix: z.boolean().default(true),
|
||||
})
|
||||
|
||||
export const OpenClawConfigSchema = z.object({
|
||||
enabled: z.boolean().default(false),
|
||||
|
||||
// Outbound Configuration
|
||||
gateways: z.record(z.string(), OpenClawGatewaySchema).default({}),
|
||||
hooks: z.record(z.string(), OpenClawHookSchema).default({}),
|
||||
|
||||
// Inbound Configuration (Reply Listener)
|
||||
replyListener: OpenClawReplyListenerConfigSchema.optional(),
|
||||
})
|
||||
|
||||
export type OpenClawConfig = z.infer<typeof OpenClawConfigSchema>
|
||||
export type OpenClawGateway = z.infer<typeof OpenClawGatewaySchema>
|
||||
export type OpenClawHook = z.infer<typeof OpenClawHookSchema>
|
||||
export type OpenClawReplyListenerConfig = z.infer<typeof OpenClawReplyListenerConfigSchema>
|
||||
@@ -4,9 +4,9 @@ import type { BackgroundTask, LaunchInput } from "./types"
|
||||
export const TASK_TTL_MS = 30 * 60 * 1000
|
||||
export const TERMINAL_TASK_TTL_MS = 30 * 60 * 1000
|
||||
export const MIN_STABILITY_TIME_MS = 10 * 1000
|
||||
export const DEFAULT_STALE_TIMEOUT_MS = 1_200_000
|
||||
export const DEFAULT_MESSAGE_STALENESS_TIMEOUT_MS = 1_800_000
|
||||
export const DEFAULT_MAX_TOOL_CALLS = 200
|
||||
export const DEFAULT_STALE_TIMEOUT_MS = 2_700_000
|
||||
export const DEFAULT_MESSAGE_STALENESS_TIMEOUT_MS = 3_600_000
|
||||
export const DEFAULT_MAX_TOOL_CALLS = 4000
|
||||
export const DEFAULT_CIRCUIT_BREAKER_CONSECUTIVE_THRESHOLD = 20
|
||||
export const DEFAULT_CIRCUIT_BREAKER_ENABLED = true
|
||||
export const MIN_RUNTIME_BEFORE_STALE_MS = 30_000
|
||||
|
||||
@@ -21,9 +21,9 @@ function createRunningTask(startedAt: Date): BackgroundTask {
|
||||
}
|
||||
|
||||
describe("DEFAULT_MESSAGE_STALENESS_TIMEOUT_MS", () => {
|
||||
test("uses a 30 minute default", () => {
|
||||
test("uses a 60 minute default", () => {
|
||||
// #given
|
||||
const expectedTimeout = 30 * 60 * 1000
|
||||
const expectedTimeout = 60 * 60 * 1000
|
||||
|
||||
// #when
|
||||
const timeout = DEFAULT_MESSAGE_STALENESS_TIMEOUT_MS
|
||||
|
||||
@@ -4,9 +4,9 @@ const { describe, expect, test } = require("bun:test")
|
||||
import { DEFAULT_STALE_TIMEOUT_MS } from "./constants"
|
||||
|
||||
describe("DEFAULT_STALE_TIMEOUT_MS", () => {
|
||||
test("uses a 20 minute default", () => {
|
||||
test("uses a 45 minute default", () => {
|
||||
// #given
|
||||
const expectedTimeout = 20 * 60 * 1000
|
||||
const expectedTimeout = 45 * 60 * 1000
|
||||
|
||||
// #when
|
||||
const timeout = DEFAULT_STALE_TIMEOUT_MS
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
/// <reference types="bun-types" />
|
||||
|
||||
import { describe, expect, test } from "bun:test"
|
||||
import {
|
||||
createToolCallSignature,
|
||||
@@ -19,7 +21,7 @@ function buildWindow(
|
||||
}
|
||||
|
||||
function buildWindowWithInputs(
|
||||
calls: Array<{ tool: string; input?: Record<string, unknown> }>,
|
||||
calls: Array<{ tool: string; input?: Record<string, unknown> | null }>,
|
||||
override?: Parameters<typeof resolveCircuitBreakerSettings>[0]
|
||||
) {
|
||||
const settings = resolveCircuitBreakerSettings(override)
|
||||
@@ -148,7 +150,12 @@ describe("loop-detector", () => {
|
||||
|
||||
describe("#given the same tool is called consecutively", () => {
|
||||
test("#when evaluated #then it triggers", () => {
|
||||
const window = buildWindow(Array.from({ length: 20 }, () => "read"))
|
||||
const window = buildWindowWithInputs(
|
||||
Array.from({ length: 20 }, () => ({
|
||||
tool: "read",
|
||||
input: { filePath: "/src/same.ts" },
|
||||
}))
|
||||
)
|
||||
|
||||
const result = detectRepetitiveToolUse(window)
|
||||
|
||||
@@ -176,7 +183,12 @@ describe("loop-detector", () => {
|
||||
|
||||
describe("#given threshold boundary", () => {
|
||||
test("#when below threshold #then it does not trigger", () => {
|
||||
const belowThresholdWindow = buildWindow(Array.from({ length: 19 }, () => "read"))
|
||||
const belowThresholdWindow = buildWindowWithInputs(
|
||||
Array.from({ length: 19 }, () => ({
|
||||
tool: "read",
|
||||
input: { filePath: "/src/same.ts" },
|
||||
}))
|
||||
)
|
||||
|
||||
const result = detectRepetitiveToolUse(belowThresholdWindow)
|
||||
|
||||
@@ -184,7 +196,12 @@ describe("loop-detector", () => {
|
||||
})
|
||||
|
||||
test("#when equal to threshold #then it triggers", () => {
|
||||
const atThresholdWindow = buildWindow(Array.from({ length: 20 }, () => "read"))
|
||||
const atThresholdWindow = buildWindowWithInputs(
|
||||
Array.from({ length: 20 }, () => ({
|
||||
tool: "read",
|
||||
input: { filePath: "/src/same.ts" },
|
||||
}))
|
||||
)
|
||||
|
||||
const result = detectRepetitiveToolUse(atThresholdWindow)
|
||||
|
||||
@@ -224,16 +241,22 @@ describe("loop-detector", () => {
|
||||
})
|
||||
})
|
||||
|
||||
describe("#given tool calls with no input", () => {
|
||||
test("#when evaluated #then it triggers", () => {
|
||||
describe("#given tool calls with undefined input", () => {
|
||||
test("#when evaluated #then it does not trigger", () => {
|
||||
const calls = Array.from({ length: 20 }, () => ({ tool: "read" }))
|
||||
const window = buildWindowWithInputs(calls)
|
||||
const result = detectRepetitiveToolUse(window)
|
||||
expect(result).toEqual({
|
||||
triggered: true,
|
||||
toolName: "read",
|
||||
repeatedCount: 20,
|
||||
})
|
||||
expect(result).toEqual({ triggered: false })
|
||||
})
|
||||
})
|
||||
|
||||
describe("#given tool calls with null input", () => {
|
||||
test("#when evaluated #then it does not trigger", () => {
|
||||
const calls = Array.from({ length: 20 }, () => ({ tool: "read", input: null }))
|
||||
const window = buildWindowWithInputs(calls)
|
||||
const result = detectRepetitiveToolUse(window)
|
||||
|
||||
expect(result).toEqual({ triggered: false })
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
@@ -36,6 +36,14 @@ export function recordToolCall(
|
||||
settings: CircuitBreakerSettings,
|
||||
toolInput?: Record<string, unknown> | null
|
||||
): ToolCallWindow {
|
||||
if (toolInput === undefined || toolInput === null) {
|
||||
return {
|
||||
lastSignature: `${toolName}::__unknown-input__`,
|
||||
consecutiveCount: 1,
|
||||
threshold: settings.consecutiveThreshold,
|
||||
}
|
||||
}
|
||||
|
||||
const signature = createToolCallSignature(toolName, toolInput)
|
||||
|
||||
if (window && window.lastSignature === signature) {
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
/// <reference types="bun-types" />
|
||||
|
||||
import { describe, expect, test } from "bun:test"
|
||||
import type { PluginInput } from "@opencode-ai/plugin"
|
||||
import { tmpdir } from "node:os"
|
||||
@@ -38,8 +40,8 @@ async function flushAsyncWork() {
|
||||
}
|
||||
|
||||
describe("BackgroundManager circuit breaker", () => {
|
||||
describe("#given the same tool is called consecutively", () => {
|
||||
test("#when consecutive tool events arrive #then the task is cancelled", async () => {
|
||||
describe("#given flat-format tool events have no state.input", () => {
|
||||
test("#when 20 consecutive read events arrive #then the task keeps running", async () => {
|
||||
const manager = createManager({
|
||||
circuitBreaker: {
|
||||
consecutiveThreshold: 20,
|
||||
@@ -71,8 +73,8 @@ describe("BackgroundManager circuit breaker", () => {
|
||||
|
||||
await flushAsyncWork()
|
||||
|
||||
expect(task.status).toBe("cancelled")
|
||||
expect(task.error).toContain("read 20 consecutive times")
|
||||
expect(task.status).toBe("running")
|
||||
expect(task.progress?.toolCalls).toBe(20)
|
||||
})
|
||||
})
|
||||
|
||||
@@ -126,7 +128,7 @@ describe("BackgroundManager circuit breaker", () => {
|
||||
})
|
||||
|
||||
describe("#given the absolute cap is configured lower than the repetition detector needs", () => {
|
||||
test("#when the raw tool-call cap is reached #then the backstop still cancels the task", async () => {
|
||||
test("#when repeated flat-format tool events reach maxToolCalls #then the backstop still cancels the task", async () => {
|
||||
const manager = createManager({
|
||||
maxToolCalls: 3,
|
||||
circuitBreaker: {
|
||||
@@ -150,10 +152,10 @@ describe("BackgroundManager circuit breaker", () => {
|
||||
}
|
||||
getTaskMap(manager).set(task.id, task)
|
||||
|
||||
for (const toolName of ["read", "grep", "edit"]) {
|
||||
for (let i = 0; i < 3; i++) {
|
||||
manager.handleEvent({
|
||||
type: "message.part.updated",
|
||||
properties: { sessionID: task.sessionID, type: "tool", tool: toolName },
|
||||
properties: { sessionID: task.sessionID, type: "tool", tool: "read" },
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
declare const require: (name: string) => any
|
||||
const { describe, test, expect, beforeEach, afterEach } = require("bun:test")
|
||||
const { describe, test, expect, beforeEach, afterEach, spyOn } = require("bun:test")
|
||||
import { tmpdir } from "node:os"
|
||||
import type { PluginInput } from "@opencode-ai/plugin"
|
||||
import type { BackgroundTask, ResumeInput } from "./types"
|
||||
@@ -1668,7 +1668,7 @@ describe("BackgroundManager.resume model persistence", () => {
|
||||
// then - model should be passed in prompt body
|
||||
expect(promptCalls).toHaveLength(1)
|
||||
expect(promptCalls[0].body.model).toEqual({ providerID: "anthropic", modelID: "claude-sonnet-4-20250514" })
|
||||
expect(promptCalls[0].body.agent).toBe("explore")
|
||||
expect("agent" in promptCalls[0].body).toBe(false)
|
||||
})
|
||||
|
||||
test("should NOT pass model when task has no model (backward compatibility)", async () => {
|
||||
@@ -1806,9 +1806,9 @@ describe("BackgroundManager - Non-blocking Queue Integration", () => {
|
||||
expect(task.sessionID).toBeUndefined()
|
||||
})
|
||||
|
||||
test("should return immediately even with concurrency limit", async () => {
|
||||
// given
|
||||
const config = { defaultConcurrency: 1 }
|
||||
test("should return immediately even with concurrency limit", async () => {
|
||||
// given
|
||||
const config = { defaultConcurrency: 1 }
|
||||
manager.shutdown()
|
||||
manager = new BackgroundManager({ client: mockClient, directory: tmpdir() } as unknown as PluginInput, config)
|
||||
|
||||
@@ -1828,9 +1828,76 @@ describe("BackgroundManager - Non-blocking Queue Integration", () => {
|
||||
|
||||
// then
|
||||
expect(endTime - startTime).toBeLessThan(100) // Should be instant
|
||||
expect(task1.status).toBe("pending")
|
||||
expect(task2.status).toBe("pending")
|
||||
expect(task1.status).toBe("pending")
|
||||
expect(task2.status).toBe("pending")
|
||||
})
|
||||
|
||||
test("should omit agent when launch has model and keep agent without model", async () => {
|
||||
// given
|
||||
const promptBodies: Array<Record<string, unknown>> = []
|
||||
let resolveFirstPromptStarted: (() => void) | undefined
|
||||
let resolveSecondPromptStarted: (() => void) | undefined
|
||||
const firstPromptStarted = new Promise<void>((resolve) => {
|
||||
resolveFirstPromptStarted = resolve
|
||||
})
|
||||
const secondPromptStarted = new Promise<void>((resolve) => {
|
||||
resolveSecondPromptStarted = resolve
|
||||
})
|
||||
const customClient = {
|
||||
session: {
|
||||
create: async (_args?: unknown) => ({ data: { id: `ses_${crypto.randomUUID()}` } }),
|
||||
get: async () => ({ data: { directory: "/test/dir" } }),
|
||||
prompt: async () => ({}),
|
||||
promptAsync: async (args: { path: { id: string }; body: Record<string, unknown> }) => {
|
||||
promptBodies.push(args.body)
|
||||
if (promptBodies.length === 1) {
|
||||
resolveFirstPromptStarted?.()
|
||||
}
|
||||
if (promptBodies.length === 2) {
|
||||
resolveSecondPromptStarted?.()
|
||||
}
|
||||
return {}
|
||||
},
|
||||
messages: async () => ({ data: [] }),
|
||||
todo: async () => ({ data: [] }),
|
||||
status: async () => ({ data: {} }),
|
||||
abort: async () => ({}),
|
||||
},
|
||||
}
|
||||
manager.shutdown()
|
||||
manager = new BackgroundManager({ client: customClient, directory: tmpdir() } as unknown as PluginInput)
|
||||
|
||||
const launchInputWithModel = {
|
||||
description: "Test task with model",
|
||||
prompt: "Do something",
|
||||
agent: "test-agent",
|
||||
parentSessionID: "parent-session",
|
||||
parentMessageID: "parent-message",
|
||||
model: { providerID: "anthropic", modelID: "claude-opus-4-6" },
|
||||
}
|
||||
const launchInputWithoutModel = {
|
||||
description: "Test task without model",
|
||||
prompt: "Do something else",
|
||||
agent: "test-agent",
|
||||
parentSessionID: "parent-session",
|
||||
parentMessageID: "parent-message",
|
||||
}
|
||||
|
||||
// when
|
||||
const taskWithModel = await manager.launch(launchInputWithModel)
|
||||
await firstPromptStarted
|
||||
const taskWithoutModel = await manager.launch(launchInputWithoutModel)
|
||||
await secondPromptStarted
|
||||
|
||||
// then
|
||||
expect(taskWithModel.status).toBe("pending")
|
||||
expect(taskWithoutModel.status).toBe("pending")
|
||||
expect(promptBodies).toHaveLength(2)
|
||||
expect(promptBodies[0].model).toEqual({ providerID: "anthropic", modelID: "claude-opus-4-6" })
|
||||
expect("agent" in promptBodies[0]).toBe(false)
|
||||
expect(promptBodies[1].agent).toBe("test-agent")
|
||||
expect("model" in promptBodies[1]).toBe(false)
|
||||
})
|
||||
|
||||
test("should queue multiple tasks without blocking", async () => {
|
||||
// given
|
||||
@@ -2781,6 +2848,18 @@ describe("BackgroundManager - Non-blocking Queue Integration", () => {
|
||||
})
|
||||
|
||||
describe("BackgroundManager.checkAndInterruptStaleTasks", () => {
|
||||
const originalDateNow = Date.now
|
||||
let fixedTime: number
|
||||
|
||||
beforeEach(() => {
|
||||
fixedTime = Date.now()
|
||||
spyOn(globalThis.Date, "now").mockReturnValue(fixedTime)
|
||||
})
|
||||
|
||||
afterEach(() => {
|
||||
Date.now = originalDateNow
|
||||
})
|
||||
|
||||
test("should NOT interrupt task running less than 30 seconds (min runtime guard)", async () => {
|
||||
const client = {
|
||||
session: {
|
||||
@@ -3027,10 +3106,10 @@ describe("BackgroundManager.checkAndInterruptStaleTasks", () => {
|
||||
prompt: "Test",
|
||||
agent: "test-agent",
|
||||
status: "running",
|
||||
startedAt: new Date(Date.now() - 25 * 60 * 1000),
|
||||
startedAt: new Date(Date.now() - 50 * 60 * 1000),
|
||||
progress: {
|
||||
toolCalls: 1,
|
||||
lastUpdate: new Date(Date.now() - 21 * 60 * 1000),
|
||||
lastUpdate: new Date(Date.now() - 46 * 60 * 1000),
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
@@ -515,7 +515,9 @@ export class BackgroundManager {
|
||||
promptWithModelSuggestionRetry(this.client, {
|
||||
path: { id: sessionID },
|
||||
body: {
|
||||
agent: input.agent,
|
||||
// When a model is explicitly provided, omit the agent name so opencode's
|
||||
// built-in agent fallback chain does not override the user-specified model.
|
||||
...(launchModel ? {} : { agent: input.agent }),
|
||||
...(launchModel ? { model: launchModel } : {}),
|
||||
...(launchVariant ? { variant: launchVariant } : {}),
|
||||
system: input.skillContent,
|
||||
@@ -792,7 +794,9 @@ export class BackgroundManager {
|
||||
this.client.session.promptAsync({
|
||||
path: { id: existingTask.sessionID },
|
||||
body: {
|
||||
agent: existingTask.agent,
|
||||
// When a model is explicitly provided, omit the agent name so opencode's
|
||||
// built-in agent fallback chain does not override the user-specified model.
|
||||
...(resumeModel ? {} : { agent: existingTask.agent }),
|
||||
...(resumeModel ? { model: resumeModel } : {}),
|
||||
...(resumeVariant ? { variant: resumeVariant } : {}),
|
||||
tools: (() => {
|
||||
|
||||
@@ -135,7 +135,9 @@ export async function startTask(
|
||||
promptWithModelSuggestionRetry(client, {
|
||||
path: { id: sessionID },
|
||||
body: {
|
||||
agent: input.agent,
|
||||
// When a model is explicitly provided, omit the agent name so opencode's
|
||||
// built-in agent fallback chain does not override the user-specified model.
|
||||
...(launchModel ? {} : { agent: input.agent }),
|
||||
...(launchModel ? { model: launchModel } : {}),
|
||||
...(launchVariant ? { variant: launchVariant } : {}),
|
||||
system: input.skillContent,
|
||||
@@ -220,7 +222,9 @@ export async function resumeTask(
|
||||
client.session.promptAsync({
|
||||
path: { id: task.sessionID },
|
||||
body: {
|
||||
agent: task.agent,
|
||||
// When a model is explicitly provided, omit the agent name so opencode's
|
||||
// built-in agent fallback chain does not override the user-specified model.
|
||||
...(resumeModel ? {} : { agent: task.agent }),
|
||||
...(resumeModel ? { model: resumeModel } : {}),
|
||||
...(resumeVariant ? { variant: resumeVariant } : {}),
|
||||
tools: {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
declare const require: (name: string) => any
|
||||
const { describe, it, expect, mock } = require("bun:test")
|
||||
const { describe, it, expect, mock, spyOn, beforeEach, afterEach } = require("bun:test")
|
||||
|
||||
import { checkAndInterruptStaleTasks, pruneStaleTasksAndNotifications } from "./task-poller"
|
||||
import type { BackgroundTask } from "./types"
|
||||
@@ -29,6 +29,18 @@ describe("checkAndInterruptStaleTasks", () => {
|
||||
...overrides,
|
||||
}
|
||||
}
|
||||
const originalDateNow = Date.now
|
||||
let fixedTime: number
|
||||
|
||||
beforeEach(() => {
|
||||
fixedTime = Date.now()
|
||||
spyOn(globalThis.Date, "now").mockReturnValue(fixedTime)
|
||||
})
|
||||
|
||||
afterEach(() => {
|
||||
Date.now = originalDateNow
|
||||
})
|
||||
|
||||
|
||||
it("should interrupt tasks with lastUpdate exceeding stale timeout", async () => {
|
||||
//#given
|
||||
@@ -117,13 +129,13 @@ describe("checkAndInterruptStaleTasks", () => {
|
||||
})
|
||||
|
||||
it("should use DEFAULT_MESSAGE_STALENESS_TIMEOUT_MS when messageStalenessTimeoutMs is not configured", async () => {
|
||||
//#given — task started 35 minutes ago, no config for messageStalenessTimeoutMs
|
||||
//#given — task started 65 minutes ago, no config for messageStalenessTimeoutMs
|
||||
const task = createRunningTask({
|
||||
startedAt: new Date(Date.now() - 35 * 60 * 1000),
|
||||
startedAt: new Date(Date.now() - 65 * 60 * 1000),
|
||||
progress: undefined,
|
||||
})
|
||||
|
||||
//#when — default is 30 minutes (1_800_000ms)
|
||||
//#when — default is 60 minutes (3_600_000ms)
|
||||
await checkAndInterruptStaleTasks({
|
||||
tasks: [task],
|
||||
client: mockClient as never,
|
||||
|
||||
@@ -130,7 +130,7 @@ export async function checkAndInterruptStaleTasks(args: {
|
||||
|
||||
const staleMinutes = Math.round(runtime / 60000)
|
||||
task.status = "cancelled"
|
||||
task.error = `Stale timeout (no activity for ${staleMinutes}min since start)`
|
||||
task.error = `Stale timeout (no activity for ${staleMinutes}min since start). This is a FINAL cancellation - do NOT create a replacement task. If the timeout is too short, increase 'background_task.staleTimeoutMs' in .opencode/oh-my-opencode.json.`
|
||||
task.completedAt = new Date()
|
||||
|
||||
if (task.concurrencyKey) {
|
||||
@@ -159,10 +159,10 @@ export async function checkAndInterruptStaleTasks(args: {
|
||||
if (timeSinceLastUpdate <= staleTimeoutMs) continue
|
||||
if (task.status !== "running") continue
|
||||
|
||||
const staleMinutes = Math.round(timeSinceLastUpdate / 60000)
|
||||
task.status = "cancelled"
|
||||
task.error = `Stale timeout (no activity for ${staleMinutes}min)`
|
||||
task.completedAt = new Date()
|
||||
const staleMinutes = Math.round(timeSinceLastUpdate / 60000)
|
||||
task.status = "cancelled"
|
||||
task.error = `Stale timeout (no activity for ${staleMinutes}min). This is a FINAL cancellation - do NOT create a replacement task. If the timeout is too short, increase 'background_task.staleTimeoutMs' in .opencode/oh-my-opencode.json.`
|
||||
task.completedAt = new Date()
|
||||
|
||||
if (task.concurrencyKey) {
|
||||
concurrencyManager.release(task.concurrencyKey)
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
export * from "./types"
|
||||
export * from "./constants"
|
||||
export * from "./storage"
|
||||
export * from "./top-level-task"
|
||||
|
||||
@@ -11,8 +11,11 @@ import {
|
||||
getPlanName,
|
||||
createBoulderState,
|
||||
findPrometheusPlans,
|
||||
getTaskSessionState,
|
||||
upsertTaskSessionState,
|
||||
} from "./storage"
|
||||
import type { BoulderState } from "./types"
|
||||
import { readCurrentTopLevelTask } from "./top-level-task"
|
||||
|
||||
describe("boulder-state", () => {
|
||||
const TEST_DIR = join(tmpdir(), "boulder-state-test-" + Date.now())
|
||||
@@ -134,6 +137,24 @@ describe("boulder-state", () => {
|
||||
expect(result?.session_ids).toEqual(["session-1", "session-2"])
|
||||
expect(result?.plan_name).toBe("my-plan")
|
||||
})
|
||||
|
||||
test("should default task_sessions to empty object when missing from JSON", () => {
|
||||
// given - boulder.json without task_sessions field
|
||||
const boulderFile = join(SISYPHUS_DIR, "boulder.json")
|
||||
writeFileSync(boulderFile, JSON.stringify({
|
||||
active_plan: "/path/to/plan.md",
|
||||
started_at: "2026-01-01T00:00:00Z",
|
||||
session_ids: ["session-1"],
|
||||
plan_name: "plan",
|
||||
}))
|
||||
|
||||
// when
|
||||
const result = readBoulderState(TEST_DIR)
|
||||
|
||||
// then
|
||||
expect(result).not.toBeNull()
|
||||
expect(result!.task_sessions).toEqual({})
|
||||
})
|
||||
})
|
||||
|
||||
describe("writeBoulderState", () => {
|
||||
@@ -249,6 +270,115 @@ describe("boulder-state", () => {
|
||||
})
|
||||
})
|
||||
|
||||
describe("task session state", () => {
|
||||
test("should persist and read preferred session for a top-level plan task", () => {
|
||||
// given - existing boulder state
|
||||
const state: BoulderState = {
|
||||
active_plan: "/plan.md",
|
||||
started_at: "2026-01-02T10:00:00Z",
|
||||
session_ids: ["session-1"],
|
||||
plan_name: "plan",
|
||||
}
|
||||
writeBoulderState(TEST_DIR, state)
|
||||
|
||||
// when
|
||||
upsertTaskSessionState(TEST_DIR, {
|
||||
taskKey: "todo:1",
|
||||
taskLabel: "1",
|
||||
taskTitle: "Implement auth flow",
|
||||
sessionId: "ses_task_123",
|
||||
agent: "sisyphus-junior",
|
||||
category: "deep",
|
||||
})
|
||||
const result = getTaskSessionState(TEST_DIR, "todo:1")
|
||||
|
||||
// then
|
||||
expect(result).not.toBeNull()
|
||||
expect(result?.session_id).toBe("ses_task_123")
|
||||
expect(result?.task_title).toBe("Implement auth flow")
|
||||
expect(result?.agent).toBe("sisyphus-junior")
|
||||
expect(result?.category).toBe("deep")
|
||||
})
|
||||
|
||||
test("should overwrite preferred session for the same top-level plan task", () => {
|
||||
// given - existing boulder state with prior preferred session
|
||||
const state: BoulderState = {
|
||||
active_plan: "/plan.md",
|
||||
started_at: "2026-01-02T10:00:00Z",
|
||||
session_ids: ["session-1"],
|
||||
plan_name: "plan",
|
||||
task_sessions: {
|
||||
"todo:1": {
|
||||
task_key: "todo:1",
|
||||
task_label: "1",
|
||||
task_title: "Implement auth flow",
|
||||
session_id: "ses_old",
|
||||
updated_at: "2026-01-02T10:00:00Z",
|
||||
},
|
||||
},
|
||||
}
|
||||
writeBoulderState(TEST_DIR, state)
|
||||
|
||||
// when
|
||||
upsertTaskSessionState(TEST_DIR, {
|
||||
taskKey: "todo:1",
|
||||
taskLabel: "1",
|
||||
taskTitle: "Implement auth flow",
|
||||
sessionId: "ses_new",
|
||||
})
|
||||
const result = getTaskSessionState(TEST_DIR, "todo:1")
|
||||
|
||||
// then
|
||||
expect(result?.session_id).toBe("ses_new")
|
||||
})
|
||||
})
|
||||
|
||||
describe("readCurrentTopLevelTask", () => {
|
||||
test("should return the first unchecked top-level task in TODOs", () => {
|
||||
// given - plan with nested and top-level unchecked tasks
|
||||
const planPath = join(TEST_DIR, "current-task-plan.md")
|
||||
writeFileSync(planPath, `# Plan
|
||||
|
||||
## TODOs
|
||||
- [x] 1. Finished task
|
||||
- [ ] nested acceptance checkbox
|
||||
- [ ] 2. Current task
|
||||
|
||||
## Final Verification Wave
|
||||
- [ ] F1. Final review
|
||||
`)
|
||||
|
||||
// when
|
||||
const result = readCurrentTopLevelTask(planPath)
|
||||
|
||||
// then
|
||||
expect(result).not.toBeNull()
|
||||
expect(result?.key).toBe("todo:2")
|
||||
expect(result?.title).toBe("Current task")
|
||||
})
|
||||
|
||||
test("should fall back to final-wave task when implementation tasks are complete", () => {
|
||||
// given - plan with only final-wave work remaining
|
||||
const planPath = join(TEST_DIR, "final-wave-current-task-plan.md")
|
||||
writeFileSync(planPath, `# Plan
|
||||
|
||||
## TODOs
|
||||
- [x] 1. Finished task
|
||||
|
||||
## Final Verification Wave
|
||||
- [ ] F1. Final review
|
||||
`)
|
||||
|
||||
// when
|
||||
const result = readCurrentTopLevelTask(planPath)
|
||||
|
||||
// then
|
||||
expect(result).not.toBeNull()
|
||||
expect(result?.key).toBe("final-wave:f1")
|
||||
expect(result?.title).toBe("Final review")
|
||||
})
|
||||
})
|
||||
|
||||
describe("getPlanProgress", () => {
|
||||
test("should count completed and uncompleted checkboxes", () => {
|
||||
// given - plan file with checkboxes
|
||||
@@ -351,7 +481,7 @@ describe("boulder-state", () => {
|
||||
expect(progress.isComplete).toBe(true)
|
||||
})
|
||||
|
||||
test("should return isComplete true for empty plan", () => {
|
||||
test("should return isComplete false for empty plan", () => {
|
||||
// given - plan with no checkboxes
|
||||
const planPath = join(TEST_DIR, "empty-plan.md")
|
||||
writeFileSync(planPath, "# Plan\nNo tasks here")
|
||||
@@ -361,7 +491,7 @@ describe("boulder-state", () => {
|
||||
|
||||
// then
|
||||
expect(progress.total).toBe(0)
|
||||
expect(progress.isComplete).toBe(true)
|
||||
expect(progress.isComplete).toBe(false)
|
||||
})
|
||||
|
||||
test("should handle non-existent file", () => {
|
||||
|
||||
@@ -6,9 +6,11 @@
|
||||
|
||||
import { existsSync, readFileSync, writeFileSync, mkdirSync, readdirSync } from "node:fs"
|
||||
import { dirname, join, basename } from "node:path"
|
||||
import type { BoulderState, PlanProgress } from "./types"
|
||||
import type { BoulderState, PlanProgress, TaskSessionState } from "./types"
|
||||
import { BOULDER_DIR, BOULDER_FILE, PROMETHEUS_PLANS_DIR } from "./constants"
|
||||
|
||||
const RESERVED_KEYS = new Set(["__proto__", "prototype", "constructor"])
|
||||
|
||||
export function getBoulderFilePath(directory: string): string {
|
||||
return join(directory, BOULDER_DIR, BOULDER_FILE)
|
||||
}
|
||||
@@ -29,6 +31,9 @@ export function readBoulderState(directory: string): BoulderState | null {
|
||||
if (!Array.isArray(parsed.session_ids)) {
|
||||
parsed.session_ids = []
|
||||
}
|
||||
if (!parsed.task_sessions || typeof parsed.task_sessions !== "object" || Array.isArray(parsed.task_sessions)) {
|
||||
parsed.task_sessions = {}
|
||||
}
|
||||
return parsed as BoulderState
|
||||
} catch {
|
||||
return null
|
||||
@@ -85,6 +90,54 @@ export function clearBoulderState(directory: string): boolean {
|
||||
}
|
||||
}
|
||||
|
||||
export function getTaskSessionState(directory: string, taskKey: string): TaskSessionState | null {
|
||||
const state = readBoulderState(directory)
|
||||
if (!state?.task_sessions) {
|
||||
return null
|
||||
}
|
||||
|
||||
return state.task_sessions[taskKey] ?? null
|
||||
}
|
||||
|
||||
export function upsertTaskSessionState(
|
||||
directory: string,
|
||||
input: {
|
||||
taskKey: string
|
||||
taskLabel: string
|
||||
taskTitle: string
|
||||
sessionId: string
|
||||
agent?: string
|
||||
category?: string
|
||||
},
|
||||
): BoulderState | null {
|
||||
const state = readBoulderState(directory)
|
||||
if (!state) {
|
||||
return null
|
||||
}
|
||||
|
||||
if (RESERVED_KEYS.has(input.taskKey)) {
|
||||
return null
|
||||
}
|
||||
|
||||
const taskSessions = state.task_sessions ?? {}
|
||||
taskSessions[input.taskKey] = {
|
||||
task_key: input.taskKey,
|
||||
task_label: input.taskLabel,
|
||||
task_title: input.taskTitle,
|
||||
session_id: input.sessionId,
|
||||
...(input.agent !== undefined ? { agent: input.agent } : {}),
|
||||
...(input.category !== undefined ? { category: input.category } : {}),
|
||||
updated_at: new Date().toISOString(),
|
||||
}
|
||||
|
||||
state.task_sessions = taskSessions
|
||||
if (writeBoulderState(directory, state)) {
|
||||
return state
|
||||
}
|
||||
|
||||
return null
|
||||
}
|
||||
|
||||
/**
|
||||
* Find Prometheus plan files for this project.
|
||||
* Prometheus stores plans at: {project}/.sisyphus/plans/{name}.md
|
||||
@@ -133,7 +186,7 @@ export function getPlanProgress(planPath: string): PlanProgress {
|
||||
return {
|
||||
total,
|
||||
completed,
|
||||
isComplete: total === 0 || completed === total,
|
||||
isComplete: total > 0 && completed === total,
|
||||
}
|
||||
} catch {
|
||||
return { total: 0, completed: 0, isComplete: true }
|
||||
|
||||
268
src/features/boulder-state/top-level-task.test.ts
Normal file
268
src/features/boulder-state/top-level-task.test.ts
Normal file
@@ -0,0 +1,268 @@
|
||||
import { describe, expect, test } from "bun:test"
|
||||
import { writeFileSync } from "node:fs"
|
||||
import { join } from "node:path"
|
||||
import { tmpdir } from "node:os"
|
||||
|
||||
import { readCurrentTopLevelTask } from "./top-level-task"
|
||||
|
||||
function writePlanFile(fileName: string, content: string): string {
|
||||
const planPath = join(tmpdir(), fileName)
|
||||
writeFileSync(planPath, content, "utf-8")
|
||||
return planPath
|
||||
}
|
||||
|
||||
describe("readCurrentTopLevelTask", () => {
|
||||
test("returns first unchecked top-level task in TODOs", () => {
|
||||
// given
|
||||
const planPath = writePlanFile(
|
||||
`top-level-task-happy-${Date.now()}.md`,
|
||||
`# Plan
|
||||
|
||||
## TODOs
|
||||
- [x] 1. Done task
|
||||
- [ ] 2. Current task
|
||||
|
||||
## Final Verification Wave
|
||||
- [ ] F1. Final review
|
||||
`,
|
||||
)
|
||||
|
||||
// when
|
||||
const result = readCurrentTopLevelTask(planPath)
|
||||
|
||||
// then
|
||||
expect(result).toEqual({
|
||||
key: "todo:2",
|
||||
section: "todo",
|
||||
label: "2",
|
||||
title: "Current task",
|
||||
})
|
||||
})
|
||||
|
||||
test("returns null when all tasks are checked", () => {
|
||||
// given
|
||||
const planPath = writePlanFile(
|
||||
`top-level-task-all-checked-${Date.now()}.md`,
|
||||
`# Plan
|
||||
|
||||
## TODOs
|
||||
- [x] 1. Done task
|
||||
- [x] 2. Another done task
|
||||
|
||||
## Final Verification Wave
|
||||
- [x] F1. Final done review
|
||||
`,
|
||||
)
|
||||
|
||||
// when
|
||||
const result = readCurrentTopLevelTask(planPath)
|
||||
|
||||
// then
|
||||
expect(result).toBeNull()
|
||||
})
|
||||
|
||||
test("returns null for empty plan file", () => {
|
||||
// given
|
||||
const planPath = writePlanFile(`top-level-task-empty-${Date.now()}.md`, "")
|
||||
|
||||
// when
|
||||
const result = readCurrentTopLevelTask(planPath)
|
||||
|
||||
// then
|
||||
expect(result).toBeNull()
|
||||
})
|
||||
|
||||
test("returns null when plan file does not exist", () => {
|
||||
// given
|
||||
const planPath = join(tmpdir(), `top-level-task-missing-${Date.now()}.md`)
|
||||
|
||||
// when
|
||||
const result = readCurrentTopLevelTask(planPath)
|
||||
|
||||
// then
|
||||
expect(result).toBeNull()
|
||||
})
|
||||
|
||||
test("skips nested or indented checkboxes", () => {
|
||||
// given
|
||||
const planPath = writePlanFile(
|
||||
`top-level-task-nested-${Date.now()}.md`,
|
||||
`# Plan
|
||||
|
||||
## TODOs
|
||||
- [x] 1. Done task
|
||||
- [ ] nested should be ignored
|
||||
- [ ] 2. Top-level pending
|
||||
`,
|
||||
)
|
||||
|
||||
// when
|
||||
const result = readCurrentTopLevelTask(planPath)
|
||||
|
||||
// then
|
||||
expect(result?.key).toBe("todo:2")
|
||||
})
|
||||
|
||||
test("falls back to Final Verification Wave when TODOs are all checked", () => {
|
||||
// given
|
||||
const planPath = writePlanFile(
|
||||
`top-level-task-fallback-${Date.now()}.md`,
|
||||
`# Plan
|
||||
|
||||
## TODOs
|
||||
- [x] 1. Done task
|
||||
- [x] 2. Done task
|
||||
|
||||
## Final Verification Wave
|
||||
- [ ] F1. Final review pending
|
||||
`,
|
||||
)
|
||||
|
||||
// when
|
||||
const result = readCurrentTopLevelTask(planPath)
|
||||
|
||||
// then
|
||||
expect(result).toEqual({
|
||||
key: "final-wave:f1",
|
||||
section: "final-wave",
|
||||
label: "F1",
|
||||
title: "Final review pending",
|
||||
})
|
||||
})
|
||||
|
||||
test("selects the first unchecked task among mixed checked and unchecked TODOs", () => {
|
||||
// given
|
||||
const planPath = writePlanFile(
|
||||
`top-level-task-mixed-${Date.now()}.md`,
|
||||
`# Plan
|
||||
|
||||
## TODOs
|
||||
- [x] 1. Done task
|
||||
- [ ] 2. First unchecked
|
||||
- [ ] 3. Second unchecked
|
||||
`,
|
||||
)
|
||||
|
||||
// when
|
||||
const result = readCurrentTopLevelTask(planPath)
|
||||
|
||||
// then
|
||||
expect(result?.key).toBe("todo:2")
|
||||
expect(result?.title).toBe("First unchecked")
|
||||
})
|
||||
|
||||
test("ignores malformed labels and continues to next unchecked task", () => {
|
||||
// given
|
||||
const planPath = writePlanFile(
|
||||
`top-level-task-malformed-${Date.now()}.md`,
|
||||
`# Plan
|
||||
|
||||
## TODOs
|
||||
- [ ] no number prefix
|
||||
- [ ] 2. Valid task after malformed label
|
||||
`,
|
||||
)
|
||||
|
||||
// when
|
||||
const result = readCurrentTopLevelTask(planPath)
|
||||
|
||||
// then
|
||||
expect(result).toEqual({
|
||||
key: "todo:2",
|
||||
section: "todo",
|
||||
label: "2",
|
||||
title: "Valid task after malformed label",
|
||||
})
|
||||
})
|
||||
|
||||
test("supports unchecked tasks with asterisk bullets", () => {
|
||||
// given
|
||||
const planPath = writePlanFile(
|
||||
`top-level-task-asterisk-${Date.now()}.md`,
|
||||
`# Plan
|
||||
|
||||
## TODOs
|
||||
* [ ] 1. Task using asterisk bullet
|
||||
`,
|
||||
)
|
||||
|
||||
// when
|
||||
const result = readCurrentTopLevelTask(planPath)
|
||||
|
||||
// then
|
||||
expect(result?.key).toBe("todo:1")
|
||||
expect(result?.title).toBe("Task using asterisk bullet")
|
||||
})
|
||||
|
||||
test("returns final-wave task when plan has only Final Verification Wave section", () => {
|
||||
// given
|
||||
const planPath = writePlanFile(
|
||||
`top-level-task-final-only-${Date.now()}.md`,
|
||||
`# Plan
|
||||
|
||||
## Final Verification Wave
|
||||
- [ ] F2. Final-only task
|
||||
`,
|
||||
)
|
||||
|
||||
// when
|
||||
const result = readCurrentTopLevelTask(planPath)
|
||||
|
||||
// then
|
||||
expect(result).toEqual({
|
||||
key: "final-wave:f2",
|
||||
section: "final-wave",
|
||||
label: "F2",
|
||||
title: "Final-only task",
|
||||
})
|
||||
})
|
||||
|
||||
test("returns the first unchecked task when multiple unchecked tasks exist", () => {
|
||||
// given
|
||||
const planPath = writePlanFile(
|
||||
`top-level-task-multiple-${Date.now()}.md`,
|
||||
`# Plan
|
||||
|
||||
## TODOs
|
||||
- [ ] 1. First unchecked task
|
||||
- [ ] 2. Second unchecked task
|
||||
- [ ] 3. Third unchecked task
|
||||
`,
|
||||
)
|
||||
|
||||
// when
|
||||
const result = readCurrentTopLevelTask(planPath)
|
||||
|
||||
// then
|
||||
expect(result?.label).toBe("1")
|
||||
expect(result?.title).toBe("First unchecked task")
|
||||
})
|
||||
|
||||
test("ignores unchecked content in non-target sections during section transitions", () => {
|
||||
// given
|
||||
const planPath = writePlanFile(
|
||||
`top-level-task-sections-${Date.now()}.md`,
|
||||
`# Plan
|
||||
|
||||
## Notes
|
||||
- [ ] 99. Should be ignored because section is not tracked
|
||||
|
||||
## TODOs
|
||||
- [x] 1. Done implementation task
|
||||
|
||||
## Decisions
|
||||
- [ ] 100. Should also be ignored
|
||||
|
||||
## Final Verification Wave
|
||||
- [ ] F3. Final verification task
|
||||
`,
|
||||
)
|
||||
|
||||
// when
|
||||
const result = readCurrentTopLevelTask(planPath)
|
||||
|
||||
// then
|
||||
expect(result?.key).toBe("final-wave:f3")
|
||||
expect(result?.section).toBe("final-wave")
|
||||
})
|
||||
})
|
||||
77
src/features/boulder-state/top-level-task.ts
Normal file
77
src/features/boulder-state/top-level-task.ts
Normal file
@@ -0,0 +1,77 @@
|
||||
import { existsSync, readFileSync } from "node:fs"
|
||||
|
||||
import type { TopLevelTaskRef } from "./types"
|
||||
|
||||
const TODO_HEADING_PATTERN = /^##\s+TODOs\b/i
|
||||
const FINAL_VERIFICATION_HEADING_PATTERN = /^##\s+Final Verification Wave\b/i
|
||||
const SECOND_LEVEL_HEADING_PATTERN = /^##\s+/
|
||||
const UNCHECKED_CHECKBOX_PATTERN = /^(\s*)[-*]\s*\[\s*\]\s*(.+)$/
|
||||
const TODO_TASK_PATTERN = /^(\d+)\.\s+(.+)$/
|
||||
const FINAL_WAVE_TASK_PATTERN = /^(F\d+)\.\s+(.+)$/i
|
||||
|
||||
type PlanSection = "todo" | "final-wave" | "other"
|
||||
|
||||
function buildTaskRef(
|
||||
section: "todo" | "final-wave",
|
||||
taskLabel: string,
|
||||
): TopLevelTaskRef | null {
|
||||
const pattern = section === "todo" ? TODO_TASK_PATTERN : FINAL_WAVE_TASK_PATTERN
|
||||
const match = taskLabel.match(pattern)
|
||||
if (!match) {
|
||||
return null
|
||||
}
|
||||
|
||||
const rawLabel = match[1]
|
||||
const title = match[2].trim()
|
||||
|
||||
return {
|
||||
key: `${section}:${rawLabel.toLowerCase()}`,
|
||||
section,
|
||||
label: rawLabel,
|
||||
title,
|
||||
}
|
||||
}
|
||||
|
||||
export function readCurrentTopLevelTask(planPath: string): TopLevelTaskRef | null {
|
||||
if (!existsSync(planPath)) {
|
||||
return null
|
||||
}
|
||||
|
||||
try {
|
||||
const content = readFileSync(planPath, "utf-8")
|
||||
const lines = content.split(/\r?\n/)
|
||||
let section: PlanSection = "other"
|
||||
|
||||
for (const line of lines) {
|
||||
if (SECOND_LEVEL_HEADING_PATTERN.test(line)) {
|
||||
section = TODO_HEADING_PATTERN.test(line)
|
||||
? "todo"
|
||||
: FINAL_VERIFICATION_HEADING_PATTERN.test(line)
|
||||
? "final-wave"
|
||||
: "other"
|
||||
}
|
||||
|
||||
const uncheckedTaskMatch = line.match(UNCHECKED_CHECKBOX_PATTERN)
|
||||
if (!uncheckedTaskMatch) {
|
||||
continue
|
||||
}
|
||||
|
||||
if (uncheckedTaskMatch[1].length > 0) {
|
||||
continue
|
||||
}
|
||||
|
||||
if (section !== "todo" && section !== "final-wave") {
|
||||
continue
|
||||
}
|
||||
|
||||
const taskRef = buildTaskRef(section, uncheckedTaskMatch[2].trim())
|
||||
if (taskRef) {
|
||||
return taskRef
|
||||
}
|
||||
}
|
||||
|
||||
return null
|
||||
} catch {
|
||||
return null
|
||||
}
|
||||
}
|
||||
@@ -18,6 +18,8 @@ export interface BoulderState {
|
||||
agent?: string
|
||||
/** Absolute path to the git worktree root where work happens */
|
||||
worktree_path?: string
|
||||
/** Preferred reusable subagent sessions keyed by current top-level plan task */
|
||||
task_sessions?: Record<string, TaskSessionState>
|
||||
}
|
||||
|
||||
export interface PlanProgress {
|
||||
@@ -28,3 +30,31 @@ export interface PlanProgress {
|
||||
/** Whether all tasks are done */
|
||||
isComplete: boolean
|
||||
}
|
||||
|
||||
export interface TaskSessionState {
|
||||
/** Stable identifier for the current top-level plan task (e.g. todo:1 / final-wave:F1) */
|
||||
task_key: string
|
||||
/** Original task label from the plan file */
|
||||
task_label: string
|
||||
/** Full task title from the plan file */
|
||||
task_title: string
|
||||
/** Preferred reusable subagent session */
|
||||
session_id: string
|
||||
/** Agent associated with the task session, when known */
|
||||
agent?: string
|
||||
/** Category associated with the task session, when known */
|
||||
category?: string
|
||||
/** Last update timestamp */
|
||||
updated_at: string
|
||||
}
|
||||
|
||||
export interface TopLevelTaskRef {
|
||||
/** Stable identifier for the current top-level plan task */
|
||||
key: string
|
||||
/** Task section in the Prometheus plan */
|
||||
section: "todo" | "final-wave"
|
||||
/** Original label token (e.g. 1 / F1) */
|
||||
label: string
|
||||
/** Full task title extracted from the checkbox line */
|
||||
title: string
|
||||
}
|
||||
|
||||
112
src/features/claude-code-plugin-loader/loader.test.ts
Normal file
112
src/features/claude-code-plugin-loader/loader.test.ts
Normal file
@@ -0,0 +1,112 @@
|
||||
import { describe, it, expect, beforeEach, afterEach } from "bun:test"
|
||||
import type { PluginComponentsResult } from "./loader"
|
||||
|
||||
describe("loadAllPluginComponents", () => {
|
||||
const originalEnv = { ...process.env }
|
||||
|
||||
beforeEach(() => {
|
||||
delete process.env.OPENCODE_DISABLE_CLAUDE_CODE
|
||||
delete process.env.OPENCODE_DISABLE_CLAUDE_CODE_PLUGINS
|
||||
})
|
||||
|
||||
afterEach(() => {
|
||||
process.env = { ...originalEnv }
|
||||
})
|
||||
|
||||
describe("when OPENCODE_DISABLE_CLAUDE_CODE is set to 'true'", () => {
|
||||
it("returns empty result without loading any plugins", async () => {
|
||||
// given
|
||||
process.env.OPENCODE_DISABLE_CLAUDE_CODE = "true"
|
||||
|
||||
// when
|
||||
const { loadAllPluginComponents } = await import("./loader")
|
||||
const result: PluginComponentsResult = await loadAllPluginComponents()
|
||||
|
||||
// then
|
||||
expect(result.commands).toEqual({})
|
||||
expect(result.skills).toEqual({})
|
||||
expect(result.agents).toEqual({})
|
||||
expect(result.mcpServers).toEqual({})
|
||||
expect(result.hooksConfigs).toEqual([])
|
||||
expect(result.plugins).toEqual([])
|
||||
expect(result.errors).toEqual([])
|
||||
})
|
||||
})
|
||||
|
||||
describe("when OPENCODE_DISABLE_CLAUDE_CODE is set to '1'", () => {
|
||||
it("returns empty result without loading any plugins", async () => {
|
||||
// given
|
||||
process.env.OPENCODE_DISABLE_CLAUDE_CODE = "1"
|
||||
|
||||
// when
|
||||
const { loadAllPluginComponents } = await import("./loader")
|
||||
const result: PluginComponentsResult = await loadAllPluginComponents()
|
||||
|
||||
// then
|
||||
expect(result.commands).toEqual({})
|
||||
expect(result.plugins).toEqual([])
|
||||
})
|
||||
})
|
||||
|
||||
describe("when OPENCODE_DISABLE_CLAUDE_CODE_PLUGINS is set to 'true'", () => {
|
||||
it("returns empty result without loading any plugins", async () => {
|
||||
// given
|
||||
process.env.OPENCODE_DISABLE_CLAUDE_CODE_PLUGINS = "true"
|
||||
|
||||
// when
|
||||
const { loadAllPluginComponents } = await import("./loader")
|
||||
const result: PluginComponentsResult = await loadAllPluginComponents()
|
||||
|
||||
// then
|
||||
expect(result.commands).toEqual({})
|
||||
expect(result.plugins).toEqual([])
|
||||
})
|
||||
})
|
||||
|
||||
describe("when OPENCODE_DISABLE_CLAUDE_CODE_PLUGINS is set to '1'", () => {
|
||||
it("returns empty result without loading any plugins", async () => {
|
||||
// given
|
||||
process.env.OPENCODE_DISABLE_CLAUDE_CODE_PLUGINS = "1"
|
||||
|
||||
// when
|
||||
const { loadAllPluginComponents } = await import("./loader")
|
||||
const result: PluginComponentsResult = await loadAllPluginComponents()
|
||||
|
||||
// then
|
||||
expect(result.commands).toEqual({})
|
||||
expect(result.plugins).toEqual([])
|
||||
})
|
||||
})
|
||||
|
||||
describe("when neither env var is set", () => {
|
||||
it("does not skip plugin loading", async () => {
|
||||
// given
|
||||
delete process.env.OPENCODE_DISABLE_CLAUDE_CODE
|
||||
delete process.env.OPENCODE_DISABLE_CLAUDE_CODE_PLUGINS
|
||||
|
||||
// when
|
||||
const { loadAllPluginComponents } = await import("./loader")
|
||||
const result: PluginComponentsResult = await loadAllPluginComponents()
|
||||
|
||||
// then — should attempt to load (may find 0 plugins, but shouldn't early-return)
|
||||
expect(result).toBeDefined()
|
||||
expect(result).toHaveProperty("commands")
|
||||
expect(result).toHaveProperty("plugins")
|
||||
})
|
||||
})
|
||||
|
||||
describe("when env var is set to unrecognized value", () => {
|
||||
it("does not skip plugin loading", async () => {
|
||||
// given
|
||||
process.env.OPENCODE_DISABLE_CLAUDE_CODE = "yes"
|
||||
|
||||
// when
|
||||
const { loadAllPluginComponents } = await import("./loader")
|
||||
const result: PluginComponentsResult = await loadAllPluginComponents()
|
||||
|
||||
// then — "yes" is not "true" or "1", should not skip
|
||||
expect(result).toBeDefined()
|
||||
expect(result).toHaveProperty("plugins")
|
||||
})
|
||||
})
|
||||
})
|
||||
@@ -27,7 +27,26 @@ export interface PluginComponentsResult {
|
||||
errors: PluginLoadError[]
|
||||
}
|
||||
|
||||
function isClaudeCodePluginsDisabled(): boolean {
|
||||
const disableFlag = process.env.OPENCODE_DISABLE_CLAUDE_CODE
|
||||
const disablePluginsFlag = process.env.OPENCODE_DISABLE_CLAUDE_CODE_PLUGINS
|
||||
return disableFlag === "true" || disableFlag === "1" || disablePluginsFlag === "true" || disablePluginsFlag === "1"
|
||||
}
|
||||
|
||||
export async function loadAllPluginComponents(options?: PluginLoaderOptions): Promise<PluginComponentsResult> {
|
||||
if (isClaudeCodePluginsDisabled()) {
|
||||
log("Claude Code plugin loading disabled via OPENCODE_DISABLE_CLAUDE_CODE env var")
|
||||
return {
|
||||
commands: {},
|
||||
skills: {},
|
||||
agents: {},
|
||||
mcpServers: {},
|
||||
hooksConfigs: [],
|
||||
plugins: [],
|
||||
errors: [],
|
||||
}
|
||||
}
|
||||
|
||||
const { plugins, errors } = discoverInstalledPlugins(options)
|
||||
|
||||
const [commands, skills, agents, mcpServers, hooksConfigs] = await Promise.all([
|
||||
|
||||
@@ -226,6 +226,29 @@ describe('TmuxSessionManager', () => {
|
||||
// then
|
||||
expect(manager).toBeDefined()
|
||||
})
|
||||
|
||||
test('falls back to default port when serverUrl has port 0', async () => {
|
||||
// given
|
||||
mockIsInsideTmux.mockReturnValue(true)
|
||||
const { TmuxSessionManager } = await import('./manager')
|
||||
const ctx = {
|
||||
...createMockContext(),
|
||||
serverUrl: new URL('http://127.0.0.1:0/'),
|
||||
}
|
||||
const config: TmuxConfig = {
|
||||
enabled: true,
|
||||
layout: 'main-vertical',
|
||||
main_pane_size: 60,
|
||||
main_pane_min_width: 80,
|
||||
agent_pane_min_width: 40,
|
||||
}
|
||||
|
||||
// when
|
||||
const manager = new TmuxSessionManager(ctx, config, mockTmuxDeps)
|
||||
|
||||
// then
|
||||
expect((manager as any).serverUrl).toBe('http://localhost:4096')
|
||||
})
|
||||
})
|
||||
|
||||
describe('onSessionCreated', () => {
|
||||
|
||||
@@ -73,10 +73,18 @@ export class TmuxSessionManager {
|
||||
this.tmuxConfig = tmuxConfig
|
||||
this.deps = deps
|
||||
const defaultPort = process.env.OPENCODE_PORT ?? "4096"
|
||||
const fallbackUrl = `http://localhost:${defaultPort}`
|
||||
try {
|
||||
this.serverUrl = ctx.serverUrl?.toString() ?? `http://localhost:${defaultPort}`
|
||||
const raw = ctx.serverUrl?.toString()
|
||||
if (raw) {
|
||||
const parsed = new URL(raw)
|
||||
const port = parsed.port || (parsed.protocol === 'https:' ? '443' : '80')
|
||||
this.serverUrl = port === '0' ? fallbackUrl : raw
|
||||
} else {
|
||||
this.serverUrl = fallbackUrl
|
||||
}
|
||||
} catch {
|
||||
this.serverUrl = `http://localhost:${defaultPort}`
|
||||
this.serverUrl = fallbackUrl
|
||||
}
|
||||
this.sourcePaneId = deps.getCurrentPaneId()
|
||||
this.pollingManager = new TmuxPollingManager(
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
# src/hooks/ — 46 Lifecycle Hooks
|
||||
# src/hooks/ — 48 Lifecycle Hooks
|
||||
|
||||
**Generated:** 2026-03-06
|
||||
|
||||
## OVERVIEW
|
||||
|
||||
46 hooks across 45 directories + 11 standalone files. Three-tier composition: Core(37) + Continuation(7) + Skill(2). All hooks follow `createXXXHook(deps) → HookFunction` factory pattern.
|
||||
48 hooks across dedicated modules and standalone files. Three-tier composition: Core(39) + Continuation(7) + Skill(2). All hooks follow `createXXXHook(deps) → HookFunction` factory pattern.
|
||||
|
||||
## HOOK TIERS
|
||||
|
||||
@@ -85,7 +85,7 @@ hooks/
|
||||
| noHephaestusNonGpt | chat.message | Block Hephaestus from using non-GPT models |
|
||||
| runtimeFallback | event | Auto-switch models on API provider errors |
|
||||
|
||||
### Tier 2: Tool Guard Hooks (10) — `create-tool-guard-hooks.ts`
|
||||
### Tier 2: Tool Guard Hooks (12) — `create-tool-guard-hooks.ts`
|
||||
|
||||
| Hook | Event | Purpose |
|
||||
|------|-------|---------|
|
||||
|
||||
@@ -98,9 +98,9 @@ describe("runSummarizeRetryStrategy", () => {
|
||||
}) as typeof setTimeout
|
||||
|
||||
autoCompactState.retryStateBySession.set(sessionID, {
|
||||
attempt: 1,
|
||||
attempt: 0,
|
||||
lastAttemptTime: Date.now(),
|
||||
firstAttemptTime: Date.now() - 119700,
|
||||
firstAttemptTime: Date.now() - 119900,
|
||||
})
|
||||
summarizeMock.mockRejectedValueOnce(new Error("rate limited"))
|
||||
|
||||
@@ -117,6 +117,6 @@ describe("runSummarizeRetryStrategy", () => {
|
||||
//#then
|
||||
expect(timeoutCalls.length).toBe(1)
|
||||
expect(timeoutCalls[0]!.delay).toBeGreaterThan(0)
|
||||
expect(timeoutCalls[0]!.delay).toBeLessThanOrEqual(500)
|
||||
expect(timeoutCalls[0]!.delay).toBeLessThanOrEqual(300)
|
||||
})
|
||||
})
|
||||
|
||||
@@ -114,6 +114,7 @@ export async function runSummarizeRetryStrategy(params: {
|
||||
body: summarizeBody as never,
|
||||
query: { directory: params.directory },
|
||||
})
|
||||
clearSessionState(params.autoCompactState, params.sessionID)
|
||||
return
|
||||
} catch {
|
||||
const remainingTimeMs = SUMMARIZE_RETRY_TOTAL_TIMEOUT_MS - (Date.now() - retryState.firstAttemptTime)
|
||||
|
||||
@@ -2,11 +2,12 @@ import type { PluginInput } from "@opencode-ai/plugin"
|
||||
import { createAtlasEventHandler } from "./event-handler"
|
||||
import { createToolExecuteAfterHandler } from "./tool-execute-after"
|
||||
import { createToolExecuteBeforeHandler } from "./tool-execute-before"
|
||||
import type { AtlasHookOptions, SessionState } from "./types"
|
||||
import type { AtlasHookOptions, PendingTaskRef, SessionState } from "./types"
|
||||
|
||||
export function createAtlasHook(ctx: PluginInput, options?: AtlasHookOptions) {
|
||||
const sessions = new Map<string, SessionState>()
|
||||
const pendingFilePaths = new Map<string, string>()
|
||||
const pendingTaskRefs = new Map<string, PendingTaskRef>()
|
||||
const autoCommit = options?.autoCommit ?? true
|
||||
|
||||
function getState(sessionID: string): SessionState {
|
||||
@@ -20,7 +21,7 @@ export function createAtlasHook(ctx: PluginInput, options?: AtlasHookOptions) {
|
||||
|
||||
return {
|
||||
handler: createAtlasEventHandler({ ctx, options, sessions, getState }),
|
||||
"tool.execute.before": createToolExecuteBeforeHandler({ ctx, pendingFilePaths }),
|
||||
"tool.execute.after": createToolExecuteAfterHandler({ ctx, pendingFilePaths, autoCommit, getState }),
|
||||
"tool.execute.before": createToolExecuteBeforeHandler({ ctx, pendingFilePaths, pendingTaskRefs }),
|
||||
"tool.execute.after": createToolExecuteAfterHandler({ ctx, pendingFilePaths, pendingTaskRefs, autoCommit, getState }),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -15,6 +15,8 @@ export async function injectBoulderContinuation(input: {
|
||||
total: number
|
||||
agent?: string
|
||||
worktreePath?: string
|
||||
preferredTaskSessionId?: string
|
||||
preferredTaskTitle?: string
|
||||
backgroundManager?: BackgroundManager
|
||||
sessionState: SessionState
|
||||
}): Promise<void> {
|
||||
@@ -26,6 +28,8 @@ export async function injectBoulderContinuation(input: {
|
||||
total,
|
||||
agent,
|
||||
worktreePath,
|
||||
preferredTaskSessionId,
|
||||
preferredTaskTitle,
|
||||
backgroundManager,
|
||||
sessionState,
|
||||
} = input
|
||||
@@ -40,9 +44,13 @@ export async function injectBoulderContinuation(input: {
|
||||
}
|
||||
|
||||
const worktreeContext = worktreePath ? `\n\n[Worktree: ${worktreePath}]` : ""
|
||||
const preferredSessionContext = preferredTaskSessionId
|
||||
? `\n\n[Preferred reuse session for current top-level plan task${preferredTaskTitle ? `: ${preferredTaskTitle}` : ""}: ${preferredTaskSessionId}]`
|
||||
: ""
|
||||
const prompt =
|
||||
BOULDER_CONTINUATION_PROMPT.replace(/{PLAN_NAME}/g, planName) +
|
||||
`\n\n[Status: ${total - remaining}/${total} completed, ${remaining} remaining]` +
|
||||
preferredSessionContext +
|
||||
worktreeContext
|
||||
|
||||
try {
|
||||
|
||||
@@ -4,7 +4,7 @@ import { existsSync, mkdirSync, rmSync, writeFileSync } from "node:fs"
|
||||
import { tmpdir } from "node:os"
|
||||
import { join } from "node:path"
|
||||
import { createOpencodeClient } from "@opencode-ai/sdk"
|
||||
import type { AssistantMessage } from "@opencode-ai/sdk"
|
||||
import type { AssistantMessage, Session } from "@opencode-ai/sdk"
|
||||
import type { BoulderState } from "../../features/boulder-state"
|
||||
import { clearBoulderState, writeBoulderState } from "../../features/boulder-state"
|
||||
|
||||
@@ -52,6 +52,23 @@ describe("Atlas final-wave approval gate regressions", () => {
|
||||
response: new Response(),
|
||||
}))
|
||||
|
||||
Reflect.set(client.session, "get", async ({ path }: { path: { id: string } }) => {
|
||||
const parentID = path.id === "ses_nested_scope_review"
|
||||
? "atlas-nested-final-wave-session"
|
||||
: path.id.startsWith("ses_parallel_review_")
|
||||
? "atlas-parallel-final-wave-session"
|
||||
: "main-session-123"
|
||||
|
||||
return {
|
||||
data: {
|
||||
id: path.id,
|
||||
parentID,
|
||||
} as Session,
|
||||
request: new Request(`http://localhost/session/${path.id}`),
|
||||
response: new Response(),
|
||||
}
|
||||
})
|
||||
|
||||
return {
|
||||
directory: testDirectory,
|
||||
project: {} as AtlasHookContext["project"],
|
||||
|
||||
@@ -60,10 +60,18 @@ describe("Atlas final verification approval gate", () => {
|
||||
}
|
||||
})
|
||||
|
||||
Reflect.set(client.session, "get", async () => {
|
||||
Reflect.set(client.session, "get", async ({ path }: { path: { id: string } }) => {
|
||||
const parentID = path.id === "ses_final_wave_review"
|
||||
? "atlas-final-wave-session"
|
||||
: path.id === "ses_feature_task"
|
||||
? "atlas-non-final-session"
|
||||
: "main-session-123"
|
||||
return {
|
||||
data: { parentID: "main-session-123" } as Session,
|
||||
request: new Request("http://localhost/session/main-session-123"),
|
||||
data: {
|
||||
id: path.id,
|
||||
parentID,
|
||||
} as Session,
|
||||
request: new Request(`http://localhost/session/${path.id}`),
|
||||
response: new Response(),
|
||||
}
|
||||
})
|
||||
|
||||
@@ -1,5 +1,10 @@
|
||||
import type { PluginInput } from "@opencode-ai/plugin"
|
||||
import { getPlanProgress, readBoulderState } from "../../features/boulder-state"
|
||||
import {
|
||||
getPlanProgress,
|
||||
getTaskSessionState,
|
||||
readBoulderState,
|
||||
readCurrentTopLevelTask,
|
||||
} from "../../features/boulder-state"
|
||||
import { log } from "../../shared/logger"
|
||||
import { injectBoulderContinuation } from "./boulder-continuation-injector"
|
||||
import { HOOK_NAME } from "./hook-name"
|
||||
@@ -8,6 +13,7 @@ import type { AtlasHookOptions, SessionState } from "./types"
|
||||
|
||||
const CONTINUATION_COOLDOWN_MS = 5000
|
||||
const FAILURE_BACKOFF_MS = 5 * 60 * 1000
|
||||
const MAX_CONSECUTIVE_PROMPT_FAILURES = 10
|
||||
const RETRY_DELAY_MS = CONTINUATION_COOLDOWN_MS + 1000
|
||||
|
||||
function hasRunningBackgroundTasks(sessionID: string, options?: AtlasHookOptions): boolean {
|
||||
@@ -31,6 +37,14 @@ async function injectContinuation(input: {
|
||||
input.sessionState.lastContinuationInjectedAt = Date.now()
|
||||
|
||||
try {
|
||||
const currentBoulder = readBoulderState(input.ctx.directory)
|
||||
const currentTask = currentBoulder
|
||||
? readCurrentTopLevelTask(currentBoulder.active_plan)
|
||||
: null
|
||||
const preferredTaskSession = currentTask
|
||||
? getTaskSessionState(input.ctx.directory, currentTask.key)
|
||||
: null
|
||||
|
||||
await injectBoulderContinuation({
|
||||
ctx: input.ctx,
|
||||
sessionID: input.sessionID,
|
||||
@@ -39,6 +53,8 @@ async function injectContinuation(input: {
|
||||
total: input.progress.total,
|
||||
agent: input.agent,
|
||||
worktreePath: input.worktreePath,
|
||||
preferredTaskSessionId: preferredTaskSession?.session_id,
|
||||
preferredTaskTitle: preferredTaskSession?.task_title,
|
||||
backgroundManager: input.options?.backgroundManager,
|
||||
sessionState: input.sessionState,
|
||||
})
|
||||
@@ -62,7 +78,7 @@ function scheduleRetry(input: {
|
||||
sessionState.pendingRetryTimer = setTimeout(async () => {
|
||||
sessionState.pendingRetryTimer = undefined
|
||||
|
||||
if (sessionState.promptFailureCount >= 2) return
|
||||
if (sessionState.promptFailureCount >= MAX_CONSECUTIVE_PROMPT_FAILURES) return
|
||||
if (sessionState.waitingForFinalWaveApproval) return
|
||||
|
||||
const currentBoulder = readBoulderState(ctx.directory)
|
||||
@@ -72,7 +88,6 @@ function scheduleRetry(input: {
|
||||
const currentProgress = getPlanProgress(currentBoulder.active_plan)
|
||||
if (currentProgress.isComplete) return
|
||||
if (options?.isContinuationStopped?.(sessionID)) return
|
||||
if (options?.shouldSkipContinuation?.(sessionID)) return
|
||||
if (hasRunningBackgroundTasks(sessionID, options)) return
|
||||
|
||||
await injectContinuation({
|
||||
@@ -135,7 +150,7 @@ export async function handleAtlasSessionIdle(input: {
|
||||
return
|
||||
}
|
||||
|
||||
if (sessionState.promptFailureCount >= 2) {
|
||||
if (sessionState.promptFailureCount >= MAX_CONSECUTIVE_PROMPT_FAILURES) {
|
||||
const timeSinceLastFailure =
|
||||
sessionState.lastFailureAt !== undefined ? now - sessionState.lastFailureAt : Number.POSITIVE_INFINITY
|
||||
if (timeSinceLastFailure < FAILURE_BACKOFF_MS) {
|
||||
@@ -161,11 +176,6 @@ export async function handleAtlasSessionIdle(input: {
|
||||
return
|
||||
}
|
||||
|
||||
if (options?.shouldSkipContinuation?.(sessionID)) {
|
||||
log(`[${HOOK_NAME}] Skipped: another continuation hook already injected`, { sessionID })
|
||||
return
|
||||
}
|
||||
|
||||
if (sessionState.lastContinuationInjectedAt && now - sessionState.lastContinuationInjectedAt < CONTINUATION_COOLDOWN_MS) {
|
||||
scheduleRetry({ ctx, sessionID, sessionState, options })
|
||||
log(`[${HOOK_NAME}] Skipped: continuation cooldown active`, {
|
||||
|
||||
@@ -10,6 +10,7 @@ import {
|
||||
} from "../../features/boulder-state"
|
||||
import type { BoulderState } from "../../features/boulder-state"
|
||||
import { _resetForTesting, subagentSessions, updateSessionAgent } from "../../features/claude-code-session-state"
|
||||
import type { PendingTaskRef } from "./types"
|
||||
|
||||
const TEST_STORAGE_ROOT = join(tmpdir(), `atlas-message-storage-${randomUUID()}`)
|
||||
const TEST_MESSAGE_STORAGE = join(TEST_STORAGE_ROOT, "message")
|
||||
@@ -33,25 +34,40 @@ mock.module("../../shared/opencode-storage-detection", () => ({
|
||||
}))
|
||||
|
||||
const { createAtlasHook } = await import("./index")
|
||||
const { createToolExecuteAfterHandler } = await import("./tool-execute-after")
|
||||
const { createToolExecuteBeforeHandler } = await import("./tool-execute-before")
|
||||
const { MESSAGE_STORAGE } = await import("../../features/hook-message-injector")
|
||||
|
||||
describe("atlas hook", () => {
|
||||
let TEST_DIR: string
|
||||
let SISYPHUS_DIR: string
|
||||
|
||||
function createMockPluginInput(overrides?: { promptMock?: ReturnType<typeof mock> }) {
|
||||
function createMockPluginInput(overrides?: {
|
||||
promptMock?: ReturnType<typeof mock>
|
||||
sessionGetMock?: ReturnType<typeof mock>
|
||||
}) {
|
||||
const promptMock = overrides?.promptMock ?? mock(() => Promise.resolve())
|
||||
const sessionGetMock = overrides?.sessionGetMock ?? mock(async ({ path }: { path: { id: string } }) => ({
|
||||
data: {
|
||||
id: path.id,
|
||||
parentID: path.id.startsWith("ses_") ? "session-1" : "main-session-123",
|
||||
},
|
||||
}))
|
||||
return {
|
||||
directory: TEST_DIR,
|
||||
client: {
|
||||
session: {
|
||||
get: async () => ({ data: { parentID: "main-session-123" } }),
|
||||
get: sessionGetMock,
|
||||
prompt: promptMock,
|
||||
promptAsync: promptMock,
|
||||
},
|
||||
},
|
||||
_promptMock: promptMock,
|
||||
} as unknown as Parameters<typeof createAtlasHook>[0] & { _promptMock: ReturnType<typeof mock> }
|
||||
_sessionGetMock: sessionGetMock,
|
||||
} as unknown as Parameters<typeof createAtlasHook>[0] & {
|
||||
_promptMock: ReturnType<typeof mock>
|
||||
_sessionGetMock: ReturnType<typeof mock>
|
||||
}
|
||||
}
|
||||
|
||||
function setupMessageStorage(sessionID: string, agent: string): void {
|
||||
@@ -404,12 +420,417 @@ describe("atlas hook", () => {
|
||||
|
||||
// then - should include verification instructions
|
||||
expect(output.output).toContain("LYING")
|
||||
expect(output.output).toContain("PHASE 1")
|
||||
expect(output.output).toContain("PHASE 2")
|
||||
expect(output.output).toContain("PHASE 1")
|
||||
expect(output.output).toContain("PHASE 2")
|
||||
|
||||
cleanupMessageStorage(sessionID)
|
||||
})
|
||||
|
||||
test("should clean pending task refs when a task returns background launch output", async () => {
|
||||
// given - direct handlers with shared pending maps
|
||||
const sessionID = "session-bg-launch-cleanup-test"
|
||||
setupMessageStorage(sessionID, "atlas")
|
||||
|
||||
const planPath = join(TEST_DIR, "background-cleanup-plan.md")
|
||||
writeFileSync(planPath, `# Plan
|
||||
|
||||
## TODOs
|
||||
- [ ] 1. Implement auth flow
|
||||
`)
|
||||
writeBoulderState(TEST_DIR, {
|
||||
active_plan: planPath,
|
||||
started_at: "2026-01-02T10:00:00Z",
|
||||
session_ids: ["session-1"],
|
||||
plan_name: "background-cleanup-plan",
|
||||
})
|
||||
|
||||
const pendingFilePaths = new Map<string, string>()
|
||||
const pendingTaskRefs = new Map<string, PendingTaskRef>()
|
||||
const beforeHandler = createToolExecuteBeforeHandler({
|
||||
ctx: createMockPluginInput(),
|
||||
pendingFilePaths,
|
||||
pendingTaskRefs,
|
||||
})
|
||||
const afterHandler = createToolExecuteAfterHandler({
|
||||
ctx: createMockPluginInput(),
|
||||
pendingFilePaths,
|
||||
pendingTaskRefs,
|
||||
autoCommit: true,
|
||||
getState: () => ({ promptFailureCount: 0 }),
|
||||
})
|
||||
|
||||
// when - the task is captured before execution
|
||||
await beforeHandler(
|
||||
{ tool: "task", sessionID, callID: "call-bg-launch" },
|
||||
{ args: { prompt: "Implement auth flow" } }
|
||||
)
|
||||
expect(pendingTaskRefs.size).toBe(1)
|
||||
|
||||
// and the task returns a background launch result
|
||||
await afterHandler(
|
||||
{ tool: "task", sessionID, callID: "call-bg-launch" },
|
||||
{
|
||||
title: "Sisyphus Task",
|
||||
output: "Background task launched.\n\nSession ID: ses_bg_12345",
|
||||
metadata: {},
|
||||
}
|
||||
)
|
||||
|
||||
// then - the pending task ref is still cleaned up
|
||||
expect(pendingTaskRefs.size).toBe(0)
|
||||
|
||||
cleanupMessageStorage(sessionID)
|
||||
})
|
||||
|
||||
test("should persist preferred subagent session for the current top-level task", async () => {
|
||||
// given - boulder state with a current top-level task, Atlas caller
|
||||
const sessionID = "session-task-session-track-test"
|
||||
setupMessageStorage(sessionID, "atlas")
|
||||
|
||||
const planPath = join(TEST_DIR, "task-session-plan.md")
|
||||
writeFileSync(planPath, `# Plan
|
||||
|
||||
## TODOs
|
||||
- [ ] 1. Implement auth flow
|
||||
- [ ] nested acceptance checkbox
|
||||
`)
|
||||
|
||||
const state: BoulderState = {
|
||||
active_plan: planPath,
|
||||
started_at: "2026-01-02T10:00:00Z",
|
||||
session_ids: ["session-1"],
|
||||
plan_name: "task-session-plan",
|
||||
}
|
||||
writeBoulderState(TEST_DIR, state)
|
||||
|
||||
const hook = createAtlasHook(createMockPluginInput())
|
||||
const output = {
|
||||
title: "Sisyphus Task",
|
||||
output: `Task completed successfully
|
||||
|
||||
<task_metadata>
|
||||
session_id: ses_auth_flow_123
|
||||
</task_metadata>`,
|
||||
metadata: {
|
||||
agent: "sisyphus-junior",
|
||||
category: "deep",
|
||||
},
|
||||
}
|
||||
|
||||
// when
|
||||
await hook["tool.execute.after"](
|
||||
{ tool: "task", sessionID },
|
||||
output
|
||||
)
|
||||
|
||||
// then
|
||||
const updatedState = readBoulderState(TEST_DIR)
|
||||
expect(updatedState?.task_sessions?.["todo:1"]?.session_id).toBe("ses_auth_flow_123")
|
||||
expect(updatedState?.task_sessions?.["todo:1"]?.task_title).toBe("Implement auth flow")
|
||||
expect(updatedState?.task_sessions?.["todo:1"]?.agent).toBe("sisyphus-junior")
|
||||
expect(updatedState?.task_sessions?.["todo:1"]?.category).toBe("deep")
|
||||
|
||||
cleanupMessageStorage(sessionID)
|
||||
})
|
||||
|
||||
test("should preserve the delegated task key even after the plan advances to the next task", async () => {
|
||||
// given - Atlas caller starts task 1, then the plan advances before task output is processed
|
||||
const sessionID = "session-stable-task-key-test"
|
||||
setupMessageStorage(sessionID, "atlas")
|
||||
|
||||
const planPath = join(TEST_DIR, "stable-task-key-plan.md")
|
||||
writeFileSync(planPath, `# Plan
|
||||
|
||||
## TODOs
|
||||
- [ ] 1. Implement auth flow
|
||||
- [ ] 2. Add API validation
|
||||
`)
|
||||
|
||||
writeBoulderState(TEST_DIR, {
|
||||
active_plan: planPath,
|
||||
started_at: "2026-01-02T10:00:00Z",
|
||||
session_ids: ["session-1"],
|
||||
plan_name: "stable-task-key-plan",
|
||||
})
|
||||
|
||||
const hook = createAtlasHook(createMockPluginInput())
|
||||
|
||||
// when - Atlas delegates task 1
|
||||
await hook["tool.execute.before"](
|
||||
{ tool: "task", sessionID, callID: "call-task-1" },
|
||||
{ args: { prompt: "Implement auth flow" } }
|
||||
)
|
||||
|
||||
// and the plan is advanced before the task output is processed
|
||||
writeFileSync(planPath, `# Plan
|
||||
|
||||
## TODOs
|
||||
- [x] 1. Implement auth flow
|
||||
- [ ] 2. Add API validation
|
||||
`)
|
||||
|
||||
await hook["tool.execute.after"](
|
||||
{ tool: "task", sessionID, callID: "call-task-1" },
|
||||
{
|
||||
title: "Sisyphus Task",
|
||||
output: `Task completed successfully
|
||||
|
||||
<task_metadata>
|
||||
session_id: ses_auth_flow_123
|
||||
</task_metadata>`,
|
||||
metadata: {
|
||||
agent: "sisyphus-junior",
|
||||
category: "deep",
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
// then - the completed task session is still recorded against task 1, not task 2
|
||||
const updatedState = readBoulderState(TEST_DIR)
|
||||
expect(updatedState?.task_sessions?.["todo:1"]?.session_id).toBe("ses_auth_flow_123")
|
||||
expect(updatedState?.task_sessions?.["todo:2"]).toBeUndefined()
|
||||
|
||||
cleanupMessageStorage(sessionID)
|
||||
})
|
||||
|
||||
test("should not overwrite the current task mapping when task() explicitly resumes an older session", async () => {
|
||||
// given - current plan is on task 2, but Atlas explicitly resumes an older session for a previous task
|
||||
const sessionID = "session-cross-task-resume-test"
|
||||
setupMessageStorage(sessionID, "atlas")
|
||||
|
||||
const planPath = join(TEST_DIR, "cross-task-resume-plan.md")
|
||||
writeFileSync(planPath, `# Plan
|
||||
|
||||
## TODOs
|
||||
- [x] 1. Implement auth flow
|
||||
- [ ] 2. Add API validation
|
||||
`)
|
||||
|
||||
writeBoulderState(TEST_DIR, {
|
||||
active_plan: planPath,
|
||||
started_at: "2026-01-02T10:00:00Z",
|
||||
session_ids: ["session-1"],
|
||||
plan_name: "cross-task-resume-plan",
|
||||
})
|
||||
|
||||
const hook = createAtlasHook(createMockPluginInput())
|
||||
|
||||
// when - Atlas resumes an explicit prior session
|
||||
await hook["tool.execute.before"](
|
||||
{ tool: "task", sessionID, callID: "call-resume-old-task" },
|
||||
{ args: { prompt: "Follow up on previous task", session_id: "ses_old_task_111" } }
|
||||
)
|
||||
|
||||
const output = {
|
||||
title: "Sisyphus Task",
|
||||
output: `Task continued successfully
|
||||
|
||||
<task_metadata>
|
||||
session_id: ses_old_task_111
|
||||
</task_metadata>`,
|
||||
metadata: {
|
||||
agent: "sisyphus-junior",
|
||||
category: "deep",
|
||||
},
|
||||
}
|
||||
await hook["tool.execute.after"](
|
||||
{ tool: "task", sessionID, callID: "call-resume-old-task" },
|
||||
output
|
||||
)
|
||||
|
||||
// then - Atlas does not poison task 2's preferred session mapping
|
||||
const updatedState = readBoulderState(TEST_DIR)
|
||||
expect(updatedState?.task_sessions?.["todo:2"]).toBeUndefined()
|
||||
expect(output.output).not.toContain('task(session_id="ses_old_task_111"')
|
||||
|
||||
cleanupMessageStorage(sessionID)
|
||||
})
|
||||
|
||||
test("should not reuse an explicitly resumed session id in completion reminders", async () => {
|
||||
// given - current plan is on task 2 with an existing tracked session
|
||||
const sessionID = "session-explicit-resume-reminder-test"
|
||||
setupMessageStorage(sessionID, "atlas")
|
||||
|
||||
const planPath = join(TEST_DIR, "explicit-resume-reminder-plan.md")
|
||||
writeFileSync(planPath, `# Plan
|
||||
|
||||
## TODOs
|
||||
- [x] 1. Implement auth flow
|
||||
- [ ] 2. Add API validation
|
||||
`)
|
||||
|
||||
writeBoulderState(TEST_DIR, {
|
||||
active_plan: planPath,
|
||||
started_at: "2026-01-02T10:00:00Z",
|
||||
session_ids: ["session-1"],
|
||||
plan_name: "explicit-resume-reminder-plan",
|
||||
task_sessions: {
|
||||
"todo:2": {
|
||||
task_key: "todo:2",
|
||||
task_label: "2",
|
||||
task_title: "Add API validation",
|
||||
session_id: "ses_tracked_current_task",
|
||||
updated_at: "2026-01-02T10:00:00Z",
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
const hook = createAtlasHook(createMockPluginInput())
|
||||
const output = {
|
||||
title: "Sisyphus Task",
|
||||
output: `Task continued successfully
|
||||
|
||||
<task_metadata>
|
||||
session_id: ses_old_task_111
|
||||
</task_metadata>`,
|
||||
metadata: {},
|
||||
}
|
||||
|
||||
// when
|
||||
await hook["tool.execute.before"](
|
||||
{ tool: "task", sessionID, callID: "call-explicit-resume-reminder" },
|
||||
{ args: { prompt: "Follow up on previous task", session_id: "ses_old_task_111" } }
|
||||
)
|
||||
await hook["tool.execute.after"](
|
||||
{ tool: "task", sessionID, callID: "call-explicit-resume-reminder" },
|
||||
output
|
||||
)
|
||||
|
||||
// then
|
||||
expect(output.output).not.toContain('task(session_id="ses_old_task_111"')
|
||||
expect(output.output).toContain("ses_tracked_current_task")
|
||||
|
||||
cleanupMessageStorage(sessionID)
|
||||
})
|
||||
|
||||
test("should skip persistence when multiple in-flight task calls claim the same top-level task", async () => {
|
||||
// given
|
||||
const sessionID = "session-parallel-task-collision-test"
|
||||
setupMessageStorage(sessionID, "atlas")
|
||||
|
||||
const planPath = join(TEST_DIR, "parallel-task-collision-plan.md")
|
||||
writeFileSync(planPath, `# Plan
|
||||
|
||||
## TODOs
|
||||
- [ ] 1. Implement auth flow
|
||||
- [ ] 2. Add API validation
|
||||
`)
|
||||
|
||||
writeBoulderState(TEST_DIR, {
|
||||
active_plan: planPath,
|
||||
started_at: "2026-01-02T10:00:00Z",
|
||||
session_ids: ["session-1"],
|
||||
plan_name: "parallel-task-collision-plan",
|
||||
})
|
||||
|
||||
const pendingFilePaths = new Map<string, string>()
|
||||
const pendingTaskRefs = new Map<string, PendingTaskRef>()
|
||||
const beforeHandler = createToolExecuteBeforeHandler({
|
||||
ctx: createMockPluginInput(),
|
||||
pendingFilePaths,
|
||||
pendingTaskRefs,
|
||||
})
|
||||
const afterHandler = createToolExecuteAfterHandler({
|
||||
ctx: createMockPluginInput(),
|
||||
pendingFilePaths,
|
||||
pendingTaskRefs,
|
||||
autoCommit: true,
|
||||
getState: () => ({ promptFailureCount: 0 }),
|
||||
})
|
||||
|
||||
// when - two task() calls start before either one completes
|
||||
await beforeHandler(
|
||||
{ tool: "task", sessionID, callID: "call-task-first" },
|
||||
{ args: { prompt: "Implement auth flow part 1" } }
|
||||
)
|
||||
await beforeHandler(
|
||||
{ tool: "task", sessionID, callID: "call-task-second" },
|
||||
{ args: { prompt: "Implement auth flow part 2" } }
|
||||
)
|
||||
|
||||
const secondPendingTaskRef = pendingTaskRefs.get("call-task-second")
|
||||
|
||||
await afterHandler(
|
||||
{ tool: "task", sessionID, callID: "call-task-second" },
|
||||
{
|
||||
title: "Sisyphus Task",
|
||||
output: `Task completed successfully
|
||||
|
||||
<task_metadata>
|
||||
session_id: ses_parallel_collision_222
|
||||
</task_metadata>`,
|
||||
metadata: {},
|
||||
}
|
||||
)
|
||||
|
||||
// then
|
||||
expect(secondPendingTaskRef).toEqual({
|
||||
kind: "skip",
|
||||
reason: "ambiguous_task_key",
|
||||
task: {
|
||||
key: "todo:1",
|
||||
label: "1",
|
||||
title: "Implement auth flow",
|
||||
},
|
||||
})
|
||||
const updatedState = readBoulderState(TEST_DIR)
|
||||
expect(updatedState?.task_sessions?.["todo:1"]).toBeUndefined()
|
||||
|
||||
cleanupMessageStorage(sessionID)
|
||||
})
|
||||
|
||||
test("should ignore extracted session ids that are outside the active boulder lineage", async () => {
|
||||
// given
|
||||
const sessionID = "session-untrusted-session-id-test"
|
||||
setupMessageStorage(sessionID, "atlas")
|
||||
|
||||
const planPath = join(TEST_DIR, "untrusted-session-id-plan.md")
|
||||
writeFileSync(planPath, `# Plan
|
||||
|
||||
## TODOs
|
||||
- [ ] 1. Implement auth flow
|
||||
`)
|
||||
|
||||
writeBoulderState(TEST_DIR, {
|
||||
active_plan: planPath,
|
||||
started_at: "2026-01-02T10:00:00Z",
|
||||
session_ids: ["session-1"],
|
||||
plan_name: "untrusted-session-id-plan",
|
||||
})
|
||||
|
||||
const hook = createAtlasHook(createMockPluginInput({
|
||||
sessionGetMock: mock(async ({ path }: { path: { id: string } }) => ({
|
||||
data: {
|
||||
id: path.id,
|
||||
parentID: path.id === "ses_untrusted_999" ? "session-outside-lineage" : "main-session-123",
|
||||
},
|
||||
})),
|
||||
}))
|
||||
const output = {
|
||||
title: "Sisyphus Task",
|
||||
output: `Task completed successfully
|
||||
|
||||
<task_metadata>
|
||||
session_id: ses_untrusted_999
|
||||
</task_metadata>`,
|
||||
metadata: {},
|
||||
}
|
||||
|
||||
// when
|
||||
await hook["tool.execute.after"](
|
||||
{ tool: "task", sessionID },
|
||||
output
|
||||
)
|
||||
|
||||
// then
|
||||
const updatedState = readBoulderState(TEST_DIR)
|
||||
expect(updatedState?.task_sessions?.["todo:1"]).toBeUndefined()
|
||||
expect(output.output).not.toContain('task(session_id="ses_untrusted_999"')
|
||||
expect(output.output).toContain('task(session_id="<session_id>"')
|
||||
|
||||
cleanupMessageStorage(sessionID)
|
||||
})
|
||||
|
||||
describe("completion gate output ordering", () => {
|
||||
const COMPLETION_GATE_SESSION = "completion-gate-order-test"
|
||||
|
||||
@@ -1043,37 +1464,6 @@ describe("atlas hook", () => {
|
||||
expect(mockInput._promptMock).not.toHaveBeenCalled()
|
||||
})
|
||||
|
||||
test("should skip when another continuation hook already injected", async () => {
|
||||
// given - boulder state with incomplete plan
|
||||
const planPath = join(TEST_DIR, "test-plan.md")
|
||||
writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2")
|
||||
|
||||
const state: BoulderState = {
|
||||
active_plan: planPath,
|
||||
started_at: "2026-01-02T10:00:00Z",
|
||||
session_ids: [MAIN_SESSION_ID],
|
||||
plan_name: "test-plan",
|
||||
}
|
||||
writeBoulderState(TEST_DIR, state)
|
||||
|
||||
const mockInput = createMockPluginInput()
|
||||
const hook = createAtlasHook(mockInput, {
|
||||
directory: TEST_DIR,
|
||||
shouldSkipContinuation: (sessionID: string) => sessionID === MAIN_SESSION_ID,
|
||||
})
|
||||
|
||||
// when
|
||||
await hook.handler({
|
||||
event: {
|
||||
type: "session.idle",
|
||||
properties: { sessionID: MAIN_SESSION_ID },
|
||||
},
|
||||
})
|
||||
|
||||
// then - should not call prompt because another continuation already handled it
|
||||
expect(mockInput._promptMock).not.toHaveBeenCalled()
|
||||
})
|
||||
|
||||
test("should clear abort state on message.updated", async () => {
|
||||
// given - boulder with incomplete plan
|
||||
const planPath = join(TEST_DIR, "test-plan.md")
|
||||
@@ -1147,6 +1537,48 @@ describe("atlas hook", () => {
|
||||
expect(callArgs.body.parts[0].text).toContain("2 remaining")
|
||||
})
|
||||
|
||||
test("should include preferred reuse session in continuation prompt for current top-level task", async () => {
|
||||
// given - boulder state with tracked preferred session
|
||||
const planPath = join(TEST_DIR, "preferred-session-plan.md")
|
||||
writeFileSync(planPath, `# Plan
|
||||
|
||||
## TODOs
|
||||
- [ ] 1. Implement auth flow
|
||||
`)
|
||||
|
||||
writeBoulderState(TEST_DIR, {
|
||||
active_plan: planPath,
|
||||
started_at: "2026-01-02T10:00:00Z",
|
||||
session_ids: [MAIN_SESSION_ID],
|
||||
plan_name: "preferred-session-plan",
|
||||
task_sessions: {
|
||||
"todo:1": {
|
||||
task_key: "todo:1",
|
||||
task_label: "1",
|
||||
task_title: "Implement auth flow",
|
||||
session_id: "ses_auth_flow_123",
|
||||
updated_at: "2026-01-02T10:00:00Z",
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
const mockInput = createMockPluginInput()
|
||||
const hook = createAtlasHook(mockInput)
|
||||
|
||||
// when
|
||||
await hook.handler({
|
||||
event: {
|
||||
type: "session.idle",
|
||||
properties: { sessionID: MAIN_SESSION_ID },
|
||||
},
|
||||
})
|
||||
|
||||
// then
|
||||
const callArgs = mockInput._promptMock.mock.calls[0][0]
|
||||
expect(callArgs.body.parts[0].text).toContain("Preferred reuse session for current top-level plan task")
|
||||
expect(callArgs.body.parts[0].text).toContain("ses_auth_flow_123")
|
||||
})
|
||||
|
||||
test("should inject when last agent is sisyphus and boulder targets atlas explicitly", async () => {
|
||||
// given - boulder explicitly set to atlas, but last agent is sisyphus (initial state after /start-work)
|
||||
const planPath = join(TEST_DIR, "test-plan.md")
|
||||
@@ -1283,7 +1715,7 @@ describe("atlas hook", () => {
|
||||
expect(mockInput._promptMock).toHaveBeenCalledTimes(1)
|
||||
})
|
||||
|
||||
test("should stop continuation after 2 consecutive prompt failures (issue #1355)", async () => {
|
||||
test("should stop continuation after 10 consecutive prompt failures (issue #1355)", async () => {
|
||||
//#given - boulder state with incomplete plan and prompt always fails
|
||||
const planPath = join(TEST_DIR, "test-plan.md")
|
||||
writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2")
|
||||
@@ -1296,7 +1728,7 @@ describe("atlas hook", () => {
|
||||
}
|
||||
writeBoulderState(TEST_DIR, state)
|
||||
|
||||
const promptMock = mock(() => Promise.reject(new Error("Bad Request")))
|
||||
const promptMock = mock((): Promise<void> => Promise.reject(new Error("Bad Request")))
|
||||
const mockInput = createMockPluginInput({ promptMock })
|
||||
const hook = createAtlasHook(mockInput)
|
||||
|
||||
@@ -1306,25 +1738,23 @@ describe("atlas hook", () => {
|
||||
|
||||
try {
|
||||
//#when - idle fires repeatedly, past cooldown each time
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
|
||||
await flushMicrotasks()
|
||||
now += 6000
|
||||
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
|
||||
await flushMicrotasks()
|
||||
now += 6000
|
||||
for (let i = 0; i < 10; i++) {
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
|
||||
await flushMicrotasks()
|
||||
now += 6000
|
||||
}
|
||||
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
|
||||
await flushMicrotasks()
|
||||
|
||||
//#then - should attempt only twice, then disable continuation
|
||||
expect(promptMock).toHaveBeenCalledTimes(2)
|
||||
//#then - should attempt only 10 times, then disable continuation
|
||||
expect(promptMock).toHaveBeenCalledTimes(10)
|
||||
} finally {
|
||||
Date.now = originalDateNow
|
||||
}
|
||||
})
|
||||
|
||||
test("should reset prompt failure counter on success and only stop after 2 consecutive failures", async () => {
|
||||
test("should reset prompt failure counter on success and only stop after 10 consecutive failures", async () => {
|
||||
//#given - boulder state with incomplete plan
|
||||
const planPath = join(TEST_DIR, "test-plan.md")
|
||||
writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2")
|
||||
@@ -1337,11 +1767,9 @@ describe("atlas hook", () => {
|
||||
}
|
||||
writeBoulderState(TEST_DIR, state)
|
||||
|
||||
const promptMock = mock(() => Promise.resolve())
|
||||
const promptMock = mock((): Promise<void> => Promise.reject(new Error("Bad Request")))
|
||||
promptMock.mockImplementationOnce(() => Promise.reject(new Error("Bad Request")))
|
||||
promptMock.mockImplementationOnce(() => Promise.resolve())
|
||||
promptMock.mockImplementationOnce(() => Promise.reject(new Error("Bad Request")))
|
||||
promptMock.mockImplementationOnce(() => Promise.reject(new Error("Bad Request")))
|
||||
|
||||
const mockInput = createMockPluginInput({ promptMock })
|
||||
const hook = createAtlasHook(mockInput)
|
||||
@@ -1351,21 +1779,21 @@ describe("atlas hook", () => {
|
||||
Date.now = () => now
|
||||
|
||||
try {
|
||||
//#when - fail, succeed (reset), then fail twice (disable), then attempt again
|
||||
for (let i = 0; i < 5; i++) {
|
||||
//#when - fail, succeed (reset), then fail 10 times (disable), then attempt again
|
||||
for (let i = 0; i < 13; i++) {
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
|
||||
await flushMicrotasks()
|
||||
now += 6000
|
||||
}
|
||||
|
||||
//#then - 4 prompt attempts; 5th idle is skipped after 2 consecutive failures
|
||||
expect(promptMock).toHaveBeenCalledTimes(4)
|
||||
//#then - 12 prompt attempts; 13th idle is skipped after 10 consecutive failures
|
||||
expect(promptMock).toHaveBeenCalledTimes(12)
|
||||
} finally {
|
||||
Date.now = originalDateNow
|
||||
}
|
||||
})
|
||||
|
||||
test("should keep skipping continuation during 5-minute backoff after 2 consecutive failures", async () => {
|
||||
test("should keep skipping continuation during 5-minute backoff after 10 consecutive failures", async () => {
|
||||
//#given - boulder state with incomplete plan and prompt always fails
|
||||
const planPath = join(TEST_DIR, "test-plan.md")
|
||||
writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2")
|
||||
@@ -1387,26 +1815,26 @@ describe("atlas hook", () => {
|
||||
Date.now = () => now
|
||||
|
||||
try {
|
||||
//#when - third idle occurs inside 5-minute backoff window
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
|
||||
await flushMicrotasks()
|
||||
now += 6000
|
||||
//#when - 11th idle occurs inside 5-minute backoff window
|
||||
for (let i = 0; i < 10; i++) {
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
|
||||
await flushMicrotasks()
|
||||
now += 6000
|
||||
}
|
||||
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
|
||||
await flushMicrotasks()
|
||||
now += 60000
|
||||
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
|
||||
await flushMicrotasks()
|
||||
|
||||
//#then - third attempt should still be skipped
|
||||
expect(promptMock).toHaveBeenCalledTimes(2)
|
||||
//#then - 11th attempt should still be skipped
|
||||
expect(promptMock).toHaveBeenCalledTimes(10)
|
||||
} finally {
|
||||
Date.now = originalDateNow
|
||||
}
|
||||
})
|
||||
|
||||
test("should retry continuation after 5-minute backoff expires following 2 consecutive failures", async () => {
|
||||
test("should retry continuation after 5-minute backoff expires following 10 consecutive failures", async () => {
|
||||
//#given - boulder state with incomplete plan and prompt always fails
|
||||
const planPath = join(TEST_DIR, "test-plan.md")
|
||||
writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2")
|
||||
@@ -1428,20 +1856,20 @@ describe("atlas hook", () => {
|
||||
Date.now = () => now
|
||||
|
||||
try {
|
||||
//#when - third idle occurs after 5+ minutes
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
|
||||
await flushMicrotasks()
|
||||
now += 6000
|
||||
//#when - 11th idle occurs after 5+ minutes
|
||||
for (let i = 0; i < 10; i++) {
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
|
||||
await flushMicrotasks()
|
||||
now += 6000
|
||||
}
|
||||
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
|
||||
await flushMicrotasks()
|
||||
now += 300000
|
||||
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
|
||||
await flushMicrotasks()
|
||||
|
||||
//#then - third attempt should run after backoff expiration
|
||||
expect(promptMock).toHaveBeenCalledTimes(3)
|
||||
//#then - 11th attempt should run after backoff expiration
|
||||
expect(promptMock).toHaveBeenCalledTimes(11)
|
||||
} finally {
|
||||
Date.now = originalDateNow
|
||||
}
|
||||
@@ -1461,8 +1889,9 @@ describe("atlas hook", () => {
|
||||
writeBoulderState(TEST_DIR, state)
|
||||
|
||||
const promptMock = mock((): Promise<void> => Promise.reject(new Error("Bad Request")))
|
||||
promptMock.mockImplementationOnce(() => Promise.reject(new Error("Bad Request")))
|
||||
promptMock.mockImplementationOnce(() => Promise.reject(new Error("Bad Request")))
|
||||
for (let i = 0; i < 10; i++) {
|
||||
promptMock.mockImplementationOnce(() => Promise.reject(new Error("Bad Request")))
|
||||
}
|
||||
promptMock.mockImplementationOnce(() => Promise.resolve(undefined))
|
||||
const mockInput = createMockPluginInput({ promptMock })
|
||||
const hook = createAtlasHook(mockInput)
|
||||
@@ -1472,32 +1901,30 @@ describe("atlas hook", () => {
|
||||
Date.now = () => now
|
||||
|
||||
try {
|
||||
//#when - fail twice, recover after backoff with success, then fail twice again
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
|
||||
await flushMicrotasks()
|
||||
now += 6000
|
||||
//#when - fail 10 times, recover after backoff with success, then fail 10 times again
|
||||
for (let i = 0; i < 10; i++) {
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
|
||||
await flushMicrotasks()
|
||||
now += 6000
|
||||
}
|
||||
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
|
||||
await flushMicrotasks()
|
||||
now += 300000
|
||||
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
|
||||
await flushMicrotasks()
|
||||
now += 6000
|
||||
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
|
||||
await flushMicrotasks()
|
||||
now += 6000
|
||||
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
|
||||
await flushMicrotasks()
|
||||
now += 6000
|
||||
for (let i = 0; i < 10; i++) {
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
|
||||
await flushMicrotasks()
|
||||
now += 6000
|
||||
}
|
||||
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
|
||||
await flushMicrotasks()
|
||||
|
||||
//#then - success retry resets counter, so two additional failures are allowed before skip
|
||||
expect(promptMock).toHaveBeenCalledTimes(5)
|
||||
//#then - success retry resets counter, so 10 additional failures are allowed before skip
|
||||
expect(promptMock).toHaveBeenCalledTimes(21)
|
||||
} finally {
|
||||
Date.now = originalDateNow
|
||||
}
|
||||
@@ -1525,14 +1952,12 @@ describe("atlas hook", () => {
|
||||
Date.now = () => now
|
||||
|
||||
try {
|
||||
//#when - two failures disables continuation, then compaction resets it
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
|
||||
await flushMicrotasks()
|
||||
now += 6000
|
||||
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
|
||||
await flushMicrotasks()
|
||||
now += 6000
|
||||
//#when - 10 failures disable continuation, then compaction resets it
|
||||
for (let i = 0; i < 10; i++) {
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
|
||||
await flushMicrotasks()
|
||||
now += 6000
|
||||
}
|
||||
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
|
||||
await flushMicrotasks()
|
||||
@@ -1543,8 +1968,8 @@ describe("atlas hook", () => {
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
|
||||
await flushMicrotasks()
|
||||
|
||||
//#then - 2 attempts + 1 after compaction (3 total)
|
||||
expect(promptMock).toHaveBeenCalledTimes(3)
|
||||
//#then - 10 attempts + 1 after compaction (11 total)
|
||||
expect(promptMock).toHaveBeenCalledTimes(11)
|
||||
} finally {
|
||||
Date.now = originalDateNow
|
||||
}
|
||||
|
||||
79
src/hooks/atlas/subagent-session-id.test.ts
Normal file
79
src/hooks/atlas/subagent-session-id.test.ts
Normal file
@@ -0,0 +1,79 @@
|
||||
import { describe, expect, test } from "bun:test"
|
||||
|
||||
import { extractSessionIdFromOutput } from "./subagent-session-id"
|
||||
|
||||
describe("extractSessionIdFromOutput", () => {
|
||||
test("extracts Session ID blocks from background output", () => {
|
||||
// given
|
||||
const output = `Background task launched.\n\nSession ID: ses_bg_12345`
|
||||
|
||||
// when
|
||||
const result = extractSessionIdFromOutput(output)
|
||||
|
||||
// then
|
||||
expect(result).toBe("ses_bg_12345")
|
||||
})
|
||||
|
||||
test("extracts session_id from task metadata blocks", () => {
|
||||
// given
|
||||
const output = `Task completed.\n\n<task_metadata>\nsession_id: ses_sync_12345\n</task_metadata>`
|
||||
|
||||
// when
|
||||
const result = extractSessionIdFromOutput(output)
|
||||
|
||||
// then
|
||||
expect(result).toBe("ses_sync_12345")
|
||||
})
|
||||
|
||||
test("extracts hyphenated session IDs from task metadata blocks", () => {
|
||||
// given
|
||||
const output = `Task completed.\n\n<task_metadata>\nsession_id: ses_auth-flow-123\n</task_metadata>`
|
||||
|
||||
// when
|
||||
const result = extractSessionIdFromOutput(output)
|
||||
|
||||
// then
|
||||
expect(result).toBe("ses_auth-flow-123")
|
||||
})
|
||||
|
||||
test("returns undefined when no session id is present", () => {
|
||||
// given
|
||||
const output = "Task completed without metadata"
|
||||
|
||||
// when
|
||||
const result = extractSessionIdFromOutput(output)
|
||||
|
||||
// then
|
||||
expect(result).toBeUndefined()
|
||||
})
|
||||
|
||||
test("prefers the session id inside the trailing task_metadata block", () => {
|
||||
// given
|
||||
const output = `The previous attempt mentioned session_id: ses_wrong_body_123 but that was only context.
|
||||
|
||||
<task_metadata>
|
||||
session_id: ses_real_metadata_456
|
||||
</task_metadata>`
|
||||
|
||||
// when
|
||||
const result = extractSessionIdFromOutput(output)
|
||||
|
||||
// then
|
||||
expect(result).toBe("ses_real_metadata_456")
|
||||
})
|
||||
|
||||
test("does not let task_metadata parsing bleed into incidental body text after the closing tag", () => {
|
||||
// given
|
||||
const output = `<task_metadata>
|
||||
session_id: ses_real_metadata_456
|
||||
</task_metadata>
|
||||
|
||||
debug log: session_id: ses_wrong_body_789`
|
||||
|
||||
// when
|
||||
const result = extractSessionIdFromOutput(output)
|
||||
|
||||
// then
|
||||
expect(result).toBe("ses_real_metadata_456")
|
||||
})
|
||||
})
|
||||
@@ -1,4 +1,44 @@
|
||||
export function extractSessionIdFromOutput(output: string): string {
|
||||
const match = output.match(/Session ID:\s*(ses_[a-zA-Z0-9]+)/)
|
||||
return match?.[1] ?? "<session_id>"
|
||||
import type { PluginInput } from "@opencode-ai/plugin"
|
||||
import { log } from "../../shared/logger"
|
||||
import { isSessionInBoulderLineage } from "./boulder-session-lineage"
|
||||
import { HOOK_NAME } from "./hook-name"
|
||||
|
||||
export function extractSessionIdFromOutput(output: string): string | undefined {
|
||||
const taskMetadataBlocks = [...output.matchAll(/<task_metadata>([\s\S]*?)<\/task_metadata>/gi)]
|
||||
const lastTaskMetadataBlock = taskMetadataBlocks.at(-1)?.[1]
|
||||
if (lastTaskMetadataBlock) {
|
||||
const taskMetadataSessionMatch = lastTaskMetadataBlock.match(/session_id:\s*(ses_[a-zA-Z0-9_-]+)/i)
|
||||
if (taskMetadataSessionMatch) {
|
||||
return taskMetadataSessionMatch[1]
|
||||
}
|
||||
}
|
||||
|
||||
const explicitSessionMatches = [...output.matchAll(/Session ID:\s*(ses_[a-zA-Z0-9_-]+)/g)]
|
||||
return explicitSessionMatches.at(-1)?.[1]
|
||||
}
|
||||
|
||||
export async function validateSubagentSessionId(input: {
|
||||
client: PluginInput["client"]
|
||||
sessionID?: string
|
||||
lineageSessionIDs: string[]
|
||||
}): Promise<string | undefined> {
|
||||
if (!input.sessionID || input.lineageSessionIDs.length === 0) {
|
||||
return undefined
|
||||
}
|
||||
|
||||
const belongsToLineage = await isSessionInBoulderLineage({
|
||||
client: input.client,
|
||||
sessionID: input.sessionID,
|
||||
boulderSessionIDs: input.lineageSessionIDs,
|
||||
})
|
||||
|
||||
if (!belongsToLineage) {
|
||||
log(`[${HOOK_NAME}] Ignoring extracted session id outside active lineage`, {
|
||||
sessionID: input.sessionID,
|
||||
lineageSessionIDs: input.lineageSessionIDs,
|
||||
})
|
||||
return undefined
|
||||
}
|
||||
|
||||
return input.sessionID
|
||||
}
|
||||
|
||||
@@ -218,21 +218,31 @@ ${createSystemDirective(SystemDirectiveTypes.SINGLE_TASK_ONLY)}
|
||||
|
||||
**STOP. READ THIS BEFORE PROCEEDING.**
|
||||
|
||||
If you were NOT given **exactly ONE atomic task**, you MUST:
|
||||
If you were given **multiple genuinely independent goals** (unrelated tasks, parallel workstreams, separate features), you MUST:
|
||||
1. **IMMEDIATELY REFUSE** this request
|
||||
2. **DEMAND** the orchestrator provide a single, specific task
|
||||
2. **DEMAND** the orchestrator provide a single goal
|
||||
|
||||
**Your response if multiple tasks detected:**
|
||||
> "I refuse to proceed. You provided multiple tasks. An orchestrator's impatience destroys work quality.
|
||||
**What counts as multiple independent tasks (REFUSE):**
|
||||
- "Implement feature A. Also, add feature B."
|
||||
- "Fix bug X. Then refactor module Y. Also update the docs."
|
||||
- Multiple unrelated changes bundled into one request
|
||||
|
||||
**What is a single task with sequential steps (PROCEED):**
|
||||
- A single goal broken into numbered steps (e.g., "Implement X by: 1. finding files, 2. adding logic, 3. writing tests")
|
||||
- Multi-step context where all steps serve ONE objective
|
||||
- Orchestrator-provided context explaining approach for a single deliverable
|
||||
|
||||
**Your response if genuinely independent tasks are detected:**
|
||||
> "I refuse to proceed. You provided multiple independent tasks. Each task needs full attention.
|
||||
>
|
||||
> PROVIDE EXACTLY ONE TASK. One file. One change. One verification.
|
||||
> PROVIDE EXACTLY ONE GOAL. One deliverable. One clear outcome.
|
||||
>
|
||||
> Your rushing will cause: incomplete work, missed edge cases, broken tests, wasted context."
|
||||
> Batching unrelated tasks causes: incomplete work, missed edge cases, broken tests, wasted context."
|
||||
|
||||
**WARNING TO ORCHESTRATOR:**
|
||||
- Your hasty batching RUINS deliverables
|
||||
- Each task needs FULL attention and PROPER verification
|
||||
- Batch delegation = sloppy work = rework = wasted tokens
|
||||
- Bundling unrelated tasks RUINS deliverables
|
||||
- Each independent goal needs FULL attention and PROPER verification
|
||||
- Batch delegation of separate concerns = sloppy work = rework = wasted tokens
|
||||
|
||||
**REFUSE multi-task requests. DEMAND single-task clarity.**
|
||||
**REFUSE genuinely multi-task requests. ALLOW single-goal multi-step workflows.**
|
||||
`
|
||||
|
||||
@@ -1,5 +1,12 @@
|
||||
import type { PluginInput } from "@opencode-ai/plugin"
|
||||
import { appendSessionId, getPlanProgress, readBoulderState } from "../../features/boulder-state"
|
||||
import {
|
||||
appendSessionId,
|
||||
getPlanProgress,
|
||||
getTaskSessionState,
|
||||
readBoulderState,
|
||||
readCurrentTopLevelTask,
|
||||
upsertTaskSessionState,
|
||||
} from "../../features/boulder-state"
|
||||
import { log } from "../../shared/logger"
|
||||
import { isCallerOrchestrator } from "../../shared/session-utils"
|
||||
import { collectGitDiffStats, formatFileChanges } from "../../shared/git-worktree"
|
||||
@@ -7,7 +14,7 @@ import { shouldPauseForFinalWaveApproval } from "./final-wave-approval-gate"
|
||||
import { HOOK_NAME } from "./hook-name"
|
||||
import { DIRECT_WORK_REMINDER } from "./system-reminder-templates"
|
||||
import { isSisyphusPath } from "./sisyphus-path"
|
||||
import { extractSessionIdFromOutput } from "./subagent-session-id"
|
||||
import { extractSessionIdFromOutput, validateSubagentSessionId } from "./subagent-session-id"
|
||||
import {
|
||||
buildCompletionGate,
|
||||
buildFinalWaveApprovalReminder,
|
||||
@@ -15,16 +22,60 @@ import {
|
||||
buildStandaloneVerificationReminder,
|
||||
} from "./verification-reminders"
|
||||
import { isWriteOrEditToolName } from "./write-edit-tool-policy"
|
||||
import type { SessionState } from "./types"
|
||||
import type { ToolExecuteAfterInput, ToolExecuteAfterOutput } from "./types"
|
||||
import type { PendingTaskRef, SessionState } from "./types"
|
||||
import type { ToolExecuteAfterInput, ToolExecuteAfterOutput, TrackedTopLevelTaskRef } from "./types"
|
||||
|
||||
function resolvePreferredSessionId(currentSessionId?: string, trackedSessionId?: string): string {
|
||||
return currentSessionId ?? trackedSessionId ?? "<session_id>"
|
||||
}
|
||||
|
||||
function resolveTaskContext(
|
||||
pendingTaskRef: PendingTaskRef | undefined,
|
||||
planPath: string,
|
||||
): {
|
||||
currentTask: TrackedTopLevelTaskRef | null
|
||||
shouldSkipTaskSessionUpdate: boolean
|
||||
shouldIgnoreCurrentSessionId: boolean
|
||||
} {
|
||||
if (!pendingTaskRef) {
|
||||
return {
|
||||
currentTask: readCurrentTopLevelTask(planPath),
|
||||
shouldSkipTaskSessionUpdate: false,
|
||||
shouldIgnoreCurrentSessionId: false,
|
||||
}
|
||||
}
|
||||
|
||||
if (pendingTaskRef.kind === "track") {
|
||||
return {
|
||||
currentTask: pendingTaskRef.task,
|
||||
shouldSkipTaskSessionUpdate: false,
|
||||
shouldIgnoreCurrentSessionId: false,
|
||||
}
|
||||
}
|
||||
|
||||
if (pendingTaskRef.reason === "explicit_resume") {
|
||||
return {
|
||||
currentTask: readCurrentTopLevelTask(planPath),
|
||||
shouldSkipTaskSessionUpdate: true,
|
||||
shouldIgnoreCurrentSessionId: true,
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
currentTask: pendingTaskRef.task,
|
||||
shouldSkipTaskSessionUpdate: true,
|
||||
shouldIgnoreCurrentSessionId: true,
|
||||
}
|
||||
}
|
||||
|
||||
export function createToolExecuteAfterHandler(input: {
|
||||
ctx: PluginInput
|
||||
pendingFilePaths: Map<string, string>
|
||||
pendingTaskRefs: Map<string, PendingTaskRef>
|
||||
autoCommit: boolean
|
||||
getState: (sessionID: string) => SessionState
|
||||
}): (toolInput: ToolExecuteAfterInput, toolOutput: ToolExecuteAfterOutput) => Promise<void> {
|
||||
const { ctx, pendingFilePaths, autoCommit, getState } = input
|
||||
const { ctx, pendingFilePaths, pendingTaskRefs, autoCommit, getState } = input
|
||||
return async (toolInput, toolOutput): Promise<void> => {
|
||||
// Guard against undefined output (e.g., from /review command - see issue #1035)
|
||||
if (!toolOutput) {
|
||||
@@ -59,19 +110,33 @@ export function createToolExecuteAfterHandler(input: {
|
||||
}
|
||||
|
||||
const outputStr = toolOutput.output && typeof toolOutput.output === "string" ? toolOutput.output : ""
|
||||
const pendingTaskRef = toolInput.callID ? pendingTaskRefs.get(toolInput.callID) : undefined
|
||||
if (toolInput.callID) {
|
||||
pendingTaskRefs.delete(toolInput.callID)
|
||||
}
|
||||
const isBackgroundLaunch = outputStr.includes("Background task launched") || outputStr.includes("Background task continued")
|
||||
if (isBackgroundLaunch) {
|
||||
return
|
||||
}
|
||||
|
||||
if (toolOutput.output && typeof toolOutput.output === "string") {
|
||||
const gitStats = collectGitDiffStats(ctx.directory)
|
||||
const fileChanges = formatFileChanges(gitStats)
|
||||
const subagentSessionId = extractSessionIdFromOutput(toolOutput.output)
|
||||
|
||||
const boulderState = readBoulderState(ctx.directory)
|
||||
const worktreePath = boulderState?.worktree_path?.trim()
|
||||
const verificationDirectory = worktreePath ? worktreePath : ctx.directory
|
||||
const gitStats = collectGitDiffStats(verificationDirectory)
|
||||
const fileChanges = formatFileChanges(gitStats)
|
||||
const extractedSessionId = extractSessionIdFromOutput(toolOutput.output)
|
||||
|
||||
if (boulderState) {
|
||||
const progress = getPlanProgress(boulderState.active_plan)
|
||||
const {
|
||||
currentTask,
|
||||
shouldSkipTaskSessionUpdate,
|
||||
shouldIgnoreCurrentSessionId,
|
||||
} = resolveTaskContext(pendingTaskRef, boulderState.active_plan)
|
||||
const trackedTaskSession = currentTask
|
||||
? getTaskSessionState(ctx.directory, currentTask.key)
|
||||
: null
|
||||
const sessionState = toolInput.sessionID ? getState(toolInput.sessionID) : undefined
|
||||
|
||||
if (toolInput.sessionID && !boulderState.session_ids?.includes(toolInput.sessionID)) {
|
||||
@@ -82,6 +147,31 @@ export function createToolExecuteAfterHandler(input: {
|
||||
})
|
||||
}
|
||||
|
||||
const lineageSessionIDs = toolInput.sessionID && !boulderState.session_ids.includes(toolInput.sessionID)
|
||||
? [...boulderState.session_ids, toolInput.sessionID]
|
||||
: boulderState.session_ids
|
||||
const subagentSessionId = await validateSubagentSessionId({
|
||||
client: ctx.client,
|
||||
sessionID: extractedSessionId,
|
||||
lineageSessionIDs,
|
||||
})
|
||||
|
||||
if (currentTask && subagentSessionId && !shouldSkipTaskSessionUpdate) {
|
||||
upsertTaskSessionState(ctx.directory, {
|
||||
taskKey: currentTask.key,
|
||||
taskLabel: currentTask.label,
|
||||
taskTitle: currentTask.title,
|
||||
sessionId: subagentSessionId,
|
||||
agent: typeof toolOutput.metadata?.agent === "string" ? toolOutput.metadata.agent : undefined,
|
||||
category: typeof toolOutput.metadata?.category === "string" ? toolOutput.metadata.category : undefined,
|
||||
})
|
||||
}
|
||||
|
||||
const preferredSessionId = resolvePreferredSessionId(
|
||||
shouldIgnoreCurrentSessionId ? undefined : subagentSessionId,
|
||||
trackedTaskSession?.session_id,
|
||||
)
|
||||
|
||||
// Preserve original subagent response - critical for debugging failed tasks
|
||||
const originalResponse = toolOutput.output
|
||||
const shouldPauseForApproval = sessionState
|
||||
@@ -102,11 +192,11 @@ export function createToolExecuteAfterHandler(input: {
|
||||
}
|
||||
|
||||
const leadReminder = shouldPauseForApproval
|
||||
? buildFinalWaveApprovalReminder(boulderState.plan_name, progress, subagentSessionId)
|
||||
: buildCompletionGate(boulderState.plan_name, subagentSessionId)
|
||||
? buildFinalWaveApprovalReminder(boulderState.plan_name, progress, preferredSessionId)
|
||||
: buildCompletionGate(boulderState.plan_name, preferredSessionId)
|
||||
const followupReminder = shouldPauseForApproval
|
||||
? null
|
||||
: buildOrchestratorReminder(boulderState.plan_name, progress, subagentSessionId, autoCommit, false)
|
||||
: buildOrchestratorReminder(boulderState.plan_name, progress, preferredSessionId, autoCommit, false)
|
||||
|
||||
toolOutput.output = `
|
||||
<system-reminder>
|
||||
@@ -132,10 +222,22 @@ ${
|
||||
plan: boulderState.plan_name,
|
||||
progress: `${progress.completed}/${progress.total}`,
|
||||
fileCount: gitStats.length,
|
||||
preferredSessionId,
|
||||
waitingForFinalWaveApproval: shouldPauseForApproval,
|
||||
})
|
||||
} else {
|
||||
toolOutput.output += `\n<system-reminder>\n${buildStandaloneVerificationReminder(subagentSessionId)}\n</system-reminder>`
|
||||
const lineageSessionIDs = toolInput.sessionID ? [toolInput.sessionID] : []
|
||||
const subagentSessionId = await validateSubagentSessionId({
|
||||
client: ctx.client,
|
||||
sessionID: extractedSessionId,
|
||||
lineageSessionIDs,
|
||||
})
|
||||
const preferredSessionId = pendingTaskRef?.kind === "skip"
|
||||
? undefined
|
||||
: subagentSessionId
|
||||
toolOutput.output += `\n<system-reminder>\n${buildStandaloneVerificationReminder(
|
||||
resolvePreferredSessionId(preferredSessionId),
|
||||
)}\n</system-reminder>`
|
||||
|
||||
log(`[${HOOK_NAME}] Verification reminder appended for orchestrator`, {
|
||||
sessionID: toolInput.sessionID,
|
||||
|
||||
@@ -2,19 +2,26 @@ import { log } from "../../shared/logger"
|
||||
import { SYSTEM_DIRECTIVE_PREFIX } from "../../shared/system-directive"
|
||||
import { isCallerOrchestrator } from "../../shared/session-utils"
|
||||
import type { PluginInput } from "@opencode-ai/plugin"
|
||||
import { readBoulderState, readCurrentTopLevelTask } from "../../features/boulder-state"
|
||||
import { HOOK_NAME } from "./hook-name"
|
||||
import { ORCHESTRATOR_DELEGATION_REQUIRED, SINGLE_TASK_DIRECTIVE } from "./system-reminder-templates"
|
||||
import { isSisyphusPath } from "./sisyphus-path"
|
||||
import type { PendingTaskRef, TrackedTopLevelTaskRef } from "./types"
|
||||
import { isWriteOrEditToolName } from "./write-edit-tool-policy"
|
||||
|
||||
export function createToolExecuteBeforeHandler(input: {
|
||||
ctx: PluginInput
|
||||
pendingFilePaths: Map<string, string>
|
||||
pendingTaskRefs: Map<string, PendingTaskRef>
|
||||
}): (
|
||||
toolInput: { tool: string; sessionID?: string; callID?: string },
|
||||
toolOutput: { args: Record<string, unknown>; message?: string }
|
||||
) => Promise<void> {
|
||||
const { ctx, pendingFilePaths } = input
|
||||
const { ctx, pendingFilePaths, pendingTaskRefs } = input
|
||||
|
||||
function trackTask(callID: string, task: TrackedTopLevelTaskRef): void {
|
||||
pendingTaskRefs.set(callID, { kind: "track", task })
|
||||
}
|
||||
|
||||
return async (toolInput, toolOutput): Promise<void> => {
|
||||
if (!(await isCallerOrchestrator(toolInput.sessionID, ctx.client))) {
|
||||
@@ -43,6 +50,46 @@ export function createToolExecuteBeforeHandler(input: {
|
||||
|
||||
// Check task - inject single-task directive
|
||||
if (toolInput.tool === "task") {
|
||||
if (toolInput.callID) {
|
||||
const requestedSessionId = toolOutput.args.session_id as string | undefined
|
||||
if (requestedSessionId) {
|
||||
pendingTaskRefs.set(toolInput.callID, {
|
||||
kind: "skip",
|
||||
reason: "explicit_resume",
|
||||
})
|
||||
} else {
|
||||
const boulderState = readBoulderState(ctx.directory)
|
||||
const currentTask = boulderState
|
||||
? readCurrentTopLevelTask(boulderState.active_plan)
|
||||
: null
|
||||
if (currentTask) {
|
||||
const task = {
|
||||
key: currentTask.key,
|
||||
label: currentTask.label,
|
||||
title: currentTask.title,
|
||||
}
|
||||
const hasExistingClaim = [...pendingTaskRefs.values()].some((pendingTaskRef) => (
|
||||
pendingTaskRef.kind === "track" && pendingTaskRef.task.key === task.key
|
||||
))
|
||||
|
||||
if (hasExistingClaim) {
|
||||
pendingTaskRefs.set(toolInput.callID, {
|
||||
kind: "skip",
|
||||
reason: "ambiguous_task_key",
|
||||
task,
|
||||
})
|
||||
log(`[${HOOK_NAME}] Skipping task session persistence for ambiguous task key`, {
|
||||
sessionID: toolInput.sessionID,
|
||||
callID: toolInput.callID,
|
||||
taskKey: task.key,
|
||||
})
|
||||
} else {
|
||||
trackTask(toolInput.callID, task)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const prompt = toolOutput.args.prompt as string | undefined
|
||||
if (prompt && !prompt.includes(SYSTEM_DIRECTIVE_PREFIX)) {
|
||||
toolOutput.args.prompt = `<system-reminder>${SINGLE_TASK_DIRECTIVE}</system-reminder>\n` + prompt
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import type { AgentOverrides } from "../../config"
|
||||
import type { BackgroundManager } from "../../features/background-agent"
|
||||
import type { TopLevelTaskRef } from "../../features/boulder-state"
|
||||
|
||||
export type ModelInfo = { providerID: string; modelID: string }
|
||||
|
||||
@@ -7,7 +8,6 @@ export interface AtlasHookOptions {
|
||||
directory: string
|
||||
backgroundManager?: BackgroundManager
|
||||
isContinuationStopped?: (sessionID: string) => boolean
|
||||
shouldSkipContinuation?: (sessionID: string) => boolean
|
||||
agentOverrides?: AgentOverrides
|
||||
/** Enable auto-commit after each atomic task completion (default: true) */
|
||||
autoCommit?: boolean
|
||||
@@ -25,6 +25,13 @@ export interface ToolExecuteAfterOutput {
|
||||
metadata: Record<string, unknown>
|
||||
}
|
||||
|
||||
export type TrackedTopLevelTaskRef = Pick<TopLevelTaskRef, "key" | "label" | "title">
|
||||
|
||||
export type PendingTaskRef =
|
||||
| { kind: "track"; task: TrackedTopLevelTaskRef }
|
||||
| { kind: "skip"; reason: "explicit_resume" }
|
||||
| { kind: "skip"; reason: "ambiguous_task_key"; task: TrackedTopLevelTaskRef }
|
||||
|
||||
export interface SessionState {
|
||||
lastEventWasAbortError?: boolean
|
||||
lastContinuationInjectedAt?: number
|
||||
|
||||
@@ -1,5 +1,14 @@
|
||||
import { VERIFICATION_REMINDER } from "./system-reminder-templates"
|
||||
|
||||
function buildReuseHint(sessionId: string): string {
|
||||
return `
|
||||
**PREFERRED REUSE SESSION FOR THE CURRENT TOP-LEVEL PLAN TASK**
|
||||
|
||||
- Reuse \`${sessionId}\` first if verification fails or the result needs follow-up.
|
||||
- Start a fresh subagent session only when reuse is unavailable or would cross task boundaries.
|
||||
`
|
||||
}
|
||||
|
||||
export function buildCompletionGate(planName: string, sessionId: string): string {
|
||||
return `
|
||||
**COMPLETION GATE — DO NOT PROCEED UNTIL THIS IS DONE**
|
||||
@@ -25,7 +34,8 @@ task(session_id="${sessionId}", prompt="fix: checkbox not recorded correctly")
|
||||
|
||||
**Your completion is NOT tracked until the checkbox is marked in the plan file.**
|
||||
|
||||
**VERIFICATION_REMINDER**`
|
||||
**VERIFICATION_REMINDER**
|
||||
${buildReuseHint(sessionId)}`
|
||||
}
|
||||
|
||||
function buildVerificationReminder(sessionId: string): string {
|
||||
@@ -38,7 +48,9 @@ ${VERIFICATION_REMINDER}
|
||||
**If ANY verification fails, use this immediately:**
|
||||
\`\`\`
|
||||
task(session_id="${sessionId}", prompt="fix: [describe the specific failure]")
|
||||
\`\`\``
|
||||
\`\`\`
|
||||
|
||||
${buildReuseHint(sessionId)}`
|
||||
}
|
||||
|
||||
export function buildOrchestratorReminder(
|
||||
|
||||
@@ -1,44 +0,0 @@
|
||||
type TextPart = {
|
||||
type?: string
|
||||
text?: string
|
||||
}
|
||||
|
||||
type MessageInfo = {
|
||||
id?: string
|
||||
role?: string
|
||||
error?: unknown
|
||||
model?: {
|
||||
providerID?: string
|
||||
modelID?: string
|
||||
}
|
||||
providerID?: string
|
||||
modelID?: string
|
||||
}
|
||||
|
||||
export type SessionMessage = {
|
||||
info?: MessageInfo
|
||||
parts?: TextPart[]
|
||||
}
|
||||
|
||||
export function getLastAssistantMessage(messages: SessionMessage[]): SessionMessage | null {
|
||||
for (let index = messages.length - 1; index >= 0; index--) {
|
||||
if (messages[index].info?.role === "assistant") {
|
||||
return messages[index]
|
||||
}
|
||||
}
|
||||
|
||||
return null
|
||||
}
|
||||
|
||||
export function extractAssistantText(message: SessionMessage): string {
|
||||
return (message.parts ?? [])
|
||||
.filter((part) => part.type === "text" && typeof part.text === "string")
|
||||
.map((part) => part.text?.trim() ?? "")
|
||||
.filter(Boolean)
|
||||
.join("\n")
|
||||
}
|
||||
|
||||
export function isGptAssistantMessage(message: SessionMessage): boolean {
|
||||
const modelID = message.info?.model?.modelID ?? message.info?.modelID
|
||||
return typeof modelID === "string" && modelID.toLowerCase().includes("gpt")
|
||||
}
|
||||
@@ -1,11 +0,0 @@
|
||||
export const HOOK_NAME = "gpt-permission-continuation"
|
||||
export const CONTINUATION_PROMPT = "continue"
|
||||
export const MAX_CONSECUTIVE_AUTO_CONTINUES = 3
|
||||
|
||||
export const DEFAULT_STALL_PATTERNS = [
|
||||
"if you want",
|
||||
"would you like",
|
||||
"shall i",
|
||||
"do you want me to",
|
||||
"let me know if",
|
||||
] as const
|
||||
@@ -1,23 +0,0 @@
|
||||
import { DEFAULT_STALL_PATTERNS } from "./constants"
|
||||
|
||||
function getTrailingSegment(text: string): string {
|
||||
const normalized = text.trim().replace(/\s+/g, " ")
|
||||
if (!normalized) return ""
|
||||
|
||||
const sentenceParts = normalized.split(/(?<=[.!?])\s+/)
|
||||
return sentenceParts[sentenceParts.length - 1]?.trim().toLowerCase() ?? ""
|
||||
}
|
||||
|
||||
export function detectStallPattern(
|
||||
text: string,
|
||||
patterns: readonly string[] = DEFAULT_STALL_PATTERNS,
|
||||
): boolean {
|
||||
if (!text.trim()) return false
|
||||
|
||||
const tail = text.slice(-800)
|
||||
const lines = tail.split("\n").map((line) => line.trim()).filter(Boolean)
|
||||
const hotZone = lines.slice(-3).join(" ")
|
||||
const trailingSegment = getTrailingSegment(hotZone)
|
||||
|
||||
return patterns.some((pattern) => trailingSegment.startsWith(pattern.toLowerCase()))
|
||||
}
|
||||
@@ -1,334 +0,0 @@
|
||||
/// <reference path="../../../bun-test.d.ts" />
|
||||
|
||||
import { createOpencodeClient } from "@opencode-ai/sdk"
|
||||
import { describe, expect, it as test } from "bun:test"
|
||||
|
||||
import { createGptPermissionContinuationHook } from "."
|
||||
|
||||
type SessionMessage = {
|
||||
info: {
|
||||
id: string
|
||||
role: "user" | "assistant"
|
||||
model?: {
|
||||
providerID?: string
|
||||
modelID?: string
|
||||
}
|
||||
modelID?: string
|
||||
}
|
||||
parts?: Array<{ type: string; text?: string }>
|
||||
}
|
||||
|
||||
type GptPermissionContext = Parameters<typeof createGptPermissionContinuationHook>[0]
|
||||
|
||||
function isRecord(value: unknown): value is Record<string, unknown> {
|
||||
return typeof value === "object" && value !== null
|
||||
}
|
||||
|
||||
function extractPromptText(input: unknown): string {
|
||||
if (!isRecord(input)) return ""
|
||||
|
||||
const body = input.body
|
||||
if (!isRecord(body)) return ""
|
||||
|
||||
const parts = body.parts
|
||||
if (!Array.isArray(parts)) return ""
|
||||
|
||||
const firstPart = parts[0]
|
||||
if (!isRecord(firstPart)) return ""
|
||||
|
||||
return typeof firstPart.text === "string" ? firstPart.text : ""
|
||||
}
|
||||
|
||||
function createMockPluginInput(messages: SessionMessage[]): {
|
||||
ctx: GptPermissionContext
|
||||
promptCalls: string[]
|
||||
} {
|
||||
const promptCalls: string[] = []
|
||||
const client = createOpencodeClient({ directory: "/tmp/test" })
|
||||
const shell = Object.assign(
|
||||
() => {
|
||||
throw new Error("$ is not used in this test")
|
||||
},
|
||||
{
|
||||
braces: () => [],
|
||||
escape: (input: string) => input,
|
||||
env() {
|
||||
return shell
|
||||
},
|
||||
cwd() {
|
||||
return shell
|
||||
},
|
||||
nothrow() {
|
||||
return shell
|
||||
},
|
||||
throws() {
|
||||
return shell
|
||||
},
|
||||
},
|
||||
)
|
||||
const request = new Request("http://localhost")
|
||||
const response = new Response()
|
||||
|
||||
Reflect.set(client.session, "messages", async () => ({ data: messages, error: undefined, request, response }))
|
||||
Reflect.set(client.session, "prompt", async (input: unknown) => {
|
||||
promptCalls.push(extractPromptText(input))
|
||||
return { data: undefined, error: undefined, request, response }
|
||||
})
|
||||
Reflect.set(client.session, "promptAsync", async (input: unknown) => {
|
||||
promptCalls.push(extractPromptText(input))
|
||||
return { data: undefined, error: undefined, request, response }
|
||||
})
|
||||
|
||||
const ctx: GptPermissionContext = {
|
||||
client,
|
||||
project: {
|
||||
id: "test-project",
|
||||
worktree: "/tmp/test",
|
||||
time: { created: Date.now() },
|
||||
},
|
||||
directory: "/tmp/test",
|
||||
worktree: "/tmp/test",
|
||||
serverUrl: new URL("http://localhost"),
|
||||
$: shell,
|
||||
}
|
||||
|
||||
return { ctx, promptCalls }
|
||||
}
|
||||
|
||||
function createAssistantMessage(id: string, text: string): SessionMessage {
|
||||
return {
|
||||
info: { id, role: "assistant", modelID: "gpt-5.4" },
|
||||
parts: [{ type: "text", text }],
|
||||
}
|
||||
}
|
||||
|
||||
function createUserMessage(id: string, text: string): SessionMessage {
|
||||
return {
|
||||
info: { id, role: "user" },
|
||||
parts: [{ type: "text", text }],
|
||||
}
|
||||
}
|
||||
|
||||
describe("gpt-permission-continuation", () => {
|
||||
test("injects continue when the last GPT assistant reply asks for permission", async () => {
|
||||
// given
|
||||
const { ctx, promptCalls } = createMockPluginInput([
|
||||
{
|
||||
info: { id: "msg-1", role: "assistant", modelID: "gpt-5.4" },
|
||||
parts: [{ type: "text", text: "I finished the analysis. If you want, I can apply the changes next." }],
|
||||
},
|
||||
])
|
||||
const hook = createGptPermissionContinuationHook(ctx)
|
||||
|
||||
// when
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: "ses-1" } } })
|
||||
|
||||
// then
|
||||
expect(promptCalls).toEqual(["continue"])
|
||||
})
|
||||
|
||||
test("does not inject when the last assistant model is not GPT", async () => {
|
||||
// given
|
||||
const { ctx, promptCalls } = createMockPluginInput([
|
||||
{
|
||||
info: {
|
||||
id: "msg-1",
|
||||
role: "assistant",
|
||||
model: { providerID: "anthropic", modelID: "claude-sonnet-4" },
|
||||
},
|
||||
parts: [{ type: "text", text: "If you want, I can keep going." }],
|
||||
},
|
||||
])
|
||||
const hook = createGptPermissionContinuationHook(ctx)
|
||||
|
||||
// when
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: "ses-1" } } })
|
||||
|
||||
// then
|
||||
expect(promptCalls).toEqual([])
|
||||
})
|
||||
|
||||
test("does not inject when the last assistant reply is not a stall pattern", async () => {
|
||||
// given
|
||||
const { ctx, promptCalls } = createMockPluginInput([
|
||||
{
|
||||
info: { id: "msg-1", role: "assistant", modelID: "gpt-5.4" },
|
||||
parts: [{ type: "text", text: "I completed the refactor and all tests pass." }],
|
||||
},
|
||||
])
|
||||
const hook = createGptPermissionContinuationHook(ctx)
|
||||
|
||||
// when
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: "ses-1" } } })
|
||||
|
||||
// then
|
||||
expect(promptCalls).toEqual([])
|
||||
})
|
||||
|
||||
test("does not inject when a permission phrase appears before the final sentence", async () => {
|
||||
// given
|
||||
const { ctx, promptCalls } = createMockPluginInput([
|
||||
{
|
||||
info: { id: "msg-1", role: "assistant", modelID: "gpt-5.4" },
|
||||
parts: [{ type: "text", text: "If you want, I can keep going. The current work is complete." }],
|
||||
},
|
||||
])
|
||||
const hook = createGptPermissionContinuationHook(ctx)
|
||||
|
||||
// when
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: "ses-1" } } })
|
||||
|
||||
// then
|
||||
expect(promptCalls).toEqual([])
|
||||
})
|
||||
|
||||
test("does not inject when continuation is stopped for the session", async () => {
|
||||
// given
|
||||
const { ctx, promptCalls } = createMockPluginInput([
|
||||
{
|
||||
info: { id: "msg-1", role: "assistant", modelID: "gpt-5.4" },
|
||||
parts: [{ type: "text", text: "If you want, I can continue with the fix." }],
|
||||
},
|
||||
])
|
||||
const hook = createGptPermissionContinuationHook(ctx, {
|
||||
isContinuationStopped: (sessionID) => sessionID === "ses-1",
|
||||
})
|
||||
|
||||
// when
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: "ses-1" } } })
|
||||
|
||||
// then
|
||||
expect(promptCalls).toEqual([])
|
||||
})
|
||||
|
||||
test("does not inject twice for the same assistant message", async () => {
|
||||
// given
|
||||
const { ctx, promptCalls } = createMockPluginInput([
|
||||
{
|
||||
info: { id: "msg-1", role: "assistant", modelID: "gpt-5.4" },
|
||||
parts: [{ type: "text", text: "Would you like me to continue with the fix?" }],
|
||||
},
|
||||
])
|
||||
const hook = createGptPermissionContinuationHook(ctx)
|
||||
|
||||
// when
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: "ses-1" } } })
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: "ses-1" } } })
|
||||
|
||||
// then
|
||||
expect(promptCalls).toEqual(["continue"])
|
||||
})
|
||||
|
||||
describe("#given repeated GPT permission tails in the same session", () => {
|
||||
describe("#when the permission phrases keep changing", () => {
|
||||
test("stops injecting after three consecutive auto-continues", async () => {
|
||||
// given
|
||||
const messages: SessionMessage[] = [
|
||||
createUserMessage("msg-0", "Please continue the fix."),
|
||||
createAssistantMessage("msg-1", "If you want, I can apply the patch next."),
|
||||
]
|
||||
const { ctx, promptCalls } = createMockPluginInput(messages)
|
||||
const hook = createGptPermissionContinuationHook(ctx)
|
||||
|
||||
// when
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: "ses-1" } } })
|
||||
messages.push(createUserMessage("msg-2", "continue"))
|
||||
messages.push(createAssistantMessage("msg-3", "Would you like me to continue with the tests?"))
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: "ses-1" } } })
|
||||
messages.push(createUserMessage("msg-4", "continue"))
|
||||
messages.push(createAssistantMessage("msg-5", "Do you want me to wire the remaining cleanup?"))
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: "ses-1" } } })
|
||||
messages.push(createUserMessage("msg-6", "continue"))
|
||||
messages.push(createAssistantMessage("msg-7", "Shall I finish the remaining updates?"))
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: "ses-1" } } })
|
||||
|
||||
// then
|
||||
expect(promptCalls).toEqual(["continue", "continue", "continue"])
|
||||
})
|
||||
})
|
||||
|
||||
describe("#when a real user message arrives between auto-continues", () => {
|
||||
test("resets the consecutive auto-continue counter", async () => {
|
||||
// given
|
||||
const messages: SessionMessage[] = [
|
||||
createUserMessage("msg-0", "Please continue the fix."),
|
||||
createAssistantMessage("msg-1", "If you want, I can apply the patch next."),
|
||||
]
|
||||
const { ctx, promptCalls } = createMockPluginInput(messages)
|
||||
const hook = createGptPermissionContinuationHook(ctx)
|
||||
|
||||
// when
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: "ses-1" } } })
|
||||
messages.push(createUserMessage("msg-2", "continue"))
|
||||
messages.push(createAssistantMessage("msg-3", "Would you like me to continue with the tests?"))
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: "ses-1" } } })
|
||||
messages.push(createUserMessage("msg-4", "Please keep going and finish the cleanup."))
|
||||
messages.push(createAssistantMessage("msg-5", "Do you want me to wire the remaining cleanup?"))
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: "ses-1" } } })
|
||||
messages.push(createUserMessage("msg-6", "continue"))
|
||||
messages.push(createAssistantMessage("msg-7", "Shall I finish the remaining updates?"))
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: "ses-1" } } })
|
||||
messages.push(createUserMessage("msg-8", "continue"))
|
||||
messages.push(createAssistantMessage("msg-9", "If you want, I can apply the final polish."))
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: "ses-1" } } })
|
||||
messages.push(createUserMessage("msg-10", "continue"))
|
||||
messages.push(createAssistantMessage("msg-11", "Would you like me to ship the final verification?"))
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: "ses-1" } } })
|
||||
|
||||
// then
|
||||
expect(promptCalls).toEqual(["continue", "continue", "continue", "continue", "continue"])
|
||||
})
|
||||
})
|
||||
|
||||
describe("#when the same permission phrase repeats after an auto-continue", () => {
|
||||
test("stops immediately on stagnation", async () => {
|
||||
// given
|
||||
const messages: SessionMessage[] = [
|
||||
createUserMessage("msg-0", "Please continue the fix."),
|
||||
createAssistantMessage("msg-1", "If you want, I can apply the patch next."),
|
||||
]
|
||||
const { ctx, promptCalls } = createMockPluginInput(messages)
|
||||
const hook = createGptPermissionContinuationHook(ctx)
|
||||
|
||||
// when
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: "ses-1" } } })
|
||||
messages.push(createUserMessage("msg-2", "continue"))
|
||||
messages.push(createAssistantMessage("msg-3", "If you want, I can apply the patch next."))
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: "ses-1" } } })
|
||||
|
||||
// then
|
||||
expect(promptCalls).toEqual(["continue"])
|
||||
})
|
||||
})
|
||||
|
||||
describe("#when a user manually types continue after the cap is reached", () => {
|
||||
test("resets the cap and allows another auto-continue", async () => {
|
||||
// given
|
||||
const messages: SessionMessage[] = [
|
||||
createUserMessage("msg-0", "Please continue the fix."),
|
||||
createAssistantMessage("msg-1", "If you want, I can apply the patch next."),
|
||||
]
|
||||
const { ctx, promptCalls } = createMockPluginInput(messages)
|
||||
const hook = createGptPermissionContinuationHook(ctx)
|
||||
|
||||
// when
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: "ses-1" } } })
|
||||
messages.push(createUserMessage("msg-2", "continue"))
|
||||
messages.push(createAssistantMessage("msg-3", "Would you like me to continue with the tests?"))
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: "ses-1" } } })
|
||||
messages.push(createUserMessage("msg-4", "continue"))
|
||||
messages.push(createAssistantMessage("msg-5", "Do you want me to wire the remaining cleanup?"))
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: "ses-1" } } })
|
||||
messages.push(createUserMessage("msg-6", "continue"))
|
||||
messages.push(createAssistantMessage("msg-7", "Shall I finish the remaining updates?"))
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: "ses-1" } } })
|
||||
messages.push(createUserMessage("msg-8", "continue"))
|
||||
messages.push(createAssistantMessage("msg-9", "If you want, I can apply the final polish."))
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID: "ses-1" } } })
|
||||
|
||||
// then
|
||||
expect(promptCalls).toEqual(["continue", "continue", "continue", "continue"])
|
||||
})
|
||||
})
|
||||
})
|
||||
})
|
||||
@@ -1,200 +0,0 @@
|
||||
import type { PluginInput } from "@opencode-ai/plugin"
|
||||
|
||||
import { normalizeSDKResponse } from "../../shared"
|
||||
import { log } from "../../shared/logger"
|
||||
|
||||
import {
|
||||
extractAssistantText,
|
||||
getLastAssistantMessage,
|
||||
isGptAssistantMessage,
|
||||
type SessionMessage,
|
||||
} from "./assistant-message"
|
||||
import {
|
||||
CONTINUATION_PROMPT,
|
||||
HOOK_NAME,
|
||||
MAX_CONSECUTIVE_AUTO_CONTINUES,
|
||||
} from "./constants"
|
||||
import { detectStallPattern } from "./detector"
|
||||
import type { SessionStateStore } from "./session-state"
|
||||
|
||||
type SessionState = ReturnType<SessionStateStore["getState"]>
|
||||
|
||||
async function promptContinuation(
|
||||
ctx: PluginInput,
|
||||
sessionID: string,
|
||||
): Promise<void> {
|
||||
const payload = {
|
||||
path: { id: sessionID },
|
||||
body: {
|
||||
parts: [{ type: "text" as const, text: CONTINUATION_PROMPT }],
|
||||
},
|
||||
query: { directory: ctx.directory },
|
||||
}
|
||||
|
||||
if (typeof ctx.client.session.promptAsync === "function") {
|
||||
await ctx.client.session.promptAsync(payload)
|
||||
return
|
||||
}
|
||||
|
||||
await ctx.client.session.prompt(payload)
|
||||
}
|
||||
|
||||
function getLastUserMessageBefore(
|
||||
messages: SessionMessage[],
|
||||
lastAssistantIndex: number,
|
||||
): SessionMessage | null {
|
||||
for (let index = lastAssistantIndex - 1; index >= 0; index--) {
|
||||
if (messages[index].info?.role === "user") {
|
||||
return messages[index]
|
||||
}
|
||||
}
|
||||
|
||||
return null
|
||||
}
|
||||
|
||||
function isAutoContinuationUserMessage(message: SessionMessage): boolean {
|
||||
return extractAssistantText(message).trim().toLowerCase() === CONTINUATION_PROMPT
|
||||
}
|
||||
|
||||
function extractPermissionPhrase(text: string): string | null {
|
||||
const tail = text.slice(-800)
|
||||
const lines = tail.split("\n").map((line) => line.trim()).filter(Boolean)
|
||||
const hotZone = lines.slice(-3).join(" ")
|
||||
const sentenceParts = hotZone.trim().replace(/\s+/g, " ").split(/(?<=[.!?])\s+/)
|
||||
const trailingSegment = sentenceParts[sentenceParts.length - 1]?.trim().toLowerCase() ?? ""
|
||||
return trailingSegment || null
|
||||
}
|
||||
|
||||
function resetAutoContinuationState(state: SessionState): void {
|
||||
state.consecutiveAutoContinueCount = 0
|
||||
state.awaitingAutoContinuationResponse = false
|
||||
state.lastAutoContinuePermissionPhrase = undefined
|
||||
}
|
||||
|
||||
export function createGptPermissionContinuationHandler(args: {
|
||||
ctx: PluginInput
|
||||
sessionStateStore: SessionStateStore
|
||||
isContinuationStopped?: (sessionID: string) => boolean
|
||||
}): (input: { event: { type: string; properties?: unknown } }) => Promise<void> {
|
||||
const { ctx, sessionStateStore, isContinuationStopped } = args
|
||||
|
||||
return async ({ event }: { event: { type: string; properties?: unknown } }): Promise<void> => {
|
||||
const properties = event.properties as Record<string, unknown> | undefined
|
||||
|
||||
if (event.type === "session.deleted") {
|
||||
const sessionID = (properties?.info as { id?: string } | undefined)?.id
|
||||
if (sessionID) {
|
||||
sessionStateStore.cleanup(sessionID)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
if (event.type !== "session.idle") return
|
||||
|
||||
const sessionID = properties?.sessionID as string | undefined
|
||||
if (!sessionID) return
|
||||
|
||||
if (isContinuationStopped?.(sessionID)) {
|
||||
log(`[${HOOK_NAME}] Skipped: continuation stopped for session`, { sessionID })
|
||||
return
|
||||
}
|
||||
|
||||
const state = sessionStateStore.getState(sessionID)
|
||||
if (state.inFlight) {
|
||||
log(`[${HOOK_NAME}] Skipped: prompt already in flight`, { sessionID })
|
||||
return
|
||||
}
|
||||
|
||||
try {
|
||||
const messagesResponse = await ctx.client.session.messages({
|
||||
path: { id: sessionID },
|
||||
query: { directory: ctx.directory },
|
||||
})
|
||||
const messages = normalizeSDKResponse(messagesResponse, [] as SessionMessage[], {
|
||||
preferResponseOnMissingData: true,
|
||||
})
|
||||
const lastAssistantMessage = getLastAssistantMessage(messages)
|
||||
if (!lastAssistantMessage) return
|
||||
|
||||
const lastAssistantIndex = messages.lastIndexOf(lastAssistantMessage)
|
||||
const previousUserMessage = getLastUserMessageBefore(messages, lastAssistantIndex)
|
||||
const previousUserMessageWasAutoContinuation =
|
||||
previousUserMessage !== null
|
||||
&& state.awaitingAutoContinuationResponse
|
||||
&& isAutoContinuationUserMessage(previousUserMessage)
|
||||
|
||||
if (previousUserMessageWasAutoContinuation) {
|
||||
state.awaitingAutoContinuationResponse = false
|
||||
} else if (previousUserMessage) {
|
||||
resetAutoContinuationState(state)
|
||||
} else {
|
||||
state.awaitingAutoContinuationResponse = false
|
||||
}
|
||||
|
||||
const messageID = lastAssistantMessage.info?.id
|
||||
if (messageID && state.lastHandledMessageID === messageID) {
|
||||
log(`[${HOOK_NAME}] Skipped: already handled assistant message`, { sessionID, messageID })
|
||||
return
|
||||
}
|
||||
|
||||
if (lastAssistantMessage.info?.error) {
|
||||
log(`[${HOOK_NAME}] Skipped: last assistant message has error`, { sessionID, messageID })
|
||||
return
|
||||
}
|
||||
|
||||
if (!isGptAssistantMessage(lastAssistantMessage)) {
|
||||
log(`[${HOOK_NAME}] Skipped: last assistant model is not GPT`, { sessionID, messageID })
|
||||
return
|
||||
}
|
||||
|
||||
const assistantText = extractAssistantText(lastAssistantMessage)
|
||||
if (!detectStallPattern(assistantText)) {
|
||||
return
|
||||
}
|
||||
|
||||
const permissionPhrase = extractPermissionPhrase(assistantText)
|
||||
if (!permissionPhrase) {
|
||||
return
|
||||
}
|
||||
|
||||
if (state.consecutiveAutoContinueCount >= MAX_CONSECUTIVE_AUTO_CONTINUES) {
|
||||
state.lastHandledMessageID = messageID
|
||||
log(`[${HOOK_NAME}] Skipped: reached max consecutive auto-continues`, {
|
||||
sessionID,
|
||||
messageID,
|
||||
consecutiveAutoContinueCount: state.consecutiveAutoContinueCount,
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
if (
|
||||
state.consecutiveAutoContinueCount >= 1
|
||||
&& state.lastAutoContinuePermissionPhrase === permissionPhrase
|
||||
) {
|
||||
state.lastHandledMessageID = messageID
|
||||
log(`[${HOOK_NAME}] Skipped: repeated permission phrase after auto-continue`, {
|
||||
sessionID,
|
||||
messageID,
|
||||
permissionPhrase,
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
state.inFlight = true
|
||||
await promptContinuation(ctx, sessionID)
|
||||
state.lastHandledMessageID = messageID
|
||||
state.consecutiveAutoContinueCount += 1
|
||||
state.awaitingAutoContinuationResponse = true
|
||||
state.lastAutoContinuePermissionPhrase = permissionPhrase
|
||||
state.lastInjectedAt = Date.now()
|
||||
log(`[${HOOK_NAME}] Injected continuation prompt`, { sessionID, messageID })
|
||||
} catch (error) {
|
||||
log(`[${HOOK_NAME}] Failed to inject continuation prompt`, {
|
||||
sessionID,
|
||||
error: String(error),
|
||||
})
|
||||
} finally {
|
||||
state.inFlight = false
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,29 +0,0 @@
|
||||
import type { PluginInput } from "@opencode-ai/plugin"
|
||||
|
||||
import { createGptPermissionContinuationHandler } from "./handler"
|
||||
import { createSessionStateStore } from "./session-state"
|
||||
|
||||
export type GptPermissionContinuationHook = {
|
||||
handler: (input: { event: { type: string; properties?: unknown } }) => Promise<void>
|
||||
wasRecentlyInjected: (sessionID: string) => boolean
|
||||
}
|
||||
|
||||
export function createGptPermissionContinuationHook(
|
||||
ctx: PluginInput,
|
||||
options?: {
|
||||
isContinuationStopped?: (sessionID: string) => boolean
|
||||
},
|
||||
): GptPermissionContinuationHook {
|
||||
const sessionStateStore = createSessionStateStore()
|
||||
|
||||
return {
|
||||
handler: createGptPermissionContinuationHandler({
|
||||
ctx,
|
||||
sessionStateStore,
|
||||
isContinuationStopped: options?.isContinuationStopped,
|
||||
}),
|
||||
wasRecentlyInjected(sessionID: string): boolean {
|
||||
return sessionStateStore.wasRecentlyInjected(sessionID, 5_000)
|
||||
},
|
||||
}
|
||||
}
|
||||
@@ -1,39 +0,0 @@
|
||||
type SessionState = {
|
||||
inFlight: boolean
|
||||
consecutiveAutoContinueCount: number
|
||||
awaitingAutoContinuationResponse: boolean
|
||||
lastHandledMessageID?: string
|
||||
lastAutoContinuePermissionPhrase?: string
|
||||
lastInjectedAt?: number
|
||||
}
|
||||
|
||||
export type SessionStateStore = ReturnType<typeof createSessionStateStore>
|
||||
|
||||
export function createSessionStateStore() {
|
||||
const states = new Map<string, SessionState>()
|
||||
|
||||
const getState = (sessionID: string): SessionState => {
|
||||
const existing = states.get(sessionID)
|
||||
if (existing) return existing
|
||||
|
||||
const created: SessionState = {
|
||||
inFlight: false,
|
||||
consecutiveAutoContinueCount: 0,
|
||||
awaitingAutoContinuationResponse: false,
|
||||
}
|
||||
states.set(sessionID, created)
|
||||
return created
|
||||
}
|
||||
|
||||
return {
|
||||
getState,
|
||||
wasRecentlyInjected(sessionID: string, windowMs: number): boolean {
|
||||
const state = states.get(sessionID)
|
||||
if (!state?.lastInjectedAt) return false
|
||||
return Date.now() - state.lastInjectedAt <= windowMs
|
||||
},
|
||||
cleanup(sessionID: string): void {
|
||||
states.delete(sessionID)
|
||||
},
|
||||
}
|
||||
}
|
||||
@@ -1,62 +0,0 @@
|
||||
import { describe, expect, test } from "bun:test"
|
||||
|
||||
import { createTodoContinuationEnforcer } from "../todo-continuation-enforcer"
|
||||
import { createGptPermissionContinuationHook } from "."
|
||||
|
||||
describe("gpt-permission-continuation coordination", () => {
|
||||
test("injects only once when GPT permission continuation and todo continuation are both eligible", async () => {
|
||||
// given
|
||||
const promptCalls: string[] = []
|
||||
const toastCalls: string[] = []
|
||||
const sessionID = "ses-dual-continuation"
|
||||
const ctx = {
|
||||
directory: "/tmp/test",
|
||||
client: {
|
||||
session: {
|
||||
messages: async () => ({
|
||||
data: [
|
||||
{
|
||||
info: { id: "msg-1", role: "assistant", modelID: "gpt-5.4" },
|
||||
parts: [{ type: "text", text: "If you want, I can implement the fix next." }],
|
||||
},
|
||||
],
|
||||
}),
|
||||
todo: async () => ({
|
||||
data: [{ id: "1", content: "Task 1", status: "pending", priority: "high" }],
|
||||
}),
|
||||
prompt: async (input: { body: { parts: Array<{ text: string }> } }) => {
|
||||
promptCalls.push(input.body.parts[0]?.text ?? "")
|
||||
return {}
|
||||
},
|
||||
promptAsync: async (input: { body: { parts: Array<{ text: string }> } }) => {
|
||||
promptCalls.push(input.body.parts[0]?.text ?? "")
|
||||
return {}
|
||||
},
|
||||
},
|
||||
tui: {
|
||||
showToast: async (input: { body: { title: string } }) => {
|
||||
toastCalls.push(input.body.title)
|
||||
return {}
|
||||
},
|
||||
},
|
||||
},
|
||||
} as any
|
||||
|
||||
const gptPermissionContinuation = createGptPermissionContinuationHook(ctx)
|
||||
const todoContinuationEnforcer = createTodoContinuationEnforcer(ctx, {
|
||||
shouldSkipContinuation: (id) => gptPermissionContinuation.wasRecentlyInjected(id),
|
||||
})
|
||||
|
||||
// when
|
||||
await gptPermissionContinuation.handler({
|
||||
event: { type: "session.idle", properties: { sessionID } },
|
||||
})
|
||||
await todoContinuationEnforcer.handler({
|
||||
event: { type: "session.idle", properties: { sessionID } },
|
||||
})
|
||||
|
||||
// then
|
||||
expect(promptCalls).toEqual(["continue"])
|
||||
expect(toastCalls).toEqual([])
|
||||
})
|
||||
})
|
||||
@@ -30,7 +30,6 @@ export { createCategorySkillReminderHook } from "./category-skill-reminder";
|
||||
export { createRalphLoopHook, type RalphLoopHook } from "./ralph-loop";
|
||||
export { createNoSisyphusGptHook } from "./no-sisyphus-gpt";
|
||||
export { createNoHephaestusNonGptHook } from "./no-hephaestus-non-gpt";
|
||||
export { createGptPermissionContinuationHook, type GptPermissionContinuationHook } from "./gpt-permission-continuation"
|
||||
export { createAutoSlashCommandHook } from "./auto-slash-command";
|
||||
export { createEditErrorRecoveryHook } from "./edit-error-recovery";
|
||||
|
||||
@@ -53,3 +52,4 @@ export { createHashlineReadEnhancerHook } from "./hashline-read-enhancer";
|
||||
export { createJsonErrorRecoveryHook, JSON_ERROR_TOOL_EXCLUDE_LIST, JSON_ERROR_PATTERNS, JSON_ERROR_REMINDER } from "./json-error-recovery";
|
||||
export { createReadImageResizerHook } from "./read-image-resizer"
|
||||
export { createTodoDescriptionOverrideHook } from "./todo-description-override"
|
||||
export { createWebFetchRedirectGuardHook } from "./webfetch-redirect-guard"
|
||||
|
||||
@@ -2,7 +2,7 @@ export const CODE_BLOCK_PATTERN = /```[\s\S]*?```/g
|
||||
export const INLINE_CODE_PATTERN = /`[^`]+`/g
|
||||
|
||||
// Re-export from submodules
|
||||
export { isPlannerAgent, getUltraworkMessage } from "./ultrawork"
|
||||
export { isPlannerAgent, isNonOmoAgent, getUltraworkMessage } from "./ultrawork"
|
||||
export { SEARCH_PATTERN, SEARCH_MESSAGE } from "./search"
|
||||
export { ANALYZE_PATTERN, ANALYZE_MESSAGE } from "./analyze"
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import type { PluginInput } from "@opencode-ai/plugin"
|
||||
import { detectKeywordsWithType, extractPromptText } from "./detector"
|
||||
import { isPlannerAgent } from "./constants"
|
||||
import { isPlannerAgent, isNonOmoAgent } from "./constants"
|
||||
import { log } from "../../shared"
|
||||
import {
|
||||
isSystemDirective,
|
||||
@@ -45,6 +45,12 @@ export function createKeywordDetectorHook(ctx: PluginInput, _collector?: Context
|
||||
|
||||
const currentAgent = getSessionAgent(input.sessionID) ?? input.agent
|
||||
|
||||
// Skip all keyword injection for non-OMO agents (e.g., OpenCode-Builder, Plan)
|
||||
if (isNonOmoAgent(currentAgent)) {
|
||||
log(`[keyword-detector] Skipping keyword injection for non-OMO agent`, { sessionID: input.sessionID, agent: currentAgent })
|
||||
return
|
||||
}
|
||||
|
||||
// Remove system-reminder content to prevent automated system messages from triggering mode keywords
|
||||
const cleanText = removeSystemReminders(promptText)
|
||||
const modelID = input.model?.modelID
|
||||
|
||||
@@ -746,3 +746,109 @@ describe("keyword-detector agent-specific ultrawork messages", () => {
|
||||
expect(textPart!.text).not.toContain("YOU ARE A PLANNER, NOT AN IMPLEMENTER")
|
||||
})
|
||||
})
|
||||
|
||||
describe("keyword-detector non-OMO agent skipping", () => {
|
||||
let logCalls: Array<{ msg: string; data?: unknown }>
|
||||
let logSpy: ReturnType<typeof spyOn>
|
||||
|
||||
beforeEach(() => {
|
||||
_resetForTesting()
|
||||
logCalls = []
|
||||
logSpy = spyOn(sharedModule, "log").mockImplementation((msg: string, data?: unknown) => {
|
||||
logCalls.push({ msg, data })
|
||||
})
|
||||
})
|
||||
|
||||
afterEach(() => {
|
||||
logSpy?.mockRestore()
|
||||
_resetForTesting()
|
||||
})
|
||||
|
||||
function createMockPluginInput() {
|
||||
return {
|
||||
client: {
|
||||
tui: {
|
||||
showToast: async () => {},
|
||||
},
|
||||
},
|
||||
} as any
|
||||
}
|
||||
|
||||
test("should skip all keyword injection for OpenCode-Builder agent", async () => {
|
||||
// given - keyword-detector hook with Builder agent
|
||||
const collector = new ContextCollector()
|
||||
const hook = createKeywordDetectorHook(createMockPluginInput(), collector)
|
||||
const sessionID = "builder-session"
|
||||
const output = {
|
||||
message: {} as Record<string, unknown>,
|
||||
parts: [{ type: "text", text: "ultrawork search and analyze this code" }],
|
||||
}
|
||||
|
||||
// when - keyword detection runs with OpenCode-Builder agent
|
||||
await hook["chat.message"]({ sessionID, agent: "OpenCode-Builder" }, output)
|
||||
|
||||
// then - no keywords should be injected
|
||||
const textPart = output.parts.find(p => p.type === "text")
|
||||
expect(textPart).toBeDefined()
|
||||
expect(textPart!.text).toBe("ultrawork search and analyze this code")
|
||||
})
|
||||
|
||||
test("should skip all keyword injection for Plan agent", async () => {
|
||||
// given - keyword-detector hook with Plan agent
|
||||
const collector = new ContextCollector()
|
||||
const hook = createKeywordDetectorHook(createMockPluginInput(), collector)
|
||||
const sessionID = "plan-session"
|
||||
const output = {
|
||||
message: {} as Record<string, unknown>,
|
||||
parts: [{ type: "text", text: "search mode analyze mode ultrawork" }],
|
||||
}
|
||||
|
||||
// when - keyword detection runs with Plan agent
|
||||
await hook["chat.message"]({ sessionID, agent: "Plan" }, output)
|
||||
|
||||
// then - no keywords should be injected for non-OMO Plan agent
|
||||
const textPart = output.parts.find(p => p.type === "text")
|
||||
expect(textPart).toBeDefined()
|
||||
expect(textPart!.text).toBe("search mode analyze mode ultrawork")
|
||||
})
|
||||
|
||||
test("should still inject keywords for OMO agents like Sisyphus", async () => {
|
||||
// given - keyword-detector hook with Sisyphus agent
|
||||
const collector = new ContextCollector()
|
||||
const hook = createKeywordDetectorHook(createMockPluginInput(), collector)
|
||||
const sessionID = "sisyphus-session-omo"
|
||||
const output = {
|
||||
message: {} as Record<string, unknown>,
|
||||
parts: [{ type: "text", text: "ultrawork implement this" }],
|
||||
}
|
||||
|
||||
// when - keyword detection runs with Sisyphus (OMO agent)
|
||||
await hook["chat.message"]({ sessionID, agent: "sisyphus" }, output)
|
||||
|
||||
// then - keywords should be injected normally
|
||||
const textPart = output.parts.find(p => p.type === "text")
|
||||
expect(textPart).toBeDefined()
|
||||
expect(textPart!.text).toContain("YOU MUST LEVERAGE ALL AVAILABLE AGENTS")
|
||||
expect(textPart!.text).toContain("implement this")
|
||||
})
|
||||
|
||||
test("should skip keyword injection for agent names containing 'builder'", async () => {
|
||||
// given - keyword-detector hook with a builder-variant agent name
|
||||
const collector = new ContextCollector()
|
||||
const hook = createKeywordDetectorHook(createMockPluginInput(), collector)
|
||||
const sessionID = "custom-builder-session"
|
||||
const output = {
|
||||
message: {} as Record<string, unknown>,
|
||||
parts: [{ type: "text", text: "search this codebase" }],
|
||||
}
|
||||
|
||||
// when - keyword detection runs with a builder-type agent
|
||||
await hook["chat.message"]({ sessionID, agent: "Custom-Builder" }, output)
|
||||
|
||||
// then - search-mode should NOT be injected
|
||||
const textPart = output.parts.find(p => p.type === "text")
|
||||
expect(textPart).toBeDefined()
|
||||
expect(textPart!.text).toBe("search this codebase")
|
||||
expect(textPart!.text).not.toContain("[search-mode]")
|
||||
})
|
||||
})
|
||||
|
||||
@@ -10,6 +10,7 @@
|
||||
|
||||
export {
|
||||
isPlannerAgent,
|
||||
isNonOmoAgent,
|
||||
isGptModel,
|
||||
isGeminiModel,
|
||||
getUltraworkSource,
|
||||
|
||||
@@ -23,6 +23,16 @@ export function isPlannerAgent(agentName?: string): boolean {
|
||||
return /\bplan\b/.test(normalized)
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if agent is a non-OMO agent (e.g., OpenCode's built-in Builder/Plan).
|
||||
* Non-OMO agents should not receive keyword injection (search-mode, analyze-mode, etc.).
|
||||
*/
|
||||
export function isNonOmoAgent(agentName?: string): boolean {
|
||||
if (!agentName) return false
|
||||
const lowerName = agentName.toLowerCase()
|
||||
return lowerName.includes("builder") || lowerName === "plan"
|
||||
}
|
||||
|
||||
export { isGptModel, isGeminiModel }
|
||||
|
||||
/** Ultrawork message source type */
|
||||
|
||||
@@ -337,7 +337,7 @@ describe("model fallback hook", () => {
|
||||
clearPendingModelFallback(sessionID)
|
||||
})
|
||||
|
||||
test("transforms model names for google provider via fallback chain", async () => {
|
||||
test("preserves canonical google preview model names via fallback chain", async () => {
|
||||
//#given
|
||||
const sessionID = "ses_model_fallback_google"
|
||||
clearPendingModelFallback(sessionID)
|
||||
@@ -351,20 +351,20 @@ describe("model fallback hook", () => {
|
||||
|
||||
// Set a custom fallback chain that routes through google
|
||||
setSessionFallbackChain(sessionID, [
|
||||
{ providers: ["google"], model: "gemini-3-pro" },
|
||||
{ providers: ["google"], model: "gemini-3.1-pro-preview" },
|
||||
])
|
||||
|
||||
const set = setPendingModelFallback(
|
||||
sessionID,
|
||||
"Oracle",
|
||||
"google",
|
||||
"gemini-3-pro",
|
||||
"gemini-3.1-pro-preview",
|
||||
)
|
||||
expect(set).toBe(true)
|
||||
|
||||
const output = {
|
||||
message: {
|
||||
model: { providerID: "google", modelID: "gemini-3-pro" },
|
||||
model: { providerID: "google", modelID: "gemini-3.1-pro-preview" },
|
||||
},
|
||||
parts: [{ type: "text", text: "continue" }],
|
||||
}
|
||||
@@ -372,10 +372,10 @@ describe("model fallback hook", () => {
|
||||
//#when
|
||||
await hook["chat.message"]?.({ sessionID }, output)
|
||||
|
||||
//#then — model name should remain gemini-3-pro because no google transform exists for this ID
|
||||
//#then: model name should remain gemini-3.1-pro-preview because no google transform exists for this ID
|
||||
expect(output.message["model"]).toEqual({
|
||||
providerID: "google",
|
||||
modelID: "gemini-3-pro",
|
||||
modelID: "gemini-3.1-pro-preview",
|
||||
})
|
||||
|
||||
clearPendingModelFallback(sessionID)
|
||||
|
||||
@@ -52,8 +52,6 @@ export function createNonInteractiveEnvHook(_ctx: PluginInput) {
|
||||
// The env vars (GIT_EDITOR=:, EDITOR=:, etc.) must ALWAYS be injected
|
||||
// for git commands to prevent interactive prompts.
|
||||
|
||||
// The bash tool always runs in a Unix-like shell (bash/sh), even on Windows
|
||||
// (via Git Bash, WSL, etc.), so always use unix export syntax.
|
||||
const envPrefix = buildEnvPrefix(NON_INTERACTIVE_ENV, "unix")
|
||||
|
||||
// Check if the command already starts with the prefix to avoid stacking.
|
||||
|
||||
82
src/hooks/openclaw.test.ts
Normal file
82
src/hooks/openclaw.test.ts
Normal file
@@ -0,0 +1,82 @@
|
||||
import { beforeEach, describe, expect, mock, test } from "bun:test"
|
||||
|
||||
const wakeOpenClawMock = mock(async () => null)
|
||||
|
||||
mock.module("../openclaw", () => ({
|
||||
wakeOpenClaw: wakeOpenClawMock,
|
||||
}))
|
||||
|
||||
describe("createOpenClawHook", () => {
|
||||
beforeEach(() => {
|
||||
wakeOpenClawMock.mockClear()
|
||||
})
|
||||
|
||||
test("maps session.created to session-start", async () => {
|
||||
const { createOpenClawHook } = await import("./openclaw")
|
||||
const hook = createOpenClawHook(
|
||||
{ directory: "/tmp/project" } as any,
|
||||
{ openclaw: { enabled: true } } as any,
|
||||
)
|
||||
|
||||
await hook?.event?.({
|
||||
event: {
|
||||
type: "session.created",
|
||||
properties: { sessionID: "session-1" },
|
||||
},
|
||||
})
|
||||
|
||||
expect(wakeOpenClawMock).toHaveBeenCalledWith(
|
||||
expect.anything(),
|
||||
"session-start",
|
||||
expect.objectContaining({
|
||||
projectPath: "/tmp/project",
|
||||
sessionId: "session-1",
|
||||
}),
|
||||
)
|
||||
})
|
||||
|
||||
test("uses tool.execute.before for question tools", async () => {
|
||||
const { createOpenClawHook } = await import("./openclaw")
|
||||
const hook = createOpenClawHook(
|
||||
{ directory: "/tmp/project" } as any,
|
||||
{ openclaw: { enabled: true } } as any,
|
||||
)
|
||||
|
||||
await hook?.["tool.execute.before"]?.(
|
||||
{ tool: "ask_user_question", sessionID: "session-2" },
|
||||
{ args: { questions: [{ question: "Need approval?", options: [{ label: "Yes" }] }] } },
|
||||
)
|
||||
|
||||
expect(wakeOpenClawMock).toHaveBeenCalledWith(
|
||||
expect.anything(),
|
||||
"ask-user-question",
|
||||
expect.objectContaining({
|
||||
projectPath: "/tmp/project",
|
||||
question: "Need approval?",
|
||||
sessionId: "session-2",
|
||||
}),
|
||||
)
|
||||
})
|
||||
|
||||
test("falls back to args.question string when questions array absent", async () => {
|
||||
const { createOpenClawHook } = await import("./openclaw")
|
||||
const hook = createOpenClawHook(
|
||||
{ directory: "/tmp/project" } as any,
|
||||
{ openclaw: { enabled: true } } as any,
|
||||
)
|
||||
|
||||
await hook?.["tool.execute.before"]?.(
|
||||
{ tool: "question", sessionID: "session-3" },
|
||||
{ args: { question: "Fallback?" } },
|
||||
)
|
||||
|
||||
expect(wakeOpenClawMock).toHaveBeenCalledWith(
|
||||
expect.anything(),
|
||||
"ask-user-question",
|
||||
expect.objectContaining({
|
||||
question: "Fallback?",
|
||||
sessionId: "session-3",
|
||||
}),
|
||||
)
|
||||
})
|
||||
})
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user