Compare commits

..

1 Commit

Author SHA1 Message Date
YeonGyu-Kim
ef8f22caba fix(boulder-state): treat plans without checkboxes as incomplete (fixes #2648)
GPT/Gemini Prometheus plans sometimes lack markdown checkboxes.
Previously getPlanProgress() returned isComplete=true for 0/0,
causing /start-work to skip Atlas execution.

Now total=0 correctly returns isComplete=false so start-work
detects the invalid plan format.

🤖 Generated with the assistance of [OhMyOpenCode](https://github.com/code-yeongyu/oh-my-opencode)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-18 12:06:24 +09:00
228 changed files with 2031 additions and 9433 deletions

Binary file not shown.

Before

Width:  |  Height:  |  Size: 278 KiB

View File

@@ -57,7 +57,6 @@ jobs:
bun test src/cli/doctor/format-default.test.ts bun test src/cli/doctor/format-default.test.ts
bun test src/tools/call-omo-agent/sync-executor.test.ts bun test src/tools/call-omo-agent/sync-executor.test.ts
bun test src/tools/call-omo-agent/session-creator.test.ts bun test src/tools/call-omo-agent/session-creator.test.ts
bun test src/tools/session-manager
bun test src/features/opencode-skill-loader/loader.test.ts bun test src/features/opencode-skill-loader/loader.test.ts
bun test src/hooks/anthropic-context-window-limit-recovery/recovery-hook.test.ts bun test src/hooks/anthropic-context-window-limit-recovery/recovery-hook.test.ts
bun test src/hooks/anthropic-context-window-limit-recovery/executor.test.ts bun test src/hooks/anthropic-context-window-limit-recovery/executor.test.ts
@@ -67,8 +66,9 @@ jobs:
# Enumerate subdirectories/files explicitly to EXCLUDE mock-heavy files # Enumerate subdirectories/files explicitly to EXCLUDE mock-heavy files
# that were already run in isolation above. # that were already run in isolation above.
# Excluded from src/cli: doctor/formatter.test.ts, doctor/format-default.test.ts # Excluded from src/cli: doctor/formatter.test.ts, doctor/format-default.test.ts
# Excluded from src/tools: call-omo-agent/sync-executor.test.ts, call-omo-agent/session-creator.test.ts, session-manager (all) # Excluded from src/tools: call-omo-agent/sync-executor.test.ts, call-omo-agent/session-creator.test.ts
# Excluded from src/hooks/anthropic-context-window-limit-recovery: recovery-hook.test.ts, executor.test.ts # Excluded from src/hooks/anthropic-context-window-limit-recovery: recovery-hook.test.ts, executor.test.ts
# Excluded from src/tools: call-omo-agent/sync-executor.test.ts, call-omo-agent/session-creator.test.ts
bun test bin script src/config src/mcp src/index.test.ts \ bun test bin script src/config src/mcp src/index.test.ts \
src/agents src/shared \ src/agents src/shared \
src/cli/run src/cli/config-manager src/cli/mcp-oauth \ src/cli/run src/cli/config-manager src/cli/mcp-oauth \
@@ -77,7 +77,7 @@ jobs:
src/cli/doctor/runner.test.ts src/cli/doctor/checks \ src/cli/doctor/runner.test.ts src/cli/doctor/checks \
src/tools/ast-grep src/tools/background-task src/tools/delegate-task \ src/tools/ast-grep src/tools/background-task src/tools/delegate-task \
src/tools/glob src/tools/grep src/tools/interactive-bash \ src/tools/glob src/tools/grep src/tools/interactive-bash \
src/tools/look-at src/tools/lsp \ src/tools/look-at src/tools/lsp src/tools/session-manager \
src/tools/skill src/tools/skill-mcp src/tools/slashcommand src/tools/task \ src/tools/skill src/tools/skill-mcp src/tools/slashcommand src/tools/task \
src/tools/call-omo-agent/background-agent-executor.test.ts \ src/tools/call-omo-agent/background-agent-executor.test.ts \
src/tools/call-omo-agent/background-executor.test.ts \ src/tools/call-omo-agent/background-executor.test.ts \

1
.gitignore vendored
View File

@@ -36,4 +36,3 @@ test-injection/
notepad.md notepad.md
oauth-success.html oauth-success.html
*.bun-build *.bun-build
.omx/

View File

@@ -79,65 +79,47 @@ Pass `REPO`, `REPORT_DIR`, and `COMMIT_SHA` to every subagent.
--- ---
--- ## Phase 1: Fetch All Open Items
## Phase 1: Fetch All Open Items (CORRECTED) <fetch>
Paginate if 500 results returned.
**IMPORTANT:** `body` and `comments` fields may contain control characters that break jq parsing. Fetch basic metadata first, then fetch full details per-item in subagents.
```bash ```bash
# Step 1: Fetch basic metadata (without body/comments to avoid JSON parsing issues) ISSUES=$(gh issue list --repo $REPO --state open --limit 500 \
ISSUES_LIST=$(gh issue list --repo $REPO --state open --limit 500 \ --json number,title,state,createdAt,updatedAt,labels,author,body,comments)
--json number,title,labels,author,createdAt) ISSUE_LEN=$(echo "$ISSUES" | jq length)
ISSUE_COUNT=$(echo "$ISSUES_LIST" | jq length) if [ "$ISSUE_LEN" -eq 500 ]; then
LAST_DATE=$(echo "$ISSUES" | jq -r '.[-1].createdAt')
# Paginate if needed
if [ "$ISSUE_COUNT" -eq 500 ]; then
LAST_DATE=$(echo "$ISSUES_LIST" | jq -r '.[-1].createdAt')
while true; do while true; do
PAGE=$(gh issue list --repo $REPO --state open --limit 500 \ PAGE=$(gh issue list --repo $REPO --state open --limit 500 \
--search "created:<$LAST_DATE" \ --search "created:<$LAST_DATE" \
--json number,title,labels,author,createdAt) --json number,title,state,createdAt,updatedAt,labels,author,body,comments)
PAGE_COUNT=$(echo "$PAGE" | jq length) PAGE_LEN=$(echo "$PAGE" | jq length)
[ "$PAGE_COUNT" -eq 0 ] && break [ "$PAGE_LEN" -eq 0 ] && break
ISSUES_LIST=$(echo "$ISSUES_LIST" "$PAGE" | jq -s '.[0] + .[1] | unique_by(.number)') ISSUES=$(echo "[$ISSUES, $PAGE]" | jq -s 'add | unique_by(.number)')
ISSUE_COUNT=$(echo "$ISSUES_LIST" | jq length) [ "$PAGE_LEN" -lt 500 ] && break
[ "$PAGE_COUNT" -lt 500 ] && break
LAST_DATE=$(echo "$PAGE" | jq -r '.[-1].createdAt') LAST_DATE=$(echo "$PAGE" | jq -r '.[-1].createdAt')
done done
fi fi
# Same for PRs PRS=$(gh pr list --repo $REPO --state open --limit 500 \
PRS_LIST=$(gh pr list --repo $REPO --state open --limit 500 \ --json number,title,state,createdAt,updatedAt,labels,author,body,headRefName,baseRefName,isDraft,mergeable,reviewDecision,statusCheckRollup)
--json number,title,labels,author,headRefName,baseRefName,isDraft,createdAt) PR_LEN=$(echo "$PRS" | jq length)
PR_COUNT=$(echo "$PRS_LIST" | jq length) if [ "$PR_LEN" -eq 500 ]; then
LAST_DATE=$(echo "$PRS" | jq -r '.[-1].createdAt')
if [ "$PR_COUNT" -eq 500 ]; then
LAST_DATE=$(echo "$PRS_LIST" | jq -r '.[-1].createdAt')
while true; do while true; do
PAGE=$(gh pr list --repo $REPO --state open --limit 500 \ PAGE=$(gh pr list --repo $REPO --state open --limit 500 \
--search "created:<$LAST_DATE" \ --search "created:<$LAST_DATE" \
--json number,title,labels,author,headRefName,baseRefName,isDraft,createdAt) --json number,title,state,createdAt,updatedAt,labels,author,body,headRefName,baseRefName,isDraft,mergeable,reviewDecision,statusCheckRollup)
PAGE_COUNT=$(echo "$PAGE" | jq length) PAGE_LEN=$(echo "$PAGE" | jq length)
[ "$PAGE_COUNT" -eq 0 ] && break [ "$PAGE_LEN" -eq 0 ] && break
PRS_LIST=$(echo "$PRS_LIST" "$PAGE" | jq -s '.[0] + .[1] | unique_by(.number)') PRS=$(echo "[$PRS, $PAGE]" | jq -s 'add | unique_by(.number)')
PR_COUNT=$(echo "$PRS_LIST" | jq length) [ "$PAGE_LEN" -lt 500 ] && break
[ "$PAGE_COUNT" -lt 500 ] && break
LAST_DATE=$(echo "$PAGE" | jq -r '.[-1].createdAt') LAST_DATE=$(echo "$PAGE" | jq -r '.[-1].createdAt')
done done
fi fi
echo "Total issues: $ISSUE_COUNT, Total PRs: $PR_COUNT"
``` ```
</fetch>
**LARGE REPOSITORY HANDLING:**
If the total number of items exceeds 50, you MUST process ALL items. Use the pagination code above to fetch every single open issue and PR.
**DO NOT** sample or limit to 50 items - process the entire backlog.
Example: If there are 500 open issues, spawn 500 subagents. If there are 1000 open PRs, spawn 1000 subagents.
**Note:** Background task system will queue excess tasks automatically.
--- ---
@@ -154,36 +136,7 @@ Example: If there are 500 open issues, spawn 500 subagents. If there are 1000 op
--- ---
## Phase 3: Spawn Subagents (Individual Tool Calls) ## Phase 3: Spawn Subagents
**CRITICAL: Create tasks ONE BY ONE using individual `task_create` tool calls. NEVER batch or script.**
For each item, execute these steps sequentially:
### Step 3.1: Create Task Record
```typescript
task_create(
subject="Triage: #{number} {title}",
description="GitHub {issue|PR} triage analysis - {type}",
metadata={"type": "{ISSUE_QUESTION|ISSUE_BUG|ISSUE_FEATURE|ISSUE_OTHER|PR_BUGFIX|PR_OTHER}", "number": {number}}
)
```
### Step 3.2: Spawn Analysis Subagent (Background)
```typescript
task(
category="quick",
run_in_background=true,
load_skills=[],
prompt=SUBAGENT_PROMPT
)
```
**ABSOLUTE RULES for Subagents:**
- **ONLY ANALYZE** - Never take action on GitHub (no comments, merges, closes)
- **READ-ONLY** - Use tools only for reading code/GitHub data
- **WRITE REPORT ONLY** - Output goes to `{REPORT_DIR}/{issue|pr}-{number}.md` via Write tool
- **EVIDENCE REQUIRED** - Every claim must have GitHub permalink as proof
``` ```
For each item: For each item:
@@ -217,7 +170,6 @@ ABSOLUTE RULES (violating ANY = critical failure):
- Your ONLY writable output: {REPORT_DIR}/{issue|pr}-{number}.md via the Write tool - Your ONLY writable output: {REPORT_DIR}/{issue|pr}-{number}.md via the Write tool
``` ```
--- ---
### ISSUE_QUESTION ### ISSUE_QUESTION

View File

@@ -4,7 +4,7 @@
## OVERVIEW ## OVERVIEW
OpenCode plugin (npm: `oh-my-opencode`) that extends Claude Code (OpenCode fork) with multi-agent orchestration, 48 lifecycle hooks, 26 tools, skill/command/MCP systems, and Claude Code compatibility. 1268 TypeScript files, 160k LOC. OpenCode plugin (npm: `oh-my-opencode`) that extends Claude Code (OpenCode fork) with multi-agent orchestration, 46 lifecycle hooks, 26 tools, skill/command/MCP systems, and Claude Code compatibility. 1268 TypeScript files, 160k LOC.
## STRUCTURE ## STRUCTURE
@@ -14,14 +14,14 @@ oh-my-opencode/
│ ├── index.ts # Plugin entry: loadConfig → createManagers → createTools → createHooks → createPluginInterface │ ├── index.ts # Plugin entry: loadConfig → createManagers → createTools → createHooks → createPluginInterface
│ ├── plugin-config.ts # JSONC multi-level config: user → project → defaults (Zod v4) │ ├── plugin-config.ts # JSONC multi-level config: user → project → defaults (Zod v4)
│ ├── agents/ # 11 agents (Sisyphus, Hephaestus, Oracle, Librarian, Explore, Atlas, Prometheus, Metis, Momus, Multimodal-Looker, Sisyphus-Junior) │ ├── agents/ # 11 agents (Sisyphus, Hephaestus, Oracle, Librarian, Explore, Atlas, Prometheus, Metis, Momus, Multimodal-Looker, Sisyphus-Junior)
│ ├── hooks/ # 48 lifecycle hooks across dedicated modules and standalone files │ ├── hooks/ # 46 hooks across 45 directories + 11 standalone files
│ ├── tools/ # 26 tools across 15 directories │ ├── tools/ # 26 tools across 15 directories
│ ├── features/ # 19 feature modules (background-agent, skill-loader, tmux, MCP-OAuth, etc.) │ ├── features/ # 19 feature modules (background-agent, skill-loader, tmux, MCP-OAuth, etc.)
│ ├── shared/ # 95+ utility files in 13 categories │ ├── shared/ # 95+ utility files in 13 categories
│ ├── config/ # Zod v4 schema system (24 files) │ ├── config/ # Zod v4 schema system (24 files)
│ ├── cli/ # CLI: install, run, doctor, mcp-oauth (Commander.js) │ ├── cli/ # CLI: install, run, doctor, mcp-oauth (Commander.js)
│ ├── mcp/ # 3 built-in remote MCPs (websearch, context7, grep_app) │ ├── mcp/ # 3 built-in remote MCPs (websearch, context7, grep_app)
│ ├── plugin/ # 8 OpenCode hook handlers + 48 hook composition │ ├── plugin/ # 8 OpenCode hook handlers + 46 hook composition
│ └── plugin-handlers/ # 6-phase config loading pipeline │ └── plugin-handlers/ # 6-phase config loading pipeline
├── packages/ # Monorepo: cli-runner, 12 platform binaries ├── packages/ # Monorepo: cli-runner, 12 platform binaries
└── local-ignore/ # Dev-only test fixtures └── local-ignore/ # Dev-only test fixtures
@@ -34,7 +34,7 @@ OhMyOpenCodePlugin(ctx)
├─→ loadPluginConfig() # JSONC parse → project/user merge → Zod validate → migrate ├─→ loadPluginConfig() # JSONC parse → project/user merge → Zod validate → migrate
├─→ createManagers() # TmuxSessionManager, BackgroundManager, SkillMcpManager, ConfigHandler ├─→ createManagers() # TmuxSessionManager, BackgroundManager, SkillMcpManager, ConfigHandler
├─→ createTools() # SkillContext + AvailableCategories + ToolRegistry (26 tools) ├─→ createTools() # SkillContext + AvailableCategories + ToolRegistry (26 tools)
├─→ createHooks() # 3-tier: Core(39) + Continuation(7) + Skill(2) = 48 hooks ├─→ createHooks() # 3-tier: Core(37) + Continuation(7) + Skill(2) = 46 hooks
└─→ createPluginInterface() # 8 OpenCode hook handlers → PluginInterface └─→ createPluginInterface() # 8 OpenCode hook handlers → PluginInterface
``` ```
@@ -97,7 +97,7 @@ Fields: agents (14 overridable, 21 fields each), categories (8 built-in + custom
- **Test pattern**: Bun test (`bun:test`), co-located `*.test.ts`, given/when/then style (nested describe with `#given`/`#when`/`#then` prefixes) - **Test pattern**: Bun test (`bun:test`), co-located `*.test.ts`, given/when/then style (nested describe with `#given`/`#when`/`#then` prefixes)
- **CI test split**: mock-heavy tests run in isolation (separate `bun test` processes), rest in batch - **CI test split**: mock-heavy tests run in isolation (separate `bun test` processes), rest in batch
- **Factory pattern**: `createXXX()` for all tools, hooks, agents - **Factory pattern**: `createXXX()` for all tools, hooks, agents
- **Hook tiers**: Session (23) → Tool-Guard (12) → Transform (4) → Continuation (7) → Skill (2) - **Hook tiers**: Session (23) → Tool-Guard (10) → Transform (4) → Continuation (7) → Skill (2)
- **Agent modes**: `primary` (respects UI model) vs `subagent` (own fallback chain) vs `all` - **Agent modes**: `primary` (respects UI model) vs `subagent` (own fallback chain) vs `all`
- **Model resolution**: 4-step: override → category-default → provider-fallback → system-default - **Model resolution**: 4-step: override → category-default → provider-fallback → system-default
- **Config format**: JSONC with comments, Zod v4 validation, snake_case keys - **Config format**: JSONC with comments, Zod v4 validation, snake_case keys

View File

@@ -4,17 +4,6 @@
> コアメンテナーのQが負傷したため、今週は Issue/PR への返信とリリースが遅れる可能性があります。 > コアメンテナーのQが負傷したため、今週は Issue/PR への返信とリリースが遅れる可能性があります。
> ご理解とご支援に感謝します。 > ご理解とご支援に感謝します。
> [!TIP]
> **Building in Public**
>
> メンテナーが Jobdori を使い、oh-my-opencode をリアルタイムで開発・メンテナンスしています。Jobdori は OpenClaw をベースに大幅カスタマイズされた AI アシスタントです。
> すべての機能開発、修正、Issue トリアージを Discord でライブでご覧いただけます。
>
> [![Building in Public](./.github/assets/building-in-public.png)](https://discord.gg/PUwSMR9XNk)
>
> [**→ #building-in-public で確認する**](https://discord.gg/PUwSMR9XNk)
> [!NOTE] > [!NOTE]
> >
> [![Sisyphus Labs - Sisyphus is the agent that codes like your team.](./.github/assets/sisyphuslabs.png?v=2)](https://sisyphuslabs.ai) > [![Sisyphus Labs - Sisyphus is the agent that codes like your team.](./.github/assets/sisyphuslabs.png?v=2)](https://sisyphuslabs.ai)

View File

@@ -4,17 +4,6 @@
> 핵심 메인테이너 Q가 부상을 입어, 이번 주에는 이슈/PR 응답 및 릴리스가 지연될 수 있습니다. > 핵심 메인테이너 Q가 부상을 입어, 이번 주에는 이슈/PR 응답 및 릴리스가 지연될 수 있습니다.
> 양해와 응원에 감사드립니다. > 양해와 응원에 감사드립니다.
> [!TIP]
> **Building in Public**
>
> 메인테이너가 Jobdori를 통해 oh-my-opencode를 실시간으로 개발하고 있습니다. Jobdori는 OpenClaw를 기반으로 대폭 커스터마이징된 AI 어시스턴트입니다.
> 모든 기능 개발, 버그 수정, 이슈 트리아지를 Discord에서 실시간으로 확인하세요.
>
> [![Building in Public](./.github/assets/building-in-public.png)](https://discord.gg/PUwSMR9XNk)
>
> [**→ #building-in-public에서 확인하기**](https://discord.gg/PUwSMR9XNk)
> [!TIP] > [!TIP]
> 저희와 함께 하세요! > 저희와 함께 하세요!
> >

View File

@@ -1,13 +1,3 @@
> [!TIP]
> **Building in Public**
>
> The maintainer builds and maintains oh-my-opencode in real-time with Jobdori, an AI assistant built on a heavily customized fork of OpenClaw.
> Every feature, every fix, every issue triage — live in our Discord.
>
> [![Building in Public](./.github/assets/building-in-public.png)](https://discord.gg/PUwSMR9XNk)
>
> [**→ Watch it happen in #building-in-public**](https://discord.gg/PUwSMR9XNk)
> [!NOTE] > [!NOTE]
> >
> [![Sisyphus Labs - Sisyphus is the agent that codes like your team.](./.github/assets/sisyphuslabs.png?v=2)](https://sisyphuslabs.ai) > [![Sisyphus Labs - Sisyphus is the agent that codes like your team.](./.github/assets/sisyphuslabs.png?v=2)](https://sisyphuslabs.ai)
@@ -314,7 +304,7 @@ See full [Features Documentation](docs/reference/features.md).
- **Claude Code Compatibility**: Full hook system, commands, skills, agents, MCPs - **Claude Code Compatibility**: Full hook system, commands, skills, agents, MCPs
- **Built-in MCPs**: websearch (Exa), context7 (docs), grep_app (GitHub search) - **Built-in MCPs**: websearch (Exa), context7 (docs), grep_app (GitHub search)
- **Session Tools**: List, read, search, and analyze session history - **Session Tools**: List, read, search, and analyze session history
- **Productivity Features**: Ralph Loop, Todo Enforcer, Comment Checker, Think Mode, and more - **Productivity Features**: Ralph Loop, Todo Enforcer, GPT permission-tail continuation, Comment Checker, Think Mode, and more
- **Model Setup**: Agent-model matching is built into the [Installation Guide](docs/guide/installation.md#step-5-understand-your-model-setup) - **Model Setup**: Agent-model matching is built into the [Installation Guide](docs/guide/installation.md#step-5-understand-your-model-setup)
## Configuration ## Configuration
@@ -331,7 +321,7 @@ See [Configuration Documentation](docs/reference/configuration.md).
- **Sisyphus Agent**: Main orchestrator with Prometheus (Planner) and Metis (Plan Consultant) - **Sisyphus Agent**: Main orchestrator with Prometheus (Planner) and Metis (Plan Consultant)
- **Background Tasks**: Configure concurrency limits per provider/model - **Background Tasks**: Configure concurrency limits per provider/model
- **Categories**: Domain-specific task delegation (`visual`, `business-logic`, custom) - **Categories**: Domain-specific task delegation (`visual`, `business-logic`, custom)
- **Hooks**: 25+ built-in hooks, all configurable via `disabled_hooks` - **Hooks**: 25+ built-in hooks, including `gpt-permission-continuation`, all configurable via `disabled_hooks`
- **MCPs**: Built-in websearch (Exa), context7 (docs), grep_app (GitHub search) - **MCPs**: Built-in websearch (Exa), context7 (docs), grep_app (GitHub search)
- **LSP**: Full LSP support with refactoring tools - **LSP**: Full LSP support with refactoring tools
- **Experimental**: Aggressive truncation, auto-resume, and more - **Experimental**: Aggressive truncation, auto-resume, and more

View File

@@ -4,17 +4,6 @@
> Ключевой мейнтейнер Q получил травму, поэтому на этой неделе ответы по issue/PR и релизы могут задерживаться. > Ключевой мейнтейнер Q получил травму, поэтому на этой неделе ответы по issue/PR и релизы могут задерживаться.
> Спасибо за терпение и поддержку. > Спасибо за терпение и поддержку.
> [!TIP]
> **Building in Public**
>
> Мейнтейнер разрабатывает и поддерживает oh-my-opencode в режиме реального времени с помощью Jobdori — ИИ-ассистента на базе глубоко кастомизированной версии OpenClaw.
> Каждая фича, каждый фикс, каждый триаж issue — в прямом эфире в нашем Discord.
>
> [![Building in Public](./.github/assets/building-in-public.png)](https://discord.gg/PUwSMR9XNk)
>
> [**→ Смотрите в #building-in-public**](https://discord.gg/PUwSMR9XNk)
> [!NOTE] > [!NOTE]
> >
> [![Sisyphus Labs - Sisyphus is the agent that codes like your team.](./.github/assets/sisyphuslabs.png?v=2)](https://sisyphuslabs.ai) > [![Sisyphus Labs - Sisyphus is the agent that codes like your team.](./.github/assets/sisyphuslabs.png?v=2)](https://sisyphuslabs.ai)

View File

@@ -4,17 +4,6 @@
> 核心维护者 Q 因受伤,本周 issue/PR 回复和发布可能会延迟。 > 核心维护者 Q 因受伤,本周 issue/PR 回复和发布可能会延迟。
> 感谢你的耐心与支持。 > 感谢你的耐心与支持。
> [!TIP]
> **Building in Public**
>
> 维护者正在使用 Jobdori 实时开发和维护 oh-my-opencode。Jobdori 是基于 OpenClaw 深度定制的 AI 助手。
> 每个功能开发、每次修复、每次 Issue 分类,都在 Discord 上实时进行。
>
> [![Building in Public](./.github/assets/building-in-public.png)](https://discord.gg/PUwSMR9XNk)
>
> [**→ 在 #building-in-public 频道中查看**](https://discord.gg/PUwSMR9XNk)
> [!NOTE] > [!NOTE]
> >
> [![Sisyphus Labs - Sisyphus is the agent that codes like your team.](./.github/assets/sisyphuslabs.png?v=2)](https://sisyphuslabs.ai) > [![Sisyphus Labs - Sisyphus is the agent that codes like your team.](./.github/assets/sisyphuslabs.png?v=2)](https://sisyphuslabs.ai)

View File

@@ -3716,10 +3716,15 @@
"minimum": 10, "minimum": 10,
"maximum": 9007199254740991 "maximum": 9007199254740991
}, },
"consecutiveThreshold": { "windowSize": {
"type": "integer", "type": "integer",
"minimum": 5, "minimum": 5,
"maximum": 9007199254740991 "maximum": 9007199254740991
},
"repetitionThresholdPercent": {
"type": "number",
"exclusiveMinimum": 0,
"maximum": 100
} }
}, },
"additionalProperties": false "additionalProperties": false
@@ -3736,147 +3741,6 @@
}, },
"additionalProperties": false "additionalProperties": false
}, },
"openclaw": {
"type": "object",
"properties": {
"enabled": {
"default": false,
"type": "boolean"
},
"gateways": {
"default": {},
"type": "object",
"propertyNames": {
"type": "string"
},
"additionalProperties": {
"type": "object",
"properties": {
"type": {
"default": "http",
"type": "string",
"enum": [
"http",
"command"
]
},
"url": {
"type": "string"
},
"method": {
"default": "POST",
"type": "string"
},
"headers": {
"type": "object",
"propertyNames": {
"type": "string"
},
"additionalProperties": {
"type": "string"
}
},
"command": {
"type": "string"
},
"timeout": {
"type": "number"
}
},
"required": [
"type",
"method"
],
"additionalProperties": false
}
},
"hooks": {
"default": {},
"type": "object",
"propertyNames": {
"type": "string"
},
"additionalProperties": {
"type": "object",
"properties": {
"enabled": {
"default": true,
"type": "boolean"
},
"gateway": {
"type": "string"
},
"instruction": {
"type": "string"
}
},
"required": [
"enabled",
"gateway",
"instruction"
],
"additionalProperties": false
}
},
"replyListener": {
"type": "object",
"properties": {
"discordBotToken": {
"type": "string"
},
"discordChannelId": {
"type": "string"
},
"discordMention": {
"type": "string"
},
"authorizedDiscordUserIds": {
"default": [],
"type": "array",
"items": {
"type": "string"
}
},
"telegramBotToken": {
"type": "string"
},
"telegramChatId": {
"type": "string"
},
"pollIntervalMs": {
"default": 3000,
"type": "number"
},
"rateLimitPerMinute": {
"default": 10,
"type": "number"
},
"maxMessageLength": {
"default": 500,
"type": "number"
},
"includePrefix": {
"default": true,
"type": "boolean"
}
},
"required": [
"authorizedDiscordUserIds",
"pollIntervalMs",
"rateLimitPerMinute",
"maxMessageLength",
"includePrefix"
],
"additionalProperties": false
}
},
"required": [
"enabled",
"gateways",
"hooks"
],
"additionalProperties": false
},
"babysitting": { "babysitting": {
"type": "object", "type": "object",
"properties": { "properties": {

View File

@@ -8,7 +8,7 @@ Think of AI models as developers on a team. Each has a different brain, differen
This isn't a bug. It's the foundation of the entire system. This isn't a bug. It's the foundation of the entire system.
Oh My OpenAgent assigns each agent a model that matches its _working style_ — like building a team where each person is in the role that fits their personality. Oh My OpenCode assigns each agent a model that matches its _working style_ — like building a team where each person is in the role that fits their personality.
### Sisyphus: The Sociable Lead ### Sisyphus: The Sociable Lead
@@ -121,7 +121,6 @@ Principle-driven, explicit reasoning, deep technical capability. Best for agents
| ----------------- | ----------------------------------------------------------------------------------------------- | | ----------------- | ----------------------------------------------------------------------------------------------- |
| **GPT-5.3 Codex** | Deep coding powerhouse. Autonomous exploration. Required for Hephaestus. | | **GPT-5.3 Codex** | Deep coding powerhouse. Autonomous exploration. Required for Hephaestus. |
| **GPT-5.4** | High intelligence, strategic reasoning. Default for Oracle, Momus, and a key fallback for Prometheus / Atlas. Uses xhigh variant for Momus. | | **GPT-5.4** | High intelligence, strategic reasoning. Default for Oracle, Momus, and a key fallback for Prometheus / Atlas. Uses xhigh variant for Momus. |
| **GPT-5.4 Mini** | Fast + strong reasoning. Good for lightweight autonomous tasks. Default for quick category. |
| **GPT-5-Nano** | Ultra-cheap, fast. Good for simple utility tasks. | | **GPT-5-Nano** | Ultra-cheap, fast. Good for simple utility tasks. |
### Other Models ### Other Models
@@ -171,7 +170,7 @@ When agents delegate work, they don't pick a model name — they pick a **catego
| `ultrabrain` | Maximum reasoning needed | GPT-5.4 → Gemini 3.1 Pro → Claude Opus → opencode-go/glm-5 | | `ultrabrain` | Maximum reasoning needed | GPT-5.4 → Gemini 3.1 Pro → Claude Opus → opencode-go/glm-5 |
| `deep` | Deep coding, complex logic | GPT-5.3 Codex → Claude Opus → Gemini 3.1 Pro | | `deep` | Deep coding, complex logic | GPT-5.3 Codex → Claude Opus → Gemini 3.1 Pro |
| `artistry` | Creative, novel approaches | Gemini 3.1 Pro → Claude Opus → GPT-5.4 | | `artistry` | Creative, novel approaches | Gemini 3.1 Pro → Claude Opus → GPT-5.4 |
| `quick` | Simple, fast tasks | GPT-5.4 Mini → Claude Haiku → Gemini Flash → opencode-go/minimax-m2.5 → GPT-5-Nano | | `quick` | Simple, fast tasks | Claude Haiku → Gemini Flash → opencode-go/minimax-m2.5 → GPT-5-Nano |
| `unspecified-high` | General complex work | Claude Opus → GPT-5.4 → GLM 5 → K2P5 → opencode-go/glm-5 → Kimi K2.5 | | `unspecified-high` | General complex work | Claude Opus → GPT-5.4 → GLM 5 → K2P5 → opencode-go/glm-5 → Kimi K2.5 |
| `unspecified-low` | General standard work | Claude Sonnet → GPT-5.3 Codex → opencode-go/kimi-k2.5 → Gemini Flash | | `unspecified-low` | General standard work | Claude Sonnet → GPT-5.3 Codex → opencode-go/kimi-k2.5 → Gemini Flash |
| `writing` | Text, docs, prose | Gemini Flash → opencode-go/kimi-k2.5 → Claude Sonnet | | `writing` | Text, docs, prose | Gemini Flash → opencode-go/kimi-k2.5 → Claude Sonnet |
@@ -186,7 +185,7 @@ See the [Orchestration System Guide](./orchestration.md) for how agents dispatch
```jsonc ```jsonc
{ {
"$schema": "https://raw.githubusercontent.com/code-yeongyu/oh-my-openagent/dev/assets/oh-my-openagent.schema.json", "$schema": "https://raw.githubusercontent.com/code-yeongyu/oh-my-openagent/dev/assets/oh-my-opencode.schema.json",
"agents": { "agents": {
// Main orchestrator: Claude Opus or Kimi K2.5 work best // Main orchestrator: Claude Opus or Kimi K2.5 work best

View File

@@ -5,7 +5,7 @@
Paste this into your llm agent session: Paste this into your llm agent session:
``` ```
Install and configure oh-my-openagent by following the instructions here: Install and configure oh-my-opencode by following the instructions here:
https://raw.githubusercontent.com/code-yeongyu/oh-my-openagent/refs/heads/dev/docs/guide/installation.md https://raw.githubusercontent.com/code-yeongyu/oh-my-openagent/refs/heads/dev/docs/guide/installation.md
``` ```
@@ -14,8 +14,8 @@ https://raw.githubusercontent.com/code-yeongyu/oh-my-openagent/refs/heads/dev/do
Run the interactive installer: Run the interactive installer:
```bash ```bash
bunx oh-my-openagent install # recommended bunx oh-my-opencode install # recommended
npx oh-my-openagent install # alternative npx oh-my-opencode install # alternative
``` ```
> **Note**: The CLI ships with standalone binaries for all major platforms. No runtime (Bun/Node.js) is required for CLI execution after installation. > **Note**: The CLI ships with standalone binaries for all major platforms. No runtime (Bun/Node.js) is required for CLI execution after installation.
@@ -34,7 +34,7 @@ After you install it, you can read this [overview guide](./overview.md) to under
> curl -fsSL https://raw.githubusercontent.com/code-yeongyu/oh-my-openagent/refs/heads/dev/docs/guide/installation.md > curl -fsSL https://raw.githubusercontent.com/code-yeongyu/oh-my-openagent/refs/heads/dev/docs/guide/installation.md
> ``` > ```
If you're an LLM Agent helping set up oh-my-openagent, Welcome! You'll now help your user to install the best agent harness, OhMyOpenAgent. Tell user this with greetings, with text 'oMoMoMoMo...' If you're an LLM Agent helping set up oh-my-opencode, Welcome! You'll now help your user to install the best agent harness, OhMyOpenCode. Tell user this with greetings, with text 'oMoMoMoMo...'
follow these steps: follow these steps:
@@ -96,19 +96,19 @@ Spawn a subagent to handle installation and report back - to save context.
Based on user's answers, run the CLI installer with appropriate flags: Based on user's answers, run the CLI installer with appropriate flags:
```bash ```bash
bunx oh-my-openagent install --no-tui --claude=<yes|no|max20> --gemini=<yes|no> --copilot=<yes|no> [--openai=<yes|no>] [--opencode-go=<yes|no>] [--opencode-zen=<yes|no>] [--zai-coding-plan=<yes|no>] bunx oh-my-opencode install --no-tui --claude=<yes|no|max20> --gemini=<yes|no> --copilot=<yes|no> [--openai=<yes|no>] [--opencode-go=<yes|no>] [--opencode-zen=<yes|no>] [--zai-coding-plan=<yes|no>]
``` ```
**Examples:** **Examples:**
- User has all native subscriptions: `bunx oh-my-openagent install --no-tui --claude=max20 --openai=yes --gemini=yes --copilot=no` - User has all native subscriptions: `bunx oh-my-opencode install --no-tui --claude=max20 --openai=yes --gemini=yes --copilot=no`
- User has only Claude: `bunx oh-my-openagent install --no-tui --claude=yes --gemini=no --copilot=no` - User has only Claude: `bunx oh-my-opencode install --no-tui --claude=yes --gemini=no --copilot=no`
- User has Claude + OpenAI: `bunx oh-my-openagent install --no-tui --claude=yes --openai=yes --gemini=no --copilot=no` - User has Claude + OpenAI: `bunx oh-my-opencode install --no-tui --claude=yes --openai=yes --gemini=no --copilot=no`
- User has only GitHub Copilot: `bunx oh-my-openagent install --no-tui --claude=no --gemini=no --copilot=yes` - User has only GitHub Copilot: `bunx oh-my-opencode install --no-tui --claude=no --gemini=no --copilot=yes`
- User has Z.ai for Librarian: `bunx oh-my-openagent install --no-tui --claude=yes --gemini=no --copilot=no --zai-coding-plan=yes` - User has Z.ai for Librarian: `bunx oh-my-opencode install --no-tui --claude=yes --gemini=no --copilot=no --zai-coding-plan=yes`
- User has only OpenCode Zen: `bunx oh-my-openagent install --no-tui --claude=no --gemini=no --copilot=no --opencode-zen=yes` - User has only OpenCode Zen: `bunx oh-my-opencode install --no-tui --claude=no --gemini=no --copilot=no --opencode-zen=yes`
- User has OpenCode Go only: `bunx oh-my-openagent install --no-tui --claude=no --openai=no --gemini=no --copilot=no --opencode-go=yes` - User has OpenCode Go only: `bunx oh-my-opencode install --no-tui --claude=no --openai=no --gemini=no --copilot=no --opencode-go=yes`
- User has no subscriptions: `bunx oh-my-openagent install --no-tui --claude=no --gemini=no --copilot=no` - User has no subscriptions: `bunx oh-my-opencode install --no-tui --claude=no --gemini=no --copilot=no`
The CLI will: The CLI will:
@@ -120,7 +120,7 @@ The CLI will:
```bash ```bash
opencode --version # Should be 1.0.150 or higher opencode --version # Should be 1.0.150 or higher
cat ~/.config/opencode/opencode.json # Should contain "oh-my-openagent" in plugin array cat ~/.config/opencode/opencode.json # Should contain "oh-my-opencode" in plugin array
``` ```
### Step 4: Configure Authentication ### Step 4: Configure Authentication
@@ -145,7 +145,7 @@ First, add the opencode-antigravity-auth plugin:
```json ```json
{ {
"plugin": ["oh-my-openagent", "opencode-antigravity-auth@latest"] "plugin": ["oh-my-opencode", "opencode-antigravity-auth@latest"]
} }
``` ```
@@ -154,9 +154,9 @@ First, add the opencode-antigravity-auth plugin:
You'll also need full model settings in `opencode.json`. You'll also need full model settings in `opencode.json`.
Read the [opencode-antigravity-auth documentation](https://github.com/NoeFabris/opencode-antigravity-auth), copy the full model configuration from the README, and merge carefully to avoid breaking the user's existing setup. The plugin now uses a **variant system** — models like `antigravity-gemini-3-pro` support `low`/`high` variants instead of separate `-low`/`-high` model entries. Read the [opencode-antigravity-auth documentation](https://github.com/NoeFabris/opencode-antigravity-auth), copy the full model configuration from the README, and merge carefully to avoid breaking the user's existing setup. The plugin now uses a **variant system** — models like `antigravity-gemini-3-pro` support `low`/`high` variants instead of separate `-low`/`-high` model entries.
##### oh-my-openagent Agent Model Override ##### oh-my-opencode Agent Model Override
The `opencode-antigravity-auth` plugin uses different model names than the built-in Google auth. Override the agent models in `oh-my-openagent.json` (or `.opencode/oh-my-openagent.json`): The `opencode-antigravity-auth` plugin uses different model names than the built-in Google auth. Override the agent models in `oh-my-opencode.json` (or `.opencode/oh-my-opencode.json`):
```json ```json
{ {
@@ -176,7 +176,7 @@ The `opencode-antigravity-auth` plugin uses different model names than the built
**Available models (Gemini CLI quota)**: **Available models (Gemini CLI quota)**:
- `google/gemini-2.5-flash`, `google/gemini-2.5-pro`, `google/gemini-3-flash-preview`, `google/gemini-3.1-pro-preview` - `google/gemini-2.5-flash`, `google/gemini-2.5-pro`, `google/gemini-3-flash-preview`, `google/gemini-3-pro-preview`
> **Note**: Legacy tier-suffixed names like `google/antigravity-gemini-3-pro-high` still work but variants are recommended. Use `--variant=high` with the base model name instead. > **Note**: Legacy tier-suffixed names like `google/antigravity-gemini-3-pro-high` still work but variants are recommended. Use `--variant=high` with the base model name instead.
@@ -201,7 +201,7 @@ GitHub Copilot is supported as a **fallback provider** when native providers are
##### Model Mappings ##### Model Mappings
When GitHub Copilot is the best available provider, oh-my-openagent uses these model assignments: When GitHub Copilot is the best available provider, oh-my-opencode uses these model assignments:
| Agent | Model | | Agent | Model |
| ------------- | --------------------------------- | | ------------- | --------------------------------- |
@@ -243,7 +243,7 @@ When OpenCode Zen is the best available provider (no native or Copilot), these m
Run the installer and select "Yes" for GitHub Copilot: Run the installer and select "Yes" for GitHub Copilot:
```bash ```bash
bunx oh-my-openagent install bunx oh-my-opencode install
# Select your subscriptions (Claude, ChatGPT, Gemini) # Select your subscriptions (Claude, ChatGPT, Gemini)
# When prompted: "Do you have a GitHub Copilot subscription?" → Select "Yes" # When prompted: "Do you have a GitHub Copilot subscription?" → Select "Yes"
``` ```
@@ -251,7 +251,7 @@ bunx oh-my-openagent install
Or use non-interactive mode: Or use non-interactive mode:
```bash ```bash
bunx oh-my-openagent install --no-tui --claude=no --openai=no --gemini=no --copilot=yes bunx oh-my-opencode install --no-tui --claude=no --openai=no --gemini=no --copilot=yes
``` ```
Then authenticate with GitHub: Then authenticate with GitHub:
@@ -263,7 +263,7 @@ opencode auth login
### Step 5: Understand Your Model Setup ### Step 5: Understand Your Model Setup
You've just configured oh-my-openagent. Here's what got set up and why. You've just configured oh-my-opencode. Here's what got set up and why.
#### Model Families: What You're Working With #### Model Families: What You're Working With
@@ -287,14 +287,13 @@ Not all models behave the same way. Understanding which models are "similar" hel
| ----------------- | -------------------------------- | ------------------------------------------------- | | ----------------- | -------------------------------- | ------------------------------------------------- |
| **GPT-5.3-codex** | openai, github-copilot, opencode | Deep coding powerhouse. Required for Hephaestus. | | **GPT-5.3-codex** | openai, github-copilot, opencode | Deep coding powerhouse. Required for Hephaestus. |
| **GPT-5.4** | openai, github-copilot, opencode | High intelligence. Default for Oracle. | | **GPT-5.4** | openai, github-copilot, opencode | High intelligence. Default for Oracle. |
| **GPT-5.4 Mini** | openai, github-copilot, opencode | Fast + strong reasoning. Default for quick category. |
| **GPT-5-Nano** | opencode | Ultra-cheap, fast. Good for simple utility tasks. | | **GPT-5-Nano** | opencode | Ultra-cheap, fast. Good for simple utility tasks. |
**Different-Behavior Models**: **Different-Behavior Models**:
| Model | Provider(s) | Notes | | Model | Provider(s) | Notes |
| --------------------- | -------------------------------- | ----------------------------------------------------------- | | --------------------- | -------------------------------- | ----------------------------------------------------------- |
| **Gemini 3.1 Pro** | google, github-copilot, opencode | Excels at visual/frontend tasks. Different reasoning style. | | **Gemini 3 Pro** | google, github-copilot, opencode | Excels at visual/frontend tasks. Different reasoning style. |
| **Gemini 3 Flash** | google, github-copilot, opencode | Fast, good for doc search and light tasks. | | **Gemini 3 Flash** | google, github-copilot, opencode | Fast, good for doc search and light tasks. |
| **MiniMax M2.5** | venice | Fast and smart. Good for utility tasks. | | **MiniMax M2.5** | venice | Fast and smart. Good for utility tasks. |
| **MiniMax M2.5 Free** | opencode | Free-tier MiniMax. Fast for search/retrieval. | | **MiniMax M2.5 Free** | opencode | Free-tier MiniMax. Fast for search/retrieval. |
@@ -306,7 +305,7 @@ Not all models behave the same way. Understanding which models are "similar" hel
| **Grok Code Fast 1** | github-copilot, venice | Very fast | Optimized for code grep/search. Default for Explore. | | **Grok Code Fast 1** | github-copilot, venice | Very fast | Optimized for code grep/search. Default for Explore. |
| **Claude Haiku 4.5** | anthropic, opencode | Fast | Good balance of speed and intelligence. | | **Claude Haiku 4.5** | anthropic, opencode | Fast | Good balance of speed and intelligence. |
| **MiniMax M2.5 (Free)** | opencode, venice | Fast | Smart for its speed class. | | **MiniMax M2.5 (Free)** | opencode, venice | Fast | Smart for its speed class. |
| **GPT-5.3-codex-spark** | openai | Extremely fast | Blazing fast but compacts so aggressively that oh-my-openagent's context management doesn't work well with it. Not recommended for omo agents. | | **GPT-5.3-codex-spark** | openai | Extremely fast | Blazing fast but compacts so aggressively that oh-my-opencode's context management doesn't work well with it. Not recommended for omo agents. |
#### What Each Agent Does and Which Model It Got #### What Each Agent Does and Which Model It Got
@@ -317,7 +316,7 @@ Based on your subscriptions, here's how the agents were configured:
| Agent | Role | Default Chain | What It Does | | Agent | Role | Default Chain | What It Does |
| ------------ | ---------------- | ----------------------------------------------- | ---------------------------------------------------------------------------------------- | | ------------ | ---------------- | ----------------------------------------------- | ---------------------------------------------------------------------------------------- |
| **Sisyphus** | Main ultraworker | Opus (max) → Kimi K2.5 → GLM 5 → Big Pickle | Primary coding agent. Orchestrates everything. **Never use GPT — no GPT prompt exists.** | | **Sisyphus** | Main ultraworker | Opus (max) → Kimi K2.5 → GLM 5 → Big Pickle | Primary coding agent. Orchestrates everything. **Never use GPT — no GPT prompt exists.** |
| **Metis** | Plan review | Opus (max) → Kimi K2.5 → GPT-5.4 → Gemini 3.1 Pro | Reviews Prometheus plans for gaps. | | **Metis** | Plan review | Opus (max) → Kimi K2.5 → GPT-5.4 → Gemini 3 Pro | Reviews Prometheus plans for gaps. |
**Dual-Prompt Agents** (auto-switch between Claude and GPT prompts): **Dual-Prompt Agents** (auto-switch between Claude and GPT prompts):
@@ -327,7 +326,7 @@ Priority: **Claude > GPT > Claude-like models**
| Agent | Role | Default Chain | GPT Prompt? | | Agent | Role | Default Chain | GPT Prompt? |
| -------------- | ----------------- | ---------------------------------------------------------- | ---------------------------------------------------------------- | | -------------- | ----------------- | ---------------------------------------------------------- | ---------------------------------------------------------------- |
| **Prometheus** | Strategic planner | Opus (max) → **GPT-5.4 (high)** → Kimi K2.5 → Gemini 3.1 Pro | Yes — XML-tagged, principle-driven (~300 lines vs ~1,100 Claude) | | **Prometheus** | Strategic planner | Opus (max) → **GPT-5.4 (high)** → Kimi K2.5 → Gemini 3 Pro | Yes — XML-tagged, principle-driven (~300 lines vs ~1,100 Claude) |
| **Atlas** | Todo orchestrator | **Kimi K2.5** → Sonnet → GPT-5.4 | Yes — GPT-optimized todo management | | **Atlas** | Todo orchestrator | **Kimi K2.5** → Sonnet → GPT-5.4 | Yes — GPT-optimized todo management |
**GPT-Native Agents** (built for GPT, don't override to Claude): **GPT-Native Agents** (built for GPT, don't override to Claude):
@@ -335,8 +334,8 @@ Priority: **Claude > GPT > Claude-like models**
| Agent | Role | Default Chain | Notes | | Agent | Role | Default Chain | Notes |
| -------------- | ---------------------- | -------------------------------------- | ------------------------------------------------------ | | -------------- | ---------------------- | -------------------------------------- | ------------------------------------------------------ |
| **Hephaestus** | Deep autonomous worker | GPT-5.3-codex (medium) only | "Codex on steroids." No fallback. Requires GPT access. | | **Hephaestus** | Deep autonomous worker | GPT-5.3-codex (medium) only | "Codex on steroids." No fallback. Requires GPT access. |
| **Oracle** | Architecture/debugging | GPT-5.4 (high) → Gemini 3.1 Pro → Opus | High-IQ strategic backup. GPT preferred. | | **Oracle** | Architecture/debugging | GPT-5.4 (high) → Gemini 3 Pro → Opus | High-IQ strategic backup. GPT preferred. |
| **Momus** | High-accuracy reviewer | GPT-5.4 (medium) → Opus → Gemini 3.1 Pro | Verification agent. GPT preferred. | | **Momus** | High-accuracy reviewer | GPT-5.4 (medium) → Opus → Gemini 3 Pro | Verification agent. GPT preferred. |
**Utility Agents** (speed over intelligence): **Utility Agents** (speed over intelligence):
@@ -365,7 +364,7 @@ This is why Prometheus and Atlas ship separate prompts per model family — they
#### Custom Model Configuration #### Custom Model Configuration
If the user wants to override which model an agent uses, you can customize in `oh-my-openagent.json`: If the user wants to override which model an agent uses, you can customize in `oh-my-opencode.json`:
```jsonc ```jsonc
{ {
@@ -407,7 +406,7 @@ GPT (5.3-codex, 5.2) > Claude Opus (decent fallback) > Gemini (acceptable)
#### Provider Priority Chain #### Provider Priority Chain
When multiple providers are available, oh-my-openagent uses this priority: When multiple providers are available, oh-my-opencode uses this priority:
``` ```
Native (anthropic/, openai/, google/) > Kimi for Coding > GitHub Copilot > Venice > OpenCode Zen > Z.ai Coding Plan Native (anthropic/, openai/, google/) > Kimi for Coding > GitHub Copilot > Venice > OpenCode Zen > Z.ai Coding Plan
@@ -425,7 +425,7 @@ Read this document again, think about whether you have done everything correctly. Read this document again, think about whether you have done everything correctly.
#### Say 'Congratulations! 🎉' to the user #### Say 'Congratulations! 🎉' to the user
Say to the user: Congratulations! 🎉 You have successfully set up OhMyOpenAgent! Type `opencode` in your terminal to start using it. Say to the user: Congratulations! 🎉 You have successfully set up OhMyOpenCode! Type `opencode` in your terminal to start using it.
#### Free advertising #### Free advertising

View File

@@ -1,6 +1,6 @@
# Orchestration System Guide # Orchestration System Guide
Oh My OpenAgent's orchestration system transforms a simple AI agent into a coordinated development team through **separation of planning and execution**. Oh My OpenCode's orchestration system transforms a simple AI agent into a coordinated development team through **separation of planning and execution**.
--- ---
@@ -298,7 +298,7 @@ task({ category: "quick", prompt: "..." }); // "Just get it done fast"
| `visual-engineering` | Gemini 3.1 Pro | Frontend, UI/UX, design, styling, animation | | `visual-engineering` | Gemini 3.1 Pro | Frontend, UI/UX, design, styling, animation |
| `ultrabrain` | GPT-5.4 (xhigh) | Deep logical reasoning, complex architecture decisions | | `ultrabrain` | GPT-5.4 (xhigh) | Deep logical reasoning, complex architecture decisions |
| `artistry` | Gemini 3.1 Pro (high) | Highly creative or artistic tasks, novel ideas | | `artistry` | Gemini 3.1 Pro (high) | Highly creative or artistic tasks, novel ideas |
| `quick` | GPT-5.4 Mini | Trivial tasks - single file changes, typo fixes | | `quick` | Claude Haiku 4.5 | Trivial tasks - single file changes, typo fixes |
| `deep` | GPT-5.3 Codex (medium) | Goal-oriented autonomous problem-solving, thorough research | | `deep` | GPT-5.3 Codex (medium) | Goal-oriented autonomous problem-solving, thorough research |
| `unspecified-low` | Claude Sonnet 4.6 | Tasks that don't fit other categories, low effort | | `unspecified-low` | Claude Sonnet 4.6 | Tasks that don't fit other categories, low effort |
| `unspecified-high` | Claude Opus 4.6 (max) | Tasks that don't fit other categories, high effort | | `unspecified-high` | Claude Opus 4.6 (max) | Tasks that don't fit other categories, high effort |
@@ -475,7 +475,7 @@ Use the `ulw` keyword in Sisyphus when:
## Configuration ## Configuration
You can control related features in `oh-my-openagent.json`: You can control related features in `oh-my-opencode.json`:
```jsonc ```jsonc
{ {

View File

@@ -1,6 +1,6 @@
# What Is Oh My OpenAgent? # What Is Oh My OpenCode?
Oh My OpenAgent is a multi-model agent orchestration harness for OpenCode. It transforms a single AI agent into a coordinated development team that actually ships code. Oh My OpenCode is a multi-model agent orchestration harness for OpenCode. It transforms a single AI agent into a coordinated development team that actually ships code.
Not locked to Claude. Not locked to OpenAI. Not locked to anyone. Not locked to Claude. Not locked to OpenAI. Not locked to anyone.
@@ -15,7 +15,7 @@ Just better results, cheaper models, real orchestration.
Paste this into your LLM agent session: Paste this into your LLM agent session:
``` ```
Install and configure oh-my-openagent by following the instructions here: Install and configure oh-my-opencode by following the instructions here:
https://raw.githubusercontent.com/code-yeongyu/oh-my-openagent/refs/heads/dev/docs/guide/installation.md https://raw.githubusercontent.com/code-yeongyu/oh-my-openagent/refs/heads/dev/docs/guide/installation.md
``` ```
@@ -41,13 +41,13 @@ We used to call this "Claude Code on steroids." That was wrong.
This isn't about making Claude Code better. It's about breaking free from the idea that one model, one provider, one way of working is enough. Anthropic wants you locked in. OpenAI wants you locked in. Everyone wants you locked in. This isn't about making Claude Code better. It's about breaking free from the idea that one model, one provider, one way of working is enough. Anthropic wants you locked in. OpenAI wants you locked in. Everyone wants you locked in.
Oh My OpenAgent doesn't play that game. It orchestrates across models, picking the right brain for the right job. Claude for orchestration. GPT for deep reasoning. Gemini for frontend. GPT-5.4 Mini for quick tasks. All working together, automatically. Oh My OpenCode doesn't play that game. It orchestrates across models, picking the right brain for the right job. Claude for orchestration. GPT for deep reasoning. Gemini for frontend. Haiku for quick tasks. All working together, automatically.
--- ---
## How It Works: Agent Orchestration ## How It Works: Agent Orchestration
Instead of one agent doing everything, Oh My OpenAgent uses **specialized agents that delegate to each other** based on task type. Instead of one agent doing everything, Oh My OpenCode uses **specialized agents that delegate to each other** based on task type.
**The Architecture:** **The Architecture:**
@@ -99,9 +99,9 @@ Use Hephaestus when you need deep architectural reasoning, complex debugging acr
**Why this beats vanilla Codex CLI:** **Why this beats vanilla Codex CLI:**
- **Multi-model orchestration.** Pure Codex is single-model. OmO routes different tasks to different models automatically. GPT for deep reasoning. Gemini for frontend. GPT-5.4 Mini for speed. The right brain for the right job. - **Multi-model orchestration.** Pure Codex is single-model. OmO routes different tasks to different models automatically. GPT for deep reasoning. Gemini for frontend. Haiku for speed. The right brain for the right job.
- **Background agents.** Fire 5+ agents in parallel. Something Codex simply cannot do. While one agent writes code, another researches patterns, another checks documentation. Like a real dev team. - **Background agents.** Fire 5+ agents in parallel. Something Codex simply cannot do. While one agent writes code, another researches patterns, another checks documentation. Like a real dev team.
- **Category system.** Tasks are routed by intent, not model name. `visual-engineering` gets Gemini. `ultrabrain` gets GPT-5.4. `quick` gets GPT-5.4 Mini. No manual juggling. - **Category system.** Tasks are routed by intent, not model name. `visual-engineering` gets Gemini. `ultrabrain` gets GPT-5.4. `quick` gets Haiku. No manual juggling.
- **Accumulated wisdom.** Subagents learn from previous results. Conventions discovered in task 1 are passed to task 5. Mistakes made early aren't repeated. The system gets smarter as it works. - **Accumulated wisdom.** Subagents learn from previous results. Conventions discovered in task 1 are passed to task 5. Mistakes made early aren't repeated. The system gets smarter as it works.
### Prometheus: The Strategic Planner ### Prometheus: The Strategic Planner
@@ -154,7 +154,7 @@ Use Prometheus for multi-day projects, critical production changes, complex refa
## Agent Model Matching ## Agent Model Matching
Different agents work best with different models. Oh My OpenAgent automatically assigns optimal models, but you can customize everything. Different agents work best with different models. Oh My OpenCode automatically assigns optimal models, but you can customize everything.
### Default Configuration ### Default Configuration
@@ -168,7 +168,7 @@ You can override specific agents or categories in your config:
```jsonc ```jsonc
{ {
"$schema": "https://raw.githubusercontent.com/code-yeongyu/oh-my-openagent/dev/assets/oh-my-openagent.schema.json", "$schema": "https://raw.githubusercontent.com/code-yeongyu/oh-my-openagent/dev/assets/oh-my-opencode.schema.json",
"agents": { "agents": {
// Main orchestrator: Claude Opus or Kimi K2.5 work best // Main orchestrator: Claude Opus or Kimi K2.5 work best
@@ -195,8 +195,8 @@ You can override specific agents or categories in your config:
// General high-effort work // General high-effort work
"unspecified-high": { "model": "anthropic/claude-opus-4-6", "variant": "max" }, "unspecified-high": { "model": "anthropic/claude-opus-4-6", "variant": "max" },
// Quick tasks: use GPT-5.4-mini (fast and cheap) // Quick tasks: use the cheapest models
"quick": { "model": "openai/gpt-5.4-mini" }, "quick": { "model": "anthropic/claude-haiku-4-5" },
// Deep reasoning: GPT-5.4 // Deep reasoning: GPT-5.4
"ultrabrain": { "model": "openai/gpt-5.4", "variant": "xhigh" }, "ultrabrain": { "model": "openai/gpt-5.4", "variant": "xhigh" },
@@ -220,7 +220,7 @@ You can override specific agents or categories in your config:
**Different-behavior models**: **Different-behavior models**:
- Gemini 3.1 Pro — excels at visual/frontend tasks - Gemini 3 Pro — excels at visual/frontend tasks
- MiniMax M2.5 — fast and smart for utility tasks - MiniMax M2.5 — fast and smart for utility tasks
- Grok Code Fast 1 — optimized for code grep/search - Grok Code Fast 1 — optimized for code grep/search
@@ -232,7 +232,7 @@ See the [Agent-Model Matching Guide](./agent-model-matching.md) for complete det
Claude Code is good. But it's a single agent running a single model doing everything alone. Claude Code is good. But it's a single agent running a single model doing everything alone.
Oh My OpenAgent turns that into a coordinated team: Oh My OpenCode turns that into a coordinated team:
**Parallel execution.** Claude Code processes one thing at a time. OmO fires background agents in parallel — research, implementation, and verification happening simultaneously. Like having 5 engineers instead of 1. **Parallel execution.** Claude Code processes one thing at a time. OmO fires background agents in parallel — research, implementation, and verification happening simultaneously. Like having 5 engineers instead of 1.
@@ -246,7 +246,7 @@ Oh My OpenAgent turns that into a coordinated team:
**Discipline enforcement.** Todo enforcer yanks idle agents back to work. Comment checker strips AI slop. Ralph Loop keeps going until 100% done. The system doesn't let the agent slack off. **Discipline enforcement.** Todo enforcer yanks idle agents back to work. Comment checker strips AI slop. Ralph Loop keeps going until 100% done. The system doesn't let the agent slack off.
**The fundamental advantage.** Models have different temperaments. Claude thinks deeply. GPT reasons architecturally. Gemini visualizes. Haiku moves fast. Single-model tools force you to pick one personality for all tasks. Oh My OpenAgent leverages them all, routing by task type. This isn't a temporary hack — it's the only architecture that makes sense as models specialize further. The gap between multi-model orchestration and single-model limitation widens every month. We're betting on that future. **The fundamental advantage.** Models have different temperaments. Claude thinks deeply. GPT reasons architecturally. Gemini visualizes. Haiku moves fast. Single-model tools force you to pick one personality for all tasks. Oh My OpenCode leverages them all, routing by task type. This isn't a temporary hack — it's the only architecture that makes sense as models specialize further. The gap between multi-model orchestration and single-model limitation widens every month. We're betting on that future.
--- ---
@@ -256,7 +256,7 @@ Before acting on any request, Sisyphus classifies your true intent.
Are you asking for research? Implementation? Investigation? A fix? The Intent Gate figures out what you actually want, not just the literal words you typed. This means the agent understands context, nuance, and the real goal behind your request. Are you asking for research? Implementation? Investigation? A fix? The Intent Gate figures out what you actually want, not just the literal words you typed. This means the agent understands context, nuance, and the real goal behind your request.
Claude Code doesn't have this. It takes your prompt and runs. Oh My OpenAgent thinks first, then acts. Claude Code doesn't have this. It takes your prompt and runs. Oh My OpenCode thinks first, then acts.
--- ---

View File

@@ -1,6 +1,6 @@
# Manifesto # Manifesto
The principles and philosophy behind Oh My OpenAgent. The principles and philosophy behind Oh My OpenCode.
--- ---
@@ -20,7 +20,7 @@ When you find yourself:
That's not "human-AI collaboration." That's the AI failing to do its job. That's not "human-AI collaboration." That's the AI failing to do its job.
**Oh My OpenAgent is built on this premise**: Human intervention during agentic work is fundamentally a wrong signal. If the system is designed correctly, the agent should complete the work without requiring you to babysit it. **Oh My OpenCode is built on this premise**: Human intervention during agentic work is fundamentally a wrong signal. If the system is designed correctly, the agent should complete the work without requiring you to babysit it.
--- ---
@@ -144,7 +144,7 @@ Human Intent → Agent Execution → Verified Result
(intervention only on true failure) (intervention only on true failure)
``` ```
Everything in Oh My OpenAgent is designed to make this loop work: Everything in Oh My OpenCode is designed to make this loop work:
| Feature | Purpose | | Feature | Purpose |
|---------|---------| |---------|---------|

View File

@@ -1,15 +1,15 @@
# CLI Reference # CLI Reference
Complete reference for the `oh-my-openagent` command-line interface. Complete reference for the `oh-my-opencode` command-line interface.
## Basic Usage ## Basic Usage
```bash ```bash
# Display help # Display help
bunx oh-my-openagent bunx oh-my-opencode
# Or with npx # Or with npx
npx oh-my-openagent npx oh-my-opencode
``` ```
## Commands ## Commands
@@ -27,20 +27,20 @@ npx oh-my-openagent
## install ## install
Interactive installation tool for initial Oh-My-OpenAgent setup. Provides a TUI based on `@clack/prompts`. Interactive installation tool for initial Oh-My-OpenCode setup. Provides a TUI based on `@clack/prompts`.
### Usage ### Usage
```bash ```bash
bunx oh-my-openagent install bunx oh-my-opencode install
``` ```
### Installation Process ### Installation Process
1. **Provider Selection**: Choose your AI provider (Claude, ChatGPT, or Gemini) 1. **Provider Selection**: Choose your AI provider (Claude, ChatGPT, or Gemini)
2. **API Key Input**: Enter the API key for your selected provider 2. **API Key Input**: Enter the API key for your selected provider
3. **Configuration File Creation**: Generates `opencode.json` or `oh-my-openagent.json` files 3. **Configuration File Creation**: Generates `opencode.json` or `oh-my-opencode.json` files
4. **Plugin Registration**: Automatically registers the oh-my-openagent plugin in OpenCode settings 4. **Plugin Registration**: Automatically registers the oh-my-opencode plugin in OpenCode settings
### Options ### Options
@@ -53,12 +53,12 @@ bunx oh-my-openagent install
## doctor ## doctor
Diagnoses your environment to ensure Oh-My-OpenAgent is functioning correctly. Performs 17+ health checks. Diagnoses your environment to ensure Oh-My-OpenCode is functioning correctly. Performs 17+ health checks.
### Usage ### Usage
```bash ```bash
bunx oh-my-openagent doctor bunx oh-my-opencode doctor
``` ```
### Diagnostic Categories ### Diagnostic Categories
@@ -83,10 +83,10 @@ bunx oh-my-openagent doctor
### Example Output ### Example Output
``` ```
oh-my-openagent doctor oh-my-opencode doctor
┌──────────────────────────────────────────────────┐ ┌──────────────────────────────────────────────────┐
│ Oh-My-OpenAgent Doctor │ │ Oh-My-OpenCode Doctor │
└──────────────────────────────────────────────────┘ └──────────────────────────────────────────────────┘
Installation Installation
@@ -94,7 +94,7 @@ Installation
✓ Plugin registered in opencode.json ✓ Plugin registered in opencode.json
Configuration Configuration
✓ oh-my-openagent.json is valid ✓ oh-my-opencode.json is valid
⚠ categories.visual-engineering: using default model ⚠ categories.visual-engineering: using default model
Authentication Authentication
@@ -119,7 +119,7 @@ Executes OpenCode sessions and monitors task completion.
### Usage ### Usage
```bash ```bash
bunx oh-my-openagent run [prompt] bunx oh-my-opencode run [prompt]
``` ```
### Options ### Options
@@ -148,16 +148,16 @@ Manages OAuth 2.1 authentication for remote MCP servers.
```bash ```bash
# Login to an OAuth-protected MCP server # Login to an OAuth-protected MCP server
bunx oh-my-openagent mcp oauth login <server-name> --server-url https://api.example.com bunx oh-my-opencode mcp oauth login <server-name> --server-url https://api.example.com
# Login with explicit client ID and scopes # Login with explicit client ID and scopes
bunx oh-my-openagent mcp oauth login my-api --server-url https://api.example.com --client-id my-client --scopes "read,write" bunx oh-my-opencode mcp oauth login my-api --server-url https://api.example.com --client-id my-client --scopes "read,write"
# Remove stored OAuth tokens # Remove stored OAuth tokens
bunx oh-my-openagent mcp oauth logout <server-name> bunx oh-my-opencode mcp oauth logout <server-name>
# Check OAuth token status # Check OAuth token status
bunx oh-my-openagent mcp oauth status [server-name] bunx oh-my-opencode mcp oauth status [server-name]
``` ```
### Options ### Options
@@ -178,8 +178,8 @@ Tokens are stored in `~/.config/opencode/mcp-oauth.json` with `0600` permissions
The CLI searches for configuration files in the following locations (in priority order): The CLI searches for configuration files in the following locations (in priority order):
1. **Project Level**: `.opencode/oh-my-openagent.json` 1. **Project Level**: `.opencode/oh-my-opencode.json`
2. **User Level**: `~/.config/opencode/oh-my-openagent.json` 2. **User Level**: `~/.config/opencode/oh-my-opencode.json`
### JSONC Support ### JSONC Support
@@ -219,17 +219,17 @@ bun install -g opencode@latest
```bash ```bash
# Reinstall plugin # Reinstall plugin
bunx oh-my-openagent install bunx oh-my-opencode install
``` ```
### Doctor Check Failures ### Doctor Check Failures
```bash ```bash
# Diagnose with detailed information # Diagnose with detailed information
bunx oh-my-openagent doctor --verbose bunx oh-my-opencode doctor --verbose
# Check specific category only # Check specific category only
bunx oh-my-openagent doctor --category authentication bunx oh-my-opencode doctor --category authentication
``` ```
--- ---
@@ -240,10 +240,10 @@ Use the `--no-tui` option for CI/CD environments.
```bash ```bash
# Run doctor in CI environment # Run doctor in CI environment
bunx oh-my-openagent doctor --no-tui --json bunx oh-my-opencode doctor --no-tui --json
# Save results to file # Save results to file
bunx oh-my-openagent doctor --json > doctor-report.json bunx oh-my-opencode doctor --json > doctor-report.json
``` ```
--- ---

View File

@@ -1,6 +1,6 @@
# Configuration Reference # Configuration Reference
Complete reference for `oh-my-openagent.jsonc` configuration. This document covers every available option with examples. Complete reference for `oh-my-opencode.jsonc` configuration. This document covers every available option with examples.
--- ---
@@ -44,13 +44,13 @@ Complete reference for `oh-my-openagent.jsonc` configuration. This document cove
Priority order (project overrides user): Priority order (project overrides user):
1. `.opencode/oh-my-openagent.jsonc` / `.opencode/oh-my-openagent.json` 1. `.opencode/oh-my-opencode.jsonc` / `.opencode/oh-my-opencode.json`
2. User config (`.jsonc` preferred over `.json`): 2. User config (`.jsonc` preferred over `.json`):
| Platform | Path | | Platform | Path |
| ----------- | ----------------------------------------- | | ----------- | ----------------------------------------- |
| macOS/Linux | `~/.config/opencode/oh-my-openagent.jsonc` | | macOS/Linux | `~/.config/opencode/oh-my-opencode.jsonc` |
| Windows | `%APPDATA%\opencode\oh-my-openagent.jsonc` | | Windows | `%APPDATA%\opencode\oh-my-opencode.jsonc` |
JSONC supports `// line comments`, `/* block comments */`, and trailing commas. JSONC supports `// line comments`, `/* block comments */`, and trailing commas.
@@ -58,11 +58,11 @@ Enable schema autocomplete:
```json ```json
{ {
"$schema": "https://raw.githubusercontent.com/code-yeongyu/oh-my-openagent/dev/assets/oh-my-openagent.schema.json" "$schema": "https://raw.githubusercontent.com/code-yeongyu/oh-my-openagent/dev/assets/oh-my-opencode.schema.json"
} }
``` ```
Run `bunx oh-my-openagent install` for guided setup. Run `opencode models` to list available models. Run `bunx oh-my-opencode install` for guided setup. Run `opencode models` to list available models.
### Quick Start Example ### Quick Start Example
@@ -70,7 +70,7 @@ Here's a practical starting configuration:
```jsonc ```jsonc
{ {
"$schema": "https://raw.githubusercontent.com/code-yeongyu/oh-my-openagent/dev/assets/oh-my-openagent.schema.json", "$schema": "https://raw.githubusercontent.com/code-yeongyu/oh-my-openagent/dev/assets/oh-my-opencode.schema.json",
"agents": { "agents": {
// Main orchestrator: Claude Opus or Kimi K2.5 work best // Main orchestrator: Claude Opus or Kimi K2.5 work best
@@ -228,7 +228,7 @@ Domain-specific model delegation used by the `task()` tool. When Sisyphus delega
| `ultrabrain` | `openai/gpt-5.4` (xhigh) | Deep logical reasoning, complex architecture | | `ultrabrain` | `openai/gpt-5.4` (xhigh) | Deep logical reasoning, complex architecture |
| `deep` | `openai/gpt-5.3-codex` (medium) | Autonomous problem-solving, thorough research | | `deep` | `openai/gpt-5.3-codex` (medium) | Autonomous problem-solving, thorough research |
| `artistry` | `google/gemini-3.1-pro` (high) | Creative/unconventional approaches | | `artistry` | `google/gemini-3.1-pro` (high) | Creative/unconventional approaches |
| `quick` | `openai/gpt-5.4-mini` | Trivial tasks, typo fixes, single-file changes | | `quick` | `anthropic/claude-haiku-4-5` | Trivial tasks, typo fixes, single-file changes |
| `unspecified-low` | `anthropic/claude-sonnet-4-6` | General tasks, low effort | | `unspecified-low` | `anthropic/claude-sonnet-4-6` | General tasks, low effort |
| `unspecified-high` | `anthropic/claude-opus-4-6` (max) | General tasks, high effort | | `unspecified-high` | `anthropic/claude-opus-4-6` (max) | General tasks, high effort |
| `writing` | `google/gemini-3-flash` | Documentation, prose, technical writing | | `writing` | `google/gemini-3-flash` | Documentation, prose, technical writing |
@@ -286,12 +286,12 @@ Disable categories: `{ "disabled_categories": ["ultrabrain"] }`
| **ultrabrain** | `gpt-5.4` | `gpt-5.4``gemini-3.1-pro``claude-opus-4-6` | | **ultrabrain** | `gpt-5.4` | `gpt-5.4``gemini-3.1-pro``claude-opus-4-6` |
| **deep** | `gpt-5.3-codex` | `gpt-5.3-codex``claude-opus-4-6``gemini-3.1-pro` | | **deep** | `gpt-5.3-codex` | `gpt-5.3-codex``claude-opus-4-6``gemini-3.1-pro` |
| **artistry** | `gemini-3.1-pro` | `gemini-3.1-pro``claude-opus-4-6``gpt-5.4` | | **artistry** | `gemini-3.1-pro` | `gemini-3.1-pro``claude-opus-4-6``gpt-5.4` |
| **quick** | `gpt-5.4-mini` | `gpt-5.4-mini` `claude-haiku-4-5``gemini-3-flash` `minimax-m2.5` `gpt-5-nano` | | **quick** | `claude-haiku-4-5` | `claude-haiku-4-5``gemini-3-flash``gpt-5-nano` |
| **unspecified-low** | `claude-sonnet-4-6` | `claude-sonnet-4-6``gpt-5.3-codex``gemini-3-flash` | | **unspecified-low** | `claude-sonnet-4-6` | `claude-sonnet-4-6``gpt-5.3-codex``gemini-3-flash` |
| **unspecified-high** | `claude-opus-4-6` | `claude-opus-4-6``gpt-5.4 (high)``glm-5``k2p5``kimi-k2.5` | | **unspecified-high** | `claude-opus-4-6` | `claude-opus-4-6``gpt-5.4 (high)``glm-5``k2p5``kimi-k2.5` |
| **writing** | `gemini-3-flash` | `gemini-3-flash``claude-sonnet-4-6` | | **writing** | `gemini-3-flash` | `gemini-3-flash``claude-sonnet-4-6` |
Run `bunx oh-my-openagent doctor --verbose` to see effective model resolution for your config. Run `bunx oh-my-opencode doctor --verbose` to see effective model resolution for your config.
--- ---
@@ -418,14 +418,15 @@ Disable built-in skills: `{ "disabled_skills": ["playwright"] }`
Disable built-in hooks via `disabled_hooks`: Disable built-in hooks via `disabled_hooks`:
```json ```json
{ "disabled_hooks": ["comment-checker"] } { "disabled_hooks": ["comment-checker", "gpt-permission-continuation"] }
``` ```
Available hooks: `todo-continuation-enforcer`, `context-window-monitor`, `session-recovery`, `session-notification`, `comment-checker`, `grep-output-truncator`, `tool-output-truncator`, `directory-agents-injector`, `directory-readme-injector`, `empty-task-response-detector`, `think-mode`, `anthropic-context-window-limit-recovery`, `rules-injector`, `background-notification`, `auto-update-checker`, `startup-toast`, `keyword-detector`, `agent-usage-reminder`, `non-interactive-env`, `interactive-bash-session`, `compaction-context-injector`, `thinking-block-validator`, `claude-code-hooks`, `ralph-loop`, `preemptive-compaction`, `auto-slash-command`, `sisyphus-junior-notepad`, `no-sisyphus-gpt`, `start-work`, `runtime-fallback` Available hooks: `gpt-permission-continuation`, `todo-continuation-enforcer`, `context-window-monitor`, `session-recovery`, `session-notification`, `comment-checker`, `grep-output-truncator`, `tool-output-truncator`, `directory-agents-injector`, `directory-readme-injector`, `empty-task-response-detector`, `think-mode`, `anthropic-context-window-limit-recovery`, `rules-injector`, `background-notification`, `auto-update-checker`, `startup-toast`, `keyword-detector`, `agent-usage-reminder`, `non-interactive-env`, `interactive-bash-session`, `compaction-context-injector`, `thinking-block-validator`, `claude-code-hooks`, `ralph-loop`, `preemptive-compaction`, `auto-slash-command`, `sisyphus-junior-notepad`, `no-sisyphus-gpt`, `start-work`, `runtime-fallback`
**Notes:** **Notes:**
- `directory-agents-injector` — auto-disabled on OpenCode 1.1.37+ (native AGENTS.md support) - `directory-agents-injector` — auto-disabled on OpenCode 1.1.37+ (native AGENTS.md support)
- `gpt-permission-continuation` — resumes GPT sessions only when the last assistant reply ends with a permission-seeking tail like `If you want, ...`. Disable it if you prefer GPT sessions to wait for explicit user follow-up.
- `no-sisyphus-gpt`**do not disable**. It blocks incompatible GPT models for Sisyphus while allowing the dedicated GPT-5.4 prompt path. - `no-sisyphus-gpt`**do not disable**. It blocks incompatible GPT models for Sisyphus while allowing the dedicated GPT-5.4 prompt path.
- `startup-toast` is a sub-feature of `auto-update-checker`. Disable just the toast by adding `startup-toast` to `disabled_hooks`. - `startup-toast` is a sub-feature of `auto-update-checker`. Disable just the toast by adding `startup-toast` to `disabled_hooks`.

View File

@@ -1,8 +1,8 @@
# Oh-My-OpenAgent Features Reference # Oh-My-OpenCode Features Reference
## Agents ## Agents
Oh-My-OpenAgent provides 11 specialized AI agents. Each has distinct expertise, optimized models, and tool permissions. Oh-My-OpenCode provides 11 specialized AI agents. Each has distinct expertise, optimized models, and tool permissions.
### Core Agents ### Core Agents
@@ -90,7 +90,7 @@ When running inside tmux:
- Each pane shows agent output live - Each pane shows agent output live
- Auto-cleanup when agents complete - Auto-cleanup when agents complete
Customize agent models, prompts, and permissions in `oh-my-openagent.json`. Customize agent models, prompts, and permissions in `oh-my-opencode.json`.
## Category System ## Category System
@@ -111,7 +111,7 @@ By combining these two concepts, you can generate optimal agents through `task`.
| `ultrabrain` | `openai/gpt-5.4` (xhigh) | Deep logical reasoning, complex architecture decisions requiring extensive analysis | | `ultrabrain` | `openai/gpt-5.4` (xhigh) | Deep logical reasoning, complex architecture decisions requiring extensive analysis |
| `deep` | `openai/gpt-5.3-codex` (medium) | Goal-oriented autonomous problem-solving. Thorough research before action. For hairy problems requiring deep understanding. | | `deep` | `openai/gpt-5.3-codex` (medium) | Goal-oriented autonomous problem-solving. Thorough research before action. For hairy problems requiring deep understanding. |
| `artistry` | `google/gemini-3.1-pro` (high) | Highly creative/artistic tasks, novel ideas | | `artistry` | `google/gemini-3.1-pro` (high) | Highly creative/artistic tasks, novel ideas |
| `quick` | `openai/gpt-5.4-mini` | Trivial tasks - single file changes, typo fixes, simple modifications | | `quick` | `anthropic/claude-haiku-4-5` | Trivial tasks - single file changes, typo fixes, simple modifications |
| `unspecified-low` | `anthropic/claude-sonnet-4-6` | Tasks that don't fit other categories, low effort required | | `unspecified-low` | `anthropic/claude-sonnet-4-6` | Tasks that don't fit other categories, low effort required |
| `unspecified-high` | `anthropic/claude-opus-4-6` (max) | Tasks that don't fit other categories, high effort required | | `unspecified-high` | `anthropic/claude-opus-4-6` (max) | Tasks that don't fit other categories, high effort required |
| `writing` | `google/gemini-3-flash` | Documentation, prose, technical writing | | `writing` | `google/gemini-3-flash` | Documentation, prose, technical writing |
@@ -129,7 +129,7 @@ task({
### Custom Categories ### Custom Categories
You can define custom categories in `oh-my-openagent.json`. You can define custom categories in `oh-my-opencode.json`.
#### Category Configuration Schema #### Category Configuration Schema
@@ -237,7 +237,7 @@ Skills provide specialized workflows with embedded MCP servers and detailed inst
### Browser Automation Options ### Browser Automation Options
Oh-My-OpenAgent provides two browser automation providers, configurable via `browser_automation_engine.provider`. Oh-My-OpenCode provides two browser automation providers, configurable via `browser_automation_engine.provider`.
#### Option 1: Playwright MCP (Default) #### Option 1: Playwright MCP (Default)
@@ -558,7 +558,7 @@ Requires `experimental.task_system: true` in config.
#### Task System Details #### Task System Details
**Note on Claude Code Alignment**: This implementation follows Claude Code's internal Task tool signatures (`TaskCreate`, `TaskUpdate`, `TaskList`, `TaskGet`) and field naming conventions (`subject`, `blockedBy`, `blocks`, etc.). However, Anthropic has not published official documentation for these tools. This is Oh My OpenAgent's own implementation based on observed Claude Code behavior and internal specifications. **Note on Claude Code Alignment**: This implementation follows Claude Code's internal Task tool signatures (`TaskCreate`, `TaskUpdate`, `TaskList`, `TaskGet`) and field naming conventions (`subject`, `blockedBy`, `blocks`, etc.). However, Anthropic has not published official documentation for these tools. This is Oh My OpenCode's own implementation based on observed Claude Code behavior and internal specifications.
**Task Schema**: **Task Schema**:
@@ -680,6 +680,7 @@ Hooks intercept and modify behavior at key points in the agent lifecycle across
| **ralph-loop** | Event + Message | Manages self-referential loop continuation. | | **ralph-loop** | Event + Message | Manages self-referential loop continuation. |
| **start-work** | Message | Handles /start-work command execution. | | **start-work** | Message | Handles /start-work command execution. |
| **auto-slash-command** | Message | Automatically executes slash commands from prompts. | | **auto-slash-command** | Message | Automatically executes slash commands from prompts. |
| **gpt-permission-continuation** | Event | Auto-continues GPT sessions when the final assistant reply ends with a permission-seeking tail such as `If you want, ...`. |
| **stop-continuation-guard** | Event + Message | Guards the stop-continuation mechanism. | | **stop-continuation-guard** | Event + Message | Guards the stop-continuation mechanism. |
| **category-skill-reminder** | Event + PostToolUse | Reminds agents about available category skills for delegation. | | **category-skill-reminder** | Event + PostToolUse | Reminds agents about available category skills for delegation. |
| **anthropic-effort** | Params | Adjusts Anthropic API effort level based on context. | | **anthropic-effort** | Params | Adjusts Anthropic API effort level based on context. |
@@ -734,6 +735,7 @@ Hooks intercept and modify behavior at key points in the agent lifecycle across
| Hook | Event | Description | | Hook | Event | Description |
| ------------------------------ | ----- | ---------------------------------------------------------- | | ------------------------------ | ----- | ---------------------------------------------------------- |
| **gpt-permission-continuation** | Event | Continues GPT replies that end in a permission-seeking tail. |
| **todo-continuation-enforcer** | Event | Enforces todo completion — yanks idle agents back to work. | | **todo-continuation-enforcer** | Event | Enforces todo completion — yanks idle agents back to work. |
| **compaction-todo-preserver** | Event | Preserves todo state during session compaction. | | **compaction-todo-preserver** | Event | Preserves todo state during session compaction. |
| **unstable-agent-babysitter** | Event | Handles unstable agent behavior with recovery strategies. | | **unstable-agent-babysitter** | Event | Handles unstable agent behavior with recovery strategies. |
@@ -785,10 +787,12 @@ Disable specific hooks in config:
```json ```json
{ {
"disabled_hooks": ["comment-checker"] "disabled_hooks": ["comment-checker", "gpt-permission-continuation"]
} }
``` ```
Use `gpt-permission-continuation` when you want GPT sessions to stop at permission-seeking endings instead of auto-resuming.
## MCPs ## MCPs
### Built-in MCPs ### Built-in MCPs
@@ -844,7 +848,7 @@ When a skill MCP has `oauth` configured:
Pre-authenticate via CLI: Pre-authenticate via CLI:
```bash ```bash
bunx oh-my-openagent mcp oauth login <server-name> --server-url https://api.example.com bunx oh-my-opencode mcp oauth login <server-name> --server-url https://api.example.com
``` ```
## Context Injection ## Context Injection

View File

@@ -4,7 +4,7 @@
### Problem ### Problem
When using Ollama as a provider with oh-my-openagent agents, you may encounter: When using Ollama as a provider with oh-my-opencode agents, you may encounter:
``` ```
JSON Parse error: Unexpected EOF JSON Parse error: Unexpected EOF
@@ -26,7 +26,7 @@ Claude Code SDK expects a single JSON object, not multiple NDJSON lines, causing
**Why this happens:** **Why this happens:**
- **Ollama API**: Returns streaming responses as NDJSON by design - **Ollama API**: Returns streaming responses as NDJSON by design
- **Claude Code SDK**: Doesn't properly handle NDJSON responses for tool calls - **Claude Code SDK**: Doesn't properly handle NDJSON responses for tool calls
- **oh-my-openagent**: Passes through the SDK's behavior (can't fix at this layer) - **oh-my-opencode**: Passes through the SDK's behavior (can't fix at this layer)
## Solutions ## Solutions
@@ -114,7 +114,7 @@ curl -s http://localhost:11434/api/chat \
## Related Issues ## Related Issues
- **oh-my-openagent**: https://github.com/code-yeongyu/oh-my-openagent/issues/1124 - **oh-my-opencode**: https://github.com/code-yeongyu/oh-my-openagent/issues/1124
- **Ollama API Docs**: https://github.com/ollama/ollama/blob/main/docs/api.md - **Ollama API Docs**: https://github.com/ollama/ollama/blob/main/docs/api.md
## Getting Help ## Getting Help

View File

@@ -101,9 +101,7 @@ async function main() {
console.log("\n✅ All platform binaries built successfully!\n"); console.log("\n✅ All platform binaries built successfully!\n");
} }
if (import.meta.main) { main().catch((error) => {
main().catch((error) => { console.error("Fatal error:", error);
console.error("Fatal error:", error); process.exit(1);
process.exit(1); });
});
}

View File

@@ -2239,70 +2239,6 @@
"created_at": "2026-03-17T20:42:42Z", "created_at": "2026-03-17T20:42:42Z",
"repoId": 1108837393, "repoId": 1108837393,
"pullRequestNo": 2656 "pullRequestNo": 2656
},
{
"name": "walioo",
"id": 25835823,
"comment_id": 4087098221,
"created_at": "2026-03-19T02:13:02Z",
"repoId": 1108837393,
"pullRequestNo": 2688
},
{
"name": "trafgals",
"id": 6454757,
"comment_id": 4087725932,
"created_at": "2026-03-19T04:22:32Z",
"repoId": 1108837393,
"pullRequestNo": 2690
},
{
"name": "tonymfer",
"id": 66512584,
"comment_id": 4091847232,
"created_at": "2026-03-19T17:13:49Z",
"repoId": 1108837393,
"pullRequestNo": 2701
},
{
"name": "nguyentamdat",
"id": 16253213,
"comment_id": 4096267323,
"created_at": "2026-03-20T07:34:22Z",
"repoId": 1108837393,
"pullRequestNo": 2718
},
{
"name": "whackur",
"id": 26926041,
"comment_id": 4102330445,
"created_at": "2026-03-21T05:27:17Z",
"repoId": 1108837393,
"pullRequestNo": 2733
},
{
"name": "ndaemy",
"id": 18691542,
"comment_id": 4103008804,
"created_at": "2026-03-21T10:18:22Z",
"repoId": 1108837393,
"pullRequestNo": 2734
},
{
"name": "0xYiliu",
"id": 3838688,
"comment_id": 4104738337,
"created_at": "2026-03-21T22:59:33Z",
"repoId": 1108837393,
"pullRequestNo": 2738
},
{
"name": "hunghoang3011",
"id": 65234777,
"comment_id": 4107900881,
"created_at": "2026-03-23T04:28:20Z",
"repoId": 1108837393,
"pullRequestNo": 2758
} }
] ]
} }

View File

@@ -14,7 +14,7 @@ Entry point `index.ts` orchestrates 5-step initialization: loadConfig → create
| `plugin-config.ts` | JSONC parse, multi-level merge, Zod v4 validation | | `plugin-config.ts` | JSONC parse, multi-level merge, Zod v4 validation |
| `create-managers.ts` | TmuxSessionManager, BackgroundManager, SkillMcpManager, ConfigHandler | | `create-managers.ts` | TmuxSessionManager, BackgroundManager, SkillMcpManager, ConfigHandler |
| `create-tools.ts` | SkillContext + AvailableCategories + ToolRegistry (26 tools) | | `create-tools.ts` | SkillContext + AvailableCategories + ToolRegistry (26 tools) |
| `create-hooks.ts` | 3-tier: Core(39) + Continuation(7) + Skill(2) = 48 hooks | | `create-hooks.ts` | 3-tier: Core(37) + Continuation(7) + Skill(2) = 46 hooks |
| `plugin-interface.ts` | 8 OpenCode hook handlers: config, tool, chat.message, chat.params, chat.headers, event, tool.execute.before, tool.execute.after | | `plugin-interface.ts` | 8 OpenCode hook handlers: config, tool, chat.message, chat.params, chat.headers, event, tool.execute.before, tool.execute.after |
## CONFIG LOADING ## CONFIG LOADING
@@ -32,10 +32,10 @@ loadPluginConfig(directory, ctx)
``` ```
createHooks() createHooks()
├─→ createCoreHooks() # 39 hooks ├─→ createCoreHooks() # 37 hooks
│ ├─ createSessionHooks() # 23: contextWindowMonitor, thinkMode, ralphLoop, modelFallback, runtimeFallback, noSisyphusGpt, noHephaestusNonGpt, anthropicEffort, intentGate... │ ├─ createSessionHooks() # 23: contextWindowMonitor, thinkMode, ralphLoop, modelFallback, runtimeFallback, noSisyphusGpt, noHephaestusNonGpt, anthropicEffort, intentGate...
│ ├─ createToolGuardHooks() # 12: commentChecker, rulesInjector, writeExistingFileGuard, jsonErrorRecovery, hashlineReadEnhancer... │ ├─ createToolGuardHooks() # 10: commentChecker, rulesInjector, writeExistingFileGuard, jsonErrorRecovery, hashlineReadEnhancer...
│ └─ createTransformHooks() # 4: claudeCodeHooks, keywordDetector, contextInjector, thinkingBlockValidator │ └─ createTransformHooks() # 4: claudeCodeHooks, keywordDetector, contextInjector, thinkingBlockValidator
├─→ createContinuationHooks() # 7: todoContinuationEnforcer, atlas, stopContinuationGuard, compactionContextInjector... ├─→ createContinuationHooks() # 7: todoContinuationEnforcer, atlas, stopContinuationGuard, ralphLoopActivator...
└─→ createSkillHooks() # 2: categorySkillReminder, autoSlashCommand └─→ createSkillHooks() # 2: categorySkillReminder, autoSlashCommand
``` ```

View File

@@ -39,7 +39,7 @@ export function maybeCreateAtlasConfig(input: {
const atlasRequirement = AGENT_MODEL_REQUIREMENTS["atlas"] const atlasRequirement = AGENT_MODEL_REQUIREMENTS["atlas"]
const atlasResolution = applyModelResolution({ const atlasResolution = applyModelResolution({
uiSelectedModel: orchestratorOverride?.model !== undefined ? undefined : uiSelectedModel, uiSelectedModel: orchestratorOverride?.model ? undefined : uiSelectedModel,
userModel: orchestratorOverride?.model, userModel: orchestratorOverride?.model,
requirement: atlasRequirement, requirement: atlasRequirement,
availableModels, availableModels,

View File

@@ -69,7 +69,7 @@ export function collectPendingBuiltinAgents(input: {
const isPrimaryAgent = isFactory(source) && source.mode === "primary" const isPrimaryAgent = isFactory(source) && source.mode === "primary"
let resolution = applyModelResolution({ let resolution = applyModelResolution({
uiSelectedModel: (isPrimaryAgent && override?.model === undefined) ? uiSelectedModel : undefined, uiSelectedModel: (isPrimaryAgent && !override?.model) ? uiSelectedModel : undefined,
userModel: override?.model, userModel: override?.model,
requirement, requirement,
availableModels, availableModels,

View File

@@ -1,32 +1,20 @@
import { afterAll, beforeAll, describe, expect, mock, test } from "bun:test" import { afterAll, beforeAll, describe, expect, test } from "bun:test"
import { mkdirSync, rmSync, writeFileSync } from "node:fs" import { mkdirSync, rmSync, writeFileSync } from "node:fs"
import * as os from "node:os" import { homedir, tmpdir } from "node:os"
import { tmpdir } from "node:os"
import { join } from "node:path" import { join } from "node:path"
import { resolvePromptAppend } from "./resolve-file-uri"
const originalHomedir = os.homedir.bind(os)
let mockedHomeDir = ""
let moduleImportCounter = 0
let resolvePromptAppend: typeof import("./resolve-file-uri").resolvePromptAppend
mock.module("node:os", () => ({
...os,
homedir: () => mockedHomeDir || originalHomedir(),
}))
describe("resolvePromptAppend", () => { describe("resolvePromptAppend", () => {
const fixtureRoot = join(tmpdir(), `resolve-file-uri-${Date.now()}`) const fixtureRoot = join(tmpdir(), `resolve-file-uri-${Date.now()}`)
const configDir = join(fixtureRoot, "config") const configDir = join(fixtureRoot, "config")
const homeFixtureRoot = join(fixtureRoot, "home") const homeFixtureDir = join(homedir(), `.resolve-file-uri-home-${Date.now()}`)
const homeFixtureDir = join(homeFixtureRoot, "fixture-home")
const absoluteFilePath = join(fixtureRoot, "absolute.txt") const absoluteFilePath = join(fixtureRoot, "absolute.txt")
const relativeFilePath = join(configDir, "relative.txt") const relativeFilePath = join(configDir, "relative.txt")
const spacedFilePath = join(fixtureRoot, "with space.txt") const spacedFilePath = join(fixtureRoot, "with space.txt")
const homeFilePath = join(homeFixtureDir, "home.txt") const homeFilePath = join(homeFixtureDir, "home.txt")
beforeAll(async () => { beforeAll(() => {
mockedHomeDir = homeFixtureRoot
mkdirSync(fixtureRoot, { recursive: true }) mkdirSync(fixtureRoot, { recursive: true })
mkdirSync(configDir, { recursive: true }) mkdirSync(configDir, { recursive: true })
mkdirSync(homeFixtureDir, { recursive: true }) mkdirSync(homeFixtureDir, { recursive: true })
@@ -35,14 +23,11 @@ describe("resolvePromptAppend", () => {
writeFileSync(relativeFilePath, "relative-content", "utf8") writeFileSync(relativeFilePath, "relative-content", "utf8")
writeFileSync(spacedFilePath, "encoded-content", "utf8") writeFileSync(spacedFilePath, "encoded-content", "utf8")
writeFileSync(homeFilePath, "home-content", "utf8") writeFileSync(homeFilePath, "home-content", "utf8")
moduleImportCounter += 1
;({ resolvePromptAppend } = await import(`./resolve-file-uri?test=${moduleImportCounter}`))
}) })
afterAll(() => { afterAll(() => {
rmSync(fixtureRoot, { recursive: true, force: true }) rmSync(fixtureRoot, { recursive: true, force: true })
mock.restore() rmSync(homeFixtureDir, { recursive: true, force: true })
}) })
test("returns non-file URI strings unchanged", () => { test("returns non-file URI strings unchanged", () => {
@@ -80,7 +65,7 @@ describe("resolvePromptAppend", () => {
test("resolves home directory URI path", () => { test("resolves home directory URI path", () => {
//#given //#given
const input = "file://~/fixture-home/home.txt" const input = `file://~/${homeFixtureDir.split("/").pop()}/home.txt`
//#when //#when
const resolved = resolvePromptAppend(input) const resolved = resolvePromptAppend(input)

View File

@@ -52,7 +52,7 @@ export function maybeCreateSisyphusConfig(input: {
if (disabledAgents.includes("sisyphus") || !meetsSisyphusAnyModelRequirement) return undefined if (disabledAgents.includes("sisyphus") || !meetsSisyphusAnyModelRequirement) return undefined
let sisyphusResolution = applyModelResolution({ let sisyphusResolution = applyModelResolution({
uiSelectedModel: sisyphusOverride?.model !== undefined ? undefined : uiSelectedModel, uiSelectedModel: sisyphusOverride?.model ? undefined : uiSelectedModel,
userModel: sisyphusOverride?.model, userModel: sisyphusOverride?.model,
requirement: sisyphusRequirement, requirement: sisyphusRequirement,
availableModels, availableModels,

View File

@@ -181,7 +181,7 @@ describe("buildParallelDelegationSection", () => {
it("#given non-Claude model with deep category #when building #then returns aggressive delegation section", () => { it("#given non-Claude model with deep category #when building #then returns aggressive delegation section", () => {
//#given //#given
const model = "google/gemini-3.1-pro" const model = "google/gemini-3-pro"
const categories = [deepCategory, otherCategory] const categories = [deepCategory, otherCategory]
//#when //#when
@@ -237,7 +237,7 @@ describe("buildParallelDelegationSection", () => {
describe("buildNonClaudePlannerSection", () => { describe("buildNonClaudePlannerSection", () => {
it("#given non-Claude model #when building #then returns plan agent section", () => { it("#given non-Claude model #when building #then returns plan agent section", () => {
//#given //#given
const model = "google/gemini-3.1-pro" const model = "google/gemini-3-pro"
//#when //#when
const result = buildNonClaudePlannerSection(model) const result = buildNonClaudePlannerSection(model)
@@ -272,3 +272,4 @@ describe("buildNonClaudePlannerSection", () => {
}) })
}) })

View File

@@ -162,10 +162,6 @@ Asking the user is the LAST resort after exhausting creative alternatives.
- User asks a question implying work → Answer briefly, DO the implied work in the same turn - User asks a question implying work → Answer briefly, DO the implied work in the same turn
- You wrote a plan in your response → EXECUTE the plan before ending turn — plans are starting lines, not finish lines - You wrote a plan in your response → EXECUTE the plan before ending turn — plans are starting lines, not finish lines
### Task Scope Clarification
You handle multi-step sub-tasks of a SINGLE GOAL. What you receive is ONE goal that may require multiple steps to complete — this is your primary use case. Only reject when given MULTIPLE INDEPENDENT goals in one request.
## Hard Constraints ## Hard Constraints
${hardBlocks} ${hardBlocks}

View File

@@ -121,10 +121,6 @@ When blocked: try a different approach → decompose the problem → challenge a
- User asks a question implying work → Answer briefly, DO the implied work in the same turn - User asks a question implying work → Answer briefly, DO the implied work in the same turn
- You wrote a plan in your response → EXECUTE the plan before ending turn — plans are starting lines, not finish lines - You wrote a plan in your response → EXECUTE the plan before ending turn — plans are starting lines, not finish lines
### Task Scope Clarification
You handle multi-step sub-tasks of a SINGLE GOAL. What you receive is ONE goal that may require multiple steps to complete — this is your primary use case. Only reject when given MULTIPLE INDEPENDENT goals in one request.
## Hard Constraints ## Hard Constraints
${hardBlocks} ${hardBlocks}

View File

@@ -112,10 +112,6 @@ Asking the user is the LAST resort after exhausting creative alternatives.
- Note assumptions in final message, not as questions mid-work - Note assumptions in final message, not as questions mid-work
- Need context? Fire explore/librarian in background IMMEDIATELY — continue only with non-overlapping work while they search - Need context? Fire explore/librarian in background IMMEDIATELY — continue only with non-overlapping work while they search
### Task Scope Clarification
You handle multi-step sub-tasks of a SINGLE GOAL. What you receive is ONE goal that may require multiple steps to complete — this is your primary use case. Only reject when given MULTIPLE INDEPENDENT goals in one request.
## Hard Constraints ## Hard Constraints
${hardBlocks} ${hardBlocks}

View File

@@ -1,42 +0,0 @@
import { describe, it, expect } from "bun:test"
import { getPrometheusPrompt } from "./system-prompt"
describe("getPrometheusPrompt", () => {
describe("#given question tool is not disabled", () => {
describe("#when generating prompt", () => {
it("#then should include Question tool references", () => {
const prompt = getPrometheusPrompt(undefined, [])
expect(prompt).toContain("Question({")
})
})
})
describe("#given question tool is disabled via disabled_tools", () => {
describe("#when generating prompt", () => {
it("#then should strip Question tool code examples", () => {
const prompt = getPrometheusPrompt(undefined, ["question"])
expect(prompt).not.toContain("Question({")
})
})
describe("#when disabled_tools includes question among other tools", () => {
it("#then should strip Question tool code examples", () => {
const prompt = getPrometheusPrompt(undefined, ["todowrite", "question", "interactive_bash"])
expect(prompt).not.toContain("Question({")
})
})
})
describe("#given no disabled_tools provided", () => {
describe("#when generating prompt with undefined", () => {
it("#then should include Question tool references", () => {
const prompt = getPrometheusPrompt(undefined, undefined)
expect(prompt).toContain("Question({")
})
})
})
})

View File

@@ -52,34 +52,16 @@ export function getPrometheusPromptSource(model?: string): PrometheusPromptSourc
* Gemini models → Gemini-optimized prompt (aggressive tool-call enforcement, thinking checkpoints) * Gemini models → Gemini-optimized prompt (aggressive tool-call enforcement, thinking checkpoints)
* Default (Claude, etc.) → Claude-optimized prompt (modular sections) * Default (Claude, etc.) → Claude-optimized prompt (modular sections)
*/ */
export function getPrometheusPrompt(model?: string, disabledTools?: readonly string[]): string { export function getPrometheusPrompt(model?: string): string {
const source = getPrometheusPromptSource(model) const source = getPrometheusPromptSource(model)
const isQuestionDisabled = disabledTools?.includes("question") ?? false
let prompt: string
switch (source) { switch (source) {
case "gpt": case "gpt":
prompt = getGptPrometheusPrompt() return getGptPrometheusPrompt()
break
case "gemini": case "gemini":
prompt = getGeminiPrometheusPrompt() return getGeminiPrometheusPrompt()
break
case "default": case "default":
default: default:
prompt = PROMETHEUS_SYSTEM_PROMPT return PROMETHEUS_SYSTEM_PROMPT
} }
if (isQuestionDisabled) {
prompt = stripQuestionToolReferences(prompt)
}
return prompt
}
/**
* Removes Question tool usage examples from prompt text when question tool is disabled.
*/
function stripQuestionToolReferences(prompt: string): string {
// Remove Question({...}) code blocks (multi-line)
return prompt.replace(/```typescript\n\s*Question\(\{[\s\S]*?\}\)\s*\n```/g, "")
} }

View File

@@ -248,7 +248,8 @@ exports[`generateModelConfig single native provider uses OpenAI models when only
"variant": "medium", "variant": "medium",
}, },
"quick": { "quick": {
"model": "openai/gpt-5.4-mini", "model": "openai/gpt-5.3-codex",
"variant": "low",
}, },
"ultrabrain": { "ultrabrain": {
"model": "openai/gpt-5.4", "model": "openai/gpt-5.4",
@@ -333,7 +334,8 @@ exports[`generateModelConfig single native provider uses OpenAI models with isMa
"variant": "medium", "variant": "medium",
}, },
"quick": { "quick": {
"model": "openai/gpt-5.4-mini", "model": "openai/gpt-5.3-codex",
"variant": "low",
}, },
"ultrabrain": { "ultrabrain": {
"model": "openai/gpt-5.4", "model": "openai/gpt-5.4",
@@ -531,7 +533,7 @@ exports[`generateModelConfig all native providers uses preferred models from fal
"variant": "medium", "variant": "medium",
}, },
"quick": { "quick": {
"model": "openai/gpt-5.4-mini", "model": "anthropic/claude-haiku-4-5",
}, },
"ultrabrain": { "ultrabrain": {
"model": "openai/gpt-5.4", "model": "openai/gpt-5.4",
@@ -606,7 +608,7 @@ exports[`generateModelConfig all native providers uses preferred models with isM
"variant": "medium", "variant": "medium",
}, },
"quick": { "quick": {
"model": "openai/gpt-5.4-mini", "model": "anthropic/claude-haiku-4-5",
}, },
"ultrabrain": { "ultrabrain": {
"model": "openai/gpt-5.4", "model": "openai/gpt-5.4",
@@ -682,7 +684,7 @@ exports[`generateModelConfig fallback providers uses OpenCode Zen models when on
"variant": "medium", "variant": "medium",
}, },
"quick": { "quick": {
"model": "opencode/gpt-5.4-mini", "model": "opencode/claude-haiku-4-5",
}, },
"ultrabrain": { "ultrabrain": {
"model": "opencode/gpt-5.4", "model": "opencode/gpt-5.4",
@@ -757,7 +759,7 @@ exports[`generateModelConfig fallback providers uses OpenCode Zen models with is
"variant": "medium", "variant": "medium",
}, },
"quick": { "quick": {
"model": "opencode/gpt-5.4-mini", "model": "opencode/claude-haiku-4-5",
}, },
"ultrabrain": { "ultrabrain": {
"model": "opencode/gpt-5.4", "model": "opencode/gpt-5.4",
@@ -828,7 +830,7 @@ exports[`generateModelConfig fallback providers uses GitHub Copilot models when
"variant": "high", "variant": "high",
}, },
"quick": { "quick": {
"model": "github-copilot/gpt-5.4-mini", "model": "github-copilot/claude-haiku-4.5",
}, },
"ultrabrain": { "ultrabrain": {
"model": "github-copilot/gemini-3.1-pro-preview", "model": "github-copilot/gemini-3.1-pro-preview",
@@ -898,7 +900,7 @@ exports[`generateModelConfig fallback providers uses GitHub Copilot models with
"variant": "high", "variant": "high",
}, },
"quick": { "quick": {
"model": "github-copilot/gpt-5.4-mini", "model": "github-copilot/claude-haiku-4.5",
}, },
"ultrabrain": { "ultrabrain": {
"model": "github-copilot/gemini-3.1-pro-preview", "model": "github-copilot/gemini-3.1-pro-preview",
@@ -1090,7 +1092,7 @@ exports[`generateModelConfig mixed provider scenarios uses Claude + OpenCode Zen
"variant": "medium", "variant": "medium",
}, },
"quick": { "quick": {
"model": "opencode/gpt-5.4-mini", "model": "anthropic/claude-haiku-4-5",
}, },
"ultrabrain": { "ultrabrain": {
"model": "opencode/gpt-5.4", "model": "opencode/gpt-5.4",
@@ -1165,7 +1167,7 @@ exports[`generateModelConfig mixed provider scenarios uses OpenAI + Copilot comb
"variant": "medium", "variant": "medium",
}, },
"quick": { "quick": {
"model": "openai/gpt-5.4-mini", "model": "github-copilot/claude-haiku-4.5",
}, },
"ultrabrain": { "ultrabrain": {
"model": "openai/gpt-5.4", "model": "openai/gpt-5.4",
@@ -1373,7 +1375,7 @@ exports[`generateModelConfig mixed provider scenarios uses all fallback provider
"variant": "medium", "variant": "medium",
}, },
"quick": { "quick": {
"model": "github-copilot/gpt-5.4-mini", "model": "github-copilot/claude-haiku-4.5",
}, },
"ultrabrain": { "ultrabrain": {
"model": "opencode/gpt-5.4", "model": "opencode/gpt-5.4",
@@ -1451,7 +1453,7 @@ exports[`generateModelConfig mixed provider scenarios uses all providers togethe
"variant": "medium", "variant": "medium",
}, },
"quick": { "quick": {
"model": "openai/gpt-5.4-mini", "model": "anthropic/claude-haiku-4-5",
}, },
"ultrabrain": { "ultrabrain": {
"model": "openai/gpt-5.4", "model": "openai/gpt-5.4",
@@ -1529,7 +1531,7 @@ exports[`generateModelConfig mixed provider scenarios uses all providers with is
"variant": "medium", "variant": "medium",
}, },
"quick": { "quick": {
"model": "openai/gpt-5.4-mini", "model": "anthropic/claude-haiku-4-5",
}, },
"ultrabrain": { "ultrabrain": {
"model": "openai/gpt-5.4", "model": "openai/gpt-5.4",

View File

@@ -42,7 +42,7 @@ Examples:
Model Providers (Priority: Native > Copilot > OpenCode Zen > Z.ai > Kimi): Model Providers (Priority: Native > Copilot > OpenCode Zen > Z.ai > Kimi):
Claude Native anthropic/ models (Opus, Sonnet, Haiku) Claude Native anthropic/ models (Opus, Sonnet, Haiku)
OpenAI Native openai/ models (GPT-5.4 for Oracle) OpenAI Native openai/ models (GPT-5.4 for Oracle)
Gemini Native google/ models (Gemini 3.1 Pro, Flash) Gemini Native google/ models (Gemini 3 Pro, Flash)
Copilot github-copilot/ models (fallback) Copilot github-copilot/ models (fallback)
OpenCode Zen opencode/ models (opencode/claude-opus-4-6, etc.) OpenCode Zen opencode/ models (opencode/claude-opus-4-6, etc.)
Z.ai zai-coding-plan/glm-5 (visual-engineering fallback) Z.ai zai-coding-plan/glm-5 (visual-engineering fallback)

View File

@@ -2,15 +2,15 @@ import { readFileSync } from "node:fs"
import { join } from "node:path" import { join } from "node:path"
import { OhMyOpenCodeConfigSchema } from "../../../config" import { OhMyOpenCodeConfigSchema } from "../../../config"
import { detectPluginConfigFile, getOpenCodeConfigDir, parseJsonc } from "../../../shared" import { detectConfigFile, getOpenCodeConfigDir, parseJsonc } from "../../../shared"
import { CHECK_IDS, CHECK_NAMES, PACKAGE_NAME } from "../constants" import { CHECK_IDS, CHECK_NAMES, PACKAGE_NAME } from "../constants"
import type { CheckResult, DoctorIssue } from "../types" import type { CheckResult, DoctorIssue } from "../types"
import { loadAvailableModelsFromCache } from "./model-resolution-cache" import { loadAvailableModelsFromCache } from "./model-resolution-cache"
import { getModelResolutionInfoWithOverrides } from "./model-resolution" import { getModelResolutionInfoWithOverrides } from "./model-resolution"
import type { OmoConfig } from "./model-resolution-types" import type { OmoConfig } from "./model-resolution-types"
const USER_CONFIG_DIR = getOpenCodeConfigDir({ binary: "opencode" }) const USER_CONFIG_BASE = join(getOpenCodeConfigDir({ binary: "opencode" }), PACKAGE_NAME)
const PROJECT_CONFIG_DIR = join(process.cwd(), ".opencode") const PROJECT_CONFIG_BASE = join(process.cwd(), ".opencode", PACKAGE_NAME)
interface ConfigValidationResult { interface ConfigValidationResult {
exists: boolean exists: boolean
@@ -21,10 +21,10 @@ interface ConfigValidationResult {
} }
function findConfigPath(): string | null { function findConfigPath(): string | null {
const projectConfig = detectPluginConfigFile(PROJECT_CONFIG_DIR) const projectConfig = detectConfigFile(PROJECT_CONFIG_BASE)
if (projectConfig.format !== "none") return projectConfig.path if (projectConfig.format !== "none") return projectConfig.path
const userConfig = detectPluginConfigFile(USER_CONFIG_DIR) const userConfig = detectConfigFile(USER_CONFIG_BASE)
if (userConfig.format !== "none") return userConfig.path if (userConfig.format !== "none") return userConfig.path
return null return null

View File

@@ -1,13 +1,17 @@
import { readFileSync } from "node:fs" import { readFileSync } from "node:fs"
import { join } from "node:path" import { join } from "node:path"
import { detectPluginConfigFile, getOpenCodeConfigPaths, parseJsonc } from "../../../shared" import { detectConfigFile, getOpenCodeConfigPaths, parseJsonc } from "../../../shared"
import type { OmoConfig } from "./model-resolution-types" import type { OmoConfig } from "./model-resolution-types"
const USER_CONFIG_DIR = getOpenCodeConfigPaths({ binary: "opencode", version: null }).configDir const PACKAGE_NAME = "oh-my-opencode"
const PROJECT_CONFIG_DIR = join(process.cwd(), ".opencode") const USER_CONFIG_BASE = join(
getOpenCodeConfigPaths({ binary: "opencode", version: null }).configDir,
PACKAGE_NAME
)
const PROJECT_CONFIG_BASE = join(process.cwd(), ".opencode", PACKAGE_NAME)
export function loadOmoConfig(): OmoConfig | null { export function loadOmoConfig(): OmoConfig | null {
const projectDetected = detectPluginConfigFile(PROJECT_CONFIG_DIR) const projectDetected = detectConfigFile(PROJECT_CONFIG_BASE)
if (projectDetected.format !== "none") { if (projectDetected.format !== "none") {
try { try {
const content = readFileSync(projectDetected.path, "utf-8") const content = readFileSync(projectDetected.path, "utf-8")
@@ -17,7 +21,7 @@ export function loadOmoConfig(): OmoConfig | null {
} }
} }
const userDetected = detectPluginConfigFile(USER_CONFIG_DIR) const userDetected = detectConfigFile(USER_CONFIG_BASE)
if (userDetected.format !== "none") { if (userDetected.format !== "none") {
try { try {
const content = readFileSync(userDetected.path, "utf-8") const content = readFileSync(userDetected.path, "utf-8")

View File

@@ -53,14 +53,6 @@ describe("install CLI - binary check behavior", () => {
isOpenCodeInstalledSpy = spyOn(configManager, "isOpenCodeInstalled").mockResolvedValue(false) isOpenCodeInstalledSpy = spyOn(configManager, "isOpenCodeInstalled").mockResolvedValue(false)
getOpenCodeVersionSpy = spyOn(configManager, "getOpenCodeVersion").mockResolvedValue(null) getOpenCodeVersionSpy = spyOn(configManager, "getOpenCodeVersion").mockResolvedValue(null)
// given mock npm fetch
globalThis.fetch = mock(() =>
Promise.resolve({
ok: true,
json: () => Promise.resolve({ latest: "3.0.0" }),
} as Response)
) as unknown as typeof fetch
const args: InstallArgs = { const args: InstallArgs = {
tui: false, tui: false,
claude: "yes", claude: "yes",

View File

@@ -40,7 +40,7 @@ describe("generateModelConfig OpenAI-only model catalog", () => {
// #then // #then
expect(result.categories?.artistry).toEqual({ model: "openai/gpt-5.4", variant: "xhigh" }) expect(result.categories?.artistry).toEqual({ model: "openai/gpt-5.4", variant: "xhigh" })
expect(result.categories?.quick).toEqual({ model: "openai/gpt-5.4-mini" }) expect(result.categories?.quick).toEqual({ model: "openai/gpt-5.3-codex", variant: "low" })
expect(result.categories?.["visual-engineering"]).toEqual({ model: "openai/gpt-5.4", variant: "high" }) expect(result.categories?.["visual-engineering"]).toEqual({ model: "openai/gpt-5.4", variant: "high" })
expect(result.categories?.writing).toEqual({ model: "openai/gpt-5.4", variant: "medium" }) expect(result.categories?.writing).toEqual({ model: "openai/gpt-5.4", variant: "medium" })
}) })
@@ -55,6 +55,6 @@ describe("generateModelConfig OpenAI-only model catalog", () => {
// #then // #then
expect(result.agents?.explore).toEqual({ model: "opencode-go/minimax-m2.5" }) expect(result.agents?.explore).toEqual({ model: "opencode-go/minimax-m2.5" })
expect(result.agents?.librarian).toEqual({ model: "opencode-go/minimax-m2.5" }) expect(result.agents?.librarian).toEqual({ model: "opencode-go/minimax-m2.5" })
expect(result.categories?.quick).toEqual({ model: "openai/gpt-5.4-mini" }) expect(result.categories?.quick).toEqual({ model: "opencode-go/minimax-m2.5" })
}) })
}) })

View File

@@ -7,7 +7,7 @@ const OPENAI_ONLY_AGENT_OVERRIDES: Record<string, AgentConfig> = {
const OPENAI_ONLY_CATEGORY_OVERRIDES: Record<string, CategoryConfig> = { const OPENAI_ONLY_CATEGORY_OVERRIDES: Record<string, CategoryConfig> = {
artistry: { model: "openai/gpt-5.4", variant: "xhigh" }, artistry: { model: "openai/gpt-5.4", variant: "xhigh" },
quick: { model: "openai/gpt-5.4-mini" }, quick: { model: "openai/gpt-5.3-codex", variant: "low" },
"visual-engineering": { model: "openai/gpt-5.4", variant: "high" }, "visual-engineering": { model: "openai/gpt-5.4", variant: "high" },
writing: { model: "openai/gpt-5.4", variant: "medium" }, writing: { model: "openai/gpt-5.4", variant: "medium" },
} }

View File

@@ -45,26 +45,26 @@ export function writePaddedText(
return { output: text, atLineStart: text.endsWith("\n") } return { output: text, atLineStart: text.endsWith("\n") }
} }
const parts: string[] = [] let output = ""
let lineStart = atLineStart let lineStart = atLineStart
for (let i = 0; i < text.length; i++) { for (let i = 0; i < text.length; i++) {
const ch = text[i] const ch = text[i]
if (lineStart) { if (lineStart) {
parts.push(" ") output += " "
lineStart = false lineStart = false
} }
if (ch === "\n") { if (ch === "\n") {
parts.push(" \n") output += " \n"
lineStart = true lineStart = true
continue continue
} }
parts.push(ch) output += ch
} }
return { output: parts.join(""), atLineStart: lineStart } return { output, atLineStart: lineStart }
} }
function colorizeWithProfileColor(text: string, hexColor?: string): string { function colorizeWithProfileColor(text: string, hexColor?: string): string {

View File

@@ -115,42 +115,6 @@ describe("waitForEventProcessorShutdown", () => {
}) })
}) })
describe("run environment setup", () => {
let originalClient: string | undefined
let originalRunMode: string | undefined
beforeEach(() => {
originalClient = process.env.OPENCODE_CLIENT
originalRunMode = process.env.OPENCODE_CLI_RUN_MODE
})
afterEach(() => {
if (originalClient === undefined) {
delete process.env.OPENCODE_CLIENT
} else {
process.env.OPENCODE_CLIENT = originalClient
}
if (originalRunMode === undefined) {
delete process.env.OPENCODE_CLI_RUN_MODE
} else {
process.env.OPENCODE_CLI_RUN_MODE = originalRunMode
}
})
it("sets OPENCODE_CLIENT to 'run' to exclude question tool from registry", async () => {
//#given
delete process.env.OPENCODE_CLIENT
//#when - run() sets env vars synchronously before any async work
const { run } = await import(`./runner?env-setup-${Date.now()}`)
run({ message: "test" }).catch(() => {})
//#then
expect(String(process.env.OPENCODE_CLIENT)).toBe("run")
expect(String(process.env.OPENCODE_CLI_RUN_MODE)).toBe("true")
})
})
describe("run with invalid model", () => { describe("run with invalid model", () => {
it("given invalid --model value, when run, then returns exit code 1 with error message", async () => { it("given invalid --model value, when run, then returns exit code 1 with error message", async () => {
// given // given

View File

@@ -31,7 +31,6 @@ export async function waitForEventProcessorShutdown(
export async function run(options: RunOptions): Promise<number> { export async function run(options: RunOptions): Promise<number> {
process.env.OPENCODE_CLI_RUN_MODE = "true" process.env.OPENCODE_CLI_RUN_MODE = "true"
process.env.OPENCODE_CLIENT = "run"
const startTime = Date.now() const startTime = Date.now()
const { const {

View File

@@ -54,7 +54,7 @@ export async function promptInstallConfig(detected: DetectedConfig): Promise<Ins
message: "Will you integrate Google Gemini?", message: "Will you integrate Google Gemini?",
options: [ options: [
{ value: "no", label: "No", hint: "Frontend/docs agents will use fallback" }, { value: "no", label: "No", hint: "Frontend/docs agents will use fallback" },
{ value: "yes", label: "Yes", hint: "Beautiful UI generation with Gemini 3.1 Pro" }, { value: "yes", label: "Yes", hint: "Beautiful UI generation with Gemini 3 Pro" },
], ],
initialValue: initial.gemini, initialValue: initial.gemini,
}) })

View File

@@ -14,7 +14,7 @@ config/schema/
├── agent-names.ts # BuiltinAgentNameSchema (11), OverridableAgentNameSchema (14) ├── agent-names.ts # BuiltinAgentNameSchema (11), OverridableAgentNameSchema (14)
├── agent-overrides.ts # AgentOverrideConfigSchema (21 fields per agent) ├── agent-overrides.ts # AgentOverrideConfigSchema (21 fields per agent)
├── categories.ts # 8 built-in + custom categories ├── categories.ts # 8 built-in + custom categories
├── hooks.ts # HookNameSchema (48 hooks) ├── hooks.ts # HookNameSchema (46 hooks)
├── skills.ts # SkillsConfigSchema (sources, paths, recursive) ├── skills.ts # SkillsConfigSchema (sources, paths, recursive)
├── commands.ts # BuiltinCommandNameSchema ├── commands.ts # BuiltinCommandNameSchema
├── experimental.ts # Feature flags (plugin_load_timeout_ms min 1000) ├── experimental.ts # Feature flags (plugin_load_timeout_ms min 1000)

View File

@@ -8,24 +8,27 @@ describe("BackgroundTaskConfigSchema.circuitBreaker", () => {
const result = BackgroundTaskConfigSchema.parse({ const result = BackgroundTaskConfigSchema.parse({
circuitBreaker: { circuitBreaker: {
maxToolCalls: 150, maxToolCalls: 150,
consecutiveThreshold: 10, windowSize: 10,
repetitionThresholdPercent: 70,
}, },
}) })
expect(result.circuitBreaker).toEqual({ expect(result.circuitBreaker).toEqual({
maxToolCalls: 150, maxToolCalls: 150,
consecutiveThreshold: 10, windowSize: 10,
repetitionThresholdPercent: 70,
}) })
}) })
}) })
describe("#given consecutiveThreshold below minimum", () => { describe("#given windowSize below minimum", () => {
test("#when parsed #then throws ZodError", () => { test("#when parsed #then throws ZodError", () => {
let thrownError: unknown let thrownError: unknown
try { try {
BackgroundTaskConfigSchema.parse({ BackgroundTaskConfigSchema.parse({
circuitBreaker: { circuitBreaker: {
consecutiveThreshold: 4, windowSize: 4,
}, },
}) })
} catch (error) { } catch (error) {
@@ -36,14 +39,14 @@ describe("BackgroundTaskConfigSchema.circuitBreaker", () => {
}) })
}) })
describe("#given consecutiveThreshold is zero", () => { describe("#given repetitionThresholdPercent is zero", () => {
test("#when parsed #then throws ZodError", () => { test("#when parsed #then throws ZodError", () => {
let thrownError: unknown let thrownError: unknown
try { try {
BackgroundTaskConfigSchema.parse({ BackgroundTaskConfigSchema.parse({
circuitBreaker: { circuitBreaker: {
consecutiveThreshold: 0, repetitionThresholdPercent: 0,
}, },
}) })
} catch (error) { } catch (error) {

View File

@@ -3,7 +3,8 @@ import { z } from "zod"
const CircuitBreakerConfigSchema = z.object({ const CircuitBreakerConfigSchema = z.object({
enabled: z.boolean().optional(), enabled: z.boolean().optional(),
maxToolCalls: z.number().int().min(10).optional(), maxToolCalls: z.number().int().min(10).optional(),
consecutiveThreshold: z.number().int().min(5).optional(), windowSize: z.number().int().min(5).optional(),
repetitionThresholdPercent: z.number().gt(0).max(100).optional(),
}) })
export const BackgroundTaskConfigSchema = z.object({ export const BackgroundTaskConfigSchema = z.object({

View File

@@ -1,6 +1,7 @@
import { z } from "zod" import { z } from "zod"
export const HookNameSchema = z.enum([ export const HookNameSchema = z.enum([
"gpt-permission-continuation",
"todo-continuation-enforcer", "todo-continuation-enforcer",
"context-window-monitor", "context-window-monitor",
"session-recovery", "session-recovery",
@@ -51,7 +52,6 @@ export const HookNameSchema = z.enum([
"hashline-read-enhancer", "hashline-read-enhancer",
"read-image-resizer", "read-image-resizer",
"todo-description-override", "todo-description-override",
"webfetch-redirect-guard",
]) ])
export type HookName = z.infer<typeof HookNameSchema> export type HookName = z.infer<typeof HookNameSchema>

View File

@@ -12,7 +12,6 @@ import { BuiltinCommandNameSchema } from "./commands"
import { ExperimentalConfigSchema } from "./experimental" import { ExperimentalConfigSchema } from "./experimental"
import { GitMasterConfigSchema } from "./git-master" import { GitMasterConfigSchema } from "./git-master"
import { NotificationConfigSchema } from "./notification" import { NotificationConfigSchema } from "./notification"
import { OpenClawConfigSchema } from "./openclaw"
import { RalphLoopConfigSchema } from "./ralph-loop" import { RalphLoopConfigSchema } from "./ralph-loop"
import { RuntimeFallbackConfigSchema } from "./runtime-fallback" import { RuntimeFallbackConfigSchema } from "./runtime-fallback"
import { SkillsConfigSchema } from "./skills" import { SkillsConfigSchema } from "./skills"
@@ -56,7 +55,6 @@ export const OhMyOpenCodeConfigSchema = z.object({
runtime_fallback: z.union([z.boolean(), RuntimeFallbackConfigSchema]).optional(), runtime_fallback: z.union([z.boolean(), RuntimeFallbackConfigSchema]).optional(),
background_task: BackgroundTaskConfigSchema.optional(), background_task: BackgroundTaskConfigSchema.optional(),
notification: NotificationConfigSchema.optional(), notification: NotificationConfigSchema.optional(),
openclaw: OpenClawConfigSchema.optional(),
babysitting: BabysittingConfigSchema.optional(), babysitting: BabysittingConfigSchema.optional(),
git_master: GitMasterConfigSchema.optional(), git_master: GitMasterConfigSchema.optional(),
browser_automation_engine: BrowserAutomationConfigSchema.optional(), browser_automation_engine: BrowserAutomationConfigSchema.optional(),

View File

@@ -1,50 +0,0 @@
import { z } from "zod"
export const OpenClawGatewaySchema = z.object({
type: z.enum(["http", "command"]).default("http"),
// HTTP specific
url: z.string().optional(),
method: z.string().default("POST"),
headers: z.record(z.string(), z.string()).optional(),
// Command specific
command: z.string().optional(),
// Shared
timeout: z.number().optional(),
})
export const OpenClawHookSchema = z.object({
enabled: z.boolean().default(true),
gateway: z.string(),
instruction: z.string(),
})
export const OpenClawReplyListenerConfigSchema = z.object({
discordBotToken: z.string().optional(),
discordChannelId: z.string().optional(),
discordMention: z.string().optional(), // For allowed_mentions
authorizedDiscordUserIds: z.array(z.string()).default([]),
telegramBotToken: z.string().optional(),
telegramChatId: z.string().optional(),
pollIntervalMs: z.number().default(3000),
rateLimitPerMinute: z.number().default(10),
maxMessageLength: z.number().default(500),
includePrefix: z.boolean().default(true),
})
export const OpenClawConfigSchema = z.object({
enabled: z.boolean().default(false),
// Outbound Configuration
gateways: z.record(z.string(), OpenClawGatewaySchema).default({}),
hooks: z.record(z.string(), OpenClawHookSchema).default({}),
// Inbound Configuration (Reply Listener)
replyListener: OpenClawReplyListenerConfigSchema.optional(),
})
export type OpenClawConfig = z.infer<typeof OpenClawConfigSchema>
export type OpenClawGateway = z.infer<typeof OpenClawGatewaySchema>
export type OpenClawHook = z.infer<typeof OpenClawHookSchema>
export type OpenClawReplyListenerConfig = z.infer<typeof OpenClawReplyListenerConfigSchema>

View File

@@ -4,10 +4,11 @@ import type { BackgroundTask, LaunchInput } from "./types"
export const TASK_TTL_MS = 30 * 60 * 1000 export const TASK_TTL_MS = 30 * 60 * 1000
export const TERMINAL_TASK_TTL_MS = 30 * 60 * 1000 export const TERMINAL_TASK_TTL_MS = 30 * 60 * 1000
export const MIN_STABILITY_TIME_MS = 10 * 1000 export const MIN_STABILITY_TIME_MS = 10 * 1000
export const DEFAULT_STALE_TIMEOUT_MS = 2_700_000 export const DEFAULT_STALE_TIMEOUT_MS = 1_200_000
export const DEFAULT_MESSAGE_STALENESS_TIMEOUT_MS = 3_600_000 export const DEFAULT_MESSAGE_STALENESS_TIMEOUT_MS = 1_800_000
export const DEFAULT_MAX_TOOL_CALLS = 4000 export const DEFAULT_MAX_TOOL_CALLS = 200
export const DEFAULT_CIRCUIT_BREAKER_CONSECUTIVE_THRESHOLD = 20 export const DEFAULT_CIRCUIT_BREAKER_WINDOW_SIZE = 20
export const DEFAULT_CIRCUIT_BREAKER_REPETITION_THRESHOLD_PERCENT = 80
export const DEFAULT_CIRCUIT_BREAKER_ENABLED = true export const DEFAULT_CIRCUIT_BREAKER_ENABLED = true
export const MIN_RUNTIME_BEFORE_STALE_MS = 30_000 export const MIN_RUNTIME_BEFORE_STALE_MS = 30_000
export const MIN_IDLE_TIME_MS = 5000 export const MIN_IDLE_TIME_MS = 5000

View File

@@ -21,9 +21,9 @@ function createRunningTask(startedAt: Date): BackgroundTask {
} }
describe("DEFAULT_MESSAGE_STALENESS_TIMEOUT_MS", () => { describe("DEFAULT_MESSAGE_STALENESS_TIMEOUT_MS", () => {
test("uses a 60 minute default", () => { test("uses a 30 minute default", () => {
// #given // #given
const expectedTimeout = 60 * 60 * 1000 const expectedTimeout = 30 * 60 * 1000
// #when // #when
const timeout = DEFAULT_MESSAGE_STALENESS_TIMEOUT_MS const timeout = DEFAULT_MESSAGE_STALENESS_TIMEOUT_MS

View File

@@ -4,9 +4,9 @@ const { describe, expect, test } = require("bun:test")
import { DEFAULT_STALE_TIMEOUT_MS } from "./constants" import { DEFAULT_STALE_TIMEOUT_MS } from "./constants"
describe("DEFAULT_STALE_TIMEOUT_MS", () => { describe("DEFAULT_STALE_TIMEOUT_MS", () => {
test("uses a 45 minute default", () => { test("uses a 20 minute default", () => {
// #given // #given
const expectedTimeout = 45 * 60 * 1000 const expectedTimeout = 20 * 60 * 1000
// #when // #when
const timeout = DEFAULT_STALE_TIMEOUT_MS const timeout = DEFAULT_STALE_TIMEOUT_MS

View File

@@ -19,8 +19,6 @@ mock.module("../../shared/provider-model-id-transform", () => ({
import { tryFallbackRetry } from "./fallback-retry-handler" import { tryFallbackRetry } from "./fallback-retry-handler"
import { shouldRetryError } from "../../shared/model-error-classifier" import { shouldRetryError } from "../../shared/model-error-classifier"
import { selectFallbackProvider } from "../../shared/model-error-classifier"
import { readProviderModelsCache } from "../../shared"
import type { BackgroundTask } from "./types" import type { BackgroundTask } from "./types"
import type { ConcurrencyManager } from "./concurrency" import type { ConcurrencyManager } from "./concurrency"
@@ -84,8 +82,6 @@ function createDefaultArgs(taskOverrides: Partial<BackgroundTask> = {}) {
describe("tryFallbackRetry", () => { describe("tryFallbackRetry", () => {
beforeEach(() => { beforeEach(() => {
;(shouldRetryError as any).mockImplementation(() => true) ;(shouldRetryError as any).mockImplementation(() => true)
;(selectFallbackProvider as any).mockImplementation((providers: string[]) => providers[0])
;(readProviderModelsCache as any).mockReturnValue(null)
}) })
describe("#given retryable error with fallback chain", () => { describe("#given retryable error with fallback chain", () => {
@@ -271,24 +267,4 @@ describe("tryFallbackRetry", () => {
expect(args.task.attemptCount).toBe(2) expect(args.task.attemptCount).toBe(2)
}) })
}) })
describe("#given disconnected fallback providers with connected preferred provider", () => {
test("keeps fallback entry and selects connected preferred provider", () => {
;(readProviderModelsCache as any).mockReturnValue({ connected: ["provider-a"] })
;(selectFallbackProvider as any).mockImplementation(
(_providers: string[], preferredProviderID?: string) => preferredProviderID ?? "provider-b",
)
const args = createDefaultArgs({
fallbackChain: [{ model: "fallback-model-1", providers: ["provider-b"], variant: undefined }],
model: { providerID: "provider-a", modelID: "original-model" },
})
const result = tryFallbackRetry(args)
expect(result).toBe(true)
expect(args.task.model?.providerID).toBe("provider-a")
expect(args.task.model?.modelID).toBe("fallback-model-1")
})
})
}) })

View File

@@ -35,14 +35,10 @@ export function tryFallbackRetry(args: {
const providerModelsCache = readProviderModelsCache() const providerModelsCache = readProviderModelsCache()
const connectedProviders = providerModelsCache?.connected ?? readConnectedProvidersCache() const connectedProviders = providerModelsCache?.connected ?? readConnectedProvidersCache()
const connectedSet = connectedProviders ? new Set(connectedProviders.map(p => p.toLowerCase())) : null const connectedSet = connectedProviders ? new Set(connectedProviders.map(p => p.toLowerCase())) : null
const preferredProvider = task.model?.providerID?.toLowerCase()
const isReachable = (entry: FallbackEntry): boolean => { const isReachable = (entry: FallbackEntry): boolean => {
if (!connectedSet) return true if (!connectedSet) return true
if (entry.providers.some((provider) => connectedSet.has(provider.toLowerCase()))) { return entry.providers.some((p) => connectedSet.has(p.toLowerCase()))
return true
}
return preferredProvider ? connectedSet.has(preferredProvider) : false
} }
let selectedAttemptCount = attemptCount let selectedAttemptCount = attemptCount

View File

@@ -1,5 +1,3 @@
/// <reference types="bun-types" />
import { describe, expect, test } from "bun:test" import { describe, expect, test } from "bun:test"
import { import {
createToolCallSignature, createToolCallSignature,
@@ -21,7 +19,7 @@ function buildWindow(
} }
function buildWindowWithInputs( function buildWindowWithInputs(
calls: Array<{ tool: string; input?: Record<string, unknown> | null }>, calls: Array<{ tool: string; input?: Record<string, unknown> }>,
override?: Parameters<typeof resolveCircuitBreakerSettings>[0] override?: Parameters<typeof resolveCircuitBreakerSettings>[0]
) { ) {
const settings = resolveCircuitBreakerSettings(override) const settings = resolveCircuitBreakerSettings(override)
@@ -39,14 +37,16 @@ describe("loop-detector", () => {
maxToolCalls: 200, maxToolCalls: 200,
circuitBreaker: { circuitBreaker: {
maxToolCalls: 120, maxToolCalls: 120,
consecutiveThreshold: 7, windowSize: 10,
repetitionThresholdPercent: 70,
}, },
}) })
expect(result).toEqual({ expect(result).toEqual({
enabled: true, enabled: true,
maxToolCalls: 120, maxToolCalls: 120,
consecutiveThreshold: 7, windowSize: 10,
repetitionThresholdPercent: 70,
}) })
}) })
}) })
@@ -56,7 +56,8 @@ describe("loop-detector", () => {
const result = resolveCircuitBreakerSettings({ const result = resolveCircuitBreakerSettings({
circuitBreaker: { circuitBreaker: {
maxToolCalls: 100, maxToolCalls: 100,
consecutiveThreshold: 5, windowSize: 5,
repetitionThresholdPercent: 60,
}, },
}) })
@@ -70,7 +71,8 @@ describe("loop-detector", () => {
circuitBreaker: { circuitBreaker: {
enabled: false, enabled: false,
maxToolCalls: 100, maxToolCalls: 100,
consecutiveThreshold: 5, windowSize: 5,
repetitionThresholdPercent: 60,
}, },
}) })
@@ -84,7 +86,8 @@ describe("loop-detector", () => {
circuitBreaker: { circuitBreaker: {
enabled: true, enabled: true,
maxToolCalls: 100, maxToolCalls: 100,
consecutiveThreshold: 5, windowSize: 5,
repetitionThresholdPercent: 60,
}, },
}) })
@@ -148,67 +151,55 @@ describe("loop-detector", () => {
}) })
}) })
describe("#given the same tool is called consecutively", () => { describe("#given the same tool dominates the recent window", () => {
test("#when evaluated #then it triggers", () => { test("#when evaluated #then it triggers", () => {
const window = buildWindowWithInputs( const window = buildWindow([
Array.from({ length: 20 }, () => ({ "read",
tool: "read", "read",
input: { filePath: "/src/same.ts" }, "read",
})) "edit",
) "read",
"read",
"read",
"read",
"grep",
"read",
], {
circuitBreaker: {
windowSize: 10,
repetitionThresholdPercent: 80,
},
})
const result = detectRepetitiveToolUse(window) const result = detectRepetitiveToolUse(window)
expect(result).toEqual({ expect(result).toEqual({
triggered: true, triggered: true,
toolName: "read", toolName: "read",
repeatedCount: 20, repeatedCount: 8,
sampleSize: 10,
thresholdPercent: 80,
}) })
}) })
}) })
describe("#given consecutive calls are interrupted by different tool", () => { describe("#given the window is not full yet", () => {
test("#when evaluated #then it does not trigger", () => { test("#when the current sample crosses the threshold #then it still triggers", () => {
const window = buildWindow([ const window = buildWindow(["read", "read", "edit", "read", "read", "read", "read", "read"], {
...Array.from({ length: 19 }, () => "read"), circuitBreaker: {
"edit", windowSize: 10,
"read", repetitionThresholdPercent: 80,
]) },
})
const result = detectRepetitiveToolUse(window) const result = detectRepetitiveToolUse(window)
expect(result).toEqual({ triggered: false })
})
})
describe("#given threshold boundary", () => {
test("#when below threshold #then it does not trigger", () => {
const belowThresholdWindow = buildWindowWithInputs(
Array.from({ length: 19 }, () => ({
tool: "read",
input: { filePath: "/src/same.ts" },
}))
)
const result = detectRepetitiveToolUse(belowThresholdWindow)
expect(result).toEqual({ triggered: false })
})
test("#when equal to threshold #then it triggers", () => {
const atThresholdWindow = buildWindowWithInputs(
Array.from({ length: 20 }, () => ({
tool: "read",
input: { filePath: "/src/same.ts" },
}))
)
const result = detectRepetitiveToolUse(atThresholdWindow)
expect(result).toEqual({ expect(result).toEqual({
triggered: true, triggered: true,
toolName: "read", toolName: "read",
repeatedCount: 20, repeatedCount: 7,
sampleSize: 8,
thresholdPercent: 80,
}) })
}) })
}) })
@@ -219,7 +210,9 @@ describe("loop-detector", () => {
tool: "read", tool: "read",
input: { filePath: `/src/file-${i}.ts` }, input: { filePath: `/src/file-${i}.ts` },
})) }))
const window = buildWindowWithInputs(calls) const window = buildWindowWithInputs(calls, {
circuitBreaker: { windowSize: 20, repetitionThresholdPercent: 80 },
})
const result = detectRepetitiveToolUse(window) const result = detectRepetitiveToolUse(window)
expect(result.triggered).toBe(false) expect(result.triggered).toBe(false)
}) })
@@ -227,36 +220,38 @@ describe("loop-detector", () => {
describe("#given same tool with identical file inputs", () => { describe("#given same tool with identical file inputs", () => {
test("#when evaluated #then it triggers with bare tool name", () => { test("#when evaluated #then it triggers with bare tool name", () => {
const calls = Array.from({ length: 20 }, () => ({ const calls = [
tool: "read", ...Array.from({ length: 16 }, () => ({ tool: "read", input: { filePath: "/src/same.ts" } })),
input: { filePath: "/src/same.ts" }, { tool: "grep", input: { pattern: "foo" } },
})) { tool: "edit", input: { filePath: "/src/other.ts" } },
const window = buildWindowWithInputs(calls) { tool: "bash", input: { command: "ls" } },
const result = detectRepetitiveToolUse(window) { tool: "glob", input: { pattern: "**/*.ts" } },
expect(result).toEqual({ ]
triggered: true, const window = buildWindowWithInputs(calls, {
toolName: "read", circuitBreaker: { windowSize: 20, repetitionThresholdPercent: 80 },
repeatedCount: 20,
}) })
const result = detectRepetitiveToolUse(window)
expect(result.triggered).toBe(true)
expect(result.toolName).toBe("read")
expect(result.repeatedCount).toBe(16)
}) })
}) })
describe("#given tool calls with undefined input", () => { describe("#given tool calls with no input", () => {
test("#when evaluated #then it does not trigger", () => { test("#when the same tool dominates #then falls back to name-only detection", () => {
const calls = Array.from({ length: 20 }, () => ({ tool: "read" })) const calls = [
const window = buildWindowWithInputs(calls) ...Array.from({ length: 16 }, () => ({ tool: "read" })),
{ tool: "grep" },
{ tool: "edit" },
{ tool: "bash" },
{ tool: "glob" },
]
const window = buildWindowWithInputs(calls, {
circuitBreaker: { windowSize: 20, repetitionThresholdPercent: 80 },
})
const result = detectRepetitiveToolUse(window) const result = detectRepetitiveToolUse(window)
expect(result).toEqual({ triggered: false }) expect(result.triggered).toBe(true)
}) expect(result.toolName).toBe("read")
})
describe("#given tool calls with null input", () => {
test("#when evaluated #then it does not trigger", () => {
const calls = Array.from({ length: 20 }, () => ({ tool: "read", input: null }))
const window = buildWindowWithInputs(calls)
const result = detectRepetitiveToolUse(window)
expect(result).toEqual({ triggered: false })
}) })
}) })
}) })

View File

@@ -1,7 +1,8 @@
import type { BackgroundTaskConfig } from "../../config/schema" import type { BackgroundTaskConfig } from "../../config/schema"
import { import {
DEFAULT_CIRCUIT_BREAKER_ENABLED, DEFAULT_CIRCUIT_BREAKER_ENABLED,
DEFAULT_CIRCUIT_BREAKER_CONSECUTIVE_THRESHOLD, DEFAULT_CIRCUIT_BREAKER_REPETITION_THRESHOLD_PERCENT,
DEFAULT_CIRCUIT_BREAKER_WINDOW_SIZE,
DEFAULT_MAX_TOOL_CALLS, DEFAULT_MAX_TOOL_CALLS,
} from "./constants" } from "./constants"
import type { ToolCallWindow } from "./types" import type { ToolCallWindow } from "./types"
@@ -9,13 +10,16 @@ import type { ToolCallWindow } from "./types"
export interface CircuitBreakerSettings { export interface CircuitBreakerSettings {
enabled: boolean enabled: boolean
maxToolCalls: number maxToolCalls: number
consecutiveThreshold: number windowSize: number
repetitionThresholdPercent: number
} }
export interface ToolLoopDetectionResult { export interface ToolLoopDetectionResult {
triggered: boolean triggered: boolean
toolName?: string toolName?: string
repeatedCount?: number repeatedCount?: number
sampleSize?: number
thresholdPercent?: number
} }
export function resolveCircuitBreakerSettings( export function resolveCircuitBreakerSettings(
@@ -25,8 +29,10 @@ export function resolveCircuitBreakerSettings(
enabled: config?.circuitBreaker?.enabled ?? DEFAULT_CIRCUIT_BREAKER_ENABLED, enabled: config?.circuitBreaker?.enabled ?? DEFAULT_CIRCUIT_BREAKER_ENABLED,
maxToolCalls: maxToolCalls:
config?.circuitBreaker?.maxToolCalls ?? config?.maxToolCalls ?? DEFAULT_MAX_TOOL_CALLS, config?.circuitBreaker?.maxToolCalls ?? config?.maxToolCalls ?? DEFAULT_MAX_TOOL_CALLS,
consecutiveThreshold: windowSize: config?.circuitBreaker?.windowSize ?? DEFAULT_CIRCUIT_BREAKER_WINDOW_SIZE,
config?.circuitBreaker?.consecutiveThreshold ?? DEFAULT_CIRCUIT_BREAKER_CONSECUTIVE_THRESHOLD, repetitionThresholdPercent:
config?.circuitBreaker?.repetitionThresholdPercent ??
DEFAULT_CIRCUIT_BREAKER_REPETITION_THRESHOLD_PERCENT,
} }
} }
@@ -36,28 +42,14 @@ export function recordToolCall(
settings: CircuitBreakerSettings, settings: CircuitBreakerSettings,
toolInput?: Record<string, unknown> | null toolInput?: Record<string, unknown> | null
): ToolCallWindow { ): ToolCallWindow {
if (toolInput === undefined || toolInput === null) { const previous = window?.toolSignatures ?? []
return {
lastSignature: `${toolName}::__unknown-input__`,
consecutiveCount: 1,
threshold: settings.consecutiveThreshold,
}
}
const signature = createToolCallSignature(toolName, toolInput) const signature = createToolCallSignature(toolName, toolInput)
const toolSignatures = [...previous, signature].slice(-settings.windowSize)
if (window && window.lastSignature === signature) {
return {
lastSignature: signature,
consecutiveCount: window.consecutiveCount + 1,
threshold: settings.consecutiveThreshold,
}
}
return { return {
lastSignature: signature, toolSignatures,
consecutiveCount: 1, windowSize: settings.windowSize,
threshold: settings.consecutiveThreshold, thresholdPercent: settings.repetitionThresholdPercent,
} }
} }
@@ -90,13 +82,46 @@ export function createToolCallSignature(
export function detectRepetitiveToolUse( export function detectRepetitiveToolUse(
window: ToolCallWindow | undefined window: ToolCallWindow | undefined
): ToolLoopDetectionResult { ): ToolLoopDetectionResult {
if (!window || window.consecutiveCount < window.threshold) { if (!window || window.toolSignatures.length === 0) {
return { triggered: false }
}
const counts = new Map<string, number>()
for (const signature of window.toolSignatures) {
counts.set(signature, (counts.get(signature) ?? 0) + 1)
}
let repeatedTool: string | undefined
let repeatedCount = 0
for (const [toolName, count] of counts.entries()) {
if (count > repeatedCount) {
repeatedTool = toolName
repeatedCount = count
}
}
const sampleSize = window.toolSignatures.length
const minimumSampleSize = Math.min(
window.windowSize,
Math.ceil((window.windowSize * window.thresholdPercent) / 100)
)
if (sampleSize < minimumSampleSize) {
return { triggered: false }
}
const thresholdCount = Math.ceil((sampleSize * window.thresholdPercent) / 100)
if (!repeatedTool || repeatedCount < thresholdCount) {
return { triggered: false } return { triggered: false }
} }
return { return {
triggered: true, triggered: true,
toolName: window.lastSignature.split("::")[0], toolName: repeatedTool.split("::")[0],
repeatedCount: window.consecutiveCount, repeatedCount,
sampleSize,
thresholdPercent: window.thresholdPercent,
} }
} }

View File

@@ -1,5 +1,3 @@
/// <reference types="bun-types" />
import { describe, expect, test } from "bun:test" import { describe, expect, test } from "bun:test"
import type { PluginInput } from "@opencode-ai/plugin" import type { PluginInput } from "@opencode-ai/plugin"
import { tmpdir } from "node:os" import { tmpdir } from "node:os"
@@ -40,11 +38,12 @@ async function flushAsyncWork() {
} }
describe("BackgroundManager circuit breaker", () => { describe("BackgroundManager circuit breaker", () => {
describe("#given flat-format tool events have no state.input", () => { describe("#given the same tool dominates the recent window", () => {
test("#when 20 consecutive read events arrive #then the task keeps running", async () => { test("#when tool events arrive #then the task is cancelled early", async () => {
const manager = createManager({ const manager = createManager({
circuitBreaker: { circuitBreaker: {
consecutiveThreshold: 20, windowSize: 20,
repetitionThresholdPercent: 80,
}, },
}) })
const task: BackgroundTask = { const task: BackgroundTask = {
@@ -64,17 +63,38 @@ describe("BackgroundManager circuit breaker", () => {
} }
getTaskMap(manager).set(task.id, task) getTaskMap(manager).set(task.id, task)
for (let i = 0; i < 20; i++) { for (const toolName of [
"read",
"read",
"grep",
"read",
"edit",
"read",
"read",
"bash",
"read",
"read",
"read",
"glob",
"read",
"read",
"read",
"read",
"read",
"read",
"read",
"read",
]) {
manager.handleEvent({ manager.handleEvent({
type: "message.part.updated", type: "message.part.updated",
properties: { sessionID: task.sessionID, type: "tool", tool: "read" }, properties: { sessionID: task.sessionID, type: "tool", tool: toolName },
}) })
} }
await flushAsyncWork() await flushAsyncWork()
expect(task.status).toBe("running") expect(task.status).toBe("cancelled")
expect(task.progress?.toolCalls).toBe(20) expect(task.error).toContain("repeatedly called read 16/20 times")
}) })
}) })
@@ -82,7 +102,8 @@ describe("BackgroundManager circuit breaker", () => {
test("#when the window fills #then the task keeps running", async () => { test("#when the window fills #then the task keeps running", async () => {
const manager = createManager({ const manager = createManager({
circuitBreaker: { circuitBreaker: {
consecutiveThreshold: 10, windowSize: 10,
repetitionThresholdPercent: 80,
}, },
}) })
const task: BackgroundTask = { const task: BackgroundTask = {
@@ -128,11 +149,12 @@ describe("BackgroundManager circuit breaker", () => {
}) })
describe("#given the absolute cap is configured lower than the repetition detector needs", () => { describe("#given the absolute cap is configured lower than the repetition detector needs", () => {
test("#when repeated flat-format tool events reach maxToolCalls #then the backstop still cancels the task", async () => { test("#when the raw tool-call cap is reached #then the backstop still cancels the task", async () => {
const manager = createManager({ const manager = createManager({
maxToolCalls: 3, maxToolCalls: 3,
circuitBreaker: { circuitBreaker: {
consecutiveThreshold: 95, windowSize: 10,
repetitionThresholdPercent: 95,
}, },
}) })
const task: BackgroundTask = { const task: BackgroundTask = {
@@ -152,10 +174,10 @@ describe("BackgroundManager circuit breaker", () => {
} }
getTaskMap(manager).set(task.id, task) getTaskMap(manager).set(task.id, task)
for (let i = 0; i < 3; i++) { for (const toolName of ["read", "grep", "edit"]) {
manager.handleEvent({ manager.handleEvent({
type: "message.part.updated", type: "message.part.updated",
properties: { sessionID: task.sessionID, type: "tool", tool: "read" }, properties: { sessionID: task.sessionID, type: "tool", tool: toolName },
}) })
} }
@@ -171,7 +193,8 @@ describe("BackgroundManager circuit breaker", () => {
const manager = createManager({ const manager = createManager({
maxToolCalls: 2, maxToolCalls: 2,
circuitBreaker: { circuitBreaker: {
consecutiveThreshold: 5, windowSize: 5,
repetitionThresholdPercent: 80,
}, },
}) })
const task: BackgroundTask = { const task: BackgroundTask = {
@@ -210,7 +233,7 @@ describe("BackgroundManager circuit breaker", () => {
expect(task.status).toBe("running") expect(task.status).toBe("running")
expect(task.progress?.toolCalls).toBe(1) expect(task.progress?.toolCalls).toBe(1)
expect(task.progress?.countedToolPartIDs).toEqual(new Set(["tool-1"])) expect(task.progress?.countedToolPartIDs).toEqual(["tool-1"])
}) })
}) })
@@ -218,7 +241,8 @@ describe("BackgroundManager circuit breaker", () => {
test("#when tool events arrive with state.input #then task keeps running", async () => { test("#when tool events arrive with state.input #then task keeps running", async () => {
const manager = createManager({ const manager = createManager({
circuitBreaker: { circuitBreaker: {
consecutiveThreshold: 20, windowSize: 20,
repetitionThresholdPercent: 80,
}, },
}) })
const task: BackgroundTask = { const task: BackgroundTask = {
@@ -263,7 +287,8 @@ describe("BackgroundManager circuit breaker", () => {
test("#when tool events arrive with state.input #then task is cancelled with bare tool name in error", async () => { test("#when tool events arrive with state.input #then task is cancelled with bare tool name in error", async () => {
const manager = createManager({ const manager = createManager({
circuitBreaker: { circuitBreaker: {
consecutiveThreshold: 20, windowSize: 20,
repetitionThresholdPercent: 80,
}, },
}) })
const task: BackgroundTask = { const task: BackgroundTask = {
@@ -300,7 +325,7 @@ describe("BackgroundManager circuit breaker", () => {
await flushAsyncWork() await flushAsyncWork()
expect(task.status).toBe("cancelled") expect(task.status).toBe("cancelled")
expect(task.error).toContain("read 20 consecutive times") expect(task.error).toContain("repeatedly called read")
expect(task.error).not.toContain("::") expect(task.error).not.toContain("::")
}) })
}) })
@@ -310,7 +335,8 @@ describe("BackgroundManager circuit breaker", () => {
const manager = createManager({ const manager = createManager({
circuitBreaker: { circuitBreaker: {
enabled: false, enabled: false,
consecutiveThreshold: 20, windowSize: 20,
repetitionThresholdPercent: 80,
}, },
}) })
const task: BackgroundTask = { const task: BackgroundTask = {
@@ -353,7 +379,8 @@ describe("BackgroundManager circuit breaker", () => {
maxToolCalls: 3, maxToolCalls: 3,
circuitBreaker: { circuitBreaker: {
enabled: false, enabled: false,
consecutiveThreshold: 95, windowSize: 10,
repetitionThresholdPercent: 95,
}, },
}) })
const task: BackgroundTask = { const task: BackgroundTask = {

View File

@@ -153,42 +153,4 @@ describe("BackgroundManager pollRunningTasks", () => {
expect(task.status).toBe("running") expect(task.status).toBe("running")
}) })
}) })
describe("#given a running task whose session has terminal non-idle status", () => {
test('#when session status is "interrupted" #then completes the task', async () => {
//#given
const manager = createManagerWithClient({
status: async () => ({ data: { "ses-interrupted": { type: "interrupted" } } }),
})
const task = createRunningTask("ses-interrupted")
injectTask(manager, task)
//#when
const poll = (manager as unknown as { pollRunningTasks: () => Promise<void> }).pollRunningTasks
await poll.call(manager)
manager.shutdown()
//#then
expect(task.status).toBe("completed")
expect(task.completedAt).toBeDefined()
})
test('#when session status is an unknown type #then completes the task', async () => {
//#given
const manager = createManagerWithClient({
status: async () => ({ data: { "ses-unknown": { type: "some-weird-status" } } }),
})
const task = createRunningTask("ses-unknown")
injectTask(manager, task)
//#when
const poll = (manager as unknown as { pollRunningTasks: () => Promise<void> }).pollRunningTasks
await poll.call(manager)
manager.shutdown()
//#then
expect(task.status).toBe("completed")
expect(task.completedAt).toBeDefined()
})
})
}) })

View File

@@ -1,5 +1,5 @@
declare const require: (name: string) => any declare const require: (name: string) => any
const { describe, test, expect, beforeEach, afterEach, spyOn } = require("bun:test") const { describe, test, expect, beforeEach, afterEach } = require("bun:test")
import { tmpdir } from "node:os" import { tmpdir } from "node:os"
import type { PluginInput } from "@opencode-ai/plugin" import type { PluginInput } from "@opencode-ai/plugin"
import type { BackgroundTask, ResumeInput } from "./types" import type { BackgroundTask, ResumeInput } from "./types"
@@ -1806,9 +1806,9 @@ describe("BackgroundManager - Non-blocking Queue Integration", () => {
expect(task.sessionID).toBeUndefined() expect(task.sessionID).toBeUndefined()
}) })
test("should return immediately even with concurrency limit", async () => { test("should return immediately even with concurrency limit", async () => {
// given // given
const config = { defaultConcurrency: 1 } const config = { defaultConcurrency: 1 }
manager.shutdown() manager.shutdown()
manager = new BackgroundManager({ client: mockClient, directory: tmpdir() } as unknown as PluginInput, config) manager = new BackgroundManager({ client: mockClient, directory: tmpdir() } as unknown as PluginInput, config)
@@ -1828,76 +1828,9 @@ describe("BackgroundManager - Non-blocking Queue Integration", () => {
// then // then
expect(endTime - startTime).toBeLessThan(100) // Should be instant expect(endTime - startTime).toBeLessThan(100) // Should be instant
expect(task1.status).toBe("pending") expect(task1.status).toBe("pending")
expect(task2.status).toBe("pending") expect(task2.status).toBe("pending")
})
test("should keep agent when launch has model and keep agent without model", async () => {
// given
const promptBodies: Array<Record<string, unknown>> = []
let resolveFirstPromptStarted: (() => void) | undefined
let resolveSecondPromptStarted: (() => void) | undefined
const firstPromptStarted = new Promise<void>((resolve) => {
resolveFirstPromptStarted = resolve
}) })
const secondPromptStarted = new Promise<void>((resolve) => {
resolveSecondPromptStarted = resolve
})
const customClient = {
session: {
create: async (_args?: unknown) => ({ data: { id: `ses_${crypto.randomUUID()}` } }),
get: async () => ({ data: { directory: "/test/dir" } }),
prompt: async () => ({}),
promptAsync: async (args: { path: { id: string }; body: Record<string, unknown> }) => {
promptBodies.push(args.body)
if (promptBodies.length === 1) {
resolveFirstPromptStarted?.()
}
if (promptBodies.length === 2) {
resolveSecondPromptStarted?.()
}
return {}
},
messages: async () => ({ data: [] }),
todo: async () => ({ data: [] }),
status: async () => ({ data: {} }),
abort: async () => ({}),
},
}
manager.shutdown()
manager = new BackgroundManager({ client: customClient, directory: tmpdir() } as unknown as PluginInput)
const launchInputWithModel = {
description: "Test task with model",
prompt: "Do something",
agent: "test-agent",
parentSessionID: "parent-session",
parentMessageID: "parent-message",
model: { providerID: "anthropic", modelID: "claude-opus-4-6" },
}
const launchInputWithoutModel = {
description: "Test task without model",
prompt: "Do something else",
agent: "test-agent",
parentSessionID: "parent-session",
parentMessageID: "parent-message",
}
// when
const taskWithModel = await manager.launch(launchInputWithModel)
await firstPromptStarted
const taskWithoutModel = await manager.launch(launchInputWithoutModel)
await secondPromptStarted
// then
expect(taskWithModel.status).toBe("pending")
expect(taskWithoutModel.status).toBe("pending")
expect(promptBodies).toHaveLength(2)
expect(promptBodies[0].model).toEqual({ providerID: "anthropic", modelID: "claude-opus-4-6" })
expect(promptBodies[0].agent).toBe("test-agent")
expect(promptBodies[1].agent).toBe("test-agent")
expect("model" in promptBodies[1]).toBe(false)
})
test("should queue multiple tasks without blocking", async () => { test("should queue multiple tasks without blocking", async () => {
// given // given
@@ -2848,18 +2781,6 @@ describe("BackgroundManager - Non-blocking Queue Integration", () => {
}) })
describe("BackgroundManager.checkAndInterruptStaleTasks", () => { describe("BackgroundManager.checkAndInterruptStaleTasks", () => {
const originalDateNow = Date.now
let fixedTime: number
beforeEach(() => {
fixedTime = Date.now()
spyOn(globalThis.Date, "now").mockReturnValue(fixedTime)
})
afterEach(() => {
Date.now = originalDateNow
})
test("should NOT interrupt task running less than 30 seconds (min runtime guard)", async () => { test("should NOT interrupt task running less than 30 seconds (min runtime guard)", async () => {
const client = { const client = {
session: { session: {
@@ -3106,10 +3027,10 @@ describe("BackgroundManager.checkAndInterruptStaleTasks", () => {
prompt: "Test", prompt: "Test",
agent: "test-agent", agent: "test-agent",
status: "running", status: "running",
startedAt: new Date(Date.now() - 50 * 60 * 1000), startedAt: new Date(Date.now() - 25 * 60 * 1000),
progress: { progress: {
toolCalls: 1, toolCalls: 1,
lastUpdate: new Date(Date.now() - 46 * 60 * 1000), lastUpdate: new Date(Date.now() - 21 * 60 * 1000),
}, },
} }
@@ -4752,53 +4673,6 @@ describe("BackgroundManager - tool permission spread order", () => {
manager.shutdown() manager.shutdown()
}) })
test("startTask keeps agent when explicit model is configured", async () => {
//#given
const promptCalls: Array<{ path: { id: string }; body: Record<string, unknown> }> = []
const client = {
session: {
get: async () => ({ data: { directory: "/test/dir" } }),
create: async () => ({ data: { id: "session-1" } }),
promptAsync: async (args: { path: { id: string }; body: Record<string, unknown> }) => {
promptCalls.push(args)
return {}
},
},
}
const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput)
const task: BackgroundTask = {
id: "task-explicit-model",
status: "pending",
queuedAt: new Date(),
description: "test task",
prompt: "test prompt",
agent: "sisyphus-junior",
parentSessionID: "parent-session",
parentMessageID: "parent-message",
model: { providerID: "openai", modelID: "gpt-5.4", variant: "medium" },
}
const input: import("./types").LaunchInput = {
description: task.description,
prompt: task.prompt,
agent: task.agent,
parentSessionID: task.parentSessionID,
parentMessageID: task.parentMessageID,
model: task.model,
}
//#when
await (manager as unknown as { startTask: (item: { task: BackgroundTask; input: import("./types").LaunchInput }) => Promise<void> })
.startTask({ task, input })
//#then
expect(promptCalls).toHaveLength(1)
expect(promptCalls[0].body.agent).toBe("sisyphus-junior")
expect(promptCalls[0].body.model).toEqual({ providerID: "openai", modelID: "gpt-5.4" })
expect(promptCalls[0].body.variant).toBe("medium")
manager.shutdown()
})
test("resume respects explore agent restrictions", async () => { test("resume respects explore agent restrictions", async () => {
//#given //#given
let capturedTools: Record<string, unknown> | undefined let capturedTools: Record<string, unknown> | undefined
@@ -4843,48 +4717,4 @@ describe("BackgroundManager - tool permission spread order", () => {
manager.shutdown() manager.shutdown()
}) })
test("resume keeps agent when explicit model is configured", async () => {
//#given
let promptCall: { path: { id: string }; body: Record<string, unknown> } | undefined
const client = {
session: {
promptAsync: async (args: { path: { id: string }; body: Record<string, unknown> }) => {
promptCall = args
return {}
},
abort: async () => ({}),
},
}
const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput)
const task: BackgroundTask = {
id: "task-explicit-model-resume",
sessionID: "session-3",
parentSessionID: "parent-session",
parentMessageID: "parent-message",
description: "resume task",
prompt: "resume prompt",
agent: "explore",
status: "completed",
startedAt: new Date(),
completedAt: new Date(),
model: { providerID: "anthropic", modelID: "claude-sonnet-4-20250514" },
}
getTaskMap(manager).set(task.id, task)
//#when
await manager.resume({
sessionId: "session-3",
prompt: "continue",
parentSessionID: "parent-session",
parentMessageID: "parent-message",
})
//#then
expect(promptCall).toBeDefined()
expect(promptCall?.body.agent).toBe("explore")
expect(promptCall?.body.model).toEqual({ providerID: "anthropic", modelID: "claude-sonnet-4-20250514" })
manager.shutdown()
})
}) })

View File

@@ -52,12 +52,10 @@ import { join } from "node:path"
import { pruneStaleTasksAndNotifications } from "./task-poller" import { pruneStaleTasksAndNotifications } from "./task-poller"
import { checkAndInterruptStaleTasks } from "./task-poller" import { checkAndInterruptStaleTasks } from "./task-poller"
import { removeTaskToastTracking } from "./remove-task-toast-tracking" import { removeTaskToastTracking } from "./remove-task-toast-tracking"
import { isActiveSessionStatus, isTerminalSessionStatus } from "./session-status-classifier"
import { import {
detectRepetitiveToolUse, detectRepetitiveToolUse,
recordToolCall, recordToolCall,
resolveCircuitBreakerSettings, resolveCircuitBreakerSettings,
type CircuitBreakerSettings,
} from "./loop-detector" } from "./loop-detector"
import { import {
createSubagentDepthLimitError, createSubagentDepthLimitError,
@@ -153,7 +151,6 @@ export class BackgroundManager {
private preStartDescendantReservations: Set<string> private preStartDescendantReservations: Set<string>
private enableParentSessionNotifications: boolean private enableParentSessionNotifications: boolean
readonly taskHistory = new TaskHistory() readonly taskHistory = new TaskHistory()
private cachedCircuitBreakerSettings?: CircuitBreakerSettings
constructor( constructor(
ctx: PluginInput, ctx: PluginInput,
@@ -903,24 +900,23 @@ export class BackgroundManager {
task.progress.lastUpdate = new Date() task.progress.lastUpdate = new Date()
if (partInfo?.type === "tool" || partInfo?.tool) { if (partInfo?.type === "tool" || partInfo?.tool) {
const countedToolPartIDs = task.progress.countedToolPartIDs ?? new Set<string>() const countedToolPartIDs = task.progress.countedToolPartIDs ?? []
const shouldCountToolCall = const shouldCountToolCall =
!partInfo.id || !partInfo.id ||
partInfo.state?.status !== "running" || partInfo.state?.status !== "running" ||
!countedToolPartIDs.has(partInfo.id) !countedToolPartIDs.includes(partInfo.id)
if (!shouldCountToolCall) { if (!shouldCountToolCall) {
return return
} }
if (partInfo.id && partInfo.state?.status === "running") { if (partInfo.id && partInfo.state?.status === "running") {
countedToolPartIDs.add(partInfo.id) task.progress.countedToolPartIDs = [...countedToolPartIDs, partInfo.id]
task.progress.countedToolPartIDs = countedToolPartIDs
} }
task.progress.toolCalls += 1 task.progress.toolCalls += 1
task.progress.lastTool = partInfo.tool task.progress.lastTool = partInfo.tool
const circuitBreaker = this.cachedCircuitBreakerSettings ?? (this.cachedCircuitBreakerSettings = resolveCircuitBreakerSettings(this.config)) const circuitBreaker = resolveCircuitBreakerSettings(this.config)
if (partInfo.tool) { if (partInfo.tool) {
task.progress.toolCallWindow = recordToolCall( task.progress.toolCallWindow = recordToolCall(
task.progress.toolCallWindow, task.progress.toolCallWindow,
@@ -932,16 +928,18 @@ export class BackgroundManager {
if (circuitBreaker.enabled) { if (circuitBreaker.enabled) {
const loopDetection = detectRepetitiveToolUse(task.progress.toolCallWindow) const loopDetection = detectRepetitiveToolUse(task.progress.toolCallWindow)
if (loopDetection.triggered) { if (loopDetection.triggered) {
log("[background-agent] Circuit breaker: consecutive tool usage detected", { log("[background-agent] Circuit breaker: repetitive tool usage detected", {
taskId: task.id, taskId: task.id,
agent: task.agent, agent: task.agent,
sessionID, sessionID,
toolName: loopDetection.toolName, toolName: loopDetection.toolName,
repeatedCount: loopDetection.repeatedCount, repeatedCount: loopDetection.repeatedCount,
sampleSize: loopDetection.sampleSize,
thresholdPercent: loopDetection.thresholdPercent,
}) })
void this.cancelTask(task.id, { void this.cancelTask(task.id, {
source: "circuit-breaker", source: "circuit-breaker",
reason: `Subagent called ${loopDetection.toolName} ${loopDetection.repeatedCount} consecutive times (threshold: ${circuitBreaker.consecutiveThreshold}). This usually indicates an infinite loop. The task was automatically cancelled to prevent excessive token usage.`, reason: `Subagent repeatedly called ${loopDetection.toolName} ${loopDetection.repeatedCount}/${loopDetection.sampleSize} times in the recent tool-call window (${loopDetection.thresholdPercent}% threshold). This usually indicates an infinite loop. The task was automatically cancelled to prevent excessive token usage.`,
}) })
return return
} }
@@ -1784,9 +1782,11 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
} }
} }
// Only skip completion when session status is actively running. // Match sync-session-poller pattern: only skip completion check when
// Unknown or terminal statuses (like "interrupted") fall through to completion. // status EXISTS and is not idle (i.e., session is actively running).
if (sessionStatus && isActiveSessionStatus(sessionStatus.type)) { // When sessionStatus is undefined, the session has completed and dropped
// from the status response — fall through to completion detection.
if (sessionStatus && sessionStatus.type !== "idle") {
log("[background-agent] Session still running, relying on event-based progress:", { log("[background-agent] Session still running, relying on event-based progress:", {
taskId: task.id, taskId: task.id,
sessionID, sessionID,
@@ -1796,24 +1796,6 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
continue continue
} }
// Explicit terminal non-idle status (e.g., "interrupted") — complete immediately,
// skipping output validation (session will never produce more output).
// Unknown statuses fall through to the idle/gone path with output validation.
if (sessionStatus && isTerminalSessionStatus(sessionStatus.type)) {
await this.tryCompleteTask(task, `polling (terminal session status: ${sessionStatus.type})`)
continue
}
// Unknown non-idle status — not active, not terminal, not idle.
// Fall through to idle/gone completion path with output validation.
if (sessionStatus && sessionStatus.type !== "idle") {
log("[background-agent] Unknown session status, treating as potentially idle:", {
taskId: task.id,
sessionID,
sessionStatus: sessionStatus.type,
})
}
// Session is idle or no longer in status response (completed/disappeared) // Session is idle or no longer in status response (completed/disappeared)
const completionSource = sessionStatus?.type === "idle" const completionSource = sessionStatus?.type === "idle"
? "polling (idle status)" ? "polling (idle status)"

View File

@@ -1,66 +0,0 @@
import { describe, test, expect, mock } from "bun:test"
import { isActiveSessionStatus, isTerminalSessionStatus } from "./session-status-classifier"
const mockLog = mock()
mock.module("../../shared", () => ({ log: mockLog }))
describe("isActiveSessionStatus", () => {
describe("#given a known active session status", () => {
test('#when type is "busy" #then returns true', () => {
expect(isActiveSessionStatus("busy")).toBe(true)
})
test('#when type is "retry" #then returns true', () => {
expect(isActiveSessionStatus("retry")).toBe(true)
})
test('#when type is "running" #then returns true', () => {
expect(isActiveSessionStatus("running")).toBe(true)
})
})
describe("#given a known terminal session status", () => {
test('#when type is "idle" #then returns false', () => {
expect(isActiveSessionStatus("idle")).toBe(false)
})
test('#when type is "interrupted" #then returns false and does not log', () => {
mockLog.mockClear()
expect(isActiveSessionStatus("interrupted")).toBe(false)
expect(mockLog).not.toHaveBeenCalled()
})
})
describe("#given an unknown session status", () => {
test('#when type is an arbitrary unknown string #then returns false and logs warning', () => {
mockLog.mockClear()
expect(isActiveSessionStatus("some-unknown-status")).toBe(false)
expect(mockLog).toHaveBeenCalledWith(
"[background-agent] Unknown session status type encountered:",
"some-unknown-status",
)
})
test('#when type is empty string #then returns false', () => {
expect(isActiveSessionStatus("")).toBe(false)
})
})
})
describe("isTerminalSessionStatus", () => {
test('#when type is "interrupted" #then returns true', () => {
expect(isTerminalSessionStatus("interrupted")).toBe(true)
})
test('#when type is "idle" #then returns false (idle is handled separately)', () => {
expect(isTerminalSessionStatus("idle")).toBe(false)
})
test('#when type is "busy" #then returns false', () => {
expect(isTerminalSessionStatus("busy")).toBe(false)
})
test('#when type is an unknown string #then returns false', () => {
expect(isTerminalSessionStatus("some-unknown")).toBe(false)
})
})

View File

@@ -1,20 +0,0 @@
import { log } from "../../shared"
/** Session status types that mean the session is still doing work. */
const ACTIVE_STATUSES: ReadonlySet<string> = new Set(["busy", "retry", "running"])
/** Session status types we recognize as finished / no longer active. */
const TERMINAL_STATUSES: ReadonlySet<string> = new Set(["idle", "interrupted"])

/**
 * Whether the given session status type represents an actively working session.
 *
 * Unknown status types are treated as inactive, and a diagnostic is logged so
 * new upstream status values surface instead of being silently ignored.
 */
export function isActiveSessionStatus(type: string): boolean {
  if (ACTIVE_STATUSES.has(type)) return true
  const isKnownTerminal = TERMINAL_STATUSES.has(type)
  if (!isKnownTerminal) {
    log("[background-agent] Unknown session status type encountered:", type)
  }
  return false
}

/**
 * Whether the status is a known terminal state other than "idle"
 * ("idle" is handled separately by callers).
 */
export function isTerminalSessionStatus(type: string): boolean {
  const isKnownTerminal = TERMINAL_STATUSES.has(type)
  return isKnownTerminal && type !== "idle"
}

View File

@@ -64,63 +64,4 @@ describe("background-agent spawner.startTask", () => {
{ permission: "question", action: "deny", pattern: "*" }, { permission: "question", action: "deny", pattern: "*" },
]) ])
}) })
test("keeps agent when explicit model is configured", async () => {
//#given
const promptCalls: any[] = []
const client = {
session: {
get: async () => ({ data: { directory: "/parent/dir" } }),
create: async () => ({ data: { id: "ses_child" } }),
promptAsync: async (args?: any) => {
promptCalls.push(args)
return {}
},
},
}
const task = createTask({
description: "Test task",
prompt: "Do work",
agent: "sisyphus-junior",
parentSessionID: "ses_parent",
parentMessageID: "msg_parent",
model: { providerID: "openai", modelID: "gpt-5.4", variant: "medium" },
})
const item = {
task,
input: {
description: task.description,
prompt: task.prompt,
agent: task.agent,
parentSessionID: task.parentSessionID,
parentMessageID: task.parentMessageID,
parentModel: task.parentModel,
parentAgent: task.parentAgent,
model: task.model,
},
}
const ctx = {
client,
directory: "/fallback",
concurrencyManager: { release: () => {} },
tmuxEnabled: false,
onTaskError: () => {},
}
//#when
await startTask(item as any, ctx as any)
//#then
expect(promptCalls).toHaveLength(1)
expect(promptCalls[0]?.body?.agent).toBe("sisyphus-junior")
expect(promptCalls[0]?.body?.model).toEqual({
providerID: "openai",
modelID: "gpt-5.4",
})
expect(promptCalls[0]?.body?.variant).toBe("medium")
})
}) })

View File

@@ -1,5 +1,5 @@
declare const require: (name: string) => any declare const require: (name: string) => any
const { describe, it, expect, mock, spyOn, beforeEach, afterEach } = require("bun:test") const { describe, it, expect, mock } = require("bun:test")
import { checkAndInterruptStaleTasks, pruneStaleTasksAndNotifications } from "./task-poller" import { checkAndInterruptStaleTasks, pruneStaleTasksAndNotifications } from "./task-poller"
import type { BackgroundTask } from "./types" import type { BackgroundTask } from "./types"
@@ -29,18 +29,6 @@ describe("checkAndInterruptStaleTasks", () => {
...overrides, ...overrides,
} }
} }
const originalDateNow = Date.now
let fixedTime: number
beforeEach(() => {
fixedTime = Date.now()
spyOn(globalThis.Date, "now").mockReturnValue(fixedTime)
})
afterEach(() => {
Date.now = originalDateNow
})
it("should interrupt tasks with lastUpdate exceeding stale timeout", async () => { it("should interrupt tasks with lastUpdate exceeding stale timeout", async () => {
//#given //#given
@@ -129,13 +117,13 @@ describe("checkAndInterruptStaleTasks", () => {
}) })
it("should use DEFAULT_MESSAGE_STALENESS_TIMEOUT_MS when messageStalenessTimeoutMs is not configured", async () => { it("should use DEFAULT_MESSAGE_STALENESS_TIMEOUT_MS when messageStalenessTimeoutMs is not configured", async () => {
//#given — task started 65 minutes ago, no config for messageStalenessTimeoutMs //#given — task started 35 minutes ago, no config for messageStalenessTimeoutMs
const task = createRunningTask({ const task = createRunningTask({
startedAt: new Date(Date.now() - 65 * 60 * 1000), startedAt: new Date(Date.now() - 35 * 60 * 1000),
progress: undefined, progress: undefined,
}) })
//#when — default is 60 minutes (3_600_000ms) //#when — default is 30 minutes (1_800_000ms)
await checkAndInterruptStaleTasks({ await checkAndInterruptStaleTasks({
tasks: [task], tasks: [task],
client: mockClient as never, client: mockClient as never,
@@ -429,56 +417,6 @@ describe("checkAndInterruptStaleTasks", () => {
expect(task.status).toBe("cancelled") expect(task.status).toBe("cancelled")
expect(onTaskInterrupted).toHaveBeenCalledWith(task) expect(onTaskInterrupted).toHaveBeenCalledWith(task)
}) })
it('should NOT protect task when session has terminal non-idle status like "interrupted"', async () => {
//#given — lastUpdate is 5min old, session is "interrupted" (terminal, not active)
const task = createRunningTask({
startedAt: new Date(Date.now() - 300_000),
progress: {
toolCalls: 2,
lastUpdate: new Date(Date.now() - 300_000),
},
})
//#when — session status is "interrupted" (terminal)
await checkAndInterruptStaleTasks({
tasks: [task],
client: mockClient as never,
config: { staleTimeoutMs: 180_000 },
concurrencyManager: mockConcurrencyManager as never,
notifyParentSession: mockNotify,
sessionStatuses: { "ses-1": { type: "interrupted" } },
})
//#then — terminal statuses should not protect from stale timeout
expect(task.status).toBe("cancelled")
expect(task.error).toContain("Stale timeout")
})
it('should NOT protect task when session has unknown status type', async () => {
//#given — lastUpdate is 5min old, session has an unknown status
const task = createRunningTask({
startedAt: new Date(Date.now() - 300_000),
progress: {
toolCalls: 2,
lastUpdate: new Date(Date.now() - 300_000),
},
})
//#when — session has unknown status type
await checkAndInterruptStaleTasks({
tasks: [task],
client: mockClient as never,
config: { staleTimeoutMs: 180_000 },
concurrencyManager: mockConcurrencyManager as never,
notifyParentSession: mockNotify,
sessionStatuses: { "ses-1": { type: "some-weird-status" } },
})
//#then — unknown statuses should not protect from stale timeout
expect(task.status).toBe("cancelled")
expect(task.error).toContain("Stale timeout")
})
}) })
describe("pruneStaleTasksAndNotifications", () => { describe("pruneStaleTasksAndNotifications", () => {

View File

@@ -14,7 +14,6 @@ import {
} from "./constants" } from "./constants"
import { removeTaskToastTracking } from "./remove-task-toast-tracking" import { removeTaskToastTracking } from "./remove-task-toast-tracking"
import { isActiveSessionStatus } from "./session-status-classifier"
const TERMINAL_TASK_STATUSES = new Set<BackgroundTask["status"]>([ const TERMINAL_TASK_STATUSES = new Set<BackgroundTask["status"]>([
"completed", "completed",
"error", "error",
@@ -121,7 +120,7 @@ export async function checkAndInterruptStaleTasks(args: {
if (!startedAt || !sessionID) continue if (!startedAt || !sessionID) continue
const sessionStatus = sessionStatuses?.[sessionID]?.type const sessionStatus = sessionStatuses?.[sessionID]?.type
const sessionIsRunning = sessionStatus !== undefined && isActiveSessionStatus(sessionStatus) const sessionIsRunning = sessionStatus !== undefined && sessionStatus !== "idle"
const runtime = now - startedAt.getTime() const runtime = now - startedAt.getTime()
if (!task.progress?.lastUpdate) { if (!task.progress?.lastUpdate) {
@@ -130,7 +129,7 @@ export async function checkAndInterruptStaleTasks(args: {
const staleMinutes = Math.round(runtime / 60000) const staleMinutes = Math.round(runtime / 60000)
task.status = "cancelled" task.status = "cancelled"
task.error = `Stale timeout (no activity for ${staleMinutes}min since start). This is a FINAL cancellation - do NOT create a replacement task. If the timeout is too short, increase 'background_task.staleTimeoutMs' in .opencode/oh-my-opencode.json.` task.error = `Stale timeout (no activity for ${staleMinutes}min since start)`
task.completedAt = new Date() task.completedAt = new Date()
if (task.concurrencyKey) { if (task.concurrencyKey) {
@@ -159,10 +158,10 @@ export async function checkAndInterruptStaleTasks(args: {
if (timeSinceLastUpdate <= staleTimeoutMs) continue if (timeSinceLastUpdate <= staleTimeoutMs) continue
if (task.status !== "running") continue if (task.status !== "running") continue
const staleMinutes = Math.round(timeSinceLastUpdate / 60000) const staleMinutes = Math.round(timeSinceLastUpdate / 60000)
task.status = "cancelled" task.status = "cancelled"
task.error = `Stale timeout (no activity for ${staleMinutes}min). This is a FINAL cancellation - do NOT create a replacement task. If the timeout is too short, increase 'background_task.staleTimeoutMs' in .opencode/oh-my-opencode.json.` task.error = `Stale timeout (no activity for ${staleMinutes}min)`
task.completedAt = new Date() task.completedAt = new Date()
if (task.concurrencyKey) { if (task.concurrencyKey) {
concurrencyManager.release(task.concurrencyKey) concurrencyManager.release(task.concurrencyKey)

View File

@@ -10,16 +10,16 @@ export type BackgroundTaskStatus =
| "interrupt" | "interrupt"
export interface ToolCallWindow { export interface ToolCallWindow {
lastSignature: string toolSignatures: string[]
consecutiveCount: number windowSize: number
threshold: number thresholdPercent: number
} }
export interface TaskProgress { export interface TaskProgress {
toolCalls: number toolCalls: number
lastTool?: string lastTool?: string
toolCallWindow?: ToolCallWindow toolCallWindow?: ToolCallWindow
countedToolPartIDs?: Set<string> countedToolPartIDs?: string[]
lastUpdate: Date lastUpdate: Date
lastMessage?: string lastMessage?: string
lastMessageAt?: Date lastMessageAt?: Date

View File

@@ -1,4 +1,3 @@
export * from "./types" export * from "./types"
export * from "./constants" export * from "./constants"
export * from "./storage" export * from "./storage"
export * from "./top-level-task"

View File

@@ -11,11 +11,8 @@ import {
getPlanName, getPlanName,
createBoulderState, createBoulderState,
findPrometheusPlans, findPrometheusPlans,
getTaskSessionState,
upsertTaskSessionState,
} from "./storage" } from "./storage"
import type { BoulderState } from "./types" import type { BoulderState } from "./types"
import { readCurrentTopLevelTask } from "./top-level-task"
describe("boulder-state", () => { describe("boulder-state", () => {
const TEST_DIR = join(tmpdir(), "boulder-state-test-" + Date.now()) const TEST_DIR = join(tmpdir(), "boulder-state-test-" + Date.now())
@@ -137,24 +134,6 @@ describe("boulder-state", () => {
expect(result?.session_ids).toEqual(["session-1", "session-2"]) expect(result?.session_ids).toEqual(["session-1", "session-2"])
expect(result?.plan_name).toBe("my-plan") expect(result?.plan_name).toBe("my-plan")
}) })
test("should default task_sessions to empty object when missing from JSON", () => {
// given - boulder.json without task_sessions field
const boulderFile = join(SISYPHUS_DIR, "boulder.json")
writeFileSync(boulderFile, JSON.stringify({
active_plan: "/path/to/plan.md",
started_at: "2026-01-01T00:00:00Z",
session_ids: ["session-1"],
plan_name: "plan",
}))
// when
const result = readBoulderState(TEST_DIR)
// then
expect(result).not.toBeNull()
expect(result!.task_sessions).toEqual({})
})
}) })
describe("writeBoulderState", () => { describe("writeBoulderState", () => {
@@ -270,115 +249,6 @@ describe("boulder-state", () => {
}) })
}) })
describe("task session state", () => {
test("should persist and read preferred session for a top-level plan task", () => {
// given - existing boulder state
const state: BoulderState = {
active_plan: "/plan.md",
started_at: "2026-01-02T10:00:00Z",
session_ids: ["session-1"],
plan_name: "plan",
}
writeBoulderState(TEST_DIR, state)
// when
upsertTaskSessionState(TEST_DIR, {
taskKey: "todo:1",
taskLabel: "1",
taskTitle: "Implement auth flow",
sessionId: "ses_task_123",
agent: "sisyphus-junior",
category: "deep",
})
const result = getTaskSessionState(TEST_DIR, "todo:1")
// then
expect(result).not.toBeNull()
expect(result?.session_id).toBe("ses_task_123")
expect(result?.task_title).toBe("Implement auth flow")
expect(result?.agent).toBe("sisyphus-junior")
expect(result?.category).toBe("deep")
})
test("should overwrite preferred session for the same top-level plan task", () => {
// given - existing boulder state with prior preferred session
const state: BoulderState = {
active_plan: "/plan.md",
started_at: "2026-01-02T10:00:00Z",
session_ids: ["session-1"],
plan_name: "plan",
task_sessions: {
"todo:1": {
task_key: "todo:1",
task_label: "1",
task_title: "Implement auth flow",
session_id: "ses_old",
updated_at: "2026-01-02T10:00:00Z",
},
},
}
writeBoulderState(TEST_DIR, state)
// when
upsertTaskSessionState(TEST_DIR, {
taskKey: "todo:1",
taskLabel: "1",
taskTitle: "Implement auth flow",
sessionId: "ses_new",
})
const result = getTaskSessionState(TEST_DIR, "todo:1")
// then
expect(result?.session_id).toBe("ses_new")
})
})
describe("readCurrentTopLevelTask", () => {
test("should return the first unchecked top-level task in TODOs", () => {
// given - plan with nested and top-level unchecked tasks
const planPath = join(TEST_DIR, "current-task-plan.md")
writeFileSync(planPath, `# Plan
## TODOs
- [x] 1. Finished task
- [ ] nested acceptance checkbox
- [ ] 2. Current task
## Final Verification Wave
- [ ] F1. Final review
`)
// when
const result = readCurrentTopLevelTask(planPath)
// then
expect(result).not.toBeNull()
expect(result?.key).toBe("todo:2")
expect(result?.title).toBe("Current task")
})
test("should fall back to final-wave task when implementation tasks are complete", () => {
// given - plan with only final-wave work remaining
const planPath = join(TEST_DIR, "final-wave-current-task-plan.md")
writeFileSync(planPath, `# Plan
## TODOs
- [x] 1. Finished task
## Final Verification Wave
- [ ] F1. Final review
`)
// when
const result = readCurrentTopLevelTask(planPath)
// then
expect(result).not.toBeNull()
expect(result?.key).toBe("final-wave:f1")
expect(result?.title).toBe("Final review")
})
})
describe("getPlanProgress", () => { describe("getPlanProgress", () => {
test("should count completed and uncompleted checkboxes", () => { test("should count completed and uncompleted checkboxes", () => {
// given - plan file with checkboxes // given - plan file with checkboxes
@@ -481,7 +351,7 @@ describe("boulder-state", () => {
expect(progress.isComplete).toBe(true) expect(progress.isComplete).toBe(true)
}) })
test("should return isComplete false for empty plan", () => { test("should return isComplete false for plan with content but no checkboxes", () => {
// given - plan with no checkboxes // given - plan with no checkboxes
const planPath = join(TEST_DIR, "empty-plan.md") const planPath = join(TEST_DIR, "empty-plan.md")
writeFileSync(planPath, "# Plan\nNo tasks here") writeFileSync(planPath, "# Plan\nNo tasks here")

View File

@@ -6,11 +6,9 @@
import { existsSync, readFileSync, writeFileSync, mkdirSync, readdirSync } from "node:fs" import { existsSync, readFileSync, writeFileSync, mkdirSync, readdirSync } from "node:fs"
import { dirname, join, basename } from "node:path" import { dirname, join, basename } from "node:path"
import type { BoulderState, PlanProgress, TaskSessionState } from "./types" import type { BoulderState, PlanProgress } from "./types"
import { BOULDER_DIR, BOULDER_FILE, PROMETHEUS_PLANS_DIR } from "./constants" import { BOULDER_DIR, BOULDER_FILE, PROMETHEUS_PLANS_DIR } from "./constants"
const RESERVED_KEYS = new Set(["__proto__", "prototype", "constructor"])
export function getBoulderFilePath(directory: string): string { export function getBoulderFilePath(directory: string): string {
return join(directory, BOULDER_DIR, BOULDER_FILE) return join(directory, BOULDER_DIR, BOULDER_FILE)
} }
@@ -31,9 +29,6 @@ export function readBoulderState(directory: string): BoulderState | null {
if (!Array.isArray(parsed.session_ids)) { if (!Array.isArray(parsed.session_ids)) {
parsed.session_ids = [] parsed.session_ids = []
} }
if (!parsed.task_sessions || typeof parsed.task_sessions !== "object" || Array.isArray(parsed.task_sessions)) {
parsed.task_sessions = {}
}
return parsed as BoulderState return parsed as BoulderState
} catch { } catch {
return null return null
@@ -90,54 +85,6 @@ export function clearBoulderState(directory: string): boolean {
} }
} }
/**
 * Look up the preferred reusable subagent session recorded for a top-level
 * plan task.
 *
 * Returns null when no boulder state exists, when the state has no
 * task_sessions map, or when the task key has no recorded entry.
 */
export function getTaskSessionState(directory: string, taskKey: string): TaskSessionState | null {
  const sessions = readBoulderState(directory)?.task_sessions
  if (!sessions) return null
  const entry = sessions[taskKey]
  return entry ?? null
}
/**
 * Record (or replace) the preferred reusable subagent session for a
 * top-level plan task in the persisted boulder state.
 *
 * Returns the updated state on success; returns null when there is no
 * boulder state to update, when the task key is a reserved object property
 * name (prototype-pollution guard), or when persisting the state fails.
 */
export function upsertTaskSessionState(
  directory: string,
  input: {
    taskKey: string
    taskLabel: string
    taskTitle: string
    sessionId: string
    agent?: string
    category?: string
  },
): BoulderState | null {
  const state = readBoulderState(directory)
  if (!state) return null
  // Refuse keys like "__proto__" that would pollute the object prototype.
  if (RESERVED_KEYS.has(input.taskKey)) return null

  const sessions = state.task_sessions ?? {}
  sessions[input.taskKey] = {
    task_key: input.taskKey,
    task_label: input.taskLabel,
    task_title: input.taskTitle,
    session_id: input.sessionId,
    // agent/category are only persisted when the caller supplied them.
    ...(input.agent !== undefined ? { agent: input.agent } : {}),
    ...(input.category !== undefined ? { category: input.category } : {}),
    updated_at: new Date().toISOString(),
  }
  state.task_sessions = sessions

  return writeBoulderState(directory, state) ? state : null
}
/** /**
* Find Prometheus plan files for this project. * Find Prometheus plan files for this project.
* Prometheus stores plans at: {project}/.sisyphus/plans/{name}.md * Prometheus stores plans at: {project}/.sisyphus/plans/{name}.md

View File

@@ -1,268 +0,0 @@
import { describe, expect, test } from "bun:test"
import { writeFileSync } from "node:fs"
import { join } from "node:path"
import { tmpdir } from "node:os"
import { readCurrentTopLevelTask } from "./top-level-task"
/** Write a markdown plan fixture into the OS temp dir and return its absolute path. */
function writePlanFile(fileName: string, content: string): string {
  const target = join(tmpdir(), fileName)
  writeFileSync(target, content, "utf-8")
  return target
}
// Behavioral tests for readCurrentTopLevelTask: parsing the first unchecked
// top-level checkbox out of a Prometheus plan's "## TODOs" section, falling
// back to "## Final Verification Wave". Fixtures are written via writePlanFile.
describe("readCurrentTopLevelTask", () => {
  test("returns first unchecked top-level task in TODOs", () => {
    // given
    const planPath = writePlanFile(
      `top-level-task-happy-${Date.now()}.md`,
      `# Plan
## TODOs
- [x] 1. Done task
- [ ] 2. Current task
## Final Verification Wave
- [ ] F1. Final review
`,
    )
    // when
    const result = readCurrentTopLevelTask(planPath)
    // then
    expect(result).toEqual({
      key: "todo:2",
      section: "todo",
      label: "2",
      title: "Current task",
    })
  })
  test("returns null when all tasks are checked", () => {
    // given
    const planPath = writePlanFile(
      `top-level-task-all-checked-${Date.now()}.md`,
      `# Plan
## TODOs
- [x] 1. Done task
- [x] 2. Another done task
## Final Verification Wave
- [x] F1. Final done review
`,
    )
    // when
    const result = readCurrentTopLevelTask(planPath)
    // then
    expect(result).toBeNull()
  })
  test("returns null for empty plan file", () => {
    // given
    const planPath = writePlanFile(`top-level-task-empty-${Date.now()}.md`, "")
    // when
    const result = readCurrentTopLevelTask(planPath)
    // then
    expect(result).toBeNull()
  })
  test("returns null when plan file does not exist", () => {
    // given — path is never written, so the file is absent
    const planPath = join(tmpdir(), `top-level-task-missing-${Date.now()}.md`)
    // when
    const result = readCurrentTopLevelTask(planPath)
    // then
    expect(result).toBeNull()
  })
  test("skips nested or indented checkboxes", () => {
    // given — the nested checkbox is indented; only column-0 items count
    const planPath = writePlanFile(
      `top-level-task-nested-${Date.now()}.md`,
      `# Plan
## TODOs
- [x] 1. Done task
  - [ ] nested should be ignored
- [ ] 2. Top-level pending
`,
    )
    // when
    const result = readCurrentTopLevelTask(planPath)
    // then
    expect(result?.key).toBe("todo:2")
  })
  test("falls back to Final Verification Wave when TODOs are all checked", () => {
    // given
    const planPath = writePlanFile(
      `top-level-task-fallback-${Date.now()}.md`,
      `# Plan
## TODOs
- [x] 1. Done task
- [x] 2. Done task
## Final Verification Wave
- [ ] F1. Final review pending
`,
    )
    // when
    const result = readCurrentTopLevelTask(planPath)
    // then — final-wave labels keep their original casing but keys are lowercased
    expect(result).toEqual({
      key: "final-wave:f1",
      section: "final-wave",
      label: "F1",
      title: "Final review pending",
    })
  })
  test("selects the first unchecked task among mixed checked and unchecked TODOs", () => {
    // given
    const planPath = writePlanFile(
      `top-level-task-mixed-${Date.now()}.md`,
      `# Plan
## TODOs
- [x] 1. Done task
- [ ] 2. First unchecked
- [ ] 3. Second unchecked
`,
    )
    // when
    const result = readCurrentTopLevelTask(planPath)
    // then
    expect(result?.key).toBe("todo:2")
    expect(result?.title).toBe("First unchecked")
  })
  test("ignores malformed labels and continues to next unchecked task", () => {
    // given — first unchecked item lacks the "N." label the parser requires
    const planPath = writePlanFile(
      `top-level-task-malformed-${Date.now()}.md`,
      `# Plan
## TODOs
- [ ] no number prefix
- [ ] 2. Valid task after malformed label
`,
    )
    // when
    const result = readCurrentTopLevelTask(planPath)
    // then
    expect(result).toEqual({
      key: "todo:2",
      section: "todo",
      label: "2",
      title: "Valid task after malformed label",
    })
  })
  test("supports unchecked tasks with asterisk bullets", () => {
    // given — "*" bullets are accepted in addition to "-"
    const planPath = writePlanFile(
      `top-level-task-asterisk-${Date.now()}.md`,
      `# Plan
## TODOs
* [ ] 1. Task using asterisk bullet
`,
    )
    // when
    const result = readCurrentTopLevelTask(planPath)
    // then
    expect(result?.key).toBe("todo:1")
    expect(result?.title).toBe("Task using asterisk bullet")
  })
  test("returns final-wave task when plan has only Final Verification Wave section", () => {
    // given
    const planPath = writePlanFile(
      `top-level-task-final-only-${Date.now()}.md`,
      `# Plan
## Final Verification Wave
- [ ] F2. Final-only task
`,
    )
    // when
    const result = readCurrentTopLevelTask(planPath)
    // then
    expect(result).toEqual({
      key: "final-wave:f2",
      section: "final-wave",
      label: "F2",
      title: "Final-only task",
    })
  })
  test("returns the first unchecked task when multiple unchecked tasks exist", () => {
    // given
    const planPath = writePlanFile(
      `top-level-task-multiple-${Date.now()}.md`,
      `# Plan
## TODOs
- [ ] 1. First unchecked task
- [ ] 2. Second unchecked task
- [ ] 3. Third unchecked task
`,
    )
    // when
    const result = readCurrentTopLevelTask(planPath)
    // then
    expect(result?.label).toBe("1")
    expect(result?.title).toBe("First unchecked task")
  })
  test("ignores unchecked content in non-target sections during section transitions", () => {
    // given — unchecked items under "## Notes" / "## Decisions" must not count
    const planPath = writePlanFile(
      `top-level-task-sections-${Date.now()}.md`,
      `# Plan
## Notes
- [ ] 99. Should be ignored because section is not tracked
## TODOs
- [x] 1. Done implementation task
## Decisions
- [ ] 100. Should also be ignored
## Final Verification Wave
- [ ] F3. Final verification task
`,
    )
    // when
    const result = readCurrentTopLevelTask(planPath)
    // then
    expect(result?.key).toBe("final-wave:f3")
    expect(result?.section).toBe("final-wave")
  })
})

View File

@@ -1,77 +0,0 @@
import { existsSync, readFileSync } from "node:fs"
import type { TopLevelTaskRef } from "./types"
// Markdown patterns for parsing Prometheus plan files.
const TODO_HEADING_PATTERN = /^##\s+TODOs\b/i
const FINAL_VERIFICATION_HEADING_PATTERN = /^##\s+Final Verification Wave\b/i
const SECOND_LEVEL_HEADING_PATTERN = /^##\s+/
const UNCHECKED_CHECKBOX_PATTERN = /^(\s*)[-*]\s*\[\s*\]\s*(.+)$/
const TODO_TASK_PATTERN = /^(\d+)\.\s+(.+)$/
const FINAL_WAVE_TASK_PATTERN = /^(F\d+)\.\s+(.+)$/i

type PlanSection = "todo" | "final-wave" | "other"

/** Map a "## ..." heading line to the plan section it opens. */
function classifyHeading(line: string): PlanSection {
  if (TODO_HEADING_PATTERN.test(line)) return "todo"
  if (FINAL_VERIFICATION_HEADING_PATTERN.test(line)) return "final-wave"
  return "other"
}

/**
 * Parse one unchecked-checkbox body into a task reference.
 * Returns null when the body does not carry the section's label format
 * ("N." for TODOs, "FN." for the final wave).
 */
function buildTaskRef(
  section: "todo" | "final-wave",
  taskLabel: string,
): TopLevelTaskRef | null {
  const pattern = section === "todo" ? TODO_TASK_PATTERN : FINAL_WAVE_TASK_PATTERN
  const match = pattern.exec(taskLabel)
  if (match === null) return null
  const [, rawLabel, rawTitle] = match
  return {
    // Keys are lowercased so "F1" and "f1" address the same task.
    key: `${section}:${rawLabel.toLowerCase()}`,
    section,
    label: rawLabel,
    title: rawTitle.trim(),
  }
}

/**
 * Find the first unchecked top-level task in a Prometheus plan file,
 * scanning the "## TODOs" section first and the "## Final Verification Wave"
 * section as it appears later in the document.
 *
 * Returns null when the file is missing or unreadable, when every tracked
 * task is checked, or when no tracked section contains a well-formed
 * unchecked top-level checkbox.
 */
export function readCurrentTopLevelTask(planPath: string): TopLevelTaskRef | null {
  if (!existsSync(planPath)) return null
  try {
    const planLines = readFileSync(planPath, "utf-8").split(/\r?\n/)
    let currentSection: PlanSection = "other"
    for (const planLine of planLines) {
      if (SECOND_LEVEL_HEADING_PATTERN.test(planLine)) {
        currentSection = classifyHeading(planLine)
      }
      const checkboxMatch = UNCHECKED_CHECKBOX_PATTERN.exec(planLine)
      if (checkboxMatch === null) continue
      const [, indent, body] = checkboxMatch
      if (indent.length > 0) continue // nested/indented checkbox — not top-level
      if (currentSection === "other") continue // only tracked sections count
      const taskRef = buildTaskRef(currentSection, body.trim())
      // Malformed labels yield null; keep scanning subsequent lines.
      if (taskRef !== null) return taskRef
    }
    return null
  } catch {
    return null
  }
}

View File

@@ -18,8 +18,6 @@ export interface BoulderState {
agent?: string agent?: string
/** Absolute path to the git worktree root where work happens */ /** Absolute path to the git worktree root where work happens */
worktree_path?: string worktree_path?: string
/** Preferred reusable subagent sessions keyed by current top-level plan task */
task_sessions?: Record<string, TaskSessionState>
} }
export interface PlanProgress { export interface PlanProgress {
@@ -30,31 +28,3 @@ export interface PlanProgress {
/** Whether all tasks are done */ /** Whether all tasks are done */
isComplete: boolean isComplete: boolean
} }
export interface TaskSessionState {
/** Stable identifier for the current top-level plan task (e.g. todo:1 / final-wave:F1) */
task_key: string
/** Original task label from the plan file */
task_label: string
/** Full task title from the plan file */
task_title: string
/** Preferred reusable subagent session */
session_id: string
/** Agent associated with the task session, when known */
agent?: string
/** Category associated with the task session, when known */
category?: string
/** Last update timestamp */
updated_at: string
}
export interface TopLevelTaskRef {
/** Stable identifier for the current top-level plan task */
key: string
/** Task section in the Prometheus plan */
section: "todo" | "final-wave"
/** Original label token (e.g. 1 / F1) */
label: string
/** Full task title extracted from the checkbox line */
title: string
}

View File

@@ -1,112 +0,0 @@
import { describe, it, expect, beforeEach, afterEach } from "bun:test"
import type { PluginComponentsResult } from "./loader"
// Tests for the env-var kill switches of Claude Code plugin loading.
// Each test imports ./loader dynamically AFTER mutating process.env so the
// flag check observes the test's environment.
describe("loadAllPluginComponents", () => {
  // Snapshot the real environment once; afterEach restores it wholesale so
  // env mutations cannot leak between tests.
  const originalEnv = { ...process.env }
  beforeEach(() => {
    delete process.env.OPENCODE_DISABLE_CLAUDE_CODE
    delete process.env.OPENCODE_DISABLE_CLAUDE_CODE_PLUGINS
  })
  afterEach(() => {
    process.env = { ...originalEnv }
  })
  describe("when OPENCODE_DISABLE_CLAUDE_CODE is set to 'true'", () => {
    it("returns empty result without loading any plugins", async () => {
      // given
      process.env.OPENCODE_DISABLE_CLAUDE_CODE = "true"
      // when
      const { loadAllPluginComponents } = await import("./loader")
      const result: PluginComponentsResult = await loadAllPluginComponents()
      // then — every component collection is empty, including errors
      expect(result.commands).toEqual({})
      expect(result.skills).toEqual({})
      expect(result.agents).toEqual({})
      expect(result.mcpServers).toEqual({})
      expect(result.hooksConfigs).toEqual([])
      expect(result.plugins).toEqual([])
      expect(result.errors).toEqual([])
    })
  })
  describe("when OPENCODE_DISABLE_CLAUDE_CODE is set to '1'", () => {
    it("returns empty result without loading any plugins", async () => {
      // given — "1" is accepted as an alternative truthy spelling
      process.env.OPENCODE_DISABLE_CLAUDE_CODE = "1"
      // when
      const { loadAllPluginComponents } = await import("./loader")
      const result: PluginComponentsResult = await loadAllPluginComponents()
      // then
      expect(result.commands).toEqual({})
      expect(result.plugins).toEqual([])
    })
  })
  describe("when OPENCODE_DISABLE_CLAUDE_CODE_PLUGINS is set to 'true'", () => {
    it("returns empty result without loading any plugins", async () => {
      // given — the plugin-specific flag disables loading on its own
      process.env.OPENCODE_DISABLE_CLAUDE_CODE_PLUGINS = "true"
      // when
      const { loadAllPluginComponents } = await import("./loader")
      const result: PluginComponentsResult = await loadAllPluginComponents()
      // then
      expect(result.commands).toEqual({})
      expect(result.plugins).toEqual([])
    })
  })
  describe("when OPENCODE_DISABLE_CLAUDE_CODE_PLUGINS is set to '1'", () => {
    it("returns empty result without loading any plugins", async () => {
      // given
      process.env.OPENCODE_DISABLE_CLAUDE_CODE_PLUGINS = "1"
      // when
      const { loadAllPluginComponents } = await import("./loader")
      const result: PluginComponentsResult = await loadAllPluginComponents()
      // then
      expect(result.commands).toEqual({})
      expect(result.plugins).toEqual([])
    })
  })
  describe("when neither env var is set", () => {
    it("does not skip plugin loading", async () => {
      // given
      delete process.env.OPENCODE_DISABLE_CLAUDE_CODE
      delete process.env.OPENCODE_DISABLE_CLAUDE_CODE_PLUGINS
      // when
      const { loadAllPluginComponents } = await import("./loader")
      const result: PluginComponentsResult = await loadAllPluginComponents()
      // then — should attempt to load (may find 0 plugins, but shouldn't early-return)
      expect(result).toBeDefined()
      expect(result).toHaveProperty("commands")
      expect(result).toHaveProperty("plugins")
    })
  })
  describe("when env var is set to unrecognized value", () => {
    it("does not skip plugin loading", async () => {
      // given — only the exact strings "true"/"1" disable loading
      process.env.OPENCODE_DISABLE_CLAUDE_CODE = "yes"
      // when
      const { loadAllPluginComponents } = await import("./loader")
      const result: PluginComponentsResult = await loadAllPluginComponents()
      // then — "yes" is not "true" or "1", should not skip
      expect(result).toBeDefined()
      expect(result).toHaveProperty("plugins")
    })
  })
})

View File

@@ -27,26 +27,7 @@ export interface PluginComponentsResult {
errors: PluginLoadError[] errors: PluginLoadError[]
} }
/**
 * True when Claude Code plugin loading is disabled via the environment:
 * OPENCODE_DISABLE_CLAUDE_CODE or OPENCODE_DISABLE_CLAUDE_CODE_PLUGINS set
 * to exactly "true" or "1". Any other value (e.g. "yes") does not disable.
 */
function isClaudeCodePluginsDisabled(): boolean {
  const flagValues = [
    process.env.OPENCODE_DISABLE_CLAUDE_CODE,
    process.env.OPENCODE_DISABLE_CLAUDE_CODE_PLUGINS,
  ]
  return flagValues.some((value) => value === "true" || value === "1")
}
export async function loadAllPluginComponents(options?: PluginLoaderOptions): Promise<PluginComponentsResult> { export async function loadAllPluginComponents(options?: PluginLoaderOptions): Promise<PluginComponentsResult> {
if (isClaudeCodePluginsDisabled()) {
log("Claude Code plugin loading disabled via OPENCODE_DISABLE_CLAUDE_CODE env var")
return {
commands: {},
skills: {},
agents: {},
mcpServers: {},
hooksConfigs: [],
plugins: [],
errors: [],
}
}
const { plugins, errors } = discoverInstalledPlugins(options) const { plugins, errors } = discoverInstalledPlugins(options)
const [commands, skills, agents, mcpServers, hooksConfigs] = await Promise.all([ const [commands, skills, agents, mcpServers, hooksConfigs] = await Promise.all([

View File

@@ -1,112 +1,44 @@
import { afterEach, beforeEach, describe, expect, it, spyOn } from "bun:test" import { afterEach, describe, expect, it } from "bun:test"
import { startCallbackServer, type CallbackServer } from "./callback-server" import { startCallbackServer, type CallbackServer } from "./callback-server"
const HOSTNAME = "127.0.0.1"
const nativeFetch = Bun.fetch.bind(Bun) const nativeFetch = Bun.fetch.bind(Bun)
function supportsRealSocketBinding(): boolean {
try {
const server = Bun.serve({
port: 0,
hostname: HOSTNAME,
fetch: () => new Response("probe"),
})
server.stop(true)
return true
} catch {
return false
}
}
const canBindRealSockets = supportsRealSocketBinding()
type MockServerState = {
port: number
stopped: boolean
fetch: (request: Request) => Response | Promise<Response>
}
describe("startCallbackServer", () => { describe("startCallbackServer", () => {
let server: CallbackServer | null = null let server: CallbackServer | null = null
let serveSpy: ReturnType<typeof spyOn> | null = null
let activeServer: MockServerState | null = null
async function request(url: string): Promise<Response> {
if (canBindRealSockets) {
return nativeFetch(url)
}
if (!activeServer || activeServer.stopped) {
throw new Error("Connection refused")
}
return await activeServer.fetch(new Request(url))
}
beforeEach(() => {
if (canBindRealSockets) {
return
}
activeServer = null
serveSpy = spyOn(Bun, "serve").mockImplementation((options: {
port: number
hostname?: string
fetch: (request: Request) => Response | Promise<Response>
}) => {
const state: MockServerState = {
port: options.port === 0 ? 19877 : options.port,
stopped: false,
fetch: options.fetch,
}
const handle = {
port: state.port,
stop: (_force?: boolean) => {
state.stopped = true
if (activeServer === state) {
activeServer = null
}
},
}
activeServer = state
return handle as ReturnType<typeof Bun.serve>
})
})
afterEach(async () => { afterEach(async () => {
server?.close() server?.close()
server = null server = null
// Allow time for port to be released before next test
if (serveSpy) { await Bun.sleep(10)
serveSpy.mockRestore()
serveSpy = null
}
activeServer = null
if (canBindRealSockets) {
await Bun.sleep(10)
}
}) })
it("starts server and returns port", async () => { it("starts server and returns port", async () => {
// given - no preconditions
// when
server = await startCallbackServer() server = await startCallbackServer()
// then
expect(server.port).toBeGreaterThanOrEqual(19877) expect(server.port).toBeGreaterThanOrEqual(19877)
expect(typeof server.waitForCallback).toBe("function") expect(typeof server.waitForCallback).toBe("function")
expect(typeof server.close).toBe("function") expect(typeof server.close).toBe("function")
}) })
it("resolves callback with code and state from query params", async () => { it("resolves callback with code and state from query params", async () => {
// given
server = await startCallbackServer() server = await startCallbackServer()
const callbackUrl = `http://${HOSTNAME}:${server.port}/oauth/callback?code=test-code&state=test-state` const callbackUrl = `http://127.0.0.1:${server.port}/oauth/callback?code=test-code&state=test-state`
// when
// Use Promise.all to ensure fetch and waitForCallback run concurrently
// This prevents race condition where waitForCallback blocks before fetch starts
const [result, response] = await Promise.all([ const [result, response] = await Promise.all([
server.waitForCallback(), server.waitForCallback(),
request(callbackUrl), nativeFetch(callbackUrl)
]) ])
// then
expect(result).toEqual({ code: "test-code", state: "test-state" }) expect(result).toEqual({ code: "test-code", state: "test-state" })
expect(response.status).toBe(200) expect(response.status).toBe(200)
const html = await response.text() const html = await response.text()
@@ -114,19 +46,25 @@ describe("startCallbackServer", () => {
}) })
it("returns 404 for non-callback routes", async () => { it("returns 404 for non-callback routes", async () => {
// given
server = await startCallbackServer() server = await startCallbackServer()
const response = await request(`http://${HOSTNAME}:${server.port}/other`) // when
const response = await nativeFetch(`http://127.0.0.1:${server.port}/other`)
// then
expect(response.status).toBe(404) expect(response.status).toBe(404)
}) })
it("returns 400 and rejects when code is missing", async () => { it("returns 400 and rejects when code is missing", async () => {
// given
server = await startCallbackServer() server = await startCallbackServer()
const callbackRejection = server.waitForCallback().catch((error: Error) => error) const callbackRejection = server.waitForCallback().catch((e: Error) => e)
const response = await request(`http://${HOSTNAME}:${server.port}/oauth/callback?state=s`) // when
const response = await nativeFetch(`http://127.0.0.1:${server.port}/oauth/callback?state=s`)
// then
expect(response.status).toBe(400) expect(response.status).toBe(400)
const error = await callbackRejection const error = await callbackRejection
expect(error).toBeInstanceOf(Error) expect(error).toBeInstanceOf(Error)
@@ -134,11 +72,14 @@ describe("startCallbackServer", () => {
}) })
it("returns 400 and rejects when state is missing", async () => { it("returns 400 and rejects when state is missing", async () => {
// given
server = await startCallbackServer() server = await startCallbackServer()
const callbackRejection = server.waitForCallback().catch((error: Error) => error) const callbackRejection = server.waitForCallback().catch((e: Error) => e)
const response = await request(`http://${HOSTNAME}:${server.port}/oauth/callback?code=c`) // when
const response = await nativeFetch(`http://127.0.0.1:${server.port}/oauth/callback?code=c`)
// then
expect(response.status).toBe(400) expect(response.status).toBe(400)
const error = await callbackRejection const error = await callbackRejection
expect(error).toBeInstanceOf(Error) expect(error).toBeInstanceOf(Error)
@@ -146,15 +87,18 @@ describe("startCallbackServer", () => {
}) })
it("close stops the server immediately", async () => { it("close stops the server immediately", async () => {
// given
server = await startCallbackServer() server = await startCallbackServer()
const port = server.port const port = server.port
// when
server.close() server.close()
server = null server = null
// then
try { try {
await request(`http://${HOSTNAME}:${port}/oauth/callback?code=c&state=s`) await nativeFetch(`http://127.0.0.1:${port}/oauth/callback?code=c&state=s`)
expect.unreachable("request should fail after close") expect(true).toBe(false)
} catch (error) { } catch (error) {
expect(error).toBeDefined() expect(error).toBeDefined()
} }

View File

@@ -39,7 +39,7 @@ export async function findAvailablePort(startPort: number = DEFAULT_PORT): Promi
} }
export async function startCallbackServer(startPort: number = DEFAULT_PORT): Promise<CallbackServer> { export async function startCallbackServer(startPort: number = DEFAULT_PORT): Promise<CallbackServer> {
const requestedPort = await findAvailablePort(startPort).catch(() => 0) const port = await findAvailablePort(startPort)
let resolveCallback: ((result: OAuthCallbackResult) => void) | null = null let resolveCallback: ((result: OAuthCallbackResult) => void) | null = null
let rejectCallback: ((error: Error) => void) | null = null let rejectCallback: ((error: Error) => void) | null = null
@@ -55,7 +55,7 @@ export async function startCallbackServer(startPort: number = DEFAULT_PORT): Pro
}, TIMEOUT_MS) }, TIMEOUT_MS)
const server = Bun.serve({ const server = Bun.serve({
port: requestedPort, port,
hostname: "127.0.0.1", hostname: "127.0.0.1",
fetch(request: Request): Response { fetch(request: Request): Response {
const url = new URL(request.url) const url = new URL(request.url)
@@ -93,10 +93,9 @@ export async function startCallbackServer(startPort: number = DEFAULT_PORT): Pro
}) })
}, },
}) })
const activePort = server.port ?? requestedPort
return { return {
port: activePort, port,
waitForCallback: () => callbackPromise, waitForCallback: () => callbackPromise,
close: () => { close: () => {
clearTimeout(timeoutId) clearTimeout(timeoutId)

View File

@@ -226,29 +226,6 @@ describe('TmuxSessionManager', () => {
// then // then
expect(manager).toBeDefined() expect(manager).toBeDefined()
}) })
test('falls back to default port when serverUrl has port 0', async () => {
// given
mockIsInsideTmux.mockReturnValue(true)
const { TmuxSessionManager } = await import('./manager')
const ctx = {
...createMockContext(),
serverUrl: new URL('http://127.0.0.1:0/'),
}
const config: TmuxConfig = {
enabled: true,
layout: 'main-vertical',
main_pane_size: 60,
main_pane_min_width: 80,
agent_pane_min_width: 40,
}
// when
const manager = new TmuxSessionManager(ctx, config, mockTmuxDeps)
// then
expect((manager as any).serverUrl).toBe('http://localhost:4096')
})
}) })
describe('onSessionCreated', () => { describe('onSessionCreated', () => {

View File

@@ -73,18 +73,10 @@ export class TmuxSessionManager {
this.tmuxConfig = tmuxConfig this.tmuxConfig = tmuxConfig
this.deps = deps this.deps = deps
const defaultPort = process.env.OPENCODE_PORT ?? "4096" const defaultPort = process.env.OPENCODE_PORT ?? "4096"
const fallbackUrl = `http://localhost:${defaultPort}`
try { try {
const raw = ctx.serverUrl?.toString() this.serverUrl = ctx.serverUrl?.toString() ?? `http://localhost:${defaultPort}`
if (raw) {
const parsed = new URL(raw)
const port = parsed.port || (parsed.protocol === 'https:' ? '443' : '80')
this.serverUrl = port === '0' ? fallbackUrl : raw
} else {
this.serverUrl = fallbackUrl
}
} catch { } catch {
this.serverUrl = fallbackUrl this.serverUrl = `http://localhost:${defaultPort}`
} }
this.sourcePaneId = deps.getCurrentPaneId() this.sourcePaneId = deps.getCurrentPaneId()
this.pollingManager = new TmuxPollingManager( this.pollingManager = new TmuxPollingManager(

View File

@@ -1,10 +1,10 @@
# src/hooks/ — 48 Lifecycle Hooks # src/hooks/ — 46 Lifecycle Hooks
**Generated:** 2026-03-06 **Generated:** 2026-03-06
## OVERVIEW ## OVERVIEW
48 hooks across dedicated modules and standalone files. Three-tier composition: Core(39) + Continuation(7) + Skill(2). All hooks follow `createXXXHook(deps) → HookFunction` factory pattern. 46 hooks across 45 directories + 11 standalone files. Three-tier composition: Core(37) + Continuation(7) + Skill(2). All hooks follow `createXXXHook(deps) → HookFunction` factory pattern.
## HOOK TIERS ## HOOK TIERS
@@ -85,7 +85,7 @@ hooks/
| noHephaestusNonGpt | chat.message | Block Hephaestus from using non-GPT models | | noHephaestusNonGpt | chat.message | Block Hephaestus from using non-GPT models |
| runtimeFallback | event | Auto-switch models on API provider errors | | runtimeFallback | event | Auto-switch models on API provider errors |
### Tier 2: Tool Guard Hooks (12) — `create-tool-guard-hooks.ts` ### Tier 2: Tool Guard Hooks (10) — `create-tool-guard-hooks.ts`
| Hook | Event | Purpose | | Hook | Event | Purpose |
|------|-------|---------| |------|-------|---------|

View File

@@ -70,7 +70,7 @@ function isTokenLimitError(text: string): boolean {
return false return false
} }
const lower = text.toLowerCase() const lower = text.toLowerCase()
return TOKEN_LIMIT_KEYWORDS.some((kw) => lower.includes(kw)) return TOKEN_LIMIT_KEYWORDS.some((kw) => lower.includes(kw.toLowerCase()))
} }
export function parseAnthropicTokenLimitError(err: unknown): ParsedTokenLimitError | null { export function parseAnthropicTokenLimitError(err: unknown): ParsedTokenLimitError | null {

View File

@@ -98,9 +98,9 @@ describe("runSummarizeRetryStrategy", () => {
}) as typeof setTimeout }) as typeof setTimeout
autoCompactState.retryStateBySession.set(sessionID, { autoCompactState.retryStateBySession.set(sessionID, {
attempt: 0, attempt: 1,
lastAttemptTime: Date.now(), lastAttemptTime: Date.now(),
firstAttemptTime: Date.now() - 119900, firstAttemptTime: Date.now() - 119700,
}) })
summarizeMock.mockRejectedValueOnce(new Error("rate limited")) summarizeMock.mockRejectedValueOnce(new Error("rate limited"))
@@ -117,6 +117,6 @@ describe("runSummarizeRetryStrategy", () => {
//#then //#then
expect(timeoutCalls.length).toBe(1) expect(timeoutCalls.length).toBe(1)
expect(timeoutCalls[0]!.delay).toBeGreaterThan(0) expect(timeoutCalls[0]!.delay).toBeGreaterThan(0)
expect(timeoutCalls[0]!.delay).toBeLessThanOrEqual(300) expect(timeoutCalls[0]!.delay).toBeLessThanOrEqual(500)
}) })
}) })

View File

@@ -114,7 +114,6 @@ export async function runSummarizeRetryStrategy(params: {
body: summarizeBody as never, body: summarizeBody as never,
query: { directory: params.directory }, query: { directory: params.directory },
}) })
clearSessionState(params.autoCompactState, params.sessionID)
return return
} catch { } catch {
const remainingTimeMs = SUMMARIZE_RETRY_TOTAL_TIMEOUT_MS - (Date.now() - retryState.firstAttemptTime) const remainingTimeMs = SUMMARIZE_RETRY_TOTAL_TIMEOUT_MS - (Date.now() - retryState.firstAttemptTime)

View File

@@ -1,6 +1,6 @@
import { log, normalizeModelID } from "../../shared" import { log, normalizeModelID } from "../../shared"
const OPUS_PATTERN = /claude-opus/i const OPUS_4_6_PATTERN = /claude-opus-4[-.]6/i
function isClaudeProvider(providerID: string, modelID: string): boolean { function isClaudeProvider(providerID: string, modelID: string): boolean {
if (["anthropic", "google-vertex-anthropic", "opencode"].includes(providerID)) return true if (["anthropic", "google-vertex-anthropic", "opencode"].includes(providerID)) return true
@@ -8,9 +8,9 @@ function isClaudeProvider(providerID: string, modelID: string): boolean {
return false return false
} }
function isOpusModel(modelID: string): boolean { function isOpus46(modelID: string): boolean {
const normalized = normalizeModelID(modelID) const normalized = normalizeModelID(modelID)
return OPUS_PATTERN.test(normalized) return OPUS_4_6_PATTERN.test(normalized)
} }
interface ChatParamsInput { interface ChatParamsInput {
@@ -28,20 +28,6 @@ interface ChatParamsOutput {
options: Record<string, unknown> options: Record<string, unknown>
} }
/**
* Valid thinking budget levels per model tier.
* Opus supports "max"; all other Claude models cap at "high".
*/
const MAX_VARIANT_BY_TIER: Record<string, string> = {
opus: "max",
default: "high",
}
function clampVariant(variant: string, isOpus: boolean): string {
if (variant !== "max") return variant
return isOpus ? MAX_VARIANT_BY_TIER.opus : MAX_VARIANT_BY_TIER.default
}
export function createAnthropicEffortHook() { export function createAnthropicEffortHook() {
return { return {
"chat.params": async ( "chat.params": async (
@@ -52,27 +38,15 @@ export function createAnthropicEffortHook() {
if (!model?.modelID || !model?.providerID) return if (!model?.modelID || !model?.providerID) return
if (message.variant !== "max") return if (message.variant !== "max") return
if (!isClaudeProvider(model.providerID, model.modelID)) return if (!isClaudeProvider(model.providerID, model.modelID)) return
if (!isOpus46(model.modelID)) return
if (output.options.effort !== undefined) return if (output.options.effort !== undefined) return
const opus = isOpusModel(model.modelID) output.options.effort = "max"
const clamped = clampVariant(message.variant, opus) log("anthropic-effort: injected effort=max", {
output.options.effort = clamped sessionID: input.sessionID,
provider: model.providerID,
if (!opus) { model: model.modelID,
// Override the variant so OpenCode doesn't pass "max" to the API })
;(message as { variant?: string }).variant = clamped
log("anthropic-effort: clamped variant max→high for non-Opus model", {
sessionID: input.sessionID,
provider: model.providerID,
model: model.modelID,
})
} else {
log("anthropic-effort: injected effort=max", {
sessionID: input.sessionID,
provider: model.providerID,
model: model.modelID,
})
}
}, },
} }
} }

View File

@@ -116,21 +116,6 @@ describe("createAnthropicEffortHook", () => {
//#then should normalize and inject effort //#then should normalize and inject effort
expect(output.options.effort).toBe("max") expect(output.options.effort).toBe("max")
}) })
it("should preserve max for other opus model IDs such as opus-4-5", async () => {
//#given another opus model id that is not 4.6
const hook = createAnthropicEffortHook()
const { input, output } = createMockParams({
modelID: "claude-opus-4-5",
})
//#when chat.params hook is called
await hook["chat.params"](input, output)
//#then max should still be treated as valid for opus family
expect(output.options.effort).toBe("max")
expect(input.message.variant).toBe("max")
})
}) })
describe("conditions NOT met - should skip", () => { describe("conditions NOT met - should skip", () => {
@@ -158,8 +143,8 @@ describe("createAnthropicEffortHook", () => {
expect(output.options.effort).toBeUndefined() expect(output.options.effort).toBeUndefined()
}) })
it("should clamp effort to high for non-opus claude model with variant max", async () => { it("should NOT inject effort for non-opus model", async () => {
//#given claude-sonnet-4-6 (not opus) with variant max //#given claude-sonnet-4-6 (not opus)
const hook = createAnthropicEffortHook() const hook = createAnthropicEffortHook()
const { input, output } = createMockParams({ const { input, output } = createMockParams({
modelID: "claude-sonnet-4-6", modelID: "claude-sonnet-4-6",
@@ -168,9 +153,8 @@ describe("createAnthropicEffortHook", () => {
//#when chat.params hook is called //#when chat.params hook is called
await hook["chat.params"](input, output) await hook["chat.params"](input, output)
//#then effort should be clamped to high (not max) //#then effort should NOT be injected
expect(output.options.effort).toBe("high") expect(output.options.effort).toBeUndefined()
expect(input.message.variant).toBe("high")
}) })
it("should NOT inject effort for non-anthropic provider with non-claude model", async () => { it("should NOT inject effort for non-anthropic provider with non-claude model", async () => {

View File

@@ -2,12 +2,11 @@ import type { PluginInput } from "@opencode-ai/plugin"
import { createAtlasEventHandler } from "./event-handler" import { createAtlasEventHandler } from "./event-handler"
import { createToolExecuteAfterHandler } from "./tool-execute-after" import { createToolExecuteAfterHandler } from "./tool-execute-after"
import { createToolExecuteBeforeHandler } from "./tool-execute-before" import { createToolExecuteBeforeHandler } from "./tool-execute-before"
import type { AtlasHookOptions, PendingTaskRef, SessionState } from "./types" import type { AtlasHookOptions, SessionState } from "./types"
export function createAtlasHook(ctx: PluginInput, options?: AtlasHookOptions) { export function createAtlasHook(ctx: PluginInput, options?: AtlasHookOptions) {
const sessions = new Map<string, SessionState>() const sessions = new Map<string, SessionState>()
const pendingFilePaths = new Map<string, string>() const pendingFilePaths = new Map<string, string>()
const pendingTaskRefs = new Map<string, PendingTaskRef>()
const autoCommit = options?.autoCommit ?? true const autoCommit = options?.autoCommit ?? true
function getState(sessionID: string): SessionState { function getState(sessionID: string): SessionState {
@@ -21,7 +20,7 @@ export function createAtlasHook(ctx: PluginInput, options?: AtlasHookOptions) {
return { return {
handler: createAtlasEventHandler({ ctx, options, sessions, getState }), handler: createAtlasEventHandler({ ctx, options, sessions, getState }),
"tool.execute.before": createToolExecuteBeforeHandler({ ctx, pendingFilePaths, pendingTaskRefs }), "tool.execute.before": createToolExecuteBeforeHandler({ ctx, pendingFilePaths }),
"tool.execute.after": createToolExecuteAfterHandler({ ctx, pendingFilePaths, pendingTaskRefs, autoCommit, getState }), "tool.execute.after": createToolExecuteAfterHandler({ ctx, pendingFilePaths, autoCommit, getState }),
} }
} }

View File

@@ -15,8 +15,6 @@ export async function injectBoulderContinuation(input: {
total: number total: number
agent?: string agent?: string
worktreePath?: string worktreePath?: string
preferredTaskSessionId?: string
preferredTaskTitle?: string
backgroundManager?: BackgroundManager backgroundManager?: BackgroundManager
sessionState: SessionState sessionState: SessionState
}): Promise<void> { }): Promise<void> {
@@ -28,8 +26,6 @@ export async function injectBoulderContinuation(input: {
total, total,
agent, agent,
worktreePath, worktreePath,
preferredTaskSessionId,
preferredTaskTitle,
backgroundManager, backgroundManager,
sessionState, sessionState,
} = input } = input
@@ -44,13 +40,9 @@ export async function injectBoulderContinuation(input: {
} }
const worktreeContext = worktreePath ? `\n\n[Worktree: ${worktreePath}]` : "" const worktreeContext = worktreePath ? `\n\n[Worktree: ${worktreePath}]` : ""
const preferredSessionContext = preferredTaskSessionId
? `\n\n[Preferred reuse session for current top-level plan task${preferredTaskTitle ? `: ${preferredTaskTitle}` : ""}: ${preferredTaskSessionId}]`
: ""
const prompt = const prompt =
BOULDER_CONTINUATION_PROMPT.replace(/{PLAN_NAME}/g, planName) + BOULDER_CONTINUATION_PROMPT.replace(/{PLAN_NAME}/g, planName) +
`\n\n[Status: ${total - remaining}/${total} completed, ${remaining} remaining]` + `\n\n[Status: ${total - remaining}/${total} completed, ${remaining} remaining]` +
preferredSessionContext +
worktreeContext worktreeContext
try { try {

View File

@@ -4,7 +4,7 @@ import { existsSync, mkdirSync, rmSync, writeFileSync } from "node:fs"
import { tmpdir } from "node:os" import { tmpdir } from "node:os"
import { join } from "node:path" import { join } from "node:path"
import { createOpencodeClient } from "@opencode-ai/sdk" import { createOpencodeClient } from "@opencode-ai/sdk"
import type { AssistantMessage, Session } from "@opencode-ai/sdk" import type { AssistantMessage } from "@opencode-ai/sdk"
import type { BoulderState } from "../../features/boulder-state" import type { BoulderState } from "../../features/boulder-state"
import { clearBoulderState, writeBoulderState } from "../../features/boulder-state" import { clearBoulderState, writeBoulderState } from "../../features/boulder-state"
@@ -52,23 +52,6 @@ describe("Atlas final-wave approval gate regressions", () => {
response: new Response(), response: new Response(),
})) }))
Reflect.set(client.session, "get", async ({ path }: { path: { id: string } }) => {
const parentID = path.id === "ses_nested_scope_review"
? "atlas-nested-final-wave-session"
: path.id.startsWith("ses_parallel_review_")
? "atlas-parallel-final-wave-session"
: "main-session-123"
return {
data: {
id: path.id,
parentID,
} as Session,
request: new Request(`http://localhost/session/${path.id}`),
response: new Response(),
}
})
return { return {
directory: testDirectory, directory: testDirectory,
project: {} as AtlasHookContext["project"], project: {} as AtlasHookContext["project"],

View File

@@ -60,18 +60,10 @@ describe("Atlas final verification approval gate", () => {
} }
}) })
Reflect.set(client.session, "get", async ({ path }: { path: { id: string } }) => { Reflect.set(client.session, "get", async () => {
const parentID = path.id === "ses_final_wave_review"
? "atlas-final-wave-session"
: path.id === "ses_feature_task"
? "atlas-non-final-session"
: "main-session-123"
return { return {
data: { data: { parentID: "main-session-123" } as Session,
id: path.id, request: new Request("http://localhost/session/main-session-123"),
parentID,
} as Session,
request: new Request(`http://localhost/session/${path.id}`),
response: new Response(), response: new Response(),
} }
}) })

View File

@@ -1,10 +1,5 @@
import type { PluginInput } from "@opencode-ai/plugin" import type { PluginInput } from "@opencode-ai/plugin"
import { import { getPlanProgress, readBoulderState } from "../../features/boulder-state"
getPlanProgress,
getTaskSessionState,
readBoulderState,
readCurrentTopLevelTask,
} from "../../features/boulder-state"
import { log } from "../../shared/logger" import { log } from "../../shared/logger"
import { injectBoulderContinuation } from "./boulder-continuation-injector" import { injectBoulderContinuation } from "./boulder-continuation-injector"
import { HOOK_NAME } from "./hook-name" import { HOOK_NAME } from "./hook-name"
@@ -13,7 +8,6 @@ import type { AtlasHookOptions, SessionState } from "./types"
const CONTINUATION_COOLDOWN_MS = 5000 const CONTINUATION_COOLDOWN_MS = 5000
const FAILURE_BACKOFF_MS = 5 * 60 * 1000 const FAILURE_BACKOFF_MS = 5 * 60 * 1000
const MAX_CONSECUTIVE_PROMPT_FAILURES = 10
const RETRY_DELAY_MS = CONTINUATION_COOLDOWN_MS + 1000 const RETRY_DELAY_MS = CONTINUATION_COOLDOWN_MS + 1000
function hasRunningBackgroundTasks(sessionID: string, options?: AtlasHookOptions): boolean { function hasRunningBackgroundTasks(sessionID: string, options?: AtlasHookOptions): boolean {
@@ -37,14 +31,6 @@ async function injectContinuation(input: {
input.sessionState.lastContinuationInjectedAt = Date.now() input.sessionState.lastContinuationInjectedAt = Date.now()
try { try {
const currentBoulder = readBoulderState(input.ctx.directory)
const currentTask = currentBoulder
? readCurrentTopLevelTask(currentBoulder.active_plan)
: null
const preferredTaskSession = currentTask
? getTaskSessionState(input.ctx.directory, currentTask.key)
: null
await injectBoulderContinuation({ await injectBoulderContinuation({
ctx: input.ctx, ctx: input.ctx,
sessionID: input.sessionID, sessionID: input.sessionID,
@@ -53,8 +39,6 @@ async function injectContinuation(input: {
total: input.progress.total, total: input.progress.total,
agent: input.agent, agent: input.agent,
worktreePath: input.worktreePath, worktreePath: input.worktreePath,
preferredTaskSessionId: preferredTaskSession?.session_id,
preferredTaskTitle: preferredTaskSession?.task_title,
backgroundManager: input.options?.backgroundManager, backgroundManager: input.options?.backgroundManager,
sessionState: input.sessionState, sessionState: input.sessionState,
}) })
@@ -78,7 +62,7 @@ function scheduleRetry(input: {
sessionState.pendingRetryTimer = setTimeout(async () => { sessionState.pendingRetryTimer = setTimeout(async () => {
sessionState.pendingRetryTimer = undefined sessionState.pendingRetryTimer = undefined
if (sessionState.promptFailureCount >= MAX_CONSECUTIVE_PROMPT_FAILURES) return if (sessionState.promptFailureCount >= 2) return
if (sessionState.waitingForFinalWaveApproval) return if (sessionState.waitingForFinalWaveApproval) return
const currentBoulder = readBoulderState(ctx.directory) const currentBoulder = readBoulderState(ctx.directory)
@@ -88,6 +72,7 @@ function scheduleRetry(input: {
const currentProgress = getPlanProgress(currentBoulder.active_plan) const currentProgress = getPlanProgress(currentBoulder.active_plan)
if (currentProgress.isComplete) return if (currentProgress.isComplete) return
if (options?.isContinuationStopped?.(sessionID)) return if (options?.isContinuationStopped?.(sessionID)) return
if (options?.shouldSkipContinuation?.(sessionID)) return
if (hasRunningBackgroundTasks(sessionID, options)) return if (hasRunningBackgroundTasks(sessionID, options)) return
await injectContinuation({ await injectContinuation({
@@ -150,7 +135,7 @@ export async function handleAtlasSessionIdle(input: {
return return
} }
if (sessionState.promptFailureCount >= MAX_CONSECUTIVE_PROMPT_FAILURES) { if (sessionState.promptFailureCount >= 2) {
const timeSinceLastFailure = const timeSinceLastFailure =
sessionState.lastFailureAt !== undefined ? now - sessionState.lastFailureAt : Number.POSITIVE_INFINITY sessionState.lastFailureAt !== undefined ? now - sessionState.lastFailureAt : Number.POSITIVE_INFINITY
if (timeSinceLastFailure < FAILURE_BACKOFF_MS) { if (timeSinceLastFailure < FAILURE_BACKOFF_MS) {
@@ -176,6 +161,11 @@ export async function handleAtlasSessionIdle(input: {
return return
} }
if (options?.shouldSkipContinuation?.(sessionID)) {
log(`[${HOOK_NAME}] Skipped: another continuation hook already injected`, { sessionID })
return
}
if (sessionState.lastContinuationInjectedAt && now - sessionState.lastContinuationInjectedAt < CONTINUATION_COOLDOWN_MS) { if (sessionState.lastContinuationInjectedAt && now - sessionState.lastContinuationInjectedAt < CONTINUATION_COOLDOWN_MS) {
scheduleRetry({ ctx, sessionID, sessionState, options }) scheduleRetry({ ctx, sessionID, sessionState, options })
log(`[${HOOK_NAME}] Skipped: continuation cooldown active`, { log(`[${HOOK_NAME}] Skipped: continuation cooldown active`, {

View File

@@ -10,7 +10,6 @@ import {
} from "../../features/boulder-state" } from "../../features/boulder-state"
import type { BoulderState } from "../../features/boulder-state" import type { BoulderState } from "../../features/boulder-state"
import { _resetForTesting, subagentSessions, updateSessionAgent } from "../../features/claude-code-session-state" import { _resetForTesting, subagentSessions, updateSessionAgent } from "../../features/claude-code-session-state"
import type { PendingTaskRef } from "./types"
const TEST_STORAGE_ROOT = join(tmpdir(), `atlas-message-storage-${randomUUID()}`) const TEST_STORAGE_ROOT = join(tmpdir(), `atlas-message-storage-${randomUUID()}`)
const TEST_MESSAGE_STORAGE = join(TEST_STORAGE_ROOT, "message") const TEST_MESSAGE_STORAGE = join(TEST_STORAGE_ROOT, "message")
@@ -34,40 +33,25 @@ mock.module("../../shared/opencode-storage-detection", () => ({
})) }))
const { createAtlasHook } = await import("./index") const { createAtlasHook } = await import("./index")
const { createToolExecuteAfterHandler } = await import("./tool-execute-after")
const { createToolExecuteBeforeHandler } = await import("./tool-execute-before")
const { MESSAGE_STORAGE } = await import("../../features/hook-message-injector") const { MESSAGE_STORAGE } = await import("../../features/hook-message-injector")
describe("atlas hook", () => { describe("atlas hook", () => {
let TEST_DIR: string let TEST_DIR: string
let SISYPHUS_DIR: string let SISYPHUS_DIR: string
function createMockPluginInput(overrides?: { function createMockPluginInput(overrides?: { promptMock?: ReturnType<typeof mock> }) {
promptMock?: ReturnType<typeof mock>
sessionGetMock?: ReturnType<typeof mock>
}) {
const promptMock = overrides?.promptMock ?? mock(() => Promise.resolve()) const promptMock = overrides?.promptMock ?? mock(() => Promise.resolve())
const sessionGetMock = overrides?.sessionGetMock ?? mock(async ({ path }: { path: { id: string } }) => ({
data: {
id: path.id,
parentID: path.id.startsWith("ses_") ? "session-1" : "main-session-123",
},
}))
return { return {
directory: TEST_DIR, directory: TEST_DIR,
client: { client: {
session: { session: {
get: sessionGetMock, get: async () => ({ data: { parentID: "main-session-123" } }),
prompt: promptMock, prompt: promptMock,
promptAsync: promptMock, promptAsync: promptMock,
}, },
}, },
_promptMock: promptMock, _promptMock: promptMock,
_sessionGetMock: sessionGetMock, } as unknown as Parameters<typeof createAtlasHook>[0] & { _promptMock: ReturnType<typeof mock> }
} as unknown as Parameters<typeof createAtlasHook>[0] & {
_promptMock: ReturnType<typeof mock>
_sessionGetMock: ReturnType<typeof mock>
}
} }
function setupMessageStorage(sessionID: string, agent: string): void { function setupMessageStorage(sessionID: string, agent: string): void {
@@ -420,417 +404,12 @@ describe("atlas hook", () => {
// then - should include verification instructions // then - should include verification instructions
expect(output.output).toContain("LYING") expect(output.output).toContain("LYING")
expect(output.output).toContain("PHASE 1") expect(output.output).toContain("PHASE 1")
expect(output.output).toContain("PHASE 2") expect(output.output).toContain("PHASE 2")
cleanupMessageStorage(sessionID) cleanupMessageStorage(sessionID)
}) })
test("should clean pending task refs when a task returns background launch output", async () => {
// given - direct handlers with shared pending maps
const sessionID = "session-bg-launch-cleanup-test"
setupMessageStorage(sessionID, "atlas")
const planPath = join(TEST_DIR, "background-cleanup-plan.md")
writeFileSync(planPath, `# Plan
## TODOs
- [ ] 1. Implement auth flow
`)
writeBoulderState(TEST_DIR, {
active_plan: planPath,
started_at: "2026-01-02T10:00:00Z",
session_ids: ["session-1"],
plan_name: "background-cleanup-plan",
})
const pendingFilePaths = new Map<string, string>()
const pendingTaskRefs = new Map<string, PendingTaskRef>()
const beforeHandler = createToolExecuteBeforeHandler({
ctx: createMockPluginInput(),
pendingFilePaths,
pendingTaskRefs,
})
const afterHandler = createToolExecuteAfterHandler({
ctx: createMockPluginInput(),
pendingFilePaths,
pendingTaskRefs,
autoCommit: true,
getState: () => ({ promptFailureCount: 0 }),
})
// when - the task is captured before execution
await beforeHandler(
{ tool: "task", sessionID, callID: "call-bg-launch" },
{ args: { prompt: "Implement auth flow" } }
)
expect(pendingTaskRefs.size).toBe(1)
// and the task returns a background launch result
await afterHandler(
{ tool: "task", sessionID, callID: "call-bg-launch" },
{
title: "Sisyphus Task",
output: "Background task launched.\n\nSession ID: ses_bg_12345",
metadata: {},
}
)
// then - the pending task ref is still cleaned up
expect(pendingTaskRefs.size).toBe(0)
cleanupMessageStorage(sessionID)
})
// Verifies that when an Atlas-driven task() completes and its output carries a
// session_id inside a <task_metadata> block, the hook persists that session id
// against the CURRENT top-level plan task (key "todo:1"), together with the
// delegated agent/category metadata, so later continuations can resume it.
test("should persist preferred subagent session for the current top-level task", async () => {
// given - boulder state with a current top-level task, Atlas caller
const sessionID = "session-task-session-track-test"
setupMessageStorage(sessionID, "atlas")
const planPath = join(TEST_DIR, "task-session-plan.md")
// Plan has one unchecked top-level task plus a nested acceptance checkbox;
// only the top-level item should drive the "todo:1" task key.
writeFileSync(planPath, `# Plan
## TODOs
- [ ] 1. Implement auth flow
- [ ] nested acceptance checkbox
`)
const state: BoulderState = {
active_plan: planPath,
started_at: "2026-01-02T10:00:00Z",
session_ids: ["session-1"],
plan_name: "task-session-plan",
}
writeBoulderState(TEST_DIR, state)
const hook = createAtlasHook(createMockPluginInput())
// Simulated task() tool output: success message with a trailing metadata
// block naming the subagent session, plus agent/category in tool metadata.
const output = {
title: "Sisyphus Task",
output: `Task completed successfully
<task_metadata>
session_id: ses_auth_flow_123
</task_metadata>`,
metadata: {
agent: "sisyphus-junior",
category: "deep",
},
}
// when - the after-hook processes the completed task call
await hook["tool.execute.after"](
{ tool: "task", sessionID },
output
)
// then - boulder state now maps todo:1 to the extracted session + metadata
const updatedState = readBoulderState(TEST_DIR)
expect(updatedState?.task_sessions?.["todo:1"]?.session_id).toBe("ses_auth_flow_123")
expect(updatedState?.task_sessions?.["todo:1"]?.task_title).toBe("Implement auth flow")
expect(updatedState?.task_sessions?.["todo:1"]?.agent).toBe("sisyphus-junior")
expect(updatedState?.task_sessions?.["todo:1"]?.category).toBe("deep")
cleanupMessageStorage(sessionID)
})
// Verifies that the task key is captured at DELEGATION time (before-hook), so
// if the plan's checkboxes advance while the subagent runs, the completed
// session is still recorded against the task that was actually delegated
// (todo:1) rather than the plan's new current task (todo:2).
test("should preserve the delegated task key even after the plan advances to the next task", async () => {
// given - Atlas caller starts task 1, then the plan advances before task output is processed
const sessionID = "session-stable-task-key-test"
setupMessageStorage(sessionID, "atlas")
const planPath = join(TEST_DIR, "stable-task-key-plan.md")
writeFileSync(planPath, `# Plan
## TODOs
- [ ] 1. Implement auth flow
- [ ] 2. Add API validation
`)
writeBoulderState(TEST_DIR, {
active_plan: planPath,
started_at: "2026-01-02T10:00:00Z",
session_ids: ["session-1"],
plan_name: "stable-task-key-plan",
})
const hook = createAtlasHook(createMockPluginInput())
// when - Atlas delegates task 1 (before-hook snapshots the current task key)
await hook["tool.execute.before"](
{ tool: "task", sessionID, callID: "call-task-1" },
{ args: { prompt: "Implement auth flow" } }
)
// and the plan is advanced before the task output is processed
// (task 1 checked off, making todo:2 the plan's new current task)
writeFileSync(planPath, `# Plan
## TODOs
- [x] 1. Implement auth flow
- [ ] 2. Add API validation
`)
await hook["tool.execute.after"](
{ tool: "task", sessionID, callID: "call-task-1" },
{
title: "Sisyphus Task",
output: `Task completed successfully
<task_metadata>
session_id: ses_auth_flow_123
</task_metadata>`,
metadata: {
agent: "sisyphus-junior",
category: "deep",
},
}
)
// then - the completed task session is still recorded against task 1, not task 2
const updatedState = readBoulderState(TEST_DIR)
expect(updatedState?.task_sessions?.["todo:1"]?.session_id).toBe("ses_auth_flow_123")
expect(updatedState?.task_sessions?.["todo:2"]).toBeUndefined()
cleanupMessageStorage(sessionID)
})
// Verifies the "explicit resume" guard: when task() is invoked with an
// explicit session_id argument pointing at an OLDER session, the hook must
// neither record that session as the preferred one for the plan's current
// task (todo:2) nor advertise it in the appended reuse reminder.
test("should not overwrite the current task mapping when task() explicitly resumes an older session", async () => {
// given - current plan is on task 2, but Atlas explicitly resumes an older session for a previous task
const sessionID = "session-cross-task-resume-test"
setupMessageStorage(sessionID, "atlas")
const planPath = join(TEST_DIR, "cross-task-resume-plan.md")
writeFileSync(planPath, `# Plan
## TODOs
- [x] 1. Implement auth flow
- [ ] 2. Add API validation
`)
writeBoulderState(TEST_DIR, {
active_plan: planPath,
started_at: "2026-01-02T10:00:00Z",
session_ids: ["session-1"],
plan_name: "cross-task-resume-plan",
})
const hook = createAtlasHook(createMockPluginInput())
// when - Atlas resumes an explicit prior session (session_id in args marks
// this call as an explicit resume rather than a fresh delegation)
await hook["tool.execute.before"](
{ tool: "task", sessionID, callID: "call-resume-old-task" },
{ args: { prompt: "Follow up on previous task", session_id: "ses_old_task_111" } }
)
const output = {
title: "Sisyphus Task",
output: `Task continued successfully
<task_metadata>
session_id: ses_old_task_111
</task_metadata>`,
metadata: {
agent: "sisyphus-junior",
category: "deep",
},
}
await hook["tool.execute.after"](
{ tool: "task", sessionID, callID: "call-resume-old-task" },
output
)
// then - Atlas does not poison task 2's preferred session mapping
const updatedState = readBoulderState(TEST_DIR)
expect(updatedState?.task_sessions?.["todo:2"]).toBeUndefined()
// ...and the mutated output does not suggest resuming the old session
expect(output.output).not.toContain('task(session_id="ses_old_task_111"')
cleanupMessageStorage(sessionID)
})
// Verifies that completion reminders appended to task() output prefer the
// session already TRACKED for the plan's current task over a session id that
// was only reached via an explicit resume of an older session.
test("should not reuse an explicitly resumed session id in completion reminders", async () => {
// given - current plan is on task 2 with an existing tracked session
const sessionID = "session-explicit-resume-reminder-test"
setupMessageStorage(sessionID, "atlas")
const planPath = join(TEST_DIR, "explicit-resume-reminder-plan.md")
writeFileSync(planPath, `# Plan
## TODOs
- [x] 1. Implement auth flow
- [ ] 2. Add API validation
`)
// Boulder state pre-seeds todo:2 with a tracked preferred session, which the
// reminder text is expected to surface.
writeBoulderState(TEST_DIR, {
active_plan: planPath,
started_at: "2026-01-02T10:00:00Z",
session_ids: ["session-1"],
plan_name: "explicit-resume-reminder-plan",
task_sessions: {
"todo:2": {
task_key: "todo:2",
task_label: "2",
task_title: "Add API validation",
session_id: "ses_tracked_current_task",
updated_at: "2026-01-02T10:00:00Z",
},
},
})
const hook = createAtlasHook(createMockPluginInput())
const output = {
title: "Sisyphus Task",
output: `Task continued successfully
<task_metadata>
session_id: ses_old_task_111
</task_metadata>`,
metadata: {},
}
// when - the call is an explicit resume of an older session
await hook["tool.execute.before"](
{ tool: "task", sessionID, callID: "call-explicit-resume-reminder" },
{ args: { prompt: "Follow up on previous task", session_id: "ses_old_task_111" } }
)
await hook["tool.execute.after"](
{ tool: "task", sessionID, callID: "call-explicit-resume-reminder" },
output
)
// then - reminder points at the tracked session, not the resumed one
expect(output.output).not.toContain('task(session_id="ses_old_task_111"')
expect(output.output).toContain("ses_tracked_current_task")
cleanupMessageStorage(sessionID)
})
// Verifies collision handling: when two in-flight task() calls both claim the
// same current top-level task, the second pending ref is downgraded to a
// "skip" marker (reason: ambiguous_task_key) and NO task_session mapping is
// persisted — a wrong mapping would be worse than none.
test("should skip persistence when multiple in-flight task calls claim the same top-level task", async () => {
// given
const sessionID = "session-parallel-task-collision-test"
setupMessageStorage(sessionID, "atlas")
const planPath = join(TEST_DIR, "parallel-task-collision-plan.md")
writeFileSync(planPath, `# Plan
## TODOs
- [ ] 1. Implement auth flow
- [ ] 2. Add API validation
`)
writeBoulderState(TEST_DIR, {
active_plan: planPath,
started_at: "2026-01-02T10:00:00Z",
session_ids: ["session-1"],
plan_name: "parallel-task-collision-plan",
})
// Drive the before/after handlers directly (rather than the full hook) so
// the pendingTaskRefs map can be inspected mid-flight.
const pendingFilePaths = new Map<string, string>()
const pendingTaskRefs = new Map<string, PendingTaskRef>()
const beforeHandler = createToolExecuteBeforeHandler({
ctx: createMockPluginInput(),
pendingFilePaths,
pendingTaskRefs,
})
const afterHandler = createToolExecuteAfterHandler({
ctx: createMockPluginInput(),
pendingFilePaths,
pendingTaskRefs,
autoCommit: true,
getState: () => ({ promptFailureCount: 0 }),
})
// when - two task() calls start before either one completes
await beforeHandler(
{ tool: "task", sessionID, callID: "call-task-first" },
{ args: { prompt: "Implement auth flow part 1" } }
)
await beforeHandler(
{ tool: "task", sessionID, callID: "call-task-second" },
{ args: { prompt: "Implement auth flow part 2" } }
)
// Snapshot the second ref before the after-handler cleans it up.
const secondPendingTaskRef = pendingTaskRefs.get("call-task-second")
await afterHandler(
{ tool: "task", sessionID, callID: "call-task-second" },
{
title: "Sisyphus Task",
output: `Task completed successfully
<task_metadata>
session_id: ses_parallel_collision_222
</task_metadata>`,
metadata: {},
}
)
// then - second ref was marked as an ambiguous-key skip at before-time
expect(secondPendingTaskRef).toEqual({
kind: "skip",
reason: "ambiguous_task_key",
task: {
key: "todo:1",
label: "1",
title: "Implement auth flow",
},
})
// ...and nothing was persisted for the contested task key
const updatedState = readBoulderState(TEST_DIR)
expect(updatedState?.task_sessions?.["todo:1"]).toBeUndefined()
cleanupMessageStorage(sessionID)
})
// Verifies lineage validation of extracted session ids: a session whose
// parent chain does not lead back to the boulder's tracked sessions is
// treated as untrusted — it is neither persisted for the current task nor
// echoed in the reuse reminder (which falls back to the "<session_id>"
// placeholder instead).
test("should ignore extracted session ids that are outside the active boulder lineage", async () => {
// given
const sessionID = "session-untrusted-session-id-test"
setupMessageStorage(sessionID, "atlas")
const planPath = join(TEST_DIR, "untrusted-session-id-plan.md")
writeFileSync(planPath, `# Plan
## TODOs
- [ ] 1. Implement auth flow
`)
writeBoulderState(TEST_DIR, {
active_plan: planPath,
started_at: "2026-01-02T10:00:00Z",
session_ids: ["session-1"],
plan_name: "untrusted-session-id-plan",
})
// Mock session lookup: the untrusted id resolves to a parent OUTSIDE the
// boulder lineage; any other id resolves to the main session.
const hook = createAtlasHook(createMockPluginInput({
sessionGetMock: mock(async ({ path }: { path: { id: string } }) => ({
data: {
id: path.id,
parentID: path.id === "ses_untrusted_999" ? "session-outside-lineage" : "main-session-123",
},
})),
}))
const output = {
title: "Sisyphus Task",
output: `Task completed successfully
<task_metadata>
session_id: ses_untrusted_999
</task_metadata>`,
metadata: {},
}
// when
await hook["tool.execute.after"](
{ tool: "task", sessionID },
output
)
// then - untrusted id is not persisted and not advertised for reuse
const updatedState = readBoulderState(TEST_DIR)
expect(updatedState?.task_sessions?.["todo:1"]).toBeUndefined()
expect(output.output).not.toContain('task(session_id="ses_untrusted_999"')
expect(output.output).toContain('task(session_id="<session_id>"')
cleanupMessageStorage(sessionID)
})
describe("completion gate output ordering", () => { describe("completion gate output ordering", () => {
const COMPLETION_GATE_SESSION = "completion-gate-order-test" const COMPLETION_GATE_SESSION = "completion-gate-order-test"
@@ -1464,6 +1043,37 @@ session_id: ses_untrusted_999
expect(mockInput._promptMock).not.toHaveBeenCalled() expect(mockInput._promptMock).not.toHaveBeenCalled()
}) })
test("should skip when another continuation hook already injected", async () => {
// given - boulder state with incomplete plan
const planPath = join(TEST_DIR, "test-plan.md")
writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2")
const state: BoulderState = {
active_plan: planPath,
started_at: "2026-01-02T10:00:00Z",
session_ids: [MAIN_SESSION_ID],
plan_name: "test-plan",
}
writeBoulderState(TEST_DIR, state)
const mockInput = createMockPluginInput()
const hook = createAtlasHook(mockInput, {
directory: TEST_DIR,
shouldSkipContinuation: (sessionID: string) => sessionID === MAIN_SESSION_ID,
})
// when
await hook.handler({
event: {
type: "session.idle",
properties: { sessionID: MAIN_SESSION_ID },
},
})
// then - should not call prompt because another continuation already handled it
expect(mockInput._promptMock).not.toHaveBeenCalled()
})
test("should clear abort state on message.updated", async () => { test("should clear abort state on message.updated", async () => {
// given - boulder with incomplete plan // given - boulder with incomplete plan
const planPath = join(TEST_DIR, "test-plan.md") const planPath = join(TEST_DIR, "test-plan.md")
@@ -1537,48 +1147,6 @@ session_id: ses_untrusted_999
expect(callArgs.body.parts[0].text).toContain("2 remaining") expect(callArgs.body.parts[0].text).toContain("2 remaining")
}) })
test("should include preferred reuse session in continuation prompt for current top-level task", async () => {
// given - boulder state with tracked preferred session
const planPath = join(TEST_DIR, "preferred-session-plan.md")
writeFileSync(planPath, `# Plan
## TODOs
- [ ] 1. Implement auth flow
`)
writeBoulderState(TEST_DIR, {
active_plan: planPath,
started_at: "2026-01-02T10:00:00Z",
session_ids: [MAIN_SESSION_ID],
plan_name: "preferred-session-plan",
task_sessions: {
"todo:1": {
task_key: "todo:1",
task_label: "1",
task_title: "Implement auth flow",
session_id: "ses_auth_flow_123",
updated_at: "2026-01-02T10:00:00Z",
},
},
})
const mockInput = createMockPluginInput()
const hook = createAtlasHook(mockInput)
// when
await hook.handler({
event: {
type: "session.idle",
properties: { sessionID: MAIN_SESSION_ID },
},
})
// then
const callArgs = mockInput._promptMock.mock.calls[0][0]
expect(callArgs.body.parts[0].text).toContain("Preferred reuse session for current top-level plan task")
expect(callArgs.body.parts[0].text).toContain("ses_auth_flow_123")
})
test("should inject when last agent is sisyphus and boulder targets atlas explicitly", async () => { test("should inject when last agent is sisyphus and boulder targets atlas explicitly", async () => {
// given - boulder explicitly set to atlas, but last agent is sisyphus (initial state after /start-work) // given - boulder explicitly set to atlas, but last agent is sisyphus (initial state after /start-work)
const planPath = join(TEST_DIR, "test-plan.md") const planPath = join(TEST_DIR, "test-plan.md")
@@ -1715,7 +1283,7 @@ session_id: ses_untrusted_999
expect(mockInput._promptMock).toHaveBeenCalledTimes(1) expect(mockInput._promptMock).toHaveBeenCalledTimes(1)
}) })
test("should stop continuation after 10 consecutive prompt failures (issue #1355)", async () => { test("should stop continuation after 2 consecutive prompt failures (issue #1355)", async () => {
//#given - boulder state with incomplete plan and prompt always fails //#given - boulder state with incomplete plan and prompt always fails
const planPath = join(TEST_DIR, "test-plan.md") const planPath = join(TEST_DIR, "test-plan.md")
writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2") writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2")
@@ -1728,7 +1296,7 @@ session_id: ses_untrusted_999
} }
writeBoulderState(TEST_DIR, state) writeBoulderState(TEST_DIR, state)
const promptMock = mock((): Promise<void> => Promise.reject(new Error("Bad Request"))) const promptMock = mock(() => Promise.reject(new Error("Bad Request")))
const mockInput = createMockPluginInput({ promptMock }) const mockInput = createMockPluginInput({ promptMock })
const hook = createAtlasHook(mockInput) const hook = createAtlasHook(mockInput)
@@ -1738,23 +1306,25 @@ session_id: ses_untrusted_999
try { try {
//#when - idle fires repeatedly, past cooldown each time //#when - idle fires repeatedly, past cooldown each time
for (let i = 0; i < 10; i++) { await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } }) await flushMicrotasks()
await flushMicrotasks() now += 6000
now += 6000
} await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
await flushMicrotasks()
now += 6000
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } }) await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
await flushMicrotasks() await flushMicrotasks()
//#then - should attempt only 10 times, then disable continuation //#then - should attempt only twice, then disable continuation
expect(promptMock).toHaveBeenCalledTimes(10) expect(promptMock).toHaveBeenCalledTimes(2)
} finally { } finally {
Date.now = originalDateNow Date.now = originalDateNow
} }
}) })
test("should reset prompt failure counter on success and only stop after 10 consecutive failures", async () => { test("should reset prompt failure counter on success and only stop after 2 consecutive failures", async () => {
//#given - boulder state with incomplete plan //#given - boulder state with incomplete plan
const planPath = join(TEST_DIR, "test-plan.md") const planPath = join(TEST_DIR, "test-plan.md")
writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2") writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2")
@@ -1767,9 +1337,11 @@ session_id: ses_untrusted_999
} }
writeBoulderState(TEST_DIR, state) writeBoulderState(TEST_DIR, state)
const promptMock = mock((): Promise<void> => Promise.reject(new Error("Bad Request"))) const promptMock = mock(() => Promise.resolve())
promptMock.mockImplementationOnce(() => Promise.reject(new Error("Bad Request"))) promptMock.mockImplementationOnce(() => Promise.reject(new Error("Bad Request")))
promptMock.mockImplementationOnce(() => Promise.resolve()) promptMock.mockImplementationOnce(() => Promise.resolve())
promptMock.mockImplementationOnce(() => Promise.reject(new Error("Bad Request")))
promptMock.mockImplementationOnce(() => Promise.reject(new Error("Bad Request")))
const mockInput = createMockPluginInput({ promptMock }) const mockInput = createMockPluginInput({ promptMock })
const hook = createAtlasHook(mockInput) const hook = createAtlasHook(mockInput)
@@ -1779,21 +1351,21 @@ session_id: ses_untrusted_999
Date.now = () => now Date.now = () => now
try { try {
//#when - fail, succeed (reset), then fail 10 times (disable), then attempt again //#when - fail, succeed (reset), then fail twice (disable), then attempt again
for (let i = 0; i < 13; i++) { for (let i = 0; i < 5; i++) {
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } }) await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
await flushMicrotasks() await flushMicrotasks()
now += 6000 now += 6000
} }
//#then - 12 prompt attempts; 13th idle is skipped after 10 consecutive failures //#then - 4 prompt attempts; 5th idle is skipped after 2 consecutive failures
expect(promptMock).toHaveBeenCalledTimes(12) expect(promptMock).toHaveBeenCalledTimes(4)
} finally { } finally {
Date.now = originalDateNow Date.now = originalDateNow
} }
}) })
test("should keep skipping continuation during 5-minute backoff after 10 consecutive failures", async () => { test("should keep skipping continuation during 5-minute backoff after 2 consecutive failures", async () => {
//#given - boulder state with incomplete plan and prompt always fails //#given - boulder state with incomplete plan and prompt always fails
const planPath = join(TEST_DIR, "test-plan.md") const planPath = join(TEST_DIR, "test-plan.md")
writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2") writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2")
@@ -1815,26 +1387,26 @@ session_id: ses_untrusted_999
Date.now = () => now Date.now = () => now
try { try {
//#when - 11th idle occurs inside 5-minute backoff window //#when - third idle occurs inside 5-minute backoff window
for (let i = 0; i < 10; i++) { await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } }) await flushMicrotasks()
await flushMicrotasks() now += 6000
now += 6000
}
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
await flushMicrotasks()
now += 60000 now += 60000
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } }) await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
await flushMicrotasks() await flushMicrotasks()
//#then - 11th attempt should still be skipped //#then - third attempt should still be skipped
expect(promptMock).toHaveBeenCalledTimes(10) expect(promptMock).toHaveBeenCalledTimes(2)
} finally { } finally {
Date.now = originalDateNow Date.now = originalDateNow
} }
}) })
test("should retry continuation after 5-minute backoff expires following 10 consecutive failures", async () => { test("should retry continuation after 5-minute backoff expires following 2 consecutive failures", async () => {
//#given - boulder state with incomplete plan and prompt always fails //#given - boulder state with incomplete plan and prompt always fails
const planPath = join(TEST_DIR, "test-plan.md") const planPath = join(TEST_DIR, "test-plan.md")
writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2") writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2")
@@ -1856,20 +1428,20 @@ session_id: ses_untrusted_999
Date.now = () => now Date.now = () => now
try { try {
//#when - 11th idle occurs after 5+ minutes //#when - third idle occurs after 5+ minutes
for (let i = 0; i < 10; i++) { await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } }) await flushMicrotasks()
await flushMicrotasks() now += 6000
now += 6000
}
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
await flushMicrotasks()
now += 300000 now += 300000
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } }) await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
await flushMicrotasks() await flushMicrotasks()
//#then - 11th attempt should run after backoff expiration //#then - third attempt should run after backoff expiration
expect(promptMock).toHaveBeenCalledTimes(11) expect(promptMock).toHaveBeenCalledTimes(3)
} finally { } finally {
Date.now = originalDateNow Date.now = originalDateNow
} }
@@ -1889,9 +1461,8 @@ session_id: ses_untrusted_999
writeBoulderState(TEST_DIR, state) writeBoulderState(TEST_DIR, state)
const promptMock = mock((): Promise<void> => Promise.reject(new Error("Bad Request"))) const promptMock = mock((): Promise<void> => Promise.reject(new Error("Bad Request")))
for (let i = 0; i < 10; i++) { promptMock.mockImplementationOnce(() => Promise.reject(new Error("Bad Request")))
promptMock.mockImplementationOnce(() => Promise.reject(new Error("Bad Request"))) promptMock.mockImplementationOnce(() => Promise.reject(new Error("Bad Request")))
}
promptMock.mockImplementationOnce(() => Promise.resolve(undefined)) promptMock.mockImplementationOnce(() => Promise.resolve(undefined))
const mockInput = createMockPluginInput({ promptMock }) const mockInput = createMockPluginInput({ promptMock })
const hook = createAtlasHook(mockInput) const hook = createAtlasHook(mockInput)
@@ -1901,30 +1472,32 @@ session_id: ses_untrusted_999
Date.now = () => now Date.now = () => now
try { try {
//#when - fail 10 times, recover after backoff with success, then fail 10 times again //#when - fail twice, recover after backoff with success, then fail twice again
for (let i = 0; i < 10; i++) { await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } }) await flushMicrotasks()
await flushMicrotasks() now += 6000
now += 6000
}
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
await flushMicrotasks()
now += 300000 now += 300000
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } }) await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
await flushMicrotasks() await flushMicrotasks()
now += 6000 now += 6000
for (let i = 0; i < 10; i++) { await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } }) await flushMicrotasks()
await flushMicrotasks() now += 6000
now += 6000
} await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
await flushMicrotasks()
now += 6000
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } }) await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
await flushMicrotasks() await flushMicrotasks()
//#then - success retry resets counter, so 10 additional failures are allowed before skip //#then - success retry resets counter, so two additional failures are allowed before skip
expect(promptMock).toHaveBeenCalledTimes(21) expect(promptMock).toHaveBeenCalledTimes(5)
} finally { } finally {
Date.now = originalDateNow Date.now = originalDateNow
} }
@@ -1952,12 +1525,14 @@ session_id: ses_untrusted_999
Date.now = () => now Date.now = () => now
try { try {
//#when - 10 failures disable continuation, then compaction resets it //#when - two failures disables continuation, then compaction resets it
for (let i = 0; i < 10; i++) { await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } }) await flushMicrotasks()
await flushMicrotasks() now += 6000
now += 6000
} await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
await flushMicrotasks()
now += 6000
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } }) await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
await flushMicrotasks() await flushMicrotasks()
@@ -1968,8 +1543,8 @@ session_id: ses_untrusted_999
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } }) await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
await flushMicrotasks() await flushMicrotasks()
//#then - 10 attempts + 1 after compaction (11 total) //#then - 2 attempts + 1 after compaction (3 total)
expect(promptMock).toHaveBeenCalledTimes(11) expect(promptMock).toHaveBeenCalledTimes(3)
} finally { } finally {
Date.now = originalDateNow Date.now = originalDateNow
} }

View File

@@ -18,9 +18,9 @@ function getLastAgentFromMessageDir(messageDir: string): string | null {
const files = readdirSync(messageDir) const files = readdirSync(messageDir)
.filter((fileName) => fileName.endsWith(".json")) .filter((fileName) => fileName.endsWith(".json"))
.sort() .sort()
.reverse()
for (let i = files.length - 1; i >= 0; i--) { for (const fileName of files) {
const fileName = files[i]
try { try {
const content = readFileSync(join(messageDir, fileName), "utf-8") const content = readFileSync(join(messageDir, fileName), "utf-8")
const parsed = JSON.parse(content) as { agent?: unknown } const parsed = JSON.parse(content) as { agent?: unknown }

View File

@@ -1,79 +0,0 @@
import { describe, expect, test } from "bun:test"
import { extractSessionIdFromOutput } from "./subagent-session-id"
// Unit tests for extractSessionIdFromOutput: the parser that pulls a
// subagent session id (ses_…) out of task() tool output, preferring the
// trailing <task_metadata> block over incidental "Session ID:" mentions.
describe("extractSessionIdFromOutput", () => {
test("extracts Session ID blocks from background output", () => {
// given - background-launch style output with an explicit "Session ID:" line
const output = `Background task launched.\n\nSession ID: ses_bg_12345`
// when
const result = extractSessionIdFromOutput(output)
// then
expect(result).toBe("ses_bg_12345")
})
test("extracts session_id from task metadata blocks", () => {
// given - sync-completion style output with a <task_metadata> block
const output = `Task completed.\n\n<task_metadata>\nsession_id: ses_sync_12345\n</task_metadata>`
// when
const result = extractSessionIdFromOutput(output)
// then
expect(result).toBe("ses_sync_12345")
})
test("extracts hyphenated session IDs from task metadata blocks", () => {
// given - session ids may contain hyphens, not just [a-zA-Z0-9_]
const output = `Task completed.\n\n<task_metadata>\nsession_id: ses_auth-flow-123\n</task_metadata>`
// when
const result = extractSessionIdFromOutput(output)
// then
expect(result).toBe("ses_auth-flow-123")
})
test("returns undefined when no session id is present", () => {
// given - output carrying neither a metadata block nor a "Session ID:" line
const output = "Task completed without metadata"
// when
const result = extractSessionIdFromOutput(output)
// then
expect(result).toBeUndefined()
})
test("prefers the session id inside the trailing task_metadata block", () => {
// given - a session id mentioned in prose must LOSE to the metadata block
const output = `The previous attempt mentioned session_id: ses_wrong_body_123 but that was only context.
<task_metadata>
session_id: ses_real_metadata_456
</task_metadata>`
// when
const result = extractSessionIdFromOutput(output)
// then
expect(result).toBe("ses_real_metadata_456")
})
test("does not let task_metadata parsing bleed into incidental body text after the closing tag", () => {
// given - a session id appearing AFTER </task_metadata> must not be picked up
const output = `<task_metadata>
session_id: ses_real_metadata_456
</task_metadata>
debug log: session_id: ses_wrong_body_789`
// when
const result = extractSessionIdFromOutput(output)
// then
expect(result).toBe("ses_real_metadata_456")
})
})

View File

@@ -1,44 +1,4 @@
import type { PluginInput } from "@opencode-ai/plugin" export function extractSessionIdFromOutput(output: string): string {
import { log } from "../../shared/logger" const match = output.match(/Session ID:\s*(ses_[a-zA-Z0-9]+)/)
import { isSessionInBoulderLineage } from "./boulder-session-lineage" return match?.[1] ?? "<session_id>"
import { HOOK_NAME } from "./hook-name"
/**
 * Extracts a subagent session id (`ses_…`) from task() tool output.
 *
 * Precedence: the LAST `<task_metadata>…</task_metadata>` block wins; only
 * when no such block yields a `session_id:` entry do we fall back to the last
 * explicit `Session ID:` mention in the body. Returns `undefined` when the
 * output contains neither.
 *
 * @param output - raw text produced by the task tool
 * @returns the extracted session id, or `undefined` if none is present
 */
export function extractSessionIdFromOutput(output: string): string | undefined {
// Collect every metadata block body and keep only the trailing one, so
// incidental session ids quoted earlier in the body cannot win.
const metadataBodies = Array.from(
output.matchAll(/<task_metadata>([\s\S]*?)<\/task_metadata>/gi),
(match) => match[1],
)
const trailingMetadata = metadataBodies[metadataBodies.length - 1]
if (trailingMetadata) {
const fromMetadata = /session_id:\s*(ses_[a-zA-Z0-9_-]+)/i.exec(trailingMetadata)
if (fromMetadata) {
return fromMetadata[1]
}
}
// Fallback: the last explicit "Session ID:" marker anywhere in the output.
let lastExplicitId: string | undefined
for (const match of output.matchAll(/Session ID:\s*(ses_[a-zA-Z0-9_-]+)/g)) {
lastExplicitId = match[1]
}
return lastExplicitId
}
export async function validateSubagentSessionId(input: {
client: PluginInput["client"]
sessionID?: string
lineageSessionIDs: string[]
}): Promise<string | undefined> {
if (!input.sessionID || input.lineageSessionIDs.length === 0) {
return undefined
}
const belongsToLineage = await isSessionInBoulderLineage({
client: input.client,
sessionID: input.sessionID,
boulderSessionIDs: input.lineageSessionIDs,
})
if (!belongsToLineage) {
log(`[${HOOK_NAME}] Ignoring extracted session id outside active lineage`, {
sessionID: input.sessionID,
lineageSessionIDs: input.lineageSessionIDs,
})
return undefined
}
return input.sessionID
} }

View File

@@ -218,31 +218,21 @@ ${createSystemDirective(SystemDirectiveTypes.SINGLE_TASK_ONLY)}
**STOP. READ THIS BEFORE PROCEEDING.** **STOP. READ THIS BEFORE PROCEEDING.**
If you were given **multiple genuinely independent goals** (unrelated tasks, parallel workstreams, separate features), you MUST: If you were NOT given **exactly ONE atomic task**, you MUST:
1. **IMMEDIATELY REFUSE** this request 1. **IMMEDIATELY REFUSE** this request
2. **DEMAND** the orchestrator provide a single goal 2. **DEMAND** the orchestrator provide a single, specific task
**What counts as multiple independent tasks (REFUSE):** **Your response if multiple tasks detected:**
- "Implement feature A. Also, add feature B." > "I refuse to proceed. You provided multiple tasks. An orchestrator's impatience destroys work quality.
- "Fix bug X. Then refactor module Y. Also update the docs."
- Multiple unrelated changes bundled into one request
**What is a single task with sequential steps (PROCEED):**
- A single goal broken into numbered steps (e.g., "Implement X by: 1. finding files, 2. adding logic, 3. writing tests")
- Multi-step context where all steps serve ONE objective
- Orchestrator-provided context explaining approach for a single deliverable
**Your response if genuinely independent tasks are detected:**
> "I refuse to proceed. You provided multiple independent tasks. Each task needs full attention.
> >
> PROVIDE EXACTLY ONE GOAL. One deliverable. One clear outcome. > PROVIDE EXACTLY ONE TASK. One file. One change. One verification.
> >
> Batching unrelated tasks causes: incomplete work, missed edge cases, broken tests, wasted context." > Your rushing will cause: incomplete work, missed edge cases, broken tests, wasted context."
**WARNING TO ORCHESTRATOR:** **WARNING TO ORCHESTRATOR:**
- Bundling unrelated tasks RUINS deliverables - Your hasty batching RUINS deliverables
- Each independent goal needs FULL attention and PROPER verification - Each task needs FULL attention and PROPER verification
- Batch delegation of separate concerns = sloppy work = rework = wasted tokens - Batch delegation = sloppy work = rework = wasted tokens
**REFUSE genuinely multi-task requests. ALLOW single-goal multi-step workflows.** **REFUSE multi-task requests. DEMAND single-task clarity.**
` `

View File

@@ -1,12 +1,5 @@
import type { PluginInput } from "@opencode-ai/plugin" import type { PluginInput } from "@opencode-ai/plugin"
import { import { appendSessionId, getPlanProgress, readBoulderState } from "../../features/boulder-state"
appendSessionId,
getPlanProgress,
getTaskSessionState,
readBoulderState,
readCurrentTopLevelTask,
upsertTaskSessionState,
} from "../../features/boulder-state"
import { log } from "../../shared/logger" import { log } from "../../shared/logger"
import { isCallerOrchestrator } from "../../shared/session-utils" import { isCallerOrchestrator } from "../../shared/session-utils"
import { collectGitDiffStats, formatFileChanges } from "../../shared/git-worktree" import { collectGitDiffStats, formatFileChanges } from "../../shared/git-worktree"
@@ -14,7 +7,7 @@ import { shouldPauseForFinalWaveApproval } from "./final-wave-approval-gate"
import { HOOK_NAME } from "./hook-name" import { HOOK_NAME } from "./hook-name"
import { DIRECT_WORK_REMINDER } from "./system-reminder-templates" import { DIRECT_WORK_REMINDER } from "./system-reminder-templates"
import { isSisyphusPath } from "./sisyphus-path" import { isSisyphusPath } from "./sisyphus-path"
import { extractSessionIdFromOutput, validateSubagentSessionId } from "./subagent-session-id" import { extractSessionIdFromOutput } from "./subagent-session-id"
import { import {
buildCompletionGate, buildCompletionGate,
buildFinalWaveApprovalReminder, buildFinalWaveApprovalReminder,
@@ -22,60 +15,16 @@ import {
buildStandaloneVerificationReminder, buildStandaloneVerificationReminder,
} from "./verification-reminders" } from "./verification-reminders"
import { isWriteOrEditToolName } from "./write-edit-tool-policy" import { isWriteOrEditToolName } from "./write-edit-tool-policy"
import type { PendingTaskRef, SessionState } from "./types" import type { SessionState } from "./types"
import type { ToolExecuteAfterInput, ToolExecuteAfterOutput, TrackedTopLevelTaskRef } from "./types" import type { ToolExecuteAfterInput, ToolExecuteAfterOutput } from "./types"
/**
 * Picks the session id to surface in orchestrator reminders.
 * Preference order: the session id from the current tool call, then the
 * session tracked for the task, then the literal placeholder token.
 */
function resolvePreferredSessionId(currentSessionId?: string, trackedSessionId?: string): string {
  if (currentSessionId != null) {
    return currentSessionId
  }
  if (trackedSessionId != null) {
    return trackedSessionId
  }
  return "<session_id>"
}
/**
 * Decides which top-level plan task (if any) the finished subagent call
 * belongs to, and how its reported session id should be handled.
 *
 * - No pending ref: fall back to the plan file's current task; persist normally.
 * - "track" ref: the before-hook claimed this call for a specific task; persist normally.
 * - "skip"/explicit_resume: the caller resumed a session explicitly, so the
 *   session id reported by this call is ignored and nothing is persisted.
 * - "skip"/ambiguous_task_key: task ownership is ambiguous; keep the ref's task
 *   for context but neither persist nor prefer this call's session id.
 */
function resolveTaskContext(
  pendingTaskRef: PendingTaskRef | undefined,
  planPath: string,
): {
  currentTask: TrackedTopLevelTaskRef | null
  shouldSkipTaskSessionUpdate: boolean
  shouldIgnoreCurrentSessionId: boolean
} {
  if (pendingTaskRef === undefined) {
    return {
      currentTask: readCurrentTopLevelTask(planPath),
      shouldSkipTaskSessionUpdate: false,
      shouldIgnoreCurrentSessionId: false,
    }
  }
  switch (pendingTaskRef.kind) {
    case "track":
      return {
        currentTask: pendingTaskRef.task,
        shouldSkipTaskSessionUpdate: false,
        shouldIgnoreCurrentSessionId: false,
      }
    default:
      return {
        currentTask: pendingTaskRef.reason === "explicit_resume"
          ? readCurrentTopLevelTask(planPath)
          : pendingTaskRef.task,
        shouldSkipTaskSessionUpdate: true,
        shouldIgnoreCurrentSessionId: true,
      }
  }
}
export function createToolExecuteAfterHandler(input: { export function createToolExecuteAfterHandler(input: {
ctx: PluginInput ctx: PluginInput
pendingFilePaths: Map<string, string> pendingFilePaths: Map<string, string>
pendingTaskRefs: Map<string, PendingTaskRef>
autoCommit: boolean autoCommit: boolean
getState: (sessionID: string) => SessionState getState: (sessionID: string) => SessionState
}): (toolInput: ToolExecuteAfterInput, toolOutput: ToolExecuteAfterOutput) => Promise<void> { }): (toolInput: ToolExecuteAfterInput, toolOutput: ToolExecuteAfterOutput) => Promise<void> {
const { ctx, pendingFilePaths, pendingTaskRefs, autoCommit, getState } = input const { ctx, pendingFilePaths, autoCommit, getState } = input
return async (toolInput, toolOutput): Promise<void> => { return async (toolInput, toolOutput): Promise<void> => {
// Guard against undefined output (e.g., from /review command - see issue #1035) // Guard against undefined output (e.g., from /review command - see issue #1035)
if (!toolOutput) { if (!toolOutput) {
@@ -110,33 +59,19 @@ export function createToolExecuteAfterHandler(input: {
} }
const outputStr = toolOutput.output && typeof toolOutput.output === "string" ? toolOutput.output : "" const outputStr = toolOutput.output && typeof toolOutput.output === "string" ? toolOutput.output : ""
const pendingTaskRef = toolInput.callID ? pendingTaskRefs.get(toolInput.callID) : undefined
if (toolInput.callID) {
pendingTaskRefs.delete(toolInput.callID)
}
const isBackgroundLaunch = outputStr.includes("Background task launched") || outputStr.includes("Background task continued") const isBackgroundLaunch = outputStr.includes("Background task launched") || outputStr.includes("Background task continued")
if (isBackgroundLaunch) { if (isBackgroundLaunch) {
return return
} }
if (toolOutput.output && typeof toolOutput.output === "string") { if (toolOutput.output && typeof toolOutput.output === "string") {
const boulderState = readBoulderState(ctx.directory) const gitStats = collectGitDiffStats(ctx.directory)
const worktreePath = boulderState?.worktree_path?.trim()
const verificationDirectory = worktreePath ? worktreePath : ctx.directory
const gitStats = collectGitDiffStats(verificationDirectory)
const fileChanges = formatFileChanges(gitStats) const fileChanges = formatFileChanges(gitStats)
const extractedSessionId = extractSessionIdFromOutput(toolOutput.output) const subagentSessionId = extractSessionIdFromOutput(toolOutput.output)
const boulderState = readBoulderState(ctx.directory)
if (boulderState) { if (boulderState) {
const progress = getPlanProgress(boulderState.active_plan) const progress = getPlanProgress(boulderState.active_plan)
const {
currentTask,
shouldSkipTaskSessionUpdate,
shouldIgnoreCurrentSessionId,
} = resolveTaskContext(pendingTaskRef, boulderState.active_plan)
const trackedTaskSession = currentTask
? getTaskSessionState(ctx.directory, currentTask.key)
: null
const sessionState = toolInput.sessionID ? getState(toolInput.sessionID) : undefined const sessionState = toolInput.sessionID ? getState(toolInput.sessionID) : undefined
if (toolInput.sessionID && !boulderState.session_ids?.includes(toolInput.sessionID)) { if (toolInput.sessionID && !boulderState.session_ids?.includes(toolInput.sessionID)) {
@@ -147,31 +82,6 @@ export function createToolExecuteAfterHandler(input: {
}) })
} }
const lineageSessionIDs = toolInput.sessionID && !boulderState.session_ids.includes(toolInput.sessionID)
? [...boulderState.session_ids, toolInput.sessionID]
: boulderState.session_ids
const subagentSessionId = await validateSubagentSessionId({
client: ctx.client,
sessionID: extractedSessionId,
lineageSessionIDs,
})
if (currentTask && subagentSessionId && !shouldSkipTaskSessionUpdate) {
upsertTaskSessionState(ctx.directory, {
taskKey: currentTask.key,
taskLabel: currentTask.label,
taskTitle: currentTask.title,
sessionId: subagentSessionId,
agent: typeof toolOutput.metadata?.agent === "string" ? toolOutput.metadata.agent : undefined,
category: typeof toolOutput.metadata?.category === "string" ? toolOutput.metadata.category : undefined,
})
}
const preferredSessionId = resolvePreferredSessionId(
shouldIgnoreCurrentSessionId ? undefined : subagentSessionId,
trackedTaskSession?.session_id,
)
// Preserve original subagent response - critical for debugging failed tasks // Preserve original subagent response - critical for debugging failed tasks
const originalResponse = toolOutput.output const originalResponse = toolOutput.output
const shouldPauseForApproval = sessionState const shouldPauseForApproval = sessionState
@@ -192,11 +102,11 @@ export function createToolExecuteAfterHandler(input: {
} }
const leadReminder = shouldPauseForApproval const leadReminder = shouldPauseForApproval
? buildFinalWaveApprovalReminder(boulderState.plan_name, progress, preferredSessionId) ? buildFinalWaveApprovalReminder(boulderState.plan_name, progress, subagentSessionId)
: buildCompletionGate(boulderState.plan_name, preferredSessionId) : buildCompletionGate(boulderState.plan_name, subagentSessionId)
const followupReminder = shouldPauseForApproval const followupReminder = shouldPauseForApproval
? null ? null
: buildOrchestratorReminder(boulderState.plan_name, progress, preferredSessionId, autoCommit, false) : buildOrchestratorReminder(boulderState.plan_name, progress, subagentSessionId, autoCommit, false)
toolOutput.output = ` toolOutput.output = `
<system-reminder> <system-reminder>
@@ -222,22 +132,10 @@ ${
plan: boulderState.plan_name, plan: boulderState.plan_name,
progress: `${progress.completed}/${progress.total}`, progress: `${progress.completed}/${progress.total}`,
fileCount: gitStats.length, fileCount: gitStats.length,
preferredSessionId,
waitingForFinalWaveApproval: shouldPauseForApproval, waitingForFinalWaveApproval: shouldPauseForApproval,
}) })
} else { } else {
const lineageSessionIDs = toolInput.sessionID ? [toolInput.sessionID] : [] toolOutput.output += `\n<system-reminder>\n${buildStandaloneVerificationReminder(subagentSessionId)}\n</system-reminder>`
const subagentSessionId = await validateSubagentSessionId({
client: ctx.client,
sessionID: extractedSessionId,
lineageSessionIDs,
})
const preferredSessionId = pendingTaskRef?.kind === "skip"
? undefined
: subagentSessionId
toolOutput.output += `\n<system-reminder>\n${buildStandaloneVerificationReminder(
resolvePreferredSessionId(preferredSessionId),
)}\n</system-reminder>`
log(`[${HOOK_NAME}] Verification reminder appended for orchestrator`, { log(`[${HOOK_NAME}] Verification reminder appended for orchestrator`, {
sessionID: toolInput.sessionID, sessionID: toolInput.sessionID,

View File

@@ -2,26 +2,19 @@ import { log } from "../../shared/logger"
import { SYSTEM_DIRECTIVE_PREFIX } from "../../shared/system-directive" import { SYSTEM_DIRECTIVE_PREFIX } from "../../shared/system-directive"
import { isCallerOrchestrator } from "../../shared/session-utils" import { isCallerOrchestrator } from "../../shared/session-utils"
import type { PluginInput } from "@opencode-ai/plugin" import type { PluginInput } from "@opencode-ai/plugin"
import { readBoulderState, readCurrentTopLevelTask } from "../../features/boulder-state"
import { HOOK_NAME } from "./hook-name" import { HOOK_NAME } from "./hook-name"
import { ORCHESTRATOR_DELEGATION_REQUIRED, SINGLE_TASK_DIRECTIVE } from "./system-reminder-templates" import { ORCHESTRATOR_DELEGATION_REQUIRED, SINGLE_TASK_DIRECTIVE } from "./system-reminder-templates"
import { isSisyphusPath } from "./sisyphus-path" import { isSisyphusPath } from "./sisyphus-path"
import type { PendingTaskRef, TrackedTopLevelTaskRef } from "./types"
import { isWriteOrEditToolName } from "./write-edit-tool-policy" import { isWriteOrEditToolName } from "./write-edit-tool-policy"
export function createToolExecuteBeforeHandler(input: { export function createToolExecuteBeforeHandler(input: {
ctx: PluginInput ctx: PluginInput
pendingFilePaths: Map<string, string> pendingFilePaths: Map<string, string>
pendingTaskRefs: Map<string, PendingTaskRef>
}): ( }): (
toolInput: { tool: string; sessionID?: string; callID?: string }, toolInput: { tool: string; sessionID?: string; callID?: string },
toolOutput: { args: Record<string, unknown>; message?: string } toolOutput: { args: Record<string, unknown>; message?: string }
) => Promise<void> { ) => Promise<void> {
const { ctx, pendingFilePaths, pendingTaskRefs } = input const { ctx, pendingFilePaths } = input
function trackTask(callID: string, task: TrackedTopLevelTaskRef): void {
pendingTaskRefs.set(callID, { kind: "track", task })
}
return async (toolInput, toolOutput): Promise<void> => { return async (toolInput, toolOutput): Promise<void> => {
if (!(await isCallerOrchestrator(toolInput.sessionID, ctx.client))) { if (!(await isCallerOrchestrator(toolInput.sessionID, ctx.client))) {
@@ -50,46 +43,6 @@ export function createToolExecuteBeforeHandler(input: {
// Check task - inject single-task directive // Check task - inject single-task directive
if (toolInput.tool === "task") { if (toolInput.tool === "task") {
if (toolInput.callID) {
const requestedSessionId = toolOutput.args.session_id as string | undefined
if (requestedSessionId) {
pendingTaskRefs.set(toolInput.callID, {
kind: "skip",
reason: "explicit_resume",
})
} else {
const boulderState = readBoulderState(ctx.directory)
const currentTask = boulderState
? readCurrentTopLevelTask(boulderState.active_plan)
: null
if (currentTask) {
const task = {
key: currentTask.key,
label: currentTask.label,
title: currentTask.title,
}
const hasExistingClaim = [...pendingTaskRefs.values()].some((pendingTaskRef) => (
pendingTaskRef.kind === "track" && pendingTaskRef.task.key === task.key
))
if (hasExistingClaim) {
pendingTaskRefs.set(toolInput.callID, {
kind: "skip",
reason: "ambiguous_task_key",
task,
})
log(`[${HOOK_NAME}] Skipping task session persistence for ambiguous task key`, {
sessionID: toolInput.sessionID,
callID: toolInput.callID,
taskKey: task.key,
})
} else {
trackTask(toolInput.callID, task)
}
}
}
}
const prompt = toolOutput.args.prompt as string | undefined const prompt = toolOutput.args.prompt as string | undefined
if (prompt && !prompt.includes(SYSTEM_DIRECTIVE_PREFIX)) { if (prompt && !prompt.includes(SYSTEM_DIRECTIVE_PREFIX)) {
toolOutput.args.prompt = `<system-reminder>${SINGLE_TASK_DIRECTIVE}</system-reminder>\n` + prompt toolOutput.args.prompt = `<system-reminder>${SINGLE_TASK_DIRECTIVE}</system-reminder>\n` + prompt

View File

@@ -1,6 +1,5 @@
import type { AgentOverrides } from "../../config" import type { AgentOverrides } from "../../config"
import type { BackgroundManager } from "../../features/background-agent" import type { BackgroundManager } from "../../features/background-agent"
import type { TopLevelTaskRef } from "../../features/boulder-state"
export type ModelInfo = { providerID: string; modelID: string } export type ModelInfo = { providerID: string; modelID: string }
@@ -8,6 +7,7 @@ export interface AtlasHookOptions {
directory: string directory: string
backgroundManager?: BackgroundManager backgroundManager?: BackgroundManager
isContinuationStopped?: (sessionID: string) => boolean isContinuationStopped?: (sessionID: string) => boolean
shouldSkipContinuation?: (sessionID: string) => boolean
agentOverrides?: AgentOverrides agentOverrides?: AgentOverrides
/** Enable auto-commit after each atomic task completion (default: true) */ /** Enable auto-commit after each atomic task completion (default: true) */
autoCommit?: boolean autoCommit?: boolean
@@ -25,13 +25,6 @@ export interface ToolExecuteAfterOutput {
metadata: Record<string, unknown> metadata: Record<string, unknown>
} }
export type TrackedTopLevelTaskRef = Pick<TopLevelTaskRef, "key" | "label" | "title">
export type PendingTaskRef =
| { kind: "track"; task: TrackedTopLevelTaskRef }
| { kind: "skip"; reason: "explicit_resume" }
| { kind: "skip"; reason: "ambiguous_task_key"; task: TrackedTopLevelTaskRef }
export interface SessionState { export interface SessionState {
lastEventWasAbortError?: boolean lastEventWasAbortError?: boolean
lastContinuationInjectedAt?: number lastContinuationInjectedAt?: number

View File

@@ -1,14 +1,5 @@
import { VERIFICATION_REMINDER } from "./system-reminder-templates" import { VERIFICATION_REMINDER } from "./system-reminder-templates"
/**
 * Renders the reminder that nudges the orchestrator to reuse the subagent
 * session already associated with the current top-level plan task.
 */
function buildReuseHint(sessionId: string): string {
  const lines = [
    "",
    "**PREFERRED REUSE SESSION FOR THE CURRENT TOP-LEVEL PLAN TASK**",
    `- Reuse \`${sessionId}\` first if verification fails or the result needs follow-up.`,
    "- Start a fresh subagent session only when reuse is unavailable or would cross task boundaries.",
    "",
  ]
  return lines.join("\n")
}
export function buildCompletionGate(planName: string, sessionId: string): string { export function buildCompletionGate(planName: string, sessionId: string): string {
return ` return `
**COMPLETION GATE — DO NOT PROCEED UNTIL THIS IS DONE** **COMPLETION GATE — DO NOT PROCEED UNTIL THIS IS DONE**
@@ -34,8 +25,7 @@ task(session_id="${sessionId}", prompt="fix: checkbox not recorded correctly")
**Your completion is NOT tracked until the checkbox is marked in the plan file.** **Your completion is NOT tracked until the checkbox is marked in the plan file.**
**VERIFICATION_REMINDER** **VERIFICATION_REMINDER**`
${buildReuseHint(sessionId)}`
} }
function buildVerificationReminder(sessionId: string): string { function buildVerificationReminder(sessionId: string): string {
@@ -48,9 +38,7 @@ ${VERIFICATION_REMINDER}
**If ANY verification fails, use this immediately:** **If ANY verification fails, use this immediately:**
\`\`\` \`\`\`
task(session_id="${sessionId}", prompt="fix: [describe the specific failure]") task(session_id="${sessionId}", prompt="fix: [describe the specific failure]")
\`\`\` \`\`\``
${buildReuseHint(sessionId)}`
} }
export function buildOrchestratorReminder( export function buildOrchestratorReminder(

View File

@@ -44,6 +44,12 @@ export interface ExecutorOptions {
agent?: string agent?: string
} }
/** Returns only the discovered commands whose scope matches the given scope. */
function filterDiscoveredCommandsByScope(
  commands: DiscoveredCommandInfo[],
  scope: DiscoveredCommandInfo["scope"],
): DiscoveredCommandInfo[] {
  const matching: DiscoveredCommandInfo[] = []
  for (const command of commands) {
    if (command.scope === scope) {
      matching.push(command)
    }
  }
  return matching
}
async function discoverAllCommands(options?: ExecutorOptions): Promise<CommandInfo[]> { async function discoverAllCommands(options?: ExecutorOptions): Promise<CommandInfo[]> {
const discoveredCommands = discoverCommandsSync(process.cwd(), { const discoveredCommands = discoverCommandsSync(process.cwd(), {
@@ -54,18 +60,14 @@ async function discoverAllCommands(options?: ExecutorOptions): Promise<CommandIn
const skills = options?.skills ?? await discoverAllSkills() const skills = options?.skills ?? await discoverAllSkills()
const skillCommands = skills.map(skillToCommandInfo) const skillCommands = skills.map(skillToCommandInfo)
const scopeOrder: DiscoveredCommandInfo["scope"][] = ["project", "user", "opencode-project", "opencode", "builtin", "plugin"]
const grouped = new Map<string, DiscoveredCommandInfo[]>()
for (const cmd of discoveredCommands) {
const list = grouped.get(cmd.scope) ?? []
list.push(cmd)
grouped.set(cmd.scope, list)
}
const orderedCommands = scopeOrder.flatMap((scope) => grouped.get(scope) ?? [])
return [ return [
...skillCommands, ...skillCommands,
...orderedCommands, ...filterDiscoveredCommandsByScope(discoveredCommands, "project"),
...filterDiscoveredCommandsByScope(discoveredCommands, "user"),
...filterDiscoveredCommandsByScope(discoveredCommands, "opencode-project"),
...filterDiscoveredCommandsByScope(discoveredCommands, "opencode"),
...filterDiscoveredCommandsByScope(discoveredCommands, "builtin"),
...filterDiscoveredCommandsByScope(discoveredCommands, "plugin"),
] ]
} }

Some files were not shown because too many files have changed in this diff Show More