Compare commits

...

65 Commits

Author SHA1 Message Date
github-actions[bot]
8f31211c75 release: v3.1.0 2026-01-26 06:46:47 +00:00
justsisyphus
04f2b513c6 feat(tmux-subagent): add replace action to prevent mass eviction
- Add column-based splittable calculation (getColumnCount, getColumnWidth)
- New decision tree: splittable → split, k=1 eviction → close+spawn, else → replace
- Add 'replace' action type using tmux respawn-pane (preserves layout)
- Replace oldest pane in-place instead of closing all panes when unsplittable
- Prevents scenario where all agent panes get closed leaving only 1
2026-01-26 15:25:11 +09:00
justsisyphus
8ebc933118 fix(tmux-subagent): enable 2D grid layout with divider-aware calculations
- Account for tmux pane dividers (1 char) in all size calculations
- Reduce MIN_PANE_WIDTH from 53 to 52 to fit 2 columns in standard terminals
- Fix enforceMainPaneWidth to use (windowWidth - divider) / 2
- Add virtual mainPane handling for close-spawn eviction loop
- Add comprehensive decision-engine tests (23 test cases)
2026-01-26 15:11:16 +09:00
justsisyphus
a67a35aea8 docs: regenerate AGENTS.md knowledge base via /init-deep 2026-01-26 14:56:55 +09:00
justsisyphus
9d66b80709 feat(hooks): add active working context section to compaction summary
Include files, code in progress, external references, and state/variables
in compaction summary for seamless continuation after context compaction.
2026-01-26 14:23:05 +09:00
justsisyphus
5c7eb02d5b chore(test): sync agent name casing in tests (#1128)
Co-authored-by: justsisyphus <justsisyphus@users.noreply.github.com>
2026-01-26 12:10:30 +09:00
justsisyphus
68aa913499 refactor(tmux-subagent): state-first architecture with decision engine (#1125)
* refactor(tmux-subagent): add state-first architecture with decision engine

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>

* feat(tmux): add pane spawn callbacks for background and sync sessions

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>

---------

Co-authored-by: justsisyphus <justsisyphus@users.noreply.github.com>
Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-01-26 12:02:37 +09:00
justsisyphus
3a79b8761b feat(shared): add connected-providers-cache for model availability (#1121)
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)

Co-authored-by: justsisyphus <justsisyphus@users.noreply.github.com>
Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-01-26 11:53:41 +09:00
justsisyphus
da416b362b feat(hooks): add category-skill-reminder hook (#1123)
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)

Co-authored-by: justsisyphus <justsisyphus@users.noreply.github.com>
Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-01-26 11:48:32 +09:00
justsisyphus
90054b28ad chore(docs): regenerate AGENTS.md knowledge base (#1118)
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)

Co-authored-by: justsisyphus <justsisyphus@users.noreply.github.com>
Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-01-26 11:48:30 +09:00
justsisyphus
892b245779 fix(test): update builtin skills count from 3 to 4 (#1126)
* fix(test): update builtin skills count from 3 to 4 (dev-browser added)

* chore(ci): add block-master-pr workflow

---------

Co-authored-by: justsisyphus <justsisyphus@users.noreply.github.com>
2026-01-26 02:29:28 +00:00
YeonGyu-Kim
aead4aebd2 Add tmux pane management for background agent sessions (#1094)
* feat(config): add TmuxConfigSchema for tmux subagent pane management

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>

* feat(shared): add tmux module structure

* feat(shared/tmux): implement tmux pane utilities

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>

* test(tmux-subagent): add TmuxSessionManager tests (TDD RED)

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>

* feat(tmux-subagent): implement TmuxSessionManager

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>

* feat(integration): wire TmuxSessionManager with 500ms delay

- Task 5: Add 500ms delay in BackgroundManager after session creation
- Task 6: Wire TmuxSessionManager event handlers (session.created/deleted)
- Both changes integrate tmux pane management into plugin lifecycle

Co-authored-by: Sisyphus <ultrawork@oh-my-opencode>

---------

Co-authored-by: justsisyphus <justsisyphus@users.noreply.github.com>
Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
Co-authored-by: Sisyphus <ultrawork@oh-my-opencode>
2026-01-25 15:34:10 +09:00
YeonGyu-Kim
bccc943173 feat(skills): add dev-browser skill with Windows support (#1093)
* feat(skills): add dev-browser skill with Windows support

* chore: trigger CI
2026-01-25 15:34:07 +09:00
justsisyphus
05904ca617 docs(agent-browser): add detailed installation guide with Playwright troubleshooting 2026-01-25 15:12:32 +09:00
YeonGyu-Kim
3af30b0a21 feat(skills): add agent-browser option for browser automation (#1090)
Add configurable browser automation allowing users to choose between
Playwright MCP (default) and Vercel's agent-browser CLI.

Changes:
- Add browser_automation_engine.provider config option
- Dynamic skill loading based on provider selection
- Comprehensive agent-browser CLI reference (inline in skills.ts)
- Propagate browserProvider to delegate_task and buildAgent
- Update documentation with provider comparison

Co-authored-by: Suyeol Jeon <devxoul@gmail.com>
Co-authored-by: YeonGyu Kim <code.yeongyu@gmail.com>
2026-01-25 15:02:41 +09:00
YeonGyu-Kim
b55fd8d76f feat(explore): add github-copilot/gpt-5-mini to fallback chain (#1091)
* feat(explore): add github-copilot/gpt-5-mini to fallback chain

* test(explore): add tests for github-copilot/gpt-5-mini fallback

---------

Co-authored-by: Suyeol Jeon <devxoul@gmail.com>
2026-01-25 05:53:11 +00:00
Sisyphus
208af055ef fix: generate skill/slashcommand descriptions synchronously when pre-provided (#1087)
* fix: generate skill/slashcommand tool descriptions synchronously when pre-provided

When skills are passed via options (pre-resolved), build the tool description
synchronously instead of fire-and-forget async. This eliminates the race
condition where the description getter returns the bare prefix before the
async cache-warming microtask completes.

Fixes #1039

* chore: changes by sisyphus-dev-ai

---------

Co-authored-by: sisyphus-dev-ai <sisyphus-dev-ai@users.noreply.github.com>
2026-01-25 14:52:50 +09:00
YeonGyu-Kim
0aa8f486af feat(hooks): add sisyphus-junior-notepad hook for conditional notepad rules injection (#1092)
* refactor(shared): extract isCallerOrchestrator to session-utils

* refactor(atlas): use shared isCallerOrchestrator, change to prepend

* refactor(prometheus-md-only): change to prepend pattern

* refactor(sisyphus-junior): remove Work_Context (moved to hook)

* feat(hooks): add sisyphus-junior-notepad hook

* fix(shared): replace dynamic require with static import in session-utils

- Change from dynamic require to static import for better bundler compatibility
- Fix import path: ../../features -> ../features
- Add barrel export to src/shared/index.ts

* feat(hooks): register sisyphus-junior-notepad hook

- Add to HookNameSchema in schema.ts
- Export from hooks/index.ts
- Register with isHookEnabled in index.ts
- Auto-generated schema.json update

---------

Co-authored-by: justsisyphus <justsisyphus@users.noreply.github.com>
2026-01-25 14:52:11 +09:00
github-actions[bot]
a5db86ee15 release: v3.0.1 2026-01-25 05:04:20 +00:00
justsisyphus
14f450bd25 refactor: sync delegate_task schema with OpenCode Task tool (resume→session_id, add command param) 2026-01-25 13:57:45 +09:00
justsisyphus
5a1da39def refactor(ultrawork): replace vague plan agent references with explicit delegate_task(subagent_type="plan") invocation syntax 2026-01-25 13:57:45 +09:00
Sisyphus
24d065c43a fix: update documentation to use load_skills instead of skills parameter (#1088)
All documentation, agent prompts, and skill descriptions were still
referencing the old 'skills' parameter name for delegate_task, but the
tool implementation requires 'load_skills' (renamed in commit aa2b052).
This caused confusion and errors for users following the docs.

Fixes #1008

Co-authored-by: sisyphus-dev-ai <sisyphus-dev-ai@users.noreply.github.com>
2026-01-25 13:45:00 +09:00
justsisyphus
fd72ce5ce7 docs: update AGENTS.md knowledge base (043b1a33)
- Add 7 missing hooks, remove deleted background-compaction
- Update line counts (atlas 572, sisyphus 450, config-manager 664)
- Add 18 undocumented shared utilities, remove stale references
- Add task-toast-manager, remove-deadcode command
- Update test count 90→95, add 4 complexity hotspots
2026-01-25 13:12:40 +09:00
justsisyphus
043b1a3377 refactor: remove dead re-exports from tools barrel (getTmuxPath, DelegateTaskToolOptions, DEFAULT_CATEGORIES, CATEGORY_PROMPT_APPENDS) 2026-01-25 12:59:19 +09:00
justsisyphus
512952f66d refactor: remove deprecated config-path.ts (dead code, 0 references) 2026-01-25 12:58:40 +09:00
justsisyphus
d9723e76ab refactor: remove unused background-compaction hook module 2026-01-25 12:58:05 +09:00
justsisyphus
212baa6674 feat(commands): add /remove-deadcode slash command for LSP-verified dead code removal 2026-01-25 12:46:37 +09:00
justsisyphus
1c76e0513a fix: add missing name property in loadBuiltinCommands causing TypeError on slashcommand 2026-01-25 12:46:03 +09:00
justsisyphus
c8cc94cd3c fix: remove github-copilot association from gpt-5-nano model mapping
explore agent uses opencode/gpt-5-nano exclusively — github-copilot
should not be associated with gpt-5-nano in docs, tests, or fallback chains.
2026-01-25 12:46:03 +09:00
Sisyphus
20cca35157 fix(ralph-loop): skip user messages in transcript completion detection (#622) (#1086)
* fix(ralph-loop): skip user messages in transcript completion detection (#622)

The transcript-based completion detection was searching the entire JSONL
file for <promise>DONE</promise>, including user message entries. The
RALPH_LOOP_TEMPLATE instructional text contains this literal pattern,
which gets recorded as a user message, causing false positive completion
detection on every iteration. This made the loop always terminate at
iteration 1.

Fix: Parse JSONL entries line-by-line and skip entries with type 'user'
so only tool_result/assistant entries are checked for the completion
promise. Also remove the hardcoded <promise>DONE</promise> from the
template exit conditions as defense-in-depth.

* chore: changes by sisyphus-dev-ai

---------

Co-authored-by: sisyphus-dev-ai <sisyphus-dev-ai@users.noreply.github.com>
2026-01-25 12:34:42 +09:00
sisyphus-dev-ai
81d27afadb chore: changes by sisyphus-dev-ai 2026-01-25 03:27:56 +00:00
github-actions[bot]
6cb2f3031c @kvokka has signed the CLA in code-yeongyu/oh-my-opencode#1084 2026-01-25 03:14:31 +00:00
github-actions[bot]
f116ea1d43 @potb has signed the CLA in code-yeongyu/oh-my-opencode#1083 2026-01-25 02:38:28 +00:00
github-actions[bot]
6aa0674000 @jsl9208 has signed the CLA in code-yeongyu/oh-my-opencode#1082 2026-01-24 21:44:22 +00:00
github-actions[bot]
2b828624a0 @sadnow has signed the CLA in code-yeongyu/oh-my-opencode#1080 2026-01-24 20:49:38 +00:00
github-actions[bot]
e60ccb93fb @ThanhNguyxn has signed the CLA in code-yeongyu/oh-my-opencode#1075 2026-01-24 17:42:03 +00:00
justsisyphus
aa244e8098 docs: fix atlas agent name case in example config 2026-01-24 22:46:40 +09:00
github-actions[bot]
6f60f03433 @AamiRobin has signed the CLA in code-yeongyu/oh-my-opencode#1067 2026-01-24 13:28:32 +00:00
github-actions[bot]
b8a0eee92d release: v3.0.0 2026-01-24 13:23:25 +00:00
justsisyphus
1486ebbc87 docs: update READMEs for 3.0 stable release
- Update TIP banner from beta.10 to stable 3.0 in all languages
- Add Korean language link to Japanese and Chinese READMEs
- Add DeepWiki badge to Japanese and Chinese READMEs
- Adjust DeepWiki badge position in Korean README for consistency
2026-01-24 21:58:53 +09:00
justsisyphus
063c759275 feat: show detailed task info and resume instructions on background_cancel(all=true) (#1062)
Co-authored-by: justsisyphus <justsisyphus@users.noreply.github.com>
2026-01-24 17:15:31 +09:00
justsisyphus
6e9ebaf3ee fix: add missing gemini-3-flash to writing category migration (#1061)
MODEL_TO_CATEGORY_MAP was missing the mapping for google/gemini-3-flash
to the 'writing' category. Users who had configured agents with
model: 'google/gemini-3-flash' would not get auto-migrated to
category: 'writing'.

Ref: PR #1057 review comment

Co-authored-by: justsisyphus <justsisyphus@users.noreply.github.com>
2026-01-24 17:05:14 +09:00
justsisyphus
0e1d4e52e1 chore: remove website directory (fixes CI test failures) 2026-01-24 16:37:46 +09:00
sisyphus-dev-ai
c0fb4b79bd chore: changes by sisyphus-dev-ai 2026-01-24 07:12:01 +00:00
justsisyphus
ec32dd65c2 fix(question-label-truncator): fix type errors and add test coverage
- Remove invalid Pick<Plugin> type usage
- Add explicit input/output type annotations
- Add comprehensive test suite (5 tests)
- Tests verify truncation at 30 chars with '...' suffix
2026-01-24 16:07:08 +09:00
Ssoon-m
04fb339622 fix: add model fallback from agent/category configs 2026-01-24 16:03:12 +09:00
yimingll
3a22c24cf4 fix: auto-truncate question option labels exceeding 30 characters
When AI generates AskUserQuestion tool calls with option labels longer
than 30 characters, opencode validation rejects them with "too_big" error.

This fix adds a pre-tool-use hook that automatically truncates labels
to 30 characters (with "..." suffix) before the validation occurs.

Fixes the error:
"The question tool was called with invalid arguments: expected string
to have <=30 characters"

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-24 15:59:45 +09:00
Stephen Wang
cf2320480f Fix MCP disabled flag not removing previously loaded servers (#985)
When a later-loaded MCP config (e.g., .claude/.mcp.json) marks a server
as disabled, it now properly removes that server from both the servers
object and loadedServers array.

Previously, disabled servers were only skipped during loading, which
meant they wouldn't override servers loaded from earlier configs. This
made it impossible to disable project-level MCPs using local overrides.

Now the disabled flag works as expected: local configs can disable
servers defined in project or user configs.
2026-01-24 15:55:59 +09:00
Rouven Hi!
9532680879 fix(slashcommand): include built-in commands (like start-work) in discovery (#1031)
This ensures that commands defined in src/features/builtin-commands/commands.ts
(like /start-work, /refactor, /init-deep) are visible to the slashcommand tool
and the agent. Previously, only markdown-based commands were discovered.
2026-01-24 15:55:31 +09:00
justsisyphus
2a945ddbf5 fix(background-task): pass config to BackgroundManager for concurrency limits
The background_task config (providerConcurrency, modelConcurrency, etc.)
was not being passed to BackgroundManager, causing all models to use
the hardcoded default limit of 5 instead of user-configured values.
2026-01-24 15:50:44 +09:00
justsisyphus
58bb92134d fix(todo-continuation): filter compaction agent to prevent infinite loop
- Add 'compaction' to DEFAULT_SKIP_AGENTS
- Skip compaction agent messages when resolving agent info
- Skip injection when compaction occurred but no real agent resolved
- Replace cooldown-based approach with agent-based filtering
2026-01-24 15:50:44 +09:00
Sungho Park
f1a279a10a Add xhigh reasoningEffort to config schema (#965)
* test: cover xhigh reasoningEffort

* feat: add xhigh reasoningEffort option

* test: make reasoningEffort xhigh test model-agnostic
2026-01-24 15:48:15 +09:00
YeonGyu-Kim
faf172a91d fix(multimodal-looker): update fallback chain order (#1050)
New order:
1. google/gemini-3-flash
2. openai/gpt-5.2
3. zai-coding-plan/glm-4.6v
4. anthropic/claude-haiku-4-5
5. opencode/gpt-5-nano (FREE, ultimate fallback)

Co-authored-by: justsisyphus <justsisyphus@users.noreply.github.com>
2026-01-24 15:40:24 +09:00
YeonGyu-Kim
04633ba208 fix(models): update model names to match OpenCode Zen catalog (#1048)
* fix(models): update model names to match OpenCode Zen catalog

OpenCode Zen recently updated their official model catalog, deprecating
several preview and free model variants:

DEPRECATED → NEW (Official Zen Names):
- gemini-3-pro-preview → gemini-3-pro
- gemini-3-flash-preview → gemini-3-flash
- grok-code → gpt-5-nano (FREE tier maintained)
- glm-4.7-free → big-pickle (FREE tier maintained)
- glm-4.6v → glm-4.6

Changes:
- Updated 6 source files (model-requirements, delegate-task, think-mode, etc.)
- Updated 9 documentation files (installation, configurations, features, etc.)
- Updated 14 test files with new model references
- Regenerated snapshots to reflect catalog changes
- Removed duplicate think-mode entries for preview variants

Impact:
- FREE tier access preserved via gpt-5-nano and big-pickle
- All 55 model-related tests passing
- Zero breaking changes - pure string replacement
- Aligns codebase with official OpenCode Zen model catalog

Verified:
- Zero deprecated model names in codebase
- All model-related tests pass (55/55)
- Snapshots regenerated and validated

Affects: 30 files (6 source, 9 docs, 14 tests, 1 snapshot)

* fix(multimodal-looker): update fallback chain with glm-4.6v and gpt-5-nano

- Change glm-4.6 to glm-4.6v for zai-coding-plan provider
- Add opencode/gpt-5-nano as 4th fallback (FREE tier)
- Push gpt-5.2 to 5th position

Fallback chain now:
1. gemini-3-flash (google, github-copilot, opencode)
2. claude-haiku-4-5 (anthropic, github-copilot, opencode)
3. glm-4.6v (zai-coding-plan)
4. gpt-5-nano (opencode) - FREE
5. gpt-5.2 (openai, github-copilot, opencode)

* chore: update bun.lock

---------

Co-authored-by: justsisyphus <justsisyphus@users.noreply.github.com>
2026-01-24 15:30:35 +09:00
justsisyphus
58459e692b feat(website): add layout with header, sidebar, footer and navigation
- Create Header component with logo, nav, theme toggle, language switcher
- Create Sidebar component with doc navigation from config
- Create Footer component
- Create MobileNav component with hamburger menu
- Create navigation config file (docsConfig)
- Integrate all layout components into [locale]/layout.tsx
- Add framer-motion for mobile nav animations
- All tests passing, build successful
2026-01-24 14:25:05 +09:00
justsisyphus
894a0fa849 feat(website): add next-intl i18n and dark mode support 2026-01-24 14:25:05 +09:00
justsisyphus
21c7d29c1d fix(website): resolve @opennextjs/cloudflare and test configuration issues
- Successfully installed @opennextjs/cloudflare v1.15.1
- Fixed Vitest configuration to exclude e2e tests
- Renamed e2e test files from .spec.ts to .e2e.ts to avoid Bun test runner conflicts
- Updated eslint.config.mjs and playwright.config.ts
- All tests passing: Vitest (1/1), Playwright (6/6)
- Production bundle size: ~5MB < 10MiB limit
- Marked TODO 0 complete in plan
2026-01-24 14:25:05 +09:00
justsisyphus
ba93c42943 feat(website): initialize Next.js 15 project with @opennextjs/cloudflare 2026-01-24 14:25:05 +09:00
github-actions[bot]
5c7dd40751 @AndersHsueh has signed the CLA in code-yeongyu/oh-my-opencode#1042 2026-01-24 04:41:56 +00:00
github-actions[bot]
acc7b8b2f7 @gongxh0901 has signed the CLA in code-yeongyu/oh-my-opencode#1037 2026-01-24 02:27:36 +00:00
github-actions[bot]
8c90838f3b @RouHim has signed the CLA in code-yeongyu/oh-my-opencode#1031 2026-01-23 19:32:14 +00:00
github-actions[bot]
0b784d24f2 release: v3.0.0-beta.16 2026-01-23 18:12:07 +00:00
justsisyphus
444fbe396a fix(delegate-task): use lowercase sisyphus-junior agent name in API calls
Previous fix (7ed7bf5c) only updated Atlas → atlas, but missed Sisyphus-Junior.
OpenCode does case-sensitive agent lookup, causing crash when delegate_task
tried to spawn 'Sisyphus-Junior' (registered as 'sisyphus-junior').

- SISYPHUS_JUNIOR_AGENT constant: 'Sisyphus-Junior' → 'sisyphus-junior'
- agent-tool-restrictions key: 'Sisyphus-Junior' → 'sisyphus-junior'
- Updated related test mocks
2026-01-24 03:00:58 +09:00
github-actions[bot]
ad86e58077 release: v3.0.0-beta.15 2026-01-23 17:44:45 +00:00
justsisyphus
7ed7bf5c66 fix(agents): use lowercase agent names in API calls
- atlas/index.ts: agent: 'Atlas' -> 'atlas'
- start-work/index.ts: updateSessionAgent(..., 'Atlas') -> 'atlas'
- builtin-commands/commands.ts: agent: 'Atlas' -> 'atlas'
- Updated tests to match lowercase convention
2026-01-24 02:39:12 +09:00
139 changed files with 8215 additions and 1098 deletions

View File

@@ -4,13 +4,32 @@ on:
push:
branches: [master, dev]
pull_request:
branches: [dev]
branches: [master, dev]
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs:
# Block PRs targeting master branch
block-master-pr:
runs-on: ubuntu-latest
if: github.event_name == 'pull_request'
steps:
- name: Check PR target branch
run: |
if [ "${{ github.base_ref }}" = "master" ]; then
echo "::error::PRs to master branch are not allowed. Please target the 'dev' branch instead."
echo ""
echo "PULL REQUESTS TO MASTER ARE BLOCKED"
echo ""
echo "All PRs must target the 'dev' branch."
echo "Please close this PR and create a new one targeting 'dev'."
exit 1
else
echo "PR targets '${{ github.base_ref }}' branch - OK"
fi
test:
runs-on: ubuntu-latest
steps:

View File

@@ -0,0 +1,342 @@
---
description: Remove unused code from this project with ultrawork mode, LSP-verified safety, atomic commits
---
<command-instruction>
You are a dead code removal specialist. Execute the FULL dead code removal workflow using ultrawork mode.
Your core weapon: **LSP FindReferences**. If a symbol has ZERO external references, it's dead. Remove it.
## CRITICAL RULES
1. **LSP is law.** Never guess. Always verify with `LspFindReferences` before removing ANYTHING.
2. **One removal = one commit.** Every dead code removal gets its own atomic commit.
3. **Test after every removal.** Run `bun test` after each. If it fails, REVERT and skip.
4. **Leaf-first order.** Remove deepest unused symbols first, then work up the dependency chain. Removing a leaf may expose new dead code upstream.
5. **Never remove entry points.** `src/index.ts`, `src/cli/index.ts`, test files, config files, and files in `packages/` are off-limits unless explicitly targeted.
---
## STEP 0: REGISTER TODO LIST (MANDATORY FIRST ACTION)
```
TodoWrite([
{"id": "scan", "content": "PHASE 1: Scan codebase for dead code candidates using LSP + explore agents", "status": "pending", "priority": "high"},
{"id": "verify", "content": "PHASE 2: Verify each candidate with LspFindReferences - zero false positives", "status": "pending", "priority": "high"},
{"id": "plan", "content": "PHASE 3: Plan removal order (leaf-first dependency order)", "status": "pending", "priority": "high"},
{"id": "remove", "content": "PHASE 4: Remove dead code one-by-one (remove -> test -> commit loop)", "status": "pending", "priority": "high"},
{"id": "final", "content": "PHASE 5: Final verification - full test suite + build + typecheck", "status": "pending", "priority": "high"}
])
```
---
## PHASE 1: SCAN FOR DEAD CODE CANDIDATES
**Mark scan as in_progress.**
### 1.1: Launch Parallel Explore Agents (ALL BACKGROUND)
Fire ALL simultaneously:
```
// Agent 1: Find all exported symbols
delegate_task(subagent_type="explore", run_in_background=true,
prompt="Find ALL exported functions, classes, types, interfaces, and constants across src/.
List each with: file path, line number, symbol name, export type (named/default).
EXCLUDE: src/index.ts root exports, test files.
Return as structured list.")
// Agent 2: Find potentially unused files
delegate_task(subagent_type="explore", run_in_background=true,
prompt="Find files in src/ that are NOT imported by any other file.
Check import/require statements across the entire codebase.
EXCLUDE: index.ts files, test files, entry points, config files, .md files.
Return list of potentially orphaned files.")
// Agent 3: Find unused imports within files
delegate_task(subagent_type="explore", run_in_background=true,
prompt="Find unused imports across src/**/*.ts files.
Look for import statements where the imported symbol is never referenced in the file body.
Return: file path, line number, imported symbol name.")
// Agent 4: Find functions/variables only used in their own declaration
delegate_task(subagent_type="explore", run_in_background=true,
prompt="Find private/non-exported functions, variables, and types in src/**/*.ts that appear
to have zero usage beyond their declaration. Return: file path, line number, symbol name.")
```
### 1.2: Direct AST-Grep Scans (WHILE AGENTS RUN)
```typescript
// Find named-import statements (candidates only; each must still be checked for actual usage)
ast_grep_search(pattern="import { $NAME } from '$PATH'", lang="typescript", paths=["src/"])
// Find empty export objects
ast_grep_search(pattern="export {}", lang="typescript", paths=["src/"])
```
### 1.3: Collect All Results
Collect background agent results. Compile into a master candidate list:
```
## DEAD CODE CANDIDATES
| # | File | Line | Symbol | Type | Confidence |
|---|------|------|--------|------|------------|
| 1 | src/foo.ts | 42 | unusedFunc | function | HIGH |
| 2 | src/bar.ts | 10 | OldType | type | MEDIUM |
```
**Mark scan as completed.**
---
## PHASE 2: VERIFY WITH LSP (ZERO FALSE POSITIVES)
**Mark verify as in_progress.**
For EVERY candidate from Phase 1, run this verification:
### 2.1: The LSP Verification Protocol
For each candidate symbol:
```typescript
// Step 1: Find the symbol's exact position
LspDocumentSymbols(filePath) // Get line/character of the symbol
// Step 2: Find ALL references across the ENTIRE workspace
LspFindReferences(filePath, line, character, includeDeclaration=false)
// includeDeclaration=false → only counts USAGES, not the definition itself
// Step 3: Evaluate
// 0 references → CONFIRMED DEAD CODE
// 1+ references → NOT dead, remove from candidate list
```
### 2.2: False Positive Guards
**NEVER mark as dead code if:**
- Symbol is in `src/index.ts` (package entry point)
- Symbol is re-exported by any `index.ts` barrel file (check whether a barrel re-exports it)
- Symbol is referenced in test files (tests are valid consumers)
- Symbol has `@public` or `@api` JSDoc tags
- Symbol is in a file listed in `package.json` exports
- Symbol is a hook factory (`createXXXHook`) registered in `src/index.ts`
- Symbol is a tool factory (`createXXXTool`) registered in tool loading
- Symbol is an agent definition registered in `agentSources`
- File is a command template, skill definition, or MCP config
### 2.3: Build Confirmed Dead Code List
After verification, produce:
```
## CONFIRMED DEAD CODE (LSP-verified, 0 external references)
| # | File | Line | Symbol | Type | Safe to Remove |
|---|------|------|--------|------|----------------|
| 1 | src/foo.ts | 42 | unusedFunc | function | YES |
```
**If ZERO confirmed dead code found: Report "No dead code found" and STOP.**
**Mark verify as completed.**
---
## PHASE 3: PLAN REMOVAL ORDER
**Mark plan as in_progress.**
### 3.1: Dependency Analysis
For each confirmed dead symbol:
1. Check if removing it would expose other dead code
2. Check if other dead symbols depend on this one
3. Build removal dependency graph
### 3.2: Order by Leaf-First
```
Removal Order:
1. [Leaf symbols - no other dead code depends on them]
2. [Intermediate symbols - depended on only by already-removed dead code]
3. [Dead files - entire files with no live exports]
```
### 3.3: Register Granular Todos
Create one todo per removal:
```
TodoWrite([
{"id": "remove-1", "content": "Remove unusedFunc from src/foo.ts:42", "status": "pending", "priority": "high"},
{"id": "remove-2", "content": "Remove OldType from src/bar.ts:10", "status": "pending", "priority": "high"},
// ... one per confirmed dead symbol
])
```
**Mark plan as completed.**
---
## PHASE 4: ITERATIVE REMOVAL LOOP
**Mark remove as in_progress.**
For EACH dead code item, execute this exact loop:
### 4.1: Pre-Removal Check
```typescript
// Re-verify it's still dead (previous removals may have changed things)
LspFindReferences(filePath, line, character, includeDeclaration=false)
// If references > 0 now → SKIP (the candidate is stale, or earlier edits shifted its line/character position)
```
### 4.2: Remove the Dead Code
Use appropriate tool:
**For unused imports:**
```typescript
Edit(filePath, oldString="import { deadSymbol } from '...';\n", newString="")
// Or if it's one of many imports, remove just the symbol from the import list
```
**For unused functions/classes/types:**
```typescript
// Read the full symbol extent first
Read(filePath, offset=startLine, limit=endLine-startLine+1)
// Then remove it
Edit(filePath, oldString="[full symbol text]", newString="")
```
**For dead files:**
```bash
# Only after confirming ZERO imports point to this file
rm "path/to/dead-file.ts"
```
**After removal, also clean up:**
- Remove any imports that were ONLY used by the removed code
- Remove any now-empty import statements
- Fix any trailing whitespace / double blank lines left behind
### 4.3: Post-Removal Verification
```typescript
// 1. LSP diagnostics on changed file
LspDiagnostics(filePath, severity="error")
// Must be clean (or only pre-existing errors)
// 2. Run tests
bash("bun test")
// Must pass
// 3. Typecheck
bash("bun run typecheck")
// Must pass
```
### 4.4: Handle Failures
If ANY verification fails:
1. **REVERT** the change immediately (`git checkout -- [file]`)
2. Mark this removal todo as `cancelled` with note: "Removal caused [error]. Skipped."
3. Proceed to next item
### 4.5: Commit
```bash
git add [changed-files]
git commit -m "refactor: remove unused [symbolType] [symbolName] from [filePath]"
```
Mark this removal todo as `completed`.
### 4.6: Re-scan After Removal
After removing a symbol, check if its removal exposed NEW dead code:
- Were there imports that only existed to serve the removed symbol?
- Are there other symbols in the same file now unreferenced?
If new dead code is found, add it to the removal queue.
**Repeat 4.1-4.6 for every item. Mark remove as completed when done.**
---
## PHASE 5: FINAL VERIFICATION
**Mark final as in_progress.**
### 5.1: Full Test Suite
```bash
bun test
```
### 5.2: Full Typecheck
```bash
bun run typecheck
```
### 5.3: Full Build
```bash
bun run build
```
### 5.4: Summary Report
```markdown
## Dead Code Removal Complete
### Removed
| # | Symbol | File | Type | Commit |
|---|--------|------|------|--------|
| 1 | unusedFunc | src/foo.ts | function | abc1234 |
### Skipped (caused failures)
| # | Symbol | File | Reason |
|---|--------|------|--------|
| 1 | riskyFunc | src/bar.ts | Test failure: [details] |
### Verification
- Tests: PASSED (X/Y passing)
- Typecheck: CLEAN
- Build: SUCCESS
- Total dead code removed: N symbols across M files
- Total commits: K atomic commits
```
**Mark final as completed.**
---
## SCOPE CONTROL
**Use $ARGUMENTS to determine the scan scope:**
- File path: Only scan that file
- Directory: Only scan that directory
- Symbol name: Only check that specific symbol
- "all" or empty: Full project scan (default)
## ABORT CONDITIONS
**STOP and report to user if:**
- 3 consecutive removals cause test failures
- Build breaks and cannot be fixed by reverting
- More than 50 candidates found (ask user to narrow scope)
## LANGUAGE
Use English for commit messages and technical output.
</command-instruction>
<user-request>
$ARGUMENTS
</user-request>

View File

@@ -1,12 +1,24 @@
# PROJECT KNOWLEDGE BASE
**Generated:** 2026-01-23T15:59:00+09:00
**Commit:** 599fad0e
**Generated:** 2026-01-26T14:50:00+09:00
**Commit:** 9d66b807
**Branch:** dev
---
## **IMPORTANT: PULL REQUEST TARGET BRANCH**
> **ALL PULL REQUESTS MUST TARGET THE `dev` BRANCH.**
>
> **DO NOT CREATE PULL REQUESTS TARGETING `master` BRANCH.**
>
> PRs to `master` will be automatically rejected by CI.
---
## OVERVIEW
OpenCode plugin: multi-model agent orchestration (Claude Opus 4.5, GPT-5.2, Gemini 3 Flash, Grok Code, GLM-4.7). 31 lifecycle hooks, 20+ tools (LSP, AST-Grep, delegation), 10 specialized agents, full Claude Code compatibility. "oh-my-zsh" for OpenCode.
OpenCode plugin: multi-model agent orchestration (Claude Opus 4.5, GPT-5.2, Gemini 3 Flash, Grok Code). 32 lifecycle hooks, 20+ tools (LSP, AST-Grep, delegation), 10 specialized agents, full Claude Code compatibility. "oh-my-zsh" for OpenCode.
## STRUCTURE
@@ -14,14 +26,14 @@ OpenCode plugin: multi-model agent orchestration (Claude Opus 4.5, GPT-5.2, Gemi
oh-my-opencode/
├── src/
│ ├── agents/ # 10 AI agents - see src/agents/AGENTS.md
│ ├── hooks/ # 31 lifecycle hooks - see src/hooks/AGENTS.md
│ ├── hooks/ # 32 lifecycle hooks - see src/hooks/AGENTS.md
│ ├── tools/ # 20+ tools - see src/tools/AGENTS.md
│ ├── features/ # Background agents, Claude Code compat - see src/features/AGENTS.md
│ ├── shared/ # 50 cross-cutting utilities - see src/shared/AGENTS.md
│ ├── shared/ # 55 cross-cutting utilities - see src/shared/AGENTS.md
│ ├── cli/ # CLI installer, doctor - see src/cli/AGENTS.md
│ ├── mcp/ # Built-in MCPs - see src/mcp/AGENTS.md
│ ├── config/ # Zod schema, TypeScript types
│ └── index.ts # Main plugin entry (593 lines)
│ └── index.ts # Main plugin entry (672 lines)
├── script/ # build-schema.ts, build-binaries.ts
├── packages/ # 7 platform-specific binaries
└── dist/ # Build output (ESM + .d.ts)
@@ -36,9 +48,10 @@ oh-my-opencode/
| Add tool | `src/tools/` | Dir with index/types/constants/tools.ts |
| Add MCP | `src/mcp/` | Create config, add to index.ts |
| Add skill | `src/features/builtin-skills/` | Create dir with SKILL.md |
| Add command | `src/features/builtin-commands/` | Add template + register in commands.ts |
| Config schema | `src/config/schema.ts` | Zod schema, run `bun run build:schema` |
| Background agents | `src/features/background-agent/` | manager.ts (1335 lines) |
| Orchestrator | `src/hooks/atlas/` | Main orchestration hook (773 lines) |
| Background agents | `src/features/background-agent/` | manager.ts (1377 lines) |
| Orchestrator | `src/hooks/atlas/` | Main orchestration hook (752 lines) |
## TDD (Test-Driven Development)
@@ -50,8 +63,8 @@ oh-my-opencode/
**Rules:**
- NEVER write implementation before test
- NEVER delete failing tests - fix the code
- Test file: `*.test.ts` alongside source
- BDD comments: `#given`, `#when`, `#then`
- Test file: `*.test.ts` alongside source (100 test files)
- BDD comments: `//#given`, `//#when`, `//#then`
## CONVENTIONS
@@ -60,7 +73,7 @@ oh-my-opencode/
- **Build**: `bun build` (ESM) + `tsc --emitDeclarationOnly`
- **Exports**: Barrel pattern via index.ts
- **Naming**: kebab-case dirs, `createXXXHook`/`createXXXTool` factories
- **Testing**: BDD comments, 90 test files
- **Testing**: BDD comments, 100 test files
- **Temperature**: 0.1 for code agents, max 0.3
## ANTI-PATTERNS
@@ -88,9 +101,9 @@ oh-my-opencode/
| Sisyphus | anthropic/claude-opus-4-5 | Primary orchestrator |
| Atlas | anthropic/claude-opus-4-5 | Master orchestrator |
| oracle | openai/gpt-5.2 | Consultation, debugging |
| librarian | opencode/glm-4.7-free | Docs, GitHub search |
| explore | opencode/grok-code | Fast codebase grep |
| multimodal-looker | google/gemini-3-flash-preview | PDF/image analysis |
| librarian | opencode/big-pickle | Docs, GitHub search |
| explore | opencode/gpt-5-nano | Fast codebase grep |
| multimodal-looker | google/gemini-3-flash | PDF/image analysis |
| Prometheus | anthropic/claude-opus-4-5 | Strategic planning |
## COMMANDS
@@ -99,7 +112,7 @@ oh-my-opencode/
bun run typecheck # Type check
bun run build # ESM + declarations + schema
bun run rebuild # Clean + Build
bun test # 90 test files
bun test # 100 test files
```
## DEPLOYMENT
@@ -113,12 +126,14 @@ bun test # 90 test files
| File | Lines | Description |
|------|-------|-------------|
| `src/features/background-agent/manager.ts` | 1335 | Task lifecycle, concurrency |
| `src/features/builtin-skills/skills.ts` | 1203 | Skill definitions |
| `src/features/builtin-skills/skills.ts` | 1729 | Skill definitions |
| `src/features/background-agent/manager.ts` | 1377 | Task lifecycle, concurrency |
| `src/agents/prometheus-prompt.ts` | 1196 | Planning agent |
| `src/tools/delegate-task/tools.ts` | 1039 | Category-based delegation |
| `src/hooks/atlas/index.ts` | 773 | Orchestrator hook |
| `src/cli/config-manager.ts` | 641 | JSONC config parsing |
| `src/tools/delegate-task/tools.ts` | 1070 | Category-based delegation |
| `src/hooks/atlas/index.ts` | 752 | Orchestrator hook |
| `src/cli/config-manager.ts` | 664 | JSONC config parsing |
| `src/index.ts` | 672 | Main plugin entry |
| `src/features/builtin-commands/templates/refactor.ts` | 619 | Refactor command template |
## MCP ARCHITECTURE

View File

@@ -16,8 +16,8 @@
> [!TIP]
>
> [![The Orchestrator is now available in beta.](./.github/assets/orchestrator-atlas.png?v=3)](https://github.com/code-yeongyu/oh-my-opencode/releases/tag/v3.0.0-beta.10)
> > **オーケストレーターがベータ版で利用可能になりました`oh-my-opencode@3.0.0-beta.10`を使用してインストールしてください。**
> [![Oh My OpenCode 3.0が正式リリースされました!](./.github/assets/orchestrator-atlas.png?v=3)](https://github.com/code-yeongyu/oh-my-opencode/releases/tag/v3.0.0)
> > **Oh My OpenCode 3.0が正式リリースされました!`oh-my-opencode@latest`を使用してインストールしてください。**
>
> 一緒に歩みましょう!
>
@@ -73,7 +73,9 @@
[![GitHub Issues](https://img.shields.io/github/issues/code-yeongyu/oh-my-opencode?color=ff80eb&labelColor=black&style=flat-square)](https://github.com/code-yeongyu/oh-my-opencode/issues)
[![License](https://img.shields.io/badge/license-SUL--1.0-white?labelColor=black&style=flat-square)](https://github.com/code-yeongyu/oh-my-opencode/blob/master/LICENSE.md)
[English](README.md) | [日本語](README.ja.md) | [简体中文](README.zh-cn.md)
[English](README.md) | [한국어](README.ko.md) | [日本語](README.ja.md) | [简体中文](README.zh-cn.md)
[![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/code-yeongyu/oh-my-opencode)
</div>

View File

@@ -16,8 +16,8 @@
>
> [!TIP]
>
> [![The Orchestrator is now available in beta.](./.github/assets/orchestrator-atlas.png?v=3)](https://github.com/code-yeongyu/oh-my-opencode/releases/tag/v3.0.0-beta.10)
> > **오케스트레이터가 베타 버전으로 사용 가능합니다. 설치하려면 `oh-my-opencode@3.0.0-beta.10`을 사용하세요.**
> [![Oh My OpenCode 3.0이 정식 출시되었습니다!](./.github/assets/orchestrator-atlas.png?v=3)](https://github.com/code-yeongyu/oh-my-opencode/releases/tag/v3.0.0)
> > **Oh My OpenCode 3.0이 정식 출시되었습니다! `oh-my-opencode@latest`를 사용하여 설치하세요.**
>
> 함께해요!
>
@@ -73,10 +73,11 @@
[![GitHub Stars](https://img.shields.io/github/stars/code-yeongyu/oh-my-opencode?color=ffcb47&labelColor=black&style=flat-square)](https://github.com/code-yeongyu/oh-my-opencode/stargazers)
[![GitHub Issues](https://img.shields.io/github/issues/code-yeongyu/oh-my-opencode?color=ff80eb&labelColor=black&style=flat-square)](https://github.com/code-yeongyu/oh-my-opencode/issues)
[![License](https://img.shields.io/badge/license-SUL--1.0-white?labelColor=black&style=flat-square)](https://github.com/code-yeongyu/oh-my-opencode/blob/master/LICENSE.md)
[![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/code-yeongyu/oh-my-opencode)
[English](README.md) | [한국어](README.ko.md) | [日本語](README.ja.md) | [简体中文](README.zh-cn.md)
[![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/code-yeongyu/oh-my-opencode)
</div>
<!-- </CENTERED SECTION FOR GITHUB DISPLAY> -->

View File

@@ -16,8 +16,8 @@
> [!TIP]
>
> [![The Orchestrator is now available in beta.](./.github/assets/orchestrator-atlas.png?v=3)](https://github.com/code-yeongyu/oh-my-opencode/releases/tag/v3.0.0-beta.10)
> > **The Orchestrator is now available in beta. Use `oh-my-opencode@3.0.0-beta.10` to install it.**
> [![Oh My OpenCode 3.0 is now stable!](./.github/assets/orchestrator-atlas.png?v=3)](https://github.com/code-yeongyu/oh-my-opencode/releases/tag/v3.0.0)
> > **Oh My OpenCode 3.0 is now stable! Use `oh-my-opencode@latest` to install it.**
>
> Be with us!
>

View File

@@ -16,8 +16,8 @@
> [!TIP]
>
> [![Orchestrator 现已进入测试阶段。](./.github/assets/orchestrator-atlas.png?v=3)](https://github.com/code-yeongyu/oh-my-opencode/releases/tag/v3.0.0-beta.10)
> > **Orchestrator 现已进入测试阶段。使用 `oh-my-opencode@3.0.0-beta.10` 安装。**
> [![Oh My OpenCode 3.0 正式发布!](./.github/assets/orchestrator-atlas.png?v=3)](https://github.com/code-yeongyu/oh-my-opencode/releases/tag/v3.0.0)
> > **Oh My OpenCode 3.0 正式发布!使用 `oh-my-opencode@latest` 安装。**
>
> 加入我们!
>
@@ -74,7 +74,9 @@
[![GitHub Issues](https://img.shields.io/github/issues/code-yeongyu/oh-my-opencode?color=ff80eb&labelColor=black&style=flat-square)](https://github.com/code-yeongyu/oh-my-opencode/issues)
[![许可证](https://img.shields.io/badge/license-SUL--1.0-white?labelColor=black&style=flat-square)](https://github.com/code-yeongyu/oh-my-opencode/blob/master/LICENSE.md)
[English](README.md) | [日本語](README.ja.md) | [简体中文](README.zh-cn.md)
[English](README.md) | [한국어](README.ko.md) | [日本語](README.ja.md) | [简体中文](README.zh-cn.md)
[![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/code-yeongyu/oh-my-opencode)
</div>

View File

@@ -38,6 +38,7 @@
"type": "string",
"enum": [
"playwright",
"agent-browser",
"frontend-ui-ux",
"git-master"
]
@@ -70,12 +71,14 @@
"interactive-bash-session",
"thinking-block-validator",
"ralph-loop",
"category-skill-reminder",
"compaction-context-injector",
"claude-code-hooks",
"auto-slash-command",
"edit-error-recovery",
"delegate-task-retry",
"prometheus-md-only",
"sisyphus-junior-notepad",
"start-work",
"atlas"
]
@@ -1787,7 +1790,8 @@
"enum": [
"low",
"medium",
"high"
"high",
"xhigh"
]
},
"textVerbosity": {
@@ -2170,6 +2174,55 @@
"type": "boolean"
}
}
},
"browser_automation_engine": {
"type": "object",
"properties": {
"provider": {
"default": "playwright",
"type": "string",
"enum": [
"playwright",
"agent-browser"
]
}
}
},
"tmux": {
"type": "object",
"properties": {
"enabled": {
"default": false,
"type": "boolean"
},
"layout": {
"default": "main-vertical",
"type": "string",
"enum": [
"main-horizontal",
"main-vertical",
"tiled",
"even-horizontal",
"even-vertical"
]
},
"main_pane_size": {
"default": 60,
"type": "number",
"minimum": 20,
"maximum": 80
},
"main_pane_min_width": {
"default": 120,
"type": "number",
"minimum": 40
},
"agent_pane_min_width": {
"default": 40,
"type": "number",
"minimum": 20
}
}
}
}
}

View File

@@ -27,13 +27,13 @@
"typescript": "^5.7.3",
},
"optionalDependencies": {
"oh-my-opencode-darwin-arm64": "3.0.0-beta.11",
"oh-my-opencode-darwin-x64": "3.0.0-beta.11",
"oh-my-opencode-linux-arm64": "3.0.0-beta.11",
"oh-my-opencode-linux-arm64-musl": "3.0.0-beta.11",
"oh-my-opencode-linux-x64": "3.0.0-beta.11",
"oh-my-opencode-linux-x64-musl": "3.0.0-beta.11",
"oh-my-opencode-windows-x64": "3.0.0-beta.11",
"oh-my-opencode-darwin-arm64": "3.0.1",
"oh-my-opencode-darwin-x64": "3.0.1",
"oh-my-opencode-linux-arm64": "3.0.1",
"oh-my-opencode-linux-arm64-musl": "3.0.1",
"oh-my-opencode-linux-x64": "3.0.1",
"oh-my-opencode-linux-x64-musl": "3.0.1",
"oh-my-opencode-windows-x64": "3.0.1",
},
},
},
@@ -225,19 +225,19 @@
"object-inspect": ["object-inspect@1.13.4", "", {}, "sha512-W67iLl4J2EXEGTbfeHCffrjDfitvLANg0UlX3wFUUSTx92KXRFegMHUVgSqE+wvhAbi4WqjGg9czysTV2Epbew=="],
"oh-my-opencode-darwin-arm64": ["oh-my-opencode-darwin-arm64@3.0.0-beta.11", "", { "os": "darwin", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-7cFv2bbz9HTY7sshgVTu+IhvYf7CT0czDYqHEB+dYfEqFU6TaoSMimq6uHqcWegUUR1T7PNmc0dyjYVw69FeVA=="],
"oh-my-opencode-darwin-arm64": ["oh-my-opencode-darwin-arm64@3.0.1", "", { "os": "darwin", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-LRcLVi6DsmGh3ICFeN4yVJ0KinvCM5jotd2z7tZQ74n0sziHO7grjK1CmJaPV9eCv0clatoK5xfFCeEJ3FvXYg=="],
"oh-my-opencode-darwin-x64": ["oh-my-opencode-darwin-x64@3.0.0-beta.11", "", { "os": "darwin", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-rGAbDdUySWITIdm2yiuNFB9lFYaSXT8LMtg97LTlOO5vZbI3M+obIS3QlIkBtAhgOTIPB7Ni+T0W44OmJpHoYA=="],
"oh-my-opencode-darwin-x64": ["oh-my-opencode-darwin-x64@3.0.1", "", { "os": "darwin", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-ZaC0ZBe5M2f2aMncNsAMu9IZ3MjSPfNVcfUTCgJkp03db8lLPsajgjeG3556Er72hxignDPsEbrLkJBNlsDbAA=="],
"oh-my-opencode-linux-arm64": ["oh-my-opencode-linux-arm64@3.0.0-beta.11", "", { "os": "linux", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-F9dqwWwGAdqeSkE7Tre5DmHQXwDpU2Z8Jk0lwTJMLj+kMqYFDVPjLPo4iVUdwPpxpmm0pR84u/oonG/2+84/zw=="],
"oh-my-opencode-linux-arm64": ["oh-my-opencode-linux-arm64@3.0.1", "", { "os": "linux", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-pcOvV6Y2GSwKr0exDndeB2BtFt297XhJFQgrq1cbeEJawoRONDRp7LNSpjwILSQpQ7YkkYnO2bIczBmxI5llNA=="],
"oh-my-opencode-linux-arm64-musl": ["oh-my-opencode-linux-arm64-musl@3.0.0-beta.11", "", { "os": "linux", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-H+zOtHkHd+TmdPj64M1A0zLOk7OHIK4C8yqfLFhfizOIBffT1yOhAs6EpK3EqPhfPLu54ADgcQcu8W96VP24UA=="],
"oh-my-opencode-linux-arm64-musl": ["oh-my-opencode-linux-arm64-musl@3.0.1", "", { "os": "linux", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-7kXKaVbgFnOMSaw+j4JbZNs7O7mkvCekcfWPwh/9I/0WD21/n4PbAGl01ePhRoQh+u9MC6t8FH046hEjL2sk1g=="],
"oh-my-opencode-linux-x64": ["oh-my-opencode-linux-x64@3.0.0-beta.11", "", { "os": "linux", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-IG+KODTJ8rs6cEJ2wN6Zpr6YtvCS5OpYP6jBdGJltmUpjQdMhdMsaY3ysZk+9Vxpx2KC3xj5KLHV1USg3uBTeg=="],
"oh-my-opencode-linux-x64": ["oh-my-opencode-linux-x64@3.0.1", "", { "os": "linux", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-1BOV1EnKa5BErhZmWiddnbriHwm1KFrPr+0BUCDdFX/d/hrMAJTo1733zaEnvKuXzvrdHSp/VznXheeUI1VjkA=="],
"oh-my-opencode-linux-x64-musl": ["oh-my-opencode-linux-x64-musl@3.0.0-beta.11", "", { "os": "linux", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-irV+AuWrHqNm7VT7HO56qgymR0+vEfJbtB3vCq68kprH2V4NQmGp2MNKIYPnUCYL7NEK3H2NX+h06YFZJ/8ELQ=="],
"oh-my-opencode-linux-x64-musl": ["oh-my-opencode-linux-x64-musl@3.0.1", "", { "os": "linux", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-ASyTVatvU1nNJ0mk9o+A/GjybT5vOdgU172ystzCsnQ+12Mnv68GgaeMu/UFJgJNaZmKdhyUAP9XhnOKvEDBGQ=="],
"oh-my-opencode-windows-x64": ["oh-my-opencode-windows-x64@3.0.0-beta.11", "", { "os": "win32", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode.exe" } }, "sha512-exZ/NEwGBlxyWszN7dvOfzbYX0cuhBZXftqAAFOlVP26elDHdo+AmSmLR/4cJyzpR9nCWz4xvl/RYF84bY6OEA=="],
"oh-my-opencode-windows-x64": ["oh-my-opencode-windows-x64@3.0.1", "", { "os": "win32", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode.exe" } }, "sha512-QIuA564mVpwzCprhhAoyd8TSw0Rt2VM6M9y7H0fOoC/UjXuU+d7wIuUNuqUUMVaUnMedkctTZop0X0i2Q+Bvhg=="],
"on-finished": ["on-finished@2.4.1", "", { "dependencies": { "ee-first": "1.1.1" } }, "sha512-oVlzkg3ENAhCk2zdv7IJwd/QUD4z2RxRwpkcGY8psCVcCYZNq4wYnVWALHM+brtuJjePWiYF/ClmuDr8Ch5+kg=="],

View File

@@ -21,13 +21,13 @@ A Category is an agent configuration preset optimized for specific domains.
| Category | Default Model | Use Cases |
|----------|---------------|-----------|
| `visual-engineering` | `google/gemini-3-pro-preview` | Frontend, UI/UX, design, styling, animation |
| `visual-engineering` | `google/gemini-3-pro` | Frontend, UI/UX, design, styling, animation |
| `ultrabrain` | `openai/gpt-5.2-codex` (xhigh) | Deep logical reasoning, complex architecture decisions requiring extensive analysis |
| `artistry` | `google/gemini-3-pro-preview` (max) | Highly creative/artistic tasks, novel ideas |
| `artistry` | `google/gemini-3-pro` (max) | Highly creative/artistic tasks, novel ideas |
| `quick` | `anthropic/claude-haiku-4-5` | Trivial tasks - single file changes, typo fixes, simple modifications |
| `unspecified-low` | `anthropic/claude-sonnet-4-5` | Tasks that don't fit other categories, low effort required |
| `unspecified-high` | `anthropic/claude-opus-4-5` (max) | Tasks that don't fit other categories, high effort required |
| `writing` | `google/gemini-3-flash-preview` | Documentation, prose, technical writing |
| `writing` | `google/gemini-3-flash` | Documentation, prose, technical writing |
### Usage
@@ -70,12 +70,12 @@ A Skill is a mechanism that injects **specialized knowledge (Context)** and **to
### Usage
Add desired skill names to the `skills` array.
Add desired skill names to the `load_skills` array.
```typescript
delegate_task(
category="quick",
skills=["git-master"],
load_skills=["git-master"],
prompt="Commit current changes. Follow commit message style."
)
```
@@ -110,17 +110,17 @@ You can create powerful specialized agents by combining Categories and Skills.
### 🎨 The Designer (UI Implementation)
- **Category**: `visual-engineering`
- **Skills**: `["frontend-ui-ux", "playwright"]`
- **load_skills**: `["frontend-ui-ux", "playwright"]`
- **Effect**: Implements aesthetic UI and verifies rendering results directly in browser.
### 🏗️ The Architect (Design Review)
- **Category**: `ultrabrain`
- **Skills**: `[]` (pure reasoning)
- **load_skills**: `[]` (pure reasoning)
- **Effect**: Leverages GPT-5.2's logical reasoning for in-depth system architecture analysis.
### ⚡ The Maintainer (Quick Fixes)
- **Category**: `quick`
- **Skills**: `["git-master"]`
- **load_skills**: `["git-master"]`
- **Effect**: Uses cost-effective models to quickly fix code and generate clean commits.
---
@@ -131,7 +131,7 @@ When delegating, **clear and specific** prompts are essential. Include these 7 e
1. **TASK**: What needs to be done? (single objective)
2. **EXPECTED OUTCOME**: What is the deliverable?
3. **REQUIRED SKILLS**: Which skills should be used?
3. **REQUIRED SKILLS**: Which skills should be loaded via `load_skills`?
4. **REQUIRED TOOLS**: Which tools must be used? (whitelist)
5. **MUST DO**: What must be done (constraints)
6. **MUST NOT DO**: What must never be done
@@ -177,7 +177,7 @@ You can fine-tune categories in `oh-my-opencode.json`.
"categories": {
// 1. Define new custom category
"korean-writer": {
"model": "google/gemini-3-flash-preview",
"model": "google/gemini-3-flash",
"temperature": 0.5,
"prompt_append": "You are a Korean technical writer. Maintain a friendly and clear tone."
},

View File

@@ -175,7 +175,7 @@ Configuration files support **JSONC (JSON with Comments)** format. You can use c
/* Category customization */
"categories": {
"visual-engineering": {
"model": "google/gemini-3-pro-preview",
"model": "google/gemini-3-pro",
},
},
}

View File

@@ -22,13 +22,13 @@ It asks about your providers (Claude, OpenAI, Gemini, etc.) and generates optima
"agents": {
"oracle": { "model": "openai/gpt-5.2" }, // Use GPT for debugging
"librarian": { "model": "zai-coding-plan/glm-4.7" }, // Cheap model for research
"explore": { "model": "opencode/grok-code" } // Free model for grep
"explore": { "model": "opencode/gpt-5-nano" } // Free model for grep
},
// Override category models (used by delegate_task)
"categories": {
"quick": { "model": "opencode/grok-code" }, // Fast/cheap for trivial tasks
"visual-engineering": { "model": "google/gemini-3-pro-preview" } // Gemini for UI
"quick": { "model": "opencode/gpt-5-nano" }, // Fast/cheap for trivial tasks
"visual-engineering": { "model": "google/gemini-3-pro" } // Gemini for UI
}
}
```
@@ -75,7 +75,7 @@ When both `oh-my-opencode.jsonc` and `oh-my-opencode.json` files exist, `.jsonc`
"model": "openai/gpt-5.2" // GPT for strategic reasoning
},
"explore": {
"model": "opencode/grok-code" // Free & fast for exploration
"model": "opencode/gpt-5-nano" // Free & fast for exploration
},
},
}
@@ -83,7 +83,7 @@ When both `oh-my-opencode.jsonc` and `oh-my-opencode.json` files exist, `.jsonc`
## Google Auth
**Recommended**: For Google Gemini authentication, install the [`opencode-antigravity-auth`](https://github.com/NoeFabris/opencode-antigravity-auth) plugin. It provides multi-account load balancing, more models (including Claude via Antigravity), and active maintenance. See [Installation > Google Gemini](../README.md#google-gemini-antigravity-oauth).
**Recommended**: For Google Gemini authentication, install the [`opencode-antigravity-auth`](https://github.com/NoeFabris/opencode-antigravity-auth) plugin (`@latest`). It provides multi-account load balancing, variant-based thinking levels, a dual quota system (Antigravity + Gemini CLI), and active maintenance. See [Installation > Google Gemini](guide/installation.md#google-gemini-antigravity-oauth).
## Agents
@@ -159,8 +159,8 @@ Available agents: `oracle`, `librarian`, `explore`, `multimodal-looker`
Oh My OpenCode includes built-in skills that provide additional capabilities:
- **playwright**: Browser automation with Playwright MCP. Use for web scraping, testing, screenshots, and browser interactions.
- **git-master**: Git expert for atomic commits, rebase/squash, and history search (blame, bisect, log -S). STRONGLY RECOMMENDED: Use with `delegate_task(category='quick', skills=['git-master'], ...)` to save context.
- **playwright** (default) / **agent-browser**: Browser automation for web scraping, testing, screenshots, and browser interactions. See [Browser Automation](#browser-automation) for switching between providers.
- **git-master**: Git expert for atomic commits, rebase/squash, and history search (blame, bisect, log -S). STRONGLY RECOMMENDED: Use with `delegate_task(category='quick', load_skills=['git-master'], ...)` to save context.
Disable built-in skills via `disabled_skills` in `~/.config/opencode/oh-my-opencode.json` or `.opencode/oh-my-opencode.json`:
@@ -170,7 +170,54 @@ Disable built-in skills via `disabled_skills` in `~/.config/opencode/oh-my-openc
}
```
Available built-in skills: `playwright`, `git-master`
Available built-in skills: `playwright`, `agent-browser`, `git-master`
## Browser Automation
Choose between two browser automation providers:
| Provider | Interface | Features | Installation |
|----------|-----------|----------|--------------|
| **playwright** (default) | MCP tools | Playwright MCP server with structured tool calls | Auto-installed via npx |
| **agent-browser** | Bash CLI | Vercel's CLI with session management, parallel browsers | Requires `bun add -g agent-browser` |
**Switch providers** via `browser_automation_engine` in `oh-my-opencode.json`:
```json
{
"browser_automation_engine": {
"provider": "agent-browser"
}
}
```
### Playwright (Default)
Uses the official Playwright MCP server (`@playwright/mcp`). Browser automation happens through structured MCP tool calls.
### agent-browser
Uses [Vercel's agent-browser CLI](https://github.com/vercel-labs/agent-browser). Key advantages:
- **Session management**: Run multiple isolated browser instances with `--session` flag
- **Persistent profiles**: Keep browser state across restarts with `--profile`
- **Snapshot-based workflow**: Get element refs via `snapshot -i`, interact with `@e1`, `@e2`, etc.
- **CLI-first**: All commands via Bash - great for scripting
**Installation required**:
```bash
bun add -g agent-browser
agent-browser install # Download Chromium
```
**Example workflow**:
```bash
agent-browser open https://example.com
agent-browser snapshot -i # Get interactive elements with refs
agent-browser fill @e1 "user@example.com"
agent-browser click @e2
agent-browser screenshot result.png
agent-browser close
```
## Git Master
@@ -305,7 +352,7 @@ Categories enable domain-specific task delegation via the `delegate_task` tool.
| Category | Model | Description |
| ---------------- | ----------------------------- | ---------------------------------------------------------------------------- |
| `visual` | `google/gemini-3-pro-preview` | Frontend, UI/UX, design-focused tasks. High creativity (temp 0.7). |
| `visual` | `google/gemini-3-pro` | Frontend, UI/UX, design-focused tasks. High creativity (temp 0.7). |
| `business-logic` | `openai/gpt-5.2` | Backend logic, architecture, strategic reasoning. Low creativity (temp 0.1). |
**Usage:**
@@ -332,7 +379,7 @@ Add custom categories in `oh-my-opencode.json`:
"prompt_append": "Focus on data analysis, ML pipelines, and statistical methods."
},
"visual": {
"model": "google/gemini-3-pro-preview",
"model": "google/gemini-3-pro",
"prompt_append": "Use shadcn/ui components and Tailwind CSS."
}
}
@@ -403,9 +450,9 @@ Each agent has a defined provider priority chain. The system tries providers in
|-------|-------------------|-------------------------|
| **Sisyphus** | `claude-opus-4-5` | anthropic → github-copilot → opencode → antigravity → google |
| **oracle** | `gpt-5.2` | openai → anthropic → google → github-copilot → opencode |
| **librarian** | `glm-4.7-free` | opencode → github-copilot → anthropic |
| **explore** | `grok-code` | opencode → anthropic → github-copilot |
| **multimodal-looker** | `gemini-3-flash-preview` | google → anthropic → zai → openai → github-copilot → opencode |
| **librarian** | `big-pickle` | opencode → github-copilot → anthropic |
| **explore** | `gpt-5-nano` | anthropic → opencode |
| **multimodal-looker** | `gemini-3-flash` | google → openai → zai-coding-plan → anthropic → opencode |
| **Prometheus (Planner)** | `claude-opus-4-5` | anthropic → github-copilot → opencode → antigravity → google |
| **Metis (Plan Consultant)** | `claude-sonnet-4-5` | anthropic → github-copilot → opencode → antigravity → google |
| **Momus (Plan Reviewer)** | `claude-opus-4-5` | anthropic → github-copilot → opencode → antigravity → google |
@@ -417,13 +464,13 @@ Categories follow the same resolution logic:
| Category | Model (no prefix) | Provider Priority Chain |
|----------|-------------------|-------------------------|
| **visual-engineering** | `gemini-3-pro-preview` | google → openai → anthropic → github-copilot → opencode |
| **visual-engineering** | `gemini-3-pro` | google → openai → anthropic → github-copilot → opencode |
| **ultrabrain** | `gpt-5.2-codex` | openai → anthropic → google → github-copilot → opencode |
| **artistry** | `gemini-3-pro-preview` | google → openai → anthropic → github-copilot → opencode |
| **artistry** | `gemini-3-pro` | google → openai → anthropic → github-copilot → opencode |
| **quick** | `claude-haiku-4-5` | anthropic → github-copilot → opencode → antigravity → google |
| **unspecified-low** | `claude-sonnet-4-5` | anthropic → github-copilot → opencode → antigravity → google |
| **unspecified-high** | `claude-opus-4-5` | anthropic → github-copilot → opencode → antigravity → google |
| **writing** | `gemini-3-flash-preview` | google → openai → anthropic → github-copilot → opencode |
| **writing** | `gemini-3-flash` | google → openai → anthropic → github-copilot → opencode |
### Checking Your Configuration

View File

@@ -12,9 +12,9 @@ Oh-My-OpenCode provides 10 specialized AI agents. Each has distinct expertise, o
|-------|-------|---------|
| **Sisyphus** | `anthropic/claude-opus-4-5` | **The default orchestrator.** Plans, delegates, and executes complex tasks using specialized subagents with aggressive parallel execution. Todo-driven workflow with extended thinking (32k budget). |
| **oracle** | `openai/gpt-5.2` | Architecture decisions, code review, debugging. Read-only consultation - stellar logical reasoning and deep analysis. Inspired by AmpCode. |
| **librarian** | `opencode/glm-4.7-free` | Multi-repo analysis, documentation lookup, OSS implementation examples. Deep codebase understanding with evidence-based answers. Inspired by AmpCode. |
| **explore** | `opencode/grok-code` | Fast codebase exploration and contextual grep. Uses Gemini 3 Flash when Antigravity auth is configured, Haiku when Claude max20 is available, otherwise Grok. Inspired by Claude Code. |
| **multimodal-looker** | `google/gemini-3-flash-preview` | Visual content specialist. Analyzes PDFs, images, diagrams to extract information. Saves tokens by having another agent process media. |
| **librarian** | `opencode/big-pickle` | Multi-repo analysis, documentation lookup, OSS implementation examples. Deep codebase understanding with evidence-based answers. Inspired by AmpCode. |
| **explore** | `opencode/gpt-5-nano` | Fast codebase exploration and contextual grep. Uses Gemini 3 Flash when Antigravity auth is configured, Haiku when Claude max20 is available, otherwise GPT-5 Nano. Inspired by Claude Code. |
| **multimodal-looker** | `google/gemini-3-flash` | Visual content specialist. Analyzes PDFs, images, diagrams to extract information. Saves tokens by having another agent process media. |
### Planning Agents
@@ -78,11 +78,15 @@ Skills provide specialized workflows with embedded MCP servers and detailed inst
| **frontend-ui-ux** | UI/UX tasks, styling | Designer-turned-developer persona. Crafts stunning UI/UX even without design mockups. Emphasizes bold aesthetic direction, distinctive typography, cohesive color palettes. |
| **git-master** | commit, rebase, squash, blame | MUST USE for ANY git operations. Atomic commits with automatic splitting, rebase/squash workflows, history search (blame, bisect, log -S). |
### Skill: playwright
### Skill: Browser Automation (playwright / agent-browser)
**Trigger**: Any browser-related request
Provides browser automation via Playwright MCP server:
Oh-My-OpenCode provides two browser automation providers, configurable via `browser_automation_engine.provider`:
#### Option 1: Playwright MCP (Default)
The default provider uses the Playwright MCP server:
```yaml
mcp:
@@ -91,18 +95,41 @@ mcp:
args: ["@playwright/mcp@latest"]
```
**Capabilities**:
**Usage**:
```
/playwright Navigate to example.com and take a screenshot
```
#### Option 2: Agent Browser CLI (Vercel)
Alternative provider using [Vercel's agent-browser CLI](https://github.com/vercel-labs/agent-browser):
```json
{
"browser_automation_engine": {
"provider": "agent-browser"
}
}
```
**Requires installation**:
```bash
bun add -g agent-browser
```
**Usage**:
```
Use agent-browser to navigate to example.com and extract the main heading
```
#### Capabilities (Both Providers)
- Navigate and interact with web pages
- Take screenshots and PDFs
- Fill forms and click elements
- Wait for network requests
- Scrape content
**Usage**:
```
/playwright Navigate to example.com and take a screenshot
```
### Skill: frontend-ui-ux
**Trigger**: UI design tasks, visual changes

View File

@@ -132,7 +132,7 @@ First, add the opencode-antigravity-auth plugin:
{
"plugin": [
"oh-my-opencode",
"opencode-antigravity-auth@1.2.8"
"opencode-antigravity-auth@latest"
]
}
```
@@ -140,7 +140,7 @@ First, add the opencode-antigravity-auth plugin:
##### Model Configuration
You'll also need full model settings in `opencode.json`.
Read the [opencode-antigravity-auth documentation](https://github.com/NoeFabris/opencode-antigravity-auth), copy provider/models config from the README, and merge carefully to avoid breaking the user's existing setup.
Read the [opencode-antigravity-auth documentation](https://github.com/NoeFabris/opencode-antigravity-auth), copy the full model configuration from the README, and merge carefully to avoid breaking the user's existing setup. The plugin now uses a **variant system** — models like `antigravity-gemini-3-pro` support `low`/`high` variants instead of separate `-low`/`-high` model entries.
##### oh-my-opencode Agent Model Override
@@ -154,7 +154,17 @@ The `opencode-antigravity-auth` plugin uses different model names than the built
}
```
**Available model names**: `google/antigravity-gemini-3-pro-high`, `google/antigravity-gemini-3-pro-low`, `google/antigravity-gemini-3-flash`, `google/antigravity-claude-sonnet-4-5`, `google/antigravity-claude-sonnet-4-5-thinking-low`, `google/antigravity-claude-sonnet-4-5-thinking-medium`, `google/antigravity-claude-sonnet-4-5-thinking-high`, `google/antigravity-claude-opus-4-5-thinking-low`, `google/antigravity-claude-opus-4-5-thinking-medium`, `google/antigravity-claude-opus-4-5-thinking-high`, `google/gemini-3-pro-preview`, `google/gemini-3-flash-preview`, `google/gemini-2.5-pro`, `google/gemini-2.5-flash`
**Available models (Antigravity quota)**:
- `google/antigravity-gemini-3-pro` — variants: `low`, `high`
- `google/antigravity-gemini-3-flash` — variants: `minimal`, `low`, `medium`, `high`
- `google/antigravity-claude-sonnet-4-5` — no variants
- `google/antigravity-claude-sonnet-4-5-thinking` — variants: `low`, `max`
- `google/antigravity-claude-opus-4-5-thinking` — variants: `low`, `max`
**Available models (Gemini CLI quota)**:
- `google/gemini-2.5-flash`, `google/gemini-2.5-pro`, `google/gemini-3-flash-preview`, `google/gemini-3-pro-preview`
> **Note**: Legacy tier-suffixed names like `google/antigravity-gemini-3-pro-high` still work, but variants are recommended. Use `--variant=high` with the base model name instead.
Then authenticate:
@@ -183,7 +193,7 @@ When GitHub Copilot is the best available provider, oh-my-opencode uses these mo
| ------------- | -------------------------------- |
| **Sisyphus** | `github-copilot/claude-opus-4.5` |
| **Oracle** | `github-copilot/gpt-5.2` |
| **Explore** | `github-copilot/grok-code-fast-1`|
| **Explore** | `opencode/gpt-5-nano` |
| **Librarian** | `zai-coding-plan/glm-4.7` (if Z.ai available) or fallback |
GitHub Copilot acts as a proxy provider, routing requests to underlying models based on your subscription.
@@ -203,7 +213,7 @@ If Z.ai is the only provider available, all agents will use GLM models:
#### OpenCode Zen
OpenCode Zen provides access to `opencode/` prefixed models including `opencode/claude-opus-4-5`, `opencode/gpt-5.2`, `opencode/grok-code`, and `opencode/glm-4.7-free`.
OpenCode Zen provides access to `opencode/` prefixed models including `opencode/claude-opus-4-5`, `opencode/gpt-5.2`, `opencode/gpt-5-nano`, and `opencode/big-pickle`.
When OpenCode Zen is the best available provider (no native or Copilot), these models are used:
@@ -211,8 +221,8 @@ When OpenCode Zen is the best available provider (no native or Copilot), these m
| ------------- | -------------------------------- |
| **Sisyphus** | `opencode/claude-opus-4-5` |
| **Oracle** | `opencode/gpt-5.2` |
| **Explore** | `opencode/grok-code` |
| **Librarian** | `opencode/glm-4.7-free` |
| **Explore** | `opencode/gpt-5-nano` |
| **Librarian** | `opencode/big-pickle` |
##### Setup

View File

@@ -112,12 +112,12 @@ Each agent has a **provider priority chain**. The system tries providers in orde
```
Example: multimodal-looker
google → anthropic → zai → openai → github-copilot → opencode
↓ ↓
gemini haiku glm-4.6v gpt-5.2 fallback fallback
google → openai → zai-coding-plan → anthropic → opencode
↓ ↓ ↓ ↓ ↓
gemini gpt-5.2 glm-4.6v haiku gpt-5-nano
```
If you have Gemini, it uses `google/gemini-3-flash-preview`. No Gemini but have Claude? Uses `anthropic/claude-haiku-4-5`. And so on.
If you have Gemini, it uses `google/gemini-3-flash`. No Gemini but have OpenAI? Uses `openai/gpt-5.2`. And so on down the chain.
### Example Configuration
@@ -128,14 +128,14 @@ Here's a real-world config for a user with **Claude, OpenAI, Gemini, and Z.ai**
"$schema": "https://raw.githubusercontent.com/code-yeongyu/oh-my-opencode/master/assets/oh-my-opencode.schema.json",
"agents": {
// Override specific agents only - rest use fallback chain
"Atlas": { "model": "anthropic/claude-sonnet-4-5", "variant": "max" },
"atlas": { "model": "anthropic/claude-sonnet-4-5", "variant": "max" },
"librarian": { "model": "zai-coding-plan/glm-4.7" },
"explore": { "model": "opencode/grok-code" },
"explore": { "model": "opencode/gpt-5-nano" },
"multimodal-looker": { "model": "zai-coding-plan/glm-4.6v" }
},
"categories": {
// Override categories for cost optimization
"quick": { "model": "opencode/grok-code" },
"quick": { "model": "opencode/gpt-5-nano" },
"unspecified-low": { "model": "zai-coding-plan/glm-4.7" }
},
"experimental": {

View File

@@ -326,13 +326,13 @@ Skills prepend specialized instructions to subagent prompts:
// Category + Skill combination
delegate_task(
category="visual-engineering",
skills=["frontend-ui-ux"], // Adds UI/UX expertise
load_skills=["frontend-ui-ux"], // Adds UI/UX expertise
prompt="..."
)
delegate_task(
category="general",
skills=["playwright"], // Adds browser automation expertise
load_skills=["playwright"], // Adds browser automation expertise
prompt="..."
)
```
@@ -341,8 +341,8 @@ delegate_task(
| Before | After |
|--------|-------|
| Hardcoded: `frontend-ui-ux-engineer` (Gemini 3 Pro) | `category="visual-engineering" + skills=["frontend-ui-ux"]` |
| One-size-fits-all | `category="visual-engineering" + skills=["unity-master"]` |
| Hardcoded: `frontend-ui-ux-engineer` (Gemini 3 Pro) | `category="visual-engineering" + load_skills=["frontend-ui-ux"]` |
| One-size-fits-all | `category="visual-engineering" + load_skills=["unity-master"]` |
| Model bias | Category-based: model abstraction eliminates bias |
---
@@ -365,7 +365,7 @@ sequenceDiagram
Note over Orchestrator: Prompt Structure:<br/>1. TASK (exact checkbox)<br/>2. EXPECTED OUTCOME<br/>3. REQUIRED SKILLS<br/>4. REQUIRED TOOLS<br/>5. MUST DO<br/>6. MUST NOT DO<br/>7. CONTEXT + Wisdom
Orchestrator->>Junior: delegate_task(category, skills, prompt)
Orchestrator->>Junior: delegate_task(category, load_skills, prompt)
Junior->>Junior: Create todos, execute
Junior->>Junior: Verify (lsp_diagnostics, tests)

View File

@@ -1,6 +1,6 @@
{
"name": "oh-my-opencode",
"version": "3.0.0-beta.14",
"version": "3.1.0",
"description": "The Best AI Agent Harness - Batteries-Included OpenCode Plugin with Multi-Model Orchestration, Parallel Background Agents, and Crafted LSP/AST Tools",
"main": "dist/index.js",
"types": "dist/index.d.ts",
@@ -73,13 +73,13 @@
"typescript": "^5.7.3"
},
"optionalDependencies": {
"oh-my-opencode-darwin-arm64": "3.0.0-beta.14",
"oh-my-opencode-darwin-x64": "3.0.0-beta.14",
"oh-my-opencode-linux-arm64": "3.0.0-beta.14",
"oh-my-opencode-linux-arm64-musl": "3.0.0-beta.14",
"oh-my-opencode-linux-x64": "3.0.0-beta.14",
"oh-my-opencode-linux-x64-musl": "3.0.0-beta.14",
"oh-my-opencode-windows-x64": "3.0.0-beta.14"
"oh-my-opencode-darwin-arm64": "3.1.0",
"oh-my-opencode-darwin-x64": "3.1.0",
"oh-my-opencode-linux-arm64": "3.1.0",
"oh-my-opencode-linux-arm64-musl": "3.1.0",
"oh-my-opencode-linux-x64": "3.1.0",
"oh-my-opencode-linux-x64-musl": "3.1.0",
"oh-my-opencode-windows-x64": "3.1.0"
},
"trustedDependencies": [
"@ast-grep/cli",

View File

@@ -1,6 +1,6 @@
{
"name": "oh-my-opencode-darwin-arm64",
"version": "3.0.0-beta.14",
"version": "3.1.0",
"description": "Platform-specific binary for oh-my-opencode (darwin-arm64)",
"license": "MIT",
"repository": {

View File

@@ -1,6 +1,6 @@
{
"name": "oh-my-opencode-darwin-x64",
"version": "3.0.0-beta.14",
"version": "3.1.0",
"description": "Platform-specific binary for oh-my-opencode (darwin-x64)",
"license": "MIT",
"repository": {

View File

@@ -1,6 +1,6 @@
{
"name": "oh-my-opencode-linux-arm64-musl",
"version": "3.0.0-beta.14",
"version": "3.1.0",
"description": "Platform-specific binary for oh-my-opencode (linux-arm64-musl)",
"license": "MIT",
"repository": {

View File

@@ -1,6 +1,6 @@
{
"name": "oh-my-opencode-linux-arm64",
"version": "3.0.0-beta.14",
"version": "3.1.0",
"description": "Platform-specific binary for oh-my-opencode (linux-arm64)",
"license": "MIT",
"repository": {

View File

@@ -1,6 +1,6 @@
{
"name": "oh-my-opencode-linux-x64-musl",
"version": "3.0.0-beta.14",
"version": "3.1.0",
"description": "Platform-specific binary for oh-my-opencode (linux-x64-musl)",
"license": "MIT",
"repository": {

View File

@@ -1,6 +1,6 @@
{
"name": "oh-my-opencode-linux-x64",
"version": "3.0.0-beta.14",
"version": "3.1.0",
"description": "Platform-specific binary for oh-my-opencode (linux-x64)",
"license": "MIT",
"repository": {

View File

@@ -1,6 +1,6 @@
{
"name": "oh-my-opencode-windows-x64",
"version": "3.0.0-beta.14",
"version": "3.1.0",
"description": "Platform-specific binary for oh-my-opencode (windows-x64)",
"license": "MIT",
"repository": {

View File

@@ -735,6 +735,86 @@
"created_at": "2026-01-23T08:27:02Z",
"repoId": 1108837393,
"pullRequestNo": 985
},
{
"name": "RouHim",
"id": 3582050,
"comment_id": 3791988227,
"created_at": "2026-01-23T19:32:01Z",
"repoId": 1108837393,
"pullRequestNo": 1031
},
{
"name": "gongxh0901",
"id": 15622561,
"comment_id": 3793478620,
"created_at": "2026-01-24T02:15:02Z",
"repoId": 1108837393,
"pullRequestNo": 1037
},
{
"name": "gongxh0901",
"id": 15622561,
"comment_id": 3793521632,
"created_at": "2026-01-24T02:23:34Z",
"repoId": 1108837393,
"pullRequestNo": 1037
},
{
"name": "AndersHsueh",
"id": 121805544,
"comment_id": 3793787614,
"created_at": "2026-01-24T04:41:46Z",
"repoId": 1108837393,
"pullRequestNo": 1042
},
{
"name": "AamiRobin",
"id": 22963668,
"comment_id": 3794632200,
"created_at": "2026-01-24T13:28:22Z",
"repoId": 1108837393,
"pullRequestNo": 1067
},
{
"name": "ThanhNguyxn",
"id": 74597207,
"comment_id": 3795232176,
"created_at": "2026-01-24T17:41:53Z",
"repoId": 1108837393,
"pullRequestNo": 1075
},
{
"name": "sadnow",
"id": 87896100,
"comment_id": 3795495342,
"created_at": "2026-01-24T20:49:29Z",
"repoId": 1108837393,
"pullRequestNo": 1080
},
{
"name": "jsl9208",
"id": 4048787,
"comment_id": 3795582626,
"created_at": "2026-01-24T21:41:24Z",
"repoId": 1108837393,
"pullRequestNo": 1082
},
{
"name": "potb",
"id": 10779093,
"comment_id": 3795856573,
"created_at": "2026-01-25T02:38:16Z",
"repoId": 1108837393,
"pullRequestNo": 1083
},
{
"name": "kvokka",
"id": 15954013,
"comment_id": 3795884358,
"created_at": "2026-01-25T03:13:52Z",
"repoId": 1108837393,
"pullRequestNo": 1084
}
]
}

View File

@@ -239,7 +239,7 @@ Ask yourself:
I will use delegate_task with:
- **Category**: [selected-category-name]
- **Why this category**: [how category description matches task domain]
- **Skills**: [list of selected skills]
- **load_skills**: [list of selected skills]
- **Skill evaluation**:
- [skill-1]: INCLUDED because [reason based on skill description]
- [skill-2]: OMITTED because [reason why skill domain doesn't apply]
@@ -256,7 +256,7 @@ I will use delegate_task with:
I will use delegate_task with:
- **Category**: [category-name]
- **Why this category**: Category description says "[quote description]" which matches this task's requirements
- **Skills**: ["skill-a", "skill-b"]
- **load_skills**: ["skill-a", "skill-b"]
- **Skill evaluation**:
- skill-a: INCLUDED - description says "[quote]" which applies to this task
- skill-b: INCLUDED - description says "[quote]" which is needed here
@@ -265,7 +265,7 @@ I will use delegate_task with:
delegate_task(
category="[category-name]",
skills=["skill-a", "skill-b"],
load_skills=["skill-a", "skill-b"],
prompt="..."
)
```
@@ -276,12 +276,12 @@ delegate_task(
I will use delegate_task with:
- **Agent**: [agent-name]
- **Reason**: This requires [agent's specialty] based on agent description
- **Skills**: [] (agents have built-in expertise)
- **load_skills**: [] (agents have built-in expertise)
- **Expected Outcome**: [what agent should return]
delegate_task(
subagent_type="[agent-name]",
skills=[],
load_skills=[],
prompt="..."
)
```
@@ -292,13 +292,13 @@ delegate_task(
I will use delegate_task with:
- **Agent**: explore
- **Reason**: Need to find all authentication implementations across the codebase - this is contextual grep
- **Skills**: []
- **load_skills**: []
- **Expected Outcome**: List of files containing auth patterns
delegate_task(
subagent_type="explore",
run_in_background=true,
skills=[],
load_skills=[],
prompt="Find all authentication implementations in the codebase"
)
```
@@ -306,7 +306,7 @@ delegate_task(
**WRONG: No Skill Evaluation**
```
delegate_task(category="...", skills=[], prompt="...") // Where's the justification?
delegate_task(category="...", load_skills=[], prompt="...") // Where's the justification?
```
**WRONG: Vague Category Selection**
@@ -329,11 +329,11 @@ I'll use this category because it seems right.
```typescript
// CORRECT: Always background, always parallel
// Contextual Grep (internal)
delegate_task(subagent_type="explore", run_in_background=true, skills=[], prompt="Find auth implementations in our codebase...")
delegate_task(subagent_type="explore", run_in_background=true, skills=[], prompt="Find error handling patterns here...")
delegate_task(subagent_type="explore", run_in_background=true, load_skills=[], prompt="Find auth implementations in our codebase...")
delegate_task(subagent_type="explore", run_in_background=true, load_skills=[], prompt="Find error handling patterns here...")
// Reference Grep (external)
delegate_task(subagent_type="librarian", run_in_background=true, skills=[], prompt="Find JWT best practices in official docs...")
delegate_task(subagent_type="librarian", run_in_background=true, skills=[], prompt="Find how production apps handle auth in Express...")
delegate_task(subagent_type="librarian", run_in_background=true, load_skills=[], prompt="Find JWT best practices in official docs...")
delegate_task(subagent_type="librarian", run_in_background=true, load_skills=[], prompt="Find how production apps handle auth in Express...")
// Continue working immediately. Collect with background_output when needed.
// WRONG: Sequential or blocking
@@ -416,7 +416,7 @@ Skills inject specialized instructions into the subagent. Read the description t
For EVERY skill listed above, ask yourself:
> "Does this skill's expertise domain overlap with my task?"
- If YES → INCLUDE in `skills=[...]`
- If YES → INCLUDE in `load_skills=[...]`
- If NO → You MUST justify why (see below)
**STEP 3: Justify Omissions**
@@ -444,14 +444,14 @@ SKILL EVALUATION for "[skill-name]":
```typescript
delegate_task(
category="[selected-category]",
skills=["skill-1", "skill-2"], // Include ALL relevant skills
load_skills=["skill-1", "skill-2"], // Include ALL relevant skills
prompt="..."
)
```
**ANTI-PATTERN (will produce poor results):**
```typescript
delegate_task(category="...", skills=[], prompt="...") // Empty skills without justification
delegate_task(category="...", load_skills=[], prompt="...") // Empty load_skills without justification
```
### Delegation Table:
@@ -724,7 +724,7 @@ If the user's approach seems problematic:
| **Error Handling** | Empty catch blocks `catch(e) {}` |
| **Testing** | Deleting failing tests to "pass" |
| **Search** | Firing agents for single-line typos or obvious syntax errors |
| **Delegation** | Using `skills=[]` without justifying why no skills apply |
| **Delegation** | Using `load_skills=[]` without justifying why no skills apply |
| **Debugging** | Shotgun debugging, random changes |
## Soft Guidelines

View File

@@ -1,53 +1,48 @@
# AGENTS KNOWLEDGE BASE
## OVERVIEW
10 AI agents for multi-model orchestration. Sisyphus (primary), Atlas (orchestrator), oracle, librarian, explore, multimodal-looker, Prometheus, Metis, Momus, Sisyphus-Junior.
## STRUCTURE
```
agents/
├── atlas.ts # Master Orchestrator (543 lines)
├── sisyphus.ts # Main prompt (615 lines)
├── sisyphus-junior.ts # Delegated task executor
├── dynamic-agent-prompt-builder.ts # Dynamic prompt generation
├── atlas.ts # Master Orchestrator (holds todo list)
├── sisyphus.ts # Main prompt (SF Bay Area engineer identity)
├── sisyphus-junior.ts # Delegated task executor (category-spawned)
├── oracle.ts # Strategic advisor (GPT-5.2)
├── librarian.ts # Multi-repo research (GLM-4.7-free)
├── explore.ts # Fast grep (Grok Code)
├── librarian.ts # Multi-repo research (GitHub CLI, Context7)
├── explore.ts # Fast contextual grep (GPT-5 Nano)
├── multimodal-looker.ts # Media analyzer (Gemini 3 Flash)
├── prometheus-prompt.ts # Planning (1196 lines)
├── metis.ts # Plan consultant
├── momus.ts # Plan reviewer
├── prometheus-prompt.ts # Planning (Interview/Consultant mode, 1196 lines)
├── metis.ts # Pre-planning analysis (Gap detection)
├── momus.ts # Plan reviewer (Ruthless fault-finding)
├── dynamic-agent-prompt-builder.ts # Dynamic prompt generation
├── types.ts # AgentModelConfig, AgentPromptMetadata
├── utils.ts # createBuiltinAgents(), resolveModelWithFallback()
└── index.ts # builtinAgents export
```
## AGENT MODELS
| Agent | Model | Temp | Purpose |
|-------|-------|------|---------|
| Sisyphus | anthropic/claude-opus-4-5 | 0.1 | Primary orchestrator |
| Atlas | anthropic/claude-opus-4-5 | 0.1 | Master orchestrator |
| oracle | openai/gpt-5.2 | 0.1 | Consultation, debugging |
| librarian | opencode/glm-4.7-free | 0.1 | Docs, GitHub search |
| explore | opencode/grok-code | 0.1 | Fast contextual grep |
| multimodal-looker | google/gemini-3-flash-preview | 0.1 | PDF/image analysis |
| librarian | opencode/big-pickle | 0.1 | Docs, GitHub search |
| explore | opencode/gpt-5-nano | 0.1 | Fast contextual grep |
| multimodal-looker | google/gemini-3-flash | 0.1 | PDF/image analysis |
| Prometheus | anthropic/claude-opus-4-5 | 0.1 | Strategic planning |
| Metis | anthropic/claude-sonnet-4-5 | 0.3 | Pre-planning analysis |
| Momus | anthropic/claude-sonnet-4-5 | 0.1 | Plan validation |
| Sisyphus-Junior | anthropic/claude-sonnet-4-5 | 0.1 | Category-spawned executor |
## HOW TO ADD
1. Create `src/agents/my-agent.ts` exporting factory + metadata
2. Add to `agentSources` in `src/agents/utils.ts`
3. Update `AgentNameSchema` in `src/config/schema.ts`
4. Register in `src/index.ts` initialization
1. Create `src/agents/my-agent.ts` exporting factory + metadata.
2. Add to `agentSources` in `src/agents/utils.ts`.
3. Update `AgentNameSchema` in `src/config/schema.ts`.
4. Register in `src/index.ts` initialization.
## TOOL RESTRICTIONS
| Agent | Denied Tools |
|-------|-------------|
| oracle | write, edit, task, delegate_task |
@@ -57,14 +52,13 @@ agents/
| Sisyphus-Junior | task, delegate_task |
## PATTERNS
- **Factory**: `createXXXAgent(model?: string): AgentConfig`
- **Metadata**: `XXX_PROMPT_METADATA` with category, cost, triggers
- **Tool restrictions**: `createAgentToolRestrictions(tools)` or `createAgentToolAllowlist(tools)`
- **Thinking**: 32k budget tokens for Sisyphus, Oracle, Prometheus, Atlas
- **Factory**: `createXXXAgent(model: string): AgentConfig`
- **Metadata**: `XXX_PROMPT_METADATA` with category, cost, triggers.
- **Tool restrictions**: `createAgentToolRestrictions(tools)` or `createAgentToolAllowlist(tools)`.
- **Thinking**: 32k budget tokens for Sisyphus, Oracle, Prometheus, Atlas.
## ANTI-PATTERNS
- **Trust reports**: NEVER trust "I'm done" - verify outputs
- **High temp**: Don't use >0.3 for code agents
- **Sequential calls**: Use `delegate_task` with `run_in_background`
- **Trust reports**: NEVER trust "I'm done" - verify outputs.
- **High temp**: Don't use >0.3 for code agents.
- **Sequential calls**: Use `delegate_task` with `run_in_background` for exploration.
- **Prometheus writing code**: Planner only - never implements.

View File

@@ -58,7 +58,7 @@ Categories spawn \`Sisyphus-Junior-{category}\` with optimized settings:
${categoryRows.join("\n")}
\`\`\`typescript
delegate_task(category="[category-name]", skills=[...], prompt="...")
delegate_task(category="[category-name]", load_skills=[...], prompt="...")
\`\`\``
}
@@ -84,12 +84,12 @@ ${skillRows.join("\n")}
**MANDATORY: Evaluate ALL skills for relevance to your task.**
Read each skill's description and ask: "Does this skill's domain overlap with my task?"
- If YES: INCLUDE in skills=[...]
- If YES: INCLUDE in load_skills=[...]
- If NO: You MUST justify why in your pre-delegation declaration
**Usage:**
\`\`\`typescript
delegate_task(category="[category]", skills=["skill-1", "skill-2"], prompt="...")
delegate_task(category="[category]", load_skills=["skill-1", "skill-2"], prompt="...")
\`\`\`
**IMPORTANT:**
@@ -102,7 +102,7 @@ function buildDecisionMatrix(agents: AvailableAgent[], userCategories?: Record<s
const allCategories = { ...DEFAULT_CATEGORIES, ...userCategories }
const categoryRows = Object.entries(allCategories).map(([name]) =>
`| ${getCategoryDescription(name, userCategories)} | \`category="${name}", skills=[...]\` |`
`| ${getCategoryDescription(name, userCategories)} | \`category="${name}", load_skills=[...]\` |`
)
const agentRows = agents.map((a) => {
@@ -323,7 +323,7 @@ delegate_task(
**If verification fails**: Resume the SAME session with the ACTUAL error output:
\`\`\`typescript
delegate_task(
resume="ses_xyz789", // ALWAYS use the session from the failed task
session_id="ses_xyz789", // ALWAYS use the session from the failed task
load_skills=[...],
prompt="Verification failed: {actual error}. Fix."
)
@@ -331,24 +331,24 @@ delegate_task(
### 3.5 Handle Failures (USE RESUME)
**CRITICAL: When re-delegating, ALWAYS use \`resume\` parameter.**
**CRITICAL: When re-delegating, ALWAYS use \`session_id\` parameter.**
Every \`delegate_task()\` output includes a session_id. STORE IT.
If task fails:
1. Identify what went wrong
2. **Resume the SAME session** - subagent has full context already:
\`\`\`typescript
delegate_task(
resume="ses_xyz789", // Session from failed task
load_skills=[...],
prompt="FAILED: {error}. Fix by: {specific instruction}"
)
\`\`\`
\`\`\`typescript
delegate_task(
session_id="ses_xyz789", // Session from failed task
load_skills=[...],
prompt="FAILED: {error}. Fix by: {specific instruction}"
)
\`\`\`
3. Maximum 3 retry attempts with the SAME session
4. If blocked after 3 attempts: Document and continue to independent tasks
**Why resume is MANDATORY for failures:**
**Why session_id is MANDATORY for failures:**
- Subagent already read all files, knows the context
- No repeated exploration = 70%+ token savings
- Subagent knows what approaches already failed
@@ -493,7 +493,7 @@ You are the QA gate. Subagents lie. Verify EVERYTHING.
- Parallelize independent tasks
- Verify with your own tools
- **Store session_id from every delegation output**
- **Use \`resume="{session_id}"\` for retries, fixes, and follow-ups**
- **Use \`session_id="{session_id}"\` for retries, fixes, and follow-ups**
</critical_overrides>
`

View File

@@ -20,32 +20,6 @@ ALLOWED: call_omo_agent - You CAN spawn explore/librarian agents for research.
You work ALONE for implementation. No delegation of implementation tasks.
</Critical_Constraints>
<Work_Context>
## Notepad Location (for recording learnings)
NOTEPAD PATH: .sisyphus/notepads/{plan-name}/
- learnings.md: Record patterns, conventions, successful approaches
- issues.md: Record problems, blockers, gotchas encountered
- decisions.md: Record architectural choices and rationales
- problems.md: Record unresolved issues, technical debt
You SHOULD append findings to notepad files after completing work.
IMPORTANT: Always APPEND to notepad files - never overwrite or use Edit tool.
## Plan Location (READ ONLY)
PLAN PATH: .sisyphus/plans/{plan-name}.md
CRITICAL RULE: NEVER MODIFY THE PLAN FILE
The plan file (.sisyphus/plans/*.md) is SACRED and READ-ONLY.
- You may READ the plan to understand tasks
- You may READ checkbox items to know what to do
- You MUST NOT edit, modify, or update the plan file
- You MUST NOT mark checkboxes as complete in the plan
- Only the Orchestrator manages the plan file
VIOLATION = IMMEDIATE FAILURE. The Orchestrator tracks plan state.
</Work_Context>
<Todo_Discipline>
TODO OBSESSION (NON-NEGOTIABLE):
- 2+ steps → todowrite FIRST, atomic breakdown

View File

@@ -144,11 +144,11 @@ ${librarianSection}
\`\`\`typescript
// CORRECT: Always background, always parallel
// Contextual Grep (internal)
delegate_task(subagent_type="explore", run_in_background=true, skills=[], prompt="Find auth implementations in our codebase...")
delegate_task(subagent_type="explore", run_in_background=true, skills=[], prompt="Find error handling patterns here...")
delegate_task(subagent_type="explore", run_in_background=true, load_skills=[], prompt="Find auth implementations in our codebase...")
delegate_task(subagent_type="explore", run_in_background=true, load_skills=[], prompt="Find error handling patterns here...")
// Reference Grep (external)
delegate_task(subagent_type="librarian", run_in_background=true, skills=[], prompt="Find JWT best practices in official docs...")
delegate_task(subagent_type="librarian", run_in_background=true, skills=[], prompt="Find how production apps handle auth in Express...")
delegate_task(subagent_type="librarian", run_in_background=true, load_skills=[], prompt="Find JWT best practices in official docs...")
delegate_task(subagent_type="librarian", run_in_background=true, load_skills=[], prompt="Find how production apps handle auth in Express...")
// Continue working immediately. Collect with background_output when needed.
// WRONG: Sequential or blocking
@@ -209,15 +209,15 @@ AFTER THE WORK YOU DELEGATED SEEMS DONE, ALWAYS VERIFY THE RESULTS AS FOLLOWING:
Every \`delegate_task()\` output includes a session_id. **USE IT.**
**ALWAYS resume when:**
**ALWAYS continue when:**
| Scenario | Action |
|----------|--------|
| Task failed/incomplete | \`resume="{session_id}", prompt="Fix: {specific error}"\` |
| Follow-up question on result | \`resume="{session_id}", prompt="Also: {question}"\` |
| Multi-turn with same agent | \`resume="{session_id}"\` - NEVER start fresh |
| Verification failed | \`resume="{session_id}", prompt="Failed verification: {error}. Fix."\` |
| Task failed/incomplete | \`session_id="{session_id}", prompt="Fix: {specific error}"\` |
| Follow-up question on result | \`session_id="{session_id}", prompt="Also: {question}"\` |
| Multi-turn with same agent | \`session_id="{session_id}"\` - NEVER start fresh |
| Verification failed | \`session_id="{session_id}", prompt="Failed verification: {error}. Fix."\` |
**Why resume is CRITICAL:**
**Why session_id is CRITICAL:**
- Subagent has FULL conversation context preserved
- No repeated file reads, exploration, or setup
- Saves 70%+ tokens on follow-ups
@@ -228,10 +228,10 @@ Every \`delegate_task()\` output includes a session_id. **USE IT.**
delegate_task(category="quick", prompt="Fix the type error in auth.ts...")
// CORRECT: Resume preserves everything
delegate_task(resume="ses_abc123", prompt="Fix: Type error on line 42")
delegate_task(session_id="ses_abc123", prompt="Fix: Type error on line 42")
\`\`\`
**After EVERY delegation, STORE the session_id for potential resume.**
**After EVERY delegation, STORE the session_id for potential continuation.**
### Code Changes:
- Match existing patterns (if codebase is disciplined)

View File

@@ -1,6 +1,7 @@
import { describe, test, expect } from "bun:test"
import { describe, test, expect, beforeEach } from "bun:test"
import { createBuiltinAgents } from "./utils"
import type { AgentConfig } from "@opencode-ai/sdk"
import { clearSkillCache } from "../features/opencode-skill-loader/skill-content"
const TEST_DEFAULT_MODEL = "anthropic/claude-opus-4-5"
@@ -109,6 +110,10 @@ describe("buildAgent with category and skills", () => {
const { buildAgent } = require("./utils")
const TEST_MODEL = "anthropic/claude-opus-4-5"
beforeEach(() => {
clearSkillCache()
})
test("agent with category inherits category settings", () => {
// #given - agent factory that sets category but no model
const source = {
@@ -123,7 +128,7 @@ describe("buildAgent with category and skills", () => {
const agent = buildAgent(source["test-agent"], TEST_MODEL)
// #then - category's built-in model is applied
expect(agent.model).toBe("google/gemini-3-pro-preview")
expect(agent.model).toBe("google/gemini-3-pro")
})
test("agent with category and existing model keeps existing model", () => {
@@ -308,4 +313,42 @@ describe("buildAgent with category and skills", () => {
// #then
expect(agent.prompt).toBe("Base prompt")
})
test("agent with agent-browser skill resolves when browserProvider is set", () => {
// #given
const source = {
"test-agent": () =>
({
description: "Test agent",
skills: ["agent-browser"],
prompt: "Base prompt",
}) as AgentConfig,
}
// #when - browserProvider is "agent-browser"
const agent = buildAgent(source["test-agent"], TEST_MODEL, undefined, undefined, "agent-browser")
// #then - agent-browser skill content should be in prompt
expect(agent.prompt).toContain("agent-browser")
expect(agent.prompt).toContain("Base prompt")
})
test("agent with agent-browser skill NOT resolved when browserProvider not set", () => {
// #given
const source = {
"test-agent": () =>
({
description: "Test agent",
skills: ["agent-browser"],
prompt: "Base prompt",
}) as AgentConfig,
}
// #when - no browserProvider (defaults to playwright)
const agent = buildAgent(source["test-agent"], TEST_MODEL)
// #then - agent-browser skill not found, only base prompt remains
expect(agent.prompt).toBe("Base prompt")
expect(agent.prompt).not.toContain("agent-browser open")
})
})

View File

@@ -10,11 +10,12 @@ import { createMetisAgent } from "./metis"
import { createAtlasAgent } from "./atlas"
import { createMomusAgent } from "./momus"
import type { AvailableAgent, AvailableCategory, AvailableSkill } from "./dynamic-agent-prompt-builder"
import { deepMerge, fetchAvailableModels, resolveModelWithFallback, AGENT_MODEL_REQUIREMENTS, findCaseInsensitive, includesCaseInsensitive } from "../shared"
import { deepMerge, fetchAvailableModels, resolveModelWithFallback, AGENT_MODEL_REQUIREMENTS, findCaseInsensitive, includesCaseInsensitive, readConnectedProvidersCache } from "../shared"
import { DEFAULT_CATEGORIES, CATEGORY_DESCRIPTIONS } from "../tools/delegate-task/constants"
import { resolveMultipleSkills } from "../features/opencode-skill-loader/skill-content"
import { createBuiltinSkills } from "../features/builtin-skills"
import type { LoadedSkill, SkillScope } from "../features/opencode-skill-loader/types"
import type { BrowserAutomationProvider } from "../config/schema"
type AgentSource = AgentFactory | AgentConfig
@@ -50,7 +51,8 @@ export function buildAgent(
source: AgentSource,
model: string,
categories?: CategoriesConfig,
gitMasterConfig?: GitMasterConfig
gitMasterConfig?: GitMasterConfig,
browserProvider?: BrowserAutomationProvider
): AgentConfig {
const base = isFactory(source) ? source(model) : source
const categoryConfigs: Record<string, CategoryConfig> = categories
@@ -74,7 +76,7 @@ export function buildAgent(
}
if (agentWithCategory.skills?.length) {
const { resolved } = resolveMultipleSkills(agentWithCategory.skills, { gitMasterConfig })
const { resolved } = resolveMultipleSkills(agentWithCategory.skills, { gitMasterConfig, browserProvider })
if (resolved.size > 0) {
const skillContent = Array.from(resolved.values()).join("\n\n")
base.prompt = skillContent + (base.prompt ? "\n\n" + base.prompt : "")
@@ -146,14 +148,17 @@ export async function createBuiltinAgents(
categories?: CategoriesConfig,
gitMasterConfig?: GitMasterConfig,
discoveredSkills: LoadedSkill[] = [],
client?: any
client?: any,
browserProvider?: BrowserAutomationProvider
): Promise<Record<string, AgentConfig>> {
if (!systemDefaultModel) {
throw new Error("createBuiltinAgents requires systemDefaultModel")
}
// Fetch available models at plugin init
const availableModels = client ? await fetchAvailableModels(client) : new Set<string>()
const connectedProviders = readConnectedProvidersCache()
const availableModels = client
? await fetchAvailableModels(client, { connectedProviders: connectedProviders ?? undefined })
: new Set<string>()
const result: Record<string, AgentConfig> = {}
const availableAgents: AvailableAgent[] = []
@@ -167,7 +172,7 @@ export async function createBuiltinAgents(
description: categories?.[name]?.description ?? CATEGORY_DESCRIPTIONS[name] ?? "General tasks",
}))
const builtinSkills = createBuiltinSkills()
const builtinSkills = createBuiltinSkills({ browserProvider })
const builtinSkillNames = new Set(builtinSkills.map(s => s.name))
const builtinAvailable: AvailableSkill[] = builtinSkills.map((skill) => ({
@@ -204,7 +209,7 @@ export async function createBuiltinAgents(
systemDefaultModel,
})
let config = buildAgent(source, model, mergedCategories, gitMasterConfig)
let config = buildAgent(source, model, mergedCategories, gitMasterConfig, browserProvider)
// Apply variant from override or resolved fallback chain
if (override?.variant) {

View File

@@ -8,16 +8,17 @@ CLI entry: `bunx oh-my-opencode`. Interactive installer, doctor diagnostics. Com
```
cli/
├── index.ts # Commander.js entry
├── index.ts # Commander.js entry (4 commands)
├── install.ts # Interactive TUI (520 lines)
├── config-manager.ts # JSONC parsing (641 lines)
├── config-manager.ts # JSONC parsing (664 lines)
├── types.ts # InstallArgs, InstallConfig
├── model-fallback.ts # Model fallback configuration
├── doctor/
│ ├── index.ts # Doctor entry
│ ├── runner.ts # Check orchestration
│ ├── formatter.ts # Colored output
│ ├── constants.ts # Check IDs, symbols
│ ├── types.ts # CheckResult, CheckDefinition
│ ├── types.ts # CheckResult, CheckDefinition (114 lines)
│ └── checks/ # 14 checks, 21 files
│ ├── version.ts # OpenCode + plugin version
│ ├── config.ts # JSONC validity, Zod
@@ -25,6 +26,7 @@ cli/
│ ├── dependencies.ts # AST-Grep, Comment Checker
│ ├── lsp.ts # LSP connectivity
│ ├── mcp.ts # MCP validation
│ ├── model-resolution.ts # Model resolution check
│ └── gh.ts # GitHub CLI
├── run/
│ └── index.ts # Session launcher
@@ -36,36 +38,37 @@ cli/
| Command | Purpose |
|---------|---------|
| `install` | Interactive setup |
| `doctor` | 14 health checks |
| `run` | Launch session |
| `get-local-version` | Version check |
| `install` | Interactive setup with provider selection |
| `doctor` | 14 health checks for diagnostics |
| `run` | Launch session with todo enforcement |
| `get-local-version` | Version detection and update check |
## DOCTOR CATEGORIES
## DOCTOR CATEGORIES (14 Checks)
| Category | Checks |
|----------|--------|
| installation | opencode, plugin |
| configuration | config validity, Zod |
| configuration | config validity, Zod, model-resolution |
| authentication | anthropic, openai, google |
| dependencies | ast-grep, comment-checker |
| dependencies | ast-grep, comment-checker, gh-cli |
| tools | LSP, MCP |
| updates | version comparison |
## HOW TO ADD CHECK
1. Create `src/cli/doctor/checks/my-check.ts`
2. Export from `checks/index.ts`
3. Add to `getAllCheckDefinitions()`
2. Export `getXXXCheckDefinition()` factory returning `CheckDefinition`
3. Add to `getAllCheckDefinitions()` in `checks/index.ts`
## TUI FRAMEWORK
- **@clack/prompts**: `select()`, `spinner()`, `intro()`
- **picocolors**: Terminal colors
- **Symbols**: ✓ (pass), ✗ (fail), ⚠ (warn)
- **@clack/prompts**: `select()`, `spinner()`, `intro()`, `outro()`
- **picocolors**: Terminal colors for status and headers
- **Symbols**: ✓ (pass), ✗ (fail), ⚠ (warn), ℹ (info)
## ANTI-PATTERNS
- **Blocking in non-TTY**: Check `process.stdout.isTTY`
- **Direct JSON.parse**: Use `parseJsonc()`
- **Silent failures**: Return warn/fail in doctor
- **Blocking in non-TTY**: Always check `process.stdout.isTTY`
- **Direct JSON.parse**: Use `parseJsonc()` from shared utils
- **Silent failures**: Return `warn` or `fail` in doctor instead of throwing
- **Hardcoded paths**: Use `getOpenCodeConfigPaths()` from `config-manager.ts`

View File

@@ -5,54 +5,54 @@ exports[`generateModelConfig no providers available returns ULTIMATE_FALLBACK fo
"$schema": "https://raw.githubusercontent.com/code-yeongyu/oh-my-opencode/master/assets/oh-my-opencode.schema.json",
"agents": {
"atlas": {
"model": "opencode/glm-4.7-free",
"model": "opencode/big-pickle",
},
"explore": {
"model": "opencode/glm-4.7-free",
"model": "opencode/big-pickle",
},
"librarian": {
"model": "opencode/glm-4.7-free",
"model": "opencode/big-pickle",
},
"metis": {
"model": "opencode/glm-4.7-free",
"model": "opencode/big-pickle",
},
"momus": {
"model": "opencode/glm-4.7-free",
"model": "opencode/big-pickle",
},
"multimodal-looker": {
"model": "opencode/glm-4.7-free",
"model": "opencode/big-pickle",
},
"oracle": {
"model": "opencode/glm-4.7-free",
"model": "opencode/big-pickle",
},
"prometheus": {
"model": "opencode/glm-4.7-free",
"model": "opencode/big-pickle",
},
"sisyphus": {
"model": "opencode/glm-4.7-free",
"model": "opencode/big-pickle",
},
},
"categories": {
"artistry": {
"model": "opencode/glm-4.7-free",
"model": "opencode/big-pickle",
},
"quick": {
"model": "opencode/glm-4.7-free",
"model": "opencode/big-pickle",
},
"ultrabrain": {
"model": "opencode/glm-4.7-free",
"model": "opencode/big-pickle",
},
"unspecified-high": {
"model": "opencode/glm-4.7-free",
"model": "opencode/big-pickle",
},
"unspecified-low": {
"model": "opencode/glm-4.7-free",
"model": "opencode/big-pickle",
},
"visual-engineering": {
"model": "opencode/glm-4.7-free",
"model": "opencode/big-pickle",
},
"writing": {
"model": "opencode/glm-4.7-free",
"model": "opencode/big-pickle",
},
},
}
@@ -196,10 +196,10 @@ exports[`generateModelConfig single native provider uses OpenAI models when only
"model": "openai/gpt-5.2",
},
"explore": {
"model": "opencode/grok-code",
"model": "opencode/gpt-5-nano",
},
"librarian": {
"model": "opencode/glm-4.7-free",
"model": "opencode/big-pickle",
},
"metis": {
"model": "openai/gpt-5.2",
@@ -230,7 +230,7 @@ exports[`generateModelConfig single native provider uses OpenAI models when only
"model": "openai/gpt-5.2",
},
"quick": {
"model": "opencode/glm-4.7-free",
"model": "opencode/big-pickle",
},
"ultrabrain": {
"model": "openai/gpt-5.2-codex",
@@ -263,10 +263,10 @@ exports[`generateModelConfig single native provider uses OpenAI models with isMa
"model": "openai/gpt-5.2",
},
"explore": {
"model": "opencode/grok-code",
"model": "opencode/gpt-5-nano",
},
"librarian": {
"model": "opencode/glm-4.7-free",
"model": "opencode/big-pickle",
},
"metis": {
"model": "openai/gpt-5.2",
@@ -297,7 +297,7 @@ exports[`generateModelConfig single native provider uses OpenAI models with isMa
"model": "openai/gpt-5.2",
},
"quick": {
"model": "opencode/glm-4.7-free",
"model": "opencode/big-pickle",
},
"ultrabrain": {
"model": "openai/gpt-5.2-codex",
@@ -327,57 +327,57 @@ exports[`generateModelConfig single native provider uses Gemini models when only
"$schema": "https://raw.githubusercontent.com/code-yeongyu/oh-my-opencode/master/assets/oh-my-opencode.schema.json",
"agents": {
"atlas": {
"model": "google/gemini-3-pro-preview",
"model": "google/gemini-3-pro",
},
"explore": {
"model": "opencode/grok-code",
"model": "opencode/gpt-5-nano",
},
"librarian": {
"model": "opencode/glm-4.7-free",
"model": "opencode/big-pickle",
},
"metis": {
"model": "google/gemini-3-pro-preview",
"model": "google/gemini-3-pro",
"variant": "max",
},
"momus": {
"model": "google/gemini-3-pro-preview",
"model": "google/gemini-3-pro",
"variant": "max",
},
"multimodal-looker": {
"model": "google/gemini-3-flash-preview",
"model": "google/gemini-3-flash",
},
"oracle": {
"model": "google/gemini-3-pro-preview",
"model": "google/gemini-3-pro",
},
"prometheus": {
"model": "google/gemini-3-pro-preview",
"model": "google/gemini-3-pro",
},
"sisyphus": {
"model": "google/gemini-3-pro-preview",
"model": "google/gemini-3-pro",
},
},
"categories": {
"artistry": {
"model": "google/gemini-3-pro-preview",
"model": "google/gemini-3-pro",
"variant": "max",
},
"quick": {
"model": "google/gemini-3-flash-preview",
"model": "google/gemini-3-flash",
},
"ultrabrain": {
"model": "google/gemini-3-pro-preview",
"model": "google/gemini-3-pro",
},
"unspecified-high": {
"model": "google/gemini-3-flash-preview",
"model": "google/gemini-3-flash",
},
"unspecified-low": {
"model": "google/gemini-3-flash-preview",
"model": "google/gemini-3-flash",
},
"visual-engineering": {
"model": "google/gemini-3-pro-preview",
"model": "google/gemini-3-pro",
},
"writing": {
"model": "google/gemini-3-flash-preview",
"model": "google/gemini-3-flash",
},
},
}
@@ -388,57 +388,57 @@ exports[`generateModelConfig single native provider uses Gemini models with isMa
"$schema": "https://raw.githubusercontent.com/code-yeongyu/oh-my-opencode/master/assets/oh-my-opencode.schema.json",
"agents": {
"atlas": {
"model": "google/gemini-3-pro-preview",
"model": "google/gemini-3-pro",
},
"explore": {
"model": "opencode/grok-code",
"model": "opencode/gpt-5-nano",
},
"librarian": {
"model": "opencode/glm-4.7-free",
"model": "opencode/big-pickle",
},
"metis": {
"model": "google/gemini-3-pro-preview",
"model": "google/gemini-3-pro",
"variant": "max",
},
"momus": {
"model": "google/gemini-3-pro-preview",
"model": "google/gemini-3-pro",
"variant": "max",
},
"multimodal-looker": {
"model": "google/gemini-3-flash-preview",
"model": "google/gemini-3-flash",
},
"oracle": {
"model": "google/gemini-3-pro-preview",
"model": "google/gemini-3-pro",
},
"prometheus": {
"model": "google/gemini-3-pro-preview",
"model": "google/gemini-3-pro",
},
"sisyphus": {
"model": "google/gemini-3-pro-preview",
"model": "google/gemini-3-pro",
},
},
"categories": {
"artistry": {
"model": "google/gemini-3-pro-preview",
"model": "google/gemini-3-pro",
"variant": "max",
},
"quick": {
"model": "google/gemini-3-flash-preview",
"model": "google/gemini-3-flash",
},
"ultrabrain": {
"model": "google/gemini-3-pro-preview",
"model": "google/gemini-3-pro",
},
"unspecified-high": {
"model": "google/gemini-3-pro-preview",
"model": "google/gemini-3-pro",
},
"unspecified-low": {
"model": "google/gemini-3-flash-preview",
"model": "google/gemini-3-flash",
},
"visual-engineering": {
"model": "google/gemini-3-pro-preview",
"model": "google/gemini-3-pro",
},
"writing": {
"model": "google/gemini-3-flash-preview",
"model": "google/gemini-3-flash",
},
},
}
@@ -466,7 +466,7 @@ exports[`generateModelConfig all native providers uses preferred models from fal
"variant": "medium",
},
"multimodal-looker": {
"model": "google/gemini-3-flash-preview",
"model": "google/gemini-3-flash",
},
"oracle": {
"model": "openai/gpt-5.2",
@@ -482,7 +482,7 @@ exports[`generateModelConfig all native providers uses preferred models from fal
},
"categories": {
"artistry": {
"model": "google/gemini-3-pro-preview",
"model": "google/gemini-3-pro",
"variant": "max",
},
"quick": {
@@ -499,10 +499,10 @@ exports[`generateModelConfig all native providers uses preferred models from fal
"model": "anthropic/claude-sonnet-4-5",
},
"visual-engineering": {
"model": "google/gemini-3-pro-preview",
"model": "google/gemini-3-pro",
},
"writing": {
"model": "google/gemini-3-flash-preview",
"model": "google/gemini-3-flash",
},
},
}
@@ -530,7 +530,7 @@ exports[`generateModelConfig all native providers uses preferred models with isM
"variant": "medium",
},
"multimodal-looker": {
"model": "google/gemini-3-flash-preview",
"model": "google/gemini-3-flash",
},
"oracle": {
"model": "openai/gpt-5.2",
@@ -547,7 +547,7 @@ exports[`generateModelConfig all native providers uses preferred models with isM
},
"categories": {
"artistry": {
"model": "google/gemini-3-pro-preview",
"model": "google/gemini-3-pro",
"variant": "max",
},
"quick": {
@@ -565,10 +565,10 @@ exports[`generateModelConfig all native providers uses preferred models with isM
"model": "anthropic/claude-sonnet-4-5",
},
"visual-engineering": {
"model": "google/gemini-3-pro-preview",
"model": "google/gemini-3-pro",
},
"writing": {
"model": "google/gemini-3-flash-preview",
"model": "google/gemini-3-flash",
},
},
}
@@ -585,7 +585,7 @@ exports[`generateModelConfig fallback providers uses OpenCode Zen models when on
"model": "opencode/claude-haiku-4-5",
},
"librarian": {
"model": "opencode/glm-4.7-free",
"model": "opencode/big-pickle",
},
"metis": {
"model": "opencode/claude-opus-4-5",
@@ -596,7 +596,7 @@ exports[`generateModelConfig fallback providers uses OpenCode Zen models when on
"variant": "medium",
},
"multimodal-looker": {
"model": "opencode/gemini-3-flash-preview",
"model": "opencode/gemini-3-flash",
},
"oracle": {
"model": "opencode/gpt-5.2",
@@ -612,7 +612,7 @@ exports[`generateModelConfig fallback providers uses OpenCode Zen models when on
},
"categories": {
"artistry": {
"model": "opencode/gemini-3-pro-preview",
"model": "opencode/gemini-3-pro",
"variant": "max",
},
"quick": {
@@ -629,10 +629,10 @@ exports[`generateModelConfig fallback providers uses OpenCode Zen models when on
"model": "opencode/claude-sonnet-4-5",
},
"visual-engineering": {
"model": "opencode/gemini-3-pro-preview",
"model": "opencode/gemini-3-pro",
},
"writing": {
"model": "opencode/gemini-3-flash-preview",
"model": "opencode/gemini-3-flash",
},
},
}
@@ -649,7 +649,7 @@ exports[`generateModelConfig fallback providers uses OpenCode Zen models with is
"model": "opencode/claude-haiku-4-5",
},
"librarian": {
"model": "opencode/glm-4.7-free",
"model": "opencode/big-pickle",
},
"metis": {
"model": "opencode/claude-opus-4-5",
@@ -660,7 +660,7 @@ exports[`generateModelConfig fallback providers uses OpenCode Zen models with is
"variant": "medium",
},
"multimodal-looker": {
"model": "opencode/gemini-3-flash-preview",
"model": "opencode/gemini-3-flash",
},
"oracle": {
"model": "opencode/gpt-5.2",
@@ -677,7 +677,7 @@ exports[`generateModelConfig fallback providers uses OpenCode Zen models with is
},
"categories": {
"artistry": {
"model": "opencode/gemini-3-pro-preview",
"model": "opencode/gemini-3-pro",
"variant": "max",
},
"quick": {
@@ -695,10 +695,10 @@ exports[`generateModelConfig fallback providers uses OpenCode Zen models with is
"model": "opencode/claude-sonnet-4-5",
},
"visual-engineering": {
"model": "opencode/gemini-3-pro-preview",
"model": "opencode/gemini-3-pro",
},
"writing": {
"model": "opencode/gemini-3-flash-preview",
"model": "opencode/gemini-3-flash",
},
},
}
@@ -712,7 +712,7 @@ exports[`generateModelConfig fallback providers uses GitHub Copilot models when
"model": "github-copilot/claude-sonnet-4.5",
},
"explore": {
"model": "opencode/grok-code",
"model": "github-copilot/gpt-5-mini",
},
"librarian": {
"model": "github-copilot/claude-sonnet-4.5",
@@ -726,7 +726,7 @@ exports[`generateModelConfig fallback providers uses GitHub Copilot models when
"variant": "medium",
},
"multimodal-looker": {
"model": "github-copilot/gemini-3-flash-preview",
"model": "github-copilot/gemini-3-flash",
},
"oracle": {
"model": "github-copilot/gpt-5.2",
@@ -742,7 +742,7 @@ exports[`generateModelConfig fallback providers uses GitHub Copilot models when
},
"categories": {
"artistry": {
"model": "github-copilot/gemini-3-pro-preview",
"model": "github-copilot/gemini-3-pro",
"variant": "max",
},
"quick": {
@@ -759,10 +759,10 @@ exports[`generateModelConfig fallback providers uses GitHub Copilot models when
"model": "github-copilot/claude-sonnet-4.5",
},
"visual-engineering": {
"model": "github-copilot/gemini-3-pro-preview",
"model": "github-copilot/gemini-3-pro",
},
"writing": {
"model": "github-copilot/gemini-3-flash-preview",
"model": "github-copilot/gemini-3-flash",
},
},
}
@@ -776,7 +776,7 @@ exports[`generateModelConfig fallback providers uses GitHub Copilot models with
"model": "github-copilot/claude-sonnet-4.5",
},
"explore": {
"model": "opencode/grok-code",
"model": "github-copilot/gpt-5-mini",
},
"librarian": {
"model": "github-copilot/claude-sonnet-4.5",
@@ -790,7 +790,7 @@ exports[`generateModelConfig fallback providers uses GitHub Copilot models with
"variant": "medium",
},
"multimodal-looker": {
"model": "github-copilot/gemini-3-flash-preview",
"model": "github-copilot/gemini-3-flash",
},
"oracle": {
"model": "github-copilot/gpt-5.2",
@@ -807,7 +807,7 @@ exports[`generateModelConfig fallback providers uses GitHub Copilot models with
},
"categories": {
"artistry": {
"model": "github-copilot/gemini-3-pro-preview",
"model": "github-copilot/gemini-3-pro",
"variant": "max",
},
"quick": {
@@ -825,10 +825,10 @@ exports[`generateModelConfig fallback providers uses GitHub Copilot models with
"model": "github-copilot/claude-sonnet-4.5",
},
"visual-engineering": {
"model": "github-copilot/gemini-3-pro-preview",
"model": "github-copilot/gemini-3-pro",
},
"writing": {
"model": "github-copilot/gemini-3-flash-preview",
"model": "github-copilot/gemini-3-flash",
},
},
}
@@ -839,51 +839,51 @@ exports[`generateModelConfig fallback providers uses ZAI model for librarian whe
"$schema": "https://raw.githubusercontent.com/code-yeongyu/oh-my-opencode/master/assets/oh-my-opencode.schema.json",
"agents": {
"atlas": {
"model": "opencode/glm-4.7-free",
"model": "opencode/big-pickle",
},
"explore": {
"model": "opencode/grok-code",
"model": "opencode/gpt-5-nano",
},
"librarian": {
"model": "zai-coding-plan/glm-4.7",
},
"metis": {
"model": "opencode/glm-4.7-free",
"model": "opencode/big-pickle",
},
"momus": {
"model": "opencode/glm-4.7-free",
"model": "opencode/big-pickle",
},
"multimodal-looker": {
"model": "zai-coding-plan/glm-4.6v",
},
"oracle": {
"model": "opencode/glm-4.7-free",
"model": "opencode/big-pickle",
},
"prometheus": {
"model": "opencode/glm-4.7-free",
"model": "opencode/big-pickle",
},
"sisyphus": {
"model": "opencode/glm-4.7-free",
"model": "opencode/big-pickle",
},
},
"categories": {
"artistry": {
"model": "opencode/glm-4.7-free",
"model": "opencode/big-pickle",
},
"quick": {
"model": "opencode/glm-4.7-free",
"model": "opencode/big-pickle",
},
"ultrabrain": {
"model": "opencode/glm-4.7-free",
"model": "opencode/big-pickle",
},
"unspecified-high": {
"model": "opencode/glm-4.7-free",
"model": "opencode/big-pickle",
},
"unspecified-low": {
"model": "opencode/glm-4.7-free",
"model": "opencode/big-pickle",
},
"visual-engineering": {
"model": "opencode/glm-4.7-free",
"model": "opencode/big-pickle",
},
"writing": {
"model": "zai-coding-plan/glm-4.7",
@@ -897,28 +897,28 @@ exports[`generateModelConfig fallback providers uses ZAI model for librarian wit
"$schema": "https://raw.githubusercontent.com/code-yeongyu/oh-my-opencode/master/assets/oh-my-opencode.schema.json",
"agents": {
"atlas": {
"model": "opencode/glm-4.7-free",
"model": "opencode/big-pickle",
},
"explore": {
"model": "opencode/grok-code",
"model": "opencode/gpt-5-nano",
},
"librarian": {
"model": "zai-coding-plan/glm-4.7",
},
"metis": {
"model": "opencode/glm-4.7-free",
"model": "opencode/big-pickle",
},
"momus": {
"model": "opencode/glm-4.7-free",
"model": "opencode/big-pickle",
},
"multimodal-looker": {
"model": "zai-coding-plan/glm-4.6v",
},
"oracle": {
"model": "opencode/glm-4.7-free",
"model": "opencode/big-pickle",
},
"prometheus": {
"model": "opencode/glm-4.7-free",
"model": "opencode/big-pickle",
},
"sisyphus": {
"model": "zai-coding-plan/glm-4.7",
@@ -926,22 +926,22 @@ exports[`generateModelConfig fallback providers uses ZAI model for librarian wit
},
"categories": {
"artistry": {
"model": "opencode/glm-4.7-free",
"model": "opencode/big-pickle",
},
"quick": {
"model": "opencode/glm-4.7-free",
"model": "opencode/big-pickle",
},
"ultrabrain": {
"model": "opencode/glm-4.7-free",
"model": "opencode/big-pickle",
},
"unspecified-high": {
"model": "opencode/glm-4.7-free",
"model": "opencode/big-pickle",
},
"unspecified-low": {
"model": "opencode/glm-4.7-free",
"model": "opencode/big-pickle",
},
"visual-engineering": {
"model": "opencode/glm-4.7-free",
"model": "opencode/big-pickle",
},
"writing": {
"model": "zai-coding-plan/glm-4.7",
@@ -961,7 +961,7 @@ exports[`generateModelConfig mixed provider scenarios uses Claude + OpenCode Zen
"model": "anthropic/claude-haiku-4-5",
},
"librarian": {
"model": "opencode/glm-4.7-free",
"model": "opencode/big-pickle",
},
"metis": {
"model": "anthropic/claude-opus-4-5",
@@ -972,7 +972,7 @@ exports[`generateModelConfig mixed provider scenarios uses Claude + OpenCode Zen
"variant": "medium",
},
"multimodal-looker": {
"model": "opencode/gemini-3-flash-preview",
"model": "opencode/gemini-3-flash",
},
"oracle": {
"model": "opencode/gpt-5.2",
@@ -988,7 +988,7 @@ exports[`generateModelConfig mixed provider scenarios uses Claude + OpenCode Zen
},
"categories": {
"artistry": {
"model": "opencode/gemini-3-pro-preview",
"model": "opencode/gemini-3-pro",
"variant": "max",
},
"quick": {
@@ -1005,10 +1005,10 @@ exports[`generateModelConfig mixed provider scenarios uses Claude + OpenCode Zen
"model": "anthropic/claude-sonnet-4-5",
},
"visual-engineering": {
"model": "opencode/gemini-3-pro-preview",
"model": "opencode/gemini-3-pro",
},
"writing": {
"model": "opencode/gemini-3-flash-preview",
"model": "opencode/gemini-3-flash",
},
},
}
@@ -1022,7 +1022,7 @@ exports[`generateModelConfig mixed provider scenarios uses OpenAI + Copilot comb
"model": "github-copilot/claude-sonnet-4.5",
},
"explore": {
"model": "opencode/grok-code",
"model": "github-copilot/gpt-5-mini",
},
"librarian": {
"model": "github-copilot/claude-sonnet-4.5",
@@ -1036,7 +1036,7 @@ exports[`generateModelConfig mixed provider scenarios uses OpenAI + Copilot comb
"variant": "medium",
},
"multimodal-looker": {
"model": "github-copilot/gemini-3-flash-preview",
"model": "github-copilot/gemini-3-flash",
},
"oracle": {
"model": "openai/gpt-5.2",
@@ -1052,7 +1052,7 @@ exports[`generateModelConfig mixed provider scenarios uses OpenAI + Copilot comb
},
"categories": {
"artistry": {
"model": "github-copilot/gemini-3-pro-preview",
"model": "github-copilot/gemini-3-pro",
"variant": "max",
},
"quick": {
@@ -1069,10 +1069,10 @@ exports[`generateModelConfig mixed provider scenarios uses OpenAI + Copilot comb
"model": "github-copilot/claude-sonnet-4.5",
},
"visual-engineering": {
"model": "github-copilot/gemini-3-pro-preview",
"model": "github-copilot/gemini-3-pro",
},
"writing": {
"model": "github-copilot/gemini-3-flash-preview",
"model": "github-copilot/gemini-3-flash",
},
},
}
@@ -1099,7 +1099,7 @@ exports[`generateModelConfig mixed provider scenarios uses Claude + ZAI combinat
"model": "anthropic/claude-opus-4-5",
},
"multimodal-looker": {
"model": "anthropic/claude-haiku-4-5",
"model": "zai-coding-plan/glm-4.6v",
},
"oracle": {
"model": "anthropic/claude-opus-4-5",
@@ -1163,7 +1163,7 @@ exports[`generateModelConfig mixed provider scenarios uses Gemini + Claude combi
"model": "anthropic/claude-opus-4-5",
},
"multimodal-looker": {
"model": "google/gemini-3-flash-preview",
"model": "google/gemini-3-flash",
},
"oracle": {
"model": "anthropic/claude-opus-4-5",
@@ -1179,7 +1179,7 @@ exports[`generateModelConfig mixed provider scenarios uses Gemini + Claude combi
},
"categories": {
"artistry": {
"model": "google/gemini-3-pro-preview",
"model": "google/gemini-3-pro",
"variant": "max",
},
"quick": {
@@ -1196,10 +1196,10 @@ exports[`generateModelConfig mixed provider scenarios uses Gemini + Claude combi
"model": "anthropic/claude-sonnet-4-5",
},
"visual-engineering": {
"model": "google/gemini-3-pro-preview",
"model": "google/gemini-3-pro",
},
"writing": {
"model": "google/gemini-3-flash-preview",
"model": "google/gemini-3-flash",
},
},
}
@@ -1227,7 +1227,7 @@ exports[`generateModelConfig mixed provider scenarios uses all fallback provider
"variant": "medium",
},
"multimodal-looker": {
"model": "github-copilot/gemini-3-flash-preview",
"model": "github-copilot/gemini-3-flash",
},
"oracle": {
"model": "github-copilot/gpt-5.2",
@@ -1243,7 +1243,7 @@ exports[`generateModelConfig mixed provider scenarios uses all fallback provider
},
"categories": {
"artistry": {
"model": "github-copilot/gemini-3-pro-preview",
"model": "github-copilot/gemini-3-pro",
"variant": "max",
},
"quick": {
@@ -1260,10 +1260,10 @@ exports[`generateModelConfig mixed provider scenarios uses all fallback provider
"model": "github-copilot/claude-sonnet-4.5",
},
"visual-engineering": {
"model": "github-copilot/gemini-3-pro-preview",
"model": "github-copilot/gemini-3-pro",
},
"writing": {
"model": "github-copilot/gemini-3-flash-preview",
"model": "github-copilot/gemini-3-flash",
},
},
}
@@ -1291,7 +1291,7 @@ exports[`generateModelConfig mixed provider scenarios uses all providers togethe
"variant": "medium",
},
"multimodal-looker": {
"model": "google/gemini-3-flash-preview",
"model": "google/gemini-3-flash",
},
"oracle": {
"model": "openai/gpt-5.2",
@@ -1307,7 +1307,7 @@ exports[`generateModelConfig mixed provider scenarios uses all providers togethe
},
"categories": {
"artistry": {
"model": "google/gemini-3-pro-preview",
"model": "google/gemini-3-pro",
"variant": "max",
},
"quick": {
@@ -1324,10 +1324,10 @@ exports[`generateModelConfig mixed provider scenarios uses all providers togethe
"model": "anthropic/claude-sonnet-4-5",
},
"visual-engineering": {
"model": "google/gemini-3-pro-preview",
"model": "google/gemini-3-pro",
},
"writing": {
"model": "google/gemini-3-flash-preview",
"model": "google/gemini-3-flash",
},
},
}
@@ -1355,7 +1355,7 @@ exports[`generateModelConfig mixed provider scenarios uses all providers with is
"variant": "medium",
},
"multimodal-looker": {
"model": "google/gemini-3-flash-preview",
"model": "google/gemini-3-flash",
},
"oracle": {
"model": "openai/gpt-5.2",
@@ -1372,7 +1372,7 @@ exports[`generateModelConfig mixed provider scenarios uses all providers with is
},
"categories": {
"artistry": {
"model": "google/gemini-3-pro-preview",
"model": "google/gemini-3-pro",
"variant": "max",
},
"quick": {
@@ -1390,10 +1390,10 @@ exports[`generateModelConfig mixed provider scenarios uses all providers with is
"model": "anthropic/claude-sonnet-4-5",
},
"visual-engineering": {
"model": "google/gemini-3-pro-preview",
"model": "google/gemini-3-pro",
},
"writing": {
"model": "google/gemini-3-flash-preview",
"model": "google/gemini-3-flash",
},
},
}

View File

@@ -170,7 +170,7 @@ describe("fetchNpmDistTags", () => {
})
describe("config-manager ANTIGRAVITY_PROVIDER_CONFIG", () => {
test("Gemini models include full spec (limit + modalities)", () => {
test("all models include full spec (limit + modalities + Antigravity label)", () => {
const google = (ANTIGRAVITY_PROVIDER_CONFIG as any).google
expect(google).toBeTruthy()
@@ -178,9 +178,11 @@ describe("config-manager ANTIGRAVITY_PROVIDER_CONFIG", () => {
expect(models).toBeTruthy()
const required = [
"antigravity-gemini-3-pro-high",
"antigravity-gemini-3-pro-low",
"antigravity-gemini-3-pro",
"antigravity-gemini-3-flash",
"antigravity-claude-sonnet-4-5",
"antigravity-claude-sonnet-4-5-thinking",
"antigravity-claude-opus-4-5-thinking",
]
for (const key of required) {
@@ -198,6 +200,43 @@ describe("config-manager ANTIGRAVITY_PROVIDER_CONFIG", () => {
expect(Array.isArray(model.modalities.output)).toBe(true)
}
})
test("Gemini models have variant definitions", () => {
  // #given the antigravity provider config
  const models = (ANTIGRAVITY_PROVIDER_CONFIG as any).google.models as Record<string, any>

  // #when checking each Gemini model against the variants it must expose
  // (Pro first, then Flash — same order as the individual assertions they replace)
  const expectedVariants: Array<[string, string[]]> = [
    ["antigravity-gemini-3-pro", ["low", "high"]],
    ["antigravity-gemini-3-flash", ["minimal", "low", "medium", "high"]],
  ]

  // #then every listed variant should be defined on its model
  for (const [modelId, variantNames] of expectedVariants) {
    const model = models[modelId]
    expect(model.variants).toBeTruthy()
    for (const variantName of variantNames) {
      expect(model.variants[variantName]).toBeTruthy()
    }
  }
})
test("Claude thinking models have variant definitions", () => {
  // #given the antigravity provider config
  const models = (ANTIGRAVITY_PROVIDER_CONFIG as any).google.models as Record<string, any>

  // #when looking up both Claude thinking models (Sonnet first, then Opus)
  const thinkingModels = [
    "antigravity-claude-sonnet-4-5-thinking",
    "antigravity-claude-opus-4-5-thinking",
  ].map((modelId) => models[modelId])

  // #then each of them should expose low and max variants
  thinkingModels.forEach((thinkingModel) => {
    expect(thinkingModel.variants).toBeTruthy()
    expect(thinkingModel.variants.low).toBeTruthy()
    expect(thinkingModel.variants.max).toBeTruthy()
  })
})
})
describe("generateOmoConfig - model fallback system", () => {
@@ -277,7 +316,7 @@ describe("generateOmoConfig - model fallback system", () => {
// #then should use ultimate fallback for all agents
expect(result.$schema).toBe("https://raw.githubusercontent.com/code-yeongyu/oh-my-opencode/master/assets/oh-my-opencode.schema.json")
expect((result.agents as Record<string, { model: string }>).sisyphus.model).toBe("opencode/glm-4.7-free")
expect((result.agents as Record<string, { model: string }>).sisyphus.model).toBe("opencode/big-pickle")
})
test("uses zai-coding-plan/glm-4.7 for librarian when Z.ai available", () => {

View File

@@ -497,38 +497,61 @@ export async function runBunInstallWithDetails(): Promise<BunInstallResult> {
*
* IMPORTANT: Model names MUST use `antigravity-` prefix for stability.
*
* The opencode-antigravity-auth plugin supports two naming conventions:
* - `antigravity-gemini-3-pro-high` (RECOMMENDED, explicit Antigravity quota routing)
* - `gemini-3-pro-high` (LEGACY, backward compatible but may break in future)
* Since opencode-antigravity-auth v1.3.0, models use a variant system:
* - `antigravity-gemini-3-pro` with variants: low, high
* - `antigravity-gemini-3-flash` with variants: minimal, low, medium, high
*
* Legacy names rely on Gemini CLI using `-preview` suffix for disambiguation.
* If Google removes `-preview`, legacy names may route to wrong quota.
* Legacy tier-suffixed names (e.g., `antigravity-gemini-3-pro-high`) still work
* but variants are the recommended approach.
*
* @see https://github.com/NoeFabris/opencode-antigravity-auth#migration-guide-v127
* @see https://github.com/NoeFabris/opencode-antigravity-auth#models
*/
export const ANTIGRAVITY_PROVIDER_CONFIG = {
google: {
name: "Google",
models: {
"antigravity-gemini-3-pro-high": {
name: "Gemini 3 Pro High (Antigravity)",
thinking: true,
attachment: true,
limit: { context: 1048576, output: 65535 },
modalities: { input: ["text", "image", "pdf"], output: ["text"] },
},
"antigravity-gemini-3-pro-low": {
name: "Gemini 3 Pro Low (Antigravity)",
thinking: true,
attachment: true,
"antigravity-gemini-3-pro": {
name: "Gemini 3 Pro (Antigravity)",
limit: { context: 1048576, output: 65535 },
modalities: { input: ["text", "image", "pdf"], output: ["text"] },
variants: {
low: { thinkingLevel: "low" },
high: { thinkingLevel: "high" },
},
},
"antigravity-gemini-3-flash": {
name: "Gemini 3 Flash (Antigravity)",
attachment: true,
limit: { context: 1048576, output: 65536 },
modalities: { input: ["text", "image", "pdf"], output: ["text"] },
variants: {
minimal: { thinkingLevel: "minimal" },
low: { thinkingLevel: "low" },
medium: { thinkingLevel: "medium" },
high: { thinkingLevel: "high" },
},
},
"antigravity-claude-sonnet-4-5": {
name: "Claude Sonnet 4.5 (Antigravity)",
limit: { context: 200000, output: 64000 },
modalities: { input: ["text", "image", "pdf"], output: ["text"] },
},
"antigravity-claude-sonnet-4-5-thinking": {
name: "Claude Sonnet 4.5 Thinking (Antigravity)",
limit: { context: 200000, output: 64000 },
modalities: { input: ["text", "image", "pdf"], output: ["text"] },
variants: {
low: { thinkingConfig: { thinkingBudget: 8192 } },
max: { thinkingConfig: { thinkingBudget: 32768 } },
},
},
"antigravity-claude-opus-4-5-thinking": {
name: "Claude Opus 4.5 Thinking (Antigravity)",
limit: { context: 200000, output: 64000 },
modalities: { input: ["text", "image", "pdf"], output: ["text"] },
variants: {
low: { thinkingConfig: { thinkingBudget: 8192 } },
max: { thinkingConfig: { thinkingBudget: 32768 } },
},
},
},
},

View File

@@ -27,7 +27,7 @@ describe("model-resolution check", () => {
// #then: Should have category entries
const visual = info.categories.find((c) => c.name === "visual-engineering")
expect(visual).toBeDefined()
expect(visual!.requirement.fallbackChain[0]?.model).toBe("gemini-3-pro-preview")
expect(visual!.requirement.fallbackChain[0]?.model).toBe("gemini-3-pro")
expect(visual!.requirement.fallbackChain[0]?.providers).toContain("google")
})
})

View File

@@ -199,9 +199,11 @@ function buildDetailsArray(info: ModelResolutionInfo, available: AvailableModels
details.push("═══ Available Models (from cache) ═══")
details.push("")
if (available.cacheExists) {
details.push(` Providers: ${available.providers.length} (${available.providers.slice(0, 8).join(", ")}${available.providers.length > 8 ? "..." : ""})`)
details.push(` Providers in cache: ${available.providers.length}`)
details.push(` Sample: ${available.providers.slice(0, 6).join(", ")}${available.providers.length > 6 ? "..." : ""}`)
details.push(` Total models: ${available.modelCount}`)
details.push(` Cache: ~/.cache/opencode/models.json`)
details.push(` Runtime: only connected providers used`)
details.push(` Refresh: opencode models --refresh`)
} else {
details.push(" ⚠ Cache not found. Run 'opencode' to populate.")

View File

@@ -178,7 +178,7 @@ async function runTuiMode(detected: DetectedConfig): Promise<InstallConfig | nul
const claude = await p.select({
message: "Do you have a Claude Pro/Max subscription?",
options: [
{ value: "no" as const, label: "No", hint: "Will use opencode/glm-4.7-free as fallback" },
{ value: "no" as const, label: "No", hint: "Will use opencode/big-pickle as fallback" },
{ value: "yes" as const, label: "Yes (standard)", hint: "Claude Opus 4.5 for orchestration" },
{ value: "max20" as const, label: "Yes (max20 mode)", hint: "Full power with Claude Sonnet 4.5 for Librarian" },
],
@@ -363,7 +363,7 @@ async function runNonTuiInstall(args: InstallArgs): Promise<number> {
}
if (!config.hasClaude && !config.hasOpenAI && !config.hasGemini && !config.hasCopilot && !config.hasOpencodeZen) {
printWarning("No model providers configured. Using opencode/glm-4.7-free as fallback.")
printWarning("No model providers configured. Using opencode/big-pickle as fallback.")
}
console.log(`${SYMBOLS.star} ${color.bold(color.green(isUpdate ? "Configuration updated!" : "Installation complete!"))}`)
@@ -480,7 +480,7 @@ export async function install(args: InstallArgs): Promise<number> {
}
if (!config.hasClaude && !config.hasOpenAI && !config.hasGemini && !config.hasCopilot && !config.hasOpencodeZen) {
p.log.warn("No model providers configured. Using opencode/glm-4.7-free as fallback.")
p.log.warn("No model providers configured. Using opencode/big-pickle as fallback.")
}
p.note(formatConfigSummary(config), isUpdate ? "Updated Configuration" : "Installation Complete")

View File

@@ -310,15 +310,15 @@ describe("generateModelConfig", () => {
})
describe("explore agent special cases", () => {
test("explore uses grok-code when only Gemini available (no Claude)", () => {
test("explore uses gpt-5-nano when only Gemini available (no Claude)", () => {
// #given only Gemini is available (no Claude)
const config = createConfig({ hasGemini: true })
// #when generateModelConfig is called
const result = generateModelConfig(config)
// #then explore should use grok-code (Claude haiku not available)
expect(result.agents?.explore?.model).toBe("opencode/grok-code")
// #then explore should use gpt-5-nano (Claude haiku not available)
expect(result.agents?.explore?.model).toBe("opencode/gpt-5-nano")
})
test("explore uses Claude haiku when Claude available", () => {
@@ -343,15 +343,26 @@ describe("generateModelConfig", () => {
expect(result.agents?.explore?.model).toBe("anthropic/claude-haiku-4-5")
})
test("explore uses grok-code when only OpenAI available", () => {
test("explore uses gpt-5-nano when only OpenAI available", () => {
// #given only OpenAI is available
const config = createConfig({ hasOpenAI: true })
// #when generateModelConfig is called
const result = generateModelConfig(config)
// #then explore should use grok-code (fallback)
expect(result.agents?.explore?.model).toBe("opencode/grok-code")
// #then explore should use gpt-5-nano (fallback)
expect(result.agents?.explore?.model).toBe("opencode/gpt-5-nano")
})
test("explore uses gpt-5-mini when only Copilot available", () => {
  // #given an install config whose sole provider is GitHub Copilot
  const copilotOnly = createConfig({ hasCopilot: true })

  // #when the model config is generated from it
  const { agents } = generateModelConfig(copilotOnly)

  // #then explore resolves to the Copilot-hosted gpt-5-mini
  const exploreModel = agents?.explore?.model
  expect(exploreModel).toBe("github-copilot/gpt-5-mini")
})
})

View File

@@ -36,7 +36,7 @@ export interface GeneratedOmoConfig {
const ZAI_MODEL = "zai-coding-plan/glm-4.7"
const ULTIMATE_FALLBACK = "opencode/glm-4.7-free"
const ULTIMATE_FALLBACK = "opencode/big-pickle"
const SCHEMA_URL = "https://raw.githubusercontent.com/code-yeongyu/oh-my-opencode/master/assets/oh-my-opencode.schema.json"
function toProviderAvailability(config: InstallConfig): ProviderAvailability {
@@ -103,7 +103,7 @@ function getSisyphusFallbackChain(isMaxPlan: boolean): FallbackEntry[] {
return [
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-sonnet-4-5" },
{ providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "high" },
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro-preview" },
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro" },
]
}
@@ -139,14 +139,16 @@ export function generateModelConfig(config: InstallConfig): GeneratedOmoConfig {
continue
}
// Special case: explore uses Claude haiku → OpenCode grok-code
// Special case: explore uses Claude haiku → GitHub Copilot gpt-5-mini → OpenCode gpt-5-nano
if (role === "explore") {
if (avail.native.claude) {
agents[role] = { model: "anthropic/claude-haiku-4-5" }
} else if (avail.opencodeZen) {
agents[role] = { model: "opencode/claude-haiku-4-5" }
} else if (avail.copilot) {
agents[role] = { model: "github-copilot/gpt-5-mini" }
} else {
agents[role] = { model: "opencode/grok-code" }
agents[role] = { model: "opencode/gpt-5-nano" }
}
continue
}

View File

@@ -9,6 +9,8 @@ export {
SisyphusAgentConfigSchema,
ExperimentalConfigSchema,
RalphLoopConfigSchema,
TmuxConfigSchema,
TmuxLayoutSchema,
} from "./schema"
export type {
@@ -23,4 +25,6 @@ export type {
ExperimentalConfig,
DynamicContextPruningConfig,
RalphLoopConfig,
TmuxConfig,
TmuxLayout,
} from "./schema"

View File

@@ -1,5 +1,12 @@
import { describe, expect, test } from "bun:test"
import { AgentOverrideConfigSchema, BuiltinCategoryNameSchema, CategoryConfigSchema, OhMyOpenCodeConfigSchema } from "./schema"
import {
AgentOverrideConfigSchema,
BrowserAutomationConfigSchema,
BrowserAutomationProviderSchema,
BuiltinCategoryNameSchema,
CategoryConfigSchema,
OhMyOpenCodeConfigSchema,
} from "./schema"
describe("disabled_mcps schema", () => {
test("should accept built-in MCP names", () => {
@@ -345,6 +352,20 @@ describe("CategoryConfigSchema", () => {
}
})
test("accepts reasoningEffort as optional string with xhigh", () => {
  // #given a category config that only sets the "xhigh" effort level
  const input = { reasoningEffort: "xhigh" }

  // #when it is validated without throwing
  const parsed = CategoryConfigSchema.safeParse(input)

  // #then validation succeeds and the value survives parsing
  expect(parsed.success).toBe(true)
  if (!parsed.success) return
  expect(parsed.data.reasoningEffort).toBe("xhigh")
})
test("rejects non-string variant", () => {
// #given
const config = { model: "openai/gpt-5.2", variant: 123 }
@@ -494,3 +515,94 @@ describe("Sisyphus-Junior agent override", () => {
}
})
})
describe("BrowserAutomationProviderSchema", () => {
  // Providers the enum must accept verbatim; one test case is registered per entry.
  for (const provider of ["playwright", "agent-browser"]) {
    test(`accepts '${provider}' as valid provider`, () => {
      // #given
      const candidate = provider

      // #when
      const parsed = BrowserAutomationProviderSchema.safeParse(candidate)

      // #then
      expect(parsed.success).toBe(true)
      expect(parsed.data).toBe(candidate)
    })
  }

  test("rejects invalid provider", () => {
    // #given a value outside the enum
    const candidate = "invalid-provider"

    // #when
    const { success } = BrowserAutomationProviderSchema.safeParse(candidate)

    // #then
    expect(success).toBe(false)
  })
})
describe("BrowserAutomationConfigSchema", () => {
  test("defaults provider to 'playwright' when not specified", () => {
    // #given an empty config object
    const emptyConfig = {}

    // #when it is parsed (throws on invalid input)
    const { provider } = BrowserAutomationConfigSchema.parse(emptyConfig)

    // #then the default provider is filled in
    expect(provider).toBe("playwright")
  })

  test("accepts agent-browser provider", () => {
    // #given an explicit non-default provider
    const explicitConfig = { provider: "agent-browser" }

    // #when
    const { provider } = BrowserAutomationConfigSchema.parse(explicitConfig)

    // #then the explicit value is kept
    expect(provider).toBe("agent-browser")
  })
})
describe("OhMyOpenCodeConfigSchema - browser_automation_engine", () => {
  test("accepts browser_automation_engine config", () => {
    // #given a top-level config carrying only the browser automation section
    const engine = { provider: "agent-browser" }
    const config = { browser_automation_engine: engine }

    // #when
    const parsed = OhMyOpenCodeConfigSchema.safeParse(config)

    // #then the section is accepted and its provider preserved
    expect(parsed.success).toBe(true)
    expect(parsed.data?.browser_automation_engine?.provider).toBe("agent-browser")
  })

  test("accepts config without browser_automation_engine", () => {
    // #given a config that omits the section entirely
    const config = {}

    // #when
    const parsed = OhMyOpenCodeConfigSchema.safeParse(config)

    // #then parsing succeeds and the section stays absent
    expect(parsed.success).toBe(true)
    expect(parsed.data?.browser_automation_engine).toBeUndefined()
  })
})

View File

@@ -30,6 +30,7 @@ export const BuiltinAgentNameSchema = z.enum([
export const BuiltinSkillNameSchema = z.enum([
"playwright",
"agent-browser",
"frontend-ui-ux",
"git-master",
])
@@ -76,6 +77,7 @@ export const HookNameSchema = z.enum([
"thinking-block-validator",
"ralph-loop",
"category-skill-reminder",
"compaction-context-injector",
"claude-code-hooks",
@@ -83,6 +85,7 @@ export const HookNameSchema = z.enum([
"edit-error-recovery",
"delegate-task-retry",
"prometheus-md-only",
"sisyphus-junior-notepad",
"start-work",
"atlas",
])
@@ -160,7 +163,7 @@ export const CategoryConfigSchema = z.object({
type: z.enum(["enabled", "disabled"]),
budgetTokens: z.number().optional(),
}).optional(),
reasoningEffort: z.enum(["low", "medium", "high"]).optional(),
reasoningEffort: z.enum(["low", "medium", "high", "xhigh"]).optional(),
textVerbosity: z.enum(["low", "medium", "high"]).optional(),
tools: z.record(z.string(), z.boolean()).optional(),
prompt_append: z.string().optional(),
@@ -297,6 +300,32 @@ export const GitMasterConfigSchema = z.object({
include_co_authored_by: z.boolean().default(true),
})
/** Browser automation backends that may be selected in config: "playwright" or "agent-browser". */
export const BrowserAutomationProviderSchema = z.enum(["playwright", "agent-browser"])
/**
 * Shape of the `browser_automation_engine` section of the plugin config.
 * `provider` has a default, so an empty object parses successfully.
 */
export const BrowserAutomationConfigSchema = z.object({
/**
* Browser automation provider to use for the "playwright" skill.
* - "playwright": Uses Playwright MCP server (@playwright/mcp) - default
* - "agent-browser": Uses Vercel's agent-browser CLI (requires: bun add -g agent-browser)
*/
provider: BrowserAutomationProviderSchema.default("playwright"),
})
/**
 * Pane arrangements accepted for the `layout` field of the tmux config.
 * NOTE(review): the names look like tmux's preset layout names — confirm they are passed to tmux unchanged.
 */
export const TmuxLayoutSchema = z.enum([
'main-horizontal', // main pane top, agent panes bottom stack
'main-vertical', // main pane left, agent panes right stack (default)
'tiled', // all panes same size grid
'even-horizontal', // all panes horizontal row
'even-vertical', // all panes vertical stack
])
/**
 * Settings for the optional tmux integration (the `tmux` key of the plugin config).
 * Every field carries a default, so an empty object parses to a fully populated config.
 */
export const TmuxConfigSchema = z.object({
// Off by default; the integration has to be switched on explicitly.
enabled: z.boolean().default(false),
// Pane arrangement; falls back to 'main-vertical' when omitted.
layout: TmuxLayoutSchema.default('main-vertical'),
// Main pane size, restricted to 20..80 with default 60 — presumably a percentage of the window; TODO confirm unit.
main_pane_size: z.number().min(20).max(80).default(60),
// Minimum main pane width: at least 40, default 120 — presumably terminal columns; TODO confirm unit.
main_pane_min_width: z.number().min(40).default(120),
// Minimum agent pane width: at least 20, default 40 — presumably terminal columns; TODO confirm unit.
agent_pane_min_width: z.number().min(20).default(40),
})
export const OhMyOpenCodeConfigSchema = z.object({
$schema: z.string().optional(),
disabled_mcps: z.array(AnyMcpNameSchema).optional(),
@@ -316,6 +345,8 @@ export const OhMyOpenCodeConfigSchema = z.object({
background_task: BackgroundTaskConfigSchema.optional(),
notification: NotificationConfigSchema.optional(),
git_master: GitMasterConfigSchema.optional(),
browser_automation_engine: BrowserAutomationConfigSchema.optional(),
tmux: TmuxConfigSchema.optional(),
})
export type OhMyOpenCodeConfig = z.infer<typeof OhMyOpenCodeConfigSchema>
@@ -338,5 +369,9 @@ export type CategoryConfig = z.infer<typeof CategoryConfigSchema>
export type CategoriesConfig = z.infer<typeof CategoriesConfigSchema>
export type BuiltinCategoryName = z.infer<typeof BuiltinCategoryNameSchema>
export type GitMasterConfig = z.infer<typeof GitMasterConfigSchema>
export type BrowserAutomationProvider = z.infer<typeof BrowserAutomationProviderSchema>
export type BrowserAutomationConfig = z.infer<typeof BrowserAutomationConfigSchema>
export type TmuxConfig = z.infer<typeof TmuxConfigSchema>
export type TmuxLayout = z.infer<typeof TmuxLayoutSchema>
export { AnyMcpNameSchema, type AnyMcpName, McpNameSchema, type McpName } from "../mcp/types"

View File

@@ -2,31 +2,31 @@
## OVERVIEW
Core feature modules + Claude Code compatibility layer. Background agents, skill MCP, builtin skills/commands, 5 loaders.
Core feature modules + Claude Code compatibility layer. Orchestrates background agents, skill MCPs, builtin skills/commands, and 16 feature modules.
## STRUCTURE
```
features/
├── background-agent/ # Task lifecycle (1335 lines)
├── background-agent/ # Task lifecycle (1377 lines)
│ ├── manager.ts # Launch → poll → complete
── concurrency.ts # Per-provider limits
│ └── types.ts # BackgroundTask, LaunchInput
── skill-mcp-manager/ # MCP client lifecycle
│ ├── manager.ts # Lazy loading, cleanup
│ └── types.ts # SkillMcpConfig
├── builtin-skills/ # Playwright, git-master, frontend-ui-ux
│ └── skills.ts # 1203 lines
├── builtin-commands/ # ralph-loop, refactor, init-deep
── concurrency.ts # Per-provider limits
├── builtin-skills/ # Core skills (1729 lines)
│ └── skills.ts # agent-browser, dev-browser, frontend-ui-ux, git-master, typescript-programmer
├── builtin-commands/ # ralph-loop, refactor, ulw-loop, init-deep, start-work, cancel-ralph
├── claude-code-agent-loader/ # ~/.claude/agents/*.md
├── claude-code-command-loader/ # ~/.claude/commands/*.md
├── claude-code-mcp-loader/ # .mcp.json
├── claude-code-mcp-loader/ # .mcp.json with ${VAR} expansion
├── claude-code-plugin-loader/ # installed_plugins.json
├── claude-code-session-state/ # Session persistence
├── opencode-skill-loader/ # Skills from 6 directories
├── context-injector/ # AGENTS.md/README.md injection
├── boulder-state/ # Todo state persistence
── hook-message-injector/ # Message injection
── hook-message-injector/ # Message injection
├── task-toast-manager/ # Background task notifications
├── skill-mcp-manager/ # MCP client lifecycle (520 lines)
├── tmux-subagent/ # Tmux session management
└── ... (16 modules total)
```
## LOADER PRIORITY
@@ -41,8 +41,9 @@ features/
- **Lifecycle**: `launch` → `poll` (2s) → `complete`
- **Stability**: 3 consecutive polls = idle
- **Concurrency**: Per-provider/model limits
- **Concurrency**: Per-provider/model limits via `ConcurrencyManager`
- **Cleanup**: 30m TTL, 3m stale timeout
- **State**: Per-session Maps, cleaned on `session.deleted`
## SKILL MCP
@@ -55,3 +56,4 @@ features/
- **Sequential delegation**: Use `delegate_task` in parallel instead
- **Trust self-reports**: ALWAYS verify
- **Main thread blocks**: No heavy I/O in loader init
- **Direct state mutation**: Use managers for boulder/session state

View File

@@ -776,7 +776,7 @@ describe("BackgroundManager.notifyParentSession - dynamic message lookup", () =>
parentModel: { providerID: "old", modelID: "old-model" },
}
const currentMessage: CurrentMessage = {
agent: "Sisyphus",
agent: "sisyphus",
model: { providerID: "anthropic", modelID: "claude-opus-4-5" },
}
@@ -784,7 +784,7 @@ describe("BackgroundManager.notifyParentSession - dynamic message lookup", () =>
const promptBody = buildNotificationPromptBody(task, currentMessage)
// #then - uses currentMessage values, not task.parentModel/parentAgent
expect(promptBody.agent).toBe("Sisyphus")
expect(promptBody.agent).toBe("sisyphus")
expect(promptBody.model).toEqual({ providerID: "anthropic", modelID: "claude-opus-4-5" })
})
@@ -827,11 +827,11 @@ describe("BackgroundManager.notifyParentSession - dynamic message lookup", () =>
status: "completed",
startedAt: new Date(),
completedAt: new Date(),
parentAgent: "Sisyphus",
parentAgent: "sisyphus",
parentModel: { providerID: "anthropic", modelID: "claude-opus" },
}
const currentMessage: CurrentMessage = {
agent: "Sisyphus",
agent: "sisyphus",
model: { providerID: "anthropic" },
}
@@ -839,7 +839,7 @@ describe("BackgroundManager.notifyParentSession - dynamic message lookup", () =>
const promptBody = buildNotificationPromptBody(task, currentMessage)
// #then - model not passed due to incomplete data
expect(promptBody.agent).toBe("Sisyphus")
expect(promptBody.agent).toBe("sisyphus")
expect("model" in promptBody).toBe(false)
})
@@ -856,7 +856,7 @@ describe("BackgroundManager.notifyParentSession - dynamic message lookup", () =>
status: "completed",
startedAt: new Date(),
completedAt: new Date(),
parentAgent: "Sisyphus",
parentAgent: "sisyphus",
parentModel: { providerID: "anthropic", modelID: "claude-opus" },
}
@@ -864,7 +864,7 @@ describe("BackgroundManager.notifyParentSession - dynamic message lookup", () =>
const promptBody = buildNotificationPromptBody(task, null)
// #then - falls back to task.parentAgent, no model
expect(promptBody.agent).toBe("Sisyphus")
expect(promptBody.agent).toBe("sisyphus")
expect("model" in promptBody).toBe(false)
})
})

View File

@@ -7,7 +7,8 @@ import type {
} from "./types"
import { log, getAgentToolRestrictions } from "../../shared"
import { ConcurrencyManager } from "./concurrency"
import type { BackgroundTaskConfig } from "../../config/schema"
import type { BackgroundTaskConfig, TmuxConfig } from "../../config/schema"
import { isInsideTmux } from "../../shared/tmux"
import { subagentSessions } from "../claude-code-session-state"
import { getTaskToastManager } from "../task-toast-manager"
@@ -54,6 +55,14 @@ interface QueueItem {
input: LaunchInput
}
/**
 * Payload passed to the `onSubagentSessionCreated` callback after
 * `BackgroundManager` has created the session for a background task.
 */
export interface SubagentSessionCreatedEvent {
/** ID of the newly created session (taken from the session-create result). */
sessionID: string
/** ID of the session that launched the task (`input.parentSessionID` at the call site). */
parentID: string
/** The task's description (`input.description` at the call site), forwarded as a title. */
title: string
}
/**
 * Async hook invoked by `BackgroundManager` once a subagent session exists.
 * The manager awaits it and logs a rejection instead of rethrowing, and only
 * calls it when tmux is enabled in config and the process is running inside tmux.
 */
export type OnSubagentSessionCreated = (event: SubagentSessionCreatedEvent) => Promise<void>
export class BackgroundManager {
private static cleanupManagers = new Set<BackgroundManager>()
private static cleanupRegistered = false
@@ -68,12 +77,20 @@ export class BackgroundManager {
private concurrencyManager: ConcurrencyManager
private shutdownTriggered = false
private config?: BackgroundTaskConfig
private tmuxEnabled: boolean
private onSubagentSessionCreated?: OnSubagentSessionCreated
private queuesByKey: Map<string, QueueItem[]> = new Map()
private processingKeys: Set<string> = new Set()
constructor(ctx: PluginInput, config?: BackgroundTaskConfig) {
constructor(
ctx: PluginInput,
config?: BackgroundTaskConfig,
options?: {
tmuxConfig?: TmuxConfig
onSubagentSessionCreated?: OnSubagentSessionCreated
}
) {
this.tasks = new Map()
this.notifications = new Map()
this.pendingByParent = new Map()
@@ -81,6 +98,8 @@ export class BackgroundManager {
this.directory = ctx.directory
this.concurrencyManager = new ConcurrencyManager(config)
this.config = config
this.tmuxEnabled = options?.tmuxConfig?.enabled ?? false
this.onSubagentSessionCreated = options?.onSubagentSessionCreated
this.registerProcessCleanup()
}
@@ -222,6 +241,29 @@ export class BackgroundManager {
const sessionID = createResult.data.id
subagentSessions.add(sessionID)
log("[background-agent] tmux callback check", {
hasCallback: !!this.onSubagentSessionCreated,
tmuxEnabled: this.tmuxEnabled,
isInsideTmux: isInsideTmux(),
sessionID,
parentID: input.parentSessionID,
})
if (this.onSubagentSessionCreated && this.tmuxEnabled && isInsideTmux()) {
log("[background-agent] Invoking tmux callback NOW", { sessionID })
await this.onSubagentSessionCreated({
sessionID,
parentID: input.parentSessionID,
title: input.description,
}).catch((err) => {
log("[background-agent] Failed to spawn tmux pane:", err)
})
log("[background-agent] tmux callback completed, waiting 200ms")
await new Promise(r => setTimeout(r, 200))
} else {
log("[background-agent] SKIP tmux callback - conditions not met")
}
// Update task to running state
task.status = "running"
task.startedAt = new Date()

View File

@@ -55,7 +55,7 @@ ${REFACTOR_TEMPLATE}
},
"start-work": {
description: "(builtin) Start Sisyphus work session from Prometheus plan",
agent: "Atlas",
agent: "atlas",
template: `<command-instruction>
${START_WORK_TEMPLATE}
</command-instruction>
@@ -81,7 +81,7 @@ export function loadBuiltinCommands(
for (const [name, definition] of Object.entries(BUILTIN_COMMAND_DEFINITIONS)) {
if (!disabled.has(name as BuiltinCommandName)) {
const { argumentHint: _argumentHint, ...openCodeCompatible } = definition
commands[name] = openCodeCompatible as CommandDefinition
commands[name] = { ...openCodeCompatible, name } as CommandDefinition
}
}

View File

@@ -17,7 +17,7 @@ export const RALPH_LOOP_TEMPLATE = `You are starting a Ralph Loop - a self-refer
## Exit Conditions
1. **Completion**: Output \`<promise>DONE</promise>\` (or custom promise text) when fully complete
1. **Completion**: Output your completion promise tag when fully complete
2. **Max Iterations**: Loop stops automatically at limit
3. **Cancel**: User runs \`/cancel-ralph\` command

View File

@@ -25,7 +25,7 @@ export const START_WORK_TEMPLATE = `You are starting a Sisyphus work session.
}
\`\`\`
5. **Read the plan file** and start executing tasks according to Orchestrator Sisyphus workflow
5. **Read the plan file** and start executing tasks according to atlas workflow
## OUTPUT FORMAT
@@ -69,4 +69,4 @@ Reading plan and beginning execution...
- The session_id is injected by the hook - use it directly
- Always update boulder.json BEFORE starting work
- Read the FULL plan file before delegating any tasks
- Follow Orchestrator Sisyphus delegation protocols (7-section format)`
- Follow atlas delegation protocols (7-section format)`

View File

@@ -0,0 +1,336 @@
---
name: agent-browser
description: Automates browser interactions for web testing, form filling, screenshots, and data extraction. Use when the user needs to navigate websites, interact with web pages, fill forms, take screenshots, test web applications, or extract information from web pages.
---
# Browser Automation with agent-browser
## Quick start
```bash
agent-browser open <url> # Navigate to page
agent-browser snapshot -i # Get interactive elements with refs
agent-browser click @e1 # Click element by ref
agent-browser fill @e2 "text" # Fill input by ref
agent-browser close # Close browser
```
## Core workflow
1. Navigate: `agent-browser open <url>`
2. Snapshot: `agent-browser snapshot -i` (returns elements with refs like `@e1`, `@e2`)
3. Interact using refs from the snapshot
4. Re-snapshot after navigation or significant DOM changes
## Commands
### Navigation
```bash
agent-browser open <url> # Navigate to URL
agent-browser back # Go back
agent-browser forward # Go forward
agent-browser reload # Reload page
agent-browser close # Close browser
```
### Snapshot (page analysis)
```bash
agent-browser snapshot # Full accessibility tree
agent-browser snapshot -i # Interactive elements only (recommended)
agent-browser snapshot -c # Compact output
agent-browser snapshot -d 3 # Limit depth to 3
agent-browser snapshot -s "#main" # Scope to CSS selector
```
### Interactions (use @refs from snapshot)
```bash
agent-browser click @e1 # Click
agent-browser dblclick @e1 # Double-click
agent-browser focus @e1 # Focus element
agent-browser fill @e2 "text" # Clear and type
agent-browser type @e2 "text" # Type without clearing
agent-browser press Enter # Press key
agent-browser press Control+a # Key combination
agent-browser keydown Shift # Hold key down
agent-browser keyup Shift # Release key
agent-browser hover @e1 # Hover
agent-browser check @e1 # Check checkbox
agent-browser uncheck @e1 # Uncheck checkbox
agent-browser select @e1 "value" # Select dropdown
agent-browser scroll down 500 # Scroll page
agent-browser scrollintoview @e1 # Scroll element into view
agent-browser drag @e1 @e2 # Drag and drop
agent-browser upload @e1 file.pdf # Upload files
```
### Get information
```bash
agent-browser get text @e1 # Get element text
agent-browser get html @e1 # Get innerHTML
agent-browser get value @e1 # Get input value
agent-browser get attr @e1 href # Get attribute
agent-browser get title # Get page title
agent-browser get url # Get current URL
agent-browser get count ".item" # Count matching elements
agent-browser get box @e1 # Get bounding box
```
### Check state
```bash
agent-browser is visible @e1 # Check if visible
agent-browser is enabled @e1 # Check if enabled
agent-browser is checked @e1 # Check if checked
```
### Screenshots & PDF
```bash
agent-browser screenshot # Screenshot to stdout
agent-browser screenshot path.png # Save to file
agent-browser screenshot --full # Full page
agent-browser pdf output.pdf # Save as PDF
```
### Video recording
```bash
agent-browser record start ./demo.webm # Start recording (uses current URL + state)
agent-browser click @e1 # Perform actions
agent-browser record stop # Stop and save video
agent-browser record restart ./take2.webm # Stop current + start new recording
```
Recording creates a fresh context but preserves cookies/storage from your session.
### Wait
```bash
agent-browser wait @e1 # Wait for element
agent-browser wait 2000 # Wait milliseconds
agent-browser wait --text "Success" # Wait for text
agent-browser wait --url "**/dashboard" # Wait for URL pattern
agent-browser wait --load networkidle # Wait for network idle
agent-browser wait --fn "window.ready" # Wait for JS condition
```
### Mouse control
```bash
agent-browser mouse move 100 200 # Move mouse
agent-browser mouse down left # Press button
agent-browser mouse up left # Release button
agent-browser mouse wheel 100 # Scroll wheel
```
### Semantic locators (alternative to refs)
```bash
agent-browser find role button click --name "Submit"
agent-browser find text "Sign In" click
agent-browser find label "Email" fill "user@test.com"
agent-browser find first ".item" click
agent-browser find nth 2 "a" text
```
### Browser settings
```bash
agent-browser set viewport 1920 1080 # Set viewport size
agent-browser set device "iPhone 14" # Emulate device
agent-browser set geo 37.7749 -122.4194 # Set geolocation
agent-browser set offline on # Toggle offline mode
agent-browser set headers '{"X-Key":"v"}' # Extra HTTP headers
agent-browser set credentials user pass # HTTP basic auth
agent-browser set media dark # Emulate color scheme
```
### Cookies & Storage
```bash
agent-browser cookies # Get all cookies
agent-browser cookies set name value # Set cookie
agent-browser cookies clear # Clear cookies
agent-browser storage local # Get all localStorage
agent-browser storage local key # Get specific key
agent-browser storage local set k v # Set value
agent-browser storage local clear # Clear all
agent-browser storage session # Get all sessionStorage
agent-browser storage session key # Get specific key
agent-browser storage session set k v # Set value
agent-browser storage session clear # Clear all
```
### Network
```bash
agent-browser network route <url> # Intercept requests
agent-browser network route <url> --abort # Block requests
agent-browser network route <url> --body '{}' # Mock response
agent-browser network unroute [url] # Remove routes
agent-browser network requests # View tracked requests
agent-browser network requests --filter api # Filter requests
```
### Tabs & Windows
```bash
agent-browser tab # List tabs
agent-browser tab new [url] # New tab
agent-browser tab 2 # Switch to tab
agent-browser tab close # Close tab
agent-browser window new # New window
```
### Frames
```bash
agent-browser frame "#iframe" # Switch to iframe
agent-browser frame main # Back to main frame
```
### Dialogs
```bash
agent-browser dialog accept [text] # Accept dialog
agent-browser dialog dismiss # Dismiss dialog
```
### JavaScript
```bash
agent-browser eval "document.title" # Run JavaScript
```
## Global Options
| Option | Description |
|--------|-------------|
| `--session <name>` | Isolated browser session (`AGENT_BROWSER_SESSION` env) |
| `--profile <path>` | Persistent browser profile (`AGENT_BROWSER_PROFILE` env) |
| `--headers <json>` | HTTP headers scoped to URL's origin |
| `--executable-path <path>` | Custom browser binary (`AGENT_BROWSER_EXECUTABLE_PATH` env) |
| `--args <args>` | Browser launch args (`AGENT_BROWSER_ARGS` env) |
| `--user-agent <ua>` | Custom User-Agent (`AGENT_BROWSER_USER_AGENT` env) |
| `--proxy <url>` | Proxy server (`AGENT_BROWSER_PROXY` env) |
| `--proxy-bypass <hosts>` | Hosts to bypass proxy (`AGENT_BROWSER_PROXY_BYPASS` env) |
| `-p, --provider <name>` | Cloud browser provider (`AGENT_BROWSER_PROVIDER` env) |
| `--json` | Machine-readable JSON output |
| `--headed` | Show browser window (not headless) |
| `--cdp <port\|wss://url>` | Connect via Chrome DevTools Protocol |
| `--debug` | Debug output |
## Example: Form submission
```bash
agent-browser open https://example.com/form
agent-browser snapshot -i
# Output shows: textbox "Email" [ref=e1], textbox "Password" [ref=e2], button "Submit" [ref=e3]
agent-browser fill @e1 "user@example.com"
agent-browser fill @e2 "password123"
agent-browser click @e3
agent-browser wait --load networkidle
agent-browser snapshot -i # Check result
```
## Example: Authentication with saved state
```bash
# Login once
agent-browser open https://app.example.com/login
agent-browser snapshot -i
agent-browser fill @e1 "username"
agent-browser fill @e2 "password"
agent-browser click @e3
agent-browser wait --url "**/dashboard"
agent-browser state save auth.json
# Later sessions: load saved state
agent-browser state load auth.json
agent-browser open https://app.example.com/dashboard
```
### Header-based Auth (Skip login flows)
```bash
# Headers scoped to api.example.com only
agent-browser open api.example.com --headers '{"Authorization": "Bearer <token>"}'
# Navigate to another domain - headers NOT sent (safe)
agent-browser open other-site.com
# Global headers (all domains)
agent-browser set headers '{"X-Custom-Header": "value"}'
```
## Sessions & Persistent Profiles
### Sessions (parallel browsers)
```bash
agent-browser --session test1 open site-a.com
agent-browser --session test2 open site-b.com
agent-browser session list
```
### Persistent Profiles
Persists cookies, localStorage, IndexedDB, service workers, cache, login sessions across browser restarts.
```bash
agent-browser --profile ~/.myapp-profile open myapp.com
# Or via env var
AGENT_BROWSER_PROFILE=~/.myapp-profile agent-browser open myapp.com
```
- Use different profile paths for different projects
- Login once → restart browser → still logged in
- Stores: cookies, localStorage, IndexedDB, service workers, browser cache
## JSON output (for parsing)
Add `--json` for machine-readable output:
```bash
agent-browser snapshot -i --json
agent-browser get text @e1 --json
```
## Debugging
```bash
agent-browser open example.com --headed # Show browser window
agent-browser console # View console messages
agent-browser errors # View page errors
agent-browser record start ./debug.webm # Record from current page
agent-browser record stop # Save recording
agent-browser connect 9222 # Local CDP port
agent-browser --cdp "wss://browser-service.com/cdp?token=..." snapshot # Remote via WebSocket
agent-browser console --clear # Clear console
agent-browser errors --clear # Clear errors
agent-browser highlight @e1 # Highlight element
agent-browser trace start # Start recording trace
agent-browser trace stop trace.zip # Stop and save trace
```
---
## Installation
### Step 1: Install agent-browser CLI
```bash
bun add -g agent-browser
```
### Step 2: Install Playwright browsers
**IMPORTANT**: `agent-browser install` may fail on some platforms (e.g., darwin-arm64) with "No binary found" error. In that case, install Playwright browsers directly:
```bash
# Create a temp project and install playwright
cd /tmp && bun init -y && bun add playwright
# Install Chromium browser
bun playwright install chromium
```
This downloads Chrome for Testing to `~/Library/Caches/ms-playwright/`.
### Verify installation
```bash
agent-browser open https://example.com --headed
```
If the browser opens successfully, installation is complete.
### Troubleshooting
| Error | Solution |
|-------|----------|
| `No binary found for darwin-arm64` | Run `bun playwright install chromium` in a project with playwright dependency |
| `Executable doesn't exist at .../chromium-XXXX` | Re-run `bun playwright install chromium` |
| Browser doesn't open | Ensure `--headed` flag is used for visible browser |
---
Run `agent-browser --help` for all commands. Repo: https://github.com/vercel-labs/agent-browser

View File

@@ -0,0 +1,213 @@
---
name: dev-browser
description: Browser automation with persistent page state. Use when users ask to navigate websites, fill forms, take screenshots, extract web data, test web apps, or automate browser workflows. Trigger phrases include "go to [url]", "click on", "fill out the form", "take a screenshot", "scrape", "automate", "test the website", "log into", or any browser interaction request.
---
# Dev Browser Skill
Browser automation that maintains page state across script executions. Write small, focused scripts to accomplish tasks incrementally. Once you've proven out part of a workflow and there is repeated work to be done, you can write a script to do the repeated work in a single execution.
## Choosing Your Approach
- **Local/source-available sites**: Read the source code first to write selectors directly
- **Unknown page layouts**: Use `getAISnapshot()` to discover elements and `selectSnapshotRef()` to interact with them
- **Visual feedback**: Take screenshots to see what the user sees
## Setup
> **Installation**: See [references/installation.md](references/installation.md) for detailed setup instructions including Windows support.
Two modes available. Ask the user if unclear which to use.
### Standalone Mode (Default)
Launches a new Chromium browser for fresh automation sessions.
```bash
./skills/dev-browser/server.sh &
```
Add `--headless` flag if user requests it. **Wait for the `Ready` message before running scripts.**
### Extension Mode
Connects to user's existing Chrome browser. Use this when:
- The user is already logged into sites and wants you to do things behind an authed experience that isn't local dev.
- The user asks you to use the extension
**Important**: The core flow is still the same. You create named pages inside of their browser.
**Start the relay server:**
```bash
cd skills/dev-browser && npm i && npm run start-extension &
```
Wait for `Waiting for extension to connect...` followed by `Extension connected` in the console. The second message confirms that a client has connected and the browser is ready to be controlled.
**Workflow:**
1. Scripts call `client.page("name")` just like the normal mode to create new pages / connect to existing ones.
2. Automation runs on the user's actual browser session
If the extension hasn't connected yet, tell the user to launch and activate it. Download link: https://github.com/SawyerHood/dev-browser/releases
## Writing Scripts
> **Run all scripts from `skills/dev-browser/` directory.** The `@/` import alias requires this directory's config.
Execute scripts inline using heredocs:
```bash
cd skills/dev-browser && npx tsx <<'EOF'
import { connect, waitForPageLoad } from "@/client.js";
const client = await connect();
// Create page with custom viewport size (optional)
const page = await client.page("example", { viewport: { width: 1920, height: 1080 } });
await page.goto("https://example.com");
await waitForPageLoad(page);
console.log({ title: await page.title(), url: page.url() });
await client.disconnect();
EOF
```
**Write to `tmp/` files only when** the script needs reuse, is complex, or user explicitly requests it.
### Key Principles
1. **Small scripts**: Each script does ONE thing (navigate, click, fill, check)
2. **Evaluate state**: Log/return state at the end to decide next steps
3. **Descriptive page names**: Use `"checkout"`, `"login"`, not `"main"`
4. **Disconnect to exit**: `await client.disconnect()` - pages persist on server
5. **Plain JS in evaluate**: `page.evaluate()` runs in browser - no TypeScript syntax
## Workflow Loop
Follow this pattern for complex tasks:
1. **Write a script** to perform one action
2. **Run it** and observe the output
3. **Evaluate** - did it work? What's the current state?
4. **Decide** - is the task complete or do we need another script?
5. **Repeat** until task is done
### No TypeScript in Browser Context
Code passed to `page.evaluate()` runs in the browser, which doesn't understand TypeScript:
```typescript
// ✅ Correct: plain JavaScript
const text = await page.evaluate(() => {
return document.body.innerText;
});
// ❌ Wrong: TypeScript syntax will fail at runtime
const text = await page.evaluate(() => {
const el: HTMLElement = document.body; // Type annotation breaks in browser!
return el.innerText;
});
```
## Scraping Data
For scraping large datasets, intercept and replay network requests rather than scrolling the DOM. See [references/scraping.md](references/scraping.md) for the complete guide covering request capture, schema discovery, and paginated API replay.
## Client API
```typescript
const client = await connect();
// Get or create named page (viewport only applies to new pages)
const page = await client.page("name");
const pageWithSize = await client.page("name", { viewport: { width: 1920, height: 1080 } });
const pages = await client.list(); // List all page names
await client.close("name"); // Close a page
await client.disconnect(); // Disconnect (pages persist)
// ARIA Snapshot methods
const snapshot = await client.getAISnapshot("name"); // Get accessibility tree
const element = await client.selectSnapshotRef("name", "e5"); // Get element by ref
```
The `page` object is a standard Playwright Page.
## Waiting
```typescript
import { waitForPageLoad } from "@/client.js";
await waitForPageLoad(page); // After navigation
await page.waitForSelector(".results"); // For specific elements
await page.waitForURL("**/success"); // For specific URL
```
## Inspecting Page State
### Screenshots
```typescript
await page.screenshot({ path: "tmp/screenshot.png" });
await page.screenshot({ path: "tmp/full.png", fullPage: true });
```
### ARIA Snapshot (Element Discovery)
Use `getAISnapshot()` to discover page elements. Returns YAML-formatted accessibility tree:
```yaml
- banner:
- link "Hacker News" [ref=e1]
- navigation:
- link "new" [ref=e2]
- main:
- list:
- listitem:
- link "Article Title" [ref=e8]
- link "328 comments" [ref=e9]
- contentinfo:
- textbox [ref=e10]
- /placeholder: "Search"
```
**Interpreting refs:**
- `[ref=eN]` - Element reference for interaction (visible, clickable elements only)
- `[checked]`, `[disabled]`, `[expanded]` - Element states
- `[level=N]` - Heading level
- `/url:`, `/placeholder:` - Element properties
**Interacting with refs:**
```typescript
const snapshot = await client.getAISnapshot("hackernews");
console.log(snapshot); // Find the ref you need
const element = await client.selectSnapshotRef("hackernews", "e2");
await element.click();
```
## Error Recovery
Page state persists after failures. Debug with:
```bash
cd skills/dev-browser && npx tsx <<'EOF'
import { connect } from "@/client.js";
const client = await connect();
const page = await client.page("hackernews");
await page.screenshot({ path: "tmp/debug.png" });
console.log({
url: page.url(),
title: await page.title(),
bodyText: await page.textContent("body").then((t) => t?.slice(0, 200)),
});
await client.disconnect();
EOF
```

View File

@@ -0,0 +1,193 @@
# Dev Browser Installation Guide
This guide covers installation for all platforms: macOS, Linux, and Windows.
## Prerequisites
- [Node.js](https://nodejs.org) v18 or later with npm
- Git (for cloning the skill)
## Installation
### Step 1: Clone the Skill
```bash
# Clone dev-browser to a temporary location
git clone https://github.com/sawyerhood/dev-browser /tmp/dev-browser-skill
# Copy to skills directory (adjust path as needed)
# For oh-my-opencode: already bundled
# For manual installation:
mkdir -p ~/.config/opencode/skills
cp -r /tmp/dev-browser-skill/skills/dev-browser ~/.config/opencode/skills/dev-browser
# Cleanup
rm -rf /tmp/dev-browser-skill
```
**Windows (PowerShell):**
```powershell
# Clone dev-browser to temp location
git clone https://github.com/sawyerhood/dev-browser $env:TEMP\dev-browser-skill
# Copy to skills directory
New-Item -ItemType Directory -Force -Path "$env:USERPROFILE\.config\opencode\skills"
Copy-Item -Recurse "$env:TEMP\dev-browser-skill\skills\dev-browser" "$env:USERPROFILE\.config\opencode\skills\dev-browser"
# Cleanup
Remove-Item -Recurse -Force "$env:TEMP\dev-browser-skill"
```
### Step 2: Install Dependencies
```bash
cd ~/.config/opencode/skills/dev-browser
npm install
```
**Windows (PowerShell):**
```powershell
cd "$env:USERPROFILE\.config\opencode\skills\dev-browser"
npm install
```
### Step 3: Start the Server
#### Standalone Mode (New Browser Instance)
**macOS/Linux:**
```bash
cd ~/.config/opencode/skills/dev-browser
./server.sh &
# Or for headless:
./server.sh --headless &
```
**Windows (PowerShell):**
```powershell
cd "$env:USERPROFILE\.config\opencode\skills\dev-browser"
Start-Process -NoNewWindow -FilePath "node" -ArgumentList "server.js"
# Or for headless:
Start-Process -NoNewWindow -FilePath "node" -ArgumentList "server.js", "--headless"
```
**Windows (CMD):**
```cmd
cd %USERPROFILE%\.config\opencode\skills\dev-browser
start /B node server.js
```
Wait for the `Ready` message before running scripts.
#### Extension Mode (Use Existing Chrome)
**macOS/Linux:**
```bash
cd ~/.config/opencode/skills/dev-browser
npm run start-extension &
```
**Windows (PowerShell):**
```powershell
cd "$env:USERPROFILE\.config\opencode\skills\dev-browser"
Start-Process -NoNewWindow -FilePath "npm" -ArgumentList "run", "start-extension"
```
Wait for `Extension connected` message.
## Chrome Extension Setup (Optional)
The Chrome extension allows controlling your existing Chrome browser with all your logged-in sessions.
### Installation
1. Download `extension.zip` from [latest release](https://github.com/sawyerhood/dev-browser/releases/latest)
2. Extract to a permanent location:
- **macOS/Linux:** `~/.dev-browser-extension`
- **Windows:** `%USERPROFILE%\.dev-browser-extension`
3. Open Chrome → `chrome://extensions`
4. Enable "Developer mode" (toggle in top right)
5. Click "Load unpacked" → select the extracted folder
### Usage
1. Click the Dev Browser extension icon in Chrome toolbar
2. Toggle to "Active"
3. Start the extension relay server (see above)
4. Use dev-browser scripts - they'll control your existing Chrome
## Troubleshooting
### Server Won't Start
**Check Node.js version:**
```bash
node --version # Should be v18+
```
**Check port availability:**
```bash
# macOS/Linux
lsof -i :3000
# Windows
netstat -ano | findstr :3000
```
### Playwright Installation Issues
If Chromium fails to install:
```bash
npx playwright install chromium
```
### Windows-Specific Issues
**Execution Policy:**
If PowerShell scripts are blocked:
```powershell
Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope CurrentUser
```
**Path Issues:**
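Use forward slashes or plain backslashes inside a quoted path (PowerShell does not treat the backslash as an escape character, so no doubling is needed):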
```powershell
# Good
cd "$env:USERPROFILE/.config/opencode/skills/dev-browser"
# Also good
cd "$env:USERPROFILE\.config\opencode\skills\dev-browser"
```
### Extension Not Connecting
1. Ensure extension is "Active" (click icon to toggle)
2. Check relay server is running (`npm run start-extension`)
3. Look for `Extension connected` message in console
4. Try reloading the extension in `chrome://extensions`
## Permissions
To skip permission prompts in Claude Code, add to `~/.claude/settings.json`:
```json
{
"permissions": {
"allow": ["Skill(dev-browser:dev-browser)", "Bash(npx tsx:*)"]
}
}
```
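## Updating
The commands below assume the skill directory is a git checkout. If you installed by copying the folder as in Step 1 (which leaves no `.git` directory behind), repeat Steps 1 and 2 instead.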
```bash
cd ~/.config/opencode/skills/dev-browser
git pull
npm install
```
**Windows:**
```powershell
cd "$env:USERPROFILE\.config\opencode\skills\dev-browser"
git pull
npm install
```

View File

@@ -0,0 +1,155 @@
# Data Scraping Guide
For large datasets (followers, posts, search results), **intercept and replay network requests** rather than scrolling and parsing the DOM. This is faster, more reliable, and handles pagination automatically.
## Why Not Scroll?
Scrolling is slow, unreliable, and wastes time. APIs return structured data with pagination built in. Always prefer API replay.
## Start Small, Then Scale
**Don't try to automate everything at once.** Work incrementally:
1. **Capture one request** - verify you're intercepting the right endpoint
2. **Inspect one response** - understand the schema before writing extraction code
3. **Extract a few items** - make sure your parsing logic works
4. **Then scale up** - add pagination loop only after the basics work
This prevents wasting time debugging a complex script when the issue is a simple path like `data.user.timeline` vs `data.user.result.timeline`.
## Step-by-Step Workflow
### 1. Capture Request Details
First, intercept a request to understand URL structure and required headers:
```typescript
import { connect, waitForPageLoad } from "@/client.js";
import * as fs from "node:fs";
const client = await connect();
const page = await client.page("site");
let capturedRequest = null;
page.on("request", (request) => {
const url = request.url();
// Look for API endpoints (adjust pattern for your target site)
if (url.includes("/api/") || url.includes("/graphql/")) {
capturedRequest = {
url: url,
headers: request.headers(),
method: request.method(),
};
fs.writeFileSync("tmp/request-details.json", JSON.stringify(capturedRequest, null, 2));
console.log("Captured request:", url.substring(0, 80) + "...");
}
});
await page.goto("https://example.com/profile");
await waitForPageLoad(page);
await page.waitForTimeout(3000);
await client.disconnect();
```
### 2. Capture Response to Understand Schema
Save a raw response to inspect the data structure:
```typescript
// Save one raw API response to disk so its schema can be inspected offline.
// Relies on `page` and `fs` already being in scope in the surrounding script.
page.on("response", async (response) => {
  const url = response.url();
  // Only look at the endpoints of interest (adjust the patterns for your target site)
  if (!url.includes("UserTweets") && !url.includes("/api/data")) return;

  // A matching URL can still carry an empty or non-JSON body. Without this
  // guard the rejection would escape the async listener as an unhandled
  // rejection and could terminate the capture script.
  let json;
  try {
    json = await response.json();
  } catch {
    console.warn("Skipped non-JSON response:", url.substring(0, 80) + "...");
    return;
  }

  fs.writeFileSync("tmp/api-response.json", JSON.stringify(json, null, 2));
  console.log("Captured response");
});
```
Then analyze the structure to find:
- Where the data array lives (e.g., `data.user.result.timeline.instructions[].entries`)
- Where pagination cursors are (e.g., `cursor-bottom` entries)
- What fields you need to extract
### 3. Replay API with Pagination
Once you understand the schema, replay requests directly:
```typescript
// Replays a captured API request page by page from inside the browser context,
// collecting de-duplicated entries into tmp/results.json.
// Expects tmp/request-details.json to exist and contain a `headers` field.
import { connect } from "@/client.js";
import * as fs from "node:fs";

const client = await connect();
const page = await client.page("site");

const results = new Map(); // Keyed by entry id, so overlapping pages are deduplicated
const headers = JSON.parse(fs.readFileSync("tmp/request-details.json", "utf8")).headers;
const baseUrl = "https://example.com/api/data";
let cursor = null; // Cursor sent with the NEXT request; null for the first page

while (true) {
  // Build URL with pagination cursor (omitted on the first page)
  const params = cursor ? { count: 20, cursor } : { count: 20 };
  const url = `${baseUrl}?params=${encodeURIComponent(JSON.stringify(params))}`;

  // Execute fetch in browser context (has auth cookies/headers)
  const response = await page.evaluate(
    async ({ url, headers }) => {
      const res = await fetch(url, { headers });
      return res.json();
    },
    { url, headers }
  );

  // Extract data and the cursor advertised by THIS page (adjust paths for your API)
  const entries = response?.data?.entries || [];
  let nextCursor = null;
  for (const entry of entries) {
    if (entry.type === "cursor-bottom") {
      nextCursor = entry.value;
    } else if (entry.id && !results.has(entry.id)) {
      results.set(entry.id, {
        id: entry.id,
        text: entry.content,
        timestamp: entry.created_at,
      });
    }
  }

  console.log(`Fetched page, total: ${results.size}`);

  // Stop conditions: empty page, no cursor on this page, or a cursor that did
  // not advance. The cursor is tracked per page so that a last page without a
  // cursor entry ends the loop instead of re-requesting the previous cursor forever.
  if (entries.length === 0 || !nextCursor || nextCursor === cursor) break;
  cursor = nextCursor;

  // Rate limiting - be respectful
  await new Promise((r) => setTimeout(r, 500));
}

// Export results
const data = Array.from(results.values());
fs.writeFileSync("tmp/results.json", JSON.stringify(data, null, 2));
console.log(`Saved ${data.length} items`);

await client.disconnect();
```
## Key Patterns
| Pattern | Description |
| ----------------------- | ------------------------------------------------------ |
| `page.on('request')` | Capture outgoing request URL + headers |
| `page.on('response')` | Capture response data to understand schema |
| `page.evaluate(fetch)` | Replay requests in browser context (inherits auth) |
| `Map` for deduplication | APIs often return overlapping data across pages |
| Cursor-based pagination | Look for `cursor`, `next_token`, `offset` in responses |
## Tips
- **Extension mode**: `page.context().cookies()` doesn't work - capture auth headers from intercepted requests instead
- **Rate limiting**: Add 500ms+ delays between requests to avoid blocks
- **Stop conditions**: Check for empty results, missing cursor, or reaching a date/ID threshold
- **GraphQL APIs**: URL params often include `variables` and `features` JSON objects - capture and reuse them

View File

@@ -1,6 +1,6 @@
---
name: git-master
description: "MUST USE for ANY git operations. Atomic commits, rebase/squash, history search (blame, bisect, log -S). STRONGLY RECOMMENDED: Use with delegate_task(category='quick', skills=['git-master'], ...) to save context. Triggers: 'commit', 'rebase', 'squash', 'who wrote', 'when was X added', 'find the commit that'."
description: "MUST USE for ANY git operations. Atomic commits, rebase/squash, history search (blame, bisect, log -S). STRONGLY RECOMMENDED: Use with delegate_task(category='quick', load_skills=['git-master'], ...) to save context. Triggers: 'commit', 'rebase', 'squash', 'who wrote', 'when was X added', 'find the commit that'."
---
# Git Master Agent

View File

@@ -1,2 +1,2 @@
export * from "./types"
export { createBuiltinSkills } from "./skills"
export { createBuiltinSkills, type CreateBuiltinSkillsOptions } from "./skills"

View File

@@ -0,0 +1,89 @@
import { describe, test, expect } from "bun:test"
import { createBuiltinSkills } from "./skills"
// Covers which built-in skills createBuiltinSkills() returns for each
// browserProvider option, and that the list size stays at four either way.
describe("createBuiltinSkills", () => {
  // Looks a skill up by its `name`; yields undefined when the list has no such entry.
  const pick = <T extends { name: string }>(list: T[], wanted: string): T | undefined =>
    list.find((skill) => skill.name === wanted)

  test("returns playwright skill by default", () => {
    // #given - called without any options

    // #when
    const builtins = createBuiltinSkills()

    // #then - the default browser skill is the playwright MCP one
    const found = pick(builtins, "playwright")
    expect(found).toBeDefined()
    expect(found!.description).toContain("browser")
    expect(found!.mcpConfig).toHaveProperty("playwright")
  })

  test("returns playwright skill when browserProvider is 'playwright'", () => {
    // #given
    const opts = { browserProvider: "playwright" as const }

    // #when
    const builtins = createBuiltinSkills(opts)

    // #then - playwright present, agent-browser absent
    const viaPlaywright = pick(builtins, "playwright")
    const viaAgentBrowser = pick(builtins, "agent-browser")
    expect(viaPlaywright).toBeDefined()
    expect(viaAgentBrowser).toBeUndefined()
  })

  test("returns agent-browser skill when browserProvider is 'agent-browser'", () => {
    // #given
    const opts = { browserProvider: "agent-browser" as const }

    // #when
    const builtins = createBuiltinSkills(opts)

    // #then - agent-browser present with its CLI tool permission, playwright absent
    const viaAgentBrowser = pick(builtins, "agent-browser")
    const viaPlaywright = pick(builtins, "playwright")
    expect(viaAgentBrowser).toBeDefined()
    expect(viaAgentBrowser!.description).toContain("browser")
    expect(viaAgentBrowser!.allowedTools).toContain("Bash(agent-browser:*)")
    expect(viaAgentBrowser!.template).toContain("agent-browser")
    expect(viaPlaywright).toBeUndefined()
  })

  test("agent-browser skill template is inlined (not loaded from file)", () => {
    // #given
    const opts = { browserProvider: "agent-browser" as const }

    // #when
    const builtins = createBuiltinSkills(opts)
    const viaAgentBrowser = pick(builtins, "agent-browser")

    // #then - the template carries the real guide sections rather than a stub
    const body = viaAgentBrowser!.template
    expect(body).toContain("## Quick start")
    expect(body).toContain("## Commands")
    expect(body).toContain("agent-browser open")
    expect(body).toContain("agent-browser snapshot")
  })

  test("always includes frontend-ui-ux and git-master skills", () => {
    // #given - one list per provider option

    // #when
    const withDefault = createBuiltinSkills()
    const withAgentBrowser = createBuiltinSkills({ browserProvider: "agent-browser" })

    // #then - both provider-independent skills appear in each list
    ;[withDefault, withAgentBrowser].forEach((builtins) => {
      expect(pick(builtins, "frontend-ui-ux")).toBeDefined()
      expect(pick(builtins, "git-master")).toBeDefined()
    })
  })

  test("returns exactly 4 skills regardless of provider", () => {
    // #given

    // #when
    const withDefault = createBuiltinSkills()
    const withAgentBrowser = createBuiltinSkills({ browserProvider: "agent-browser" })

    // #then
    expect(withDefault).toHaveLength(4)
    expect(withAgentBrowser).toHaveLength(4)
  })
})

View File

@@ -1,4 +1,5 @@
import type { BuiltinSkill } from "./types"
import type { BrowserAutomationProvider } from "../../config/schema"
const playwrightSkill: BuiltinSkill = {
name: "playwright",
@@ -14,6 +15,303 @@ This skill provides browser automation capabilities via the Playwright MCP serve
},
}
/**
 * Built-in skill definition for the `agent-browser` CLI.
 *
 * - `template` is the complete Markdown usage guide, inlined as one template
 *   literal. Every backtick that belongs to the Markdown is written as \` so it
 *   does not terminate the literal, and the `\\|` in the `--cdp` table row
 *   produces a literal `\|` in the resulting text.
 * - `allowedTools` contains the single pattern `Bash(agent-browser:*)`.
 *
 * NOTE(review): the guide text appears to mirror a separate SKILL.md document;
 * if that is the case, keep the two in sync when editing — confirm.
 */
const agentBrowserSkill: BuiltinSkill = {
name: "agent-browser",
description: "MUST USE for any browser-related tasks. Browser automation via agent-browser CLI - verification, browsing, information gathering, web scraping, testing, screenshots, and all browser interactions.",
template: `# Browser Automation with agent-browser
## Quick start
\`\`\`bash
agent-browser open <url> # Navigate to page
agent-browser snapshot -i # Get interactive elements with refs
agent-browser click @e1 # Click element by ref
agent-browser fill @e2 "text" # Fill input by ref
agent-browser close # Close browser
\`\`\`
## Core workflow
1. Navigate: \`agent-browser open <url>\`
2. Snapshot: \`agent-browser snapshot -i\` (returns elements with refs like \`@e1\`, \`@e2\`)
3. Interact using refs from the snapshot
4. Re-snapshot after navigation or significant DOM changes
## Commands
### Navigation
\`\`\`bash
agent-browser open <url> # Navigate to URL
agent-browser back # Go back
agent-browser forward # Go forward
agent-browser reload # Reload page
agent-browser close # Close browser
\`\`\`
### Snapshot (page analysis)
\`\`\`bash
agent-browser snapshot # Full accessibility tree
agent-browser snapshot -i # Interactive elements only (recommended)
agent-browser snapshot -c # Compact output
agent-browser snapshot -d 3 # Limit depth to 3
agent-browser snapshot -s "#main" # Scope to CSS selector
\`\`\`
### Interactions (use @refs from snapshot)
\`\`\`bash
agent-browser click @e1 # Click
agent-browser dblclick @e1 # Double-click
agent-browser focus @e1 # Focus element
agent-browser fill @e2 "text" # Clear and type
agent-browser type @e2 "text" # Type without clearing
agent-browser press Enter # Press key
agent-browser press Control+a # Key combination
agent-browser keydown Shift # Hold key down
agent-browser keyup Shift # Release key
agent-browser hover @e1 # Hover
agent-browser check @e1 # Check checkbox
agent-browser uncheck @e1 # Uncheck checkbox
agent-browser select @e1 "value" # Select dropdown
agent-browser scroll down 500 # Scroll page
agent-browser scrollintoview @e1 # Scroll element into view
agent-browser drag @e1 @e2 # Drag and drop
agent-browser upload @e1 file.pdf # Upload files
\`\`\`
### Get information
\`\`\`bash
agent-browser get text @e1 # Get element text
agent-browser get html @e1 # Get innerHTML
agent-browser get value @e1 # Get input value
agent-browser get attr @e1 href # Get attribute
agent-browser get title # Get page title
agent-browser get url # Get current URL
agent-browser get count ".item" # Count matching elements
agent-browser get box @e1 # Get bounding box
\`\`\`
### Check state
\`\`\`bash
agent-browser is visible @e1 # Check if visible
agent-browser is enabled @e1 # Check if enabled
agent-browser is checked @e1 # Check if checked
\`\`\`
### Screenshots & PDF
\`\`\`bash
agent-browser screenshot # Screenshot to stdout
agent-browser screenshot path.png # Save to file
agent-browser screenshot --full # Full page
agent-browser pdf output.pdf # Save as PDF
\`\`\`
### Video recording
\`\`\`bash
agent-browser record start ./demo.webm # Start recording (uses current URL + state)
agent-browser click @e1 # Perform actions
agent-browser record stop # Stop and save video
agent-browser record restart ./take2.webm # Stop current + start new recording
\`\`\`
Recording creates a fresh context but preserves cookies/storage from your session.
### Wait
\`\`\`bash
agent-browser wait @e1 # Wait for element
agent-browser wait 2000 # Wait milliseconds
agent-browser wait --text "Success" # Wait for text
agent-browser wait --url "**/dashboard" # Wait for URL pattern
agent-browser wait --load networkidle # Wait for network idle
agent-browser wait --fn "window.ready" # Wait for JS condition
\`\`\`
### Mouse control
\`\`\`bash
agent-browser mouse move 100 200 # Move mouse
agent-browser mouse down left # Press button
agent-browser mouse up left # Release button
agent-browser mouse wheel 100 # Scroll wheel
\`\`\`
### Semantic locators (alternative to refs)
\`\`\`bash
agent-browser find role button click --name "Submit"
agent-browser find text "Sign In" click
agent-browser find label "Email" fill "user@test.com"
agent-browser find first ".item" click
agent-browser find nth 2 "a" text
\`\`\`
### Browser settings
\`\`\`bash
agent-browser set viewport 1920 1080 # Set viewport size
agent-browser set device "iPhone 14" # Emulate device
agent-browser set geo 37.7749 -122.4194 # Set geolocation
agent-browser set offline on # Toggle offline mode
agent-browser set headers '{"X-Key":"v"}' # Extra HTTP headers
agent-browser set credentials user pass # HTTP basic auth
agent-browser set media dark # Emulate color scheme
\`\`\`
### Cookies & Storage
\`\`\`bash
agent-browser cookies # Get all cookies
agent-browser cookies set name value # Set cookie
agent-browser cookies clear # Clear cookies
agent-browser storage local # Get all localStorage
agent-browser storage local key # Get specific key
agent-browser storage local set k v # Set value
agent-browser storage local clear # Clear all
agent-browser storage session # Get all sessionStorage
agent-browser storage session key # Get specific key
agent-browser storage session set k v # Set value
agent-browser storage session clear # Clear all
\`\`\`
### Network
\`\`\`bash
agent-browser network route <url> # Intercept requests
agent-browser network route <url> --abort # Block requests
agent-browser network route <url> --body '{}' # Mock response
agent-browser network unroute [url] # Remove routes
agent-browser network requests # View tracked requests
agent-browser network requests --filter api # Filter requests
\`\`\`
### Tabs & Windows
\`\`\`bash
agent-browser tab # List tabs
agent-browser tab new [url] # New tab
agent-browser tab 2 # Switch to tab
agent-browser tab close # Close tab
agent-browser window new # New window
\`\`\`
### Frames
\`\`\`bash
agent-browser frame "#iframe" # Switch to iframe
agent-browser frame main # Back to main frame
\`\`\`
### Dialogs
\`\`\`bash
agent-browser dialog accept [text] # Accept dialog
agent-browser dialog dismiss # Dismiss dialog
\`\`\`
### JavaScript
\`\`\`bash
agent-browser eval "document.title" # Run JavaScript
\`\`\`
## Global Options
| Option | Description |
|--------|-------------|
| \`--session <name>\` | Isolated browser session (\`AGENT_BROWSER_SESSION\` env) |
| \`--profile <path>\` | Persistent browser profile (\`AGENT_BROWSER_PROFILE\` env) |
| \`--headers <json>\` | HTTP headers scoped to URL's origin |
| \`--executable-path <path>\` | Custom browser binary (\`AGENT_BROWSER_EXECUTABLE_PATH\` env) |
| \`--args <args>\` | Browser launch args (\`AGENT_BROWSER_ARGS\` env) |
| \`--user-agent <ua>\` | Custom User-Agent (\`AGENT_BROWSER_USER_AGENT\` env) |
| \`--proxy <url>\` | Proxy server (\`AGENT_BROWSER_PROXY\` env) |
| \`--proxy-bypass <hosts>\` | Hosts to bypass proxy (\`AGENT_BROWSER_PROXY_BYPASS\` env) |
| \`-p, --provider <name>\` | Cloud browser provider (\`AGENT_BROWSER_PROVIDER\` env) |
| \`--json\` | Machine-readable JSON output |
| \`--headed\` | Show browser window (not headless) |
| \`--cdp <port\\|wss://url>\` | Connect via Chrome DevTools Protocol |
| \`--debug\` | Debug output |
## Example: Form submission
\`\`\`bash
agent-browser open https://example.com/form
agent-browser snapshot -i
# Output shows: textbox "Email" [ref=e1], textbox "Password" [ref=e2], button "Submit" [ref=e3]
agent-browser fill @e1 "user@example.com"
agent-browser fill @e2 "password123"
agent-browser click @e3
agent-browser wait --load networkidle
agent-browser snapshot -i # Check result
\`\`\`
## Example: Authentication with saved state
\`\`\`bash
# Login once
agent-browser open https://app.example.com/login
agent-browser snapshot -i
agent-browser fill @e1 "username"
agent-browser fill @e2 "password"
agent-browser click @e3
agent-browser wait --url "**/dashboard"
agent-browser state save auth.json
# Later sessions: load saved state
agent-browser state load auth.json
agent-browser open https://app.example.com/dashboard
\`\`\`
### Header-based Auth (Skip login flows)
\`\`\`bash
# Headers scoped to api.example.com only
agent-browser open api.example.com --headers '{"Authorization": "Bearer <token>"}'
# Navigate to another domain - headers NOT sent (safe)
agent-browser open other-site.com
# Global headers (all domains)
agent-browser set headers '{"X-Custom-Header": "value"}'
\`\`\`
## Sessions & Persistent Profiles
### Sessions (parallel browsers)
\`\`\`bash
agent-browser --session test1 open site-a.com
agent-browser --session test2 open site-b.com
agent-browser session list
\`\`\`
### Persistent Profiles
Persists cookies, localStorage, IndexedDB, service workers, cache, login sessions across browser restarts.
\`\`\`bash
agent-browser --profile ~/.myapp-profile open myapp.com
# Or via env var
AGENT_BROWSER_PROFILE=~/.myapp-profile agent-browser open myapp.com
\`\`\`
- Use different profile paths for different projects
- Login once → restart browser → still logged in
- Stores: cookies, localStorage, IndexedDB, service workers, browser cache
## JSON output (for parsing)
Add \`--json\` for machine-readable output:
\`\`\`bash
agent-browser snapshot -i --json
agent-browser get text @e1 --json
\`\`\`
## Debugging
\`\`\`bash
agent-browser open example.com --headed # Show browser window
agent-browser console # View console messages
agent-browser errors # View page errors
agent-browser record start ./debug.webm # Record from current page
agent-browser record stop # Save recording
agent-browser connect 9222 # Local CDP port
agent-browser --cdp "wss://browser-service.com/cdp?token=..." snapshot # Remote via WebSocket
agent-browser console --clear # Clear console
agent-browser errors --clear # Clear errors
agent-browser highlight @e1 # Highlight element
agent-browser trace start # Start recording trace
agent-browser trace stop trace.zip # Stop and save trace
\`\`\`
---
Install: \`bun add -g agent-browser && agent-browser install\`. Run \`agent-browser --help\` for all commands. Repo: https://github.com/vercel-labs/agent-browser`,
allowedTools: ["Bash(agent-browser:*)"],
}
const frontendUiUxSkill: BuiltinSkill = {
name: "frontend-ui-ux",
description: "Designer-turned-developer who crafts stunning UI/UX even without design mockups",
@@ -95,7 +393,7 @@ Interpret creatively and make unexpected choices that feel genuinely designed fo
const gitMasterSkill: BuiltinSkill = {
name: "git-master",
description:
"MUST USE for ANY git operations. Atomic commits, rebase/squash, history search (blame, bisect, log -S). STRONGLY RECOMMENDED: Use with delegate_task(category='quick', skills=['git-master'], ...) to save context. Triggers: 'commit', 'rebase', 'squash', 'who wrote', 'when was X added', 'find the commit that'.",
"MUST USE for ANY git operations. Atomic commits, rebase/squash, history search (blame, bisect, log -S). STRONGLY RECOMMENDED: Use with delegate_task(category='quick', load_skills=['git-master'], ...) to save context. Triggers: 'commit', 'rebase', 'squash', 'who wrote', 'when was X added', 'find the commit that'.",
template: `# Git Master Agent
You are a Git expert combining three specializations:
@@ -1198,6 +1496,234 @@ POTENTIAL ACTIONS:
- Bisect without proper good/bad boundaries -> Wasted time`,
}
export function createBuiltinSkills(): BuiltinSkill[] {
return [playwrightSkill, frontendUiUxSkill, gitMasterSkill]
/**
 * Built-in "dev-browser" skill definition.
 *
 * `description` enumerates the trigger phrases for the skill and `template`
 * holds the markdown guide (setup, script-writing workflow, client API,
 * ARIA snapshots and error recovery). Backticks inside the guide are escaped
 * because the whole guide is a single template literal.
 */
const devBrowserSkill: BuiltinSkill = {
name: "dev-browser",
description:
"Browser automation with persistent page state. Use when users ask to navigate websites, fill forms, take screenshots, extract web data, test web apps, or automate browser workflows. Trigger phrases include 'go to [url]', 'click on', 'fill out the form', 'take a screenshot', 'scrape', 'automate', 'test the website', 'log into', or any browser interaction request.",
template: `# Dev Browser Skill
Browser automation that maintains page state across script executions. Write small, focused scripts to accomplish tasks incrementally. Once you've proven out part of a workflow and there is repeated work to be done, you can write a script to do the repeated work in a single execution.
## Choosing Your Approach
- **Local/source-available sites**: Read the source code first to write selectors directly
- **Unknown page layouts**: Use \`getAISnapshot()\` to discover elements and \`selectSnapshotRef()\` to interact with them
- **Visual feedback**: Take screenshots to see what the user sees
## Setup
**IMPORTANT**: Before using this skill, ensure the server is running. See [references/installation.md](references/installation.md) for platform-specific setup instructions (macOS, Linux, Windows).
Two modes available. Ask the user if unclear which to use.
### Standalone Mode (Default)
Launches a new Chromium browser for fresh automation sessions.
**macOS/Linux:**
\`\`\`bash
./skills/dev-browser/server.sh &
\`\`\`
**Windows (PowerShell):**
\`\`\`powershell
Start-Process -NoNewWindow -FilePath "node" -ArgumentList "skills/dev-browser/server.js"
\`\`\`
Add \`--headless\` flag if user requests it. **Wait for the \`Ready\` message before running scripts.**
### Extension Mode
Connects to user's existing Chrome browser. Use this when:
- The user is already logged into sites and wants you to do things behind an authed experience that isn't local dev.
- The user asks you to use the extension
**Important**: The core flow is still the same. You create named pages inside of their browser.
**Start the relay server:**
**macOS/Linux:**
\`\`\`bash
cd skills/dev-browser && npm i && npm run start-extension &
\`\`\`
**Windows (PowerShell):**
\`\`\`powershell
cd skills/dev-browser; npm i; Start-Process -NoNewWindow -FilePath "npm" -ArgumentList "run", "start-extension"
\`\`\`
Wait for \`Waiting for extension to connect...\` followed by \`Extension connected\` in the console.
If the extension hasn't connected yet, tell the user to launch and activate it. Download link: https://github.com/SawyerHood/dev-browser/releases
## Writing Scripts
> **Run all scripts from \`skills/dev-browser/\` directory.** The \`@/\` import alias requires this directory's config.
Execute scripts inline using heredocs:
**macOS/Linux:**
\`\`\`bash
cd skills/dev-browser && npx tsx <<'EOF'
import { connect, waitForPageLoad } from "@/client.js";
const client = await connect();
const page = await client.page("example", { viewport: { width: 1920, height: 1080 } });
await page.goto("https://example.com");
await waitForPageLoad(page);
console.log({ title: await page.title(), url: page.url() });
await client.disconnect();
EOF
\`\`\`
**Windows (PowerShell):**
\`\`\`powershell
cd skills/dev-browser
@"
import { connect, waitForPageLoad } from "@/client.js";
const client = await connect();
const page = await client.page("example", { viewport: { width: 1920, height: 1080 } });
await page.goto("https://example.com");
await waitForPageLoad(page);
console.log({ title: await page.title(), url: page.url() });
await client.disconnect();
"@ | npx tsx --input-type=module
\`\`\`
### Key Principles
1. **Small scripts**: Each script does ONE thing (navigate, click, fill, check)
2. **Evaluate state**: Log/return state at the end to decide next steps
3. **Descriptive page names**: Use \`"checkout"\`, \`"login"\`, not \`"main"\`
4. **Disconnect to exit**: \`await client.disconnect()\` - pages persist on server
5. **Plain JS in evaluate**: \`page.evaluate()\` runs in browser - no TypeScript syntax
## Workflow Loop
1. **Write a script** to perform one action
2. **Run it** and observe the output
3. **Evaluate** - did it work? What's the current state?
4. **Decide** - is the task complete or do we need another script?
5. **Repeat** until task is done
### No TypeScript in Browser Context
Code passed to \`page.evaluate()\` runs in the browser, which doesn't understand TypeScript:
\`\`\`typescript
// Correct: plain JavaScript
const text = await page.evaluate(() => {
return document.body.innerText;
});
// Wrong: TypeScript syntax will fail at runtime
const text = await page.evaluate(() => {
const el: HTMLElement = document.body; // Type annotation breaks in browser!
return el.innerText;
});
\`\`\`
## Scraping Data
For scraping large datasets, intercept and replay network requests rather than scrolling the DOM. See [references/scraping.md](references/scraping.md) for the complete guide.
## Client API
\`\`\`typescript
const client = await connect();
// Get or create named page
const page = await client.page("name");
const pageWithSize = await client.page("name", { viewport: { width: 1920, height: 1080 } });
const pages = await client.list(); // List all page names
await client.close("name"); // Close a page
await client.disconnect(); // Disconnect (pages persist)
// ARIA Snapshot methods
const snapshot = await client.getAISnapshot("name"); // Get accessibility tree
const element = await client.selectSnapshotRef("name", "e5"); // Get element by ref
\`\`\`
## Waiting
\`\`\`typescript
import { waitForPageLoad } from "@/client.js";
await waitForPageLoad(page); // After navigation
await page.waitForSelector(".results"); // For specific elements
await page.waitForURL("**/success"); // For specific URL
\`\`\`
## Screenshots
\`\`\`typescript
await page.screenshot({ path: "tmp/screenshot.png" });
await page.screenshot({ path: "tmp/full.png", fullPage: true });
\`\`\`
## ARIA Snapshot (Element Discovery)
Use \`getAISnapshot()\` to discover page elements. Returns YAML-formatted accessibility tree:
\`\`\`yaml
- banner:
- link "Hacker News" [ref=e1]
- navigation:
- link "new" [ref=e2]
- main:
- list:
- listitem:
- link "Article Title" [ref=e8]
\`\`\`
**Interacting with refs:**
\`\`\`typescript
const snapshot = await client.getAISnapshot("hackernews");
console.log(snapshot); // Find the ref you need
const element = await client.selectSnapshotRef("hackernews", "e2");
await element.click();
\`\`\`
## Error Recovery
Page state persists after failures. Debug with:
\`\`\`bash
cd skills/dev-browser && npx tsx <<'EOF'
import { connect } from "@/client.js";
const client = await connect();
const page = await client.page("hackernews");
await page.screenshot({ path: "tmp/debug.png" });
console.log({
url: page.url(),
title: await page.title(),
bodyText: await page.textContent("body").then((t) => t?.slice(0, 200)),
});
await client.disconnect();
EOF
\`\`\``,
}
/** Options accepted by `createBuiltinSkills`. */
export interface CreateBuiltinSkillsOptions {
// Which browser-automation skill to include; `createBuiltinSkills` falls back to "playwright" when omitted.
browserProvider?: BrowserAutomationProvider
}
/**
 * Assembles the list of built-in skills.
 *
 * The first entry is the browser-automation skill: the agent-browser skill when
 * `options.browserProvider` is "agent-browser", otherwise the playwright skill
 * (also the choice when no provider is given). It is followed by the
 * frontend-ui-ux, git-master and dev-browser skills, in that order.
 *
 * @param options - Optional selection of the browser-automation provider.
 * @returns A new array of built-in skill definitions.
 */
export function createBuiltinSkills(options: CreateBuiltinSkillsOptions = {}): BuiltinSkill[] {
  const skills: BuiltinSkill[] = []

  if (options.browserProvider === "agent-browser") {
    skills.push(agentBrowserSkill)
  } else {
    skills.push(playwrightSkill)
  }

  skills.push(frontendUiUxSkill, gitMasterSkill, devBrowserSkill)
  return skills
}

View File

@@ -77,7 +77,13 @@ export async function loadMcpConfigs(): Promise<McpLoadResult> {
for (const [name, serverConfig] of Object.entries(config.mcpServers)) {
if (serverConfig.disabled) {
log(`Skipping disabled MCP server "${name}"`, { path })
log(`Disabling MCP server "${name}"`, { path })
delete servers[name]
const existingIndex = loadedServers.findIndex((s) => s.name === name)
if (existingIndex !== -1) {
loadedServers.splice(existingIndex, 1)
log(`Removed previously loaded MCP server "${name}"`, { path })
}
continue
}

View File

@@ -37,7 +37,7 @@ describe("claude-code-session-state", () => {
setSessionAgent(sessionID, "Prometheus (Planner)")
// #when - try to overwrite
setSessionAgent(sessionID, "Sisyphus")
setSessionAgent(sessionID, "sisyphus")
// #then - first agent preserved
expect(getSessionAgent(sessionID)).toBe("Prometheus (Planner)")
@@ -58,10 +58,10 @@ describe("claude-code-session-state", () => {
setSessionAgent(sessionID, "Prometheus (Planner)")
// #when - force update
updateSessionAgent(sessionID, "Sisyphus")
updateSessionAgent(sessionID, "sisyphus")
// #then
expect(getSessionAgent(sessionID)).toBe("Sisyphus")
expect(getSessionAgent(sessionID)).toBe("sisyphus")
})
})
@@ -129,7 +129,7 @@ describe("claude-code-session-state", () => {
// #given - user switches to custom agent "MyCustomAgent"
const sessionID = "test-session-custom"
const customAgent = "MyCustomAgent"
const defaultAgent = "Sisyphus"
const defaultAgent = "sisyphus"
// User switches to custom agent (via UI)
setSessionAgent(sessionID, customAgent)

View File

@@ -21,7 +21,7 @@ describe("createContextInjectorMessagesTransformHook", () => {
sessionID,
role,
time: { created: Date.now() },
agent: "Sisyphus",
agent: "sisyphus",
model: { providerID: "test", modelID: "test" },
path: { cwd: "/", root: "/" },
},

View File

@@ -265,3 +265,66 @@ describe("resolveMultipleSkillsAsync", () => {
expect(result.notFound).toEqual([])
})
})
describe("resolveSkillContent with browserProvider", () => {
  // Options shared by the cases that opt into the agent-browser provider.
  const agentBrowserOptions = { browserProvider: "agent-browser" as const }

  it("should resolve agent-browser skill when browserProvider is 'agent-browser'", () => {
    // #given: the agent-browser provider is selected

    // #when: the 'agent-browser' skill is looked up
    const content = resolveSkillContent("agent-browser", agentBrowserOptions)

    // #then: a template mentioning agent-browser comes back
    expect(content).not.toBeNull()
    expect(content).toContain("agent-browser")
  })

  it("should return null for agent-browser when browserProvider is default", () => {
    // #given: no options at all, so the default provider applies

    // #when: the 'agent-browser' skill is looked up
    const content = resolveSkillContent("agent-browser")

    // #then: nothing is found, agent-browser is not among the default builtins
    expect(content).toBeNull()
  })

  it("should return null for playwright when browserProvider is agent-browser", () => {
    // #given: the agent-browser provider is selected

    // #when: the 'playwright' skill is looked up
    const content = resolveSkillContent("playwright", agentBrowserOptions)

    // #then: nothing is found, agent-browser takes playwright's place
    expect(content).toBeNull()
  })
})
describe("resolveMultipleSkills with browserProvider", () => {
  it("should resolve agent-browser when browserProvider is set", () => {
    // #given: two skills requested while the agent-browser provider is selected
    const requested = ["agent-browser", "git-master"]

    // #when: resolving them in one call
    const { resolved, notFound } = resolveMultipleSkills(requested, {
      browserProvider: "agent-browser" as const,
    })

    // #then: both are resolved and nothing is reported missing
    expect(resolved.has("agent-browser")).toBe(true)
    expect(resolved.has("git-master")).toBe(true)
    expect(notFound).toHaveLength(0)
  })

  it("should not resolve agent-browser without browserProvider option", () => {
    // #given: agent-browser requested with no options
    const requested = ["agent-browser"]

    // #when: resolving it
    const { resolved, notFound } = resolveMultipleSkills(requested)

    // #then: it is reported as not found
    expect(resolved.has("agent-browser")).toBe(false)
    expect(notFound).toContain("agent-browser")
  })
})

View File

@@ -3,24 +3,27 @@ import { discoverSkills } from "./loader"
import type { LoadedSkill } from "./types"
import { parseFrontmatter } from "../../shared/frontmatter"
import { readFileSync } from "node:fs"
import type { GitMasterConfig } from "../../config/schema"
import type { GitMasterConfig, BrowserAutomationProvider } from "../../config/schema"
export interface SkillResolutionOptions {
gitMasterConfig?: GitMasterConfig
browserProvider?: BrowserAutomationProvider
}
let cachedSkills: LoadedSkill[] | null = null
const cachedSkillsByProvider = new Map<string, LoadedSkill[]>()
function clearSkillCache(): void {
cachedSkills = null
cachedSkillsByProvider.clear()
}
async function getAllSkills(): Promise<LoadedSkill[]> {
if (cachedSkills) return cachedSkills
async function getAllSkills(options?: SkillResolutionOptions): Promise<LoadedSkill[]> {
const cacheKey = options?.browserProvider ?? "playwright"
const cached = cachedSkillsByProvider.get(cacheKey)
if (cached) return cached
const [discoveredSkills, builtinSkillDefs] = await Promise.all([
discoverSkills({ includeClaudeCodePaths: true }),
Promise.resolve(createBuiltinSkills()),
Promise.resolve(createBuiltinSkills({ browserProvider: options?.browserProvider })),
])
const builtinSkillsAsLoaded: LoadedSkill[] = builtinSkillDefs.map((skill) => ({
@@ -44,8 +47,9 @@ async function getAllSkills(): Promise<LoadedSkill[]> {
const discoveredNames = new Set(discoveredSkills.map((s) => s.name))
const uniqueBuiltins = builtinSkillsAsLoaded.filter((s) => !discoveredNames.has(s.name))
cachedSkills = [...discoveredSkills, ...uniqueBuiltins]
return cachedSkills
const allSkills = [...discoveredSkills, ...uniqueBuiltins]
cachedSkillsByProvider.set(cacheKey, allSkills)
return allSkills
}
async function extractSkillTemplate(skill: LoadedSkill): Promise<string> {
@@ -118,7 +122,7 @@ export function injectGitMasterConfig(template: string, config?: GitMasterConfig
}
export function resolveSkillContent(skillName: string, options?: SkillResolutionOptions): string | null {
const skills = createBuiltinSkills()
const skills = createBuiltinSkills({ browserProvider: options?.browserProvider })
const skill = skills.find((s) => s.name === skillName)
if (!skill) return null
@@ -133,7 +137,7 @@ export function resolveMultipleSkills(skillNames: string[], options?: SkillResol
resolved: Map<string, string>
notFound: string[]
} {
const skills = createBuiltinSkills()
const skills = createBuiltinSkills({ browserProvider: options?.browserProvider })
const skillMap = new Map(skills.map((s) => [s.name, s.template]))
const resolved = new Map<string, string>()
@@ -159,7 +163,7 @@ export async function resolveSkillContentAsync(
skillName: string,
options?: SkillResolutionOptions
): Promise<string | null> {
const allSkills = await getAllSkills()
const allSkills = await getAllSkills(options)
const skill = allSkills.find((s) => s.name === skillName)
if (!skill) return null
@@ -179,7 +183,7 @@ export async function resolveMultipleSkillsAsync(
resolved: Map<string, string>
notFound: string[]
}> {
const allSkills = await getAllSkills()
const allSkills = await getAllSkills(options)
const skillMap = new Map<string, LoadedSkill>()
for (const skill of allSkills) {
skillMap.set(skill.name, skill)

View File

@@ -30,7 +30,7 @@ describe("TaskToastManager", () => {
const task = {
id: "task_1",
description: "Test task",
agent: "Sisyphus-Junior",
agent: "sisyphus-junior",
isBackground: true,
skills: ["playwright", "git-master"],
}
@@ -127,7 +127,7 @@ describe("TaskToastManager", () => {
const task = {
id: "task_1",
description: "Full info task",
agent: "Sisyphus-Junior",
agent: "sisyphus-junior",
isBackground: true,
skills: ["frontend-ui-ux"],
}
@@ -149,9 +149,9 @@ describe("TaskToastManager", () => {
const task = {
id: "task_1",
description: "Task with category default model",
agent: "Sisyphus-Junior",
agent: "sisyphus-junior",
isBackground: false,
modelInfo: { model: "google/gemini-3-pro-preview", type: "category-default" as const },
modelInfo: { model: "google/gemini-3-pro", type: "category-default" as const },
}
// #when - addTask is called
@@ -169,7 +169,7 @@ describe("TaskToastManager", () => {
const task = {
id: "task_1b",
description: "Task with system default model",
agent: "Sisyphus-Junior",
agent: "sisyphus-junior",
isBackground: false,
modelInfo: { model: "anthropic/claude-sonnet-4-5", type: "system-default" as const },
}
@@ -190,7 +190,7 @@ describe("TaskToastManager", () => {
const task = {
id: "task_2",
description: "Task with inherited model",
agent: "Sisyphus-Junior",
agent: "sisyphus-junior",
isBackground: false,
modelInfo: { model: "cliproxy/claude-opus-4-5", type: "inherited" as const },
}
@@ -211,7 +211,7 @@ describe("TaskToastManager", () => {
const task = {
id: "task_3",
description: "Task with user model",
agent: "Sisyphus-Junior",
agent: "sisyphus-junior",
isBackground: false,
modelInfo: { model: "my-provider/my-model", type: "user-defined" as const },
}

View File

@@ -0,0 +1,97 @@
import type { TmuxConfig } from "../../config/schema"
import type { PaneAction, WindowState } from "./types"
import { spawnTmuxPane, closeTmuxPane, enforceMainPaneWidth, replaceTmuxPane } from "../../shared/tmux"
import { log } from "../../shared"
/** Outcome of executing a single pane action. */
export interface ActionResult {
// Whether the underlying tmux operation reported success.
success: boolean
// Pane id reported by a spawn or replace operation; not set for close actions.
paneId?: string
// Optional failure detail; executeActions includes it in its failure log entry.
error?: string
}
/** Aggregate outcome of running a sequence of pane actions. */
export interface ExecuteActionsResult {
// False as soon as one action fails; true when every action succeeded.
success: boolean
// Pane id from the most recent successful spawn/replace action, if any (omitted on failure).
spawnedPaneId?: string
// One entry per executed action, in execution order, including the failing one.
results: Array<{ action: PaneAction; result: ActionResult }>
}
/** Shared inputs needed to execute pane actions. */
export interface ExecuteContext {
// Passed through to the spawn and replace tmux helpers.
config: TmuxConfig
// Passed through to the spawn and replace tmux helpers.
serverUrl: string
// Supplies the main pane id and window width used when re-enforcing the main pane width.
windowState: WindowState
}
/**
 * Re-applies the main pane width for the given window state.
 * Does nothing when the state has no main pane.
 */
async function enforceMainPane(windowState: WindowState): Promise<void> {
  const { mainPane } = windowState
  if (mainPane) {
    await enforceMainPaneWidth(mainPane.paneId, windowState.windowWidth)
  }
}
/**
 * Executes one pane action against tmux.
 *
 * - "close": closes the pane and, on success, re-enforces the main pane width.
 * - "replace": replaces the pane's session in place; the main pane width is not touched.
 * - anything else is treated as a spawn: a new pane is created and, on success,
 *   the main pane width is re-enforced.
 *
 * @param action - The action to perform.
 * @param ctx - Tmux config, server URL and current window state.
 * @returns The success flag, plus the pane id for replace/spawn actions.
 */
export async function executeAction(
  action: PaneAction,
  ctx: ExecuteContext
): Promise<ActionResult> {
  switch (action.type) {
    case "close": {
      const closed = await closeTmuxPane(action.paneId)
      if (closed) {
        await enforceMainPane(ctx.windowState)
      }
      return { success: closed }
    }

    case "replace": {
      const replaced = await replaceTmuxPane(
        action.paneId,
        action.newSessionId,
        action.description,
        ctx.config,
        ctx.serverUrl
      )
      return { success: replaced.success, paneId: replaced.paneId }
    }

    default: {
      const spawned = await spawnTmuxPane(
        action.sessionId,
        action.description,
        ctx.config,
        ctx.serverUrl,
        action.targetPaneId,
        action.splitDirection
      )
      if (spawned.success) {
        await enforceMainPane(ctx.windowState)
      }
      return { success: spawned.success, paneId: spawned.paneId }
    }
  }
}
/**
 * Runs the given actions one after another, stopping at the first failure.
 *
 * @param actions - Actions to execute, in order.
 * @param ctx - Execution context forwarded to `executeAction`.
 * @returns On failure: `success: false` with the results gathered so far (the
 *   failing action included). On success: `success: true`, every result, and the
 *   pane id of the last spawn/replace action that reported one.
 */
export async function executeActions(
  actions: PaneAction[],
  ctx: ExecuteContext
): Promise<ExecuteActionsResult> {
  const results: ExecuteActionsResult["results"] = []
  let spawnedPaneId: string | undefined

  for (const action of actions) {
    const { type } = action
    log("[action-executor] executing", { type })

    const result = await executeAction(action, ctx)
    results.push({ action, result })

    if (!result.success) {
      log("[action-executor] action failed", { type, error: result.error })
      return { success: false, results }
    }

    // Only spawn and replace actions yield a pane worth reporting back.
    const createsPane = type === "spawn" || type === "replace"
    if (createsPane && result.paneId) {
      spawnedPaneId = result.paneId
    }
  }

  return { success: true, spawnedPaneId, results }
}

View File

@@ -0,0 +1,354 @@
import { describe, it, expect } from "bun:test"
import {
decideSpawnActions,
calculateCapacity,
canSplitPane,
canSplitPaneAnyDirection,
getBestSplitDirection,
type SessionMapping
} from "./decision-engine"
import type { WindowState, CapacityConfig, TmuxPaneInfo } from "./types"
import { MIN_PANE_WIDTH, MIN_PANE_HEIGHT } from "./types"
// Smallest pane extents that can still be split in two: two minimum-size panes plus a
// one-cell divider. Recomputed here from the exported minimums instead of being imported.
const MIN_SPLIT_WIDTH = 2 * MIN_PANE_WIDTH + 1
const MIN_SPLIT_HEIGHT = 2 * MIN_PANE_HEIGHT + 1
// Boundary tests for canSplitPane: each direction is checked exactly at, and one cell below,
// the minimum splittable extent.
describe("canSplitPane", () => {
// Pane fixture; only width and height vary between cases.
const createPane = (width: number, height: number): TmuxPaneInfo => ({
paneId: "%1",
width,
height,
left: 100,
top: 0,
title: "test",
isActive: false,
})
it("returns true for horizontal split when width >= 2*MIN+1", () => {
//#given - pane exactly at the minimum splittable width (MIN_SPLIT_WIDTH = 2*MIN_PANE_WIDTH + 1)
const pane = createPane(MIN_SPLIT_WIDTH, 20)
//#when
const result = canSplitPane(pane, "-h")
//#then
expect(result).toBe(true)
})
it("returns false for horizontal split when width < 2*MIN+1", () => {
//#given - pane one column below the minimum splittable width
const pane = createPane(MIN_SPLIT_WIDTH - 1, 20)
//#when
const result = canSplitPane(pane, "-h")
//#then
expect(result).toBe(false)
})
it("returns true for vertical split when height >= 2*MIN+1", () => {
//#given - pane exactly at the minimum splittable height (MIN_SPLIT_HEIGHT = 2*MIN_PANE_HEIGHT + 1)
const pane = createPane(50, MIN_SPLIT_HEIGHT)
//#when
const result = canSplitPane(pane, "-v")
//#then
expect(result).toBe(true)
})
it("returns false for vertical split when height < 2*MIN+1", () => {
//#given - pane one row below the minimum splittable height
const pane = createPane(50, MIN_SPLIT_HEIGHT - 1)
//#when
const result = canSplitPane(pane, "-v")
//#then
expect(result).toBe(false)
})
})
describe("canSplitPaneAnyDirection", () => {
  // Pane fixture where only the dimensions matter.
  function paneOfSize(width: number, height: number): TmuxPaneInfo {
    return {
      paneId: "%1",
      width,
      height,
      left: 100,
      top: 0,
      title: "test",
      isActive: false,
    }
  }

  it("returns true when can split horizontally but not vertically", () => {
    //#given - wide enough, one row too short
    const wideShortPane = paneOfSize(MIN_SPLIT_WIDTH, MIN_SPLIT_HEIGHT - 1)

    //#when
    const splittable = canSplitPaneAnyDirection(wideShortPane)

    //#then
    expect(splittable).toBe(true)
  })

  it("returns true when can split vertically but not horizontally", () => {
    //#given - tall enough, one column too narrow
    const narrowTallPane = paneOfSize(MIN_SPLIT_WIDTH - 1, MIN_SPLIT_HEIGHT)

    //#when
    const splittable = canSplitPaneAnyDirection(narrowTallPane)

    //#then
    expect(splittable).toBe(true)
  })

  it("returns false when cannot split in any direction", () => {
    //#given - one cell short in both dimensions
    const tinyPane = paneOfSize(MIN_SPLIT_WIDTH - 1, MIN_SPLIT_HEIGHT - 1)

    //#when
    const splittable = canSplitPaneAnyDirection(tinyPane)

    //#then
    expect(splittable).toBe(false)
  })
})
describe("getBestSplitDirection", () => {
  // Pane fixture where only the dimensions matter.
  function paneOfSize(width: number, height: number): TmuxPaneInfo {
    return {
      paneId: "%1",
      width,
      height,
      left: 100,
      top: 0,
      title: "test",
      isActive: false,
    }
  }

  it("returns -h when only horizontal split possible", () => {
    //#given - wide enough, one row too short
    const wideShortPane = paneOfSize(MIN_SPLIT_WIDTH, MIN_SPLIT_HEIGHT - 1)

    //#when
    const direction = getBestSplitDirection(wideShortPane)

    //#then
    expect(direction).toBe("-h")
  })

  it("returns -v when only vertical split possible", () => {
    //#given - tall enough, one column too narrow
    const narrowTallPane = paneOfSize(MIN_SPLIT_WIDTH - 1, MIN_SPLIT_HEIGHT)

    //#when
    const direction = getBestSplitDirection(narrowTallPane)

    //#then
    expect(direction).toBe("-v")
  })

  it("returns null when no split possible", () => {
    //#given - one cell short in both dimensions
    const tinyPane = paneOfSize(MIN_SPLIT_WIDTH - 1, MIN_SPLIT_HEIGHT - 1)

    //#when
    const direction = getBestSplitDirection(tinyPane)

    //#then
    expect(direction).toBe(null)
  })

  it("returns -h when width >= height and both splits possible", () => {
    //#given - both splits fit and the pane is wider than it is tall
    const landscapePane = paneOfSize(MIN_SPLIT_WIDTH + 10, MIN_SPLIT_HEIGHT)

    //#when
    const direction = getBestSplitDirection(landscapePane)

    //#then
    expect(direction).toBe("-h")
  })

  it("returns -v when height > width and both splits possible", () => {
    //#given - both splits fit and the height exceeds the width
    const portraitPane = paneOfSize(MIN_SPLIT_WIDTH, MIN_SPLIT_WIDTH + 10)

    //#when
    const direction = getBestSplitDirection(portraitPane)

    //#then
    expect(direction).toBe("-v")
  })
})
// Tests for decideSpawnActions: whether a new agent pane can be spawned and which actions are planned.
describe("decideSpawnActions", () => {
const defaultConfig: CapacityConfig = {
mainPaneMinWidth: 120,
agentPaneWidth: 40,
}
// Builds a window state whose main pane takes the left half (floor(windowWidth / 2)) at full height;
// agent panes get generated titles and are marked inactive.
const createWindowState = (
windowWidth: number,
windowHeight: number,
agentPanes: Array<{ paneId: string; width: number; height: number; left: number; top: number }> = []
): WindowState => ({
windowWidth,
windowHeight,
mainPane: { paneId: "%0", width: Math.floor(windowWidth / 2), height: windowHeight, left: 0, top: 0, title: "main", isActive: true },
agentPanes: agentPanes.map((p, i) => ({
...p,
title: `agent-${i}`,
isActive: false,
})),
})
describe("minimum size enforcement", () => {
it("returns canSpawn=false when window too small", () => {
//#given - window smaller than minimum pane size
const state = createWindowState(50, 5)
//#when
const result = decideSpawnActions(state, "ses1", "test", defaultConfig, [])
//#then
expect(result.canSpawn).toBe(false)
expect(result.reason).toContain("too small")
})
it("returns canSpawn=true when main pane can be split", () => {
//#given - no agent panes yet; the fixture's main pane is 110 wide, above MIN_SPLIT_WIDTH (2*MIN_PANE_WIDTH + 1)
const state = createWindowState(220, 44)
//#when
const result = decideSpawnActions(state, "ses1", "test", defaultConfig, [])
//#then
expect(result.canSpawn).toBe(true)
expect(result.actions.length).toBe(1)
expect(result.actions[0].type).toBe("spawn")
})
it("closes oldest pane when existing panes are too small to split", () => {
//#given - existing pane is below minimum splittable size
const state = createWindowState(220, 30, [
{ paneId: "%1", width: 50, height: 15, left: 110, top: 0 },
])
const mappings: SessionMapping[] = [
{ sessionId: "old-ses", paneId: "%1", createdAt: new Date("2024-01-01") },
]
//#when
const result = decideSpawnActions(state, "ses1", "test", defaultConfig, mappings)
//#then
expect(result.canSpawn).toBe(true)
expect(result.actions.length).toBe(2)
expect(result.actions[0].type).toBe("close")
expect(result.actions[1].type).toBe("spawn")
})
it("can spawn when existing pane is large enough to split", () => {
//#given - existing pane is above minimum splittable size
const state = createWindowState(320, 50, [
{ paneId: "%1", width: MIN_SPLIT_WIDTH + 10, height: MIN_SPLIT_HEIGHT + 10, left: 160, top: 0 },
])
//#when
const result = decideSpawnActions(state, "ses1", "test", defaultConfig, [])
//#then
expect(result.canSpawn).toBe(true)
expect(result.actions.length).toBe(1)
expect(result.actions[0].type).toBe("spawn")
})
})
describe("basic spawn decisions", () => {
it("returns canSpawn=true when capacity allows new pane", () => {
//#given - 220x44 window with no agent panes; fixture main pane width=110, above MIN_SPLIT_WIDTH
const state = createWindowState(220, 44)
//#when
const result = decideSpawnActions(state, "ses1", "test", defaultConfig, [])
//#then
expect(result.canSpawn).toBe(true)
expect(result.actions.length).toBe(1)
expect(result.actions[0].type).toBe("spawn")
})
it("spawns with splitDirection", () => {
//#given - one agent pane exactly at the minimum splittable size in both dimensions
const state = createWindowState(212, 44, [
{ paneId: "%1", width: MIN_SPLIT_WIDTH, height: MIN_SPLIT_HEIGHT, left: 106, top: 0 },
])
//#when
const result = decideSpawnActions(state, "ses1", "test", defaultConfig, [])
//#then
expect(result.canSpawn).toBe(true)
expect(result.actions[0].type).toBe("spawn")
if (result.actions[0].type === "spawn") {
expect(result.actions[0].sessionId).toBe("ses1")
expect(result.actions[0].splitDirection).toBeDefined()
}
})
it("returns canSpawn=false when no main pane", () => {
//#given - window state without a main pane
const state: WindowState = { windowWidth: 212, windowHeight: 44, mainPane: null, agentPanes: [] }
//#when
const result = decideSpawnActions(state, "ses1", "test", defaultConfig, [])
//#then
expect(result.canSpawn).toBe(false)
expect(result.reason).toBe("no main pane found")
})
})
})
// Tests for calculateCapacity. The arithmetic in the comments uses the divider-aware formula
// floor((extent + 1) / (minSize + 1)) with MIN_PANE_WIDTH=52 and MIN_PANE_HEIGHT=11.
describe("calculateCapacity", () => {
it("calculates 2D grid capacity (cols x rows)", () => {
//#given - 212x44 window (user's actual screen)
//#when
const capacity = calculateCapacity(212, 44)
//#then - availableWidth=106, cols=(106+1)/(52+1)=2, rows=(44+1)/(11+1)=3 (accounting for dividers)
expect(capacity.cols).toBe(2)
expect(capacity.rows).toBe(3)
expect(capacity.total).toBe(6)
})
it("returns 0 cols when agent area too narrow", () => {
//#given - window too narrow for even 1 agent pane
//#when
const capacity = calculateCapacity(100, 44)
//#then - availableWidth=50, cols=floor((50+1)/(52+1))=0
expect(capacity.cols).toBe(0)
expect(capacity.total).toBe(0)
})
it("returns 0 rows when window too short", () => {
//#given - window too short
//#when
const capacity = calculateCapacity(212, 10)
//#then - rows=floor((10+1)/(11+1))=0
expect(capacity.rows).toBe(0)
expect(capacity.total).toBe(0)
})
it("scales with larger screens but caps at MAX_GRID_SIZE=4", () => {
//#given - larger 4K-like screen (400x100)
//#when
const capacity = calculateCapacity(400, 100)
//#then - availableWidth=200, cols=floor(201/53)=3 (below the cap); rows=floor(101/12)=8, capped to MAX_GRID_SIZE=4
expect(capacity.cols).toBe(3)
expect(capacity.rows).toBe(4)
expect(capacity.total).toBe(12)
})
})

View File

@@ -0,0 +1,386 @@
import type { WindowState, PaneAction, SpawnDecision, CapacityConfig, TmuxPaneInfo, SplitDirection } from "./types"
import { MIN_PANE_WIDTH, MIN_PANE_HEIGHT } from "./types"
/** Association between a session and the tmux pane it occupies. */
export interface SessionMapping {
sessionId: string
paneId: string
// NOTE(review): presumably when the session's pane was created, used to pick the oldest pane — confirm against decideSpawnActions.
createdAt: Date
}
/** Result of calculateCapacity: how many agent panes fit in the window. */
export interface GridCapacity {
// Number of agent columns that fit.
cols: number
// Number of agent rows that fit.
rows: number
// cols * rows.
total: number
}
/** Zero-based position of a cell in the agent grid. */
export interface GridSlot {
row: number
col: number
}
/** Grid layout produced by computeGridPlan. */
export interface GridPlan {
// Number of columns in the plan.
cols: number
// Number of rows in the plan.
rows: number
// Agent-area width divided by cols, floored; 0 when there is nothing to plan.
slotWidth: number
// Window height divided by rows, floored; 0 when there is nothing to plan.
slotHeight: number
}
/** Which existing pane to split, and in which direction, to make room for a new pane. */
export interface SpawnTarget {
targetPaneId: string
splitDirection: SplitDirection
}
// Fraction of the window width attributed to the main pane; the remainder is the agent area.
const MAIN_PANE_RATIO = 0.5
// Limits used by getColumnCount: a column holds up to MAX_ROWS panes before another column is counted.
const MAX_COLS = 2
const MAX_ROWS = 3
// Upper bound applied to both cols and rows in calculateCapacity (separate from MAX_COLS / MAX_ROWS).
const MAX_GRID_SIZE = 4
// Cells taken by the divider between two adjacent panes.
const DIVIDER_SIZE = 1
// A pane can be split only if two minimum-size panes plus the divider between them fit.
const MIN_SPLIT_WIDTH = 2 * MIN_PANE_WIDTH + DIVIDER_SIZE
const MIN_SPLIT_HEIGHT = 2 * MIN_PANE_HEIGHT + DIVIDER_SIZE
/**
 * Number of agent columns used for `paneCount` panes.
 *
 * Each column is counted as holding up to MAX_ROWS panes; the result is at
 * least 1 and never exceeds MAX_COLS. Zero or negative counts yield 1.
 */
export function getColumnCount(paneCount: number): number {
  if (paneCount <= 0) {
    return 1
  }
  const columnsNeeded = Math.ceil(paneCount / MAX_ROWS)
  return Math.min(MAX_COLS, Math.max(1, columnsNeeded))
}
/**
 * Width of one agent column when `paneCount` panes share `agentAreaWidth`.
 *
 * The dividers between columns are subtracted first, then the remaining width
 * is split evenly across the columns and floored.
 */
export function getColumnWidth(agentAreaWidth: number, paneCount: number): number {
  const columnCount = getColumnCount(paneCount)
  const usableWidth = agentAreaWidth - (columnCount - 1) * DIVIDER_SIZE
  return Math.floor(usableWidth / columnCount)
}
/**
 * Whether a column is still wide enough to be split horizontally when
 * `paneCount` panes share an agent area of `agentAreaWidth`.
 */
export function isSplittableAtCount(agentAreaWidth: number, paneCount: number): boolean {
  return MIN_SPLIT_WIDTH <= getColumnWidth(agentAreaWidth, paneCount)
}
/**
 * Finds the smallest number of panes to evict so that the remaining count is
 * splittable again.
 *
 * @returns The smallest k in 1..currentCount for which
 *   `isSplittableAtCount(agentAreaWidth, currentCount - k)` holds, or `null`
 *   when no such k exists (including when `currentCount` is below 1).
 */
export function findMinimalEvictions(agentAreaWidth: number, currentCount: number): number | null {
  let evictions = 1
  while (evictions <= currentCount) {
    const remaining = currentCount - evictions
    if (isSplittableAtCount(agentAreaWidth, remaining)) {
      return evictions
    }
    evictions += 1
  }
  return null
}
/**
 * Whether `pane` is large enough to be split in `direction`.
 *
 * A "-h" split needs the pane width to reach MIN_SPLIT_WIDTH; any other
 * direction is checked against MIN_SPLIT_HEIGHT using the pane height.
 */
export function canSplitPane(pane: TmuxPaneInfo, direction: SplitDirection): boolean {
  const isHorizontal = direction === "-h"
  return isHorizontal ? pane.width >= MIN_SPLIT_WIDTH : pane.height >= MIN_SPLIT_HEIGHT
}
/**
 * Whether `pane` can be split in at least one direction: its width reaches
 * MIN_SPLIT_WIDTH or its height reaches MIN_SPLIT_HEIGHT.
 */
export function canSplitPaneAnyDirection(pane: TmuxPaneInfo): boolean {
  if (pane.width >= MIN_SPLIT_WIDTH) {
    return true
  }
  return pane.height >= MIN_SPLIT_HEIGHT
}
/**
 * Chooses the direction in which `pane` should be split.
 *
 * @returns `null` when the pane is too small in both dimensions; the only
 *   feasible direction when just one fits; otherwise "-h" when the pane is at
 *   least as wide as it is tall and "-v" when it is taller than wide.
 */
export function getBestSplitDirection(pane: TmuxPaneInfo): SplitDirection | null {
  const fitsSideBySide = pane.width >= MIN_SPLIT_WIDTH
  const fitsStacked = pane.height >= MIN_SPLIT_HEIGHT

  if (fitsSideBySide && fitsStacked) {
    return pane.width >= pane.height ? "-h" : "-v"
  }
  if (fitsSideBySide) {
    return "-h"
  }
  if (fitsStacked) {
    return "-v"
  }
  return null
}
/**
 * Computes how many agent panes fit beside the main pane.
 *
 * The agent area is the part of the window width not attributed to the main
 * pane. Along each axis the count is
 * `floor((extent + DIVIDER_SIZE) / (minSize + DIVIDER_SIZE))`, clamped to the
 * range 0..MAX_GRID_SIZE.
 *
 * @returns Column count, row count and their product.
 */
export function calculateCapacity(
  windowWidth: number,
  windowHeight: number
): GridCapacity {
  // How many panes of at least `minSize` fit into `extent`, dividers included.
  const fitCount = (extent: number, minSize: number): number => {
    const raw = Math.floor((extent + DIVIDER_SIZE) / (minSize + DIVIDER_SIZE))
    return Math.min(MAX_GRID_SIZE, Math.max(0, raw))
  }

  const agentAreaWidth = Math.floor(windowWidth * (1 - MAIN_PANE_RATIO))
  const cols = fitCount(agentAreaWidth, MIN_PANE_WIDTH)
  const rows = fitCount(windowHeight, MIN_PANE_HEIGHT)

  return { cols, rows, total: cols * rows }
}
/**
 * Picks the grid shape (cols x rows) used to lay out `paneCount` agent panes.
 *
 * Among all shapes within the window's capacity that can hold `paneCount`
 * panes, the one with the smallest cell count wins; on equal cell counts the
 * shape with fewer rows is kept.
 *
 * Special cases:
 * - no capacity on either axis, or `paneCount === 0`: a 1x1 plan with zero slot sizes;
 * - `paneCount` larger than the capacity: no shape qualifies, so the plan stays
 *   1x1 with slots spanning the whole agent area.
 *
 * @returns The chosen shape plus slot sizes, which are the agent-area width and
 *   the window height divided by the shape and floored.
 */
export function computeGridPlan(
  windowWidth: number,
  windowHeight: number,
  paneCount: number
): GridPlan {
  const { cols: colLimit, rows: rowLimit } = calculateCapacity(windowWidth, windowHeight)

  const nothingToPlan = colLimit === 0 || rowLimit === 0 || paneCount === 0
  if (nothingToPlan) {
    return { cols: 1, rows: 1, slotWidth: 0, slotHeight: 0 }
  }

  let chosen = { cols: 1, rows: 1, area: Infinity }
  for (let r = 1; r <= rowLimit; r += 1) {
    for (let c = 1; c <= colLimit; c += 1) {
      const area = c * r
      // Skip shapes that cannot hold every pane.
      if (!(area >= paneCount)) continue

      const isSmaller = area < chosen.area
      const sameAreaFewerRows = area === chosen.area && r < chosen.rows
      if (isSmaller || sameAreaFewerRows) {
        chosen = { cols: c, rows: r, area }
      }
    }
  }

  const agentAreaWidth = Math.floor(windowWidth * (1 - MAIN_PANE_RATIO))
  return {
    cols: chosen.cols,
    rows: chosen.rows,
    slotWidth: Math.floor(agentAreaWidth / chosen.cols),
    slotHeight: Math.floor(windowHeight / chosen.rows),
  }
}
/**
 * Maps a pane's position to the grid slot it falls into.
 *
 * The column comes from the pane's left edge relative to the main pane width
 * (never below 0); the row comes from its top edge. Both are clamped to the
 * last column/row of the plan. An axis whose slot size is not positive maps to
 * index 0.
 */
export function mapPaneToSlot(
  pane: TmuxPaneInfo,
  plan: GridPlan,
  mainPaneWidth: number
): GridSlot {
  let col = 0
  if (plan.slotWidth > 0) {
    const offsetX = Math.max(0, pane.left - mainPaneWidth)
    col = Math.min(plan.cols - 1, Math.floor(offsetX / plan.slotWidth))
  }

  let row = 0
  if (plan.slotHeight > 0) {
    row = Math.min(plan.rows - 1, Math.floor(pane.top / plan.slotHeight))
  }

  return { row, col }
}
/**
 * Indexes the agent panes by the grid slot they fall into.
 *
 * Keys have the form "row:col". When several panes map to the same slot, the
 * one appearing last in `agentPanes` is the one kept.
 */
function buildOccupancy(
  agentPanes: TmuxPaneInfo[],
  plan: GridPlan,
  mainPaneWidth: number
): Map<string, TmuxPaneInfo> {
  const slots = new Map<string, TmuxPaneInfo>()
  agentPanes.forEach((pane) => {
    const { row, col } = mapPaneToSlot(pane, plan, mainPaneWidth)
    slots.set(`${row}:${col}`, pane)
  })
  return slots
}
/**
 * Scans the grid row by row, left to right, for the first unoccupied slot.
 *
 * @param occupancy - Map keyed by `"row:col"` of slots already taken.
 * @param plan - Grid plan giving the number of rows and columns.
 * @returns The first free slot, or the bottom-right slot when every slot is
 *   occupied.
 */
function findFirstEmptySlot(
  occupancy: Map<string, TmuxPaneInfo>,
  plan: GridPlan
): GridSlot {
  const { rows: rowCount, cols: colCount } = plan
  for (let r = 0; r < rowCount; r++) {
    for (let c = 0; c < colCount; c++) {
      if (occupancy.has(`${r}:${c}`)) continue
      return { row: r, col: c }
    }
  }
  // Grid is full: fall back to the last slot.
  return { row: rowCount - 1, col: colCount - 1 }
}
/**
 * Chooses which existing pane to split, and in which direction, to make room
 * for one more agent pane.
 *
 * Order of preference:
 * 1. With no agent panes yet, split the main pane horizontally. It is
 *    evaluated as if it spanned the whole window width.
 * 2. Otherwise plan a grid for one extra pane, locate the first empty slot,
 *    and split the pane to its left ("-h") or, failing that, the pane above
 *    it ("-v").
 * 3. Otherwise split the largest (width x height) agent pane for which
 *    `getBestSplitDirection` yields a direction; among equally large panes
 *    the earliest in `state.agentPanes` wins.
 *
 * @param state - Current window state.
 * @param preferredDirection - Accepted for callers but not read by this
 *   function.
 * @returns The pane/direction to split, or `null` when there is no main pane
 *   or nothing can be split.
 */
function findSplittableTarget(
  state: WindowState,
  preferredDirection?: SplitDirection
): SpawnTarget | null {
  const { mainPane, agentPanes, windowWidth, windowHeight } = state
  if (!mainPane) return null

  if (agentPanes.length === 0) {
    const fullWidthMain: TmuxPaneInfo = { ...mainPane, width: windowWidth }
    return canSplitPane(fullWidthMain, "-h")
      ? { targetPaneId: mainPane.paneId, splitDirection: "-h" }
      : null
  }

  const plan = computeGridPlan(windowWidth, windowHeight, agentPanes.length + 1)
  const occupancy = buildOccupancy(
    agentPanes,
    plan,
    Math.floor(windowWidth * MAIN_PANE_RATIO)
  )
  const { row, col } = findFirstEmptySlot(occupancy, plan)

  const leftNeighbour = occupancy.get(`${row}:${col - 1}`)
  if (leftNeighbour && canSplitPane(leftNeighbour, "-h")) {
    return { targetPaneId: leftNeighbour.paneId, splitDirection: "-h" }
  }

  const upperNeighbour = occupancy.get(`${row - 1}:${col}`)
  if (upperNeighbour && canSplitPane(upperNeighbour, "-v")) {
    return { targetPaneId: upperNeighbour.paneId, splitDirection: "-v" }
  }

  // No usable neighbour: take the largest pane that can still be split.
  let fallback: SpawnTarget | null = null
  let fallbackArea = 0
  for (const candidate of agentPanes) {
    const direction = getBestSplitDirection(candidate)
    if (direction === null) continue
    const area = candidate.width * candidate.height
    if (fallback === null || area > fallbackArea) {
      fallback = { targetPaneId: candidate.paneId, splitDirection: direction }
      fallbackArea = area
    }
  }
  return fallback
}
/**
 * Exported wrapper around `findSplittableTarget` that uses no preferred
 * direction.
 *
 * @param state - Current window state.
 * @returns The pane/direction to split, or `null` when nothing can be split.
 */
export function findSpawnTarget(state: WindowState): SpawnTarget | null {
  const target = findSplittableTarget(state)
  return target
}
/**
 * Returns the mapping with the earliest `createdAt`.
 *
 * @param mappings - Session mappings to search.
 * @returns The oldest mapping (the first one on ties), or `null` for an empty
 *   list.
 */
function findOldestSession(mappings: SessionMapping[]): SessionMapping | null {
  let oldest: SessionMapping | null = null
  for (const candidate of mappings) {
    if (oldest === null || candidate.createdAt < oldest.createdAt) {
      oldest = candidate
    }
  }
  return oldest
}
/**
 * Selects the agent pane that should be evicted first.
 *
 * Panes that have a session mapping are ranked by the mapping's `createdAt`
 * and the earliest wins (the first such pane in `agentPanes` on ties). If a
 * pane id appears in several mappings, the last mapping supplies its age.
 * When no pane has a mapping, the top-most pane is returned, with the
 * left-most breaking ties.
 *
 * @param agentPanes - Agent panes currently in the window.
 * @param sessionMappings - Known session-to-pane mappings with creation times.
 * @returns The pane to evict, or `null` when there are no agent panes.
 */
function findOldestAgentPane(
  agentPanes: TmuxPaneInfo[],
  sessionMappings: SessionMapping[]
): TmuxPaneInfo | null {
  if (agentPanes.length === 0) return null

  const createdAtByPane = new Map<string, Date>()
  sessionMappings.forEach((m) => createdAtByPane.set(m.paneId, m.createdAt))

  let oldestTracked: TmuxPaneInfo | null = null
  let oldestTime = 0
  for (const candidate of agentPanes) {
    const createdAt = createdAtByPane.get(candidate.paneId)
    if (createdAt === undefined) continue
    const time = createdAt.getTime()
    if (oldestTracked === null || time < oldestTime) {
      oldestTracked = candidate
      oldestTime = time
    }
  }
  if (oldestTracked !== null) return oldestTracked

  // No pane is tracked: fall back to the top-left-most pane.
  let topLeft = agentPanes[0]
  for (const candidate of agentPanes) {
    const isHigher = candidate.top < topLeft.top
    const isSameRowButFurtherLeft =
      candidate.top === topLeft.top && candidate.left < topLeft.left
    if (isHigher || isSameRowButFurtherLeft) {
      topLeft = candidate
    }
  }
  return topLeft
}
/**
 * Decides which pane actions are needed to show a new agent session.
 *
 * Decision order:
 * 1. No main pane, or an agent area narrower than `MIN_PANE_WIDTH`: cannot
 *    spawn.
 * 2. No agent panes yet: split the main pane horizontally if it is wide
 *    enough when treated as spanning the whole window.
 * 3. `isSplittableAtCount` holds and a split target exists: spawn by
 *    splitting that target.
 * 4. `findMinimalEvictions` reports exactly 1: close the oldest agent pane,
 *    then spawn by splitting the main pane horizontally.
 * 5. Otherwise replace the oldest agent pane in place.
 *
 * @param state - Current window state.
 * @param sessionId - Id of the session to display.
 * @param description - Description carried on the spawn/replace action.
 * @param _config - Capacity configuration; not read by this function.
 * @param sessionMappings - Known session-to-pane mappings, used to find the
 *   oldest pane and the session id it belongs to.
 * @returns Whether a pane can be provided, the actions to execute, and an
 *   optional reason. An evicted pane with no known mapping gets an empty
 *   session id on its close/replace action.
 */
export function decideSpawnActions(
  state: WindowState,
  sessionId: string,
  description: string,
  _config: CapacityConfig,
  sessionMappings: SessionMapping[]
): SpawnDecision {
  const { mainPane, agentPanes, windowWidth, windowHeight } = state
  if (!mainPane) {
    return { canSpawn: false, actions: [], reason: "no main pane found" }
  }

  const agentAreaWidth = Math.floor(windowWidth * (1 - MAIN_PANE_RATIO))
  if (agentAreaWidth < MIN_PANE_WIDTH) {
    return {
      canSpawn: false,
      actions: [],
      reason: `window too small for agent panes: ${windowWidth}x${windowHeight}`,
    }
  }

  const paneCount = agentPanes.length
  if (paneCount === 0) {
    const fullWidthMain: TmuxPaneInfo = { ...mainPane, width: windowWidth }
    if (!canSplitPane(fullWidthMain, "-h")) {
      return { canSpawn: false, actions: [], reason: "mainPane too small to split" }
    }
    return {
      canSpawn: true,
      actions: [{
        type: "spawn",
        sessionId,
        description,
        targetPaneId: mainPane.paneId,
        splitDirection: "-h"
      }]
    }
  }

  if (isSplittableAtCount(agentAreaWidth, paneCount)) {
    const target = findSplittableTarget(state)
    if (target) {
      return {
        canSpawn: true,
        actions: [{
          type: "spawn",
          sessionId,
          description,
          targetPaneId: target.targetPaneId,
          splitDirection: target.splitDirection
        }]
      }
    }
  }

  const evictionsNeeded = findMinimalEvictions(agentAreaWidth, paneCount)
  const evictee = findOldestAgentPane(agentPanes, sessionMappings)
  if (!evictee) {
    return {
      canSpawn: false,
      actions: [],
      reason: "no pane available to replace"
    }
  }

  const evicteeMapping = sessionMappings.find(m => m.paneId === evictee.paneId)
  const evicteeSessionId = evicteeMapping?.sessionId || ""

  if (evictionsNeeded === 1) {
    return {
      canSpawn: true,
      actions: [
        {
          type: "close",
          paneId: evictee.paneId,
          sessionId: evicteeSessionId
        },
        {
          type: "spawn",
          sessionId,
          description,
          targetPaneId: mainPane.paneId,
          splitDirection: "-h"
        }
      ],
      reason: "closed 1 pane to make room for split"
    }
  }

  return {
    canSpawn: true,
    actions: [{
      type: "replace",
      paneId: evictee.paneId,
      oldSessionId: evicteeSessionId,
      newSessionId: sessionId,
      description
    }],
    reason: "replaced oldest pane (no split possible)"
  }
}
/**
 * Builds the close action for a session's pane, if that pane still exists.
 *
 * Only the first mapping whose `sessionId` matches is considered.
 *
 * @param state - Current window state; its `agentPanes` are checked.
 * @param sessionId - Session whose pane should be closed.
 * @param sessionMappings - Known session-to-pane mappings.
 * @returns A `close` action, or `null` when the session has no mapping or its
 *   pane is not among the current agent panes.
 */
export function decideCloseAction(
  state: WindowState,
  sessionId: string,
  sessionMappings: SessionMapping[]
): PaneAction | null {
  for (const mapping of sessionMappings) {
    if (mapping.sessionId !== sessionId) continue
    for (const agentPane of state.agentPanes) {
      if (agentPane.paneId === mapping.paneId) {
        return { type: "close", paneId: mapping.paneId, sessionId }
      }
    }
    return null
  }
  return null
}

View File

@@ -0,0 +1,5 @@
export * from "./manager"
export * from "./types"
export * from "./pane-state-querier"
export * from "./decision-engine"
export * from "./action-executor"

View File

@@ -0,0 +1,690 @@
import { describe, test, expect, mock, beforeEach } from 'bun:test'
import type { TmuxConfig } from '../../config/schema'
import type { WindowState, PaneAction } from './types'
import type { ActionResult, ExecuteContext } from './action-executor'
// Shape returned by the mocked `executeActions` in this test file: an overall
// success flag, the id of the pane that was spawned (if any), and one result
// entry per executed action.
type ExecuteActionsResult = {
success: boolean
spawnedPaneId?: string
results: Array<{ action: PaneAction; result: ActionResult }>
}
// Default window-state mock: a 212x44 window with only the main pane.
// The `beforeEach` in the TmuxSessionManager suite replaces this
// implementation with `createWindowState()`.
const mockQueryWindowState = mock<(paneId: string) => Promise<WindowState | null>>(
async () => ({
windowWidth: 212,
windowHeight: 44,
mainPane: { paneId: '%0', width: 106, height: 44, left: 0, top: 0, title: 'main', isActive: true },
agentPanes: [],
})
)
const mockPaneExists = mock<(paneId: string) => Promise<boolean>>(async () => true)
// Batch executor mock: always succeeds and reports '%mock' as the spawned pane.
const mockExecuteActions = mock<(
actions: PaneAction[],
ctx: ExecuteContext
) => Promise<ExecuteActionsResult>>(async () => ({
success: true,
spawnedPaneId: '%mock',
results: [],
}))
// Single-action executor mock: always succeeds.
const mockExecuteAction = mock<(
action: PaneAction,
ctx: ExecuteContext
) => Promise<ActionResult>>(async () => ({ success: true }))
const mockIsInsideTmux = mock<() => boolean>(() => true)
const mockGetCurrentPaneId = mock<() => string | undefined>(() => '%0')
// Replace the modules the manager imports with the mocks above.
mock.module('./pane-state-querier', () => ({
queryWindowState: mockQueryWindowState,
paneExists: mockPaneExists,
// Pane with the largest `left`.
getRightmostAgentPane: (state: WindowState) =>
state.agentPanes.length > 0
? state.agentPanes.reduce((r, p) => (p.left > r.left ? p : r))
: null,
// Stand-in for "oldest": pane with the smallest `left`.
getOldestAgentPane: (state: WindowState) =>
state.agentPanes.length > 0
? state.agentPanes.reduce((o, p) => (p.left < o.left ? p : o))
: null,
}))
mock.module('./action-executor', () => ({
executeActions: mockExecuteActions,
executeAction: mockExecuteAction,
}))
// Timing constants are shortened here (e.g. a 500 ms ready timeout polled
// every 100 ms).
mock.module('../../shared/tmux', () => ({
isInsideTmux: mockIsInsideTmux,
getCurrentPaneId: mockGetCurrentPaneId,
POLL_INTERVAL_BACKGROUND_MS: 2000,
SESSION_TIMEOUT_MS: 600000,
SESSION_MISSING_GRACE_MS: 6000,
SESSION_READY_POLL_INTERVAL_MS: 100,
SESSION_READY_TIMEOUT_MS: 500,
}))
// Session ids that the default mocked status endpoint reports as running.
const trackedSessions = new Set<string>()
/**
 * Builds a minimal plugin context for the manager under test.
 *
 * `client.session.status` is a mock that resolves to
 * `overrides.sessionStatusResult` when one is given; otherwise it reports
 * every id currently in `trackedSessions` with type `'running'`.
 *
 * @param overrides - Optional fixed result for the status call.
 * @returns A loosely typed context with `serverUrl` and `client`.
 */
function createMockContext(overrides?: {
  sessionStatusResult?: { data?: Record<string, { type: string }> }
}) {
  const status = mock(async () => {
    const fixedResult = overrides?.sessionStatusResult
    if (fixedResult) {
      return fixedResult
    }
    const data: Record<string, { type: string }> = {}
    trackedSessions.forEach((id) => {
      data[id] = { type: 'running' }
    })
    return { data }
  })

  return {
    serverUrl: new URL('http://localhost:4096'),
    client: { session: { status } },
  } as any
}
/**
 * Builds a `session.created` event payload for the manager under test.
 *
 * @param id - Session id placed in `properties.info.id`.
 * @param parentID - Parent session id, or `undefined` for a root session.
 * @param title - Session title.
 * @returns The event object.
 */
function createSessionCreatedEvent(
  id: string,
  parentID: string | undefined,
  title: string
) {
  const info = { id, parentID, title }
  const properties = { info }
  return { type: 'session.created', properties }
}
/**
 * Builds a window state for tests: a 220x44 window whose main pane `%0`
 * takes the left 110 columns and which has no agent panes.
 *
 * @param overrides - Fields that replace the defaults (shallow merge).
 * @returns The resulting window state.
 */
function createWindowState(overrides?: Partial<WindowState>): WindowState {
  const defaults: WindowState = {
    windowWidth: 220,
    windowHeight: 44,
    mainPane: { paneId: '%0', width: 110, height: 44, left: 0, top: 0, title: 'main', isActive: true },
    agentPanes: [],
  }
  return { ...defaults, ...overrides }
}
// Behavioural tests for TmuxSessionManager. The pane-state querier, the
// action executor and the shared tmux helpers are all mocked, so these tests
// assert on the actions handed to the executor mocks rather than on tmux.
describe('TmuxSessionManager', () => {
// Reset call history and restore the default mock behaviour before each test.
// The executeActions mock records every spawned session id in
// `trackedSessions`, so the mocked status endpoint reports it as running.
beforeEach(() => {
mockQueryWindowState.mockClear()
mockPaneExists.mockClear()
mockExecuteActions.mockClear()
mockExecuteAction.mockClear()
mockIsInsideTmux.mockClear()
mockGetCurrentPaneId.mockClear()
trackedSessions.clear()
mockQueryWindowState.mockImplementation(async () => createWindowState())
mockExecuteActions.mockImplementation(async (actions) => {
for (const action of actions) {
if (action.type === 'spawn') {
trackedSessions.add(action.sessionId)
}
}
return {
success: true,
spawnedPaneId: '%mock',
results: [],
}
})
})
// NOTE(review): these constructor tests only assert that construction
// succeeds; they do not observe the enabled/disabled state directly.
describe('constructor', () => {
test('enabled when config.enabled=true and isInsideTmux=true', async () => {
//#given
mockIsInsideTmux.mockReturnValue(true)
const { TmuxSessionManager } = await import('./manager')
const ctx = createMockContext()
const config: TmuxConfig = {
enabled: true,
layout: 'main-vertical',
main_pane_size: 60,
main_pane_min_width: 80,
agent_pane_min_width: 40,
}
//#when
const manager = new TmuxSessionManager(ctx, config)
//#then
expect(manager).toBeDefined()
})
test('disabled when config.enabled=true but isInsideTmux=false', async () => {
//#given
mockIsInsideTmux.mockReturnValue(false)
const { TmuxSessionManager } = await import('./manager')
const ctx = createMockContext()
const config: TmuxConfig = {
enabled: true,
layout: 'main-vertical',
main_pane_size: 60,
main_pane_min_width: 80,
agent_pane_min_width: 40,
}
//#when
const manager = new TmuxSessionManager(ctx, config)
//#then
expect(manager).toBeDefined()
})
test('disabled when config.enabled=false', async () => {
//#given
mockIsInsideTmux.mockReturnValue(true)
const { TmuxSessionManager } = await import('./manager')
const ctx = createMockContext()
const config: TmuxConfig = {
enabled: false,
layout: 'main-vertical',
main_pane_size: 60,
main_pane_min_width: 80,
agent_pane_min_width: 40,
}
//#when
const manager = new TmuxSessionManager(ctx, config)
//#then
expect(manager).toBeDefined()
})
})
describe('onSessionCreated', () => {
test('first agent spawns from source pane via decision engine', async () => {
//#given
mockIsInsideTmux.mockReturnValue(true)
mockQueryWindowState.mockImplementation(async () => createWindowState())
const { TmuxSessionManager } = await import('./manager')
const ctx = createMockContext()
const config: TmuxConfig = {
enabled: true,
layout: 'main-vertical',
main_pane_size: 60,
main_pane_min_width: 80,
agent_pane_min_width: 40,
}
const manager = new TmuxSessionManager(ctx, config)
const event = createSessionCreatedEvent(
'ses_child',
'ses_parent',
'Background: Test Task'
)
//#when
await manager.onSessionCreated(event)
//#then
expect(mockQueryWindowState).toHaveBeenCalledTimes(1)
expect(mockExecuteActions).toHaveBeenCalledTimes(1)
const call = mockExecuteActions.mock.calls[0]
expect(call).toBeDefined()
const actionsArg = call![0]
expect(actionsArg).toHaveLength(1)
expect(actionsArg[0].type).toBe('spawn')
if (actionsArg[0].type === 'spawn') {
expect(actionsArg[0].sessionId).toBe('ses_child')
expect(actionsArg[0].description).toBe('Background: Test Task')
expect(actionsArg[0].targetPaneId).toBe('%0')
expect(actionsArg[0].splitDirection).toBe('-h')
}
})
test('second agent spawns with correct split direction', async () => {
//#given
mockIsInsideTmux.mockReturnValue(true)
// First state query sees no agent panes; later queries see one agent pane.
let callCount = 0
mockQueryWindowState.mockImplementation(async () => {
callCount++
if (callCount === 1) {
return createWindowState()
}
return createWindowState({
agentPanes: [
{
paneId: '%1',
width: 40,
height: 44,
left: 100,
top: 0,
title: 'omo-subagent-Task 1',
isActive: false,
},
],
})
})
const { TmuxSessionManager } = await import('./manager')
const ctx = createMockContext()
const config: TmuxConfig = {
enabled: true,
layout: 'main-vertical',
main_pane_size: 60,
main_pane_min_width: 80,
agent_pane_min_width: 40,
}
const manager = new TmuxSessionManager(ctx, config)
//#when - first agent
await manager.onSessionCreated(
createSessionCreatedEvent('ses_1', 'ses_parent', 'Task 1')
)
mockExecuteActions.mockClear()
//#when - second agent
await manager.onSessionCreated(
createSessionCreatedEvent('ses_2', 'ses_parent', 'Task 2')
)
//#then
expect(mockExecuteActions).toHaveBeenCalledTimes(1)
const call = mockExecuteActions.mock.calls[0]
expect(call).toBeDefined()
const actionsArg = call![0]
expect(actionsArg).toHaveLength(1)
expect(actionsArg[0].type).toBe('spawn')
})
test('does NOT spawn pane when session has no parentID', async () => {
//#given
mockIsInsideTmux.mockReturnValue(true)
const { TmuxSessionManager } = await import('./manager')
const ctx = createMockContext()
const config: TmuxConfig = {
enabled: true,
layout: 'main-vertical',
main_pane_size: 60,
main_pane_min_width: 80,
agent_pane_min_width: 40,
}
const manager = new TmuxSessionManager(ctx, config)
const event = createSessionCreatedEvent('ses_root', undefined, 'Root Session')
//#when
await manager.onSessionCreated(event)
//#then
expect(mockExecuteActions).toHaveBeenCalledTimes(0)
})
test('does NOT spawn pane when disabled', async () => {
//#given
mockIsInsideTmux.mockReturnValue(true)
const { TmuxSessionManager } = await import('./manager')
const ctx = createMockContext()
const config: TmuxConfig = {
enabled: false,
layout: 'main-vertical',
main_pane_size: 60,
main_pane_min_width: 80,
agent_pane_min_width: 40,
}
const manager = new TmuxSessionManager(ctx, config)
const event = createSessionCreatedEvent(
'ses_child',
'ses_parent',
'Background: Test Task'
)
//#when
await manager.onSessionCreated(event)
//#then
expect(mockExecuteActions).toHaveBeenCalledTimes(0)
})
test('does NOT spawn pane for non session.created event type', async () => {
//#given
mockIsInsideTmux.mockReturnValue(true)
const { TmuxSessionManager } = await import('./manager')
const ctx = createMockContext()
const config: TmuxConfig = {
enabled: true,
layout: 'main-vertical',
main_pane_size: 60,
main_pane_min_width: 80,
agent_pane_min_width: 40,
}
const manager = new TmuxSessionManager(ctx, config)
const event = {
type: 'session.deleted',
properties: {
info: { id: 'ses_child', parentID: 'ses_parent', title: 'Task' },
},
}
//#when
await manager.onSessionCreated(event)
//#then
expect(mockExecuteActions).toHaveBeenCalledTimes(0)
})
test('replaces oldest agent when unsplittable (small window)', async () => {
//#given - small window where split is not possible
mockIsInsideTmux.mockReturnValue(true)
mockQueryWindowState.mockImplementation(async () =>
createWindowState({
windowWidth: 160,
windowHeight: 11,
agentPanes: [
{
paneId: '%1',
width: 40,
height: 11,
left: 80,
top: 0,
title: 'omo-subagent-Task 1',
isActive: false,
},
],
})
)
const { TmuxSessionManager } = await import('./manager')
const ctx = createMockContext()
const config: TmuxConfig = {
enabled: true,
layout: 'main-vertical',
main_pane_size: 60,
main_pane_min_width: 120,
agent_pane_min_width: 40,
}
const manager = new TmuxSessionManager(ctx, config)
//#when
await manager.onSessionCreated(
createSessionCreatedEvent('ses_new', 'ses_parent', 'New Task')
)
//#then - with small window, replace action is used instead of close+spawn
expect(mockExecuteActions).toHaveBeenCalledTimes(1)
const call = mockExecuteActions.mock.calls[0]
expect(call).toBeDefined()
const actionsArg = call![0]
expect(actionsArg).toHaveLength(1)
expect(actionsArg[0].type).toBe('replace')
})
})
describe('onSessionDeleted', () => {
test('closes pane when tracked session is deleted', async () => {
//#given
mockIsInsideTmux.mockReturnValue(true)
// First query (spawn) sees no agent panes; the query made on delete sees
// the spawned '%mock' pane so the close action can be produced.
let stateCallCount = 0
mockQueryWindowState.mockImplementation(async () => {
stateCallCount++
if (stateCallCount === 1) {
return createWindowState()
}
return createWindowState({
agentPanes: [
{
paneId: '%mock',
width: 40,
height: 44,
left: 100,
top: 0,
title: 'omo-subagent-Task',
isActive: false,
},
],
})
})
const { TmuxSessionManager } = await import('./manager')
const ctx = createMockContext()
const config: TmuxConfig = {
enabled: true,
layout: 'main-vertical',
main_pane_size: 60,
main_pane_min_width: 80,
agent_pane_min_width: 40,
}
const manager = new TmuxSessionManager(ctx, config)
await manager.onSessionCreated(
createSessionCreatedEvent(
'ses_child',
'ses_parent',
'Background: Test Task'
)
)
mockExecuteAction.mockClear()
//#when
await manager.onSessionDeleted({ sessionID: 'ses_child' })
//#then
expect(mockExecuteAction).toHaveBeenCalledTimes(1)
const call = mockExecuteAction.mock.calls[0]
expect(call).toBeDefined()
expect(call![0]).toEqual({
type: 'close',
paneId: '%mock',
sessionId: 'ses_child',
})
})
test('does nothing when untracked session is deleted', async () => {
//#given
mockIsInsideTmux.mockReturnValue(true)
const { TmuxSessionManager } = await import('./manager')
const ctx = createMockContext()
const config: TmuxConfig = {
enabled: true,
layout: 'main-vertical',
main_pane_size: 60,
main_pane_min_width: 80,
agent_pane_min_width: 40,
}
const manager = new TmuxSessionManager(ctx, config)
//#when
await manager.onSessionDeleted({ sessionID: 'ses_unknown' })
//#then
expect(mockExecuteAction).toHaveBeenCalledTimes(0)
})
})
describe('cleanup', () => {
test('closes all tracked panes', async () => {
//#given
mockIsInsideTmux.mockReturnValue(true)
// Each executeActions call reports a distinct spawned pane id (%1, %2, ...).
// This override does not add to `trackedSessions`, so the mocked status
// endpoint never reports the sessions.
let callCount = 0
mockExecuteActions.mockImplementation(async () => {
callCount++
return {
success: true,
spawnedPaneId: `%${callCount}`,
results: [],
}
})
const { TmuxSessionManager } = await import('./manager')
const ctx = createMockContext()
const config: TmuxConfig = {
enabled: true,
layout: 'main-vertical',
main_pane_size: 60,
main_pane_min_width: 80,
agent_pane_min_width: 40,
}
const manager = new TmuxSessionManager(ctx, config)
await manager.onSessionCreated(
createSessionCreatedEvent('ses_1', 'ses_parent', 'Task 1')
)
await manager.onSessionCreated(
createSessionCreatedEvent('ses_2', 'ses_parent', 'Task 2')
)
mockExecuteAction.mockClear()
//#when
await manager.cleanup()
//#then
expect(mockExecuteAction).toHaveBeenCalledTimes(2)
})
})
})
// Tests for the decision-engine module: the real module is imported and
// driven with hand-built window states.
describe('DecisionEngine', () => {
describe('calculateCapacity', () => {
test('calculates correct 2D grid capacity', async () => {
//#given
const { calculateCapacity } = await import('./decision-engine')
//#when
const result = calculateCapacity(212, 44)
//#then - availableWidth=106, cols=(106+1)/(52+1)=2, rows=(44+1)/(11+1)=3 (accounting for dividers)
expect(result.cols).toBe(2)
expect(result.rows).toBe(3)
expect(result.total).toBe(6)
})
test('returns 0 cols when agent area too narrow', async () => {
//#given
const { calculateCapacity } = await import('./decision-engine')
//#when
const result = calculateCapacity(100, 44)
//#then - availableWidth=50, cols=floor((50+1)/(52+1))=0 (accounting for dividers)
expect(result.cols).toBe(0)
expect(result.total).toBe(0)
})
})
describe('decideSpawnActions', () => {
test('returns spawn action with splitDirection when under capacity', async () => {
//#given
const { decideSpawnActions } = await import('./decision-engine')
const state: WindowState = {
windowWidth: 212,
windowHeight: 44,
mainPane: {
paneId: '%0',
width: 106,
height: 44,
left: 0,
top: 0,
title: 'main',
isActive: true,
},
agentPanes: [],
}
//#when
const decision = decideSpawnActions(
state,
'ses_1',
'Test Task',
{ mainPaneMinWidth: 120, agentPaneWidth: 40 },
[]
)
//#then
expect(decision.canSpawn).toBe(true)
expect(decision.actions).toHaveLength(1)
expect(decision.actions[0].type).toBe('spawn')
if (decision.actions[0].type === 'spawn') {
expect(decision.actions[0].sessionId).toBe('ses_1')
expect(decision.actions[0].description).toBe('Test Task')
expect(decision.actions[0].targetPaneId).toBe('%0')
expect(decision.actions[0].splitDirection).toBe('-h')
}
})
test('returns replace when split not possible', async () => {
//#given - small window where split is never possible
const { decideSpawnActions } = await import('./decision-engine')
const state: WindowState = {
windowWidth: 160,
windowHeight: 11,
mainPane: {
paneId: '%0',
width: 80,
height: 11,
left: 0,
top: 0,
title: 'main',
isActive: true,
},
agentPanes: [
{
paneId: '%1',
width: 80,
height: 11,
left: 80,
top: 0,
title: 'omo-subagent-Old',
isActive: false,
},
],
}
const sessionMappings = [
{ sessionId: 'ses_old', paneId: '%1', createdAt: new Date('2024-01-01') },
]
//#when
const decision = decideSpawnActions(
state,
'ses_new',
'New Task',
{ mainPaneMinWidth: 120, agentPaneWidth: 40 },
sessionMappings
)
//#then - agent area (80) < MIN_SPLIT_WIDTH (105), so replace is used
expect(decision.canSpawn).toBe(true)
expect(decision.actions).toHaveLength(1)
expect(decision.actions[0].type).toBe('replace')
})
test('returns canSpawn=false when window too small', async () => {
//#given
const { decideSpawnActions } = await import('./decision-engine')
const state: WindowState = {
windowWidth: 60,
windowHeight: 5,
mainPane: {
paneId: '%0',
width: 30,
height: 5,
left: 0,
top: 0,
title: 'main',
isActive: true,
},
agentPanes: [],
}
//#when
const decision = decideSpawnActions(
state,
'ses_1',
'Test Task',
{ mainPaneMinWidth: 120, agentPaneWidth: 40 },
[]
)
//#then
expect(decision.canSpawn).toBe(false)
expect(decision.reason).toContain('too small')
})
})
})

View File

@@ -0,0 +1,396 @@
import type { PluginInput } from "@opencode-ai/plugin"
import type { TmuxConfig } from "../../config/schema"
import type { TrackedSession, CapacityConfig } from "./types"
import {
isInsideTmux,
getCurrentPaneId,
POLL_INTERVAL_BACKGROUND_MS,
SESSION_MISSING_GRACE_MS,
SESSION_READY_POLL_INTERVAL_MS,
SESSION_READY_TIMEOUT_MS,
} from "../../shared/tmux"
import { log } from "../../shared"
import { queryWindowState } from "./pane-state-querier"
import { decideSpawnActions, decideCloseAction, type SessionMapping } from "./decision-engine"
import { executeActions, executeAction } from "./action-executor"
// Client type taken from the plugin input, so it tracks the plugin SDK.
type OpencodeClient = PluginInput["client"]
// Minimal shape of the events this manager inspects. Every field below
// `type` is optional, so handlers must check for presence before use.
interface SessionCreatedEvent {
type: string
properties?: { info?: { id?: string; parentID?: string; title?: string } }
}
// Maximum age of a tracked session (10 minutes, in ms) before the poller
// closes its pane.
const SESSION_TIMEOUT_MS = 10 * 60 * 1000
/**
* State-first Tmux Session Manager
*
* Architecture:
* 1. QUERY: Get actual tmux pane state (source of truth)
* 2. DECIDE: Pure function determines actions based on state
* 3. EXECUTE: Execute actions with verification
* 4. UPDATE: Update internal cache only after tmux confirms success
*
* The internal `sessions` Map is just a cache for sessionId<->paneId mapping.
* The REAL source of truth is always queried from tmux.
*/
export class TmuxSessionManager {
private client: OpencodeClient
private tmuxConfig: TmuxConfig
private serverUrl: string
// Pane the manager was created in; used as the target for window-state queries.
private sourcePaneId: string | undefined
// Cache of sessions that currently own a pane, keyed by session id.
private sessions = new Map<string, TrackedSession>()
// Session ids whose spawn is in flight; guards against duplicate handling.
private pendingSessions = new Set<string>()
private pollInterval?: ReturnType<typeof setInterval>
/**
* @param ctx - Plugin input; `client` and `serverUrl` are read from it.
* @param tmuxConfig - Tmux feature configuration.
*
* When `ctx.serverUrl` is absent the URL falls back to
* `http://localhost:<OPENCODE_PORT or 4096>`.
*/
constructor(ctx: PluginInput, tmuxConfig: TmuxConfig) {
this.client = ctx.client
this.tmuxConfig = tmuxConfig
const defaultPort = process.env.OPENCODE_PORT ?? "4096"
this.serverUrl = ctx.serverUrl?.toString() ?? `http://localhost:${defaultPort}`
this.sourcePaneId = getCurrentPaneId()
log("[tmux-session-manager] initialized", {
configEnabled: this.tmuxConfig.enabled,
tmuxConfig: this.tmuxConfig,
serverUrl: this.serverUrl,
sourcePaneId: this.sourcePaneId,
})
}
/** True only when the feature is enabled in config AND we are inside tmux. */
private isEnabled(): boolean {
return this.tmuxConfig.enabled && isInsideTmux()
}
/** Maps the tmux config's minimum-width settings to the decision engine's config shape. */
private getCapacityConfig(): CapacityConfig {
return {
mainPaneMinWidth: this.tmuxConfig.main_pane_min_width,
agentPaneWidth: this.tmuxConfig.agent_pane_min_width,
}
}
/** Snapshot of the session cache in the form the decision engine expects. */
private getSessionMappings(): SessionMapping[] {
return Array.from(this.sessions.values()).map((s) => ({
sessionId: s.sessionId,
paneId: s.paneId,
createdAt: s.createdAt,
}))
}
/**
* Polls the session status endpoint until `sessionId` appears in the result
* or SESSION_READY_TIMEOUT_MS has elapsed. Errors from the status call are
* logged and polling continues.
*
* @returns `true` if the session appeared, `false` on timeout.
*/
private async waitForSessionReady(sessionId: string): Promise<boolean> {
const startTime = Date.now()
while (Date.now() - startTime < SESSION_READY_TIMEOUT_MS) {
try {
const statusResult = await this.client.session.status({ path: undefined })
const allStatuses = (statusResult.data ?? {}) as Record<string, { type: string }>
if (allStatuses[sessionId]) {
log("[tmux-session-manager] session ready", {
sessionId,
status: allStatuses[sessionId].type,
waitedMs: Date.now() - startTime,
})
return true
}
} catch (err) {
log("[tmux-session-manager] session status check error", { error: String(err) })
}
await new Promise((resolve) => setTimeout(resolve, SESSION_READY_POLL_INTERVAL_MS))
}
log("[tmux-session-manager] session ready timeout", {
sessionId,
timeoutMs: SESSION_READY_TIMEOUT_MS,
})
return false
}
/**
* Handles a `session.created` event for a child session by providing a tmux
* pane for it.
*
* The event is ignored when the manager is disabled, the event type is not
* `session.created`, the session has no id or no parent id, the session is
* already tracked or pending, or no source pane id is known.
*
* Otherwise: query the window state, ask the decision engine for actions,
* execute them, drop closed/replaced sessions from the cache, and on success
* record the new session. Polling starts after a successful spawn.
*/
async onSessionCreated(event: SessionCreatedEvent): Promise<void> {
const enabled = this.isEnabled()
log("[tmux-session-manager] onSessionCreated called", {
enabled,
tmuxConfigEnabled: this.tmuxConfig.enabled,
isInsideTmux: isInsideTmux(),
eventType: event.type,
infoId: event.properties?.info?.id,
infoParentID: event.properties?.info?.parentID,
})
if (!enabled) return
if (event.type !== "session.created") return
const info = event.properties?.info
// Only child sessions (those with a parentID) get a pane.
if (!info?.id || !info?.parentID) return
const sessionId = info.id
const title = info.title ?? "Subagent"
if (this.sessions.has(sessionId) || this.pendingSessions.has(sessionId)) {
log("[tmux-session-manager] session already tracked or pending", { sessionId })
return
}
if (!this.sourcePaneId) {
log("[tmux-session-manager] no source pane id")
return
}
// Mark as pending for the duration of the spawn; cleared in `finally`.
this.pendingSessions.add(sessionId)
try {
const state = await queryWindowState(this.sourcePaneId)
if (!state) {
log("[tmux-session-manager] failed to query window state")
return
}
log("[tmux-session-manager] window state queried", {
windowWidth: state.windowWidth,
mainPane: state.mainPane?.paneId,
agentPaneCount: state.agentPanes.length,
agentPanes: state.agentPanes.map((p) => p.paneId),
})
const decision = decideSpawnActions(
state,
sessionId,
title,
this.getCapacityConfig(),
this.getSessionMappings()
)
log("[tmux-session-manager] spawn decision", {
canSpawn: decision.canSpawn,
reason: decision.reason,
actionCount: decision.actions.length,
actions: decision.actions.map((a) => {
if (a.type === "close") return { type: "close", paneId: a.paneId }
if (a.type === "replace") return { type: "replace", paneId: a.paneId, newSessionId: a.newSessionId }
return { type: "spawn", sessionId: a.sessionId }
}),
})
if (!decision.canSpawn) {
log("[tmux-session-manager] cannot spawn", { reason: decision.reason })
return
}
const result = await executeActions(
decision.actions,
{ config: this.tmuxConfig, serverUrl: this.serverUrl, windowState: state }
)
// Drop cache entries for sessions whose pane was closed or taken over.
for (const { action, result: actionResult } of result.results) {
if (action.type === "close" && actionResult.success) {
this.sessions.delete(action.sessionId)
log("[tmux-session-manager] removed closed session from cache", {
sessionId: action.sessionId,
})
}
if (action.type === "replace" && actionResult.success) {
this.sessions.delete(action.oldSessionId)
log("[tmux-session-manager] removed replaced session from cache", {
oldSessionId: action.oldSessionId,
newSessionId: action.newSessionId,
})
}
}
if (result.success && result.spawnedPaneId) {
// The session is tracked even if it never shows up in the status
// endpoint within the timeout; the timeout is only logged.
const sessionReady = await this.waitForSessionReady(sessionId)
if (!sessionReady) {
log("[tmux-session-manager] session not ready after timeout, tracking anyway", {
sessionId,
paneId: result.spawnedPaneId,
})
}
const now = Date.now()
this.sessions.set(sessionId, {
sessionId,
paneId: result.spawnedPaneId,
description: title,
createdAt: new Date(now),
lastSeenAt: new Date(now),
})
log("[tmux-session-manager] pane spawned and tracked", {
sessionId,
paneId: result.spawnedPaneId,
sessionReady,
})
this.startPolling()
} else {
log("[tmux-session-manager] spawn failed", {
success: result.success,
results: result.results.map((r) => ({
type: r.action.type,
success: r.result.success,
error: r.result.error,
})),
})
}
} finally {
this.pendingSessions.delete(sessionId)
}
}
/**
* Closes the pane of a tracked session that was deleted and removes it from
* the cache. Untracked sessions are ignored. If the window state cannot be
* queried, the cache entry is removed without executing a close action.
*/
async onSessionDeleted(event: { sessionID: string }): Promise<void> {
if (!this.isEnabled()) return
if (!this.sourcePaneId) return
const tracked = this.sessions.get(event.sessionID)
if (!tracked) return
log("[tmux-session-manager] onSessionDeleted", { sessionId: event.sessionID })
const state = await queryWindowState(this.sourcePaneId)
if (!state) {
this.sessions.delete(event.sessionID)
return
}
const closeAction = decideCloseAction(state, event.sessionID, this.getSessionMappings())
if (closeAction) {
await executeAction(closeAction, { config: this.tmuxConfig, serverUrl: this.serverUrl, windowState: state })
}
this.sessions.delete(event.sessionID)
if (this.sessions.size === 0) {
this.stopPolling()
}
}
/** Starts the background poll timer; a no-op when one is already running. */
private startPolling(): void {
if (this.pollInterval) return
this.pollInterval = setInterval(
() => this.pollSessions(),
POLL_INTERVAL_BACKGROUND_MS,
)
log("[tmux-session-manager] polling started")
}
/** Stops the background poll timer if it is running. */
private stopPolling(): void {
if (this.pollInterval) {
clearInterval(this.pollInterval)
this.pollInterval = undefined
log("[tmux-session-manager] polling stopped")
}
}
/**
* One poll tick: fetches all session statuses and closes the pane of every
* tracked session that is idle, has been missing from the status result for
* at least SESSION_MISSING_GRACE_MS, or is older than SESSION_TIMEOUT_MS.
* Stops polling when nothing is tracked. Errors are logged, not thrown.
*/
private async pollSessions(): Promise<void> {
if (this.sessions.size === 0) {
this.stopPolling()
return
}
try {
const statusResult = await this.client.session.status({ path: undefined })
const allStatuses = (statusResult.data ?? {}) as Record<string, { type: string }>
log("[tmux-session-manager] pollSessions", {
trackedSessions: Array.from(this.sessions.keys()),
allStatusKeys: Object.keys(allStatuses),
})
const now = Date.now()
// Collect first, close afterwards, so the map is not mutated while iterating.
const sessionsToClose: string[] = []
for (const [sessionId, tracked] of this.sessions.entries()) {
const status = allStatuses[sessionId]
const isIdle = status?.type === "idle"
if (status) {
tracked.lastSeenAt = new Date(now)
}
// Time since the session was last present in a status result (0 if present now).
const missingSince = !status ? now - tracked.lastSeenAt.getTime() : 0
const missingTooLong = missingSince >= SESSION_MISSING_GRACE_MS
const isTimedOut = now - tracked.createdAt.getTime() > SESSION_TIMEOUT_MS
log("[tmux-session-manager] session check", {
sessionId,
statusType: status?.type,
isIdle,
missingSince,
missingTooLong,
isTimedOut,
shouldClose: isIdle || missingTooLong || isTimedOut,
})
if (isIdle || missingTooLong || isTimedOut) {
sessionsToClose.push(sessionId)
}
}
for (const sessionId of sessionsToClose) {
log("[tmux-session-manager] closing session due to poll", { sessionId })
await this.closeSessionById(sessionId)
}
} catch (err) {
log("[tmux-session-manager] poll error", { error: String(err) })
}
}
/**
* Closes the pane of a tracked session and removes it from the cache. The
* close action is only executed when the window state can be queried; the
* cache entry is removed either way. Stops polling when the cache empties.
*/
private async closeSessionById(sessionId: string): Promise<void> {
const tracked = this.sessions.get(sessionId)
if (!tracked) return
log("[tmux-session-manager] closing session pane", {
sessionId,
paneId: tracked.paneId,
})
const state = this.sourcePaneId ? await queryWindowState(this.sourcePaneId) : null
if (state) {
await executeAction(
{ type: "close", paneId: tracked.paneId, sessionId },
{ config: this.tmuxConfig, serverUrl: this.serverUrl, windowState: state }
)
}
this.sessions.delete(sessionId)
if (this.sessions.size === 0) {
this.stopPolling()
}
}
/**
* Returns an event handler that forwards every event to `onSessionCreated`,
* which itself ignores event types other than `session.created`.
*/
createEventHandler(): (input: { event: { type: string; properties?: unknown } }) => Promise<void> {
return async (input) => {
await this.onSessionCreated(input.event as SessionCreatedEvent)
}
}
/**
* Stops polling, issues close actions for all tracked panes in parallel
* (when the window state can be queried), and clears the cache. A rejected
* close is logged per pane and does not abort the others.
*/
async cleanup(): Promise<void> {
this.stopPolling()
if (this.sessions.size > 0) {
log("[tmux-session-manager] closing all panes", { count: this.sessions.size })
const state = this.sourcePaneId ? await queryWindowState(this.sourcePaneId) : null
if (state) {
const closePromises = Array.from(this.sessions.values()).map((s) =>
executeAction(
{ type: "close", paneId: s.paneId, sessionId: s.sessionId },
{ config: this.tmuxConfig, serverUrl: this.serverUrl, windowState: state }
).catch((err) =>
log("[tmux-session-manager] cleanup error for pane", {
paneId: s.paneId,
error: String(err),
}),
),
)
await Promise.all(closePromises)
}
this.sessions.clear()
}
log("[tmux-session-manager] cleanup complete")
}
}

View File

@@ -0,0 +1,73 @@
import { spawn } from "bun"
import type { WindowState, TmuxPaneInfo } from "./types"
import { getTmuxPath } from "../../tools/interactive-bash/utils"
import { log } from "../../shared"
/**
 * Snapshot the pane layout of the tmux window that contains `sourcePaneId`.
 *
 * Runs `tmux list-panes` with a comma-separated format string and parses one
 * pane per output line. The pane whose ID equals `sourcePaneId` becomes
 * `mainPane`; every other pane is reported in `agentPanes`, ordered by left
 * offset and then top offset.
 *
 * @param sourcePaneId tmux pane ID used both as the `-t` target and to pick the main pane.
 * @returns The window state, or `null` when tmux is unavailable, `list-panes`
 *   exits non-zero, no pane lines are produced, or the source pane is not
 *   among the parsed panes.
 */
export async function queryWindowState(sourcePaneId: string): Promise<WindowState | null> {
  const tmux = await getTmuxPath()
  if (!tmux) return null

  const proc = spawn(
    [
      tmux,
      "list-panes",
      "-t",
      sourcePaneId,
      "-F",
      "#{pane_id},#{pane_width},#{pane_height},#{pane_left},#{pane_top},#{pane_title},#{pane_active},#{window_width},#{window_height}",
    ],
    { stdout: "pipe", stderr: "pipe" }
  )
  const exitCode = await proc.exited
  const stdout = await new Response(proc.stdout).text()
  if (exitCode !== 0) {
    log("[pane-state-querier] list-panes failed", { exitCode })
    return null
  }

  const lines = stdout.trim().split("\n").filter(Boolean)
  if (lines.length === 0) return null

  // The format has 5 fixed fields before the title and 3 after it.
  const LEADING_FIELDS = 5
  const TRAILING_FIELDS = 3

  let windowWidth = 0
  let windowHeight = 0
  const panes: TmuxPaneInfo[] = []
  for (const line of lines) {
    const fields = line.split(",")
    // A line with fewer fields than the format defines cannot be interpreted; skip it.
    if (fields.length < LEADING_FIELDS + 1 + TRAILING_FIELDS) continue

    const [paneId, widthStr, heightStr, leftStr, topStr] = fields
    // The pane title is free text and may contain commas, so the trailing
    // fields are taken from the end of the line and whatever remains in the
    // middle is re-joined as the title.
    const [activeStr, windowWidthStr, windowHeightStr] = fields.slice(-TRAILING_FIELDS)
    const title = fields.slice(LEADING_FIELDS, -TRAILING_FIELDS).join(",")

    const width = parseInt(widthStr, 10)
    const height = parseInt(heightStr, 10)
    const left = parseInt(leftStr, 10)
    const top = parseInt(topStr, 10)
    const isActive = activeStr === "1"
    windowWidth = parseInt(windowWidthStr, 10)
    windowHeight = parseInt(windowHeightStr, 10)

    if (!isNaN(width) && !isNaN(left) && !isNaN(height) && !isNaN(top)) {
      panes.push({ paneId, width, height, left, top, title, isActive })
    }
  }

  panes.sort((a, b) => a.left - b.left || a.top - b.top)

  const mainPane = panes.find((p) => p.paneId === sourcePaneId)
  if (!mainPane) {
    log("[pane-state-querier] CRITICAL: sourcePaneId not found in panes", {
      sourcePaneId,
      availablePanes: panes.map((p) => p.paneId),
    })
    return null
  }

  const agentPanes = panes.filter((p) => p.paneId !== mainPane.paneId)
  log("[pane-state-querier] window state", {
    windowWidth,
    windowHeight,
    mainPane: mainPane.paneId,
    agentPaneCount: agentPanes.length,
  })
  return { windowWidth, windowHeight, mainPane, agentPanes }
}

View File

@@ -0,0 +1,45 @@
/** Bookkeeping record the tmux session manager keeps for a session it tracks. */
export interface TrackedSession {
/** ID of the tracked session. */
sessionId: string
/** ID of the tmux pane associated with the session. */
paneId: string
/** Free-text description of the session (presumably shown for the pane; confirm against the spawn code). */
description: string
/** Creation time of the record. */
createdAt: Date
/** NOTE(review): presumably the last time the session was observed; confirm against the polling code. */
lastSeenAt: Date
}
// Minimum pane width (presumably in terminal columns; confirm against the decision engine).
export const MIN_PANE_WIDTH = 52
// Minimum pane height (presumably in terminal rows; confirm against the decision engine).
export const MIN_PANE_HEIGHT = 11
/** Geometry and metadata describing a single tmux pane. */
export interface TmuxPaneInfo {
/** tmux pane ID. */
paneId: string
/** Pane width. */
width: number
/** Pane height. */
height: number
/** Horizontal offset of the pane. */
left: number
/** Vertical offset of the pane. */
top: number
/** Pane title. */
title: string
/** Whether this is the active pane. */
isActive: boolean
}
/** Snapshot of one tmux window: its overall size plus the panes it contains. */
export interface WindowState {
/** Window width. */
windowWidth: number
/** Window height. */
windowHeight: number
/** The main pane, or `null` when there is none. */
mainPane: TmuxPaneInfo | null
/** The agent panes in the window. */
agentPanes: TmuxPaneInfo[]
}
// Direction flag for a pane split (looks like the tmux split-window flags: "-h" / "-v"; confirm against the executor).
export type SplitDirection = "-h" | "-v"
/**
 * A single pane operation, discriminated by `type`:
 * - "close": close the pane `paneId` that belongs to `sessionId`.
 * - "spawn": create a pane for `sessionId` by splitting `targetPaneId` in `splitDirection`.
 * - "replace": reuse the pane `paneId`, swapping `oldSessionId` for `newSessionId`.
 */
export type PaneAction =
| { type: "close"; paneId: string; sessionId: string }
| { type: "spawn"; sessionId: string; description: string; targetPaneId: string; splitDirection: SplitDirection }
| { type: "replace"; paneId: string; oldSessionId: string; newSessionId: string; description: string }
/** Outcome of deciding whether a new pane can be spawned. */
export interface SpawnDecision {
/** Whether spawning is possible. */
canSpawn: boolean
/** Pane actions associated with the decision (presumably executed in order; confirm against the caller). */
actions: PaneAction[]
/** Optional human-readable explanation of the decision. */
reason?: string
}
/** Width settings for capacity calculations (NOTE(review): units and exact use are not visible here; confirm against the decision engine). */
export interface CapacityConfig {
/** Minimum width of the main pane. */
mainPaneMinWidth: number
/** Width of an agent pane. */
agentPaneWidth: number
}

View File

@@ -1,16 +1,14 @@
# HOOKS KNOWLEDGE BASE
## OVERVIEW
31 lifecycle hooks intercepting/modifying agent behavior. Events: PreToolUse, PostToolUse, UserPromptSubmit, Stop, onSummarize.
32 lifecycle hooks intercepting/modifying agent behavior. Events: PreToolUse, PostToolUse, UserPromptSubmit, Stop, onSummarize.
## STRUCTURE
```
hooks/
├── atlas/ # Main orchestration (773 lines)
├── anthropic-context-window-limit-recovery/ # Auto-summarize
├── todo-continuation-enforcer.ts # Force TODO completion
├── atlas/ # Main orchestration (752 lines)
├── anthropic-context-window-limit-recovery/ # Auto-summarize
├── todo-continuation-enforcer.ts # Force TODO completion (16k lines)
├── ralph-loop/ # Self-referential dev loop
├── claude-code-hooks/ # settings.json compat layer - see AGENTS.md
├── comment-checker/ # Prevents AI slop
@@ -28,44 +26,61 @@ hooks/
├── prometheus-md-only/ # Planner read-only mode
├── agent-usage-reminder/ # Specialized agent hints
├── auto-update-checker/ # Plugin update check
├── tool-output-truncator.ts # Prevents context bloat
├── tool-output-truncator.ts # Prevents context bloat
├── compaction-context-injector/ # Injects context on compaction
├── delegate-task-retry/ # Retries failed delegations
├── interactive-bash-session/ # Tmux session management
├── non-interactive-env/ # Non-TTY environment handling
├── start-work/ # Sisyphus work session starter
├── task-resume-info/ # Resume info for cancelled tasks
├── question-label-truncator/ # Auto-truncates question labels
├── category-skill-reminder/ # Reminds of category skills
├── empty-task-response-detector.ts # Detects empty responses
├── sisyphus-junior-notepad/ # Sisyphus Junior notepad
└── index.ts # Hook aggregation + registration
```
## HOOK EVENTS
| Event | Timing | Can Block | Use Case |
|-------|--------|-----------|----------|
| PreToolUse | Before tool | Yes | Validate/modify inputs |
| PostToolUse | After tool | No | Append warnings, truncate |
| UserPromptSubmit | On prompt | Yes | Keyword detection |
| Stop | Session idle | No | Auto-continue |
| onSummarize | Compaction | No | Preserve state |
| UserPromptSubmit | `chat.message` | Yes | Keyword detection, slash commands |
| PreToolUse | `tool.execute.before` | Yes | Validate/modify inputs, inject context |
| PostToolUse | `tool.execute.after` | No | Truncate output, error recovery |
| Stop | `event` (session.stop) | No | Auto-continue, notifications |
| onSummarize | Compaction | No | Preserve state, inject summary context |
## EXECUTION ORDER
**chat.message**: keywordDetector → claudeCodeHooks → autoSlashCommand → startWork → ralphLoop
**tool.execute.before**: claudeCodeHooks → nonInteractiveEnv → commentChecker → directoryAgentsInjector → rulesInjector
**tool.execute.after**: editErrorRecovery → delegateTaskRetry → commentChecker → toolOutputTruncator → claudeCodeHooks
- **UserPromptSubmit**: keywordDetector → claudeCodeHooks → autoSlashCommand → startWork
- **PreToolUse**: questionLabelTruncator → claudeCodeHooks → nonInteractiveEnv → commentChecker → directoryAgentsInjector → directoryReadmeInjector → rulesInjector → prometheusMdOnly → sisyphusJuniorNotepad → atlasHook
- **PostToolUse**: claudeCodeHooks → toolOutputTruncator → contextWindowMonitor → commentChecker → directoryAgentsInjector → directoryReadmeInjector → rulesInjector → emptyTaskResponseDetector → agentUsageReminder → interactiveBashSession → editErrorRecovery → delegateTaskRetry → atlasHook → taskResumeInfo
## HOW TO ADD
1. Create `src/hooks/name/` with `index.ts` exporting `createMyHook(ctx)`
2. Add hook name to `HookNameSchema` in `src/config/schema.ts`
3. Register in `src/index.ts`:
```typescript
const myHook = isHookEnabled("my-hook") ? createMyHook(ctx) : null
```
3. Register in `src/index.ts` and add to relevant lifecycle methods
## PATTERNS
## HOOK PATTERNS
- **Session-scoped state**: `Map<sessionID, Set<string>>`
- **Conditional execution**: Check `input.tool` before processing
- **Output modification**: `output.output += "\n${REMINDER}"`
**Simple Single-Event**:
```typescript
export function createToolOutputTruncatorHook(ctx) {
return { "tool.execute.after": async (input, output) => { ... } }
}
```
**Multi-Event with State**:
```typescript
export function createThinkModeHook() {
const state = new Map<string, ThinkModeState>()
return {
"chat.params": async (output, sessionID) => { ... },
"event": async ({ event }) => { /* cleanup */ }
}
}
```
## ANTI-PATTERNS
- **Blocking non-critical**: Use PostToolUse warnings instead
- **Heavy computation**: Keep PreToolUse light
- **Redundant injection**: Track injected files
- **Heavy computation**: Keep PreToolUse light to avoid latency
- **Redundant injection**: Track injected files to avoid context bloat
- **Direct state mutation**: Use `output.output +=` instead of replacing

View File

@@ -123,7 +123,7 @@ describe("atlas hook", () => {
test("should append standalone verification when no boulder state but caller is Atlas", async () => {
// #given - no boulder state, but caller is Atlas
const sessionID = "session-no-boulder-test"
setupMessageStorage(sessionID, "Atlas")
setupMessageStorage(sessionID, "atlas")
const hook = createAtlasHook(createMockPluginInput())
const output = {
@@ -141,7 +141,7 @@ describe("atlas hook", () => {
// #then - standalone verification reminder appended
expect(output.output).toContain("Task completed successfully")
expect(output.output).toContain("MANDATORY:")
expect(output.output).toContain("delegate_task(resume=")
expect(output.output).toContain("delegate_task(session_id=")
cleanupMessageStorage(sessionID)
})
@@ -149,7 +149,7 @@ describe("atlas hook", () => {
test("should transform output when caller is Atlas with boulder state", async () => {
// #given - Atlas caller with boulder state
const sessionID = "session-transform-test"
setupMessageStorage(sessionID, "Atlas")
setupMessageStorage(sessionID, "atlas")
const planPath = join(TEST_DIR, "test-plan.md")
writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [x] Task 2")
@@ -180,7 +180,7 @@ describe("atlas hook", () => {
expect(output.output).toContain("SUBAGENT WORK COMPLETED")
expect(output.output).toContain("test-plan")
expect(output.output).toContain("LIE")
expect(output.output).toContain("delegate_task(resume=")
expect(output.output).toContain("delegate_task(session_id=")
cleanupMessageStorage(sessionID)
})
@@ -188,7 +188,7 @@ describe("atlas hook", () => {
test("should still transform when plan is complete (shows progress)", async () => {
// #given - boulder state with complete plan, Atlas caller
const sessionID = "session-complete-plan-test"
setupMessageStorage(sessionID, "Atlas")
setupMessageStorage(sessionID, "atlas")
const planPath = join(TEST_DIR, "complete-plan.md")
writeFileSync(planPath, "# Plan\n- [x] Task 1\n- [x] Task 2")
@@ -225,7 +225,7 @@ describe("atlas hook", () => {
test("should append session ID to boulder state if not present", async () => {
// #given - boulder state without session-append-test, Atlas caller
const sessionID = "session-append-test"
setupMessageStorage(sessionID, "Atlas")
setupMessageStorage(sessionID, "atlas")
const planPath = join(TEST_DIR, "test-plan.md")
writeFileSync(planPath, "# Plan\n- [ ] Task 1")
@@ -261,7 +261,7 @@ describe("atlas hook", () => {
test("should not duplicate existing session ID", async () => {
// #given - boulder state already has session-dup-test, Atlas caller
const sessionID = "session-dup-test"
setupMessageStorage(sessionID, "Atlas")
setupMessageStorage(sessionID, "atlas")
const planPath = join(TEST_DIR, "test-plan.md")
writeFileSync(planPath, "# Plan\n- [ ] Task 1")
@@ -298,7 +298,7 @@ describe("atlas hook", () => {
test("should include boulder.json path and notepad path in transformed output", async () => {
// #given - boulder state, Atlas caller
const sessionID = "session-path-test"
setupMessageStorage(sessionID, "Atlas")
setupMessageStorage(sessionID, "atlas")
const planPath = join(TEST_DIR, "my-feature.md")
writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2\n- [x] Task 3")
@@ -332,10 +332,10 @@ describe("atlas hook", () => {
cleanupMessageStorage(sessionID)
})
test("should include resume and checkbox instructions in reminder", async () => {
test("should include session_id and checkbox instructions in reminder", async () => {
// #given - boulder state, Atlas caller
const sessionID = "session-resume-test"
setupMessageStorage(sessionID, "Atlas")
setupMessageStorage(sessionID, "atlas")
const planPath = join(TEST_DIR, "test-plan.md")
writeFileSync(planPath, "# Plan\n- [ ] Task 1")
@@ -361,8 +361,8 @@ describe("atlas hook", () => {
output
)
// #then - should include resume instructions and verification
expect(output.output).toContain("delegate_task(resume=")
// #then - should include session_id instructions and verification
expect(output.output).toContain("delegate_task(session_id=")
expect(output.output).toContain("[x]")
expect(output.output).toContain("MANDATORY:")
@@ -373,7 +373,7 @@ describe("atlas hook", () => {
const ORCHESTRATOR_SESSION = "orchestrator-write-test"
beforeEach(() => {
setupMessageStorage(ORCHESTRATOR_SESSION, "Atlas")
setupMessageStorage(ORCHESTRATOR_SESSION, "atlas")
})
afterEach(() => {
@@ -444,7 +444,7 @@ describe("atlas hook", () => {
test("should NOT append reminder when non-orchestrator writes outside .sisyphus/", async () => {
// #given
const nonOrchestratorSession = "non-orchestrator-session"
setupMessageStorage(nonOrchestratorSession, "Sisyphus-Junior")
setupMessageStorage(nonOrchestratorSession, "sisyphus-junior")
const hook = createAtlasHook(createMockPluginInput())
const originalOutput = "File written successfully"
@@ -601,7 +601,7 @@ describe("atlas hook", () => {
getMainSessionID: () => MAIN_SESSION_ID,
subagentSessions: new Set<string>(),
}))
setupMessageStorage(MAIN_SESSION_ID, "Atlas")
setupMessageStorage(MAIN_SESSION_ID, "atlas")
})
afterEach(() => {
@@ -845,7 +845,7 @@ describe("atlas hook", () => {
// #given - last agent is NOT Atlas
cleanupMessageStorage(MAIN_SESSION_ID)
setupMessageStorage(MAIN_SESSION_ID, "Sisyphus")
setupMessageStorage(MAIN_SESSION_ID, "sisyphus")
const mockInput = createMockPluginInput()
const hook = createAtlasHook(mockInput)

View File

@@ -11,6 +11,7 @@ import { getMainSessionID, subagentSessions } from "../../features/claude-code-s
import { findNearestMessageWithFields, MESSAGE_STORAGE } from "../../features/hook-message-injector"
import { log } from "../../shared/logger"
import { createSystemDirective, SYSTEM_DIRECTIVE_PREFIX, SystemDirectiveTypes } from "../../shared/system-directive"
import { isCallerOrchestrator, getMessageDir } from "../../shared/session-utils"
import type { BackgroundManager } from "../../features/background-agent"
export const HOOK_NAME = "atlas"
@@ -179,13 +180,13 @@ If you were NOT given **exactly ONE atomic task**, you MUST:
`
function buildVerificationReminder(sessionId: string): string {
return `${VERIFICATION_REMINDER}
return `${VERIFICATION_REMINDER}
---
**If ANY verification fails, use this immediately:**
\`\`\`
delegate_task(resume="${sessionId}", prompt="fix: [describe the specific failure]")
delegate_task(session_id="${sessionId}", prompt="fix: [describe the specific failure]")
\`\`\``
}
@@ -380,28 +381,6 @@ interface ToolExecuteAfterOutput {
metadata: Record<string, unknown>
}
/**
 * Locate the message directory of a session inside MESSAGE_STORAGE.
 *
 * Looks for `<storage>/<sessionID>` first, then for `<storage>/<entry>/<sessionID>`
 * under each entry of the storage root.
 *
 * @returns The first existing path, or `null` when the storage root or the session directory is missing.
 */
function getMessageDir(sessionID: string): string | null {
  if (!existsSync(MESSAGE_STORAGE)) return null

  const topLevel = join(MESSAGE_STORAGE, sessionID)
  if (existsSync(topLevel)) return topLevel

  // Fall back to a one-level-deep search through the storage root's entries.
  const nested = readdirSync(MESSAGE_STORAGE)
    .map((entry) => join(MESSAGE_STORAGE, entry, sessionID))
    .find((candidate) => existsSync(candidate))
  return nested ?? null
}
/**
 * Report whether the nearest stored message of a session was written by the "atlas" agent.
 *
 * The agent name is compared case-insensitively. A missing session ID, a
 * missing message directory, or a message without an agent all yield `false`.
 */
function isCallerOrchestrator(sessionID?: string): boolean {
  const messageDir = sessionID ? getMessageDir(sessionID) : null
  if (!messageDir) return false

  const agentName = findNearestMessageWithFields(messageDir)?.agent?.toLowerCase()
  return agentName === "atlas"
}
interface SessionState {
lastEventWasAbortError?: boolean
lastContinuationInjectedAt?: number
@@ -498,7 +477,7 @@ export function createAtlasHook(
await ctx.client.session.prompt({
path: { id: sessionID },
body: {
agent: "Atlas",
agent: "atlas",
...(model !== undefined ? { model } : {}),
parts: [{ type: "text", text: prompt }],
},
@@ -672,7 +651,7 @@ export function createAtlasHook(
if (input.tool === "delegate_task") {
const prompt = output.args.prompt as string | undefined
if (prompt && !prompt.includes(SYSTEM_DIRECTIVE_PREFIX)) {
output.args.prompt = prompt + `\n<system-reminder>${SINGLE_TASK_DIRECTIVE}</system-reminder>`
output.args.prompt = `<system-reminder>${SINGLE_TASK_DIRECTIVE}</system-reminder>\n` + prompt
log(`[${HOOK_NAME}] Injected single-task directive to delegate_task`, {
sessionID: input.sessionID,
})
@@ -711,8 +690,8 @@ export function createAtlasHook(
return
}
const outputStr = output.output && typeof output.output === "string" ? output.output : ""
const isBackgroundLaunch = outputStr.includes("Background task launched") || outputStr.includes("Background task resumed")
const outputStr = output.output && typeof output.output === "string" ? output.output : ""
const isBackgroundLaunch = outputStr.includes("Background task launched") || outputStr.includes("Background task continued")
if (isBackgroundLaunch) {
return

View File

@@ -6,6 +6,7 @@ import { log } from "../../shared/logger"
import { getConfigLoadErrors, clearConfigLoadErrors } from "../../shared/config-errors"
import { runBunInstall } from "../../cli/config-manager"
import { isModelCacheAvailable } from "../../shared/model-availability"
import { hasConnectedProvidersCache, updateConnectedProvidersCache } from "../../shared/connected-providers-cache"
import type { AutoUpdateCheckerOptions } from "./types"
const SISYPHUS_SPINNER = ["·", "•", "●", "○", "◌", "◦", " "]
@@ -77,6 +78,7 @@ export function createAutoUpdateCheckerHook(ctx: PluginInput, options: AutoUpdat
await showConfigErrorsIfAny(ctx)
await showModelCacheWarningIfNeeded(ctx)
await updateAndShowConnectedProvidersCacheStatus(ctx)
if (localDevVersion) {
if (showStartupToast) {
@@ -186,6 +188,29 @@ async function showModelCacheWarningIfNeeded(ctx: PluginInput): Promise<void> {
log("[auto-update-checker] Model cache warning shown")
}
/**
 * Refresh the connected-providers cache and, on first run, tell the user about it.
 *
 * The refresh is started without being awaited and its failures are ignored.
 * When no cache existed before the refresh started, an info toast is shown
 * (toast failures are ignored as well); otherwise only a log entry is written.
 */
async function updateAndShowConnectedProvidersCacheStatus(ctx: PluginInput): Promise<void> {
  // Sample the cache state before kicking off the refresh so "first run" is detected reliably.
  const cacheExisted = hasConnectedProvidersCache()

  // Fire-and-forget: the refresh continues in the background.
  void updateConnectedProvidersCache(ctx.client).catch(() => {})

  if (cacheExisted) {
    log("[auto-update-checker] Connected providers cache exists, updating in background")
    return
  }

  const firstRunToast = {
    title: "Connected Providers Cache",
    message: "Building provider cache for first time. Restart OpenCode for full model filtering.",
    variant: "info" as const,
    duration: 8000,
  }
  await ctx.client.tui.showToast({ body: firstRunToast }).catch(() => {})
  log("[auto-update-checker] Connected providers cache toast shown (first run)")
}
async function showConfigErrorsIfAny(ctx: PluginInput): Promise<void> {
const errors = getConfigLoadErrors()
if (errors.length === 0) return

View File

@@ -1,87 +0,0 @@
import type { BackgroundManager } from "../../features/background-agent"
/** Input handed to the compaction hook: identifies the session being compacted. */
interface CompactingInput {
sessionID: string
}
/** Mutable output of the compaction hook. */
interface CompactingOutput {
/** Context blocks; the hook appends its own block to this list. */
context: string[]
/** Optional prompt (not touched by the hook in this file). */
prompt?: string
}
/**
 * Creates the compaction hook that keeps background-task awareness alive.
 *
 * On "experimental.session.compacting" the hook collects the running tasks and
 * the last ten completed tasks whose parent is the session being compacted and
 * appends a single `<background-tasks>` block describing them to
 * `output.context`. Nothing is appended when the session has no such tasks.
 *
 * @param manager Source of the running and completed background tasks.
 * @returns An object exposing the "experimental.session.compacting" handler.
 */
export function createBackgroundCompactionHook(manager: BackgroundManager) {
  // Anything that is neither completed nor errored is labelled as pending.
  const statusTag = (status: unknown): string => {
    switch (status) {
      case "completed":
        return "[DONE]"
      case "error":
        return "[ERROR]"
      default:
        return "[PENDING]"
    }
  }

  const onCompacting = async (input: CompactingInput, output: CompactingOutput): Promise<void> => {
    const parentID = input.sessionID

    const activeTasks = manager.getRunningTasks().filter((task) => task.parentSessionID === parentID)
    const finishedTasks = manager
      .getCompletedTasks()
      .filter((task) => task.parentSessionID === parentID)
      .slice(-10)

    // Nothing to preserve for this session.
    if (activeTasks.length + finishedTasks.length === 0) return

    const lines: string[] = ["<background-tasks>"]

    if (activeTasks.length > 0) {
      lines.push("## Running Background Tasks", "")
      for (const task of activeTasks) {
        // Tasks without a start time are reported with zero elapsed seconds.
        const elapsedSeconds = task.startedAt
          ? Math.floor((Date.now() - task.startedAt.getTime()) / 1000)
          : 0
        lines.push(`- **\`${task.id}\`** (${task.agent}): ${task.description} [${elapsedSeconds}s elapsed]`)
      }
      lines.push(
        "",
        "> **Note:** You WILL be notified when tasks complete.",
        "> Do NOT poll - continue productive work.",
        "",
      )
    }

    if (finishedTasks.length > 0) {
      lines.push("## Recently Completed Tasks", "")
      for (const task of finishedTasks) {
        lines.push(`- ${statusTag(task.status)} **\`${task.id}\`**: ${task.description}`)
      }
      lines.push("")
    }

    lines.push(
      "## Retrieval",
      'Use `background_output(task_id="<id>")` to retrieve task results.',
      "</background-tasks>",
    )
    output.context.push(lines.join("\n"))
  }

  return {
    "experimental.session.compacting": onCompacting,
  }
}

View File

@@ -0,0 +1,346 @@
import { describe, expect, test, beforeEach, afterEach, spyOn } from "bun:test"
import { createCategorySkillReminderHook } from "./index"
import { updateSessionAgent, clearSessionAgent, _resetForTesting } from "../../features/claude-code-session-state"
import * as sharedModule from "../../shared"
describe("category-skill-reminder hook", () => {
let logCalls: Array<{ msg: string; data?: unknown }>
let logSpy: ReturnType<typeof spyOn>
beforeEach(() => {
_resetForTesting()
logCalls = []
logSpy = spyOn(sharedModule, "log").mockImplementation((msg: string, data?: unknown) => {
logCalls.push({ msg, data })
})
})
afterEach(() => {
logSpy?.mockRestore()
})
function createMockPluginInput() {
return {
client: {
tui: {
showToast: async () => {},
},
},
} as any
}
describe("target agent detection", () => {
test("should inject reminder for sisyphus agent after 3 tool calls", async () => {
// #given - sisyphus agent session with multiple tool calls
const hook = createCategorySkillReminderHook(createMockPluginInput())
const sessionID = "sisyphus-session"
updateSessionAgent(sessionID, "Sisyphus")
const output = { title: "", output: "file content", metadata: {} }
// #when - 3 edit tool calls are made
await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "1" }, output)
await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "2" }, output)
await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "3" }, output)
// #then - reminder should be injected
expect(output.output).toContain("[Category+Skill Reminder]")
expect(output.output).toContain("delegate_task")
clearSessionAgent(sessionID)
})
test("should inject reminder for atlas agent", async () => {
// #given - atlas agent session
const hook = createCategorySkillReminderHook(createMockPluginInput())
const sessionID = "atlas-session"
updateSessionAgent(sessionID, "Atlas")
const output = { title: "", output: "result", metadata: {} }
// #when - 3 tool calls are made
await hook["tool.execute.after"]({ tool: "bash", sessionID, callID: "1" }, output)
await hook["tool.execute.after"]({ tool: "bash", sessionID, callID: "2" }, output)
await hook["tool.execute.after"]({ tool: "bash", sessionID, callID: "3" }, output)
// #then - reminder should be injected
expect(output.output).toContain("[Category+Skill Reminder]")
clearSessionAgent(sessionID)
})
test("should inject reminder for sisyphus-junior agent", async () => {
// #given - sisyphus-junior agent session
const hook = createCategorySkillReminderHook(createMockPluginInput())
const sessionID = "junior-session"
updateSessionAgent(sessionID, "sisyphus-junior")
const output = { title: "", output: "result", metadata: {} }
// #when - 3 tool calls are made
await hook["tool.execute.after"]({ tool: "write", sessionID, callID: "1" }, output)
await hook["tool.execute.after"]({ tool: "write", sessionID, callID: "2" }, output)
await hook["tool.execute.after"]({ tool: "write", sessionID, callID: "3" }, output)
// #then - reminder should be injected
expect(output.output).toContain("[Category+Skill Reminder]")
clearSessionAgent(sessionID)
})
test("should NOT inject reminder for non-target agents", async () => {
// #given - librarian agent session (not a target)
const hook = createCategorySkillReminderHook(createMockPluginInput())
const sessionID = "librarian-session"
updateSessionAgent(sessionID, "librarian")
const output = { title: "", output: "result", metadata: {} }
// #when - 3 tool calls are made
await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "1" }, output)
await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "2" }, output)
await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "3" }, output)
// #then - reminder should NOT be injected
expect(output.output).not.toContain("[Category+Skill Reminder]")
clearSessionAgent(sessionID)
})
test("should detect agent from input.agent when session state is empty", async () => {
// #given - no session state, agent provided in input
const hook = createCategorySkillReminderHook(createMockPluginInput())
const sessionID = "input-agent-session"
const output = { title: "", output: "result", metadata: {} }
// #when - 3 tool calls with agent in input
await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "1", agent: "Sisyphus" }, output)
await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "2", agent: "Sisyphus" }, output)
await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "3", agent: "Sisyphus" }, output)
// #then - reminder should be injected
expect(output.output).toContain("[Category+Skill Reminder]")
})
})
describe("delegation tool tracking", () => {
test("should NOT inject reminder if delegate_task is used", async () => {
// #given - sisyphus agent that uses delegate_task
const hook = createCategorySkillReminderHook(createMockPluginInput())
const sessionID = "delegation-session"
updateSessionAgent(sessionID, "Sisyphus")
const output = { title: "", output: "result", metadata: {} }
// #when - delegate_task is used, then more tool calls
await hook["tool.execute.after"]({ tool: "delegate_task", sessionID, callID: "1" }, output)
await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "2" }, output)
await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "3" }, output)
await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "4" }, output)
// #then - reminder should NOT be injected (delegation was used)
expect(output.output).not.toContain("[Category+Skill Reminder]")
clearSessionAgent(sessionID)
})
test("should NOT inject reminder if call_omo_agent is used", async () => {
// #given - sisyphus agent that uses call_omo_agent
const hook = createCategorySkillReminderHook(createMockPluginInput())
const sessionID = "omo-agent-session"
updateSessionAgent(sessionID, "Sisyphus")
const output = { title: "", output: "result", metadata: {} }
// #when - call_omo_agent is used first
await hook["tool.execute.after"]({ tool: "call_omo_agent", sessionID, callID: "1" }, output)
await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "2" }, output)
await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "3" }, output)
await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "4" }, output)
// #then - reminder should NOT be injected
expect(output.output).not.toContain("[Category+Skill Reminder]")
clearSessionAgent(sessionID)
})
test("should NOT inject reminder if task tool is used", async () => {
// #given - sisyphus agent that uses task tool
const hook = createCategorySkillReminderHook(createMockPluginInput())
const sessionID = "task-session"
updateSessionAgent(sessionID, "Sisyphus")
const output = { title: "", output: "result", metadata: {} }
// #when - task tool is used
await hook["tool.execute.after"]({ tool: "task", sessionID, callID: "1" }, output)
await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "2" }, output)
await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "3" }, output)
await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "4" }, output)
// #then - reminder should NOT be injected
expect(output.output).not.toContain("[Category+Skill Reminder]")
clearSessionAgent(sessionID)
})
})
describe("tool call counting", () => {
test("should NOT inject reminder before 3 tool calls", async () => {
// #given - sisyphus agent with only 2 tool calls
const hook = createCategorySkillReminderHook(createMockPluginInput())
const sessionID = "few-calls-session"
updateSessionAgent(sessionID, "Sisyphus")
const output = { title: "", output: "result", metadata: {} }
// #when - only 2 tool calls are made
await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "1" }, output)
await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "2" }, output)
// #then - reminder should NOT be injected yet
expect(output.output).not.toContain("[Category+Skill Reminder]")
clearSessionAgent(sessionID)
})
test("should only inject reminder once per session", async () => {
// #given - sisyphus agent session
const hook = createCategorySkillReminderHook(createMockPluginInput())
const sessionID = "once-session"
updateSessionAgent(sessionID, "Sisyphus")
const output1 = { title: "", output: "result1", metadata: {} }
const output2 = { title: "", output: "result2", metadata: {} }
// #when - 6 tool calls are made (should trigger at 3, not again at 6)
await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "1" }, output1)
await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "2" }, output1)
await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "3" }, output1)
await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "4" }, output2)
await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "5" }, output2)
await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "6" }, output2)
// #then - reminder should be in output1 but not output2
expect(output1.output).toContain("[Category+Skill Reminder]")
expect(output2.output).not.toContain("[Category+Skill Reminder]")
clearSessionAgent(sessionID)
})
test("should only count delegatable work tools", async () => {
// #given - sisyphus agent with mixed tool calls
const hook = createCategorySkillReminderHook(createMockPluginInput())
const sessionID = "mixed-tools-session"
updateSessionAgent(sessionID, "Sisyphus")
const output = { title: "", output: "result", metadata: {} }
// #when - non-delegatable tools are called (should not count)
await hook["tool.execute.after"]({ tool: "lsp_goto_definition", sessionID, callID: "1" }, output)
await hook["tool.execute.after"]({ tool: "lsp_find_references", sessionID, callID: "2" }, output)
await hook["tool.execute.after"]({ tool: "lsp_symbols", sessionID, callID: "3" }, output)
// #then - reminder should NOT be injected (LSP tools don't count)
expect(output.output).not.toContain("[Category+Skill Reminder]")
clearSessionAgent(sessionID)
})
})
describe("event handling", () => {
  test("should reset state on session.deleted event", async () => {
    // #given - sisyphus agent with reminder already shown
    const hook = createCategorySkillReminderHook(createMockPluginInput())
    const sessionID = "delete-session"
    updateSessionAgent(sessionID, "Sisyphus")
    const output1 = { title: "", output: "result1", metadata: {} }
    // three delegatable tool calls are enough to trigger the first reminder
    for (const callID of ["1", "2", "3"]) {
      await hook["tool.execute.after"]({ tool: "edit", sessionID, callID }, output1)
    }
    expect(output1.output).toContain("[Category+Skill Reminder]")

    // #when - session is deleted and new session starts
    await hook.event({ event: { type: "session.deleted", properties: { info: { id: sessionID } } } })
    const output2 = { title: "", output: "result2", metadata: {} }
    for (const callID of ["4", "5", "6"]) {
      await hook["tool.execute.after"]({ tool: "edit", sessionID, callID }, output2)
    }

    // #then - reminder should be shown again (state was reset)
    expect(output2.output).toContain("[Category+Skill Reminder]")
    clearSessionAgent(sessionID)
  })

  test("should reset state on session.compacted event", async () => {
    // #given - sisyphus agent with reminder already shown
    const hook = createCategorySkillReminderHook(createMockPluginInput())
    const sessionID = "compact-session"
    updateSessionAgent(sessionID, "Sisyphus")
    const output1 = { title: "", output: "result1", metadata: {} }
    for (const callID of ["1", "2", "3"]) {
      await hook["tool.execute.after"]({ tool: "edit", sessionID, callID }, output1)
    }
    expect(output1.output).toContain("[Category+Skill Reminder]")

    // #when - session is compacted
    await hook.event({ event: { type: "session.compacted", properties: { sessionID } } })
    const output2 = { title: "", output: "result2", metadata: {} }
    for (const callID of ["4", "5", "6"]) {
      await hook["tool.execute.after"]({ tool: "edit", sessionID, callID }, output2)
    }

    // #then - reminder should be shown again (state was reset)
    expect(output2.output).toContain("[Category+Skill Reminder]")
    clearSessionAgent(sessionID)
  })
})
describe("case insensitivity", () => {
  test("should handle tool names case-insensitively", async () => {
    // #given - sisyphus agent with mixed case tool names
    const hook = createCategorySkillReminderHook(createMockPluginInput())
    const sessionID = "case-session"
    updateSessionAgent(sessionID, "Sisyphus")
    const output = { title: "", output: "result", metadata: {} }

    // #when - tool calls with different cases
    const spellings = ["EDIT", "Edit", "edit"]
    for (const [index, tool] of spellings.entries()) {
      await hook["tool.execute.after"]({ tool, sessionID, callID: String(index + 1) }, output)
    }

    // #then - reminder should be injected (all counted)
    expect(output.output).toContain("[Category+Skill Reminder]")
    clearSessionAgent(sessionID)
  })

  test("should handle delegation tool names case-insensitively", async () => {
    // #given - sisyphus agent using DELEGATE_TASK in uppercase
    const hook = createCategorySkillReminderHook(createMockPluginInput())
    const sessionID = "case-delegate-session"
    updateSessionAgent(sessionID, "Sisyphus")
    const output = { title: "", output: "result", metadata: {} }

    // #when - DELEGATE_TASK in uppercase is used
    await hook["tool.execute.after"]({ tool: "DELEGATE_TASK", sessionID, callID: "1" }, output)
    for (const callID of ["2", "3", "4"]) {
      await hook["tool.execute.after"]({ tool: "edit", sessionID, callID }, output)
    }

    // #then - reminder should NOT be injected (delegation was detected)
    expect(output.output).not.toContain("[Category+Skill Reminder]")
    clearSessionAgent(sessionID)
  })
})
})

View File

@@ -0,0 +1,165 @@
import type { PluginInput } from "@opencode-ai/plugin"
import { getSessionAgent } from "../../features/claude-code-session-state"
import { log } from "../../shared"
/**
 * Lower-cased names of the agents that are eligible for the category+skill reminder.
 * Lookups are done against the lower-cased agent name of the session.
 */
const TARGET_AGENTS: ReadonlySet<string> = new Set<string>(["sisyphus", "sisyphus-junior", "atlas"])
/**
 * Lower-cased tool names that count as "hands-on" work which might have been delegated.
 * Each call to one of these tools increments the per-session counter that drives the reminder.
 */
const DELEGATABLE_WORK_TOOLS: ReadonlySet<string> = new Set<string>([
  "edit",
  "write",
  "bash",
  "read",
  "grep",
  "glob",
])
/**
 * Lower-cased tool names whose use shows the agent is already delegating.
 * Seeing any of them marks the session so that no reminder is injected.
 */
const DELEGATION_TOOLS: ReadonlySet<string> = new Set<string>(["delegate_task", "call_omo_agent", "task"])
/**
 * Text appended verbatim to a tool's output when the reminder fires.
 * The "[Category+Skill Reminder]" header line is what the tests look for to
 * detect that the reminder was injected, so keep it stable.
 */
const REMINDER_MESSAGE = `
[Category+Skill Reminder]
You are an orchestrator agent. Consider whether this work should be delegated:
**DELEGATE when:**
- UI/Frontend work → category: "visual-engineering", skills: ["frontend-ui-ux"]
- Complex logic/architecture → category: "ultrabrain"
- Quick/trivial tasks → category: "quick"
- Git operations → skills: ["git-master"]
- Browser automation → skills: ["playwright"] or ["agent-browser"]
**DO IT YOURSELF when:**
- Gathering context/exploring codebase
- Simple edits that are part of a larger task you're coordinating
- Tasks requiring your full context understanding
Example delegation:
\`\`\`
delegate_task(
category="visual-engineering",
load_skills=["frontend-ui-ux"],
description="Implement responsive navbar with animations",
run_in_background=true
)
\`\`\`
`
/** Input half of the `tool.execute.after` hook call. */
interface ToolExecuteInput {
// Tool name as reported by the caller; it is lower-cased before being matched.
tool: string
// Key under which per-session reminder state is stored.
sessionID: string
callID: string
// Fallback agent name, used only when no agent is recorded for the session.
agent?: string
}
/** Output half of the `tool.execute.after` hook call; the hook mutates it in place. */
interface ToolExecuteOutput {
title: string
// Tool output text; the reminder message is appended to this field.
output: string
metadata: unknown
}
/** Per-session bookkeeping that decides whether the reminder should fire. */
interface SessionState {
// Set once any delegation tool has been seen in the session.
delegationUsed: boolean
// Set once the reminder has been appended, so it is not repeated.
reminderShown: boolean
// Number of delegatable-work tool calls seen so far.
toolCallCount: number
}
/**
 * Builds the category+skill reminder hook.
 *
 * The returned `tool.execute.after` handler watches tool calls made by
 * orchestrator agents. Once a session has made three delegatable-work tool
 * calls without ever using a delegation tool, the reminder message is appended
 * to that tool's output (at most once per tracked session state).
 *
 * The returned `event` handler drops the tracked state when a session is
 * deleted or compacted, so the reminder can fire again afterwards.
 *
 * @param _ctx Plugin input; not used by this hook.
 * @returns Handlers keyed by the hook names they implement.
 */
export function createCategorySkillReminderHook(_ctx: PluginInput) {
  const sessionStates = new Map<string, SessionState>()

  // Returns the tracked state for a session, creating a fresh one on first use.
  const stateFor = (sessionID: string): SessionState => {
    let state = sessionStates.get(sessionID)
    if (state === undefined) {
      state = { delegationUsed: false, reminderShown: false, toolCallCount: 0 }
      sessionStates.set(sessionID, state)
    }
    return state
  }

  // The agent recorded for the session wins; the agent on the hook input is only a fallback.
  const isTargetAgent = (sessionID: string, inputAgent?: string): boolean => {
    const agent = getSessionAgent(sessionID) ?? inputAgent
    if (!agent) return false
    const name = agent.toLowerCase()
    if (TARGET_AGENTS.has(name)) return true
    return name.includes("sisyphus") || name.includes("atlas")
  }

  const toolExecuteAfter = async (
    input: ToolExecuteInput,
    output: ToolExecuteOutput,
  ) => {
    const sessionID = input.sessionID
    const normalizedTool = input.tool.toLowerCase()

    if (!isTargetAgent(sessionID, input.agent)) return

    const state = stateFor(sessionID)

    // Any delegation tool permanently silences the reminder for this state.
    if (DELEGATION_TOOLS.has(normalizedTool)) {
      state.delegationUsed = true
      log("[category-skill-reminder] Delegation tool used", { sessionID, tool: input.tool })
      return
    }

    // Tools outside the delegatable set are not counted at all.
    if (!DELEGATABLE_WORK_TOOLS.has(normalizedTool)) return

    state.toolCallCount += 1

    const reminderDue =
      state.toolCallCount >= 3 && !state.delegationUsed && !state.reminderShown
    if (!reminderDue) return

    output.output += REMINDER_MESSAGE
    state.reminderShown = true
    log("[category-skill-reminder] Reminder injected", {
      sessionID,
      toolCallCount: state.toolCallCount
    })
  }

  const eventHandler = async ({ event }: { event: { type: string; properties?: unknown } }) => {
    const props = event.properties as Record<string, unknown> | undefined

    switch (event.type) {
      case "session.deleted": {
        const deletedId = (props?.info as { id?: string } | undefined)?.id
        if (deletedId) {
          sessionStates.delete(deletedId)
        }
        break
      }
      case "session.compacted": {
        // The session id is read from `sessionID` first, then from `info.id`.
        const compactedId = (props?.sessionID ??
          (props?.info as { id?: string } | undefined)?.id) as string | undefined
        if (compactedId) {
          sessionStates.delete(compactedId)
        }
        break
      }
    }
  }

  return {
    "tool.execute.after": toolExecuteAfter,
    event: eventHandler,
  }
}

View File

@@ -1,51 +1,48 @@
# CLAUDE CODE HOOKS COMPATIBILITY
## OVERVIEW
Full Claude Code settings.json hook compatibility. 5 lifecycle events: PreToolUse, PostToolUse, UserPromptSubmit, Stop, PreCompact.
Full Claude Code `settings.json` hook compatibility layer. Intercepts OpenCode events to execute external scripts/commands defined in Claude Code configuration.
## STRUCTURE
```
claude-code-hooks/
├── index.ts # Main factory (401 lines)
├── config.ts # Loads ~/.claude/settings.json
├── config-loader.ts # Extended config
├── config-loader.ts # Extended config (disabledHooks)
├── pre-tool-use.ts # PreToolUse executor
├── post-tool-use.ts # PostToolUse executor
├── user-prompt-submit.ts # UserPromptSubmit executor
├── stop.ts # Stop hook executor
├── stop.ts # Stop hook executor (with active state tracking)
├── pre-compact.ts # PreCompact executor
├── transcript.ts # Tool use recording
├── tool-input-cache.ts # Pre→post caching
├── types.ts # Hook types
└── todo.ts # Todo JSON fix
├── tool-input-cache.ts # Pre→post input caching
└── types.ts # Hook & IO type definitions
```
## HOOK LIFECYCLE
| Event | When | Can Block | Context |
|-------|------|-----------|---------|
| PreToolUse | Before tool | Yes | sessionId, toolName, toolInput |
| PostToolUse | After tool | Warn | + toolOutput, transcriptPath |
| UserPromptSubmit | On message | Yes | sessionId, prompt, parts |
| Stop | Session idle | inject | sessionId, parentSessionId |
| PreCompact | Before summarize | No | sessionId |
| Event | Timing | Can Block | Context Provided |
|-------|--------|-----------|------------------|
| PreToolUse | Before tool exec | Yes | sessionId, toolName, toolInput, cwd |
| PostToolUse | After tool exec | Warn | + toolOutput, transcriptPath |
| UserPromptSubmit | On message send | Yes | sessionId, prompt, parts, cwd |
| Stop | Session idle/end | Inject | sessionId, parentSessionId, cwd |
| PreCompact | Before summarize | No | sessionId, cwd |
## CONFIG SOURCES
Priority (highest first):
1. `.claude/settings.json` (project)
2. `~/.claude/settings.json` (user)
1. `.claude/settings.json` (Project-local)
2. `~/.claude/settings.json` (Global user)
## HOOK EXECUTION
1. Hooks loaded from settings.json
2. Matchers filter by tool name
3. Commands via subprocess with `$SESSION_ID`, `$TOOL_NAME`
4. Exit codes: 0=pass, 1=warn, 2=block
- **Matchers**: Hooks filter by tool name or event type via regex/glob.
- **Commands**: Executed via subprocess with env vars (`$SESSION_ID`, `$TOOL_NAME`).
- **Exit Codes**:
- `0`: Pass (Success)
- `1`: Warn (Continue with system message)
- `2`: Block (Abort operation/prompt)
## ANTI-PATTERNS
- **Heavy PreToolUse**: Runs before EVERY tool call
- **Blocking non-critical**: Use PostToolUse warnings
- **Heavy PreToolUse**: Runs before EVERY tool; keep logic light to avoid latency.
- **Blocking non-critical**: Prefer PostToolUse warnings for non-fatal issues.
- **Direct state mutation**: Use `updatedInput` in PreToolUse instead of side effects.
- **Ignoring Exit Codes**: Ensure scripts return `2` to properly block sensitive tools.

View File

@@ -33,7 +33,13 @@ When summarizing this session, you MUST include the following sections in your s
- Pending items from the original request
- Follow-up tasks identified during the work
## 5. MUST NOT Do (Critical Constraints)
## 5. Active Working Context (For Seamless Continuation)
- **Files**: Paths of files currently being edited or frequently referenced
- **Code in Progress**: Key code snippets, function signatures, or data structures under active development
- **External References**: Documentation URLs, library APIs, or external resources being consulted
- **State & Variables**: Important variable names, configuration values, or runtime state relevant to ongoing work
## 6. MUST NOT Do (Critical Constraints)
- Things that were explicitly forbidden
- Approaches that failed and should not be retried
- User's explicit restrictions or preferences

View File

@@ -22,11 +22,14 @@ export { createNonInteractiveEnvHook } from "./non-interactive-env";
export { createInteractiveBashSessionHook } from "./interactive-bash-session";
export { createThinkingBlockValidatorHook } from "./thinking-block-validator";
export { createCategorySkillReminderHook } from "./category-skill-reminder";
export { createRalphLoopHook, type RalphLoopHook } from "./ralph-loop";
export { createAutoSlashCommandHook } from "./auto-slash-command";
export { createEditErrorRecoveryHook } from "./edit-error-recovery";
export { createPrometheusMdOnlyHook } from "./prometheus-md-only";
export { createSisyphusJuniorNotepadHook } from "./sisyphus-junior-notepad";
export { createTaskResumeInfoHook } from "./task-resume-info";
export { createStartWorkHook } from "./start-work";
export { createAtlasHook } from "./atlas";
export { createDelegateTaskRetryHook } from "./delegate-task-retry";
export { createQuestionLabelTruncatorHook } from "./question-label-truncator";

View File

@@ -169,10 +169,10 @@ TELL THE USER WHAT AGENTS YOU WILL LEVERAGE NOW TO SATISFY USER'S REQUEST.
## AGENTS / **CATEGORY + SKILLS** UTILIZATION PRINCIPLES (by capability, not by name)
- **Codebase Exploration**: Spawn exploration agents using BACKGROUND TASKS for file patterns, internal implementations, project structure
- **Documentation & References**: Use librarian-type agents via BACKGROUND TASKS for API references, examples, external library docs
- **Planning & Strategy**: NEVER plan yourself - ALWAYS spawn a dedicated planning agent for work breakdown
- MUST USE PLAN AGENT. MUST USE PLAN AGENT. MUST USE PLAN AGENT.
- ALWAYS ASK PLAN AGENT TO WHAT CATEGORY + SKILLS / AGENTS TO LEVERAGE.
- IF IMPLEMENT TASK, MUST ADD TODO NOW: "CONSULT WITH PLAN AGENT WITH CATEGORY + SKILLS"
- **Planning & Strategy**: NEVER plan yourself - ALWAYS spawn the Plan agent for work breakdown
- MUST invoke: \`delegate_task(subagent_type="plan", prompt="<gathered context + user request>")\`
- In your prompt to the Plan agent, ASK it to recommend which CATEGORY + SKILLS / AGENTS to leverage for implementation.
- IF IMPLEMENT TASK, MUST ADD TODO NOW: "Consult Plan agent via delegate_task(subagent_type='plan') for work breakdown with category + skills recommendations"
- **High-IQ Reasoning**: Leverage specialized agents for architecture decisions, code review, strategic planning
- **SPECIAL TASKS COVERED WITH CATEGORY + LOAD_SKILLS**: Delegate to specialized agents with category+skills for design and implementation, as following guide:
- CATEGORY + SKILL GUIDE
@@ -192,7 +192,7 @@ TELL THE USER WHAT AGENTS YOU WILL LEVERAGE NOW TO SATISFY USER'S REQUEST.
## WORKFLOW
1. Analyze the request and identify required capabilities
2. Spawn exploration/librarian agents via delegate_task(background=true) in PARALLEL (10+ if needed)
3. Always Use Plan agent with gathered context to create detailed work breakdown
3. Spawn Plan agent: \`delegate_task(subagent_type="plan", prompt="<context + request>")\` to create detailed work breakdown
4. Execute with continuous verification against original requirements
## VERIFICATION GUARANTEE (NON-NEGOTIABLE)
@@ -266,9 +266,9 @@ Write these criteria explicitly. Share with user if scope is non-trivial.
THE USER ASKED FOR X. DELIVER EXACTLY X. NOT A SUBSET. NOT A DEMO. NOT A STARTING POINT.
1. EXPLORES + LIBRARIANS
2. GATHER -> PLAN AGENT SPAWN
3. WORK BY DELEGATING TO ANOTHER AGENTS
1. EXPLORES + LIBRARIANS (background)
2. GATHER -> delegate_task(subagent_type="plan", prompt="<context + request>")
3. WORK BY DELEGATING TO CATEGORY + SKILLS AGENTS
NOW.

View File

@@ -419,7 +419,7 @@ describe("keyword-detector agent-specific ultrawork messages", () => {
}
// #when - ultrawork keyword detected with Sisyphus agent
await hook["chat.message"]({ sessionID, agent: "Sisyphus" }, output)
await hook["chat.message"]({ sessionID, agent: "sisyphus" }, output)
// #then - should use normal ultrawork message with agent utilization instructions
const textPart = output.parts.find(p => p.type === "text")
@@ -471,7 +471,7 @@ describe("keyword-detector agent-specific ultrawork messages", () => {
message: {} as Record<string, unknown>,
parts: [{ type: "text", text: "ultrawork implement" }],
}
await hook["chat.message"]({ sessionID: sisyphusSessionID, agent: "Sisyphus" }, sisyphusOutput)
await hook["chat.message"]({ sessionID: sisyphusSessionID, agent: "sisyphus" }, sisyphusOutput)
// #then - each session should have the correct message type
const prometheusTextPart = prometheusOutput.parts.find(p => p.type === "text")
@@ -492,7 +492,7 @@ describe("keyword-detector agent-specific ultrawork messages", () => {
const sessionID = "same-session-agent-switch"
// Simulate: session state was updated to sisyphus (by index.ts updateSessionAgent)
updateSessionAgent(sessionID, "Sisyphus")
updateSessionAgent(sessionID, "sisyphus")
const output = {
message: {} as Record<string, unknown>,

View File

@@ -277,7 +277,7 @@ describe("prometheus-md-only", () => {
describe("with non-Prometheus agent in message storage", () => {
beforeEach(() => {
setupMessageStorage(TEST_SESSION_ID, "Sisyphus")
setupMessageStorage(TEST_SESSION_ID, "sisyphus")
})
test("should not affect non-Prometheus agents", async () => {

View File

@@ -89,10 +89,10 @@ export function createPrometheusMdOnlyHook(ctx: PluginInput) {
const toolName = input.tool
// Inject read-only warning for task tools called by Prometheus
if (TASK_TOOLS.includes(toolName)) {
const prompt = output.args.prompt as string | undefined
if (prompt && !prompt.includes(SYSTEM_DIRECTIVE_PREFIX)) {
output.args.prompt = prompt + PLANNING_CONSULT_WARNING
if (TASK_TOOLS.includes(toolName)) {
const prompt = output.args.prompt as string | undefined
if (prompt && !prompt.includes(SYSTEM_DIRECTIVE_PREFIX)) {
output.args.prompt = PLANNING_CONSULT_WARNING + prompt
log(`[${HOOK_NAME}] Injected read-only planning warning to ${toolName}`, {
sessionID: input.sessionID,
tool: toolName,

View File

@@ -0,0 +1,136 @@
import { describe, it, expect } from "bun:test";
import { createQuestionLabelTruncatorHook } from "./index";
describe("createQuestionLabelTruncatorHook", () => {
  const hook = createQuestionLabelTruncatorHook();

  // Runs the before-handler for `tool` against `args` and returns the (possibly mutated) args.
  const runBefore = async (tool: string, args: Record<string, unknown>) => {
    const input = { tool };
    const output = { args };
    await hook["tool.execute.before"]?.(input as any, output as any);
    return output.args as any;
  };

  describe("tool.execute.before", () => {
    it("truncates labels exceeding 30 characters with ellipsis", async () => {
      // #given
      const longLabel = "This is a very long label that exceeds thirty characters";
      const args = {
        questions: [
          {
            question: "Choose an option",
            options: [{ label: longLabel, description: "A long option" }],
          },
        ],
      };

      // #when
      const result = await runBefore("AskUserQuestion", args);

      // #then
      const truncatedLabel = result.questions[0].options[0].label;
      expect(truncatedLabel.length).toBeLessThanOrEqual(30);
      expect(truncatedLabel).toBe("This is a very long label t...");
      expect(truncatedLabel.endsWith("...")).toBe(true);
    });

    it("preserves labels within 30 characters", async () => {
      // #given
      const shortLabel = "Short label";
      const args = {
        questions: [
          {
            question: "Choose an option",
            options: [{ label: shortLabel, description: "A short option" }],
          },
        ],
      };

      // #when
      const result = await runBefore("AskUserQuestion", args);

      // #then
      const resultLabel = result.questions[0].options[0].label;
      expect(resultLabel).toBe(shortLabel);
    });

    it("handles exactly 30 character labels without truncation", async () => {
      // #given
      const exactLabel = "Exactly thirty chars here!!!!!"; // 30 chars
      expect(exactLabel.length).toBe(30);
      const args = {
        questions: [{ question: "Choose", options: [{ label: exactLabel }] }],
      };

      // #when
      const result = await runBefore("ask_user_question", args);

      // #then
      const resultLabel = result.questions[0].options[0].label;
      expect(resultLabel).toBe(exactLabel);
    });

    it("ignores non-AskUserQuestion tools", async () => {
      // #given
      const args = { command: "echo hello" };
      const originalArgs = { ...args };

      // #when
      const result = await runBefore("Bash", args);

      // #then
      expect(result).toEqual(originalArgs);
    });

    it("handles multiple questions with multiple options", async () => {
      // #given
      const args = {
        questions: [
          {
            question: "Q1",
            options: [
              { label: "Very long label number one that needs truncation" },
              { label: "Short" },
            ],
          },
          {
            question: "Q2",
            options: [{ label: "Another extremely long label for testing purposes" }],
          },
        ],
      };

      // #when
      const result = await runBefore("AskUserQuestion", args);

      // #then
      const [q1opts, q2opts] = [result.questions[0].options, result.questions[1].options];
      expect(q1opts[0].label).toBe("Very long label number one ...");
      expect(q1opts[0].label.length).toBeLessThanOrEqual(30);
      expect(q1opts[1].label).toBe("Short");
      expect(q2opts[0].label).toBe("Another extremely long labe...");
      expect(q2opts[0].label.length).toBeLessThanOrEqual(30);
    });
  });
});

View File

@@ -0,0 +1,61 @@
/** Longest label, in UTF-16 code units, that is passed through unchanged. */
const MAX_LABEL_LENGTH = 30;

/** Marker appended to a label that had to be shortened. */
const ELLIPSIS = "...";

/** One selectable answer of a question. */
interface QuestionOption {
  label: string;
  description?: string;
}

/** One question together with its selectable options. */
interface Question {
  question: string;
  header?: string;
  options: QuestionOption[];
  multiSelect?: boolean;
}

/** Arguments of the ask-user-question tool as far as this hook reads them. */
interface AskUserQuestionArgs {
  questions: Question[];
}

/**
 * Returns the first `end` code units of `text`, dropping a trailing high
 * surrogate so that an astral character (e.g. an emoji) is never cut in half.
 */
function sliceWithoutSplittingPair(text: string, end: number): string {
  const lastUnit = end > 0 ? text.charCodeAt(end - 1) : 0;
  const endsOnHighSurrogate = lastUnit >= 0xd800 && lastUnit <= 0xdbff;
  return text.substring(0, endsOnHighSurrogate ? end - 1 : end);
}

/** Type guard: `value` is an object carrying a string `label`. */
function hasStringLabel(value: unknown): value is QuestionOption {
  return (
    typeof value === "object" &&
    value !== null &&
    typeof (value as { label?: unknown }).label === "string"
  );
}

/**
 * Shortens `label` so that it is at most `maxLength` code units long.
 *
 * Labels that already fit are returned unchanged. Longer labels are cut and
 * suffixed with "..." (the suffix counts towards the limit). When the limit is
 * too small to hold the suffix, the label is cut hard without it so the result
 * still respects the limit.
 *
 * @param label Text to shorten.
 * @param maxLength Maximum length of the result; defaults to MAX_LABEL_LENGTH.
 * @returns The original or shortened label.
 */
function truncateLabel(label: string, maxLength: number = MAX_LABEL_LENGTH): string {
  if (label.length <= maxLength) {
    return label;
  }
  if (maxLength < ELLIPSIS.length) {
    // No room for the ellipsis: a bare "..." would itself exceed the limit.
    return sliceWithoutSplittingPair(label, Math.max(0, maxLength));
  }
  return sliceWithoutSplittingPair(label, maxLength - ELLIPSIS.length) + ELLIPSIS;
}

/**
 * Returns a copy of `args` in which every option label is truncated.
 *
 * The arguments originate from a tool call and may not match the declared
 * shape, so malformed entries (non-object questions, non-array options,
 * options without a string label) are passed through untouched instead of
 * throwing. A question whose `options` is missing gets an empty array.
 *
 * @param args Tool arguments to process; not mutated.
 * @returns New arguments object, or `args` itself when `questions` is not an array.
 */
function truncateQuestionLabels(args: AskUserQuestionArgs): AskUserQuestionArgs {
  if (!args.questions || !Array.isArray(args.questions)) {
    return args;
  }

  return {
    ...args,
    questions: args.questions.map((question) => {
      if (typeof question !== "object" || question === null) {
        return question;
      }
      const { options } = question;
      if (options == null) {
        return { ...question, options: [] };
      }
      if (!Array.isArray(options)) {
        return question;
      }
      return {
        ...question,
        options: options.map((option) =>
          hasStringLabel(option)
            ? { ...option, label: truncateLabel(option.label) }
            : option
        ),
      };
    }),
  };
}

/**
 * Creates a hook that truncates over-long option labels of ask-user-question
 * tool calls before the tool runs.
 *
 * The tool name is matched case-insensitively against "askuserquestion" and
 * "ask_user_question"; every other tool is left alone. Truncated arguments are
 * merged back into `output.args` in place.
 */
export function createQuestionLabelTruncatorHook() {
  return {
    "tool.execute.before": async (
      input: { tool: string },
      output: { args: Record<string, unknown> }
    ): Promise<void> => {
      const toolName = input.tool?.toLowerCase();
      if (toolName !== "askuserquestion" && toolName !== "ask_user_question") {
        return;
      }

      const args = output.args as unknown as AskUserQuestionArgs | undefined;
      if (!args?.questions) {
        return;
      }

      Object.assign(output.args, truncateQuestionLabels(args));
    },
  };
}

View File

@@ -459,7 +459,7 @@ describe("ralph-loop", () => {
})
hook.startLoop("session-123", "Build something", { completionPromise: "COMPLETE" })
writeFileSync(transcriptPath, JSON.stringify({ content: "Task done <promise>COMPLETE</promise>" }))
writeFileSync(transcriptPath, JSON.stringify({ type: "tool_result", tool_name: "write", tool_output: { output: "Task done <promise>COMPLETE</promise>" } }) + "\n")
// #when - session goes idle (transcriptPath now derived from sessionID via getTranscriptPath)
await hook.event({
@@ -703,10 +703,105 @@ describe("ralph-loop", () => {
expect(promptCalls[0].text).toContain("2/50")
})
test("should NOT detect completion from user message in transcript (issue #622)", async () => {
// #given - transcript contains user message with template text that includes completion promise
// This reproduces the bug where the RALPH_LOOP_TEMPLATE instructional text
// containing `<promise>DONE</promise>` is recorded as a user message and
// falsely triggers completion detection
const transcriptPath = join(TEST_DIR, "transcript.jsonl")
const templateText = `You are starting a Ralph Loop...
Output <promise>DONE</promise> when fully complete`
const userEntry = JSON.stringify({
type: "user",
timestamp: new Date().toISOString(),
content: templateText,
})
writeFileSync(transcriptPath, userEntry + "\n")
const hook = createRalphLoopHook(createMockPluginInput(), {
getTranscriptPath: () => transcriptPath,
})
hook.startLoop("session-123", "Build something", { completionPromise: "DONE" })
// #when - session goes idle
await hook.event({
event: {
type: "session.idle",
properties: { sessionID: "session-123" },
},
})
// #then - loop should CONTINUE (user message completion promise is instructional, not actual)
expect(promptCalls.length).toBe(1)
expect(hook.getState()?.iteration).toBe(2)
})
test("should NOT detect completion from continuation prompt in transcript (issue #622)", async () => {
// #given - transcript contains continuation prompt (also a user message) with completion promise
const transcriptPath = join(TEST_DIR, "transcript.jsonl")
const continuationText = `RALPH LOOP 2/100
When FULLY complete, output: <promise>DONE</promise>
Original task: Build something`
const userEntry = JSON.stringify({
type: "user",
timestamp: new Date().toISOString(),
content: continuationText,
})
writeFileSync(transcriptPath, userEntry + "\n")
const hook = createRalphLoopHook(createMockPluginInput(), {
getTranscriptPath: () => transcriptPath,
})
hook.startLoop("session-123", "Build something", { completionPromise: "DONE" })
// #when - session goes idle
await hook.event({
event: {
type: "session.idle",
properties: { sessionID: "session-123" },
},
})
// #then - loop should CONTINUE (continuation prompt text is not actual completion)
expect(promptCalls.length).toBe(1)
expect(hook.getState()?.iteration).toBe(2)
})
test("should detect completion from tool_result entry in transcript", async () => {
// #given - transcript contains a tool_result with completion promise
const transcriptPath = join(TEST_DIR, "transcript.jsonl")
const toolResultEntry = JSON.stringify({
type: "tool_result",
timestamp: new Date().toISOString(),
tool_name: "write",
tool_input: {},
tool_output: { output: "Task complete! <promise>DONE</promise>" },
})
writeFileSync(transcriptPath, toolResultEntry + "\n")
const hook = createRalphLoopHook(createMockPluginInput(), {
getTranscriptPath: () => transcriptPath,
})
hook.startLoop("session-123", "Build something", { completionPromise: "DONE" })
// #when - session goes idle
await hook.event({
event: {
type: "session.idle",
properties: { sessionID: "session-123" },
},
})
// #then - loop should complete (tool_result contains actual completion output)
expect(promptCalls.length).toBe(0)
expect(toastCalls.some((t) => t.title === "Ralph Loop Complete!")).toBe(true)
expect(hook.getState()).toBeNull()
})
test("should check transcript BEFORE API to optimize performance", async () => {
// #given - transcript has completion promise
const transcriptPath = join(TEST_DIR, "transcript.jsonl")
writeFileSync(transcriptPath, JSON.stringify({ content: "<promise>DONE</promise>" }))
writeFileSync(transcriptPath, JSON.stringify({ type: "tool_result", tool_name: "write", tool_output: { output: "<promise>DONE</promise>" } }) + "\n")
mockSessionMessages = [
{ info: { role: "assistant" }, parts: [{ type: "text", text: "No promise here" }] },
]
@@ -736,7 +831,7 @@ describe("ralph-loop", () => {
const hook = createRalphLoopHook(createMockPluginInput(), {
getTranscriptPath: () => transcriptPath,
})
writeFileSync(transcriptPath, JSON.stringify({ content: "<promise>DONE</promise>" }))
writeFileSync(transcriptPath, JSON.stringify({ type: "tool_result", tool_name: "write", tool_output: { output: "<promise>DONE</promise>" } }) + "\n")
hook.startLoop("test-id", "Build API", { ultrawork: true })
// #when - idle event triggered
@@ -754,7 +849,7 @@ describe("ralph-loop", () => {
const hook = createRalphLoopHook(createMockPluginInput(), {
getTranscriptPath: () => transcriptPath,
})
writeFileSync(transcriptPath, JSON.stringify({ content: "<promise>DONE</promise>" }))
writeFileSync(transcriptPath, JSON.stringify({ type: "tool_result", tool_name: "write", tool_output: { output: "<promise>DONE</promise>" } }) + "\n")
hook.startLoop("test-id", "Build API")
// #when - idle event triggered

View File

@@ -100,7 +100,18 @@ export function createRalphLoopHook(
const content = readFileSync(transcriptPath, "utf-8")
const pattern = new RegExp(`<promise>\\s*${escapeRegex(promise)}\\s*</promise>`, "is")
return pattern.test(content)
const lines = content.split("\n").filter(l => l.trim())
for (const line of lines) {
try {
const entry = JSON.parse(line)
if (entry.type === "user") continue
if (pattern.test(line)) return true
} catch {
continue
}
}
return false
} catch {
return false
}

View File

@@ -0,0 +1,29 @@
/** Identifier of this hook; used as the bracketed prefix of its log messages. */
export const HOOK_NAME = "sisyphus-junior-notepad"
/**
 * Instruction block that the hook prepends to `delegate_task` prompts.
 * It tells the delegated agent where to append its notes and that the plan
 * file is read-only. `{plan-name}` is left as a literal placeholder here;
 * nothing in this module substitutes it.
 */
export const NOTEPAD_DIRECTIVE = `
<Work_Context>
## Notepad Location (for recording learnings)
NOTEPAD PATH: .sisyphus/notepads/{plan-name}/
- learnings.md: Record patterns, conventions, successful approaches
- issues.md: Record problems, blockers, gotchas encountered
- decisions.md: Record architectural choices and rationales
- problems.md: Record unresolved issues, technical debt
You SHOULD append findings to notepad files after completing work.
IMPORTANT: Always APPEND to notepad files - never overwrite or use Edit tool.
## Plan Location (READ ONLY)
PLAN PATH: .sisyphus/plans/{plan-name}.md
CRITICAL RULE: NEVER MODIFY THE PLAN FILE
The plan file (.sisyphus/plans/*.md) is SACRED and READ-ONLY.
- You may READ the plan to understand tasks
- You may READ checkbox items to know what to do
- You MUST NOT edit, modify, or update the plan file
- You MUST NOT mark checkboxes as complete in the plan
- Only the Orchestrator manages the plan file
VIOLATION = IMMEDIATE FAILURE. The Orchestrator tracks plan state.
</Work_Context>
`

View File

@@ -0,0 +1,45 @@
import type { PluginInput } from "@opencode-ai/plugin"
import { isCallerOrchestrator } from "../../shared/session-utils"
import { SYSTEM_DIRECTIVE_PREFIX } from "../../shared/system-directive"
import { log } from "../../shared/logger"
import { HOOK_NAME, NOTEPAD_DIRECTIVE } from "./constants"
export * from "./constants"
/**
 * Creates a hook that prepends the notepad directive to the prompt of
 * `delegate_task` calls issued from an orchestrator session.
 *
 * The prompt is left alone when the tool is not `delegate_task`, when
 * `isCallerOrchestrator` rejects the session, when there is no prompt, or when
 * the prompt already contains the system directive prefix.
 *
 * @param ctx Plugin input; not read by this hook.
 * @returns Object exposing the `tool.execute.before` handler.
 */
export function createSisyphusJuniorNotepadHook(ctx: PluginInput) {
  const injectNotepadDirective = async (
    input: { tool: string; sessionID: string; callID: string },
    output: { args: Record<string, unknown>; message?: string }
  ): Promise<void> => {
    // Only delegate_task calls made by an orchestrator session are of interest.
    // The orchestrator check is skipped entirely for other tools.
    const isDelegateTask = input.tool === "delegate_task"
    if (!isDelegateTask || !isCallerOrchestrator(input.sessionID)) {
      return
    }

    // Nothing to do without a prompt, and a prompt that already carries the
    // system directive prefix is treated as already injected.
    const prompt = output.args.prompt as string | undefined
    if (!prompt || prompt.includes(SYSTEM_DIRECTIVE_PREFIX)) {
      return
    }

    output.args.prompt = NOTEPAD_DIRECTIVE + prompt

    log(`[${HOOK_NAME}] Injected notepad directive to delegate_task`, {
      sessionID: input.sessionID,
    })
  }

  return {
    "tool.execute.before": injectNotepadDirective,
  }
}

View File

@@ -395,7 +395,7 @@ describe("start-work hook", () => {
)
// #then
expect(updateSpy).toHaveBeenCalledWith("ses-prometheus-to-sisyphus", "Atlas")
expect(updateSpy).toHaveBeenCalledWith("ses-prometheus-to-sisyphus", "atlas")
updateSpy.mockRestore()
})
})

View File

@@ -71,7 +71,7 @@ export function createStartWorkHook(ctx: PluginInput) {
sessionID: input.sessionID,
})
updateSessionAgent(input.sessionID, "Atlas")
updateSessionAgent(input.sessionID, "atlas")
const existingState = readBoulderState(ctx.directory)
const sessionId = input.sessionID

View File

@@ -16,21 +16,21 @@ function extractSessionId(output: string): string | null {
}
export function createTaskResumeInfoHook() {
const toolExecuteAfter = async (
input: { tool: string; sessionID: string; callID: string },
output: { title: string; output: string; metadata: unknown }
) => {
if (!TARGET_TOOLS.includes(input.tool)) return
if (output.output.startsWith("Error:") || output.output.startsWith("Failed")) return
if (output.output.includes("\nto resume:")) return
const toolExecuteAfter = async (
input: { tool: string; sessionID: string; callID: string },
output: { title: string; output: string; metadata: unknown }
) => {
if (!TARGET_TOOLS.includes(input.tool)) return
if (output.output.startsWith("Error:") || output.output.startsWith("Failed")) return
if (output.output.includes("\nto continue:")) return
const sessionId = extractSessionId(output.output)
if (!sessionId) return
const sessionId = extractSessionId(output.output)
if (!sessionId) return
output.output = output.output.trimEnd() + `\n\nto resume: delegate_task(resume="${sessionId}", prompt="...")`
}
output.output = output.output.trimEnd() + `\n\nto continue: delegate_task(session_id="${sessionId}", prompt="...")`
}
return {
"tool.execute.after": toolExecuteAfter,
}
return {
"tool.execute.after": toolExecuteAfter,
}
}

View File

@@ -103,7 +103,7 @@ describe("createThinkModeHook integration", () => {
const hook = createThinkModeHook()
const input = createMockInput(
"github-copilot",
"gemini-3-pro-preview",
"gemini-3-pro",
"think about this"
)
@@ -112,7 +112,7 @@ describe("createThinkModeHook integration", () => {
// #then should upgrade to high variant and inject google thinking config
const message = input.message as MessageWithInjectedProps
expect(input.message.model?.modelID).toBe("gemini-3-pro-preview-high")
expect(input.message.model?.modelID).toBe("gemini-3-pro-high")
expect(message.providerOptions).toBeDefined()
const googleOptions = (
message.providerOptions as Record<string, unknown>
@@ -125,7 +125,7 @@ describe("createThinkModeHook integration", () => {
const hook = createThinkModeHook()
const input = createMockInput(
"github-copilot",
"gemini-3-flash-preview",
"gemini-3-flash",
"ultrathink"
)
@@ -134,7 +134,7 @@ describe("createThinkModeHook integration", () => {
// #then should upgrade to high variant
const message = input.message as MessageWithInjectedProps
expect(input.message.model?.modelID).toBe("gemini-3-flash-preview-high")
expect(input.message.model?.modelID).toBe("gemini-3-flash-high")
expect(message.providerOptions).toBeDefined()
})
})

View File

@@ -50,7 +50,7 @@ describe("think-mode switcher", () => {
describe("Gemini models via github-copilot", () => {
it("should resolve github-copilot Gemini Pro to google config", () => {
// #given a github-copilot provider with Gemini Pro model
const config = getThinkingConfig("github-copilot", "gemini-3-pro-preview")
const config = getThinkingConfig("github-copilot", "gemini-3-pro")
// #then should return google thinking config
expect(config).not.toBeNull()
@@ -65,7 +65,7 @@ describe("think-mode switcher", () => {
// #given a github-copilot provider with Gemini Flash model
const config = getThinkingConfig(
"github-copilot",
"gemini-3-flash-preview"
"gemini-3-flash"
)
// #then should return google thinking config
@@ -159,11 +159,11 @@ describe("think-mode switcher", () => {
it("should handle Gemini preview variants", () => {
// #given Gemini preview model IDs
expect(getHighVariant("gemini-3-pro-preview")).toBe(
"gemini-3-pro-preview-high"
expect(getHighVariant("gemini-3-pro")).toBe(
"gemini-3-pro-high"
)
expect(getHighVariant("gemini-3-flash-preview")).toBe(
"gemini-3-flash-preview-high"
expect(getHighVariant("gemini-3-flash")).toBe(
"gemini-3-flash-high"
)
})

View File

@@ -89,12 +89,10 @@ const HIGH_VARIANT_MAP: Record<string, string> = {
// Claude
"claude-sonnet-4-5": "claude-sonnet-4-5-high",
"claude-opus-4-5": "claude-opus-4-5-high",
// Gemini
"gemini-3-pro": "gemini-3-pro-high",
"gemini-3-pro-low": "gemini-3-pro-high",
"gemini-3-pro-preview": "gemini-3-pro-preview-high",
"gemini-3-flash": "gemini-3-flash-high",
"gemini-3-flash-preview": "gemini-3-flash-preview-high",
// Gemini
"gemini-3-pro": "gemini-3-pro-high",
"gemini-3-pro-low": "gemini-3-pro-high",
"gemini-3-flash": "gemini-3-flash-high",
// GPT-5
"gpt-5": "gpt-5-high",
"gpt-5-mini": "gpt-5-mini-high",

View File

@@ -835,8 +835,8 @@ describe("todo-continuation-enforcer", () => {
// OpenCode returns assistant messages with flat modelID/providerID, not nested model object
const mockMessagesWithAssistant = [
{ info: { id: "msg-1", role: "user", agent: "Sisyphus", model: { providerID: "openai", modelID: "gpt-5.2" } } },
{ info: { id: "msg-2", role: "assistant", agent: "Sisyphus", modelID: "gpt-5.2", providerID: "openai" } },
{ info: { id: "msg-1", role: "user", agent: "sisyphus", model: { providerID: "openai", modelID: "gpt-5.2" } } },
{ info: { id: "msg-2", role: "assistant", agent: "sisyphus", modelID: "gpt-5.2", providerID: "openai" } },
]
const mockInput = {
@@ -873,4 +873,193 @@ describe("todo-continuation-enforcer", () => {
expect(promptCalls.length).toBe(1)
expect(promptCalls[0].model).toEqual({ providerID: "openai", modelID: "gpt-5.2" })
})
// ============================================================
// COMPACTION AGENT FILTERING TESTS
// These tests verify that compaction agent messages are filtered
// when resolving agent info, preventing infinite continuation loops
// ============================================================
test("should skip compaction agent messages when resolving agent info", async () => {
  // #given - the newest message comes from the compaction agent, the earlier ones from sisyphus
  const sessionID = "main-compaction-filter"
  setMainSession(sessionID)

  const mockMessagesWithCompaction = [
    {
      info: {
        id: "msg-1",
        role: "user",
        agent: "sisyphus",
        model: { providerID: "anthropic", modelID: "claude-sonnet-4-5" },
      },
    },
    {
      info: {
        id: "msg-2",
        role: "assistant",
        agent: "sisyphus",
        modelID: "claude-sonnet-4-5",
        providerID: "anthropic",
      },
    },
    {
      info: {
        id: "msg-3",
        role: "assistant",
        agent: "compaction",
        modelID: "claude-sonnet-4-5",
        providerID: "anthropic",
      },
    },
  ]

  // Session API stubs: one pending todo, the canned message history, and a
  // prompt spy that records every call into the shared promptCalls list.
  const listTodos = async () => ({
    data: [{ id: "1", content: "Task 1", status: "pending", priority: "high" }],
  })
  const listMessages = async () => ({ data: mockMessagesWithCompaction })
  const recordPrompt = async (opts: any) => {
    promptCalls.push({
      sessionID: opts.path.id,
      agent: opts.body.agent,
      model: opts.body.model,
      text: opts.body.parts[0].text,
    })
    return {}
  }

  const mockInput = {
    client: {
      session: { todo: listTodos, messages: listMessages, prompt: recordPrompt },
      tui: { showToast: async () => ({}) },
    },
    directory: "/tmp/test",
  } as any

  const backgroundManager = createMockBackgroundManager(false)
  const hook = createTodoContinuationEnforcer(mockInput, { backgroundManager })

  // #when - the session reports idle and we wait 2.5s for the hook to act
  await hook.handler({ event: { type: "session.idle", properties: { sessionID } } })
  await new Promise(resolve => setTimeout(resolve, 2500))

  // #then - exactly one continuation prompt, addressed to sisyphus rather than compaction
  expect(promptCalls.length).toBe(1)
  expect(promptCalls[0].agent).toBe("sisyphus")
})
test("should skip injection when only compaction agent messages exist", async () => {
  // #given - the only message in the session history belongs to the compaction agent
  const sessionID = "main-only-compaction"
  setMainSession(sessionID)

  const mockMessagesOnlyCompaction = [
    { info: { id: "msg-1", role: "assistant", agent: "compaction" } },
  ]

  // Session API stubs: one pending todo, the canned message history, and a
  // prompt spy that records every call into the shared promptCalls list.
  const listTodos = async () => ({
    data: [{ id: "1", content: "Task 1", status: "pending", priority: "high" }],
  })
  const listMessages = async () => ({ data: mockMessagesOnlyCompaction })
  const recordPrompt = async (opts: any) => {
    promptCalls.push({
      sessionID: opts.path.id,
      agent: opts.body.agent,
      model: opts.body.model,
      text: opts.body.parts[0].text,
    })
    return {}
  }

  const mockInput = {
    client: {
      session: { todo: listTodos, messages: listMessages, prompt: recordPrompt },
      tui: { showToast: async () => ({}) },
    },
    directory: "/tmp/test",
  } as any

  // The hook is created with an empty options object.
  const hook = createTodoContinuationEnforcer(mockInput, {})

  // #when - the session reports idle and we wait 3s for any continuation to fire
  await hook.handler({
    event: { type: "session.idle", properties: { sessionID } },
  })
  await new Promise(resolve => setTimeout(resolve, 3000))

  // #then - no continuation prompt is sent
  expect(promptCalls).toHaveLength(0)
})
test("should skip injection when prometheus agent is after compaction", async () => {
  // #given - a prometheus conversation whose newest message is from the compaction agent
  const sessionID = "main-prometheus-compacted"
  setMainSession(sessionID)

  const mockMessagesPrometheusCompacted = [
    { info: { id: "msg-1", role: "user", agent: "prometheus" } },
    { info: { id: "msg-2", role: "assistant", agent: "prometheus" } },
    { info: { id: "msg-3", role: "assistant", agent: "compaction" } },
  ]

  // Session API stubs: one pending todo, the canned message history, and a
  // prompt spy that records every call into the shared promptCalls list.
  const listTodos = async () => ({
    data: [{ id: "1", content: "Task 1", status: "pending", priority: "high" }],
  })
  const listMessages = async () => ({ data: mockMessagesPrometheusCompacted })
  const recordPrompt = async (opts: any) => {
    promptCalls.push({
      sessionID: opts.path.id,
      agent: opts.body.agent,
      model: opts.body.model,
      text: opts.body.parts[0].text,
    })
    return {}
  }

  const mockInput = {
    client: {
      session: { todo: listTodos, messages: listMessages, prompt: recordPrompt },
      tui: { showToast: async () => ({}) },
    },
    directory: "/tmp/test",
  } as any

  // The hook is created with an empty options object.
  const hook = createTodoContinuationEnforcer(mockInput, {})

  // #when - the session reports idle and we wait 3s for any continuation to fire
  await hook.handler({
    event: { type: "session.idle", properties: { sessionID } },
  })
  await new Promise(resolve => setTimeout(resolve, 3000))

  // #then - no continuation prompt is sent (prometheus is expected to be on the skip list)
  expect(promptCalls).toHaveLength(0)
})
test("should inject when agent info is undefined but skipAgents is empty", async () => {
  // #given - messages carry no agent or model info, and the skip list is explicitly empty
  const sessionID = "main-no-agent-no-skip"
  setMainSession(sessionID)

  const mockMessagesNoAgent = [
    { info: { id: "msg-1", role: "user" } },
    { info: { id: "msg-2", role: "assistant" } },
  ]

  // Session API stubs: one pending todo, the canned message history, and a
  // prompt spy that records every call into the shared promptCalls list.
  const listTodos = async () => ({
    data: [{ id: "1", content: "Task 1", status: "pending", priority: "high" }],
  })
  const listMessages = async () => ({ data: mockMessagesNoAgent })
  const recordPrompt = async (opts: any) => {
    promptCalls.push({
      sessionID: opts.path.id,
      agent: opts.body.agent,
      model: opts.body.model,
      text: opts.body.parts[0].text,
    })
    return {}
  }

  const mockInput = {
    client: {
      session: { todo: listTodos, messages: listMessages, prompt: recordPrompt },
      tui: { showToast: async () => ({}) },
    },
    directory: "/tmp/test",
  } as any

  const hook = createTodoContinuationEnforcer(mockInput, {
    skipAgents: [],
  })

  // #when - the session reports idle and we wait 3s for the continuation to fire
  await hook.handler({
    event: { type: "session.idle", properties: { sessionID } },
  })
  await new Promise(resolve => setTimeout(resolve, 3000))

  // #then - exactly one continuation prompt is injected
  expect(promptCalls.length).toBe(1)
})
})

View File

@@ -13,7 +13,7 @@ import { createSystemDirective, SystemDirectiveTypes } from "../shared/system-di
const HOOK_NAME = "todo-continuation-enforcer"
const DEFAULT_SKIP_AGENTS = ["prometheus"]
const DEFAULT_SKIP_AGENTS = ["prometheus", "compaction"]
export interface TodoContinuationEnforcerOptions {
backgroundManager?: BackgroundManager
@@ -373,6 +373,7 @@ export function createTodoContinuationEnforcer(
}
let resolvedInfo: ResolvedMessageInfo | undefined
let hasCompactionMessage = false
try {
const messagesResp = await ctx.client.session.messages({
path: { id: sessionID },
@@ -388,6 +389,10 @@ export function createTodoContinuationEnforcer(
}>
for (let i = messages.length - 1; i >= 0; i--) {
const info = messages[i].info
if (info?.agent === "compaction") {
hasCompactionMessage = true
continue
}
if (info?.agent || info?.model || (info?.modelID && info?.providerID)) {
resolvedInfo = {
agent: info.agent,
@@ -401,11 +406,15 @@ export function createTodoContinuationEnforcer(
log(`[${HOOK_NAME}] Failed to fetch messages for agent check`, { sessionID, error: String(err) })
}
log(`[${HOOK_NAME}] Agent check`, { sessionID, agentName: resolvedInfo?.agent, skipAgents })
log(`[${HOOK_NAME}] Agent check`, { sessionID, agentName: resolvedInfo?.agent, skipAgents, hasCompactionMessage })
if (resolvedInfo?.agent && skipAgents.includes(resolvedInfo.agent)) {
log(`[${HOOK_NAME}] Skipped: agent in skipAgents list`, { sessionID, agent: resolvedInfo.agent })
return
}
if (hasCompactionMessage && !resolvedInfo?.agent) {
log(`[${HOOK_NAME}] Skipped: compaction occurred but no agent info resolved`, { sessionID })
return
}
startCountdown(sessionID, incompleteCount, todos.length, resolvedInfo)
return

Some files were not shown because too many files have changed in this diff Show More