Compare commits

...

149 Commits

Author SHA1 Message Date
YeonGyu-Kim
78c9ad3e7f refactor(background-agent): dedupe manager prompt cleanup paths
Add targeted regression coverage for pending parent bookkeeping and launch/resume prompt failure behavior before extracting narrow helper methods inside BackgroundManager.

Keep launch-only skill content and missing-agent formatting explicit, keep resume on direct promptAsync semantics, and reuse shared helper paths for pending registration, prompt body construction, and prompt-dispatch cleanup.

Co-authored-by: Codex <noreply@openai.com>
2026-03-13 14:49:18 +09:00
YeonGyu-Kim
f3de122147 feat(hooks): add delegate-task-english-directive hook to enforce English for subagents
Appends bold uppercase English-only directive to explore, librarian,
oracle, and plan subagent prompts via tool.execute.before on the task tool.
2026-03-13 14:22:13 +09:00
YeonGyu-Kim
0303488906 Merge pull request #2550 from code-yeongyu/fix/deploy-blockers
fix: resolve all deployment blockers from v3.11.2→HEAD release review
2026-03-13 14:21:45 +09:00
YeonGyu-Kim
3e746c9a56 fix(review): resolve 3 review-work blocking issues 2026-03-13 14:09:36 +09:00
YeonGyu-Kim
786c7a84d0 fix(background-agent): prevent queue item loss on concurrent cancel and guard against cancelled task resurrection 2026-03-13 13:12:59 +09:00
YeonGyu-Kim
380889caa3 fix(delegate-task): add exception fallback for cleanup reason and correct test mock status type 2026-03-13 13:08:50 +09:00
YeonGyu-Kim
04b0c6f33c fix(atlas): pause after final verification wave for explicit user approval 2026-03-13 12:43:33 +09:00
YeonGyu-Kim
fd71c89b95 fix(background-agent): release descendant quota on pre-start task cancellation and creation failure 2026-03-13 12:37:33 +09:00
YeonGyu-Kim
11df83713e refactor(preemptive-compaction): use shared context-limit resolver to eliminate duplicated logic 2026-03-13 12:36:07 +09:00
YeonGyu-Kim
457f303adf fix(background-agent): clean global subagentSessions and SessionCategoryRegistry on dispose 2026-03-13 10:56:44 +09:00
YeonGyu-Kim
0015dd88af fix(agent-config): normalize agent names before builtin override filtering to prevent alias bypass 2026-03-13 10:55:51 +09:00
YeonGyu-Kim
9bce6314b1 fix(runtime-fallback): scope visible-assistant check to current turn and cleanup retry dedupe keys 2026-03-13 10:54:47 +09:00
YeonGyu-Kim
cbe113ebab fix(slashcommand): support parent config dirs in command execution path to match discovery 2026-03-13 10:54:15 +09:00
YeonGyu-Kim
e3f6c12347 fix(atlas): restrict idle-event session append to boulder-owned subagent sessions only 2026-03-13 10:53:45 +09:00
YeonGyu-Kim
b356c50285 fix(delegate-task): cancel child background tasks on parent abort and timeout in unstable agent flow 2026-03-13 10:49:44 +09:00
YeonGyu-Kim
38938508fa test(model-fallback): update snapshots and kimi model expectations for opencode-go integration 2026-03-13 10:48:05 +09:00
YeonGyu-Kim
2c8a8eb4f1 fix(gpt-permission-continuation): add per-session consecutive auto-continue cap to prevent infinite loops 2026-03-13 10:48:00 +09:00
github-actions[bot]
825e854cff @cpkt9762 has signed the CLA in code-yeongyu/oh-my-openagent#2539 2026-03-12 20:17:38 +00:00
github-actions[bot]
4226808432 @Gujiassh has signed the CLA in code-yeongyu/oh-my-openagent#2524 2026-03-12 16:36:59 +00:00
github-actions[bot]
0412e40780 @ricatix has signed the CLA in code-yeongyu/oh-my-openagent#2532 2026-03-12 15:23:10 +00:00
github-actions[bot]
18cbaadb52 @xodn348 has signed the CLA in code-yeongyu/oh-my-openagent#2531 2026-03-12 15:14:20 +00:00
github-actions[bot]
27538dcfe6 @apple-ouyang has signed the CLA in code-yeongyu/oh-my-openagent#2528 2026-03-12 14:39:21 +00:00
YeonGyu-Kim
e4e5f159f9 fix(tmux): wrap opencode attach commands in zsh -c shell
🤖 Generated with assistance of OhMyOpenCode
2026-03-12 20:12:38 +09:00
YeonGyu-Kim
4f4e53b436 feat(skill): re-read skills and commands from disk on every invocation
Removes in-memory caching so newly created skills mid-session are
immediately available via skill(). Clears the module-level skill cache
before each getAllSkills() call. Pre-provided skills from options are
merged as fallbacks for test compatibility.
2026-03-12 20:03:58 +09:00
YeonGyu-Kim
55b80fb7cd fix(skill-loader): discover skills from parent config dir when using profiles
OPENCODE_CONFIG_DIR pointing to profiles/ subdirectory caused skills at
~/.config/opencode/skills/ to be invisible. Added getOpenCodeSkillDirs()
with the same parent-dir fallback that getOpenCodeCommandDirs() uses.
2026-03-12 19:53:30 +09:00
YeonGyu-Kim
c85b6adb7d chore: gitignore platform binary sourcemaps and untrack existing ones 2026-03-12 19:53:20 +09:00
YeonGyu-Kim
a400adae97 feat(skill): render skills as slash commands in available items list
Skills now appear as <command> items with / prefix (e.g., /review-work)
instead of <skill> items, making them discoverable alongside regular
slash commands in the skill tool description.
2026-03-12 18:53:44 +09:00
YeonGyu-Kim
50638cf783 test(hooks): fix test isolation in session-notification-sender tests
Use namespace import pattern (import * as sender) to prevent cross-file
spy leakage in Bun's shared module state. Move restoreAllMocks to
beforeEach for proper cleanup ordering.

🤖 Generated with [OhMyOpenCode](https://github.com/code-yeongyu/oh-my-opencode) assistance
2026-03-12 18:37:10 +09:00
YeonGyu-Kim
8e3829f63a test(auto-slash-command): add tests for skills as slash commands 2026-03-12 18:19:06 +09:00
YeonGyu-Kim
b4e01e9987 feat(slashcommand): support parent opencode config dirs for command discovery 2026-03-12 18:19:06 +09:00
YeonGyu-Kim
8c2385fe31 feat(hooks): add quiet and nothrow to notification shell executions 2026-03-12 18:19:06 +09:00
YeonGyu-Kim
c3ab066335 feat(shared): export opencode-command-dirs module 2026-03-12 18:19:06 +09:00
YeonGyu-Kim
7937f9d777 feat(shared): add opencode-command-dirs utility for multi-level command discovery 2026-03-12 18:19:06 +09:00
YeonGyu-Kim
53c65a7e63 feat(cli): add sisyphus-junior model fallback requirements
Add CLI_AGENT_MODEL_REQUIREMENTS entry for sisyphus-junior with
fallback chain: claude-sonnet-4-6 -> kimi-k2.5 -> big-pickle.

🤖 Generated with assistance of OhMyOpenCode
2026-03-12 18:19:06 +09:00
YeonGyu-Kim
8f6b952dc0 feat(prometheus): require explicit user approval in Final Verification Wave
Add mandatory explicit user okay before completing work in Final
Verification Wave. Present consolidated results and wait for user
confirmation before marking tasks complete.

🤖 Generated with assistance of OhMyOpenCode
2026-03-12 18:19:06 +09:00
YeonGyu-Kim
e0bf0eb7cf docs: add opencode-go provider tier documentation 2026-03-12 18:19:06 +09:00
YeonGyu-Kim
a9fde452ac feat(opencode-go): update on-complete hook for provider display 2026-03-12 18:19:06 +09:00
YeonGyu-Kim
338379941d feat(opencode-go): integrate into model fallback chain resolution 2026-03-12 18:19:06 +09:00
YeonGyu-Kim
44d602b7e5 feat(opencode-go): integrate installer with config detection 2026-03-12 18:19:06 +09:00
YeonGyu-Kim
66ec9f58ee feat(opencode-go): add CLI install flag and TUI prompts 2026-03-12 18:19:06 +09:00
YeonGyu-Kim
89d1e105a8 feat(opencode-go): add model requirements for go-tier models 2026-03-12 18:19:06 +09:00
YeonGyu-Kim
504b68f2ac feat(opencode-go): add provider type and availability detection 2026-03-12 18:19:06 +09:00
YeonGyu-Kim
2bbbdc4ca9 refactor(github-triage): rewrite as read-only report-based analyzer 2026-03-12 18:19:06 +09:00
YeonGyu-Kim
ca7c0e391e fix(bun-install): default outputMode to "pipe" to prevent TUI stdout leak
runBunInstallWithDetails() defaulted to outputMode:"inherit", causing
bun install stdout/stderr to leak into the TUI when callers omitted the
option. Changed default to "pipe" so output is captured silently.

Also fixed stale mock in background-update-check.test.ts: the test was
mocking runBunInstall (unused) instead of runBunInstallWithDetails, and
returning boolean instead of BunInstallResult.
2026-03-12 18:19:06 +09:00
YeonGyu-Kim
81301a6071 feat: skip model resolution for delegated tasks when provider cache not yet created
Before provider cache exists (first run), resolveModelForDelegateTask now
returns undefined instead of guessing a model. This lets OpenCode use its
system default model when no model is specified in the prompt body.

User-specified model overrides still take priority regardless of cache state.
2026-03-12 18:19:06 +09:00
YeonGyu-Kim
62883d753f Merge pull request #2519 from code-yeongyu/fix/ultrawork-variant-no-max-override
fix: skip ultrawork variant override without SDK validation + add porcelain worktree parser
2026-03-12 17:27:57 +09:00
YeonGyu-Kim
c9d30f8be3 feat: add porcelain worktree parser with listWorktrees and parseWorktreeListPorcelain
Introduce git worktree list --porcelain parsing following upstream opencode patterns. Exports listWorktrees() for full worktree enumeration with branch info alongside existing detectWorktreePath().

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-03-12 17:25:10 +09:00
YeonGyu-Kim
2210997c89 fix: skip ultrawork variant override when SDK validation unavailable
When provider.list is not available for SDK validation, do not apply the configured ultrawork variant. This prevents models without a max variant from being incorrectly forced to max when ultrawork mode activates.

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-03-12 17:24:54 +09:00
YeonGyu-Kim
feb2160a7a Merge pull request #2518 from code-yeongyu/fix-2499-ulw-oracle-verified-loop
Keep ulw-loop running until Oracle verifies completion
2026-03-12 17:15:49 +09:00
YeonGyu-Kim
37c7231a50 test: isolate connected providers cache test setup
Prevent the cache test from deleting the user cache directory and add a regression test for that setup path.

Co-authored-by: Codex <noreply@openai.com>
2026-03-12 17:08:06 +09:00
YeonGyu-Kim
1812c9f054 test(ralph-loop): cover overlapping ultrawork loops
Lock down stale-session and overwrite cases so a previous ULW verification flow cannot complete or mutate a newer loop.

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-03-12 17:05:02 +09:00
YeonGyu-Kim
f31537f14c fix(ralph-loop): continue ultrawork until oracle verifies
Keep /ulw-loop iterating after the main session emits DONE so completion still depends on an actual Oracle VERIFIED result.

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-03-12 17:00:25 +09:00
YeonGyu-Kim
e763885df1 Merge pull request #2516 from code-yeongyu/fix/hashline-strict-whitespace-hash
fix(hashline): use strict whitespace hashing (trimEnd only, preserve leading indentation)
2026-03-12 16:52:30 +09:00
YeonGyu-Kim
0cbc15da96 fix(hashline): use strict whitespace hashing (trimEnd only, preserve leading indentation)
Previously computeLineHash stripped ALL whitespace before hashing, making
indentation changes invisible to hash validation. This weakened the stale-line
detection guarantee, especially for indentation-sensitive files (Python, YAML).

Now only trailing whitespace and carriage returns are stripped, matching
oh-my-pi upstream behavior. Leading indentation is preserved in the hash,
so indentation-only changes correctly trigger hash mismatches.
2026-03-12 16:42:41 +09:00
YeonGyu-Kim
04b0d62a55 feat(session-notification): include session context in ready notifications
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-03-12 15:29:21 +09:00
YeonGyu-Kim
943f31f460 feat(session-notification): add ready notification content builder
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-03-12 15:29:21 +09:00
YeonGyu-Kim
8e1a4dffa9 Merge pull request #2486 from code-yeongyu/fix/issue-2357-child-session-fallback
fix: enable runtime fallback for delegated child sessions (#2357)
2026-03-12 13:53:24 +09:00
YeonGyu-Kim
abc4b2a6a4 fix(runtime-fallback): remove committed rebase conflict markers
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-03-12 13:49:46 +09:00
YeonGyu-Kim
d8da2f1ad6 fix(runtime-fallback): clear retry keys on failed session bootstrap
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-03-12 13:39:30 +09:00
YeonGyu-Kim
62a905b690 fix(runtime-fallback): reuse normalized messages for visible assistant checks
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-03-12 13:39:30 +09:00
YeonGyu-Kim
79fb746a1c fix(runtime-fallback): resolve agents from normalized session messages
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-03-12 13:39:30 +09:00
YeonGyu-Kim
fcd4fa5164 fix(runtime-fallback): normalize retry part message extraction
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-03-12 13:39:30 +09:00
YeonGyu-Kim
6a4a3322c1 fix(runtime-fallback): add session messages extractor
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-03-12 13:39:30 +09:00
YeonGyu-Kim
3caa3fcc3d fix: address Cubic findings for runtime fallback child sessions
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-03-12 13:39:30 +09:00
YeonGyu-Kim
ba86ef0eea fix: enable runtime fallback for delegated child sessions (#2357) 2026-03-12 13:39:04 +09:00
acamq
4ded45d14c Merge pull request #2446 from win0na/fix/momus-key-trigger-specificity
fix(momus): make keyTrigger specify file-path-only invocation requirement
2026-03-11 20:34:08 -06:00
acamq
9032eeaa68 Merge pull request #2419 from guazi04/fix/serverurl-throw-getter
fix(tmux): handle serverUrl throw getter from upstream opencode refactor
2026-03-11 20:32:38 -06:00
YeonGyu-Kim
3ea23561f2 Merge pull request #2488 from code-yeongyu/fix/issue-2295-fallback-provider-preserve
fix: preserve session provider context in fallback chain
2026-03-12 11:24:43 +09:00
YeonGyu-Kim
0cdbd15f74 Merge pull request #2487 from code-yeongyu/fix/issue-2431-lsp-path-resolution
fix: unify LSP server PATH resolution between detection and spawn
2026-03-12 11:24:41 +09:00
YeonGyu-Kim
60e6f6d4f3 Merge pull request #2484 from code-yeongyu/fix/issue-2393-cubic-error-name
fix: add FreeUsageLimitError to RETRYABLE_ERROR_NAMES set
2026-03-12 11:24:37 +09:00
YeonGyu-Kim
b00fc89dfa Merge pull request #2458 from code-yeongyu/fix/memory-leaks
fix: resolve 12 memory leaks (3 critical + 9 high)
2026-03-12 11:21:13 +09:00
YeonGyu-Kim
2912b6598c fix: address Cubic findings for provider preserve fallback
- Reorder resolveFallbackProviderID: providerHint now checked before global connected-provider cache
- Revert require('bun:test') hack to standard ESM import in fallback-chain-from-models.test.ts

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-03-12 11:05:31 +09:00
YeonGyu-Kim
755efe226e fix: address Cubic findings for FreeUsageLimitError classification
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-03-12 11:05:26 +09:00
YeonGyu-Kim
6014f03ed2 fix: address Cubic finding for LSP server npm bin path
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-03-12 11:04:43 +09:00
YeonGyu-Kim
2b4a5ca5da test(agent-variant): restore hephaestus openai case
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-03-12 11:04:43 +09:00
YeonGyu-Kim
4157c2224f fix(background-agent): clear pending parent on silent cancel
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-03-12 11:04:35 +09:00
YeonGyu-Kim
d253f267c3 fix(skill-mcp-manager): guard stale client cleanup
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-03-12 11:04:28 +09:00
YeonGyu-Kim
d83f875740 fix(call-omo-agent): track reused sync sessions
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-03-12 11:04:20 +09:00
github-actions[bot]
5da347c3ec @ChicK00o has signed the CLA in code-yeongyu/oh-my-openagent#2499 2026-03-12 01:26:01 +00:00
github-actions[bot]
e5706bba48 @djdembeck has signed the CLA in code-yeongyu/oh-my-openagent#2497 2026-03-12 00:48:45 +00:00
acamq
f6ae3a4c64 Merge pull request #2493 from acamq/fix/fallback-test-regression
fix(test): update agent-variant test model to gpt-5.4
2026-03-11 15:47:23 -06:00
acamq
9832f7b52e fix(test): update agent-variant test model to gpt-5.4 2026-03-11 15:43:03 -06:00
acamq
5f3f8bb1d3 Merge pull request #2492 from acamq/fix/prometheus-test-regressions
test: update ultrabrain model expectations to gpt-5.4
2026-03-11 15:25:13 -06:00
acamq
2d6be11fa0 test: update ultrabrain model expectations to gpt-5.4
The DEFAULT_CATEGORIES ultrabrain model was updated from openai/gpt-5.3-codex
to openai/gpt-5.4 in a previous commit, but test expectations were not updated.

Updated test expectations in:
- src/plugin-handlers/config-handler.test.ts (lines 560, 620)
- src/agents/utils.test.ts (lines 1119, 1232, 1234, 1301, 1303, 1316, 1318)
2026-03-11 15:18:29 -06:00
acamq
5f419b7d9d Merge pull request #2473 from code-yeongyu/fix/sync-package-json-to-opencode-intent
fix(auto-update): sync cache package.json to opencode.json intent
2026-03-11 14:51:49 -06:00
acamq
d08754d1b4 fix(auto-update): pipe bun install output and restore other-deps preservation test
background-update-check.ts was using runBunInstall() which defaults to outputMode:"inherit", leaking bun install stdout/stderr into the background session. Reverted to runBunInstallWithDetails({ outputMode: "pipe" }) and explicitly logs result.error on failure.

Restores the accidentally deleted test case asserting that sibling dependencies (e.g. other:"1.0.0") are preserved in package.json after a plugin version sync.

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-03-11 13:28:12 -06:00
acamq
e6e32d345e fix(auto-update): expand semver regex to support hyphenated prerelease tags
The previous pattern `(-[\w.]+)?` used `\w` which excludes hyphens, causing versions like `1.2.3-alpha-1` and `1.2.3-rc-test` to be misclassified as unpinned tags. Updated both plugin-entry.ts and sync-package-json.ts (which share the definition) to the spec-compliant pattern that allows dot-separated identifiers using [0-9A-Za-z-] and optional build metadata.

Also adds String() coercion before .trim() in sync-package-json.ts to guard against a TypeError if the parsed JSON value for currentVersion is non-string at runtime.

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-03-11 13:28:04 -06:00
YeonGyu-Kim
7c89a2acf6 test: update gpt-5.4 fallback expectations
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-03-12 02:24:47 +09:00
YeonGyu-Kim
57b4985424 fix(background-agent): delay session error task cleanup
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-03-12 02:24:42 +09:00
YeonGyu-Kim
f9c8392179 fix(tmux-subagent): cap stale close retries
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-03-12 02:24:35 +09:00
YeonGyu-Kim
cbb378265e fix(skill-mcp-manager): drop superseded stale clients
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-03-12 02:24:29 +09:00
YeonGyu-Kim
7997606892 fix(call-omo-agent): preserve reused session tracking
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-03-12 02:24:22 +09:00
YeonGyu-Kim
99730088ef fix: remove contaminated await change from FreeUsageLimitError PR
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-03-12 01:51:25 +09:00
YeonGyu-Kim
7870e43578 fix: preserve session provider context in fallback chain (#2295) 2026-03-12 01:49:16 +09:00
YeonGyu-Kim
9b792c3224 Merge pull request #2485 from code-yeongyu/fix/issue-2316-tool-after-error-boundary
fix: add error boundary around extract/discard hooks in tool-execute-after
2026-03-12 01:46:51 +09:00
YeonGyu-Kim
9d0b56d375 fix: unify LSP server PATH resolution between detection and spawn (#2431) 2026-03-12 01:44:06 +09:00
YeonGyu-Kim
305389bd7f fix: add error boundary around extract/discard hooks in tool-execute-after (#2316) 2026-03-12 01:41:07 +09:00
YeonGyu-Kim
e249333898 test(skill-mcp-manager): cover pending cleanup registration retention
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-03-12 01:40:34 +09:00
YeonGyu-Kim
810dd5848f test(skill-mcp-manager): cover disposed guard after disconnectAll
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-03-12 01:40:34 +09:00
YeonGyu-Kim
079c6b17b0 fix: add FreeUsageLimitError to RETRYABLE_ERROR_NAMES set (#2393) 2026-03-12 01:40:24 +09:00
YeonGyu-Kim
aa1aad3bb1 fix: add disposed guard to MCP manager and guard unregister on pending connections 2026-03-12 01:37:03 +09:00
YeonGyu-Kim
f564404015 fix: address review-work round 6 findings (dispose isolation, event dispatch, disconnectedSessions ref-counting) 2026-03-12 01:37:03 +09:00
YeonGyu-Kim
cf276322a3 fix(background-agent): handle async shutdown in process-cleanup signal handlers 2026-03-12 01:37:03 +09:00
YeonGyu-Kim
2c3c447dc4 fix: address review-work round 3 findings (async shutdown, signal generation, stale test name) 2026-03-12 01:37:03 +09:00
YeonGyu-Kim
ff536e992a fix: address review-work round 2 findings
- MCP teardown race: add shutdownGeneration counter to prevent
  in-flight connections from resurrecting after disconnectAll
- MCP multi-key disconnect race: replace disconnectedSessions Set
  with generation-based Map to track per-session disconnect events
- MCP clients: check shutdownGeneration in stdio/http client
  creators before inserting into state.clients
- BackgroundManager: call clearTaskHistoryWhenParentTasksGone after
  timer-based task removal in scheduleTaskRemoval and notifyParentSession
- BackgroundManager: clean completedTaskSummaries when parent has
  no remaining tasks
- Plugin dispose: remove duplicate tmuxSessionManager.cleanup call
  since BackgroundManager.shutdown already handles it via onShutdown
2026-03-12 01:37:03 +09:00
YeonGyu-Kim
03eaa429ce fix: address 5 edge cases from review-work findings
- C3: include command args in auto-slash-command dedup key
- H2: track completed task summaries for ALL COMPLETE message
- H9: increment tmux close retry count on re-mark
- H8: detect stale MCP connections after disconnect+reconnect race
- H8: guard disconnectedSessions growth for non-MCP sessions
- C1: await tmux cleanup in plugin dispose lifecycle
2026-03-12 01:37:03 +09:00
YeonGyu-Kim
b8aea50dfa test(background-agent): update completion timer test for per-task cleanup
Test expected timers only after allComplete, but H2 fix intentionally
decoupled per-task cleanup from sibling completion state. Updated
assertion to expect timer after individual task notification.
2026-03-12 01:37:03 +09:00
YeonGyu-Kim
deaac8cb39 fix(plugin): add dispose lifecycle for full teardown on reload
Plugin created managers, hooks, intervals, and process listeners on
every load but had no teardown mechanism. On plugin reload, old
instances remained alive causing cumulative memory leaks.

- Add createPluginDispose() orchestrating shutdown sequence:
  backgroundManager.shutdown() → skillMcpManager.disconnectAll() →
  disposeHooks()
- Add disposeHooks() aggregator with safe optional chaining
- Wire dispose into index.ts to clean previous instance on reload
- Make dispose idempotent (safe to call multiple times)

Tests: 4 pass, 8 expects
2026-03-12 01:37:03 +09:00
YeonGyu-Kim
b4e13883b1 fix(background-agent): fix 3 memory leaks in task lifecycle management
H3: cancelTask(skipNotification=true) now schedules task removal.
Previously the early return path skipped cleanup, leaking task objects
in this.tasks Map permanently. Extracted scheduleTaskRemoval() helper
called from both skipNotification and normal paths.

H2: Per-task completion cleanup timer decoupled from allComplete check.
Previously cleanup timer only ran when ALL sibling tasks completed. Now
each finished task gets its own removal timer regardless of siblings.

H1+C2: TaskHistory.clearAll() added and wired into shutdown(). Added
clearSession() calls on session error/deletion and prune cycles.
taskHistory was the only data structure missed by shutdown().

Tests: 10 pass (3 cancel + 3 completion + 4 history)
2026-03-12 01:37:03 +09:00
YeonGyu-Kim
d1fc6629c2 fix(skill-mcp-manager): remove process listeners on disconnect and guard connection races
H7: Process 'exit'/'SIGINT' listeners registered per-session were
never removed when all sessions disconnected, accumulating handlers.
- Add unregisterProcessCleanup() called in disconnectAll()

H8: Race condition where disconnectSession() during pending connection
left orphan clients in state.clients.
- Add disconnectedSessions Set to track mid-flight disconnects
- Check disconnect marker after connection resolves, close if stale
- Clear marker on reconnection for same session

Tests: 6 pass (3 disconnect + 3 race)
2026-03-12 01:37:03 +09:00
YeonGyu-Kim
fed720dd11 fix(tmux-subagent): retry pending pane closes to prevent zombie panes
When queryWindowState returned null during session deletion, the
session mapping was deleted but the real tmux pane stayed alive,
creating zombie panes.

- Add closePending/closeRetryCount fields to TrackedSession
- Mark sessions closePending instead of deleting on close failure
- Add retryPendingCloses() called from onSessionCreated and cleanup
- Force-remove mappings after 3 failed retry attempts
- Extract TrackedSessionState helper for field initialization

Tests: 3 pass, 9 expects
2026-03-12 01:37:02 +09:00
YeonGyu-Kim
a2f030e699 fix(todo-continuation-enforcer): expose prune interval for cleanup
Prune interval created inside hook was not exposed for disposal,
preventing cleanup on plugin unload.

- Add dispose() method that clears the prune interval
- Export dispose in hook return type

Tests: 2 pass, 6 expects
2026-03-12 01:37:02 +09:00
YeonGyu-Kim
2d2ca863f1 fix(runtime-fallback): clear monitoring interval on dispose
setInterval for model availability monitoring was never cleared,
keeping the hook alive indefinitely with no dispose mechanism.

- Add dispose() method to RuntimeFallbackHook that clears interval
- Track intervalId in hook state for cleanup
- Export dispose in hook return type

Tests: 3 pass, 10 expects
2026-03-12 01:37:02 +09:00
YeonGyu-Kim
f342dcfa12 fix(call-omo-agent): add finally cleanup for sync executor session Sets
Sync call_omo_agent leaked entries in global activeSessionMessages
and activeSessionToolResults Sets when execution threw errors,
since cleanup only ran on success path.

- Wrap session Set operations in try/finally blocks
- Ensure Set.delete() runs regardless of success/failure
- Add guard against double-cleanup

Tests: 2 pass, 14 expects
2026-03-12 01:37:02 +09:00
YeonGyu-Kim
7904410294 fix(auto-slash-command): bound Set growth with TTL eviction and session cleanup
processedCommands and recentResults Sets grew infinitely because
Date.now() in dedup keys made deduplication impossible and no
session.deleted cleanup existed.

- Extract ProcessedCommandStore with maxSize cap and TTL-based eviction
- Add session cleanup on session.deleted event
- Remove Date.now() from dedup keys for effective deduplication
- Add dispose() for interval cleanup

Tests: 3 pass, 9 expects
2026-03-12 01:37:02 +09:00
YeonGyu-Kim
3822423069 Merge pull request #2482 from code-yeongyu/fix/issue-2407-binary-version-embed
fix: sync root package.json version before binary compile
2026-03-12 01:34:33 +09:00
YeonGyu-Kim
e26088ba8f Merge pull request #2481 from code-yeongyu/fix/issue-2185-lsp-notification-params
fix: use rest params in LSP sendNotification to avoid undefined serialization
2026-03-12 01:34:29 +09:00
YeonGyu-Kim
7998667a86 Merge pull request #2480 from code-yeongyu/fix/issue-2356-preemptive-compaction-limit
fix: skip preemptive compaction when model context limit is unknown
2026-03-12 01:34:25 +09:00
YeonGyu-Kim
9eefbfe310 fix: restore await on metadata call in create-background-task (#2441) 2026-03-12 01:34:16 +09:00
YeonGyu-Kim
ef2017833d Merge pull request #2425 from MoerAI/fix/issue-2408-gemini-vertex-edit-schema
fix(hashline-edit): remove array type from lines union to fix Gemini Vertex schema validation
2026-03-12 01:32:37 +09:00
YeonGyu-Kim
994b9a724b Merge pull request #2424 from MoerAI/fix/issue-2386-custom-agent-summaries
fix(agents): pass custom agent summaries instead of client object to createBuiltinAgents
2026-03-12 01:32:35 +09:00
YeonGyu-Kim
142f8ac7d1 Merge pull request #2422 from MoerAI/fix/issue-2393-model-fallback-defaults
fix(model-fallback): enable by default and add missing error patterns for usage limits
2026-03-12 01:32:34 +09:00
YeonGyu-Kim
f5be99f911 Merge pull request #2420 from MoerAI/fix/issue-2375-run-in-background-default
fix(delegate-task): default run_in_background to false when orchestrator intent is detected
2026-03-12 01:32:31 +09:00
YeonGyu-Kim
182fe746fc Merge pull request #2476 from code-yeongyu/fix/issue-2441-session-id-pending
fix: omit sessionId from metadata when not yet assigned
2026-03-12 01:32:30 +09:00
YeonGyu-Kim
f61ee25282 Merge pull request #2475 from code-yeongyu/fix/issue-2300-compaction-event-dispatch
fix: register preemptive-compaction event handler in dispatchToHooks
2026-03-12 01:32:29 +09:00
YeonGyu-Kim
08b411fc3b fix: use rest params in LSP sendNotification to avoid undefined serialization (#2185)
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-03-12 01:24:42 +09:00
YeonGyu-Kim
26091b2f48 fix: skip preemptive compaction when model context limit is unknown (#2356) 2026-03-12 01:24:16 +09:00
YeonGyu-Kim
afe3792ecf docs(config): correct background task default timeout description
Keep the background_task schema comment aligned with the runtime default so timeout guidance stays accurate.

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-03-12 01:14:43 +09:00
YeonGyu-Kim
aaa54858a3 fix(background-agent): extend default no-progress stale timeout to 30 minutes
Give never-updated background tasks a longer default window and keep the default-threshold regression coverage aligned with that behavior.

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-03-12 01:14:35 +09:00
YeonGyu-Kim
6d5175b9b0 fix(delegate-task): extend default sync poll timeout to 30 minutes
Keep synchronous subagent runs from timing out after 10 minutes when no explicit override is configured.

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-03-12 01:14:26 +09:00
YeonGyu-Kim
f6125c5efa docs: refresh category model variant references
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-03-12 01:08:07 +09:00
YeonGyu-Kim
004f504e6c fix(agents): keep oracle available on first run without cache
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-03-12 01:07:57 +09:00
YeonGyu-Kim
f4f54c2b7f test(ralph-loop): remove volatile tool result timestamp
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-03-12 01:07:50 +09:00
YeonGyu-Kim
b9369d3c89 fix(config): preserve disabled arrays during partial parsing
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-03-12 01:07:43 +09:00
YeonGyu-Kim
88568398ac fix: sync root package.json version before binary compile (#2407) 2026-03-12 01:06:30 +09:00
YeonGyu-Kim
f2a7d227cb fix: omit sessionId from metadata when not yet assigned (#2441) 2026-03-12 01:02:12 +09:00
YeonGyu-Kim
39e799c596 docs: sync category model defaults
Update the public and internal docs to describe the new ultrabrain and unspecified-high defaults so the documented routing matches runtime behavior.

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-03-12 01:00:41 +09:00
YeonGyu-Kim
7c29962014 fix(delegate-task): refresh built-in category defaults
Keep delegate-task category defaults in sync with the new routing policy so ultrabrain and unspecified-high resolve to the intended primary models.

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-03-12 01:00:41 +09:00
YeonGyu-Kim
d2c2e8196b fix(shared): update category fallback priorities
Align ultrabrain with GPT-5.4 xhigh and move unspecified-high to Opus-first fallback order so category routing reflects the new model policy.

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-03-12 01:00:41 +09:00
YeonGyu-Kim
4a67044cd6 fix: register preemptive-compaction event handler in dispatchToHooks (#2300) 2026-03-12 00:55:15 +09:00
acamq
c55603782c fix(auto-update): handle null JSON.parse and restore mocks on test failure 2026-03-11 08:08:30 -06:00
acamq
46a8ad279b Merge remote-tracking branch 'origin/dev' into fix/sync-package-json-to-opencode-intent 2026-03-11 08:04:16 -06:00
acamq
0764f0e563 fix(auto-update): sync cache package.json to opencode.json intent
When users switch from pinned version to tag in opencode.json (e.g.,
3.10.0 -> @latest), the cache package.json still contains the resolved
version. This causes bun install to reinstall the old version instead
of resolving the new tag.

This adds syncCachePackageJsonToIntent() which updates the cache
package.json to match user intent before running bun install. Uses
atomic writes (temp file + rename) with UUID-based temp names for
concurrent safety.

Critical changes:
- Treat all sync errors as abort conditions (file_not_found,
  plugin_not_in_deps, parse_error, write_error) to prevent corrupting
  a bad cache state further
- Remove dead code (unreachable revert branch for pinned versions)
- Add tests for all error paths and atomic write cleanup
2026-03-11 07:42:08 -06:00
Winona Bryan
d62a586be4 fix(momus): make keyTrigger specify file-path-only invocation requirement
The previous keyTrigger ('Work plan created → invoke Momus') was too
vague — Sisyphus would fire Momus on inline plans or todo lists,
causing Momus to REJECT because its input_extraction requires exactly
one .sisyphus/plans/*.md file path.

The updated trigger explicitly states:
- Momus should only be invoked when a plan file exists on disk
- The file path must be the sole prompt content
- Inline plans and todo lists should NOT trigger Momus
2026-03-11 02:13:21 -04:00
MoerAI
204322b120 fix(hashline-edit): remove array type from lines union to fix Gemini Vertex schema validation (#2408) 2026-03-10 17:18:14 +09:00
MoerAI
46c3bfcf1f fix(agents): pass custom agent summaries instead of client object to createBuiltinAgents (#2386) 2026-03-10 17:10:55 +09:00
MoerAI
059853554d fix(model-fallback): enable by default and add missing error patterns for usage limits (#2393) 2026-03-10 17:04:17 +09:00
MoerAI
49b7e695ce fix(delegate-task): default run_in_background to false when orchestrator intent is detected (#2375) 2026-03-10 16:57:47 +09:00
guazi04
309a3e48ec fix(tmux): handle serverUrl throw getter from upstream opencode refactor 2026-03-10 15:45:44 +08:00
205 changed files with 9895 additions and 5455 deletions

View File

@@ -82,6 +82,11 @@ jobs:
cd packages/${{ matrix.platform }}
jq --arg v "$VERSION" '.version = $v' package.json > tmp.json && mv tmp.json package.json
- name: Set root package version
if: steps.check.outputs.skip != 'true'
run: |
jq --arg v "${{ inputs.version }}" '.version = $v' package.json > tmp.json && mv tmp.json package.json
- name: Pre-download baseline compile target
if: steps.check.outputs.skip != 'true' && endsWith(matrix.platform, '-baseline')
shell: bash

1
.gitignore vendored
View File

@@ -9,6 +9,7 @@ dist/
# Platform binaries (built, not committed)
packages/*/bin/oh-my-opencode
packages/*/bin/oh-my-opencode.exe
packages/*/bin/*.map
# IDE
.idea/

View File

@@ -1,105 +1,181 @@
---
name: github-triage
description: "Unified GitHub triage for issues AND PRs. 1 item = 1 background task (category: free). Issues: answer questions from codebase, analyze bugs. PRs: review bugfixes, merge safe ones. All parallel, all background. Triggers: 'triage', 'triage issues', 'triage PRs', 'github triage'."
description: "Read-only GitHub triage for issues AND PRs. 1 item = 1 background task (category: quick). Analyzes all open items and writes evidence-backed reports to /tmp/{datetime}/. Every claim requires a GitHub permalink as proof. NEVER takes any action on GitHub - no comments, no merges, no closes, no labels. Reports only. Triggers: 'triage', 'triage issues', 'triage PRs', 'github triage'."
---
# GitHub Triage — Unified Issue & PR Processor
# GitHub Triage - Read-Only Analyzer
<role>
You are a GitHub triage orchestrator. You fetch all open issues and PRs, classify each one, then spawn exactly 1 background subagent per item using `category="free"`. Each subagent analyzes its item, takes action (comment/close/merge/report), and records results via TaskCreate.
Read-only GitHub triage orchestrator. Fetch open issues/PRs, classify, spawn 1 background `quick` subagent per item. Each subagent analyzes and writes a report file. ZERO GitHub mutations.
</role>
---
## Architecture
## ARCHITECTURE
```
1 issue or PR = 1 TaskCreate = 1 task(category="free", run_in_background=true)
```
**1 ISSUE/PR = 1 TASKCREATE = 1 `quick` SUBAGENT (background). NO EXCEPTIONS.**
| Rule | Value |
|------|-------|
| Category for ALL subagents | `free` |
| Execution mode | `run_in_background=true` |
| Parallelism | ALL items launched simultaneously |
| Result tracking | Each subagent calls `TaskCreate` with its findings |
| Result collection | `background_output()` polling loop |
| Category | `quick` |
| Execution | `run_in_background=true` |
| Parallelism | ALL items simultaneously |
| Tracking | `TaskCreate` per item |
| Output | `/tmp/{YYYYMMDD-HHmmss}/issue-{N}.md` or `pr-{N}.md` |
---
## PHASE 1: FETCH ALL OPEN ITEMS
## Zero-Action Policy (ABSOLUTE)
<fetch>
Run these commands to collect data. Use the bundled script if available, otherwise fall back to gh CLI.
<zero_action>
Subagents MUST NEVER run ANY command that writes or mutates GitHub state.
**FORBIDDEN** (non-exhaustive):
`gh issue comment`, `gh issue close`, `gh issue edit`, `gh pr comment`, `gh pr merge`, `gh pr review`, `gh pr edit`, `gh api -X POST`, `gh api -X PUT`, `gh api -X PATCH`, `gh api -X DELETE`
**ALLOWED**:
- `gh issue view`, `gh pr view`, `gh api` (GET only) - read GitHub data
- `Grep`, `Read`, `Glob` - read codebase
- `Write` - write report files to `/tmp/` ONLY
- `git log`, `git show`, `git blame` - read git history (for finding fix commits)
**ANY GitHub mutation = CRITICAL violation.**
</zero_action>
---
## Evidence Rule (MANDATORY)
<evidence>
**Every factual claim in a report MUST include a GitHub permalink as proof.**
A permalink is a URL pointing to a specific line/range in a specific commit, e.g.:
`https://github.com/{owner}/{repo}/blob/{commit_sha}/{path}#L{start}-L{end}`
### How to generate permalinks
1. Find the relevant file and line(s) via Grep/Read.
2. Get the current commit SHA: `git rev-parse HEAD`
3. Construct: `https://github.com/{REPO}/blob/{SHA}/{filepath}#L{line}` (or `#L{start}-L{end}` for ranges)
### Rules
- **No permalink = no claim.** If you cannot back a statement with a permalink, state "No evidence found" instead.
- Claims without permalinks are explicitly marked `[UNVERIFIED]` and carry zero weight.
- Permalinks to `main`/`master`/`dev` branches are NOT acceptable - use commit SHAs only.
- For bug analysis: permalink to the problematic code. For fix verification: permalink to the fixing commit diff.
</evidence>
---
## Phase 0: Setup
```bash
REPO=$(gh repo view --json nameWithOwner -q .nameWithOwner)
# Issues: all open
gh issue list --repo $REPO --state open --limit 500 \
--json number,title,state,createdAt,updatedAt,labels,author,body,comments
# PRs: all open
gh pr list --repo $REPO --state open --limit 500 \
--json number,title,state,createdAt,updatedAt,labels,author,body,headRefName,baseRefName,isDraft,mergeable,reviewDecision,statusCheckRollup
REPORT_DIR="/tmp/$(date +%Y%m%d-%H%M%S)"
mkdir -p "$REPORT_DIR"
COMMIT_SHA=$(git rev-parse HEAD)
```
If either returns exactly 500 results, paginate using `--search "created:<LAST_CREATED_AT"` until exhausted.
Pass `REPO`, `REPORT_DIR`, and `COMMIT_SHA` to every subagent.
---
## Phase 1: Fetch All Open Items
<fetch>
Paginate if 500 results returned.
```bash
ISSUES=$(gh issue list --repo $REPO --state open --limit 500 \
--json number,title,state,createdAt,updatedAt,labels,author,body,comments)
ISSUE_LEN=$(echo "$ISSUES" | jq length)
if [ "$ISSUE_LEN" -eq 500 ]; then
LAST_DATE=$(echo "$ISSUES" | jq -r '.[-1].createdAt')
while true; do
PAGE=$(gh issue list --repo $REPO --state open --limit 500 \
--search "created:<$LAST_DATE" \
--json number,title,state,createdAt,updatedAt,labels,author,body,comments)
PAGE_LEN=$(echo "$PAGE" | jq length)
[ "$PAGE_LEN" -eq 0 ] && break
ISSUES=$(echo "[$ISSUES, $PAGE]" | jq -s 'add | unique_by(.number)')
[ "$PAGE_LEN" -lt 500 ] && break
LAST_DATE=$(echo "$PAGE" | jq -r '.[-1].createdAt')
done
fi
PRS=$(gh pr list --repo $REPO --state open --limit 500 \
--json number,title,state,createdAt,updatedAt,labels,author,body,headRefName,baseRefName,isDraft,mergeable,reviewDecision,statusCheckRollup)
PR_LEN=$(echo "$PRS" | jq length)
if [ "$PR_LEN" -eq 500 ]; then
LAST_DATE=$(echo "$PRS" | jq -r '.[-1].createdAt')
while true; do
PAGE=$(gh pr list --repo $REPO --state open --limit 500 \
--search "created:<$LAST_DATE" \
--json number,title,state,createdAt,updatedAt,labels,author,body,headRefName,baseRefName,isDraft,mergeable,reviewDecision,statusCheckRollup)
PAGE_LEN=$(echo "$PAGE" | jq length)
[ "$PAGE_LEN" -eq 0 ] && break
PRS=$(echo "[$PRS, $PAGE]" | jq -s 'add | unique_by(.number)')
[ "$PAGE_LEN" -lt 500 ] && break
LAST_DATE=$(echo "$PAGE" | jq -r '.[-1].createdAt')
done
fi
```
</fetch>
---
## PHASE 2: CLASSIFY EACH ITEM
## Phase 2: Classify
For each item, determine its type based on title, labels, and body content:
<classification>
### Issues
| Type | Detection | Action Path |
|------|-----------|-------------|
| `ISSUE_QUESTION` | Title contains `[Question]`, `[Discussion]`, `?`, or body is asking "how to" / "why does" / "is it possible" | SUBAGENT_ISSUE_QUESTION |
| `ISSUE_BUG` | Title contains `[Bug]`, `Bug:`, body describes unexpected behavior, error messages, stack traces | SUBAGENT_ISSUE_BUG |
| `ISSUE_FEATURE` | Title contains `[Feature]`, `[RFE]`, `[Enhancement]`, `Feature Request`, `Proposal` | SUBAGENT_ISSUE_FEATURE |
| `ISSUE_OTHER` | Anything else | SUBAGENT_ISSUE_OTHER |
### PRs
| Type | Detection | Action Path |
|------|-----------|-------------|
| `PR_BUGFIX` | Title starts with `fix`, `fix:`, `fix(`, branch contains `fix/`, `bugfix/`, or labels include `bug` | SUBAGENT_PR_BUGFIX |
| `PR_OTHER` | Everything else (feat, refactor, docs, chore, etc.) | SUBAGENT_PR_OTHER |
</classification>
| Type | Detection |
|------|-----------|
| `ISSUE_QUESTION` | `[Question]`, `[Discussion]`, `?`, "how to" / "why does" / "is it possible" |
| `ISSUE_BUG` | `[Bug]`, `Bug:`, error messages, stack traces, unexpected behavior |
| `ISSUE_FEATURE` | `[Feature]`, `[RFE]`, `[Enhancement]`, `Feature Request`, `Proposal` |
| `ISSUE_OTHER` | Anything else |
| `PR_BUGFIX` | Title starts with `fix`, branch contains `fix/`/`bugfix/`, label `bug` |
| `PR_OTHER` | Everything else |
---
## PHASE 3: SPAWN 1 BACKGROUND TASK PER ITEM
For EVERY item, create a TaskCreate entry first, then spawn a background task.
## Phase 3: Spawn Subagents
```
For each item:
1. TaskCreate(subject="Triage: #{number} {title}")
2. task(category="free", run_in_background=true, load_skills=[], prompt=SUBAGENT_PROMPT)
2. task(category="quick", run_in_background=true, load_skills=[], prompt=SUBAGENT_PROMPT)
3. Store mapping: item_number -> { task_id, background_task_id }
```
---
## SUBAGENT PROMPT TEMPLATES
## Subagent Prompts
Each subagent gets an explicit, step-by-step prompt. The `quick`-category models may be limited — leave NOTHING implicit.
### Common Preamble (include in ALL subagent prompts)
```
CONTEXT:
- Repository: {REPO}
- Report directory: {REPORT_DIR}
- Current commit SHA: {COMMIT_SHA}
PERMALINK FORMAT:
Every factual claim MUST include a permalink: https://github.com/{REPO}/blob/{COMMIT_SHA}/{filepath}#L{start}-L{end}
No permalink = no claim. Mark unverifiable claims as [UNVERIFIED].
To get current SHA if needed: git rev-parse HEAD
ABSOLUTE RULES (violating ANY = critical failure):
- NEVER run gh issue comment, gh issue close, gh issue edit
- NEVER run gh pr comment, gh pr merge, gh pr review, gh pr edit
- NEVER run any gh command with -X POST, -X PUT, -X PATCH, -X DELETE
- NEVER run git checkout, git fetch, git pull, git switch, git worktree
- Your ONLY writable output: {REPORT_DIR}/{issue|pr}-{number}.md via the Write tool
```
---
### SUBAGENT_ISSUE_QUESTION
<issue_question_prompt>
### ISSUE_QUESTION
```
You are a GitHub issue responder for the repository {REPO}.
You are analyzing issue #{number} for {REPO}.
ITEM:
- Issue #{number}: {title}
@@ -107,52 +183,43 @@ ITEM:
- Body: {body}
- Comments: {comments_summary}
YOUR JOB:
1. Read the issue carefully. Understand what the user is asking.
2. Search the codebase to find the answer. Use Grep and Read tools.
- Search for relevant file names, function names, config keys mentioned in the issue.
- Read the files you find to understand how the feature works.
3. Decide: Can you answer this clearly and accurately from the codebase?
TASK:
1. Understand the question.
2. Search the codebase (Grep, Read) for the answer.
3. For every finding, construct a permalink: https://github.com/{REPO}/blob/{COMMIT_SHA}/{path}#L{N}
4. Write report to {REPORT_DIR}/issue-{number}.md
IF YES (you found a clear, accurate answer):
Step A: Write a helpful comment. The comment MUST:
- Start with exactly: [sisyphus-bot]
- Be warm, friendly, and thorough
- Include specific file paths and code references
- Include code snippets or config examples if helpful
- End with "Feel free to reopen if this doesn't resolve your question!"
Step B: Post the comment:
gh issue comment {number} --repo {REPO} --body "YOUR_COMMENT"
Step C: Close the issue:
gh issue close {number} --repo {REPO}
Step D: Report back with this EXACT format:
ACTION: ANSWERED_AND_CLOSED
COMMENT_POSTED: yes
SUMMARY: [1-2 sentence summary of your answer]
REPORT FORMAT (write this as the file content):
IF NO (not enough info in codebase, or answer is uncertain):
Report back with:
ACTION: NEEDS_MANUAL_ATTENTION
REASON: [why you couldn't answer — be specific]
PARTIAL_FINDINGS: [what you DID find, if anything]
# Issue #{number}: {title}
**Type:** Question | **Author:** {author} | **Created:** {createdAt}
RULES:
- NEVER guess. Only answer if the codebase clearly supports your answer.
- NEVER make up file paths or function names.
- The [sisyphus-bot] prefix is MANDATORY on every comment you post.
- Be genuinely helpful — imagine you're a senior maintainer who cares about the community.
## Question
[1-2 sentence summary]
## Findings
[Each finding with permalink proof. Example:]
- The config is parsed in [`src/config/loader.ts#L42-L58`](https://github.com/{REPO}/blob/{COMMIT_SHA}/src/config/loader.ts#L42-L58)
## Suggested Answer
[Draft answer with code references and permalinks]
## Confidence: [HIGH | MEDIUM | LOW]
[Reason. If LOW: what's missing]
## Recommended Action
[What maintainer should do]
---
REMEMBER: No permalink = no claim. Every code reference needs a permalink.
```
</issue_question_prompt>
---
### SUBAGENT_ISSUE_BUG
<issue_bug_prompt>
### ISSUE_BUG
```
You are a GitHub bug analyzer for the repository {REPO}.
You are analyzing bug report #{number} for {REPO}.
ITEM:
- Issue #{number}: {title}
@@ -160,74 +227,75 @@ ITEM:
- Body: {body}
- Comments: {comments_summary}
YOUR JOB:
1. Read the issue carefully. Understand the reported bug:
- What behavior does the user expect?
- What behavior do they actually see?
- What steps reproduce it?
2. Search the codebase for the relevant code. Use Grep and Read tools.
- Find the files/functions mentioned or related to the bug.
- Read them carefully and trace the logic.
3. Determine one of three outcomes:
TASK:
1. Understand: expected behavior, actual behavior, reproduction steps.
2. Search the codebase for relevant code. Trace the logic.
3. Determine verdict: CONFIRMED_BUG, NOT_A_BUG, ALREADY_FIXED, or UNCLEAR.
4. For ALREADY_FIXED: find the fixing commit using git log/git blame. Include the commit SHA and what changed.
5. For every finding, construct a permalink.
6. Write report to {REPORT_DIR}/issue-{number}.md
OUTCOME A — CONFIRMED BUG (you found the problematic code):
Step 1: Post a comment on the issue. The comment MUST:
- Start with exactly: [sisyphus-bot]
- Apologize sincerely for the inconvenience ("We're sorry you ran into this issue.")
- Briefly acknowledge what the bug is
- Say "We've identified the root cause and will work on a fix."
- Do NOT reveal internal implementation details unnecessarily
Step 2: Post the comment:
gh issue comment {number} --repo {REPO} --body "YOUR_COMMENT"
Step 3: Report back with:
ACTION: CONFIRMED_BUG
ROOT_CAUSE: [which file, which function, what goes wrong]
FIX_APPROACH: [how to fix it — be specific: "In {file}, line ~{N}, change X to Y because Z"]
SEVERITY: [LOW|MEDIUM|HIGH|CRITICAL]
AFFECTED_FILES: [list of files that need changes]
FINDING "ALREADY_FIXED" COMMITS:
- Use `git log --all --oneline -- {file}` to find recent changes to relevant files
- Use `git log --all --grep="fix" --grep="{keyword}" --all-match --oneline` to search commit messages
- Use `git blame {file}` to find who last changed the relevant lines
- Use `git show {commit_sha}` to verify the fix
- Construct commit permalink: https://github.com/{REPO}/commit/{fix_commit_sha}
OUTCOME B — NOT A BUG (user misunderstanding, provably correct behavior):
ONLY choose this if you can RIGOROUSLY PROVE the behavior is correct.
Step 1: Post a comment. The comment MUST:
- Start with exactly: [sisyphus-bot]
- Be kind and empathetic — never condescending
- Explain clearly WHY the current behavior is correct
- Include specific code references or documentation links
- Offer a workaround or alternative if possible
- End with "Please let us know if you have further questions!"
Step 2: Post the comment:
gh issue comment {number} --repo {REPO} --body "YOUR_COMMENT"
Step 3: DO NOT close the issue. Let the user or maintainer decide.
Step 4: Report back with:
ACTION: NOT_A_BUG
EXPLANATION: [why this is correct behavior]
PROOF: [specific code reference proving it]
REPORT FORMAT (write this as the file content):
OUTCOME C — UNCLEAR (can't determine from codebase alone):
Report back with:
ACTION: NEEDS_INVESTIGATION
FINDINGS: [what you found so far]
BLOCKERS: [what's preventing you from determining the cause]
SUGGESTED_NEXT_STEPS: [what a human should look at]
# Issue #{number}: {title}
**Type:** Bug Report | **Author:** {author} | **Created:** {createdAt}
RULES:
- NEVER guess at root causes. Only report CONFIRMED_BUG if you found the exact problematic code.
- NEVER close bug issues yourself. Only comment.
- For OUTCOME B (not a bug): you MUST have rigorous proof. If there's ANY doubt, choose OUTCOME C instead.
- The [sisyphus-bot] prefix is MANDATORY on every comment.
- When apologizing, be genuine. The user took time to report this.
## Bug Summary
**Expected:** [what user expects]
**Actual:** [what actually happens]
**Reproduction:** [steps if provided]
## Verdict: [CONFIRMED_BUG | NOT_A_BUG | ALREADY_FIXED | UNCLEAR]
## Analysis
### Evidence
[Each piece of evidence with permalink. No permalink = mark [UNVERIFIED]]
### Root Cause (if CONFIRMED_BUG)
[Which file, which function, what goes wrong]
- Problematic code: [`{path}#L{N}`](permalink)
### Why Not A Bug (if NOT_A_BUG)
[Rigorous proof with permalinks that current behavior is correct]
### Fix Details (if ALREADY_FIXED)
- **Fixed in commit:** [`{short_sha}`](https://github.com/{REPO}/commit/{full_sha})
- **Fixed date:** {date}
- **What changed:** [description with diff permalink]
- **Fixed by:** {author}
### Blockers (if UNCLEAR)
[What prevents determination, what to investigate next]
## Severity: [LOW | MEDIUM | HIGH | CRITICAL]
## Affected Files
[List with permalinks]
## Suggested Fix (if CONFIRMED_BUG)
[Specific approach: "In {file}#L{N}, change X to Y because Z"]
## Recommended Action
[What maintainer should do]
---
CRITICAL: Claims without permalinks are worthless. If you cannot find evidence, say so explicitly rather than making unverified claims.
```
</issue_bug_prompt>
---
### SUBAGENT_ISSUE_FEATURE
<issue_feature_prompt>
### ISSUE_FEATURE
```
You are a GitHub feature request analyzer for the repository {REPO}.
You are analyzing feature request #{number} for {REPO}.
ITEM:
- Issue #{number}: {title}
@@ -235,38 +303,41 @@ ITEM:
- Body: {body}
- Comments: {comments_summary}
YOUR JOB:
1. Read the feature request.
2. Search the codebase to check if this feature already exists (partially or fully).
3. Assess feasibility and alignment with the project.
TASK:
1. Understand the request.
2. Search codebase for existing (partial/full) implementations.
3. Assess feasibility.
4. Write report to {REPORT_DIR}/issue-{number}.md
Report back with:
ACTION: FEATURE_ASSESSED
ALREADY_EXISTS: [YES_FULLY | YES_PARTIALLY | NO]
IF_EXISTS: [where in the codebase, how to use it]
FEASIBILITY: [EASY | MODERATE | HARD | ARCHITECTURAL_CHANGE]
RELEVANT_FILES: [files that would need changes]
NOTES: [any observations about implementation approach]
REPORT FORMAT (write this as the file content):
If the feature already fully exists:
Post a comment (prefix: [sisyphus-bot]) explaining how to use the existing feature with examples.
gh issue comment {number} --repo {REPO} --body "YOUR_COMMENT"
# Issue #{number}: {title}
**Type:** Feature Request | **Author:** {author} | **Created:** {createdAt}
RULES:
- Do NOT close feature requests.
- The [sisyphus-bot] prefix is MANDATORY on any comment.
## Request Summary
[What the user wants]
## Existing Implementation: [YES_FULLY | YES_PARTIALLY | NO]
[If exists: where, with permalinks to the implementation]
## Feasibility: [EASY | MODERATE | HARD | ARCHITECTURAL_CHANGE]
## Relevant Files
[With permalinks]
## Implementation Notes
[Approach, pitfalls, dependencies]
## Recommended Action
[What maintainer should do]
```
</issue_feature_prompt>
---
### SUBAGENT_ISSUE_OTHER
<issue_other_prompt>
### ISSUE_OTHER
```
You are a GitHub issue analyzer for the repository {REPO}.
You are analyzing issue #{number} for {REPO}.
ITEM:
- Issue #{number}: {title}
@@ -274,209 +345,195 @@ ITEM:
- Body: {body}
- Comments: {comments_summary}
YOUR JOB:
Quickly assess this issue and report:
ACTION: ASSESSED
TYPE_GUESS: [QUESTION | BUG | FEATURE | DISCUSSION | META | STALE]
SUMMARY: [1-2 sentence summary]
NEEDS_ATTENTION: [YES | NO]
SUGGESTED_LABEL: [if any]
TASK: Assess and write report to {REPORT_DIR}/issue-{number}.md
Do NOT post comments. Do NOT close. Just analyze and report.
REPORT FORMAT (write this as the file content):
# Issue #{number}: {title}
**Type:** [QUESTION | BUG | FEATURE | DISCUSSION | META | STALE]
**Author:** {author} | **Created:** {createdAt}
## Summary
[1-2 sentences]
## Needs Attention: [YES | NO]
## Suggested Label: [if any]
## Recommended Action: [what maintainer should do]
```
</issue_other_prompt>
---
### SUBAGENT_PR_BUGFIX
<pr_bugfix_prompt>
### PR_BUGFIX
```
You are a GitHub PR reviewer for the repository {REPO}.
You are reviewing PR #{number} for {REPO}.
ITEM:
- PR #{number}: {title}
- Author: {author}
- Base: {baseRefName}
- Head: {headRefName}
- Draft: {isDraft}
- Mergeable: {mergeable}
- Review Decision: {reviewDecision}
- CI Status: {statusCheckRollup_summary}
- Base: {baseRefName} <- Head: {headRefName}
- Draft: {isDraft} | Mergeable: {mergeable}
- Review: {reviewDecision} | CI: {statusCheckRollup_summary}
- Body: {body}
YOUR JOB:
1. Fetch PR details (DO NOT checkout the branch — read-only analysis):
gh pr view {number} --repo {REPO} --json files,reviews,comments,statusCheckRollup,reviewDecision
2. Read the changed files list. For each changed file, use `gh api repos/{REPO}/pulls/{number}/files` to see the diff.
3. Search the codebase to understand what the PR is fixing and whether the fix is correct.
4. Evaluate merge safety:
TASK:
1. Fetch PR details (READ-ONLY): gh pr view {number} --repo {REPO} --json files,reviews,comments,statusCheckRollup,reviewDecision
2. Read diff: gh api repos/{REPO}/pulls/{number}/files
3. Search codebase to verify fix correctness.
4. Write report to {REPORT_DIR}/pr-{number}.md
MERGE CONDITIONS (ALL must be true for auto-merge):
a. CI status checks: ALL passing (no failures, no pending)
b. Review decision: APPROVED
c. The fix is clearly correct — addresses an obvious, unambiguous bug
d. No risky side effects (no architectural changes, no breaking changes)
e. Not a draft PR
f. Mergeable state is clean (no conflicts)
REPORT FORMAT (write this as the file content):
IF ALL MERGE CONDITIONS MET:
Step 1: Merge the PR:
gh pr merge {number} --repo {REPO} --squash --auto
Step 2: Report back with:
ACTION: MERGED
FIX_SUMMARY: [what bug was fixed and how]
FILES_CHANGED: [list of files]
RISK: NONE
# PR #{number}: {title}
**Type:** Bugfix | **Author:** {author}
**Base:** {baseRefName} <- {headRefName} | **Draft:** {isDraft}
IF ANY CONDITION NOT MET:
Report back with:
ACTION: NEEDS_HUMAN_DECISION
FIX_SUMMARY: [what the PR does]
WHAT_IT_FIXES: [the bug or issue it addresses]
CI_STATUS: [PASS | FAIL | PENDING — list any failures]
REVIEW_STATUS: [APPROVED | CHANGES_REQUESTED | PENDING | NONE]
MISSING: [what's preventing auto-merge — be specific]
RISK_ASSESSMENT: [what could go wrong]
AMBIGUOUS_PARTS: [anything that needs human judgment]
RECOMMENDED_ACTION: [what the maintainer should do]
## Fix Summary
[What bug, how fixed - with permalinks to changed code]
ABSOLUTE RULES:
- NEVER run `git checkout`, `git fetch`, `git pull`, or `git switch`. READ-ONLY via gh CLI and API.
- NEVER checkout the PR branch. NEVER. Use `gh api` and `gh pr view` only.
- Only merge if you are 100% certain ALL conditions are met. When in doubt, report instead.
- The [sisyphus-bot] prefix is MANDATORY on any comment you post.
## Code Review
### Correctness
[Is fix correct? Root cause addressed? Evidence with permalinks]
### Side Effects
[Risky changes, breaking changes - with permalinks if any]
### Code Quality
[Style, patterns, test coverage]
## Merge Readiness
| Check | Status |
|-------|--------|
| CI | [PASS / FAIL / PENDING] |
| Review | [APPROVED / CHANGES_REQUESTED / PENDING / NONE] |
| Mergeable | [YES / NO / CONFLICTED] |
| Draft | [YES / NO] |
| Correctness | [VERIFIED / CONCERNS / UNCLEAR] |
| Risk | [NONE / LOW / MEDIUM / HIGH] |
## Files Changed
[List with brief descriptions]
## Recommended Action: [MERGE | REQUEST_CHANGES | NEEDS_REVIEW | WAIT]
[Reasoning with evidence]
---
NEVER merge. NEVER comment. NEVER review. Write to file ONLY.
```
</pr_bugfix_prompt>
---
### SUBAGENT_PR_OTHER
<pr_other_prompt>
### PR_OTHER
```
You are a GitHub PR reviewer for the repository {REPO}.
You are reviewing PR #{number} for {REPO}.
ITEM:
- PR #{number}: {title}
- Author: {author}
- Base: {baseRefName}
- Head: {headRefName}
- Draft: {isDraft}
- Mergeable: {mergeable}
- Review Decision: {reviewDecision}
- CI Status: {statusCheckRollup_summary}
- Base: {baseRefName} <- Head: {headRefName}
- Draft: {isDraft} | Mergeable: {mergeable}
- Review: {reviewDecision} | CI: {statusCheckRollup_summary}
- Body: {body}
YOUR JOB:
1. Fetch PR details (READ-ONLY — no checkout):
gh pr view {number} --repo {REPO} --json files,reviews,comments,statusCheckRollup,reviewDecision
2. Read the changed files via `gh api repos/{REPO}/pulls/{number}/files`.
3. Assess the PR and report:
TASK:
1. Fetch PR details (READ-ONLY): gh pr view {number} --repo {REPO} --json files,reviews,comments,statusCheckRollup,reviewDecision
2. Read diff: gh api repos/{REPO}/pulls/{number}/files
3. Write report to {REPORT_DIR}/pr-{number}.md
ACTION: PR_ASSESSED
TYPE: [FEATURE | REFACTOR | DOCS | CHORE | TEST | OTHER]
SUMMARY: [what this PR does in 2-3 sentences]
CI_STATUS: [PASS | FAIL | PENDING]
REVIEW_STATUS: [APPROVED | CHANGES_REQUESTED | PENDING | NONE]
FILES_CHANGED: [count and key files]
RISK_LEVEL: [LOW | MEDIUM | HIGH]
ALIGNMENT: [does this fit the project direction? YES | NO | UNCLEAR]
BLOCKERS: [anything preventing merge]
RECOMMENDED_ACTION: [MERGE | REQUEST_CHANGES | NEEDS_REVIEW | CLOSE | WAIT]
NOTES: [any observations for the maintainer]
REPORT FORMAT (write this as the file content):
ABSOLUTE RULES:
- NEVER run `git checkout`, `git fetch`, `git pull`, or `git switch`. READ-ONLY.
- NEVER checkout the PR branch. Use `gh api` and `gh pr view` only.
- Do NOT merge non-bugfix PRs automatically. Report only.
# PR #{number}: {title}
**Type:** [FEATURE | REFACTOR | DOCS | CHORE | TEST | OTHER]
**Author:** {author}
**Base:** {baseRefName} <- {headRefName} | **Draft:** {isDraft}
## Summary
[2-3 sentences with permalinks to key changes]
## Status
| Check | Status |
|-------|--------|
| CI | [PASS / FAIL / PENDING] |
| Review | [APPROVED / CHANGES_REQUESTED / PENDING / NONE] |
| Mergeable | [YES / NO / CONFLICTED] |
| Risk | [LOW / MEDIUM / HIGH] |
| Alignment | [YES / NO / UNCLEAR] |
## Files Changed
[Count and key files]
## Blockers
[If any]
## Recommended Action: [MERGE | REQUEST_CHANGES | NEEDS_REVIEW | CLOSE | WAIT]
[Reasoning]
---
NEVER merge. NEVER comment. NEVER review. Write to file ONLY.
```
</pr_other_prompt>
---
## Phase 4: Collect & Update
Poll `background_output()` per task. As each completes:
1. Parse report.
2. `TaskUpdate(id=task_id, status="completed", description=REPORT_SUMMARY)`
3. Stream to user immediately.
---
## PHASE 4: COLLECT RESULTS & UPDATE TASKS
## Phase 5: Final Summary
<collection>
Poll `background_output()` for each spawned task. As each completes:
1. Parse the subagent's report.
2. Update the corresponding TaskCreate entry:
- `TaskUpdate(id=task_id, status="completed", description=FULL_REPORT_TEXT)`
3. Stream the result to the user immediately — do not wait for all to finish.
Track counters:
- issues_answered (commented + closed)
- bugs_confirmed
- bugs_not_a_bug
- prs_merged
- prs_needs_decision
- features_assessed
</collection>
---
## PHASE 5: FINAL SUMMARY
After all background tasks complete, produce a summary:
Write to `{REPORT_DIR}/SUMMARY.md` AND display to user:
```markdown
# GitHub Triage Report {REPO}
# GitHub Triage Report - {REPO}
**Date:** {date}
**Date:** {date} | **Commit:** {COMMIT_SHA}
**Items Processed:** {total}
**Report Directory:** {REPORT_DIR}
## Issues ({issue_count})
| Action | Count |
|--------|-------|
| Answered & Closed | {issues_answered} |
| Bug Confirmed | {bugs_confirmed} |
| Not A Bug (explained) | {bugs_not_a_bug} |
| Feature Assessed | {features_assessed} |
| Needs Manual Attention | {needs_manual} |
| Category | Count |
|----------|-------|
| Bug Confirmed | {n} |
| Bug Already Fixed | {n} |
| Not A Bug | {n} |
| Needs Investigation | {n} |
| Question Analyzed | {n} |
| Feature Assessed | {n} |
| Other | {n} |
## PRs ({pr_count})
| Action | Count |
|--------|-------|
| Auto-Merged (safe bugfix) | {prs_merged} |
| Needs Human Decision | {prs_needs_decision} |
| Assessed (non-bugfix) | {prs_assessed} |
| Category | Count |
|----------|-------|
| Bugfix Reviewed | {n} |
| Other PR Reviewed | {n} |
## Items Requiring Your Attention
[List each item that needs human decision with its report summary]
## Items Requiring Attention
[Each item: number, title, verdict, 1-line summary, link to report file]
## Report Files
[All generated files with paths]
```
---
## ANTI-PATTERNS
## Anti-Patterns
| Violation | Severity |
|-----------|----------|
| Using any category other than `free` | CRITICAL |
| ANY GitHub mutation (comment/close/merge/review/label/edit) | **CRITICAL** |
| Claim without permalink | **CRITICAL** |
| Using category other than `quick` | CRITICAL |
| Batching multiple items into one task | CRITICAL |
| Using `run_in_background=false` | CRITICAL |
| Subagent running `git checkout` on a PR branch | CRITICAL |
| Posting comment without `[sisyphus-bot]` prefix | CRITICAL |
| Merging a PR that doesn't meet ALL 6 conditions | CRITICAL |
| Closing a bug issue (only comment, never close bugs) | HIGH |
| Guessing at answers without codebase evidence | HIGH |
| Not recording results via TaskCreate/TaskUpdate | HIGH |
---
## QUICK START
When invoked:
1. `TaskCreate` for the overall triage job
2. Fetch all open issues + PRs via gh CLI (paginate if needed)
3. Classify each item (ISSUE_QUESTION, ISSUE_BUG, ISSUE_FEATURE, PR_BUGFIX, etc.)
4. For EACH item: `TaskCreate` + `task(category="free", run_in_background=true, load_skills=[], prompt=...)`
5. Poll `background_output()` — stream results as they arrive
6. `TaskUpdate` each task with the subagent's findings
7. Produce final summary report
| `run_in_background=false` | CRITICAL |
| `git checkout` on PR branch | CRITICAL |
| Guessing without codebase evidence | HIGH |
| Not writing report to `{REPORT_DIR}` | HIGH |
| Using branch name instead of commit SHA in permalink | HIGH |

View File

@@ -181,7 +181,7 @@ When Sisyphus delegates to a subagent, it doesn't pick a model. It picks a **cat
| `quick` | Single-file changes, typos |
| `ultrabrain` | Hard logic, architecture decisions |
Agent says what kind of work. Harness picks the right model. You touch nothing.
Agent says what kind of work. Harness picks the right model. `ultrabrain` now routes to GPT-5.4 xhigh by default. You touch nothing.
### Claude Code Compatibility

View File

@@ -64,8 +64,8 @@ These agents have Claude-optimized prompts — long, detailed, mechanics-driven.
| Agent | Role | Fallback Chain | Notes |
| ------------ | ----------------- | -------------------------------------- | ------------------------------------------------------------------------------------------------- |
| **Sisyphus** | Main orchestrator | Claude Opus → K2P5 → Kimi K2.5 → GPT-5.4 → GLM 5 → Big Pickle | Claude-family first. GPT-5.4 has dedicated prompt support. Kimi/GLM as intermediate fallbacks. |
| **Metis** | Plan gap analyzer | Claude Opus → GPT-5.4 → Gemini 3.1 Pro | Claude preferred, GPT acceptable fallback. |
| **Sisyphus** | Main orchestrator | Claude Opus → opencode-go/kimi-k2.5 → K2P5 → GPT-5.4 → GLM-5 → Big Pickle | Claude-family first. GPT-5.4 has dedicated prompt support. Kimi/GLM as intermediate fallbacks. |
| **Metis** | Plan gap analyzer | Claude Opus → opencode-go/glm-5 → K2P5 | Claude preferred. Uses opencode-go for reliable GLM-5 access. |
### Dual-Prompt Agents → Claude preferred, GPT supported
@@ -73,8 +73,8 @@ These agents ship separate prompts for Claude and GPT families. They auto-detect
| Agent | Role | Fallback Chain | Notes |
| -------------- | ----------------- | -------------------------------------- | -------------------------------------------------------------------- |
| **Prometheus** | Strategic planner | Claude Opus → GPT-5.4 → Gemini 3.1 Pro | Interview-mode planning. GPT prompt is compact and principle-driven. |
| **Atlas** | Todo orchestrator | Claude Sonnet 4.6 → GPT-5.4 | Claude first, GPT-5.4 as the current fallback path. |
| **Prometheus** | Strategic planner | Claude Opus → GPT-5.4 → opencode-go/glm-5 → Gemini 3.1 Pro | Interview-mode planning. GPT prompt is compact and principle-driven. |
| **Atlas** | Todo orchestrator | Claude Sonnet → opencode-go/kimi-k2.5 | Claude first, opencode-go as the current fallback path. |
### Deep Specialists → GPT
@@ -92,9 +92,9 @@ These agents do grep, search, and retrieval. They intentionally use the fastest,
| Agent | Role | Fallback Chain | Notes |
| --------------------- | ------------------ | ---------------------------------------------- | ----------------------------------------------------- |
| **Explore** | Fast codebase grep | Grok Code Fast → MiniMax → Haiku → GPT-5-Nano | Speed is everything. Fire 10 in parallel. |
| **Librarian** | Docs/code search | Gemini Flash → MiniMax → Big Pickle | Doc retrieval doesn't need deep reasoning. |
| **Multimodal Looker** | Vision/screenshots | GPT-5.3 Codex → K2P5 → Gemini Flash → GLM-4.6v | Uses the first available multimodal-capable fallback. |
| **Explore** | Fast codebase grep | Grok Code Fast → opencode-go/minimax-m2.5 → MiniMax Free → Haiku → GPT-5-Nano | Speed is everything. Fire 10 in parallel. |
| **Librarian** | Docs/code search | opencode-go/minimax-m2.5 → MiniMax Free → Haiku → GPT-5-Nano | Doc retrieval doesn't need deep reasoning. |
| **Multimodal Looker** | Vision/screenshots | GPT-5.4 → opencode-go/kimi-k2.5 → GLM-4.6v → GPT-5-Nano | Uses the first available multimodal-capable fallback. |
---
@@ -132,6 +132,26 @@ Principle-driven, explicit reasoning, deep technical capability. Best for agents
| **Grok Code Fast 1** | Blazing fast code grep. Default for Explore agent. |
| **MiniMax M2.5** | Fast and smart. Good for utility tasks and search/retrieval. |
### OpenCode Go
A premium subscription tier ($10/month) that provides reliable access to Chinese frontier models through OpenCode's infrastructure.
**Available Models:**
| Model | Use Case |
| ------------------------ | --------------------------------------------------------------------- |
| **opencode-go/kimi-k2.5** | Vision-capable, Claude-like reasoning. Used by Sisyphus, Atlas, Sisyphus-Junior, Multimodal Looker. |
| **opencode-go/glm-5** | Text-only orchestration model. Used by Oracle, Prometheus, Metis, Momus. |
| **opencode-go/minimax-m2.5** | Ultra-cheap, fast responses. Used by Librarian, Explore for utility work. |
**When It Gets Used:**
OpenCode Go models appear in fallback chains as intermediate options. They bridge the gap between premium Claude access and free-tier alternatives. The system tries OpenCode Go models before falling back to free tiers (MiniMax Free, Big Pickle) or GPT alternatives.
**Go-Only Scenarios:**
Some model identifiers like `k2p5` (paid Kimi K2.5) and `glm-5` may only be available through OpenCode Go subscription in certain regions. When configured with these short identifiers, the system resolves them through the opencode-go provider first.
### About Free-Tier Fallbacks
You may see model names like `kimi-k2.5-free`, `minimax-m2.5-free`, or `big-pickle` (GLM 4.6) in the source code or logs. These are free-tier versions of the same model families, served through the OpenCode Zen provider. They exist as lower-priority entries in fallback chains.
@@ -147,11 +167,11 @@ When agents delegate work, they don't pick a model name — they pick a **catego
| Category | When Used | Fallback Chain |
| -------------------- | -------------------------- | -------------------------------------------- |
| `visual-engineering` | Frontend, UI, CSS, design | Gemini 3.1 Pro → GLM 5 → Claude Opus |
| `ultrabrain` | Maximum reasoning needed | GPT-5.3 Codex → Gemini 3.1 Pro → Claude Opus |
| `ultrabrain` | Maximum reasoning needed | GPT-5.4 → Gemini 3.1 Pro → Claude Opus |
| `deep` | Deep coding, complex logic | GPT-5.3 Codex → Claude Opus → Gemini 3.1 Pro |
| `artistry` | Creative, novel approaches | Gemini 3.1 Pro → Claude Opus → GPT-5.4 |
| `quick` | Simple, fast tasks | Claude Haiku → Gemini Flash → GPT-5-Nano |
| `unspecified-high` | General complex work | GPT-5.4 → Claude Opus → GLM 5 → K2P5 |
| `unspecified-high` | General complex work | Claude Opus → GPT-5.4 (high) → GLM 5 → K2P5 |
| `unspecified-low` | General standard work | Claude Sonnet → GPT-5.3 Codex → Gemini Flash |
| `writing` | Text, docs, prose | Gemini Flash → Claude Sonnet |
@@ -190,7 +210,7 @@ See the [Orchestration System Guide](./orchestration.md) for how agents dispatch
"categories": {
"quick": { "model": "opencode/gpt-5-nano" },
"unspecified-low": { "model": "anthropic/claude-sonnet-4-6" },
"unspecified-high": { "model": "openai/gpt-5.4-high" },
"unspecified-high": { "model": "anthropic/claude-opus-4-6", "variant": "max" },
"visual-engineering": {
"model": "google/gemini-3.1-pro",
"variant": "high",

View File

@@ -68,7 +68,12 @@ Ask the user these questions to determine CLI options:
- If **yes** → `--zai-coding-plan=yes`
- If **no** → `--zai-coding-plan=no` (default)
**Provider Priority**: Native (anthropic/, openai/, google/) > GitHub Copilot > OpenCode Zen > Z.ai Coding Plan
7. **Do you have an OpenCode Go subscription?**
- OpenCode Go is a $10/month subscription providing access to GLM-5, Kimi K2.5, and MiniMax M2.5 models
- If **yes** → `--opencode-go=yes`
- If **no** → `--opencode-go=no` (default)
**Provider Priority**: Native (anthropic/, openai/, google/) > Kimi for Coding > GitHub Copilot > OpenCode Go > OpenCode Zen > Z.ai Coding Plan
MUST STRONGLY WARN: WHEN THE USER SAYS THEY DON'T HAVE A CLAUDE SUBSCRIPTION, THE SISYPHUS AGENT MIGHT NOT WORK IDEALLY.
@@ -91,7 +96,7 @@ Spawn a subagent to handle installation and report back - to save context.
Based on user's answers, run the CLI installer with appropriate flags:
```bash
bunx oh-my-opencode install --no-tui --claude=<yes|no|max20> --gemini=<yes|no> --copilot=<yes|no> [--openai=<yes|no>] [--opencode-zen=<yes|no>] [--zai-coding-plan=<yes|no>]
bunx oh-my-opencode install --no-tui --claude=<yes|no|max20> --gemini=<yes|no> --copilot=<yes|no> [--openai=<yes|no>] [--opencode-go=<yes|no>] [--opencode-zen=<yes|no>] [--zai-coding-plan=<yes|no>]
```
**Examples:**
@@ -102,6 +107,7 @@ bunx oh-my-opencode install --no-tui --claude=<yes|no|max20> --gemini=<yes|no> -
- User has only GitHub Copilot: `bunx oh-my-opencode install --no-tui --claude=no --gemini=no --copilot=yes`
- User has Z.ai for Librarian: `bunx oh-my-opencode install --no-tui --claude=yes --gemini=no --copilot=no --zai-coding-plan=yes`
- User has only OpenCode Zen: `bunx oh-my-opencode install --no-tui --claude=no --gemini=no --copilot=no --opencode-zen=yes`
- User has OpenCode Go only: `bunx oh-my-opencode install --no-tui --claude=no --openai=no --gemini=no --copilot=no --opencode-go=yes`
- User has no subscriptions: `bunx oh-my-opencode install --no-tui --claude=no --gemini=no --copilot=no`
The CLI will:

View File

@@ -296,12 +296,12 @@ task({ category: "quick", prompt: "..." }); // "Just get it done fast"
| Category | Model | When to Use |
| -------------------- | ---------------------- | ----------------------------------------------------------- |
| `visual-engineering` | Gemini 3.1 Pro | Frontend, UI/UX, design, styling, animation |
| `ultrabrain` | GPT-5.3 Codex (xhigh) | Deep logical reasoning, complex architecture decisions |
| `ultrabrain` | GPT-5.4 (xhigh) | Deep logical reasoning, complex architecture decisions |
| `artistry` | Gemini 3.1 Pro (high) | Highly creative or artistic tasks, novel ideas |
| `quick` | Claude Haiku 4.5 | Trivial tasks - single file changes, typo fixes |
| `deep` | GPT-5.3 Codex (medium) | Goal-oriented autonomous problem-solving, thorough research |
| `unspecified-low` | Claude Sonnet 4.6 | Tasks that don't fit other categories, low effort |
| `unspecified-high` | GPT-5.4 (high) | Tasks that don't fit other categories, high effort |
| `unspecified-high` | Claude Opus 4.6 (max) | Tasks that don't fit other categories, high effort |
| `writing` | Gemini 3 Flash | Documentation, prose, technical writing |
### Skills: Domain-Specific Instructions

View File

@@ -101,7 +101,7 @@ Use Hephaestus when you need deep architectural reasoning, complex debugging acr
- **Multi-model orchestration.** Pure Codex is single-model. OmO routes different tasks to different models automatically. GPT for deep reasoning. Gemini for frontend. Haiku for speed. The right brain for the right job.
- **Background agents.** Fire 5+ agents in parallel. Something Codex simply cannot do. While one agent writes code, another researches patterns, another checks documentation. Like a real dev team.
- **Category system.** Tasks are routed by intent, not model name. `visual-engineering` gets Gemini. `ultrabrain` gets GPT-5.3 Codex. `quick` gets Haiku. No manual juggling.
- **Category system.** Tasks are routed by intent, not model name. `visual-engineering` gets Gemini. `ultrabrain` gets GPT-5.4. `quick` gets Haiku. No manual juggling.
- **Accumulated wisdom.** Subagents learn from previous results. Conventions discovered in task 1 are passed to task 5. Mistakes made early aren't repeated. The system gets smarter as it works.
### Prometheus: The Strategic Planner
@@ -193,13 +193,13 @@ You can override specific agents or categories in your config:
},
// General high-effort work
"unspecified-high": { "model": "openai/gpt-5.4", "variant": "high" },
"unspecified-high": { "model": "anthropic/claude-opus-4-6", "variant": "max" },
// Quick tasks: use the cheapest models
"quick": { "model": "anthropic/claude-haiku-4-5" },
// Deep reasoning: GPT-5.3-codex
"ultrabrain": { "model": "openai/gpt-5.3-codex", "variant": "xhigh" },
// Deep reasoning: GPT-5.4
"ultrabrain": { "model": "openai/gpt-5.4", "variant": "xhigh" },
},
}
```

View File

@@ -100,7 +100,7 @@ Here's a practical starting configuration:
"unspecified-low": { "model": "anthropic/claude-sonnet-4-6" },
// unspecified-high — complex work
"unspecified-high": { "model": "openai/gpt-5.4-high" },
"unspecified-high": { "model": "anthropic/claude-opus-4-6", "variant": "max" },
// writing — docs/prose
"writing": { "model": "google/gemini-3-flash" },
@@ -225,12 +225,12 @@ Domain-specific model delegation used by the `task()` tool. When Sisyphus delega
| Category | Default Model | Description |
| -------------------- | ------------------------------- | ---------------------------------------------- |
| `visual-engineering` | `google/gemini-3.1-pro` (high) | Frontend, UI/UX, design, animation |
| `ultrabrain` | `openai/gpt-5.3-codex` (xhigh) | Deep logical reasoning, complex architecture |
| `ultrabrain` | `openai/gpt-5.4` (xhigh) | Deep logical reasoning, complex architecture |
| `deep` | `openai/gpt-5.3-codex` (medium) | Autonomous problem-solving, thorough research |
| `artistry` | `google/gemini-3.1-pro` (high) | Creative/unconventional approaches |
| `quick` | `anthropic/claude-haiku-4-5` | Trivial tasks, typo fixes, single-file changes |
| `unspecified-low` | `anthropic/claude-sonnet-4-6` | General tasks, low effort |
| `unspecified-high` | `openai/gpt-5.4` (high) | General tasks, high effort |
| `unspecified-high` | `anthropic/claude-opus-4-6` (max) | General tasks, high effort |
| `writing` | `google/gemini-3-flash` | Documentation, prose, technical writing |
> **Note**: Built-in defaults only apply if the category is present in your config. Otherwise the system default model is used.
@@ -283,12 +283,12 @@ Disable categories: `{ "disabled_categories": ["ultrabrain"] }`
| Category | Default Model | Provider Priority |
| ---------------------- | ------------------- | -------------------------------------------------------------- |
| **visual-engineering** | `gemini-3.1-pro` | `gemini-3.1-pro` → `glm-5` → `claude-opus-4-6` |
| **ultrabrain** | `gpt-5.3-codex` | `gpt-5.3-codex` → `gemini-3.1-pro` → `claude-opus-4-6` |
| **ultrabrain** | `gpt-5.4` | `gpt-5.4` → `gemini-3.1-pro` → `claude-opus-4-6` |
| **deep** | `gpt-5.3-codex` | `gpt-5.3-codex` → `claude-opus-4-6` → `gemini-3.1-pro` |
| **artistry** | `gemini-3.1-pro` | `gemini-3.1-pro` → `claude-opus-4-6` → `gpt-5.4` |
| **quick** | `claude-haiku-4-5` | `claude-haiku-4-5` → `gemini-3-flash` → `gpt-5-nano` |
| **unspecified-low** | `claude-sonnet-4-6` | `claude-sonnet-4-6` → `gpt-5.3-codex` → `gemini-3-flash` |
| **unspecified-high** | `gpt-5.4` | `gpt-5.4` → `claude-opus-4-6` → `glm-5` → `k2p5` → `kimi-k2.5` |
| **unspecified-high** | `claude-opus-4-6` | `claude-opus-4-6` → `gpt-5.4 (high)` → `glm-5` → `k2p5` → `kimi-k2.5` |
| **writing** | `gemini-3-flash` | `gemini-3-flash` → `claude-sonnet-4-6` |
Run `bunx oh-my-opencode doctor --verbose` to see effective model resolution for your config.

View File

@@ -108,12 +108,12 @@ By combining these two concepts, you can generate optimal agents through `task`.
| Category | Default Model | Use Cases |
| -------------------- | ------------------------------- | --------------------------------------------------------------------------------------------------------------------------- |
| `visual-engineering` | `google/gemini-3.1-pro` | Frontend, UI/UX, design, styling, animation |
| `ultrabrain` | `openai/gpt-5.3-codex` (xhigh) | Deep logical reasoning, complex architecture decisions requiring extensive analysis |
| `ultrabrain` | `openai/gpt-5.4` (xhigh) | Deep logical reasoning, complex architecture decisions requiring extensive analysis |
| `deep` | `openai/gpt-5.3-codex` (medium) | Goal-oriented autonomous problem-solving. Thorough research before action. For hairy problems requiring deep understanding. |
| `artistry` | `google/gemini-3.1-pro` (high) | Highly creative/artistic tasks, novel ideas |
| `quick` | `anthropic/claude-haiku-4-5` | Trivial tasks - single file changes, typo fixes, simple modifications |
| `unspecified-low` | `anthropic/claude-sonnet-4-6` | Tasks that don't fit other categories, low effort required |
| `unspecified-high` | `openai/gpt-5.4` (high) | Tasks that don't fit other categories, high effort required |
| `unspecified-high` | `anthropic/claude-opus-4-6` (max) | Tasks that don't fit other categories, high effort required |
| `writing` | `google/gemini-3-flash` | Documentation, prose, technical writing |
### Usage
@@ -332,7 +332,7 @@ You can create powerful specialized agents by combining Categories and Skills.
- **Category**: `ultrabrain`
- **load_skills**: `[]` (pure reasoning)
- **Effect**: Leverages GPT-5.3 Codex's logical reasoning for in-depth system architecture analysis.
- **Effect**: Leverages GPT-5.4 xhigh reasoning for in-depth system architecture analysis.
#### The Maintainer (Quick Fixes)

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -2103,6 +2103,62 @@
"created_at": "2026-03-11T06:16:22Z",
"repoId": 1108837393,
"pullRequestNo": 2446
},
{
"name": "djdembeck",
"id": 71412966,
"comment_id": 4043153461,
"created_at": "2026-03-12T00:48:33Z",
"repoId": 1108837393,
"pullRequestNo": 2497
},
{
"name": "ChicK00o",
"id": 5801907,
"comment_id": 4043272263,
"created_at": "2026-03-12T01:25:48Z",
"repoId": 1108837393,
"pullRequestNo": 2499
},
{
"name": "apple-ouyang",
"id": 45086632,
"comment_id": 4047283442,
"created_at": "2026-03-12T14:39:04Z",
"repoId": 1108837393,
"pullRequestNo": 2528
},
{
"name": "xodn348",
"id": 58055473,
"comment_id": 4047565656,
"created_at": "2026-03-12T15:14:07Z",
"repoId": 1108837393,
"pullRequestNo": 2531
},
{
"name": "ricatix",
"id": 225344788,
"comment_id": 4047640074,
"created_at": "2026-03-12T15:22:55Z",
"repoId": 1108837393,
"pullRequestNo": 2532
},
{
"name": "Gujiassh",
"id": 92616678,
"comment_id": 4048205197,
"created_at": "2026-03-12T16:36:48Z",
"repoId": 1108837393,
"pullRequestNo": 2524
},
{
"name": "cpkt9762",
"id": 23377592,
"comment_id": 4049736830,
"created_at": "2026-03-12T20:17:25Z",
"repoId": 1108837393,
"pullRequestNo": 2539
}
]
}

View File

@@ -115,6 +115,7 @@ export async function createBuiltinAgents(
browserProvider,
uiSelectedModel,
availableModels,
isFirstRunNoCache,
disabledSkills,
disableOmoEnv,
})

View File

@@ -7,7 +7,7 @@ import { AGENT_MODEL_REQUIREMENTS, isModelAvailable } from "../../shared"
import { buildAgent, isFactory } from "../agent-builder"
import { applyOverrides } from "./agent-overrides"
import { applyEnvironmentContext } from "./environment-context"
import { applyModelResolution } from "./model-resolution"
import { applyModelResolution, getFirstFallbackModel } from "./model-resolution"
export function collectPendingBuiltinAgents(input: {
agentSources: Record<BuiltinAgentName, import("../agent-builder").AgentSource>
@@ -21,6 +21,7 @@ export function collectPendingBuiltinAgents(input: {
browserProvider?: BrowserAutomationProvider
uiSelectedModel?: string
availableModels: Set<string>
isFirstRunNoCache: boolean
disabledSkills?: Set<string>
useTaskSystem?: boolean
disableOmoEnv?: boolean
@@ -37,6 +38,7 @@ export function collectPendingBuiltinAgents(input: {
browserProvider,
uiSelectedModel,
availableModels,
isFirstRunNoCache,
disabledSkills,
disableOmoEnv = false,
} = input
@@ -66,13 +68,16 @@ export function collectPendingBuiltinAgents(input: {
const isPrimaryAgent = isFactory(source) && source.mode === "primary"
const resolution = applyModelResolution({
let resolution = applyModelResolution({
uiSelectedModel: (isPrimaryAgent && !override?.model) ? uiSelectedModel : undefined,
userModel: override?.model,
requirement,
availableModels,
systemDefaultModel,
})
if (!resolution && isFirstRunNoCache && !override?.model) {
resolution = getFirstFallbackModel(requirement)
}
if (!resolution) continue
const { model, variant: resolvedVariant } = resolution

View File

@@ -342,5 +342,6 @@ export const momusPromptMetadata: AgentPromptMetadata = {
"When user explicitly wants to skip review",
"For trivial plans that don't need formal review",
],
keyTrigger: "Work plan created → invoke Momus for review before execution",
keyTrigger:
"Work plan saved to `.sisyphus/plans/*.md` → invoke Momus with the file path as the sole prompt (e.g. `prompt=\".sisyphus/plans/my-plan.md\"`). Do NOT invoke Momus for inline plans or todo lists.",
};

View File

@@ -321,6 +321,7 @@ After plan complete:
Use incremental write protocol for large plans
Delete draft after plan completion
Present "Start Work" vs "High Accuracy" choice after plan
Final Verification Wave must require explicit user "okay" before marking work complete
**USE TOOL CALLS for every phase transition — not internal reasoning**
</critical_rules>

View File

@@ -395,12 +395,14 @@ Wave 2: [dependent tasks with categories]
**Commit**: YES/NO | Message: \`type(scope): desc\` | Files: [paths]
## Final Verification Wave (4 parallel agents, ALL must APPROVE)
- [ ] F1. Plan Compliance Audit — oracle
- [ ] F2. Code Quality Review — unspecified-high
- [ ] F3. Real Manual QA — unspecified-high (+ playwright if UI)
- [ ] F4. Scope Fidelity Check — deep
## Final Verification Wave (MANDATORY \u2014 after ALL implementation tasks)
> 4 review agents run in PARALLEL. ALL must APPROVE. Present consolidated results to user and get explicit "okay" before completing.
> **Do NOT auto-proceed after verification. Wait for user's explicit approval before marking work complete.**
> **Never mark F1-F4 as checked before getting user's okay.** Rejection or user feedback -> fix -> re-run -> present again -> wait for okay.
- [ ] F1. Plan Compliance Audit \u2014 oracle
- [ ] F2. Code Quality Review \u2014 unspecified-high
- [ ] F3. Real Manual QA \u2014 unspecified-high (+ playwright if UI)
- [ ] F4. Scope Fidelity Check \u2014 deep
## Commit Strategy
## Success Criteria
\`\`\`

View File

@@ -210,10 +210,4 @@ Question({
}]
})
\`\`\`
**Based on user choice:**
- **Start Work** → Delete draft, guide to \`/start-work {name}\`
- **High Accuracy Review** → Enter Momus loop (PHASE 3)
---
`

View File

@@ -125,19 +125,14 @@ Wave 3 (After Wave 2 — integration + UI):
├── Task 19: Deployment config C (depends: 15) [quick]
└── Task 20: UI request log + build (depends: 16) [visual-engineering]
Wave 4 (After Wave 3 — verification):
├── Task 21: Integration tests (depends: 15) [deep]
├── Task 22: UI QA - Playwright (depends: 20) [unspecified-high]
├── Task 23: E2E QA (depends: 21) [deep]
└── Task 24: Git cleanup + tagging (depends: 21) [git]
Wave FINAL (After ALL tasks \u2014 4 parallel reviews, then user okay):
\u251c\u2500\u2500 Task F1: Plan compliance audit (oracle)
\u251c\u2500\u2500 Task F2: Code quality review (unspecified-high)
\u251c\u2500\u2500 Task F3: Real manual QA (unspecified-high)
\u2514\u2500\u2500 Task F4: Scope fidelity check (deep)
-> Present results -> Get explicit user okay
Wave FINAL (After ALL tasks — independent review, 4 parallel):
├── Task F1: Plan compliance audit (oracle)
├── Task F2: Code quality review (unspecified-high)
├── Task F3: Real manual QA (unspecified-high)
└── Task F4: Scope fidelity check (deep)
Critical Path: Task 1 → Task 5 → Task 8 → Task 11 → Task 15 → Task 21 → F1-F4
Critical Path: Task 1 \u2192 Task 5 \u2192 Task 8 \u2192 Task 11 \u2192 Task 15 \u2192 Task 21 \u2192 F1-F4 \u2192 user okay
Parallel Speedup: ~70% faster than sequential
Max Concurrent: 7 (Waves 1 & 2)
\`\`\`
@@ -282,24 +277,27 @@ Max Concurrent: 7 (Waves 1 & 2)
---
## Final Verification Wave (MANDATORY after ALL implementation tasks)
## Final Verification Wave (MANDATORY \u2014 after ALL implementation tasks)
> 4 review agents run in PARALLEL. ALL must APPROVE. Rejection → fix → re-run.
> 4 review agents run in PARALLEL. ALL must APPROVE. Present consolidated results to user and get explicit "okay" before completing.
>
> **Do NOT auto-proceed after verification. Wait for user's explicit approval before marking work complete.**
> **Never mark F1-F4 as checked before getting user's okay.** Rejection or user feedback -> fix -> re-run -> present again -> wait for okay.
- [ ] F1. **Plan Compliance Audit** \`oracle\`
Read the plan end-to-end. For each "Must Have": verify implementation exists (read file, curl endpoint, run command). For each "Must NOT Have": search codebase for forbidden patterns reject with file:line if found. Check evidence files exist in .sisyphus/evidence/. Compare deliverables against plan.
- [ ] F1. **Plan Compliance Audit** \u2014 \`oracle\`
Read the plan end-to-end. For each "Must Have": verify implementation exists (read file, curl endpoint, run command). For each "Must NOT Have": search codebase for forbidden patterns \u2014 reject with file:line if found. Check evidence files exist in .sisyphus/evidence/. Compare deliverables against plan.
Output: \`Must Have [N/N] | Must NOT Have [N/N] | Tasks [N/N] | VERDICT: APPROVE/REJECT\`
- [ ] F2. **Code Quality Review** \`unspecified-high\`
- [ ] F2. **Code Quality Review** \u2014 \`unspecified-high\`
Run \`tsc --noEmit\` + linter + \`bun test\`. Review all changed files for: \`as any\`/\`@ts-ignore\`, empty catches, console.log in prod, commented-out code, unused imports. Check AI slop: excessive comments, over-abstraction, generic names (data/result/item/temp).
Output: \`Build [PASS/FAIL] | Lint [PASS/FAIL] | Tests [N pass/N fail] | Files [N clean/N issues] | VERDICT\`
- [ ] F3. **Real Manual QA** \`unspecified-high\` (+ \`playwright\` skill if UI)
Start from clean state. Execute EVERY QA scenario from EVERY task follow exact steps, capture evidence. Test cross-task integration (features working together, not isolation). Test edge cases: empty state, invalid input, rapid actions. Save to \`.sisyphus/evidence/final-qa/\`.
- [ ] F3. **Real Manual QA** \u2014 \`unspecified-high\` (+ \`playwright\` skill if UI)
Start from clean state. Execute EVERY QA scenario from EVERY task \u2014 follow exact steps, capture evidence. Test cross-task integration (features working together, not isolation). Test edge cases: empty state, invalid input, rapid actions. Save to \`.sisyphus/evidence/final-qa/\`.
Output: \`Scenarios [N/N pass] | Integration [N/N] | Edge Cases [N tested] | VERDICT\`
- [ ] F4. **Scope Fidelity Check** \`deep\`
For each task: read "What to do", read actual diff (git log/diff). Verify 1:1 everything in spec was built (no missing), nothing beyond spec was built (no creep). Check "Must NOT do" compliance. Detect cross-task contamination: Task N touching Task M's files. Flag unaccounted changes.
- [ ] F4. **Scope Fidelity Check** \u2014 \`deep\`
For each task: read "What to do", read actual diff (git log/diff). Verify 1:1 \u2014 everything in spec was built (no missing), nothing beyond spec was built (no creep). Check "Must NOT do" compliance. Detect cross-task contamination: Task N touching Task M's files. Flag unaccounted changes.
Output: \`Tasks [N/N compliant] | Contamination [CLEAN/N issues] | Unaccounted [CLEAN/N files] | VERDICT\`
---

View File

@@ -483,17 +483,23 @@ describe("createBuiltinAgents without systemDefaultModel", () => {
cacheSpy.mockRestore?.()
})
test("agents NOT created when no cache and no systemDefaultModel (first run without defaults)", async () => {
// #given
const cacheSpy = spyOn(connectedProvidersCache, "readConnectedProvidersCache").mockReturnValue(null)
test("oracle is created on first run when no cache and no systemDefaultModel", async () => {
// #given
const cacheSpy = spyOn(connectedProvidersCache, "readConnectedProvidersCache").mockReturnValue(null)
const fetchSpy = spyOn(shared, "fetchAvailableModels").mockResolvedValue(new Set())
// #when
const agents = await createBuiltinAgents([], {}, undefined, undefined)
try {
// #when
const agents = await createBuiltinAgents([], {}, undefined, undefined)
// #then
expect(agents.oracle).toBeUndefined()
cacheSpy.mockRestore?.()
})
// #then
expect(agents.oracle).toBeDefined()
expect(agents.oracle.model).toBe("openai/gpt-5.4")
} finally {
fetchSpy.mockRestore()
cacheSpy.mockRestore()
}
})
test("sisyphus created via connected cache fallback when all providers available", async () => {
// #given
@@ -1110,7 +1116,7 @@ describe("buildAgent with category and skills", () => {
const agent = buildAgent(source["test-agent"], TEST_MODEL)
// #then - category's built-in model and skills are applied
expect(agent.model).toBe("openai/gpt-5.3-codex")
expect(agent.model).toBe("openai/gpt-5.4")
expect(agent.variant).toBe("xhigh")
expect(agent.prompt).toContain("Role: Designer-Turned-Developer")
expect(agent.prompt).toContain("Task description")
@@ -1223,9 +1229,9 @@ describe("override.category expansion in createBuiltinAgents", () => {
// #when
const agents = await createBuiltinAgents([], overrides, undefined, TEST_DEFAULT_MODEL)
// #then - ultrabrain category: model=openai/gpt-5.3-codex, variant=xhigh
// #then - ultrabrain category: model=openai/gpt-5.4, variant=xhigh
expect(agents.oracle).toBeDefined()
expect(agents.oracle.model).toBe("openai/gpt-5.3-codex")
expect(agents.oracle.model).toBe("openai/gpt-5.4")
expect(agents.oracle.variant).toBe("xhigh")
})
@@ -1292,9 +1298,9 @@ describe("override.category expansion in createBuiltinAgents", () => {
// #when
const agents = await createBuiltinAgents([], overrides, undefined, TEST_DEFAULT_MODEL)
// #then - ultrabrain category: model=openai/gpt-5.3-codex, variant=xhigh
// #then - ultrabrain category: model=openai/gpt-5.4, variant=xhigh
expect(agents.sisyphus).toBeDefined()
expect(agents.sisyphus.model).toBe("openai/gpt-5.3-codex")
expect(agents.sisyphus.model).toBe("openai/gpt-5.4")
expect(agents.sisyphus.variant).toBe("xhigh")
})
@@ -1307,9 +1313,9 @@ describe("override.category expansion in createBuiltinAgents", () => {
// #when
const agents = await createBuiltinAgents([], overrides, undefined, TEST_DEFAULT_MODEL)
// #then - ultrabrain category: model=openai/gpt-5.3-codex, variant=xhigh
// #then - ultrabrain category: model=openai/gpt-5.4, variant=xhigh
expect(agents.atlas).toBeDefined()
expect(agents.atlas.model).toBe("openai/gpt-5.3-codex")
expect(agents.atlas.model).toBe("openai/gpt-5.4")
expect(agents.atlas.variant).toBe("xhigh")
})

View File

@@ -31,6 +31,9 @@ exports[`generateModelConfig no providers available returns ULTIMATE_FALLBACK fo
"prometheus": {
"model": "opencode/glm-4.7-free",
},
"sisyphus-junior": {
"model": "opencode/glm-4.7-free",
},
},
"categories": {
"artistry": {
@@ -71,9 +74,6 @@ exports[`generateModelConfig single native provider uses Claude models when only
"explore": {
"model": "anthropic/claude-haiku-4-5",
},
"librarian": {
"model": "anthropic/claude-sonnet-4-5",
},
"metis": {
"model": "anthropic/claude-opus-4-6",
"variant": "max",
@@ -97,6 +97,9 @@ exports[`generateModelConfig single native provider uses Claude models when only
"model": "anthropic/claude-opus-4-6",
"variant": "max",
},
"sisyphus-junior": {
"model": "anthropic/claude-sonnet-4-6",
},
},
"categories": {
"quick": {
@@ -133,9 +136,6 @@ exports[`generateModelConfig single native provider uses Claude models with isMa
"explore": {
"model": "anthropic/claude-haiku-4-5",
},
"librarian": {
"model": "anthropic/claude-sonnet-4-5",
},
"metis": {
"model": "anthropic/claude-opus-4-6",
"variant": "max",
@@ -159,6 +159,9 @@ exports[`generateModelConfig single native provider uses Claude models with isMa
"model": "anthropic/claude-opus-4-6",
"variant": "max",
},
"sisyphus-junior": {
"model": "anthropic/claude-sonnet-4-6",
},
},
"categories": {
"quick": {
@@ -191,8 +194,7 @@ exports[`generateModelConfig single native provider uses OpenAI models when only
"$schema": "https://raw.githubusercontent.com/code-yeongyu/oh-my-openagent/dev/assets/oh-my-opencode.schema.json",
"agents": {
"atlas": {
"model": "openai/gpt-5.4",
"variant": "medium",
"model": "opencode/glm-4.7-free",
},
"explore": {
"model": "openai/gpt-5.4",
@@ -207,8 +209,7 @@ exports[`generateModelConfig single native provider uses OpenAI models when only
"variant": "medium",
},
"metis": {
"model": "openai/gpt-5.4",
"variant": "high",
"model": "opencode/glm-4.7-free",
},
"momus": {
"model": "openai/gpt-5.4",
@@ -230,6 +231,9 @@ exports[`generateModelConfig single native provider uses OpenAI models when only
"model": "openai/gpt-5.4",
"variant": "medium",
},
"sisyphus-junior": {
"model": "opencode/glm-4.7-free",
},
},
"categories": {
"artistry": {
@@ -273,8 +277,7 @@ exports[`generateModelConfig single native provider uses OpenAI models with isMa
"$schema": "https://raw.githubusercontent.com/code-yeongyu/oh-my-openagent/dev/assets/oh-my-opencode.schema.json",
"agents": {
"atlas": {
"model": "openai/gpt-5.4",
"variant": "medium",
"model": "opencode/glm-4.7-free",
},
"explore": {
"model": "openai/gpt-5.4",
@@ -289,8 +292,7 @@ exports[`generateModelConfig single native provider uses OpenAI models with isMa
"variant": "medium",
},
"metis": {
"model": "openai/gpt-5.4",
"variant": "high",
"model": "opencode/glm-4.7-free",
},
"momus": {
"model": "openai/gpt-5.4",
@@ -312,6 +314,9 @@ exports[`generateModelConfig single native provider uses OpenAI models with isMa
"model": "openai/gpt-5.4",
"variant": "medium",
},
"sisyphus-junior": {
"model": "opencode/glm-4.7-free",
},
},
"categories": {
"artistry": {
@@ -355,24 +360,20 @@ exports[`generateModelConfig single native provider uses Gemini models when only
"$schema": "https://raw.githubusercontent.com/code-yeongyu/oh-my-openagent/dev/assets/oh-my-opencode.schema.json",
"agents": {
"atlas": {
"model": "google/gemini-3.1-pro-preview",
"model": "opencode/glm-4.7-free",
},
"explore": {
"model": "opencode/gpt-5-nano",
},
"librarian": {
"model": "opencode/glm-4.7-free",
},
"metis": {
"model": "google/gemini-3.1-pro-preview",
"variant": "high",
"model": "opencode/glm-4.7-free",
},
"momus": {
"model": "google/gemini-3.1-pro-preview",
"variant": "high",
},
"multimodal-looker": {
"model": "google/gemini-3-flash-preview",
"model": "opencode/glm-4.7-free",
},
"oracle": {
"model": "google/gemini-3.1-pro-preview",
@@ -381,6 +382,9 @@ exports[`generateModelConfig single native provider uses Gemini models when only
"prometheus": {
"model": "google/gemini-3.1-pro-preview",
},
"sisyphus-junior": {
"model": "opencode/glm-4.7-free",
},
},
"categories": {
"artistry": {
@@ -416,24 +420,20 @@ exports[`generateModelConfig single native provider uses Gemini models with isMa
"$schema": "https://raw.githubusercontent.com/code-yeongyu/oh-my-openagent/dev/assets/oh-my-opencode.schema.json",
"agents": {
"atlas": {
"model": "google/gemini-3.1-pro-preview",
"model": "opencode/glm-4.7-free",
},
"explore": {
"model": "opencode/gpt-5-nano",
},
"librarian": {
"model": "opencode/glm-4.7-free",
},
"metis": {
"model": "google/gemini-3.1-pro-preview",
"variant": "high",
"model": "opencode/glm-4.7-free",
},
"momus": {
"model": "google/gemini-3.1-pro-preview",
"variant": "high",
},
"multimodal-looker": {
"model": "google/gemini-3-flash-preview",
"model": "opencode/glm-4.7-free",
},
"oracle": {
"model": "google/gemini-3.1-pro-preview",
@@ -442,6 +442,9 @@ exports[`generateModelConfig single native provider uses Gemini models with isMa
"prometheus": {
"model": "google/gemini-3.1-pro-preview",
},
"sisyphus-junior": {
"model": "opencode/glm-4.7-free",
},
},
"categories": {
"artistry": {
@@ -486,9 +489,6 @@ exports[`generateModelConfig all native providers uses preferred models from fal
"model": "openai/gpt-5.3-codex",
"variant": "medium",
},
"librarian": {
"model": "anthropic/claude-sonnet-4-5",
},
"metis": {
"model": "anthropic/claude-opus-4-6",
"variant": "max",
@@ -513,6 +513,9 @@ exports[`generateModelConfig all native providers uses preferred models from fal
"model": "anthropic/claude-opus-4-6",
"variant": "max",
},
"sisyphus-junior": {
"model": "anthropic/claude-sonnet-4-6",
},
},
"categories": {
"artistry": {
@@ -561,9 +564,6 @@ exports[`generateModelConfig all native providers uses preferred models with isM
"model": "openai/gpt-5.3-codex",
"variant": "medium",
},
"librarian": {
"model": "anthropic/claude-sonnet-4-5",
},
"metis": {
"model": "anthropic/claude-opus-4-6",
"variant": "max",
@@ -588,6 +588,9 @@ exports[`generateModelConfig all native providers uses preferred models with isM
"model": "anthropic/claude-opus-4-6",
"variant": "max",
},
"sisyphus-junior": {
"model": "anthropic/claude-sonnet-4-6",
},
},
"categories": {
"artistry": {
@@ -637,9 +640,6 @@ exports[`generateModelConfig fallback providers uses OpenCode Zen models when on
"model": "opencode/gpt-5.3-codex",
"variant": "medium",
},
"librarian": {
"model": "opencode/glm-4.7-free",
},
"metis": {
"model": "opencode/claude-opus-4-6",
"variant": "max",
@@ -664,6 +664,9 @@ exports[`generateModelConfig fallback providers uses OpenCode Zen models when on
"model": "opencode/claude-opus-4-6",
"variant": "max",
},
"sisyphus-junior": {
"model": "opencode/claude-sonnet-4-6",
},
},
"categories": {
"artistry": {
@@ -712,9 +715,6 @@ exports[`generateModelConfig fallback providers uses OpenCode Zen models with is
"model": "opencode/gpt-5.3-codex",
"variant": "medium",
},
"librarian": {
"model": "opencode/glm-4.7-free",
},
"metis": {
"model": "opencode/claude-opus-4-6",
"variant": "max",
@@ -739,6 +739,9 @@ exports[`generateModelConfig fallback providers uses OpenCode Zen models with is
"model": "opencode/claude-opus-4-6",
"variant": "max",
},
"sisyphus-junior": {
"model": "opencode/claude-sonnet-4-6",
},
},
"categories": {
"artistry": {
@@ -784,9 +787,6 @@ exports[`generateModelConfig fallback providers uses GitHub Copilot models when
"explore": {
"model": "github-copilot/gpt-5-mini",
},
"librarian": {
"model": "github-copilot/claude-sonnet-4.5",
},
"metis": {
"model": "github-copilot/claude-opus-4.6",
"variant": "max",
@@ -796,7 +796,7 @@ exports[`generateModelConfig fallback providers uses GitHub Copilot models when
"variant": "xhigh",
},
"multimodal-looker": {
"model": "github-copilot/gemini-3-flash-preview",
"model": "opencode/glm-4.7-free",
},
"oracle": {
"model": "github-copilot/gpt-5.4",
@@ -810,6 +810,9 @@ exports[`generateModelConfig fallback providers uses GitHub Copilot models when
"model": "github-copilot/claude-opus-4.6",
"variant": "max",
},
"sisyphus-junior": {
"model": "github-copilot/claude-sonnet-4.6",
},
},
"categories": {
"artistry": {
@@ -850,9 +853,6 @@ exports[`generateModelConfig fallback providers uses GitHub Copilot models with
"explore": {
"model": "github-copilot/gpt-5-mini",
},
"librarian": {
"model": "github-copilot/claude-sonnet-4.5",
},
"metis": {
"model": "github-copilot/claude-opus-4.6",
"variant": "max",
@@ -862,7 +862,7 @@ exports[`generateModelConfig fallback providers uses GitHub Copilot models with
"variant": "xhigh",
},
"multimodal-looker": {
"model": "github-copilot/gemini-3-flash-preview",
"model": "opencode/glm-4.7-free",
},
"oracle": {
"model": "github-copilot/gpt-5.4",
@@ -876,6 +876,9 @@ exports[`generateModelConfig fallback providers uses GitHub Copilot models with
"model": "github-copilot/claude-opus-4.6",
"variant": "max",
},
"sisyphus-junior": {
"model": "github-copilot/claude-sonnet-4.6",
},
},
"categories": {
"artistry": {
@@ -938,6 +941,9 @@ exports[`generateModelConfig fallback providers uses ZAI model for librarian whe
"sisyphus": {
"model": "zai-coding-plan/glm-5",
},
"sisyphus-junior": {
"model": "opencode/glm-4.7-free",
},
},
"categories": {
"quick": {
@@ -993,6 +999,9 @@ exports[`generateModelConfig fallback providers uses ZAI model for librarian wit
"sisyphus": {
"model": "zai-coding-plan/glm-5",
},
"sisyphus-junior": {
"model": "opencode/glm-4.7-free",
},
},
"categories": {
"quick": {
@@ -1031,9 +1040,6 @@ exports[`generateModelConfig mixed provider scenarios uses Claude + OpenCode Zen
"model": "opencode/gpt-5.3-codex",
"variant": "medium",
},
"librarian": {
"model": "opencode/glm-4.7-free",
},
"metis": {
"model": "anthropic/claude-opus-4-6",
"variant": "max",
@@ -1058,6 +1064,9 @@ exports[`generateModelConfig mixed provider scenarios uses Claude + OpenCode Zen
"model": "anthropic/claude-opus-4-6",
"variant": "max",
},
"sisyphus-junior": {
"model": "anthropic/claude-sonnet-4-6",
},
},
"categories": {
"artistry": {
@@ -1106,9 +1115,6 @@ exports[`generateModelConfig mixed provider scenarios uses OpenAI + Copilot comb
"model": "openai/gpt-5.3-codex",
"variant": "medium",
},
"librarian": {
"model": "github-copilot/claude-sonnet-4.5",
},
"metis": {
"model": "github-copilot/claude-opus-4.6",
"variant": "max",
@@ -1133,6 +1139,9 @@ exports[`generateModelConfig mixed provider scenarios uses OpenAI + Copilot comb
"model": "github-copilot/claude-opus-4.6",
"variant": "max",
},
"sisyphus-junior": {
"model": "github-copilot/claude-sonnet-4.6",
},
},
"categories": {
"artistry": {
@@ -1203,6 +1212,9 @@ exports[`generateModelConfig mixed provider scenarios uses Claude + ZAI combinat
"model": "anthropic/claude-opus-4-6",
"variant": "max",
},
"sisyphus-junior": {
"model": "anthropic/claude-sonnet-4-6",
},
},
"categories": {
"quick": {
@@ -1238,9 +1250,6 @@ exports[`generateModelConfig mixed provider scenarios uses Gemini + Claude combi
"explore": {
"model": "anthropic/claude-haiku-4-5",
},
"librarian": {
"model": "anthropic/claude-sonnet-4-5",
},
"metis": {
"model": "anthropic/claude-opus-4-6",
"variant": "max",
@@ -1250,7 +1259,7 @@ exports[`generateModelConfig mixed provider scenarios uses Gemini + Claude combi
"variant": "max",
},
"multimodal-looker": {
"model": "google/gemini-3-flash-preview",
"model": "opencode/glm-4.7-free",
},
"oracle": {
"model": "google/gemini-3.1-pro-preview",
@@ -1264,6 +1273,9 @@ exports[`generateModelConfig mixed provider scenarios uses Gemini + Claude combi
"model": "anthropic/claude-opus-4-6",
"variant": "max",
},
"sisyphus-junior": {
"model": "anthropic/claude-sonnet-4-6",
},
},
"categories": {
"artistry": {
@@ -1335,6 +1347,9 @@ exports[`generateModelConfig mixed provider scenarios uses all fallback provider
"model": "github-copilot/claude-opus-4.6",
"variant": "max",
},
"sisyphus-junior": {
"model": "github-copilot/claude-sonnet-4.6",
},
},
"categories": {
"artistry": {
@@ -1410,6 +1425,9 @@ exports[`generateModelConfig mixed provider scenarios uses all providers togethe
"model": "anthropic/claude-opus-4-6",
"variant": "max",
},
"sisyphus-junior": {
"model": "anthropic/claude-sonnet-4-6",
},
},
"categories": {
"artistry": {
@@ -1485,6 +1503,9 @@ exports[`generateModelConfig mixed provider scenarios uses all providers with is
"model": "anthropic/claude-opus-4-6",
"variant": "max",
},
"sisyphus-junior": {
"model": "anthropic/claude-sonnet-4-6",
},
},
"categories": {
"artistry": {

View File

@@ -31,6 +31,7 @@ program
.option("--opencode-zen <value>", "OpenCode Zen access: no, yes (default: no)")
.option("--zai-coding-plan <value>", "Z.ai Coding Plan subscription: no, yes (default: no)")
.option("--kimi-for-coding <value>", "Kimi For Coding subscription: no, yes (default: no)")
.option("--opencode-go <value>", "OpenCode Go subscription: no, yes (default: no)")
.option("--skip-auth", "Skip authentication setup hints")
.addHelpText("after", `
Examples:
@@ -57,6 +58,7 @@ Model Providers (Priority: Native > Copilot > OpenCode Zen > Z.ai > Kimi):
opencodeZen: options.opencodeZen,
zaiCodingPlan: options.zaiCodingPlan,
kimiForCoding: options.kimiForCoding,
opencodeGo: options.opencodeGo,
skipAuth: options.skipAuth ?? false,
}
const exitCode = await install(args)

View File

@@ -53,8 +53,8 @@ describe("runBunInstallWithDetails", () => {
})
describe("#given the cache workspace exists", () => {
describe("#when bun install uses inherited output", () => {
it("#then runs bun install in the cache directory", async () => {
describe("#when bun install uses default piped output", () => {
it("#then pipes stdout and stderr by default", async () => {
// given
// when
@@ -65,8 +65,8 @@ describe("runBunInstallWithDetails", () => {
expect(getOpenCodeCacheDirSpy).toHaveBeenCalledTimes(1)
expect(spawnWithWindowsHideSpy).toHaveBeenCalledWith(["bun", "install"], {
cwd: "/tmp/opencode-cache",
stdout: "inherit",
stderr: "inherit",
stdout: "pipe",
stderr: "pipe",
})
})
})
@@ -88,6 +88,23 @@ describe("runBunInstallWithDetails", () => {
})
})
describe("#when bun install uses explicit inherited output", () => {
it("#then passes inherit mode to the spawned process", async () => {
// given
// when
const result = await runBunInstallWithDetails({ outputMode: "inherit" })
// then
expect(result).toEqual({ success: true })
expect(spawnWithWindowsHideSpy).toHaveBeenCalledWith(["bun", "install"], {
cwd: "/tmp/opencode-cache",
stdout: "inherit",
stderr: "inherit",
})
})
})
describe("#when piped bun install fails", () => {
it("#then logs captured stdout and stderr", async () => {
// given

View File

@@ -64,7 +64,7 @@ function logCapturedOutputOnFailure(outputMode: BunInstallOutputMode, output: Bu
}
export async function runBunInstallWithDetails(options?: RunBunInstallOptions): Promise<BunInstallResult> {
const outputMode = options?.outputMode ?? "inherit"
const outputMode = options?.outputMode ?? "pipe"
const cacheDir = getOpenCodeCacheDir()
const packageJsonPath = `${cacheDir}/package.json`

View File

@@ -45,7 +45,8 @@ export function detectCurrentConfig(): DetectedConfig {
hasCopilot: false,
hasOpencodeZen: true,
hasZaiCodingPlan: false,
hasKimiForCoding: false,
hasKimiForCoding: false,
hasOpencodeGo: false,
}
const { format, path } = detectConfigFormat()

View File

@@ -159,7 +159,8 @@ export function argsToConfig(args: InstallArgs): InstallConfig {
hasCopilot: args.copilot === "yes",
hasOpencodeZen: args.opencodeZen === "yes",
hasZaiCodingPlan: args.zaiCodingPlan === "yes",
hasKimiForCoding: args.kimiForCoding === "yes",
hasKimiForCoding: args.kimiForCoding === "yes",
hasOpencodeGo: args.opencodeGo === "yes",
}
}
@@ -170,7 +171,8 @@ export function detectedToInitialValues(detected: DetectedConfig): {
copilot: BooleanArg
opencodeZen: BooleanArg
zaiCodingPlan: BooleanArg
kimiForCoding: BooleanArg
kimiForCoding: BooleanArg
opencodeGo: BooleanArg
} {
let claude: ClaudeSubscription = "no"
if (detected.hasClaude) {
@@ -184,6 +186,7 @@ export function detectedToInitialValues(detected: DetectedConfig): {
copilot: detected.hasCopilot ? "yes" : "no",
opencodeZen: detected.hasOpencodeZen ? "yes" : "no",
zaiCodingPlan: detected.hasZaiCodingPlan ? "yes" : "no",
kimiForCoding: detected.hasKimiForCoding ? "yes" : "no",
kimiForCoding: detected.hasKimiForCoding ? "yes" : "no",
opencodeGo: detected.hasOpencodeGo ? "yes" : "no",
}
}

View File

@@ -12,6 +12,7 @@ export const CLI_AGENT_MODEL_REQUIREMENTS: Record<string, ModelRequirement> = {
model: "claude-opus-4-6",
variant: "max",
},
{ providers: ["opencode-go"], model: "kimi-k2.5" },
{ providers: ["kimi-for-coding"], model: "k2p5" },
{ providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.4", variant: "medium" },
{ providers: ["zai-coding-plan", "opencode"], model: "glm-5" },
@@ -45,37 +46,29 @@ export const CLI_AGENT_MODEL_REQUIREMENTS: Record<string, ModelRequirement> = {
model: "claude-opus-4-6",
variant: "max",
},
{ providers: ["opencode-go"], model: "glm-5" },
],
},
librarian: {
fallbackChain: [
{ providers: ["zai-coding-plan"], model: "glm-4.7" },
{ providers: ["opencode"], model: "glm-4.7-free" },
{
providers: ["anthropic", "github-copilot", "opencode"],
model: "claude-sonnet-4-5",
},
{ providers: ["opencode-go"], model: "minimax-m2.5" },
{ providers: ["opencode"], model: "minimax-m2.5-free" },
{ providers: ["anthropic", "opencode"], model: "claude-haiku-4-5" },
{ providers: ["opencode"], model: "gpt-5-nano" },
],
},
explore: {
fallbackChain: [
{ providers: ["github-copilot"], model: "grok-code-fast-1" },
{ providers: ["opencode-go"], model: "minimax-m2.5" },
{ providers: ["anthropic", "opencode"], model: "claude-haiku-4-5" },
{ providers: ["opencode"], model: "gpt-5-nano" },
],
},
"multimodal-looker": {
fallbackChain: [
{
providers: ["openai", "opencode"],
model: "gpt-5.4",
variant: "medium",
},
{ providers: ["kimi-for-coding"], model: "k2p5" },
{
providers: ["google", "github-copilot", "opencode"],
model: "gemini-3-flash",
},
{ providers: ["openai", "opencode"], model: "gpt-5.4", variant: "medium" },
{ providers: ["opencode-go"], model: "kimi-k2.5" },
{ providers: ["zai-coding-plan"], model: "glm-4.6v" },
{ providers: ["opencode"], model: "gpt-5-nano" },
],
@@ -93,6 +86,7 @@ export const CLI_AGENT_MODEL_REQUIREMENTS: Record<string, ModelRequirement> = {
model: "gpt-5.4",
variant: "high",
},
{ providers: ["opencode-go"], model: "glm-5" },
{
providers: ["google", "github-copilot", "opencode"],
model: "gemini-3.1-pro",
@@ -106,17 +100,8 @@ export const CLI_AGENT_MODEL_REQUIREMENTS: Record<string, ModelRequirement> = {
model: "claude-opus-4-6",
variant: "max",
},
{ providers: ["opencode-go"], model: "glm-5" },
{ providers: ["kimi-for-coding"], model: "k2p5" },
{
providers: ["openai", "github-copilot", "opencode"],
model: "gpt-5.4",
variant: "high",
},
{
providers: ["google", "github-copilot", "opencode"],
model: "gemini-3.1-pro",
variant: "high",
},
],
},
momus: {
@@ -136,158 +121,163 @@ export const CLI_AGENT_MODEL_REQUIREMENTS: Record<string, ModelRequirement> = {
model: "gemini-3.1-pro",
variant: "high",
},
{ providers: ["opencode-go"], model: "glm-5" },
],
},
atlas: {
fallbackChain: [
{ providers: ["kimi-for-coding"], model: "k2p5" },
{
providers: ["anthropic", "github-copilot", "opencode"],
model: "claude-sonnet-4-5",
},
{ providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.4", variant: "medium" },
{
providers: ["google", "github-copilot", "opencode"],
model: "gemini-3.1-pro",
},
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-sonnet-4-5" },
{ providers: ["opencode-go"], model: "kimi-k2.5" },
],
},
"sisyphus-junior": {
fallbackChain: [
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-sonnet-4-6" },
{ providers: ["opencode-go"], model: "kimi-k2.5" },
{ providers: ["opencode"], model: "big-pickle" },
],
},
};
export const CLI_CATEGORY_MODEL_REQUIREMENTS: Record<string, ModelRequirement> =
{
"visual-engineering": {
fallbackChain: [
{
providers: ["google", "github-copilot", "opencode"],
model: "gemini-3.1-pro",
variant: "high",
},
{ providers: ["zai-coding-plan", "opencode"], model: "glm-5" },
{
providers: ["anthropic", "github-copilot", "opencode"],
model: "claude-opus-4-6",
variant: "max",
},
{ providers: ["kimi-for-coding"], model: "k2p5" },
],
},
ultrabrain: {
fallbackChain: [
{
providers: ["openai", "opencode"],
model: "gpt-5.3-codex",
variant: "xhigh",
},
{
providers: ["google", "github-copilot", "opencode"],
model: "gemini-3.1-pro",
variant: "high",
},
{
providers: ["anthropic", "github-copilot", "opencode"],
model: "claude-opus-4-6",
variant: "max",
},
],
},
deep: {
fallbackChain: [
{
providers: ["openai", "opencode"],
model: "gpt-5.3-codex",
variant: "medium",
},
{
providers: ["anthropic", "github-copilot", "opencode"],
model: "claude-opus-4-6",
variant: "max",
},
{
providers: ["google", "github-copilot", "opencode"],
model: "gemini-3.1-pro",
variant: "high",
},
],
requiresModel: "gpt-5.3-codex",
},
artistry: {
fallbackChain: [
{
providers: ["google", "github-copilot", "opencode"],
model: "gemini-3.1-pro",
variant: "high",
},
{
providers: ["anthropic", "github-copilot", "opencode"],
model: "claude-opus-4-6",
variant: "max",
},
{
providers: ["openai", "github-copilot", "opencode"],
model: "gpt-5.4",
},
],
requiresModel: "gemini-3.1-pro",
},
quick: {
fallbackChain: [
{
providers: ["anthropic", "github-copilot", "opencode"],
model: "claude-haiku-4-5",
},
{
providers: ["google", "github-copilot", "opencode"],
model: "gemini-3-flash",
},
{ providers: ["opencode"], model: "gpt-5-nano" },
],
},
"unspecified-low": {
fallbackChain: [
{
providers: ["anthropic", "github-copilot", "opencode"],
model: "claude-sonnet-4-5",
},
{
providers: ["openai", "opencode"],
model: "gpt-5.3-codex",
variant: "medium",
},
{
providers: ["google", "github-copilot", "opencode"],
model: "gemini-3-flash",
},
],
},
"unspecified-high": {
fallbackChain: [
{
providers: ["openai", "github-copilot", "opencode"],
model: "gpt-5.4",
variant: "high",
},
{
providers: ["anthropic", "github-copilot", "opencode"],
model: "claude-opus-4-6",
variant: "max",
},
{ providers: ["zai-coding-plan", "opencode"], model: "glm-5" },
{ providers: ["kimi-for-coding"], model: "k2p5" },
{ providers: ["opencode"], model: "kimi-k2.5" },
],
},
writing: {
fallbackChain: [
{ providers: ["kimi-for-coding"], model: "k2p5" },
{
providers: ["google", "github-copilot", "opencode"],
model: "gemini-3-flash",
},
{
providers: ["anthropic", "github-copilot", "opencode"],
model: "claude-sonnet-4-5",
},
],
},
};
export const CLI_CATEGORY_MODEL_REQUIREMENTS: Record<string, ModelRequirement> = {
"visual-engineering": {
fallbackChain: [
{
providers: ["google", "github-copilot", "opencode"],
model: "gemini-3.1-pro",
variant: "high",
},
{ providers: ["zai-coding-plan", "opencode"], model: "glm-5" },
{
providers: ["anthropic", "github-copilot", "opencode"],
model: "claude-opus-4-6",
variant: "max",
},
{ providers: ["kimi-for-coding"], model: "k2p5" },
{ providers: ["opencode-go"], model: "glm-5" },
],
},
ultrabrain: {
fallbackChain: [
{
providers: ["openai", "opencode"],
model: "gpt-5.3-codex",
variant: "xhigh",
},
{
providers: ["google", "github-copilot", "opencode"],
model: "gemini-3.1-pro",
variant: "high",
},
{
providers: ["anthropic", "github-copilot", "opencode"],
model: "claude-opus-4-6",
variant: "max",
},
{ providers: ["opencode-go"], model: "glm-5" },
],
},
deep: {
fallbackChain: [
{
providers: ["openai", "opencode"],
model: "gpt-5.3-codex",
variant: "medium",
},
{
providers: ["anthropic", "github-copilot", "opencode"],
model: "claude-opus-4-6",
variant: "max",
},
{
providers: ["google", "github-copilot", "opencode"],
model: "gemini-3.1-pro",
variant: "high",
},
],
requiresModel: "gpt-5.3-codex",
},
artistry: {
fallbackChain: [
{
providers: ["google", "github-copilot", "opencode"],
model: "gemini-3.1-pro",
variant: "high",
},
{
providers: ["anthropic", "github-copilot", "opencode"],
model: "claude-opus-4-6",
variant: "max",
},
{
providers: ["openai", "github-copilot", "opencode"],
model: "gpt-5.4",
},
],
requiresModel: "gemini-3.1-pro",
},
quick: {
fallbackChain: [
{
providers: ["anthropic", "github-copilot", "opencode"],
model: "claude-haiku-4-5",
},
{
providers: ["google", "github-copilot", "opencode"],
model: "gemini-3-flash",
},
{ providers: ["opencode-go"], model: "minimax-m2.5" },
{ providers: ["opencode"], model: "gpt-5-nano" },
],
},
"unspecified-low": {
fallbackChain: [
{
providers: ["anthropic", "github-copilot", "opencode"],
model: "claude-sonnet-4-5",
},
{
providers: ["openai", "opencode"],
model: "gpt-5.3-codex",
variant: "medium",
},
{ providers: ["opencode-go"], model: "kimi-k2.5" },
{
providers: ["google", "github-copilot", "opencode"],
model: "gemini-3-flash",
},
],
},
"unspecified-high": {
fallbackChain: [
{
providers: ["openai", "github-copilot", "opencode"],
model: "gpt-5.4",
variant: "high",
},
{
providers: ["anthropic", "github-copilot", "opencode"],
model: "claude-opus-4-6",
variant: "max",
},
{ providers: ["zai-coding-plan", "opencode"], model: "glm-5" },
{ providers: ["kimi-for-coding"], model: "k2p5" },
{ providers: ["opencode"], model: "kimi-k2.5" },
{ providers: ["opencode-go"], model: "glm-5" },
],
},
writing: {
fallbackChain: [
{ providers: ["kimi-for-coding"], model: "k2p5" },
{
providers: ["google", "github-copilot", "opencode"],
model: "gemini-3-flash",
},
{ providers: ["opencode-go"], model: "kimi-k2.5" },
{
providers: ["anthropic", "github-copilot", "opencode"],
model: "claude-sonnet-4-5",
},
],
},
};

View File

@@ -7,7 +7,8 @@ export interface ProviderAvailability {
opencodeZen: boolean
copilot: boolean
zai: boolean
kimiForCoding: boolean
kimiForCoding: boolean
opencodeGo: boolean
isMaxPlan: boolean
}

View File

@@ -495,15 +495,15 @@ describe("generateModelConfig", () => {
expect(result.agents?.librarian?.model).toBe("zai-coding-plan/glm-4.7")
})
test("librarian falls back to generic chain result when no librarian provider matches", () => {
// #given only Claude is available (no ZAI)
test("librarian is omitted when no librarian provider matches", () => {
// #given only Claude is available (no opencode-go or ZAI)
const config = createConfig({ hasClaude: true })
// #when generateModelConfig is called
const result = generateModelConfig(config)
// #then librarian should use generic chain result when chain providers are unavailable
expect(result.agents?.librarian?.model).toBe("anthropic/claude-sonnet-4-5")
// #then librarian should be omitted when its dedicated providers are unavailable
expect(result.agents?.librarian).toBeUndefined()
})
})

View File

@@ -33,8 +33,8 @@ export function generateModelConfig(config: InstallConfig): GeneratedOmoConfig {
avail.opencodeZen ||
avail.copilot ||
avail.zai ||
avail.kimiForCoding
avail.kimiForCoding ||
avail.opencodeGo
if (!hasAnyProvider) {
return {
$schema: SCHEMA_URL,
@@ -53,8 +53,12 @@ export function generateModelConfig(config: InstallConfig): GeneratedOmoConfig {
const categories: Record<string, CategoryConfig> = {}
for (const [role, req] of Object.entries(CLI_AGENT_MODEL_REQUIREMENTS)) {
if (role === "librarian" && avail.zai) {
agents[role] = { model: ZAI_MODEL }
if (role === "librarian") {
if (avail.opencodeGo) {
agents[role] = { model: "opencode-go/minimax-m2.5" }
} else if (avail.zai) {
agents[role] = { model: ZAI_MODEL }
}
continue
}
@@ -63,6 +67,8 @@ export function generateModelConfig(config: InstallConfig): GeneratedOmoConfig {
agents[role] = { model: "anthropic/claude-haiku-4-5" }
} else if (avail.opencodeZen) {
agents[role] = { model: "opencode/claude-haiku-4-5" }
} else if (avail.opencodeGo) {
agents[role] = { model: "opencode-go/minimax-m2.5" }
} else if (avail.copilot) {
agents[role] = { model: "github-copilot/gpt-5-mini" }
} else {

View File

@@ -11,7 +11,8 @@ export function toProviderAvailability(config: InstallConfig): ProviderAvailabil
opencodeZen: config.hasOpencodeZen,
copilot: config.hasCopilot,
zai: config.hasZaiCodingPlan,
kimiForCoding: config.hasKimiForCoding,
kimiForCoding: config.hasKimiForCoding,
opencodeGo: config.hasOpencodeGo,
isMaxPlan: config.isMax20,
}
}
@@ -24,7 +25,8 @@ export function isProviderAvailable(provider: string, availability: ProviderAvai
"github-copilot": availability.copilot,
opencode: availability.opencodeZen,
"zai-coding-plan": availability.zai,
"kimi-for-coding": availability.kimiForCoding,
"kimi-for-coding": availability.kimiForCoding,
"opencode-go": availability.opencodeGo,
}
return mapping[provider] ?? false
}

View File

@@ -1,26 +1,41 @@
import { describe, it, expect, spyOn, beforeEach, afterEach } from "bun:test"
import * as spawnWithWindowsHideModule from "../../shared/spawn-with-windows-hide"
import * as loggerModule from "../../shared/logger"
import { executeOnCompleteHook } from "./on-complete-hook"
describe("executeOnCompleteHook", () => {
function createProc(exitCode: number) {
function createStream(text: string): ReadableStream<Uint8Array> | undefined {
if (text.length === 0) {
return undefined
}
const encoder = new TextEncoder()
return new ReadableStream<Uint8Array>({
start(controller) {
controller.enqueue(encoder.encode(text))
controller.close()
},
})
}
function createProc(exitCode: number, output?: { stdout?: string; stderr?: string }) {
return {
exited: Promise.resolve(exitCode),
exitCode,
stdout: undefined,
stderr: undefined,
stdout: createStream(output?.stdout ?? ""),
stderr: createStream(output?.stderr ?? ""),
kill: () => {},
} satisfies ReturnType<typeof spawnWithWindowsHideModule.spawnWithWindowsHide>
}
let consoleErrorSpy: ReturnType<typeof spyOn<typeof console, "error">>
let logSpy: ReturnType<typeof spyOn<typeof loggerModule, "log">>
beforeEach(() => {
consoleErrorSpy = spyOn(console, "error").mockImplementation(() => {})
logSpy = spyOn(loggerModule, "log").mockImplementation(() => {})
})
afterEach(() => {
consoleErrorSpy.mockRestore()
logSpy.mockRestore()
})
it("executes command with correct env vars", async () => {
@@ -46,8 +61,8 @@ describe("executeOnCompleteHook", () => {
expect(options?.env?.EXIT_CODE).toBe("0")
expect(options?.env?.DURATION_MS).toBe("5000")
expect(options?.env?.MESSAGE_COUNT).toBe("10")
expect(options?.stdout).toBe("inherit")
expect(options?.stderr).toBe("inherit")
expect(options?.stdout).toBe("pipe")
expect(options?.stderr).toBe("pipe")
} finally {
spawnSpy.mockRestore()
}
@@ -140,9 +155,8 @@ describe("executeOnCompleteHook", () => {
).resolves.toBeUndefined()
// then
expect(consoleErrorSpy).toHaveBeenCalled()
const warningCall = consoleErrorSpy.mock.calls.find(
(call) => typeof call[0] === "string" && call[0].includes("Warning: on-complete hook exited with code 1")
const warningCall = logSpy.mock.calls.find(
(call) => call[0] === "On-complete hook exited with non-zero code"
)
expect(warningCall).toBeDefined()
} finally {
@@ -170,12 +184,41 @@ describe("executeOnCompleteHook", () => {
).resolves.toBeUndefined()
// then
expect(consoleErrorSpy).toHaveBeenCalled()
const errorCalls = consoleErrorSpy.mock.calls.filter((call) => {
const firstArg = call[0]
return typeof firstArg === "string" && (firstArg.includes("Warning") || firstArg.toLowerCase().includes("error"))
const errorCall = logSpy.mock.calls.find(
(call) => call[0] === "Failed to execute on-complete hook"
)
expect(errorCall).toBeDefined()
} finally {
spawnSpy.mockRestore()
}
})
it("hook stdout and stderr are logged to file logger", async () => {
// given
const spawnSpy = spyOn(spawnWithWindowsHideModule, "spawnWithWindowsHide").mockReturnValue(
createProc(0, { stdout: "hook output\n", stderr: "hook warning\n" })
)
try {
// when
await executeOnCompleteHook({
command: "echo test",
sessionId: "session-123",
exitCode: 0,
durationMs: 5000,
messageCount: 10,
})
expect(errorCalls.length).toBeGreaterThan(0)
// then
const stdoutCall = logSpy.mock.calls.find(
(call) => call[0] === "On-complete hook stdout"
)
const stderrCall = logSpy.mock.calls.find(
(call) => call[0] === "On-complete hook stderr"
)
expect(stdoutCall?.[1]).toEqual({ command: "echo test", stdout: "hook output" })
expect(stderrCall?.[1]).toEqual({ command: "echo test", stderr: "hook warning" })
} finally {
spawnSpy.mockRestore()
}

View File

@@ -1,5 +1,24 @@
import pc from "picocolors"
import { spawnWithWindowsHide } from "../../shared/spawn-with-windows-hide"
import { log } from "../../shared"
async function readOutput(
stream: ReadableStream<Uint8Array> | undefined,
streamName: "stdout" | "stderr"
): Promise<string> {
if (!stream) {
return ""
}
try {
return await new Response(stream).text()
} catch (error) {
log("Failed to read on-complete hook output", {
stream: streamName,
error: error instanceof Error ? error.message : String(error),
})
return ""
}
}
export async function executeOnCompleteHook(options: {
command: string
@@ -15,7 +34,7 @@ export async function executeOnCompleteHook(options: {
return
}
console.error(pc.dim(`Running on-complete hook: ${trimmedCommand}`))
log("Running on-complete hook", { command: trimmedCommand })
try {
const proc = spawnWithWindowsHide(["sh", "-c", trimmedCommand], {
@@ -26,18 +45,34 @@ export async function executeOnCompleteHook(options: {
DURATION_MS: String(durationMs),
MESSAGE_COUNT: String(messageCount),
},
stdout: "inherit",
stderr: "inherit",
stdout: "pipe",
stderr: "pipe",
})
const hookExitCode = await proc.exited
const [hookExitCode, stdout, stderr] = await Promise.all([
proc.exited,
readOutput(proc.stdout, "stdout"),
readOutput(proc.stderr, "stderr"),
])
if (stdout.trim()) {
log("On-complete hook stdout", { command: trimmedCommand, stdout: stdout.trim() })
}
if (stderr.trim()) {
log("On-complete hook stderr", { command: trimmedCommand, stderr: stderr.trim() })
}
if (hookExitCode !== 0) {
console.error(
pc.yellow(`Warning: on-complete hook exited with code ${hookExitCode}`)
)
log("On-complete hook exited with non-zero code", {
command: trimmedCommand,
exitCode: hookExitCode,
})
}
} catch (error) {
console.error(pc.yellow(`Warning: Failed to execute on-complete hook: ${error instanceof Error ? error.message : String(error)}`))
log("Failed to execute on-complete hook", {
command: trimmedCommand,
error: error instanceof Error ? error.message : String(error),
})
}
}

View File

@@ -97,9 +97,19 @@ export async function promptInstallConfig(detected: DetectedConfig): Promise<Ins
{ value: "yes", label: "Yes", hint: "Kimi K2.5 for Sisyphus/Prometheus fallback" },
],
initialValue: initial.kimiForCoding,
})
})
if (!kimiForCoding) return null
const opencodeGo = await selectOrCancel({
message: "Do you have an OpenCode Go subscription?",
options: [
{ value: "no", label: "No", hint: "Will use other configured providers" },
{ value: "yes", label: "Yes", hint: "OpenCode Go for quick tasks" },
],
initialValue: initial.opencodeGo,
})
if (!opencodeGo) return null
return {
hasClaude: claude !== "no",
isMax20: claude === "max20",
@@ -109,5 +119,6 @@ export async function promptInstallConfig(detected: DetectedConfig): Promise<Ins
hasOpencodeZen: opencodeZen === "yes",
hasZaiCodingPlan: zaiCodingPlan === "yes",
hasKimiForCoding: kimiForCoding === "yes",
hasOpencodeGo: opencodeGo === "yes",
}
}

View File

@@ -9,7 +9,8 @@ export interface InstallArgs {
copilot?: BooleanArg
opencodeZen?: BooleanArg
zaiCodingPlan?: BooleanArg
kimiForCoding?: BooleanArg
kimiForCoding?: BooleanArg
opencodeGo?: BooleanArg
skipAuth?: boolean
}
@@ -22,6 +23,7 @@ export interface InstallConfig {
hasOpencodeZen: boolean
hasZaiCodingPlan: boolean
hasKimiForCoding: boolean
hasOpencodeGo: boolean
}
export interface ConfigMergeResult {
@@ -40,4 +42,5 @@ export interface DetectedConfig {
hasOpencodeZen: boolean
hasZaiCodingPlan: boolean
hasKimiForCoding: boolean
hasOpencodeGo: boolean
}

View File

@@ -8,7 +8,7 @@ export const BackgroundTaskConfigSchema = z.object({
maxDescendants: z.number().int().min(1).optional(),
/** Stale timeout in milliseconds - interrupt tasks with no activity for this duration (default: 180000 = 3 minutes, minimum: 60000 = 1 minute) */
staleTimeoutMs: z.number().min(60000).optional(),
/** Timeout for tasks that never received any progress update, falling back to startedAt (default: 600000 = 10 minutes, minimum: 60000 = 1 minute) */
/** Timeout for tasks that never received any progress update, falling back to startedAt (default: 1800000 = 30 minutes, minimum: 60000 = 1 minute) */
messageStalenessTimeoutMs: z.number().min(60000).optional(),
syncPollTimeoutMs: z.number().min(60000).optional(),
})

View File

@@ -51,6 +51,7 @@ export const HookNameSchema = z.enum([
"anthropic-effort",
"hashline-read-enhancer",
"read-image-resizer",
"delegate-task-english-directive",
])
export type HookName = z.infer<typeof HookNameSchema>

View File

@@ -11,6 +11,20 @@ import { createSkillHooks } from "./plugin/hooks/create-skill-hooks"
export type CreatedHooks = ReturnType<typeof createHooks>

/** A hook object that may expose an optional dispose(); null/undefined hooks are tolerated. */
type DisposableHook = { dispose?: () => void } | null | undefined

/** The subset of created hooks whose resources may need explicit teardown. */
export type DisposableCreatedHooks = {
  runtimeFallback?: DisposableHook
  todoContinuationEnforcer?: DisposableHook
  autoSlashCommand?: DisposableHook
}

/** Calls dispose() on each disposable hook that defines one; absent hooks are skipped. */
export function disposeCreatedHooks(hooks: DisposableCreatedHooks): void {
  const disposables: DisposableHook[] = [
    hooks.runtimeFallback,
    hooks.todoContinuationEnforcer,
    hooks.autoSlashCommand,
  ]
  for (const hook of disposables) {
    hook?.dispose?.()
  }
}
export function createHooks(args: {
ctx: PluginContext
pluginConfig: OhMyOpenCodeConfig
@@ -58,9 +72,16 @@ export function createHooks(args: {
availableSkills,
})
return {
const hooks = {
...core,
...continuation,
...skill,
}
return {
...hooks,
disposeHooks: (): void => {
disposeCreatedHooks(hooks)
},
}
}

View File

@@ -53,8 +53,8 @@ export function createManagers(args: {
log("[index] onSubagentSessionCreated callback completed")
},
onShutdown: () => {
tmuxSessionManager.cleanup().catch((error) => {
onShutdown: async () => {
await tmuxSessionManager.cleanup().catch((error) => {
log("[index] tmux cleanup error during shutdown:", error)
})
},

View File

@@ -0,0 +1,193 @@
import { tmpdir } from "node:os"
import type { PluginInput } from "@opencode-ai/plugin"
import { afterEach, describe, expect, test } from "bun:test"
import { ConcurrencyManager } from "./concurrency"
import { BackgroundManager } from "./manager"
import type { BackgroundTask, LaunchInput } from "./types"
// Managers created during a test are registered here and shut down after each
// test so state from one test cannot bleed into the next.
const managersToShutdown: BackgroundManager[] = []
afterEach(() => {
  while (managersToShutdown.length > 0) managersToShutdown.pop()?.shutdown()
})
/**
 * Builds a BackgroundManager backed by a stubbed session client.
 * The stubs cover the session API surface these tests touch; the manager is
 * auto-registered for teardown via managersToShutdown.
 */
function createBackgroundManager(config?: { defaultConcurrency?: number }): BackgroundManager {
  const directory = tmpdir()
  const client = { session: {} as PluginInput["client"]["session"] } as PluginInput["client"]
  const sessionStubs: Record<string, () => Promise<unknown>> = {
    abort: async () => ({ data: true }),
    create: async () => ({ data: { id: `session-${crypto.randomUUID().slice(0, 8)}` } }),
    get: async () => ({ data: { directory } }),
    messages: async () => ({ data: [] }),
    prompt: async () => ({ data: { info: {}, parts: [] } }),
    promptAsync: async () => ({ data: undefined }),
  }
  // Reflect.set keeps the cast-only client object assignable without widening its type.
  for (const [name, stub] of Object.entries(sessionStubs)) {
    Reflect.set(client.session, name, stub)
  }
  const manager = new BackgroundManager(
    {
      $: {} as PluginInput["$"],
      client,
      directory,
      project: {} as PluginInput["project"],
      serverUrl: new URL("http://localhost"),
      worktree: directory,
    },
    config
  )
  managersToShutdown.push(manager)
  return manager
}
/**
 * Builds a BackgroundTask for tests with sensible running-task defaults.
 * Defaulting uses ?? per field, so an explicit `undefined` override still falls
 * back to the default (some tests pass `startedAt: undefined` and rely on this).
 */
function createMockTask(overrides: Partial<BackgroundTask> & { id: string; parentSessionID: string }): BackgroundTask {
  const {
    id,
    parentSessionID,
    sessionID,
    queuedAt,
    completedAt,
    error,
    model,
    concurrencyKey,
    concurrencyGroup,
    progress,
  } = overrides
  return {
    id,
    sessionID,
    parentSessionID,
    parentMessageID: overrides.parentMessageID ?? "parent-message-id",
    description: overrides.description ?? "test task",
    prompt: overrides.prompt ?? "test prompt",
    agent: overrides.agent ?? "test-agent",
    status: overrides.status ?? "running",
    queuedAt,
    startedAt: overrides.startedAt ?? new Date(),
    completedAt,
    error,
    model,
    concurrencyKey,
    concurrencyGroup,
    progress,
  }
}
// Test-only accessors for BackgroundManager's private fields, read via Reflect so the
// production class does not have to widen its public API. The string keys must stay
// in sync with the private property names declared on BackgroundManager.
function getTaskMap(manager: BackgroundManager): Map<string, BackgroundTask> { return Reflect.get(manager, "tasks") as Map<string, BackgroundTask> }
function getPendingByParent(manager: BackgroundManager): Map<string, Set<string>> { return Reflect.get(manager, "pendingByParent") as Map<string, Set<string>> }
function getQueuesByKey(manager: BackgroundManager): Map<string, Array<{ task: BackgroundTask; input: LaunchInput }>> { return Reflect.get(manager, "queuesByKey") as Map<string, Array<{ task: BackgroundTask; input: LaunchInput }>> }
function getConcurrencyManager(manager: BackgroundManager): ConcurrencyManager { return Reflect.get(manager, "concurrencyManager") as ConcurrencyManager }
function getCompletionTimers(manager: BackgroundManager): Map<string, ReturnType<typeof setTimeout>> { return Reflect.get(manager, "completionTimers") as Map<string, ReturnType<typeof setTimeout>> }
// Invokes the manager's private processKey() queue loop for one key and awaits it.
async function processKeyForTest(manager: BackgroundManager, key: string): Promise<void> {
  const processKey = Reflect.get(manager, "processKey") as (key: string) => Promise<void>
  await processKey.call(manager, key)
}
/**
 * Fires a task's scheduled completion-cleanup timer immediately instead of
 * waiting out the TTL. Throws if no timer (or no callback) is registered for
 * the task, so a missing timer fails the test loudly.
 */
function runScheduledCleanup(manager: BackgroundManager, taskId: string): void {
  const timer = getCompletionTimers(manager).get(taskId)
  if (!timer) {
    throw new Error(`Expected cleanup timer for task ${taskId}`)
  }
  // The runtime's timer object exposes its scheduled callback on the internal
  // `_onTimeout` slot — TODO confirm this stays true across Bun/Node upgrades.
  const callback = Reflect.get(timer, "_onTimeout") as (() => void) | undefined
  if (!callback) {
    throw new Error(`Expected cleanup callback for task ${taskId}`)
  }
  callback()
}
// Regression coverage: cancelTask must fully release bookkeeping (tasks map,
// pendingByParent, concurrency slots) regardless of the notification flag.
describe("BackgroundManager.cancelTask cleanup", () => {
  test("#given a running task in BackgroundManager #when cancelTask called with skipNotification=true #then task is eventually removed from this.tasks Map", async () => {
    // given
    const manager = createBackgroundManager()
    const task = createMockTask({
      id: "task-skip-notification-cleanup",
      parentSessionID: "parent-session-skip-notification-cleanup",
      sessionID: "session-skip-notification-cleanup",
    })
    getTaskMap(manager).set(task.id, task)
    getPendingByParent(manager).set(task.parentSessionID, new Set([task.id]))
    // when
    const cancelled = await manager.cancelTask(task.id, {
      skipNotification: true,
      source: "test",
    })
    // then
    expect(cancelled).toBe(true)
    expect(getPendingByParent(manager).get(task.parentSessionID)).toBeUndefined()
    // Removal is deferred to a cleanup timer; fire it manually instead of waiting.
    runScheduledCleanup(manager, task.id)
    expect(manager.getTask(task.id)).toBeUndefined()
  })
  test("#given a running task #when cancelTask called with skipNotification=false #then task is also eventually removed", async () => {
    // given
    const manager = createBackgroundManager()
    const task = createMockTask({
      id: "task-notify-cleanup",
      parentSessionID: "parent-session-notify-cleanup",
      sessionID: "session-notify-cleanup",
    })
    getTaskMap(manager).set(task.id, task)
    getPendingByParent(manager).set(task.parentSessionID, new Set([task.id]))
    // when
    const cancelled = await manager.cancelTask(task.id, {
      skipNotification: false,
      source: "test",
    })
    // then
    expect(cancelled).toBe(true)
    runScheduledCleanup(manager, task.id)
    expect(manager.getTask(task.id)).toBeUndefined()
  })
  test("#given a running task #when cancelTask called with skipNotification=true #then concurrency slot is freed and pending tasks can start", async () => {
    // given
    const manager = createBackgroundManager({ defaultConcurrency: 1 })
    const concurrencyManager = getConcurrencyManager(manager)
    const concurrencyKey = "test-provider/test-model"
    // Occupy the single slot so the queued task below can only start after a release.
    await concurrencyManager.acquire(concurrencyKey)
    const runningTask = createMockTask({
      id: "task-running-before-cancel",
      parentSessionID: "parent-session-concurrency-cleanup",
      sessionID: "session-running-before-cancel",
      concurrencyKey,
    })
    const pendingTask = createMockTask({
      id: "task-pending-after-cancel",
      parentSessionID: runningTask.parentSessionID,
      status: "pending",
      startedAt: undefined,
      queuedAt: new Date(),
      model: { providerID: "test-provider", modelID: "test-model" },
    })
    const queuedInput: LaunchInput = {
      agent: pendingTask.agent,
      description: pendingTask.description,
      model: pendingTask.model,
      parentMessageID: pendingTask.parentMessageID,
      parentSessionID: pendingTask.parentSessionID,
      prompt: pendingTask.prompt,
    }
    getTaskMap(manager).set(runningTask.id, runningTask)
    getTaskMap(manager).set(pendingTask.id, pendingTask)
    getPendingByParent(manager).set(runningTask.parentSessionID, new Set([runningTask.id, pendingTask.id]))
    getQueuesByKey(manager).set(concurrencyKey, [{ input: queuedInput, task: pendingTask }])
    // Stub the private startTask so processKey can "start" the queued task without real sessions.
    Reflect.set(manager, "startTask", async ({ task }: { task: BackgroundTask; input: LaunchInput }) => {
      task.status = "running"
      task.startedAt = new Date()
      task.sessionID = "session-started-after-cancel"
      task.concurrencyKey = concurrencyKey
      task.concurrencyGroup = concurrencyKey
    })
    // when
    const cancelled = await manager.cancelTask(runningTask.id, {
      abortSession: false,
      skipNotification: true,
      source: "test",
    })
    await processKeyForTest(manager, concurrencyKey)
    // then
    expect(cancelled).toBe(true)
    // Count stays at 1: the queued task now holds the slot the cancelled task released.
    expect(concurrencyManager.getCount(concurrencyKey)).toBe(1)
    expect(manager.getTask(pendingTask.id)?.status).toBe("running")
  })
})

View File

@@ -4,7 +4,7 @@ import type { BackgroundTask, LaunchInput } from "./types"
export const TASK_TTL_MS = 30 * 60 * 1000
export const MIN_STABILITY_TIME_MS = 10 * 1000
export const DEFAULT_STALE_TIMEOUT_MS = 180_000
export const DEFAULT_MESSAGE_STALENESS_TIMEOUT_MS = 600_000
export const DEFAULT_MESSAGE_STALENESS_TIMEOUT_MS = 1_800_000
export const MIN_RUNTIME_BEFORE_STALE_MS = 30_000
export const MIN_IDLE_TIME_MS = 5000
export const POLLING_INTERVAL_MS = 3000

View File

@@ -0,0 +1,60 @@
declare const require: (name: string) => any
const { describe, expect, test, mock } = require("bun:test")
import { DEFAULT_MESSAGE_STALENESS_TIMEOUT_MS } from "./constants"
import { checkAndInterruptStaleTasks } from "./task-poller"
import type { BackgroundTask } from "./types"
/** Builds a minimal running BackgroundTask whose only variable input is its start time. */
function createRunningTask(startedAt: Date): BackgroundTask {
  return {
    agent: "explore",
    description: "test",
    id: "task-1",
    parentMessageID: "msg-1",
    parentSessionID: "parent-ses-1",
    progress: undefined,
    prompt: "test",
    sessionID: "ses-1",
    startedAt,
    status: "running",
  }
}
// Pins the raised never-updated-task timeout (10 min -> 30 min) and verifies the
// poller honors the new default when no config is supplied.
describe("DEFAULT_MESSAGE_STALENESS_TIMEOUT_MS", () => {
  test("uses a 30 minute default", () => {
    // #given
    const expectedTimeout = 30 * 60 * 1000
    // #when
    const timeout = DEFAULT_MESSAGE_STALENESS_TIMEOUT_MS
    // #then
    expect(timeout).toBe(expectedTimeout)
  })
  test("does not interrupt a never-updated task after 15 minutes when config is omitted", async () => {
    // #given
    // 15 minutes is past the previous 10-minute default but inside the new 30-minute one.
    const task = createRunningTask(new Date(Date.now() - 15 * 60 * 1000))
    const client = {
      session: {
        abort: mock(() => Promise.resolve()),
      },
    }
    const concurrencyManager = {
      release: mock(() => {}),
    }
    const notifyParentSession = mock(() => Promise.resolve())
    // #when
    await checkAndInterruptStaleTasks({
      tasks: [task],
      client: client as never,
      config: undefined,
      concurrencyManager: concurrencyManager as never,
      notifyParentSession,
    })
    // #then
    expect(task.status).toBe("running")
  })
})

View File

@@ -0,0 +1,97 @@
import { afterEach, beforeEach, describe, expect, test } from "bun:test"
import { tmpdir } from "node:os"
import { _resetForTesting, subagentSessions } from "../claude-code-session-state"
import { SessionCategoryRegistry } from "../../shared/session-category-registry"
import { BackgroundManager } from "./manager"
import type { BackgroundTask } from "./types"
/**
 * Builds a BackgroundTask from running-task defaults with overrides spread last,
 * so any override key — including one set to `undefined` — wins over the default.
 */
function createTask(overrides: Partial<BackgroundTask> & { id: string; sessionID: string }): BackgroundTask {
  const defaults = {
    agent: "explore",
    description: "test task",
    parentMessageID: "parent-message",
    parentSessionID: "parent-session",
    prompt: "test prompt",
    startedAt: new Date(),
    status: "running" as const,
  }
  return { ...defaults, ...overrides }
}
/** Constructs a BackgroundManager wired to a no-op session client for shutdown tests. */
function createBackgroundManager(): BackgroundManager {
  const noop = async () => ({})
  const stubClient = {
    session: {
      abort: noop,
      prompt: noop,
      promptAsync: noop,
    },
  } as never
  const dir = tmpdir()
  return new BackgroundManager({
    client: stubClient,
    project: {} as never,
    directory: dir,
    worktree: dir,
    serverUrl: new URL("https://example.com"),
    $: {} as never,
  } as never)
}
// Verifies shutdown() scrubs the module-global registries (subagentSessions,
// SessionCategoryRegistry) for sessions this manager tracked, while leaving
// entries it does not own untouched.
describe("BackgroundManager shutdown global cleanup", () => {
  beforeEach(() => {
    // given
    _resetForTesting()
    SessionCategoryRegistry.clear()
  })
  afterEach(() => {
    // given
    _resetForTesting()
    SessionCategoryRegistry.clear()
  })
  test("removes tracked session IDs from subagentSessions and SessionCategoryRegistry on shutdown", async () => {
    // given
    const runningSessionID = "ses-running-shutdown-cleanup"
    const completedSessionID = "ses-completed-shutdown-cleanup"
    const unrelatedSessionID = "ses-unrelated-shutdown-cleanup"
    const manager = createBackgroundManager()
    const tasks = new Map<string, BackgroundTask>([
      [
        "task-running-shutdown-cleanup",
        createTask({
          id: "task-running-shutdown-cleanup",
          sessionID: runningSessionID,
        }),
      ],
      [
        "task-completed-shutdown-cleanup",
        createTask({
          id: "task-completed-shutdown-cleanup",
          sessionID: completedSessionID,
          status: "completed",
          completedAt: new Date(),
        }),
      ],
    ])
    // Inject the task map directly; launching real tasks is unnecessary for cleanup.
    Object.assign(manager, { tasks })
    subagentSessions.add(runningSessionID)
    subagentSessions.add(completedSessionID)
    subagentSessions.add(unrelatedSessionID)
    SessionCategoryRegistry.register(runningSessionID, "quick")
    SessionCategoryRegistry.register(completedSessionID, "deep")
    SessionCategoryRegistry.register(unrelatedSessionID, "test")
    // when
    await manager.shutdown()
    // then
    expect(subagentSessions.has(runningSessionID)).toBe(false)
    expect(subagentSessions.has(completedSessionID)).toBe(false)
    expect(subagentSessions.has(unrelatedSessionID)).toBe(true)
    expect(SessionCategoryRegistry.has(runningSessionID)).toBe(false)
    expect(SessionCategoryRegistry.has(completedSessionID)).toBe(false)
    expect(SessionCategoryRegistry.has(unrelatedSessionID)).toBe(true)
  })
})

View File

@@ -1572,6 +1572,189 @@ describe("BackgroundManager.trackTask", () => {
})
})
// Regression coverage for launch()/resume() bookkeeping: pending-task registration
// under the parent session, cleanup after prompt-dispatch failures, and moving
// bookkeeping when a tracked task's parent session changes.
describe("BackgroundManager.launch and resume cleanup regressions", () => {
  test("launch should register pending task under parent before background start finishes", async () => {
    //#given
    let releaseCreate: (() => void) | undefined
    // Gate session.create so the task stays in its pre-start window during the assertions.
    const createGate = new Promise<void>((resolve) => {
      releaseCreate = resolve
    })
    const client = {
      session: {
        get: async () => ({ data: { directory: "/test/dir" } }),
        create: async () => {
          await createGate
          return { data: { id: "session-launch-pending" } }
        },
        promptAsync: async () => ({}),
        abort: async () => ({}),
      },
    }
    const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput)
    //#when
    const task = await manager.launch({
      description: "pending registration",
      prompt: "launch prompt",
      agent: "explore",
      parentSessionID: "parent-launch-pending",
      parentMessageID: "msg-launch-pending",
    })
    //#then
    expect(getPendingByParent(manager).get("parent-launch-pending")?.has(task.id)).toBe(true)
    expect(manager.getTask(task.id)?.status).toBe("pending")
    releaseCreate?.()
    await flushBackgroundNotifications()
    manager.shutdown()
  })
  test("launch should clean pending bookkeeping and format missing-agent prompt errors", async () => {
    //#given
    const abortedSessionIDs: string[] = []
    const promptAsyncCalls: string[] = []
    const client = {
      session: {
        get: async () => ({ data: { directory: "/test/dir" } }),
        create: async () => ({ data: { id: "session-launch-error" } }),
        promptAsync: async (args: { path: { id: string } }) => {
          promptAsyncCalls.push(args.path.id)
          // Fail only the subagent session's prompt; the parent-notification prompt succeeds.
          if (args.path.id === "session-launch-error") {
            throw new Error("agent.name is undefined")
          }
          return {}
        },
        abort: async (args: { path: { id: string } }) => {
          abortedSessionIDs.push(args.path.id)
          return {}
        },
        messages: async () => ({ data: [] }),
      },
    }
    const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput)
    //#when
    const launchedTask = await manager.launch({
      description: "launch prompt error",
      prompt: "launch prompt",
      agent: "missing-agent",
      parentSessionID: "parent-launch-error",
      parentMessageID: "msg-launch-error",
    })
    await flushBackgroundNotifications()
    await new Promise((resolve) => setTimeout(resolve, 10))
    //#then
    // Launch-path failures are reformatted into a user-facing missing-agent message.
    const storedTask = manager.getTask(launchedTask.id)
    expect(storedTask?.status).toBe("interrupt")
    expect(storedTask?.error).toBe('Agent "missing-agent" not found. Make sure the agent is registered in your opencode.json or provided by a plugin.')
    expect(storedTask?.concurrencyKey).toBeUndefined()
    expect(storedTask?.completedAt).toBeInstanceOf(Date)
    expect(getPendingByParent(manager).get("parent-launch-error")).toBeUndefined()
    expect(abortedSessionIDs).toContain("session-launch-error")
    expect(promptAsyncCalls).toContain("parent-launch-error")
    manager.shutdown()
  })
  test("resume should clean pending bookkeeping and preserve raw prompt errors", async () => {
    //#given
    const abortedSessionIDs: string[] = []
    const promptAsyncCalls: string[] = []
    const client = {
      session: {
        promptAsync: async (args: { path: { id: string } }) => {
          promptAsyncCalls.push(args.path.id)
          if (args.path.id === "session-resume-error") {
            throw new Error("resume prompt exploded")
          }
          return {}
        },
        abort: async (args: { path: { id: string } }) => {
          abortedSessionIDs.push(args.path.id)
          return {}
        },
        messages: async () => ({ data: [] }),
      },
    }
    const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput)
    const task: BackgroundTask = {
      id: "task-resume-error",
      sessionID: "session-resume-error",
      parentSessionID: "parent-before-resume-error",
      parentMessageID: "msg-before-resume-error",
      description: "resume prompt error",
      prompt: "resume prompt",
      agent: "explore",
      status: "completed",
      startedAt: new Date(),
      completedAt: new Date(),
      concurrencyGroup: "explore",
    }
    getTaskMap(manager).set(task.id, task)
    //#when
    await manager.resume({
      sessionId: "session-resume-error",
      prompt: "resume now",
      parentSessionID: "parent-resume-error",
      parentMessageID: "msg-resume-error",
    })
    await flushBackgroundNotifications()
    await new Promise((resolve) => setTimeout(resolve, 10))
    //#then
    // Unlike launch, resume surfaces the raw error message without reformatting.
    expect(task.status).toBe("interrupt")
    expect(task.error).toBe("resume prompt exploded")
    expect(task.concurrencyKey).toBeUndefined()
    expect(task.completedAt).toBeInstanceOf(Date)
    expect(getPendingByParent(manager).get("parent-resume-error")).toBeUndefined()
    expect(abortedSessionIDs).toContain("session-resume-error")
    expect(promptAsyncCalls).toContain("parent-resume-error")
    manager.shutdown()
  })
  test("trackTask should move pending bookkeeping when parent session changes", async () => {
    //#given
    const manager = createBackgroundManager()
    stubNotifyParentSession(manager)
    const existingTask: BackgroundTask = {
      id: "task-parent-move",
      sessionID: "session-parent-move",
      parentSessionID: "parent-before-move",
      parentMessageID: "msg-before-move",
      description: "tracked external task",
      prompt: "",
      agent: "task",
      status: "running",
      startedAt: new Date(),
      progress: {
        toolCalls: 0,
        lastUpdate: new Date(),
      },
    }
    getTaskMap(manager).set(existingTask.id, existingTask)
    getPendingByParent(manager).set("parent-before-move", new Set([existingTask.id]))
    //#when
    await manager.trackTask({
      taskId: existingTask.id,
      sessionID: existingTask.sessionID!,
      parentSessionID: "parent-after-move",
      description: existingTask.description,
      agent: existingTask.agent,
    })
    //#then
    expect(getPendingByParent(manager).get("parent-before-move")).toBeUndefined()
    expect(getPendingByParent(manager).get("parent-after-move")?.has(existingTask.id)).toBe(true)
    manager.shutdown()
  })
})
describe("BackgroundManager.resume concurrency key", () => {
let manager: BackgroundManager
@@ -2111,6 +2294,254 @@ describe("BackgroundManager - Non-blocking Queue Integration", () => {
// then
await expect(result).rejects.toThrow("background_task.maxDescendants cannot be enforced safely")
})
  // Regression: pre-start cancellation and creation-failure paths must release
  // descendant quota and never resurrect a cancelled task's session.
  test("should release descendant quota when queued task is cancelled before session starts", async () => {
    // given
    manager.shutdown()
    manager = new BackgroundManager(
      {
        client: createMockClientWithSessionChain({
          "session-root": { directory: "/test/dir" },
        }),
        directory: tmpdir(),
      } as unknown as PluginInput,
      { defaultConcurrency: 1, maxDescendants: 2 },
    )
    const input = {
      description: "Test task",
      prompt: "Do something",
      agent: "test-agent",
      parentSessionID: "session-root",
      parentMessageID: "parent-message",
    }
    await manager.launch(input)
    const queuedTask = await manager.launch(input)
    await new Promise(resolve => setTimeout(resolve, 50))
    expect(manager.getTask(queuedTask.id)?.status).toBe("pending")
    // when
    const cancelled = manager.cancelPendingTask(queuedTask.id)
    // With maxDescendants: 2, this launch only fits if the cancelled task's quota was released.
    const replacementTask = await manager.launch(input)
    // then
    expect(cancelled).toBe(true)
    expect(replacementTask.status).toBe("pending")
  })
  test("should release descendant quota when session creation fails before session starts", async () => {
    // given
    let createAttempts = 0
    manager.shutdown()
    manager = new BackgroundManager(
      {
        client: {
          session: {
            // First create call fails; subsequent calls succeed.
            create: async () => {
              createAttempts += 1
              if (createAttempts === 1) {
                return { error: "session create failed", data: undefined }
              }
              return { data: { id: `ses_${crypto.randomUUID()}` } }
            },
            get: async () => ({ data: { directory: "/test/dir" } }),
            prompt: async () => ({}),
            promptAsync: async () => ({}),
            messages: async () => ({ data: [] }),
            todo: async () => ({ data: [] }),
            status: async () => ({ data: {} }),
            abort: async () => ({}),
          },
        },
        directory: tmpdir(),
      } as unknown as PluginInput,
      { maxDescendants: 1 },
    )
    const input = {
      description: "Test task",
      prompt: "Do something",
      agent: "test-agent",
      parentSessionID: "session-root",
      parentMessageID: "parent-message",
    }
    await manager.launch(input)
    await new Promise(resolve => setTimeout(resolve, 50))
    expect(createAttempts).toBe(1)
    // when
    // With maxDescendants: 1, the retry only fits if the failed launch released its quota.
    const retryTask = await manager.launch(input)
    // then
    expect(retryTask.status).toBe("pending")
  })
  test("should keep the next queued task when the first task is cancelled during session creation", async () => {
    // given
    const firstSessionID = "ses-first-cancelled-during-create"
    const secondSessionID = "ses-second-survives-queue"
    let createCallCount = 0
    let resolveFirstCreate: ((value: { data: { id: string } }) => void) | undefined
    let resolveFirstCreateStarted: (() => void) | undefined
    let resolveSecondPromptAsync: (() => void) | undefined
    const firstCreateStarted = new Promise<void>((resolve) => {
      resolveFirstCreateStarted = resolve
    })
    const secondPromptAsyncStarted = new Promise<void>((resolve) => {
      resolveSecondPromptAsync = resolve
    })
    manager.shutdown()
    manager = new BackgroundManager(
      {
        client: {
          session: {
            // The first create call blocks until the test resolves it manually,
            // holding the first task in its in-flight-creation window.
            create: async () => {
              createCallCount += 1
              if (createCallCount === 1) {
                resolveFirstCreateStarted?.()
                return await new Promise<{ data: { id: string } }>((resolve) => {
                  resolveFirstCreate = resolve
                })
              }
              return { data: { id: secondSessionID } }
            },
            get: async () => ({ data: { directory: "/test/dir" } }),
            prompt: async () => ({}),
            promptAsync: async ({ path }: { path: { id: string } }) => {
              if (path.id === secondSessionID) {
                resolveSecondPromptAsync?.()
              }
              return {}
            },
            messages: async () => ({ data: [] }),
            todo: async () => ({ data: [] }),
            status: async () => ({ data: {} }),
            abort: async () => ({}),
          },
        },
        directory: tmpdir(),
      } as unknown as PluginInput,
      { defaultConcurrency: 1 }
    )
    const input = {
      description: "Test task",
      prompt: "Do something",
      agent: "test-agent",
      parentSessionID: "parent-session",
      parentMessageID: "parent-message",
    }
    const firstTask = await manager.launch(input)
    const secondTask = await manager.launch(input)
    await firstCreateStarted
    // when
    const cancelled = await manager.cancelTask(firstTask.id, {
      source: "test",
      abortSession: false,
    })
    resolveFirstCreate?.({ data: { id: firstSessionID } })
    await Promise.race([
      secondPromptAsyncStarted,
      new Promise<never>((_, reject) => setTimeout(() => reject(new Error("timeout")), 100)),
    ])
    // then
    expect(cancelled).toBe(true)
    expect(createCallCount).toBe(2)
    expect(manager.getTask(firstTask.id)?.status).toBe("cancelled")
    expect(manager.getTask(secondTask.id)?.status).toBe("running")
    expect(manager.getTask(secondTask.id)?.sessionID).toBe(secondSessionID)
  })
  test("should keep task cancelled and abort the session when cancellation wins during session creation", async () => {
    // given
    const createdSessionID = "ses-cancelled-during-create"
    let resolveCreate: ((value: { data: { id: string } }) => void) | undefined
    let resolveCreateStarted: (() => void) | undefined
    let resolveAbortCalled: (() => void) | undefined
    const createStarted = new Promise<void>((resolve) => {
      resolveCreateStarted = resolve
    })
    const abortCalled = new Promise<void>((resolve) => {
      resolveAbortCalled = resolve
    })
    const abortCalls: string[] = []
    const promptAsyncSessionIDs: string[] = []
    manager.shutdown()
    manager = new BackgroundManager(
      {
        client: {
          session: {
            create: async () => {
              resolveCreateStarted?.()
              return await new Promise<{ data: { id: string } }>((resolve) => {
                resolveCreate = resolve
              })
            },
            get: async () => ({ data: { directory: "/test/dir" } }),
            prompt: async () => ({}),
            promptAsync: async ({ path }: { path: { id: string } }) => {
              promptAsyncSessionIDs.push(path.id)
              return {}
            },
            messages: async () => ({ data: [] }),
            todo: async () => ({ data: [] }),
            status: async () => ({ data: {} }),
            abort: async ({ path }: { path: { id: string } }) => {
              abortCalls.push(path.id)
              resolveAbortCalled?.()
              return {}
            },
          },
        },
        directory: tmpdir(),
      } as unknown as PluginInput,
      { defaultConcurrency: 1 }
    )
    const input = {
      description: "Test task",
      prompt: "Do something",
      agent: "test-agent",
      parentSessionID: "parent-session",
      parentMessageID: "parent-message",
    }
    const task = await manager.launch(input)
    await createStarted
    // when
    // Cancel while session.create is still in flight, then let creation complete late.
    const cancelled = await manager.cancelTask(task.id, {
      source: "test",
      abortSession: false,
    })
    resolveCreate?.({ data: { id: createdSessionID } })
    await Promise.race([
      abortCalled,
      new Promise<never>((_, reject) => setTimeout(() => reject(new Error("timeout")), 100)),
    ])
    await Promise.resolve()
    // then
    // The late-created session must be aborted, never prompted, and never attached to the task.
    const updatedTask = manager.getTask(task.id)
    expect(cancelled).toBe(true)
    expect(updatedTask?.status).toBe("cancelled")
    expect(updatedTask?.sessionID).toBeUndefined()
    expect(promptAsyncSessionIDs).not.toContain(createdSessionID)
    expect(abortCalls).toEqual([createdSessionID])
    expect(getConcurrencyManager(manager).getCount("test-agent")).toBe(0)
  })
})
describe("pending task can be cancelled", () => {
@@ -3785,7 +4216,7 @@ describe("BackgroundManager.completionTimers - Memory Leak Fix", () => {
manager.shutdown()
})
test("should start cleanup timers only after all tasks complete", async () => {
test("should start per-task cleanup timers independently of sibling completion", async () => {
// given
const client = {
session: {
@@ -3832,7 +4263,7 @@ describe("BackgroundManager.completionTimers - Memory Leak Fix", () => {
// then
const completionTimers = getCompletionTimers(manager)
expect(completionTimers.size).toBe(0)
expect(completionTimers.size).toBe(1)
// when
await (manager as unknown as { notifyParentSession: (task: BackgroundTask) => Promise<void> })

View File

@@ -116,14 +116,16 @@ export class BackgroundManager {
private config?: BackgroundTaskConfig
private tmuxEnabled: boolean
private onSubagentSessionCreated?: OnSubagentSessionCreated
private onShutdown?: () => void
private onShutdown?: () => void | Promise<void>
private queuesByKey: Map<string, QueueItem[]> = new Map()
private processingKeys: Set<string> = new Set()
private completionTimers: Map<string, ReturnType<typeof setTimeout>> = new Map()
private completedTaskSummaries: Map<string, Array<{id: string, description: string}>> = new Map()
private idleDeferralTimers: Map<string, ReturnType<typeof setTimeout>> = new Map()
private notificationQueueByParent: Map<string, Promise<void>> = new Map()
private rootDescendantCounts: Map<string, number>
private preStartDescendantReservations: Set<string>
private enableParentSessionNotifications: boolean
readonly taskHistory = new TaskHistory()
@@ -133,7 +135,7 @@ export class BackgroundManager {
options?: {
tmuxConfig?: TmuxConfig
onSubagentSessionCreated?: OnSubagentSessionCreated
onShutdown?: () => void
onShutdown?: () => void | Promise<void>
enableParentSessionNotifications?: boolean
}
) {
@@ -149,6 +151,7 @@ export class BackgroundManager {
this.onSubagentSessionCreated = options?.onSubagentSessionCreated
this.onShutdown = options?.onShutdown
this.rootDescendantCounts = new Map()
this.preStartDescendantReservations = new Set()
this.enableParentSessionNotifications = options?.enableParentSessionNotifications ?? true
this.registerProcessCleanup()
}
@@ -219,6 +222,26 @@ export class BackgroundManager {
this.rootDescendantCounts.set(rootSessionID, currentCount - 1)
}
  /** Records that `task` holds a descendant-quota slot reserved before its session started. */
  private markPreStartDescendantReservation(task: BackgroundTask): void {
    this.preStartDescendantReservations.add(task.id)
  }
  /** Clears the pre-start marker once the session starts, so a later rollback will not release the quota the running task still holds. */
  private settlePreStartDescendantReservation(task: BackgroundTask): void {
    this.preStartDescendantReservations.delete(task.id)
  }
  /** Releases the descendant-quota slot for a task that never started (cancelled or failed pre-start). */
  private rollbackPreStartDescendantReservation(task: BackgroundTask): void {
    // delete() returning false means this task never reserved, or was already settled/rolled back.
    if (!this.preStartDescendantReservations.delete(task.id)) {
      return
    }
    // Without a root session there is no per-root counter to decrement.
    if (!task.rootSessionID) {
      return
    }
    this.unregisterRootDescendant(task.rootSessionID)
  }
async launch(input: LaunchInput): Promise<BackgroundTask> {
log("[background-agent] launch() called with:", {
agent: input.agent,
@@ -268,11 +291,7 @@ export class BackgroundManager {
this.taskHistory.record(input.parentSessionID, { id: task.id, agent: input.agent, description: input.description, status: "pending", category: input.category })
// Track for batched notifications immediately (pending state)
if (input.parentSessionID) {
const pending = this.pendingByParent.get(input.parentSessionID) ?? new Set()
pending.add(task.id)
this.pendingByParent.set(input.parentSessionID, pending)
}
this.registerPendingTaskForParent(input.parentSessionID, task.id)
// Add to queue
const key = this.getConcurrencyKeyFromInput(input)
@@ -295,6 +314,7 @@ export class BackgroundManager {
}
spawnReservation.commit()
this.markPreStartDescendantReservation(task)
// Trigger processing (fire-and-forget)
this.processKey(key)
@@ -316,13 +336,16 @@ export class BackgroundManager {
try {
const queue = this.queuesByKey.get(key)
while (queue && queue.length > 0) {
const item = queue[0]
const item = queue.shift()
if (!item) {
continue
}
await this.concurrencyManager.acquire(key)
if (item.task.status === "cancelled" || item.task.status === "error" || item.task.status === "interrupt") {
this.rollbackPreStartDescendantReservation(item.task)
this.concurrencyManager.release(key)
queue.shift()
continue
}
@@ -330,6 +353,7 @@ export class BackgroundManager {
await this.startTask(item)
} catch (error) {
log("[background-agent] Error starting task:", error)
this.rollbackPreStartDescendantReservation(item.task)
if (item.task.concurrencyKey) {
this.concurrencyManager.release(item.task.concurrencyKey)
item.task.concurrencyKey = undefined
@@ -337,8 +361,6 @@ export class BackgroundManager {
this.concurrencyManager.release(key)
}
}
queue.shift()
}
} finally {
this.processingKeys.delete(key)
@@ -385,6 +407,18 @@ export class BackgroundManager {
}
const sessionID = createResult.data.id
if (task.status === "cancelled") {
await this.client.session.abort({
path: { id: sessionID },
}).catch((error) => {
log("[background-agent] Failed to abort cancelled pre-start session:", error)
})
this.concurrencyManager.release(concurrencyKey)
return
}
this.settlePreStartDescendantReservation(task)
subagentSessions.add(sessionID)
log("[background-agent] tmux callback check", {
@@ -443,59 +477,21 @@ export class BackgroundManager {
// Include model if caller provided one (e.g., from Sisyphus category configs)
// IMPORTANT: variant must be a top-level field in the body, NOT nested inside model
// OpenCode's PromptInput schema expects: { model: { providerID, modelID }, variant: "max" }
const launchModel = input.model
? { providerID: input.model.providerID, modelID: input.model.modelID }
: undefined
const launchVariant = input.model?.variant
promptWithModelSuggestionRetry(this.client, {
path: { id: sessionID },
body: {
body: this.buildTaskPromptBody({
sessionID,
agent: input.agent,
...(launchModel ? { model: launchModel } : {}),
...(launchVariant ? { variant: launchVariant } : {}),
system: input.skillContent,
tools: (() => {
const tools = {
task: false,
call_omo_agent: true,
question: false,
...getAgentToolRestrictions(input.agent),
}
setSessionTools(sessionID, tools)
return tools
})(),
parts: [createInternalAgentTextPart(input.prompt)],
},
model: input.model,
skillContent: input.skillContent,
prompt: input.prompt,
}),
}).catch((error) => {
log("[background-agent] promptAsync error:", error)
const existingTask = this.findBySession(sessionID)
if (existingTask) {
existingTask.status = "interrupt"
const errorMessage = error instanceof Error ? error.message : String(error)
if (errorMessage.includes("agent.name") || errorMessage.includes("undefined")) {
existingTask.error = `Agent "${input.agent}" not found. Make sure the agent is registered in your opencode.json or provided by a plugin.`
} else {
existingTask.error = errorMessage
}
existingTask.completedAt = new Date()
if (existingTask.concurrencyKey) {
this.concurrencyManager.release(existingTask.concurrencyKey)
existingTask.concurrencyKey = undefined
}
removeTaskToastTracking(existingTask.id)
// Abort the session to prevent infinite polling hang
this.client.session.abort({
path: { id: sessionID },
}).catch(() => {})
this.markForNotification(existingTask)
this.enqueueNotificationForParent(existingTask.parentSessionID, () => this.notifyParentSession(existingTask)).catch(err => {
log("[background-agent] Failed to notify on error:", err)
})
}
this.handlePromptDispatchError(task, error, {
agentName: input.agent,
errorLogLabel: "[background-agent] promptAsync error:",
notifyLogLabel: "[background-agent] Failed to notify on error:",
})
})
}
@@ -544,6 +540,95 @@ export class BackgroundManager {
return input.agent
}
private registerPendingTaskForParent(parentSessionID: string | undefined, taskId: string): void {
if (!parentSessionID) {
return
}
const pending = this.pendingByParent.get(parentSessionID) ?? new Set<string>()
pending.add(taskId)
this.pendingByParent.set(parentSessionID, pending)
}
private buildTaskPromptTools(sessionID: string, agent: string): Record<string, boolean> {
const tools = {
task: false,
call_omo_agent: true,
question: false,
...getAgentToolRestrictions(agent),
}
setSessionTools(sessionID, tools)
return tools
}
// Assembles the prompt body shared by launch and resume dispatch paths.
// Optional fields (model, variant, system) are attached only when present so
// the serialized body omits them instead of sending explicit undefined.
// NOTE: variant must be a top-level field, not nested inside model.
private buildTaskPromptBody(options: {
  sessionID: string
  agent: string
  model?: BackgroundTask["model"]
  skillContent?: string
  prompt: string
}): {
  agent: string
  model?: { providerID: string; modelID: string }
  variant?: string
  system?: string
  tools: Record<string, boolean>
  parts: ReturnType<typeof createInternalAgentTextPart>[]
} {
  const optionalFields: { model?: { providerID: string; modelID: string }; variant?: string; system?: string } = {}
  if (options.model) {
    optionalFields.model = { providerID: options.model.providerID, modelID: options.model.modelID }
    if (options.model.variant) {
      optionalFields.variant = options.model.variant
    }
  }
  if (options.skillContent) {
    optionalFields.system = options.skillContent
  }
  return {
    agent: options.agent,
    ...optionalFields,
    tools: this.buildTaskPromptTools(options.sessionID, options.agent),
    parts: [createInternalAgentTextPart(options.prompt)],
  }
}
// Shared cleanup path for a failed prompt dispatch (launch or resume):
// marks the task interrupted, releases its concurrency slot, drops toast
// tracking, best-effort aborts the session, and enqueues the parent
// notification. `agentName`, when provided, enables the friendlier
// missing-agent error message used by the launch path.
private handlePromptDispatchError(
  task: BackgroundTask,
  error: unknown,
  options: {
    agentName?: string
    errorLogLabel: string
    notifyLogLabel: string
  }
): void {
  log(options.errorLogLabel, error)
  task.status = "interrupt"
  const message = error instanceof Error ? error.message : String(error)
  // Heuristic: these substrings indicate the agent was never registered.
  const looksLikeMissingAgent =
    !!options.agentName && (message.includes("agent.name") || message.includes("undefined"))
  task.error = looksLikeMissingAgent
    ? `Agent "${options.agentName}" not found. Make sure the agent is registered in your opencode.json or provided by a plugin.`
    : message
  task.completedAt = new Date()
  // Release the concurrency slot so a failed dispatch cannot leak capacity.
  if (task.concurrencyKey) {
    this.concurrencyManager.release(task.concurrencyKey)
    task.concurrencyKey = undefined
  }
  removeTaskToastTracking(task.id)
  // Abort the session (best effort) to prevent an infinite polling hang.
  if (task.sessionID) {
    this.client.session.abort({ path: { id: task.sessionID } }).catch(() => {})
  }
  this.markForNotification(task)
  this.enqueueNotificationForParent(task.parentSessionID, () => this.notifyParentSession(task)).catch((notifyError) => {
    log(options.notifyLogLabel, notifyError)
  })
}
/**
* Track a task created elsewhere (e.g., from task) for notification tracking.
* This allows tasks created by other tools to receive the same toast/prompt notifications.
@@ -580,9 +665,7 @@ export class BackgroundManager {
// Track for batched notifications if task is pending or running
if (existingTask.status === "pending" || existingTask.status === "running") {
const pending = this.pendingByParent.get(input.parentSessionID) ?? new Set()
pending.add(existingTask.id)
this.pendingByParent.set(input.parentSessionID, pending)
this.registerPendingTaskForParent(input.parentSessionID, existingTask.id)
} else if (!parentChanged) {
// Only clean up if parent didn't change (already cleaned above if it did)
this.cleanupPendingByParent(existingTask)
@@ -624,11 +707,7 @@ export class BackgroundManager {
this.startPolling()
this.taskHistory.record(input.parentSessionID, { id: task.id, sessionID: input.sessionID, agent: input.agent || "task", description: input.description, status: "running", startedAt: task.startedAt })
if (input.parentSessionID) {
const pending = this.pendingByParent.get(input.parentSessionID) ?? new Set()
pending.add(task.id)
this.pendingByParent.set(input.parentSessionID, pending)
}
this.registerPendingTaskForParent(input.parentSessionID, task.id)
log("[background-agent] Registered external task:", { taskId: task.id, sessionID: input.sessionID })
@@ -690,11 +769,7 @@ export class BackgroundManager {
subagentSessions.add(existingTask.sessionID)
}
if (input.parentSessionID) {
const pending = this.pendingByParent.get(input.parentSessionID) ?? new Set()
pending.add(existingTask.id)
this.pendingByParent.set(input.parentSessionID, pending)
}
this.registerPendingTaskForParent(input.parentSessionID, existingTask.id)
const toastManager = getTaskToastManager()
if (toastManager) {
@@ -718,54 +793,18 @@ export class BackgroundManager {
// Fire-and-forget prompt via promptAsync (no response body needed)
// Include model if task has one (preserved from original launch with category config)
// variant must be top-level in body, not nested inside model (OpenCode PromptInput schema)
const resumeModel = existingTask.model
? { providerID: existingTask.model.providerID, modelID: existingTask.model.modelID }
: undefined
const resumeVariant = existingTask.model?.variant
this.client.session.promptAsync({
path: { id: existingTask.sessionID },
body: {
body: this.buildTaskPromptBody({
sessionID: existingTask.sessionID,
agent: existingTask.agent,
...(resumeModel ? { model: resumeModel } : {}),
...(resumeVariant ? { variant: resumeVariant } : {}),
tools: (() => {
const tools = {
task: false,
call_omo_agent: true,
question: false,
...getAgentToolRestrictions(existingTask.agent),
}
setSessionTools(existingTask.sessionID!, tools)
return tools
})(),
parts: [createInternalAgentTextPart(input.prompt)],
},
model: existingTask.model,
prompt: input.prompt,
}),
}).catch((error) => {
log("[background-agent] resume prompt error:", error)
existingTask.status = "interrupt"
const errorMessage = error instanceof Error ? error.message : String(error)
existingTask.error = errorMessage
existingTask.completedAt = new Date()
// Release concurrency on error to prevent slot leaks
if (existingTask.concurrencyKey) {
this.concurrencyManager.release(existingTask.concurrencyKey)
existingTask.concurrencyKey = undefined
}
removeTaskToastTracking(existingTask.id)
// Abort the session to prevent infinite polling hang
if (existingTask.sessionID) {
this.client.session.abort({
path: { id: existingTask.sessionID },
}).catch(() => {})
}
this.markForNotification(existingTask)
this.enqueueNotificationForParent(existingTask.parentSessionID, () => this.notifyParentSession(existingTask)).catch(err => {
log("[background-agent] Failed to notify on resume error:", err)
this.handlePromptDispatchError(existingTask, error, {
errorLogLabel: "[background-agent] resume prompt error:",
notifyLogLabel: "[background-agent] Failed to notify on resume error:",
})
})
@@ -906,6 +945,13 @@ export class BackgroundManager {
this.idleDeferralTimers.delete(task.id)
}
this.cleanupPendingByParent(task)
this.clearNotificationsForTask(task.id)
const toastManager = getTaskToastManager()
if (toastManager) {
toastManager.removeTask(task.id)
}
this.scheduleTaskRemoval(task.id)
if (task.sessionID) {
SessionCategoryRegistry.remove(task.sessionID)
}
@@ -932,7 +978,12 @@ export class BackgroundManager {
this.pendingNotifications.delete(sessionID)
if (tasksToCancel.size === 0) return
if (tasksToCancel.size === 0) {
this.clearTaskHistoryWhenParentTasksGone(sessionID)
return
}
const parentSessionsToClear = new Set<string>()
const deletedSessionIDs = new Set<string>([sessionID])
for (const task of tasksToCancel.values()) {
@@ -942,6 +993,8 @@ export class BackgroundManager {
}
for (const task of tasksToCancel.values()) {
parentSessionsToClear.add(task.parentSessionID)
if (task.status === "running" || task.status === "pending") {
void this.cancelTask(task.id, {
source: "session.deleted",
@@ -959,6 +1012,10 @@ export class BackgroundManager {
}
}
for (const parentSessionID of parentSessionsToClear) {
this.clearTaskHistoryWhenParentTasksGone(parentSessionID)
}
this.rootDescendantCounts.delete(sessionID)
SessionCategoryRegistry.remove(sessionID)
}
@@ -1125,6 +1182,39 @@ export class BackgroundManager {
}
}
// Clears a parent session's task history and completed-task summaries, but
// only once no tracked tasks remain for that parent — history must survive
// while any sibling task is still alive.
private clearTaskHistoryWhenParentTasksGone(parentSessionID: string | undefined): void {
  if (!parentSessionID) return
  const remaining = this.getTasksByParentSession(parentSessionID)
  if (remaining.length > 0) return
  this.taskHistory.clearSession(parentSessionID)
  this.completedTaskSummaries.delete(parentSessionID)
}
// Schedules delayed removal of a finished task from memory. Debounced: any
// previously scheduled removal for the same task is replaced. When the timer
// fires, the task is dropped from this.tasks, its notifications are cleared,
// its session bookkeeping (subagentSessions / SessionCategoryRegistry) is
// released, and parent history is cleared once no sibling tasks remain.
//
// Fix: clearTaskHistoryWhenParentTasksGone was invoked twice in the same
// branch (the second time with a pointless `task?.` optional chain after
// `task` was already known truthy); the redundant second call is removed.
private scheduleTaskRemoval(taskId: string): void {
  const existingTimer = this.completionTimers.get(taskId)
  if (existingTimer) {
    clearTimeout(existingTimer)
    this.completionTimers.delete(taskId)
  }
  const timer = setTimeout(() => {
    this.completionTimers.delete(taskId)
    const task = this.tasks.get(taskId)
    if (task) {
      this.clearNotificationsForTask(taskId)
      // Delete from this.tasks first so the "parent tasks gone" check below
      // observes the post-removal state.
      this.tasks.delete(taskId)
      this.clearTaskHistoryWhenParentTasksGone(task.parentSessionID)
      if (task.sessionID) {
        subagentSessions.delete(task.sessionID)
        SessionCategoryRegistry.remove(task.sessionID)
      }
      log("[background-agent] Removed completed task from memory:", taskId)
    }
  }, TASK_CLEANUP_DELAY_MS)
  this.completionTimers.set(taskId, timer)
}
async cancelTask(
taskId: string,
options?: { source?: string; reason?: string; abortSession?: boolean; skipNotification?: boolean }
@@ -1152,6 +1242,7 @@ export class BackgroundManager {
}
}
}
this.rollbackPreStartDescendantReservation(task)
log("[background-agent] Cancelled pending task:", { taskId, key })
}
@@ -1190,6 +1281,8 @@ export class BackgroundManager {
removeTaskToastTracking(task.id)
if (options?.skipNotification) {
this.cleanupPendingByParent(task)
this.scheduleTaskRemoval(task.id)
log(`[background-agent] Task cancelled via ${source} (notification skipped):`, task.id)
return true
}
@@ -1328,6 +1421,14 @@ export class BackgroundManager {
})
}
if (!this.completedTaskSummaries.has(task.parentSessionID)) {
this.completedTaskSummaries.set(task.parentSessionID, [])
}
this.completedTaskSummaries.get(task.parentSessionID)!.push({
id: task.id,
description: task.description,
})
// Update pending tracking and check if all tasks complete
const pendingSet = this.pendingByParent.get(task.parentSessionID)
let allComplete = false
@@ -1347,10 +1448,13 @@ export class BackgroundManager {
}
const completedTasks = allComplete
? Array.from(this.tasks.values())
.filter(t => t.parentSessionID === task.parentSessionID && t.status !== "running" && t.status !== "pending")
? (this.completedTaskSummaries.get(task.parentSessionID) ?? [{ id: task.id, description: task.description }])
: []
if (allComplete) {
this.completedTaskSummaries.delete(task.parentSessionID)
}
const statusText = task.status === "completed"
? "COMPLETED"
: task.status === "interrupt"
@@ -1480,29 +1584,8 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
})
}
if (allComplete) {
for (const completedTask of completedTasks) {
const taskId = completedTask.id
const existingTimer = this.completionTimers.get(taskId)
if (existingTimer) {
clearTimeout(existingTimer)
this.completionTimers.delete(taskId)
}
const timer = setTimeout(() => {
this.completionTimers.delete(taskId)
const taskToRemove = this.tasks.get(taskId)
if (taskToRemove) {
this.clearNotificationsForTask(taskId)
if (taskToRemove.sessionID) {
subagentSessions.delete(taskToRemove.sessionID)
SessionCategoryRegistry.remove(taskToRemove.sessionID)
}
this.tasks.delete(taskId)
log("[background-agent] Removed completed task from memory:", taskId)
}
}, TASK_CLEANUP_DELAY_MS)
this.completionTimers.set(taskId, timer)
}
if (task.status !== "running" && task.status !== "pending") {
this.scheduleTaskRemoval(task.id)
}
}
@@ -1554,6 +1637,7 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
}
}
}
this.cleanupPendingByParent(task)
this.markForNotification(task)
this.enqueueNotificationForParent(task.parentSessionID, () => this.notifyParentSession(task)).catch(err => {
log("[background-agent] Error in notifyParentSession for stale-pruned task:", { taskId: task.id, error: err })
@@ -1657,14 +1741,19 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
* Cancels all pending concurrency waiters and clears timers.
* Should be called when the plugin is unloaded.
*/
shutdown(): void {
async shutdown(): Promise<void> {
if (this.shutdownTriggered) return
this.shutdownTriggered = true
log("[background-agent] Shutting down BackgroundManager")
this.stopPolling()
const trackedSessionIDs = new Set<string>()
// Abort all running sessions to prevent zombie processes (#1240)
for (const task of this.tasks.values()) {
if (task.sessionID) {
trackedSessionIDs.add(task.sessionID)
}
if (task.status === "running" && task.sessionID) {
this.client.session.abort({
path: { id: task.sessionID },
@@ -1675,7 +1764,7 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
// Notify shutdown listeners (e.g., tmux cleanup)
if (this.onShutdown) {
try {
this.onShutdown()
await this.onShutdown()
} catch (error) {
log("[background-agent] Error in onShutdown callback:", error)
}
@@ -1699,6 +1788,11 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
}
this.idleDeferralTimers.clear()
for (const sessionID of trackedSessionIDs) {
subagentSessions.delete(sessionID)
SessionCategoryRegistry.remove(sessionID)
}
this.concurrencyManager.clear()
this.tasks.clear()
this.notifications.clear()
@@ -1708,6 +1802,8 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
this.rootDescendantCounts.clear()
this.queuesByKey.clear()
this.processingKeys.clear()
this.taskHistory.clearAll()
this.completedTaskSummaries.clear()
this.unregisterProcessCleanup()
log("[background-agent] Shutdown complete")

View File

@@ -19,7 +19,7 @@ function registerProcessSignal(
}
interface CleanupTarget {
shutdown(): void
shutdown(): void | Promise<void>
}
const cleanupManagers = new Set<CleanupTarget>()
@@ -35,7 +35,9 @@ export function registerManagerForCleanup(manager: CleanupTarget): void {
const cleanupAll = () => {
for (const m of cleanupManagers) {
try {
m.shutdown()
void Promise.resolve(m.shutdown()).catch((error) => {
log("[background-agent] Error during async shutdown cleanup:", error)
})
} catch (error) {
log("[background-agent] Error during shutdown cleanup:", error)
}

View File

@@ -0,0 +1,245 @@
declare const require: (name: string) => any
const { describe, test, expect, afterEach } = require("bun:test")
import { tmpdir } from "node:os"
import type { PluginInput } from "@opencode-ai/plugin"
import { TASK_CLEANUP_DELAY_MS } from "./constants"
import { BackgroundManager } from "./manager"
import type { BackgroundTask } from "./types"
type PromptAsyncCall = {
path: { id: string }
body: {
noReply?: boolean
parts?: unknown[]
}
}
type FakeTimers = {
getDelay: (timer: ReturnType<typeof setTimeout>) => number | undefined
run: (timer: ReturnType<typeof setTimeout>) => void
restore: () => void
}
let managerUnderTest: BackgroundManager | undefined
let fakeTimers: FakeTimers | undefined
// Reset shared fixtures after every test: shut down any manager created by
// the test and restore the real global timer functions.
afterEach(() => {
managerUnderTest?.shutdown()
fakeTimers?.restore()
managerUnderTest = undefined
fakeTimers = undefined
})
// Builds a BackgroundTask fixture. `id` and `parentSessionID` are required;
// every other field gets a deterministic default unless overridden. Any extra
// override keys (e.g. completedAt, sessionID) are passed through, and an
// explicitly-undefined override wins over the default (spread-last semantics,
// matching the original helper).
function createTask(overrides: Partial<BackgroundTask> & { id: string; parentSessionID: string }): BackgroundTask {
  const { id, parentSessionID, ...rest } = overrides
  const withDefaults: BackgroundTask = {
    id,
    parentSessionID,
    parentMessageID: "parent-message-id",
    description: id,
    prompt: `Prompt for ${id}`,
    agent: "test-agent",
    status: "running",
    startedAt: new Date("2026-03-11T00:00:00.000Z"),
  }
  return { ...withDefaults, ...rest, id, parentSessionID }
}
// Builds a BackgroundManager wired to a recording stub client.
// Every promptAsync call is captured in promptAsyncCalls so tests can assert
// on notification payloads. The real client from PluginInput is replaced via
// Reflect.set because the manager's client field is private.
function createManager(enableParentSessionNotifications: boolean): {
manager: BackgroundManager
promptAsyncCalls: PromptAsyncCall[]
} {
const promptAsyncCalls: PromptAsyncCall[] = []
// Minimal session API surface the manager touches in these tests.
const client = {
session: {
messages: async () => [],
prompt: async () => ({}),
promptAsync: async (call: PromptAsyncCall) => {
promptAsyncCalls.push(call)
return {}
},
abort: async () => ({}),
},
}
const placeholderClient = {} as PluginInput["client"]
const ctx: PluginInput = {
client: placeholderClient,
project: {} as PluginInput["project"],
directory: tmpdir(),
worktree: tmpdir(),
serverUrl: new URL("http://localhost"),
$: {} as PluginInput["$"],
}
const manager = new BackgroundManager(
ctx,
undefined,
{ enableParentSessionNotifications }
)
// Swap in the recording stub after construction (field is private).
Reflect.set(manager, "client", client)
return { manager, promptAsyncCalls }
}
// Replaces globalThis.setTimeout/clearTimeout with deterministic fakes.
// Scheduled callbacks never fire on their own; tests inspect delays via
// getDelay and fire callbacks explicitly via run. restore() reinstates the
// real timer functions. Only function handlers are supported.
function installFakeTimers(): FakeTimers {
  const realSetTimeout = globalThis.setTimeout
  const realClearTimeout = globalThis.clearTimeout
  const pendingCallbacks = new Map<ReturnType<typeof setTimeout>, () => void>()
  const pendingDelays = new Map<ReturnType<typeof setTimeout>, number>()
  globalThis.setTimeout = ((handler: Parameters<typeof setTimeout>[0], delay?: number, ...args: unknown[]): ReturnType<typeof setTimeout> => {
    if (typeof handler !== "function") {
      throw new Error("Expected function timeout handler")
    }
    // Allocate a real (immediately cancelled) timer purely to obtain a
    // unique, correctly-typed timer handle.
    const handle = realSetTimeout(() => {}, 60_000)
    realClearTimeout(handle)
    const fn = handler as (...callbackArgs: Array<unknown>) => void
    pendingCallbacks.set(handle, () => fn(...args))
    pendingDelays.set(handle, delay ?? 0)
    return handle
  }) as typeof setTimeout
  globalThis.clearTimeout = ((handle: ReturnType<typeof setTimeout>): void => {
    pendingCallbacks.delete(handle)
    pendingDelays.delete(handle)
  }) as typeof clearTimeout
  return {
    getDelay(handle) {
      return pendingDelays.get(handle)
    },
    run(handle) {
      const fn = pendingCallbacks.get(handle)
      if (!fn) {
        throw new Error(`Timer not found: ${String(handle)}`)
      }
      pendingCallbacks.delete(handle)
      pendingDelays.delete(handle)
      fn()
    },
    restore() {
      globalThis.setTimeout = realSetTimeout
      globalThis.clearTimeout = realClearTimeout
    },
  }
}
// Test-only accessor for the manager's private `tasks` map via Reflect.
function getTasks(manager: BackgroundManager): Map<string, BackgroundTask> {
return Reflect.get(manager, "tasks") as Map<string, BackgroundTask>
}
// Test-only accessor for the private `pendingByParent` tracking map.
function getPendingByParent(manager: BackgroundManager): Map<string, Set<string>> {
return Reflect.get(manager, "pendingByParent") as Map<string, Set<string>>
}
// Test-only accessor for the private `completionTimers` map (task id -> timer).
function getCompletionTimers(manager: BackgroundManager): Map<string, ReturnType<typeof setTimeout>> {
return Reflect.get(manager, "completionTimers") as Map<string, ReturnType<typeof setTimeout>>
}
// Invokes the manager's private notifyParentSession method with the manager
// as receiver, via Reflect.
async function notifyParentSessionForTest(manager: BackgroundManager, task: BackgroundTask): Promise<void> {
  const method = Reflect.get(manager, "notifyParentSession") as (task: BackgroundTask) => Promise<void>
  await method.call(manager, task)
}
// Fetches the completion timer for a task, failing the test loudly when it is
// missing: expect() records the failure, the throw halts execution and gives
// TypeScript the non-undefined narrowing.
function getRequiredTimer(manager: BackgroundManager, taskID: string): ReturnType<typeof setTimeout> {
  const timer = getCompletionTimers(manager).get(taskID)
  expect(timer).toBeDefined()
  if (timer !== undefined) {
    return timer
  }
  throw new Error(`Missing completion timer for ${taskID}`)
}
// Regression suite: per-task cleanup timers scheduled by notifyParentSession.
// Each test injects tasks directly into the private maps, drives the private
// notification path, and fires the fake cleanup timers by hand.
describe("BackgroundManager.notifyParentSession cleanup scheduling", () => {
describe("#given 2 tasks for same parent and task A completed", () => {
test("#when task B is still running #then task A is cleaned up from this.tasks after delay even though task B is not done", async () => {
// given
const { manager } = createManager(false)
managerUnderTest = manager
fakeTimers = installFakeTimers()
const taskA = createTask({ id: "task-a", parentSessionID: "parent-1", description: "task A", status: "completed", completedAt: new Date("2026-03-11T00:01:00.000Z") })
const taskB = createTask({ id: "task-b", parentSessionID: "parent-1", description: "task B", status: "running" })
getTasks(manager).set(taskA.id, taskA)
getTasks(manager).set(taskB.id, taskB)
getPendingByParent(manager).set(taskA.parentSessionID, new Set([taskA.id, taskB.id]))
// when
await notifyParentSessionForTest(manager, taskA)
const taskATimer = getRequiredTimer(manager, taskA.id)
expect(fakeTimers.getDelay(taskATimer)).toBe(TASK_CLEANUP_DELAY_MS)
fakeTimers.run(taskATimer)
// then
expect(fakeTimers.getDelay(taskATimer)).toBeUndefined()
expect(getTasks(manager).has(taskA.id)).toBe(false)
expect(getTasks(manager).get(taskB.id)).toBe(taskB)
})
})
describe("#given 2 tasks for same parent and both completed", () => {
test("#when the second completion notification is sent #then ALL BACKGROUND TASKS COMPLETE notification still works correctly", async () => {
// given
const { manager, promptAsyncCalls } = createManager(true)
managerUnderTest = manager
fakeTimers = installFakeTimers()
const taskA = createTask({ id: "task-a", parentSessionID: "parent-1", description: "task A", status: "completed", completedAt: new Date("2026-03-11T00:01:00.000Z") })
const taskB = createTask({ id: "task-b", parentSessionID: "parent-1", description: "task B", status: "running" })
getTasks(manager).set(taskA.id, taskA)
getTasks(manager).set(taskB.id, taskB)
getPendingByParent(manager).set(taskA.parentSessionID, new Set([taskA.id, taskB.id]))
await notifyParentSessionForTest(manager, taskA)
taskB.status = "completed"
taskB.completedAt = new Date("2026-03-11T00:02:00.000Z")
// when
await notifyParentSessionForTest(manager, taskB)
// then
expect(promptAsyncCalls).toHaveLength(2)
expect(getCompletionTimers(manager).size).toBe(2)
const allCompleteCall = promptAsyncCalls[1]
expect(allCompleteCall).toBeDefined()
if (!allCompleteCall) {
throw new Error("Missing all-complete notification call")
}
expect(allCompleteCall.body.noReply).toBe(false)
// The all-complete notification must mention every sibling task.
const allCompletePayload = JSON.stringify(allCompleteCall.body.parts)
expect(allCompletePayload).toContain("ALL BACKGROUND TASKS COMPLETE")
expect(allCompletePayload).toContain(taskA.id)
expect(allCompletePayload).toContain(taskB.id)
expect(allCompletePayload).toContain(taskA.description)
expect(allCompletePayload).toContain(taskB.description)
})
})
describe("#given a completed task with cleanup timer scheduled", () => {
test("#when cleanup timer fires #then task is deleted from this.tasks Map", async () => {
// given
const { manager } = createManager(false)
managerUnderTest = manager
fakeTimers = installFakeTimers()
const task = createTask({ id: "task-a", parentSessionID: "parent-1", description: "task A", status: "completed", completedAt: new Date("2026-03-11T00:01:00.000Z") })
getTasks(manager).set(task.id, task)
getPendingByParent(manager).set(task.parentSessionID, new Set([task.id]))
await notifyParentSessionForTest(manager, task)
const cleanupTimer = getRequiredTimer(manager, task.id)
// when
expect(fakeTimers.getDelay(cleanupTimer)).toBe(TASK_CLEANUP_DELAY_MS)
fakeTimers.run(cleanupTimer)
// then
expect(getCompletionTimers(manager).has(task.id)).toBe(false)
expect(getTasks(manager).has(task.id)).toBe(false)
})
})
})

View File

@@ -0,0 +1,142 @@
import { afterEach, describe, expect, test } from "bun:test"
import { tmpdir } from "node:os"
import type { PluginInput } from "@opencode-ai/plugin"
import { BackgroundManager } from "./manager"
import { TaskHistory } from "./task-history"
import type { BackgroundTask } from "./types"
let managerUnderTest: BackgroundManager | undefined
// Shut down any manager created by the test so timers/listeners don't leak
// across tests.
afterEach(() => {
managerUnderTest?.shutdown()
managerUnderTest = undefined
})
// Builds a BackgroundManager with a minimal stub client (only session.abort),
// swapped in via Reflect.set because the client field is private.
function createManager(): BackgroundManager {
const client = {
session: {
abort: async () => ({}),
},
}
const placeholderClient = {} as PluginInput["client"]
const ctx: PluginInput = {
client: placeholderClient,
project: {} as PluginInput["project"],
directory: tmpdir(),
worktree: tmpdir(),
serverUrl: new URL("http://localhost"),
$: {} as PluginInput["$"],
}
const manager = new BackgroundManager(ctx)
// Replace the real client with the stub after construction.
Reflect.set(manager, "client", client)
return manager
}
// BackgroundTask fixture builder: required `id`/`parentSessionID` pass
// through unchanged, and each remaining field falls back to a deterministic
// default via `??` (so a nullish override also takes the default — matching
// the original helper's semantics).
function createTask(overrides: Partial<BackgroundTask> & { id: string; parentSessionID: string }): BackgroundTask {
  const { id, parentSessionID, ...rest } = overrides
  const task: BackgroundTask = {
    ...rest,
    id,
    parentSessionID,
    parentMessageID: rest.parentMessageID ?? "parent-message-id",
    description: rest.description ?? id,
    prompt: rest.prompt ?? `Prompt for ${id}`,
    agent: rest.agent ?? "test-agent",
    status: rest.status ?? "running",
    startedAt: rest.startedAt ?? new Date("2026-03-11T00:00:00.000Z"),
  }
  return task
}
// Test-only accessor for the manager's private `tasks` map via Reflect.
function getTaskMap(manager: BackgroundManager): Map<string, BackgroundTask> {
return Reflect.get(manager, "tasks") as Map<string, BackgroundTask>
}
// Invokes the manager's private pruneStaleTasksAndNotifications routine with
// the manager as receiver, via Reflect.
function pruneStaleTasksAndNotificationsForTest(manager: BackgroundManager): void {
  const prune = Reflect.get(manager, "pruneStaleTasksAndNotifications") as () => void
  Reflect.apply(prune, manager, [])
}
// Regression suite: TaskHistory lifecycle — per-parent clearSession, global
// clearAll on manager shutdown, and preservation of history while tasks are
// merely stale (cleanup is delayed, not immediate).
describe("task history cleanup", () => {
test("#given TaskHistory with entries for multiple parents #when clearSession called for one parent #then only that parent's entries are removed, others remain", () => {
// given
const history = new TaskHistory()
history.record("parent-1", { id: "task-1", agent: "explore", description: "task 1", status: "pending" })
history.record("parent-2", { id: "task-2", agent: "oracle", description: "task 2", status: "running" })
// when
history.clearSession("parent-1")
// then
expect(history.getByParentSession("parent-1")).toHaveLength(0)
expect(history.getByParentSession("parent-2")).toHaveLength(1)
})
test("#given TaskHistory with entries for multiple parents #when clearAll called #then all entries are removed", () => {
// given
const history = new TaskHistory()
history.record("parent-1", { id: "task-1", agent: "explore", description: "task 1", status: "pending" })
history.record("parent-2", { id: "task-2", agent: "oracle", description: "task 2", status: "running" })
// when
history.clearAll()
// then
expect(history.getByParentSession("parent-1")).toHaveLength(0)
expect(history.getByParentSession("parent-2")).toHaveLength(0)
})
test("#given BackgroundManager with taskHistory entries #when shutdown() called #then taskHistory is cleared via clearAll()", () => {
// given
const manager = createManager()
managerUnderTest = manager
manager.taskHistory.record("parent-1", { id: "task-1", agent: "explore", description: "task 1", status: "pending" })
// Spy on clearAll by wrapping it while preserving the original behavior.
let clearAllCalls = 0
const originalClearAll = manager.taskHistory.clearAll.bind(manager.taskHistory)
manager.taskHistory.clearAll = (): void => {
clearAllCalls += 1
originalClearAll()
}
// when
manager.shutdown()
// then
expect(clearAllCalls).toBe(1)
expect(manager.taskHistory.getByParentSession("parent-1")).toHaveLength(0)
managerUnderTest = undefined
})
test("#given BackgroundManager with stale tasks for one parent #when pruneStaleTasksAndNotifications() runs #then history is preserved until delayed cleanup", () => {
// given
const manager = createManager()
managerUnderTest = manager
const staleTask = createTask({
id: "task-stale",
parentSessionID: "parent-1",
startedAt: new Date(Date.now() - 31 * 60 * 1000),
})
const liveTask = createTask({
id: "task-live",
parentSessionID: "parent-2",
startedAt: new Date(),
})
getTaskMap(manager).set(staleTask.id, staleTask)
getTaskMap(manager).set(liveTask.id, liveTask)
manager.taskHistory.record("parent-1", { id: staleTask.id, agent: staleTask.agent, description: staleTask.description, status: staleTask.status })
manager.taskHistory.record("parent-2", { id: liveTask.id, agent: liveTask.agent, description: liveTask.description, status: liveTask.status })
// when
pruneStaleTasksAndNotificationsForTest(manager)
// then
expect(manager.taskHistory.getByParentSession("parent-1")).toHaveLength(1)
expect(manager.taskHistory.getByParentSession("parent-2")).toHaveLength(1)
})
})

View File

@@ -54,6 +54,10 @@ export class TaskHistory {
this.entries.delete(parentSessionID)
}
// Drops every recorded entry across all parent sessions (invoked by the
// manager's shutdown path).
clearAll(): void {
this.entries.clear()
}
formatForCompaction(parentSessionID: string): string | null {
const list = this.getByParentSession(parentSessionID)
if (list.length === 0) return null

View File

@@ -117,13 +117,13 @@ describe("checkAndInterruptStaleTasks", () => {
})
it("should use DEFAULT_MESSAGE_STALENESS_TIMEOUT_MS when messageStalenessTimeoutMs is not configured", async () => {
//#given — task started 15 minutes ago, no config for messageStalenessTimeoutMs
//#given — task started 35 minutes ago, no config for messageStalenessTimeoutMs
const task = createRunningTask({
startedAt: new Date(Date.now() - 15 * 60 * 1000),
startedAt: new Date(Date.now() - 35 * 60 * 1000),
progress: undefined,
})
//#when — default is 10 minutes (600_000ms)
//#when — default is 30 minutes (1_800_000ms)
await checkAndInterruptStaleTasks({
tasks: [task],
client: mockClient as never,

View File

@@ -2,6 +2,7 @@ import { join } from "path"
import { homedir } from "os"
import { getClaudeConfigDir } from "../../shared/claude-config-dir"
import { getOpenCodeConfigDir } from "../../shared/opencode-config-dir"
import { getOpenCodeSkillDirs } from "../../shared/opencode-command-dirs"
import type { CommandDefinition } from "../claude-code-command-loader/types"
import type { LoadedSkill } from "./types"
import { skillsToCommandDefinitionRecord } from "./skill-definition-record"
@@ -21,10 +22,11 @@ export async function loadProjectSkills(directory?: string): Promise<Record<stri
}
export async function loadOpencodeGlobalSkills(): Promise<Record<string, CommandDefinition>> {
const configDir = getOpenCodeConfigDir({ binary: "opencode" })
const opencodeSkillsDir = join(configDir, "skills")
const skills = await loadSkillsFromDir({ skillsDir: opencodeSkillsDir, scope: "opencode" })
return skillsToCommandDefinitionRecord(skills)
const skillDirs = getOpenCodeSkillDirs({ binary: "opencode" })
const allSkills = await Promise.all(
skillDirs.map(skillsDir => loadSkillsFromDir({ skillsDir, scope: "opencode" }))
)
return skillsToCommandDefinitionRecord(deduplicateSkillsByName(allSkills.flat()))
}
export async function loadOpencodeProjectSkills(directory?: string): Promise<Record<string, CommandDefinition>> {
@@ -107,9 +109,11 @@ export async function discoverProjectClaudeSkills(directory?: string): Promise<L
}
export async function discoverOpencodeGlobalSkills(): Promise<LoadedSkill[]> {
const configDir = getOpenCodeConfigDir({ binary: "opencode" })
const opencodeSkillsDir = join(configDir, "skills")
return loadSkillsFromDir({ skillsDir: opencodeSkillsDir, scope: "opencode" })
const skillDirs = getOpenCodeSkillDirs({ binary: "opencode" })
const allSkills = await Promise.all(
skillDirs.map(skillsDir => loadSkillsFromDir({ skillsDir, scope: "opencode" }))
)
return deduplicateSkillsByName(allSkills.flat())
}
export async function discoverOpencodeProjectSkills(directory?: string): Promise<LoadedSkill[]> {

View File

@@ -19,11 +19,13 @@ export function registerProcessCleanup(state: SkillMcpManagerState): void {
state.cleanupRegistered = true
const cleanup = async (): Promise<void> => {
state.shutdownGeneration++
for (const managed of state.clients.values()) {
await closeManagedClient(managed)
}
state.clients.clear()
state.pendingConnections.clear()
state.disconnectedSessions.clear()
}
// Note: Node's 'exit' event is synchronous-only, so we rely on signal handlers for async cleanup.
@@ -79,12 +81,23 @@ async function cleanupIdleClients(state: SkillMcpManagerState): Promise<void> {
}
}
if (state.clients.size === 0) {
if (state.clients.size === 0 && state.pendingConnections.size === 0) {
stopCleanupTimer(state)
unregisterProcessCleanup(state)
}
}
export async function disconnectSession(state: SkillMcpManagerState, sessionID: string): Promise<void> {
let hasPendingForSession = false
for (const key of state.pendingConnections.keys()) {
if (key.startsWith(`${sessionID}:`)) {
hasPendingForSession = true
break
}
}
if (hasPendingForSession) {
state.disconnectedSessions.set(sessionID, (state.disconnectedSessions.get(sessionID) ?? 0) + 1)
}
const keysToRemove: string[] = []
for (const [key, managed] of state.clients.entries()) {
@@ -96,22 +109,33 @@ export async function disconnectSession(state: SkillMcpManagerState, sessionID:
}
}
for (const key of state.pendingConnections.keys()) {
if (key.startsWith(`${sessionID}:`)) {
keysToRemove.push(key)
}
}
for (const key of keysToRemove) {
state.pendingConnections.delete(key)
}
if (state.clients.size === 0) {
if (state.clients.size === 0 && state.pendingConnections.size === 0) {
stopCleanupTimer(state)
unregisterProcessCleanup(state)
}
}
export async function disconnectAll(state: SkillMcpManagerState): Promise<void> {
state.shutdownGeneration++
state.disposed = true
stopCleanupTimer(state)
unregisterProcessCleanup(state)
const clients = Array.from(state.clients.values())
state.clients.clear()
state.pendingConnections.clear()
state.disconnectedSessions.clear()
state.inFlightConnections.clear()
state.authProviders.clear()
for (const managed of clients) {

View File

@@ -0,0 +1,291 @@
import { afterEach, beforeEach, describe, expect, it, mock } from "bun:test"
import type { ClaudeCodeMcpServer } from "../claude-code-mcp-loader/types"
import type { SkillMcpClientInfo, SkillMcpManagerState } from "./types"
type Deferred<TValue> = {
promise: Promise<TValue>
resolve: (value: TValue) => void
reject: (error: Error) => void
}
const pendingConnects: Deferred<void>[] = []
const trackedStates: SkillMcpManagerState[] = []
const createdClients: MockClient[] = []
const createdTransports: MockStdioClientTransport[] = []
class MockClient {
readonly close = mock(async () => {})
constructor(
_clientInfo: { name: string; version: string },
_options: { capabilities: Record<string, never> }
) {
createdClients.push(this)
}
async connect(_transport: MockStdioClientTransport): Promise<void> {
const pendingConnect = pendingConnects.shift()
if (pendingConnect) {
await pendingConnect.promise
}
}
}
class MockStdioClientTransport {
readonly close = mock(async () => {})
constructor(_options: { command: string; args?: string[]; env?: Record<string, string>; stderr?: string }) {
createdTransports.push(this)
}
}
mock.module("@modelcontextprotocol/sdk/client/index.js", () => ({
Client: MockClient,
}))
mock.module("@modelcontextprotocol/sdk/client/stdio.js", () => ({
StdioClientTransport: MockStdioClientTransport,
}))
const { disconnectAll, disconnectSession } = await import("./cleanup")
const { getOrCreateClient } = await import("./connection")
function createDeferred<TValue>(): Deferred<TValue> {
let resolvePromise: ((value: TValue) => void) | null = null
let rejectPromise: ((error: Error) => void) | null = null
const promise = new Promise<TValue>((resolve, reject) => {
resolvePromise = resolve
rejectPromise = reject
})
if (!resolvePromise || !rejectPromise) {
throw new Error("Failed to create deferred promise")
}
return {
promise,
resolve: resolvePromise,
reject: rejectPromise,
}
}
function createState(): SkillMcpManagerState {
const state: SkillMcpManagerState = {
clients: new Map(),
pendingConnections: new Map(),
disconnectedSessions: new Map(),
authProviders: new Map(),
cleanupRegistered: false,
cleanupInterval: null,
cleanupHandlers: [],
idleTimeoutMs: 5 * 60 * 1000,
shutdownGeneration: 0,
inFlightConnections: new Map(),
disposed: false,
}
trackedStates.push(state)
return state
}
function createClientInfo(sessionID: string): SkillMcpClientInfo {
return {
serverName: "race-server",
skillName: "race-skill",
sessionID,
}
}
function createClientKey(info: SkillMcpClientInfo): string {
return `${info.sessionID}:${info.skillName}:${info.serverName}`
}
const stdioConfig: ClaudeCodeMcpServer = {
command: "mock-mcp-server",
}
beforeEach(() => {
pendingConnects.length = 0
createdClients.length = 0
createdTransports.length = 0
})
afterEach(async () => {
for (const state of trackedStates) {
await disconnectAll(state)
}
trackedStates.length = 0
pendingConnects.length = 0
createdClients.length = 0
createdTransports.length = 0
})
describe("getOrCreateClient disconnect race", () => {
it("#given pending connection for session A #when disconnectSession(A) is called before connection completes #then completed client is not added to state.clients", async () => {
const state = createState()
const info = createClientInfo("session-a")
const clientKey = createClientKey(info)
const pendingConnect = createDeferred<void>()
pendingConnects.push(pendingConnect)
const clientPromise = getOrCreateClient({ state, clientKey, info, config: stdioConfig })
expect(state.pendingConnections.has(clientKey)).toBe(true)
await disconnectSession(state, info.sessionID)
pendingConnect.resolve(undefined)
await expect(clientPromise).rejects.toThrow(/disconnected during MCP connection setup/)
expect(state.clients.has(clientKey)).toBe(false)
expect(state.pendingConnections.has(clientKey)).toBe(false)
expect(state.disconnectedSessions.has(info.sessionID)).toBe(false)
expect(createdClients).toHaveLength(1)
expect(createdClients[0]?.close).toHaveBeenCalledTimes(1)
expect(createdTransports[0]?.close).toHaveBeenCalledTimes(1)
})
it("#given session A in disconnectedSessions #when new connection completes with no remaining pending #then disconnectedSessions entry is cleaned up", async () => {
const state = createState()
const info = createClientInfo("session-a")
const clientKey = createClientKey(info)
state.disconnectedSessions.set(info.sessionID, 1)
const client = await getOrCreateClient({ state, clientKey, info, config: stdioConfig })
expect(state.disconnectedSessions.has(info.sessionID)).toBe(false)
expect(state.clients.get(clientKey)?.client).toBe(client)
expect(createdClients[0]?.close).not.toHaveBeenCalled()
})
it("#given no pending connections #when disconnectSession is called #then no errors occur and session is not added to disconnectedSessions", async () => {
const state = createState()
await expect(disconnectSession(state, "session-a")).resolves.toBeUndefined()
expect(state.disconnectedSessions.has("session-a")).toBe(false)
expect(state.pendingConnections.size).toBe(0)
expect(state.clients.size).toBe(0)
})
})
describe("getOrCreateClient disconnectAll race", () => {
it("#given pending connection #when disconnectAll() is called before connection completes #then client is not added to state.clients", async () => {
const state = createState()
const info = createClientInfo("session-a")
const clientKey = createClientKey(info)
const pendingConnect = createDeferred<void>()
pendingConnects.push(pendingConnect)
const clientPromise = getOrCreateClient({ state, clientKey, info, config: stdioConfig })
expect(state.pendingConnections.has(clientKey)).toBe(true)
await disconnectAll(state)
pendingConnect.resolve(undefined)
await expect(clientPromise).rejects.toThrow(/connection completed after shutdown/)
expect(state.clients.has(clientKey)).toBe(false)
})
it("#given state after disconnectAll() completed #when getOrCreateClient() is called #then it throws shut down error and registers nothing", async () => {
const state = createState()
const info = createClientInfo("session-b")
const clientKey = createClientKey(info)
await disconnectAll(state)
await expect(getOrCreateClient({ state, clientKey, info, config: stdioConfig })).rejects.toThrow(/has been shut down/)
expect(state.clients.size).toBe(0)
expect(state.pendingConnections.size).toBe(0)
expect(state.inFlightConnections.size).toBe(0)
expect(state.disposed).toBe(true)
expect(createdClients).toHaveLength(0)
expect(createdTransports).toHaveLength(0)
})
})
describe("getOrCreateClient multi-key disconnect race", () => {
it("#given 2 pending connections for session A #when disconnectSession(A) before both complete #then both old connections are rejected", async () => {
const state = createState()
const infoKey1 = createClientInfo("session-a")
const infoKey2 = { ...createClientInfo("session-a"), serverName: "server-2" }
const clientKey1 = createClientKey(infoKey1)
const clientKey2 = `${infoKey2.sessionID}:${infoKey2.skillName}:${infoKey2.serverName}`
const pendingConnect1 = createDeferred<void>()
const pendingConnect2 = createDeferred<void>()
pendingConnects.push(pendingConnect1)
pendingConnects.push(pendingConnect2)
const promise1 = getOrCreateClient({ state, clientKey: clientKey1, info: infoKey1, config: stdioConfig })
const promise2 = getOrCreateClient({ state, clientKey: clientKey2, info: infoKey2, config: stdioConfig })
expect(state.pendingConnections.size).toBe(2)
await disconnectSession(state, "session-a")
pendingConnect1.resolve(undefined)
await expect(promise1).rejects.toThrow(/disconnected during MCP connection setup/)
pendingConnect2.resolve(undefined)
await expect(promise2).rejects.toThrow(/disconnected during MCP connection setup/)
expect(state.clients.has(clientKey1)).toBe(false)
expect(state.clients.has(clientKey2)).toBe(false)
expect(state.disconnectedSessions.has("session-a")).toBe(false)
})
it("#given a superseded pending connection #when the old connection completes #then the stale client is removed from state.clients", async () => {
const state = createState()
const info = createClientInfo("session-a")
const clientKey = createClientKey(info)
const pendingConnect = createDeferred<void>()
const supersedingConnection = createDeferred<Awaited<ReturnType<typeof getOrCreateClient>>>()
pendingConnects.push(pendingConnect)
const clientPromise = getOrCreateClient({ state, clientKey, info, config: stdioConfig })
state.pendingConnections.set(clientKey, supersedingConnection.promise)
pendingConnect.resolve(undefined)
await expect(clientPromise).rejects.toThrow(/superseded by a newer connection attempt/)
expect(state.clients.has(clientKey)).toBe(false)
expect(createdClients[0]?.close).toHaveBeenCalledTimes(1)
})
it("#given a superseded pending connection #when a newer client already replaced the map entry #then the stale cleanup does not delete the newer client", async () => {
const state = createState()
const info = createClientInfo("session-a")
const clientKey = createClientKey(info)
const pendingConnect = createDeferred<void>()
const supersedingConnection = createDeferred<Awaited<ReturnType<typeof getOrCreateClient>>>()
pendingConnects.push(pendingConnect)
const newerClient = new MockClient(
{ name: "newer-client", version: "1.0.0" },
{ capabilities: {} },
)
const newerTransport = new MockStdioClientTransport({ command: "mock-mcp-server" })
let replacedEntry = false
const originalSet = state.clients.set.bind(state.clients)
Reflect.set(state.clients, "set", (key: string, value: SkillMcpManagerState["clients"] extends Map<string, infer TValue> ? TValue : never) => {
originalSet(key, value)
if (!replacedEntry && key === clientKey) {
replacedEntry = true
originalSet(key, {
client: newerClient as never,
transport: newerTransport as never,
skillName: info.skillName,
lastUsedAt: Date.now(),
connectionType: "stdio",
})
}
return state.clients
})
const clientPromise = getOrCreateClient({ state, clientKey, info, config: stdioConfig })
state.pendingConnections.set(clientKey, supersedingConnection.promise)
pendingConnect.resolve(undefined)
await expect(clientPromise).rejects.toThrow(/superseded by a newer connection attempt/)
expect(state.clients.get(clientKey)?.client.close).toBe(newerClient.close)
expect(newerClient.close).not.toHaveBeenCalled()
})
})

View File

@@ -7,6 +7,13 @@ import { createHttpClient } from "./http-client"
import { createStdioClient } from "./stdio-client"
import type { SkillMcpClientConnectionParams, SkillMcpClientInfo, SkillMcpManagerState } from "./types"
function removeClientIfCurrent(state: SkillMcpManagerState, clientKey: string, client: Client): void {
const managed = state.clients.get(clientKey)
if (managed?.client === client) {
state.clients.delete(clientKey)
}
}
export async function getOrCreateClient(params: {
state: SkillMcpManagerState
clientKey: string
@@ -15,6 +22,10 @@ export async function getOrCreateClient(params: {
}): Promise<Client> {
const { state, clientKey, info, config } = params
if (state.disposed) {
throw new Error(`MCP manager for "${info.sessionID}" has been shut down, cannot create new connections.`)
}
const existing = state.clients.get(clientKey)
if (existing) {
existing.lastUsedAt = Date.now()
@@ -28,14 +39,52 @@ export async function getOrCreateClient(params: {
}
const expandedConfig = expandEnvVarsInObject(config)
const connectionPromise = createClient({ state, clientKey, info, config: expandedConfig })
state.pendingConnections.set(clientKey, connectionPromise)
let currentConnectionPromise!: Promise<Client>
state.inFlightConnections.set(info.sessionID, (state.inFlightConnections.get(info.sessionID) ?? 0) + 1)
currentConnectionPromise = (async () => {
const disconnectGenAtStart = state.disconnectedSessions.get(info.sessionID) ?? 0
const shutdownGenAtStart = state.shutdownGeneration
const client = await createClient({ state, clientKey, info, config: expandedConfig })
const isStale = state.pendingConnections.has(clientKey) && state.pendingConnections.get(clientKey) !== currentConnectionPromise
if (isStale) {
removeClientIfCurrent(state, clientKey, client)
try { await client.close() } catch {}
throw new Error(`Connection for "${info.sessionID}" was superseded by a newer connection attempt.`)
}
if (state.shutdownGeneration !== shutdownGenAtStart) {
removeClientIfCurrent(state, clientKey, client)
try { await client.close() } catch {}
throw new Error(`Shutdown occurred during MCP connection for "${info.sessionID}"`)
}
const currentDisconnectGen = state.disconnectedSessions.get(info.sessionID) ?? 0
if (currentDisconnectGen > disconnectGenAtStart) {
await forceReconnect(state, clientKey)
throw new Error(`Session "${info.sessionID}" disconnected during MCP connection setup.`)
}
return client
})()
state.pendingConnections.set(clientKey, currentConnectionPromise)
try {
const client = await connectionPromise
const client = await currentConnectionPromise
return client
} finally {
state.pendingConnections.delete(clientKey)
if (state.pendingConnections.get(clientKey) === currentConnectionPromise) {
state.pendingConnections.delete(clientKey)
}
const remaining = (state.inFlightConnections.get(info.sessionID) ?? 1) - 1
if (remaining <= 0) {
state.inFlightConnections.delete(info.sessionID)
state.disconnectedSessions.delete(info.sessionID)
} else {
state.inFlightConnections.set(info.sessionID, remaining)
}
}
}

View File

@@ -0,0 +1,133 @@
import { Client } from "@modelcontextprotocol/sdk/client/index.js"
import { StreamableHTTPClientTransport } from "@modelcontextprotocol/sdk/client/streamableHttp.js"
import { afterEach, describe, expect, it } from "bun:test"
import { disconnectSession, registerProcessCleanup, unregisterProcessCleanup } from "./cleanup"
import type { ManagedClient, SkillMcpManagerState } from "./types"
const trackedStates: SkillMcpManagerState[] = []
afterEach(() => {
for (const state of trackedStates) {
unregisterProcessCleanup(state)
}
trackedStates.length = 0
})
const expectedCleanupHandlerCount = process.platform === "win32" ? 3 : 2
function createState(): SkillMcpManagerState {
const state: SkillMcpManagerState = {
clients: new Map(),
pendingConnections: new Map(),
disconnectedSessions: new Map(),
authProviders: new Map(),
cleanupRegistered: false,
cleanupInterval: null,
cleanupHandlers: [],
idleTimeoutMs: 5 * 60 * 1000,
shutdownGeneration: 0,
inFlightConnections: new Map(),
disposed: false,
}
trackedStates.push(state)
return state
}
function createManagedClient(skillName: string): ManagedClient {
return {
client: new Client(
{ name: `test-${skillName}`, version: "1.0.0" },
{ capabilities: {} }
),
transport: new StreamableHTTPClientTransport(new URL("https://example.com/mcp")),
skillName,
lastUsedAt: Date.now(),
connectionType: "http",
}
}
describe("disconnectSession cleanup registration", () => {
it("#given state with 1 client and cleanup registered #when disconnectSession removes last client #then process cleanup handlers are unregistered", async () => {
// given
const state = createState()
const signalIntCountBeforeRegister = process.listenerCount("SIGINT")
const signalTermCountBeforeRegister = process.listenerCount("SIGTERM")
state.clients.set("session-1:skill-1:server-1", createManagedClient("skill-1"))
registerProcessCleanup(state)
// when
await disconnectSession(state, "session-1")
// then
expect(state.cleanupRegistered).toBe(false)
expect(state.cleanupHandlers).toEqual([])
expect(process.listenerCount("SIGINT")).toBe(signalIntCountBeforeRegister)
expect(process.listenerCount("SIGTERM")).toBe(signalTermCountBeforeRegister)
})
it("#given state with 2 clients in different sessions #when disconnectSession removes one session #then process cleanup handlers remain registered", async () => {
// given
const state = createState()
const signalIntCountBeforeRegister = process.listenerCount("SIGINT")
const signalTermCountBeforeRegister = process.listenerCount("SIGTERM")
state.clients.set("session-1:skill-1:server-1", createManagedClient("skill-1"))
state.clients.set("session-2:skill-2:server-2", createManagedClient("skill-2"))
registerProcessCleanup(state)
// when
await disconnectSession(state, "session-1")
// then
expect(state.clients.has("session-2:skill-2:server-2")).toBe(true)
expect(state.cleanupRegistered).toBe(true)
expect(state.cleanupHandlers).toHaveLength(expectedCleanupHandlerCount)
expect(process.listenerCount("SIGINT")).toBe(signalIntCountBeforeRegister + 1)
expect(process.listenerCount("SIGTERM")).toBe(signalTermCountBeforeRegister + 1)
})
it("#given state with 2 clients in different sessions #when both sessions disconnected #then process cleanup handlers are unregistered", async () => {
// given
const state = createState()
const signalIntCountBeforeRegister = process.listenerCount("SIGINT")
const signalTermCountBeforeRegister = process.listenerCount("SIGTERM")
state.clients.set("session-1:skill-1:server-1", createManagedClient("skill-1"))
state.clients.set("session-2:skill-2:server-2", createManagedClient("skill-2"))
registerProcessCleanup(state)
// when
await disconnectSession(state, "session-1")
await disconnectSession(state, "session-2")
// then
expect(state.clients.size).toBe(0)
expect(state.cleanupRegistered).toBe(false)
expect(state.cleanupHandlers).toEqual([])
expect(process.listenerCount("SIGINT")).toBe(signalIntCountBeforeRegister)
expect(process.listenerCount("SIGTERM")).toBe(signalTermCountBeforeRegister)
})
it("#given state with 1 client and pending connection for different session and cleanup registered #when disconnectSession removes last client but pendingConnections remain #then process cleanup handlers stay registered", async () => {
const state = createState()
const signalIntCountBeforeRegister = process.listenerCount("SIGINT")
const signalTermCountBeforeRegister = process.listenerCount("SIGTERM")
const pendingClient = createManagedClient("skill-pending").client
state.clients.set("session-1:skill-1:server-1", createManagedClient("skill-1"))
state.pendingConnections.set("session-2:skill-2:server-2", Promise.resolve(pendingClient))
registerProcessCleanup(state)
await disconnectSession(state, "session-1")
expect(state.clients.size).toBe(0)
expect(state.pendingConnections.size).toBe(1)
expect(state.cleanupRegistered).toBe(true)
expect(state.cleanupHandlers).toHaveLength(expectedCleanupHandlerCount)
expect(process.listenerCount("SIGINT")).toBe(signalIntCountBeforeRegister + 1)
expect(process.listenerCount("SIGTERM")).toBe(signalTermCountBeforeRegister + 1)
})
})

View File

@@ -24,6 +24,7 @@ function redactUrl(urlStr: string): string {
export async function createHttpClient(params: SkillMcpClientConnectionParams): Promise<Client> {
const { state, clientKey, info, config } = params
const shutdownGenAtStart = state.shutdownGeneration
if (!config.url) {
throw new Error(`MCP server "${info.serverName}" is configured for HTTP but missing 'url' field.`)
@@ -72,6 +73,12 @@ export async function createHttpClient(params: SkillMcpClientConnectionParams):
)
}
if (state.shutdownGeneration !== shutdownGenAtStart) {
try { await client.close() } catch {}
try { await transport.close() } catch {}
throw new Error(`MCP server "${info.serverName}" connection completed after shutdown`)
}
const managedClient = {
client,
transport,

View File

@@ -10,11 +10,15 @@ export class SkillMcpManager {
private readonly state: SkillMcpManagerState = {
clients: new Map(),
pendingConnections: new Map(),
disconnectedSessions: new Map(),
authProviders: new Map(),
cleanupRegistered: false,
cleanupInterval: null,
cleanupHandlers: [],
idleTimeoutMs: 5 * 60 * 1000,
shutdownGeneration: 0,
inFlightConnections: new Map(),
disposed: false,
}
private getClientKey(info: SkillMcpClientInfo): string {

View File

@@ -14,6 +14,7 @@ function getStdioCommand(config: ClaudeCodeMcpServer, serverName: string): strin
export async function createStdioClient(params: SkillMcpClientConnectionParams): Promise<Client> {
const { state, clientKey, info, config } = params
const shutdownGenAtStart = state.shutdownGeneration
const command = getStdioCommand(config, info.serverName)
const args = config.args ?? []
@@ -55,6 +56,12 @@ export async function createStdioClient(params: SkillMcpClientConnectionParams):
)
}
if (state.shutdownGeneration !== shutdownGenAtStart) {
try { await client.close() } catch {}
try { await transport.close() } catch {}
throw new Error(`MCP server "${info.serverName}" connection completed after shutdown`)
}
const managedClient = {
client,
transport,

View File

@@ -51,11 +51,15 @@ export interface ProcessCleanupHandler {
export interface SkillMcpManagerState {
clients: Map<string, ManagedClient>
pendingConnections: Map<string, Promise<Client>>
disconnectedSessions: Map<string, number>
authProviders: Map<string, McpOAuthProvider>
cleanupRegistered: boolean
cleanupInterval: ReturnType<typeof setInterval> | null
cleanupHandlers: ProcessCleanupHandler[]
idleTimeoutMs: number
shutdownGeneration: number
inFlightConnections: Map<string, number>
disposed: boolean
}
export interface SkillMcpClientConnectionParams {

View File

@@ -1,6 +1,6 @@
import type { PluginInput } from "@opencode-ai/plugin"
import type { TmuxConfig } from "../../config/schema"
import type { TrackedSession, CapacityConfig } from "./types"
import type { TrackedSession, CapacityConfig, WindowState } from "./types"
import { log, normalizeSDKResponse } from "../../shared"
import {
isInsideTmux as defaultIsInsideTmux,
@@ -13,6 +13,7 @@ import { queryWindowState } from "./pane-state-querier"
import { decideSpawnActions, decideCloseAction, type SessionMapping } from "./decision-engine"
import { executeActions, executeAction } from "./action-executor"
import { TmuxPollingManager } from "./polling-manager"
import { createTrackedSession, markTrackedSessionClosePending } from "./tracked-session-state"
type OpencodeClient = PluginInput["client"]
interface SessionCreatedEvent {
@@ -38,6 +39,7 @@ const defaultTmuxDeps: TmuxUtilDeps = {
const DEFERRED_SESSION_TTL_MS = 5 * 60 * 1000
const MAX_DEFERRED_QUEUE_SIZE = 20
const MAX_CLOSE_RETRY_COUNT = 3
/**
* State-first Tmux Session Manager
@@ -71,7 +73,11 @@ export class TmuxSessionManager {
this.tmuxConfig = tmuxConfig
this.deps = deps
const defaultPort = process.env.OPENCODE_PORT ?? "4096"
this.serverUrl = ctx.serverUrl?.toString() ?? `http://localhost:${defaultPort}`
try {
this.serverUrl = ctx.serverUrl?.toString() ?? `http://localhost:${defaultPort}`
} catch {
this.serverUrl = `http://localhost:${defaultPort}`
}
this.sourcePaneId = deps.getCurrentPaneId()
this.pollingManager = new TmuxPollingManager(
this.client,
@@ -106,6 +112,123 @@ export class TmuxSessionManager {
}))
}
private removeTrackedSession(sessionId: string): void {
this.sessions.delete(sessionId)
if (this.sessions.size === 0) {
this.pollingManager.stopPolling()
}
}
private markSessionClosePending(sessionId: string): void {
const tracked = this.sessions.get(sessionId)
if (!tracked) return
this.sessions.set(sessionId, markTrackedSessionClosePending(tracked))
log("[tmux-session-manager] marked session close pending", {
sessionId,
paneId: tracked.paneId,
closeRetryCount: tracked.closeRetryCount,
})
}
private async queryWindowStateSafely(): Promise<WindowState | null> {
if (!this.sourcePaneId) return null
try {
return await queryWindowState(this.sourcePaneId)
} catch (error) {
log("[tmux-session-manager] failed to query window state for close", {
error: String(error),
})
return null
}
}
private async tryCloseTrackedSession(tracked: TrackedSession): Promise<boolean> {
const state = await this.queryWindowStateSafely()
if (!state) return false
try {
const result = await executeAction(
{ type: "close", paneId: tracked.paneId, sessionId: tracked.sessionId },
{
config: this.tmuxConfig,
serverUrl: this.serverUrl,
windowState: state,
sourcePaneId: this.sourcePaneId,
}
)
return result.success
} catch (error) {
log("[tmux-session-manager] close session pane failed", {
sessionId: tracked.sessionId,
paneId: tracked.paneId,
error: String(error),
})
return false
}
}
private async retryPendingCloses(): Promise<void> {
const pendingSessions = Array.from(this.sessions.values()).filter(
(tracked) => tracked.closePending,
)
for (const tracked of pendingSessions) {
if (!this.sessions.has(tracked.sessionId)) continue
if (tracked.closeRetryCount >= MAX_CLOSE_RETRY_COUNT) {
log("[tmux-session-manager] force removing close-pending session after max retries", {
sessionId: tracked.sessionId,
paneId: tracked.paneId,
closeRetryCount: tracked.closeRetryCount,
})
this.removeTrackedSession(tracked.sessionId)
continue
}
const closed = await this.tryCloseTrackedSession(tracked)
if (closed) {
log("[tmux-session-manager] retried close succeeded", {
sessionId: tracked.sessionId,
paneId: tracked.paneId,
closeRetryCount: tracked.closeRetryCount,
})
this.removeTrackedSession(tracked.sessionId)
continue
}
const currentTracked = this.sessions.get(tracked.sessionId)
if (!currentTracked || !currentTracked.closePending) {
continue
}
const nextRetryCount = currentTracked.closeRetryCount + 1
if (nextRetryCount >= MAX_CLOSE_RETRY_COUNT) {
log("[tmux-session-manager] force removing close-pending session after failed retry", {
sessionId: currentTracked.sessionId,
paneId: currentTracked.paneId,
closeRetryCount: nextRetryCount,
})
this.removeTrackedSession(currentTracked.sessionId)
continue
}
this.sessions.set(currentTracked.sessionId, {
...currentTracked,
closePending: true,
closeRetryCount: nextRetryCount,
})
log("[tmux-session-manager] retried close failed", {
sessionId: currentTracked.sessionId,
paneId: currentTracked.paneId,
closeRetryCount: nextRetryCount,
})
}
}
private enqueueDeferredSession(sessionId: string, title: string): void {
if (this.deferredSessions.has(sessionId)) return
if (this.deferredQueue.length >= MAX_DEFERRED_QUEUE_SIZE) {
@@ -257,14 +380,14 @@ export class TmuxSessionManager {
})
}
const now = Date.now()
this.sessions.set(sessionId, {
this.sessions.set(
sessionId,
paneId: result.spawnedPaneId,
description: deferred.title,
createdAt: new Date(now),
lastSeenAt: new Date(now),
})
createTrackedSession({
sessionId,
paneId: result.spawnedPaneId,
description: deferred.title,
}),
)
this.removeDeferredSession(sessionId)
this.pollingManager.startPolling()
log("[tmux-session-manager] deferred session attached", {
@@ -324,6 +447,13 @@ export class TmuxSessionManager {
const sessionId = info.id
const title = info.title ?? "Subagent"
if (!this.sourcePaneId) {
log("[tmux-session-manager] no source pane id")
return
}
await this.retryPendingCloses()
if (
this.sessions.has(sessionId) ||
this.pendingSessions.has(sessionId) ||
@@ -332,11 +462,6 @@ export class TmuxSessionManager {
log("[tmux-session-manager] session already tracked or pending", { sessionId })
return
}
if (!this.sourcePaneId) {
log("[tmux-session-manager] no source pane id")
return
}
const sourcePaneId = this.sourcePaneId
this.pendingSessions.add(sessionId)
@@ -418,14 +543,14 @@ export class TmuxSessionManager {
})
}
const now = Date.now()
this.sessions.set(sessionId, {
this.sessions.set(
sessionId,
paneId: result.spawnedPaneId,
description: title,
createdAt: new Date(now),
lastSeenAt: new Date(now),
})
createTrackedSession({
sessionId,
paneId: result.spawnedPaneId,
description: title,
}),
)
log("[tmux-session-manager] pane spawned and tracked", {
sessionId,
paneId: result.spawnedPaneId,
@@ -485,27 +610,40 @@ export class TmuxSessionManager {
log("[tmux-session-manager] onSessionDeleted", { sessionId: event.sessionID })
const state = await queryWindowState(this.sourcePaneId)
const state = await this.queryWindowStateSafely()
if (!state) {
this.sessions.delete(event.sessionID)
this.markSessionClosePending(event.sessionID)
return
}
const closeAction = decideCloseAction(state, event.sessionID, this.getSessionMappings())
if (closeAction) {
await executeAction(closeAction, {
if (!closeAction) {
this.removeTrackedSession(event.sessionID)
return
}
try {
const result = await executeAction(closeAction, {
config: this.tmuxConfig,
serverUrl: this.serverUrl,
windowState: state,
sourcePaneId: this.sourcePaneId,
})
if (!result.success) {
this.markSessionClosePending(event.sessionID)
return
}
} catch (error) {
log("[tmux-session-manager] failed to close pane for deleted session", {
sessionId: event.sessionID,
error: String(error),
})
this.markSessionClosePending(event.sessionID)
return
}
this.sessions.delete(event.sessionID)
if (this.sessions.size === 0) {
this.pollingManager.stopPolling()
}
this.removeTrackedSession(event.sessionID)
}
@@ -513,29 +651,28 @@ export class TmuxSessionManager {
const tracked = this.sessions.get(sessionId)
if (!tracked) return
if (tracked.closePending && tracked.closeRetryCount >= MAX_CLOSE_RETRY_COUNT) {
log("[tmux-session-manager] force removing close-pending session after max retries", {
sessionId,
paneId: tracked.paneId,
closeRetryCount: tracked.closeRetryCount,
})
this.removeTrackedSession(sessionId)
return
}
log("[tmux-session-manager] closing session pane", {
sessionId,
paneId: tracked.paneId,
})
const state = this.sourcePaneId ? await queryWindowState(this.sourcePaneId) : null
if (state) {
await executeAction(
{ type: "close", paneId: tracked.paneId, sessionId },
{
config: this.tmuxConfig,
serverUrl: this.serverUrl,
windowState: state,
sourcePaneId: this.sourcePaneId,
}
)
const closed = await this.tryCloseTrackedSession(tracked)
if (!closed) {
this.markSessionClosePending(sessionId)
return
}
this.sessions.delete(sessionId)
if (this.sessions.size === 0) {
this.pollingManager.stopPolling()
}
this.removeTrackedSession(sessionId)
}
createEventHandler(): (input: { event: { type: string; properties?: unknown } }) => Promise<void> {
@@ -552,30 +689,22 @@ export class TmuxSessionManager {
if (this.sessions.size > 0) {
log("[tmux-session-manager] closing all panes", { count: this.sessions.size })
const state = this.sourcePaneId ? await queryWindowState(this.sourcePaneId) : null
if (state) {
const closePromises = Array.from(this.sessions.values()).map((s) =>
executeAction(
{ type: "close", paneId: s.paneId, sessionId: s.sessionId },
{
config: this.tmuxConfig,
serverUrl: this.serverUrl,
windowState: state,
sourcePaneId: this.sourcePaneId,
}
).catch((err) =>
log("[tmux-session-manager] cleanup error for pane", {
paneId: s.paneId,
error: String(err),
}),
),
)
await Promise.all(closePromises)
const sessionIds = Array.from(this.sessions.keys())
for (const sessionId of sessionIds) {
try {
await this.closeSessionById(sessionId)
} catch (error) {
log("[tmux-session-manager] cleanup error for pane", {
sessionId,
error: String(error),
})
}
}
this.sessions.clear()
}
await this.retryPendingCloses()
log("[tmux-session-manager] cleanup complete")
}
}

View File

@@ -12,6 +12,8 @@ describe("TmuxPollingManager overlap", () => {
description: "test",
createdAt: new Date(),
lastSeenAt: new Date(),
closePending: false,
closeRetryCount: 0,
})
let activeCalls = 0

View File

@@ -6,6 +6,7 @@ import { queryWindowState } from "./pane-state-querier"
import { decideSpawnActions, type SessionMapping } from "./decision-engine"
import { executeActions } from "./action-executor"
import type { SessionCreatedEvent } from "./session-created-event"
import { createTrackedSession } from "./tracked-session-state"
type OpencodeClient = PluginInput["client"]
@@ -152,14 +153,14 @@ export async function handleSessionCreated(
return
}
const now = Date.now()
deps.sessions.set(sessionId, {
deps.sessions.set(
sessionId,
paneId: result.spawnedPaneId,
description: title,
createdAt: new Date(now),
lastSeenAt: new Date(now),
})
createTrackedSession({
sessionId,
paneId: result.spawnedPaneId,
description: title,
}),
)
log("[tmux-session-manager] pane spawned and tracked", {
sessionId,

View File

@@ -0,0 +1,28 @@
import type { TrackedSession } from "./types"
/**
 * Build a freshly-tracked tmux session record.
 *
 * Both `createdAt` and `lastSeenAt` start at the same instant (`params.now`
 * when supplied, otherwise the current time), and the close-retry bookkeeping
 * begins in the "not pending" state with zero retries.
 */
export function createTrackedSession(params: {
  sessionId: string
  paneId: string
  description: string
  now?: Date
}): TrackedSession {
  const { sessionId, paneId, description } = params
  const timestamp = params.now ?? new Date()
  return {
    sessionId,
    paneId,
    description,
    createdAt: timestamp,
    lastSeenAt: timestamp,
    closePending: false,
    closeRetryCount: 0,
  }
}
/**
 * Return a copy of `tracked` flagged as close-pending.
 *
 * The retry counter only advances when the session was already pending, so
 * the first failed close is recorded at retry 0 and each subsequent failure
 * increments the count by one. The input record is never mutated.
 */
export function markTrackedSessionClosePending(tracked: TrackedSession): TrackedSession {
  const retries = tracked.closePending ? tracked.closeRetryCount + 1 : tracked.closeRetryCount
  return {
    ...tracked,
    closePending: true,
    closeRetryCount: retries,
  }
}

View File

@@ -4,6 +4,8 @@ export interface TrackedSession {
description: string
createdAt: Date
lastSeenAt: Date
closePending: boolean
closeRetryCount: number
// Stability detection fields (prevents premature closure)
lastMessageCount?: number
stableIdlePolls?: number

View File

@@ -0,0 +1,271 @@
// Tests for TmuxSessionManager's "zombie pane" handling: sessions whose tmux
// pane could not be closed are flagged closePending and retried with a
// bounded retry count instead of being silently dropped or leaked.
import { beforeEach, describe, expect, mock, test } from "bun:test"
import type { TmuxConfig } from "../../config/schema"
import type { ActionResult, ExecuteContext, ExecuteActionsResult } from "./action-executor"
import type { TmuxUtilDeps } from "./manager"
import type { TrackedSession, WindowState } from "./types"

// Module-level mocks replacing real tmux interaction. Defaults model a healthy
// window with one main pane and no agent panes; individual tests override only
// what they need and beforeEach restores these defaults.
const mockQueryWindowState = mock<(paneId: string) => Promise<WindowState | null>>(async () => ({
  windowWidth: 220,
  windowHeight: 44,
  mainPane: { paneId: "%0", width: 110, height: 44, left: 0, top: 0, title: "main", isActive: true },
  agentPanes: [],
}))
const mockExecuteAction = mock<(
  action: { type: string },
  ctx: ExecuteContext,
) => Promise<ActionResult>>(async () => ({ success: true }))
const mockExecuteActions = mock<(
  actions: unknown[],
  ctx: ExecuteContext,
) => Promise<ExecuteActionsResult>>(async () => ({
  success: true,
  spawnedPaneId: "%1",
  results: [],
}))
const mockIsInsideTmux = mock<() => boolean>(() => true)
const mockGetCurrentPaneId = mock<() => string | undefined>(() => "%0")
// mock.module must run before the dynamic `await import("./manager")` inside
// each test so the manager picks up these stubs instead of real modules.
mock.module("./pane-state-querier", () => ({
  queryWindowState: mockQueryWindowState,
}))
mock.module("./action-executor", () => ({
  executeAction: mockExecuteAction,
  executeActions: mockExecuteActions,
}))
// Shortened poll/timeout constants keep the async code paths fast under test.
mock.module("../../shared/tmux", () => ({
  isInsideTmux: mockIsInsideTmux,
  getCurrentPaneId: mockGetCurrentPaneId,
  POLL_INTERVAL_BACKGROUND_MS: 10,
  SESSION_READY_POLL_INTERVAL_MS: 10,
  SESSION_READY_TIMEOUT_MS: 50,
  SESSION_MISSING_GRACE_MS: 1_000,
}))
const mockTmuxDeps: TmuxUtilDeps = {
  isInsideTmux: mockIsInsideTmux,
  getCurrentPaneId: mockGetCurrentPaneId,
}

// Minimal tmux config accepted by the manager constructor.
function createConfig(): TmuxConfig {
  return {
    enabled: true,
    layout: "main-vertical",
    main_pane_size: 60,
    main_pane_min_width: 80,
    agent_pane_min_width: 40,
  }
}

// Builds a fake PluginInput-shaped context. The `$` shell is a callable that
// throws on invocation (no test here should shell out) while still exposing
// the chainable helpers (env/cwd/nothrow/throws) that return the shell itself.
function createContext() {
  const shell = Object.assign(
    () => {
      throw new Error("shell should not be called in this test")
    },
    {
      braces: () => [],
      escape: (input: string) => input,
      env() {
        return shell
      },
      cwd() {
        return shell
      },
      nothrow() {
        return shell
      },
      throws() {
        return shell
      },
    },
  )
  return {
    project: {
      id: "project-id",
      worktree: "/tmp/omo-fix-memory-leaks",
      time: { created: Date.now() },
    },
    directory: "/tmp/omo-fix-memory-leaks",
    worktree: "/tmp/omo-fix-memory-leaks",
    serverUrl: new URL("http://localhost:4096"),
    $: shell,
    client: {
      session: {
        status: mock(async () => ({ data: {} })),
        messages: mock(async () => ({ data: [] })),
      },
    },
  }
}

// Fixture factory for a tracked session; overrides let tests preset the
// closePending/closeRetryCount bookkeeping under test.
function createTrackedSession(overrides?: Partial<TrackedSession>): TrackedSession {
  return {
    sessionId: "ses_pending",
    paneId: "%1",
    description: "Pending pane",
    createdAt: new Date(),
    lastSeenAt: new Date(),
    closePending: false,
    closeRetryCount: 0,
    ...overrides,
  }
}

// The helpers below reach into private members via Reflect so the tests can
// observe/drive internal state without widening the manager's public API.
function getTrackedSessions(target: object): Map<string, TrackedSession> {
  const sessions = Reflect.get(target, "sessions")
  if (!(sessions instanceof Map)) {
    throw new Error("Expected sessions map")
  }
  return sessions
}
function getRetryPendingCloses(target: object): () => Promise<void> {
  const retryPendingCloses = Reflect.get(target, "retryPendingCloses")
  if (typeof retryPendingCloses !== "function") {
    throw new Error("Expected retryPendingCloses method")
  }
  return retryPendingCloses.bind(target)
}
function getCloseSessionById(target: object): (sessionId: string) => Promise<void> {
  const closeSessionById = Reflect.get(target, "closeSessionById")
  if (typeof closeSessionById !== "function") {
    throw new Error("Expected closeSessionById method")
  }
  return closeSessionById.bind(target)
}
// Reflect.construct sidesteps any constructor-visibility restrictions.
function createManager(
  TmuxSessionManager: typeof import("./manager").TmuxSessionManager,
): import("./manager").TmuxSessionManager {
  return Reflect.construct(TmuxSessionManager, [createContext(), createConfig(), mockTmuxDeps])
}

describe("TmuxSessionManager zombie pane handling", () => {
  beforeEach(() => {
    // Reset call history AND re-install default implementations, since some
    // tests replace them (e.g. mockQueryWindowState returning null).
    mockQueryWindowState.mockClear()
    mockExecuteAction.mockClear()
    mockExecuteActions.mockClear()
    mockIsInsideTmux.mockClear()
    mockGetCurrentPaneId.mockClear()
    mockQueryWindowState.mockImplementation(async () => ({
      windowWidth: 220,
      windowHeight: 44,
      mainPane: { paneId: "%0", width: 110, height: 44, left: 0, top: 0, title: "main", isActive: true },
      agentPanes: [],
    }))
    mockExecuteAction.mockImplementation(async () => ({ success: true }))
    mockExecuteActions.mockImplementation(async () => ({
      success: true,
      spawnedPaneId: "%1",
      results: [],
    }))
    mockIsInsideTmux.mockReturnValue(true)
    mockGetCurrentPaneId.mockReturnValue("%0")
  })
  test("#given session in sessions Map #when onSessionDeleted called with null window state #then session stays in Map with closePending true", async () => {
    // given
    mockQueryWindowState.mockImplementation(async () => null)
    const { TmuxSessionManager } = await import("./manager")
    const manager = createManager(TmuxSessionManager)
    const sessions = getTrackedSessions(manager)
    sessions.set("ses_pending", createTrackedSession())
    // when
    await manager.onSessionDeleted({ sessionID: "ses_pending" })
    // then: unknown window state must defer the close, not drop the session
    const tracked = sessions.get("ses_pending")
    expect(tracked).toBeDefined()
    expect(tracked?.closePending).toBe(true)
    expect(tracked?.closeRetryCount).toBe(0)
    expect(mockExecuteAction).not.toHaveBeenCalled()
  })
  test("#given session with closePending true #when retryPendingCloses succeeds #then session is removed from Map", async () => {
    // given
    const { TmuxSessionManager } = await import("./manager")
    const manager = createManager(TmuxSessionManager)
    const sessions = getTrackedSessions(manager)
    sessions.set(
      "ses_pending",
      createTrackedSession({ closePending: true, closeRetryCount: 0 }),
    )
    // when
    await getRetryPendingCloses(manager)()
    // then: a successful close action clears the pending entry
    expect(sessions.has("ses_pending")).toBe(false)
    expect(mockExecuteAction).toHaveBeenCalledTimes(1)
  })
  test("#given session with closePending true and closeRetryCount >= 3 #when retryPendingCloses called #then session is force-removed from Map", async () => {
    // given
    const { TmuxSessionManager } = await import("./manager")
    const manager = createManager(TmuxSessionManager)
    const sessions = getTrackedSessions(manager)
    sessions.set(
      "ses_pending",
      createTrackedSession({ closePending: true, closeRetryCount: 3 }),
    )
    // when
    await getRetryPendingCloses(manager)()
    // then: retry budget exhausted — remove without touching tmux at all
    expect(sessions.has("ses_pending")).toBe(false)
    expect(mockQueryWindowState).not.toHaveBeenCalled()
    expect(mockExecuteAction).not.toHaveBeenCalled()
  })
  test("#given session with closePending true and closeRetryCount >= 3 #when closeSessionById called #then session is force-removed without retrying close", async () => {
    // given
    const { TmuxSessionManager } = await import("./manager")
    const manager = createManager(TmuxSessionManager)
    const sessions = getTrackedSessions(manager)
    sessions.set(
      "ses_pending",
      createTrackedSession({ closePending: true, closeRetryCount: 3 }),
    )
    // when
    await getCloseSessionById(manager)("ses_pending")
    // then: same force-removal path as retryPendingCloses
    expect(sessions.has("ses_pending")).toBe(false)
    expect(mockQueryWindowState).not.toHaveBeenCalled()
    expect(mockExecuteAction).not.toHaveBeenCalled()
  })
  test("#given close-pending session removed during async close #when retryPendingCloses fails #then it does not resurrect stale session state", async () => {
    // given: the close action concurrently deletes the entry, then fails
    const { TmuxSessionManager } = await import("./manager")
    const manager = createManager(TmuxSessionManager)
    const sessions = getTrackedSessions(manager)
    sessions.set(
      "ses_pending",
      createTrackedSession({ closePending: true, closeRetryCount: 0 }),
    )
    mockExecuteAction.mockImplementationOnce(async () => {
      sessions.delete("ses_pending")
      return { success: false }
    })
    // when
    await getRetryPendingCloses(manager)()
    // then: the failure handler must not re-insert the deleted session
    expect(sessions.has("ses_pending")).toBe(false)
  })
})

View File

@@ -21,6 +21,6 @@ export function createAtlasHook(ctx: PluginInput, options?: AtlasHookOptions) {
return {
handler: createAtlasEventHandler({ ctx, options, sessions, getState }),
"tool.execute.before": createToolExecuteBeforeHandler({ ctx, pendingFilePaths }),
"tool.execute.after": createToolExecuteAfterHandler({ ctx, pendingFilePaths, autoCommit }),
"tool.execute.after": createToolExecuteAfterHandler({ ctx, pendingFilePaths, autoCommit, getState }),
}
}

View File

@@ -0,0 +1,44 @@
import type { PluginInput } from "@opencode-ai/plugin"
import { log } from "../../shared/logger"
import { HOOK_NAME } from "./hook-name"
/**
 * Determine whether `input.sessionID` descends from any session tracked in
 * `input.boulderSessionIDs` by walking parent links via the opencode client.
 *
 * Returns false when the walk reaches a root (no parentID), when a session
 * lookup fails (network/API error is logged and treated as "not in lineage"),
 * or when a cycle is detected via the visited set.
 */
export async function isSessionInBoulderLineage(input: {
  client: PluginInput["client"]
  sessionID: string
  boulderSessionIDs: string[]
}): Promise<boolean> {
  const visited = new Set<string>()
  for (let cursor = input.sessionID; !visited.has(cursor); ) {
    visited.add(cursor)
    const lookup = await input.client.session
      .get({ path: { id: cursor } })
      .catch((error: unknown) => {
        log(`[${HOOK_NAME}] Failed to resolve session lineage`, {
          sessionID: input.sessionID,
          currentSessionID: cursor,
          error,
        })
        return null
      })
    if (!lookup || lookup.error) return false
    const parentID = lookup.data?.parentID
    if (!parentID) return false
    if (input.boulderSessionIDs.includes(parentID)) return true
    cursor = parentID
  }
  // Cycle in the parent chain — never reached a tracked boulder session.
  return false
}

View File

@@ -38,11 +38,15 @@ export function createAtlasEventHandler(input: {
if (event.type === "message.updated") {
const info = props?.info as Record<string, unknown> | undefined
const sessionID = info?.sessionID as string | undefined
const role = info?.role as string | undefined
if (!sessionID) return
const state = sessions.get(sessionID)
if (state) {
state.lastEventWasAbortError = false
if (role === "user") {
state.waitingForFinalWaveApproval = false
}
}
return
}

View File

@@ -0,0 +1,224 @@
// Tests for the Atlas final-verification approval gate: after the last
// final-wave reviewer approves, the hook must rewrite the task output into an
// approval gate and stop auto-continuing until the user explicitly approves.
import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test"
import { randomUUID } from "node:crypto"
import { existsSync, mkdirSync, rmSync, writeFileSync } from "node:fs"
import { tmpdir } from "node:os"
import { join } from "node:path"
import { createOpencodeClient } from "@opencode-ai/sdk"
import type { AssistantMessage, Session } from "@opencode-ai/sdk"
import type { BoulderState } from "../../features/boulder-state"
import { clearBoulderState, writeBoulderState } from "../../features/boulder-state"

// Redirect opencode's on-disk message storage into a throwaway temp root so
// the hook reads fixture messages instead of the real user storage.
const TEST_STORAGE_ROOT = join(tmpdir(), `atlas-final-wave-storage-${randomUUID()}`)
const TEST_MESSAGE_STORAGE = join(TEST_STORAGE_ROOT, "message")
const TEST_PART_STORAGE = join(TEST_STORAGE_ROOT, "part")
mock.module("../../features/hook-message-injector/constants", () => ({
  OPENCODE_STORAGE: TEST_STORAGE_ROOT,
  MESSAGE_STORAGE: TEST_MESSAGE_STORAGE,
  PART_STORAGE: TEST_PART_STORAGE,
}))
mock.module("../../shared/opencode-message-dir", () => ({
  getMessageDir: (sessionID: string) => {
    const directoryPath = join(TEST_MESSAGE_STORAGE, sessionID)
    return existsSync(directoryPath) ? directoryPath : null
  },
}))
// Force the file-backed storage path; sqlite detection would bypass fixtures.
mock.module("../../shared/opencode-storage-detection", () => ({
  isSqliteBackend: () => false,
}))
// Dynamic imports AFTER mock.module so the hook binds the mocked modules.
const { createAtlasHook } = await import("./index")
const { MESSAGE_STORAGE } = await import("../../features/hook-message-injector")

type AtlasHookContext = Parameters<typeof createAtlasHook>[0]
type PromptMock = ReturnType<typeof mock>

describe("Atlas final verification approval gate", () => {
  let testDirectory = ""
  // Real SDK client with prompt/promptAsync/get patched in-place; _promptMock
  // records any continuation prompt the hook tries to send.
  function createMockPluginInput(): AtlasHookContext & { _promptMock: PromptMock } {
    const client = createOpencodeClient({ baseUrl: "http://localhost" })
    const promptMock = mock((input: unknown) => input)
    Reflect.set(client.session, "prompt", async (input: unknown) => {
      promptMock(input)
      return {
        data: { info: {} as AssistantMessage, parts: [] },
        request: new Request("http://localhost/session/prompt"),
        response: new Response(),
      }
    })
    Reflect.set(client.session, "promptAsync", async (input: unknown) => {
      promptMock(input)
      return {
        data: undefined,
        request: new Request("http://localhost/session/prompt_async"),
        response: new Response(),
      }
    })
    Reflect.set(client.session, "get", async () => {
      return {
        data: { parentID: "main-session-123" } as Session,
        request: new Request("http://localhost/session/main-session-123"),
        response: new Response(),
      }
    })
    return {
      directory: testDirectory,
      project: {} as AtlasHookContext["project"],
      worktree: testDirectory,
      serverUrl: new URL("http://localhost"),
      $: {} as AtlasHookContext["$"],
      client,
      _promptMock: promptMock,
    }
  }
  // Seed one message file so the hook resolves the session's agent as atlas.
  function setupMessageStorage(sessionID: string): void {
    const messageDirectory = join(MESSAGE_STORAGE, sessionID)
    if (!existsSync(messageDirectory)) {
      mkdirSync(messageDirectory, { recursive: true })
    }
    writeFileSync(
      join(messageDirectory, "msg_test001.json"),
      JSON.stringify({
        agent: "atlas",
        model: { providerID: "anthropic", modelID: "claude-opus-4-6" },
      }),
    )
  }
  function cleanupMessageStorage(sessionID: string): void {
    const messageDirectory = join(MESSAGE_STORAGE, sessionID)
    if (existsSync(messageDirectory)) {
      rmSync(messageDirectory, { recursive: true, force: true })
    }
  }
  beforeEach(() => {
    testDirectory = join(tmpdir(), `atlas-final-wave-test-${randomUUID()}`)
    mkdirSync(join(testDirectory, ".sisyphus"), { recursive: true })
    clearBoulderState(testDirectory)
  })
  afterEach(() => {
    clearBoulderState(testDirectory)
    if (existsSync(testDirectory)) {
      rmSync(testDirectory, { recursive: true, force: true })
    }
  })
  test("waits for explicit user approval after the last final-wave approval arrives", async () => {
    // given: plan where only final-wave task F4 remains unchecked, and the
    // task output carries an APPROVE verdict — the gate condition.
    const sessionID = "atlas-final-wave-session"
    setupMessageStorage(sessionID)
    const planPath = join(testDirectory, "final-wave-plan.md")
    writeFileSync(
      planPath,
      `# Plan
## TODOs
- [x] 1. Ship the implementation
## Final Verification Wave (MANDATORY - after ALL implementation tasks)
- [x] F1. **Plan Compliance Audit** - \`oracle\`
- [x] F2. **Code Quality Review** - \`unspecified-high\`
- [x] F3. **Real Manual QA** - \`unspecified-high\`
- [ ] F4. **Scope Fidelity Check** - \`deep\`
`,
    )
    const state: BoulderState = {
      active_plan: planPath,
      started_at: "2026-01-02T10:00:00Z",
      session_ids: [sessionID],
      plan_name: "final-wave-plan",
      agent: "atlas",
    }
    writeBoulderState(testDirectory, state)
    const mockInput = createMockPluginInput()
    const hook = createAtlasHook(mockInput)
    const toolOutput = {
      title: "Sisyphus Task",
      output: `Tasks [4/4 compliant] | Contamination [CLEAN] | Unaccounted [CLEAN] | VERDICT: APPROVE
<task_metadata>
session_id: ses_final_wave_review
</task_metadata>`,
      metadata: {},
    }
    // when: the task completes, then the session goes idle
    await hook["tool.execute.after"]({ tool: "task", sessionID }, toolOutput)
    await hook.handler({ event: { type: "session.idle", properties: { sessionID } } })
    // then: output shows the approval gate, not the auto-continue step, and
    // the idle handler must not have sent any continuation prompt.
    expect(toolOutput.output).toContain("FINAL WAVE APPROVAL GATE")
    expect(toolOutput.output).toContain("explicit user approval")
    expect(toolOutput.output).not.toContain("STEP 8: PROCEED TO NEXT TASK")
    expect(mockInput._promptMock).not.toHaveBeenCalled()
    cleanupMessageStorage(sessionID)
  })
  test("keeps normal auto-continue instructions for non-final tasks", async () => {
    // given: implementation tasks still open — the gate must NOT trigger
    const sessionID = "atlas-non-final-session"
    setupMessageStorage(sessionID)
    const planPath = join(testDirectory, "implementation-plan.md")
    writeFileSync(
      planPath,
      `# Plan
## TODOs
- [x] 1. Setup
- [ ] 2. Implement feature
## Final Verification Wave (MANDATORY - after ALL implementation tasks)
- [ ] F1. **Plan Compliance Audit** - \`oracle\`
- [ ] F2. **Code Quality Review** - \`unspecified-high\`
- [ ] F3. **Real Manual QA** - \`unspecified-high\`
- [ ] F4. **Scope Fidelity Check** - \`deep\`
`,
    )
    const state: BoulderState = {
      active_plan: planPath,
      started_at: "2026-01-02T10:00:00Z",
      session_ids: [sessionID],
      plan_name: "implementation-plan",
      agent: "atlas",
    }
    writeBoulderState(testDirectory, state)
    const hook = createAtlasHook(createMockPluginInput())
    const toolOutput = {
      title: "Sisyphus Task",
      output: `Implementation finished successfully
<task_metadata>
session_id: ses_feature_task
</task_metadata>`,
      metadata: {},
    }
    // when
    await hook["tool.execute.after"]({ tool: "task", sessionID }, toolOutput)
    // then: standard completion gate with the proceed instruction stays
    expect(toolOutput.output).toContain("COMPLETION GATE")
    expect(toolOutput.output).toContain("STEP 8: PROCEED TO NEXT TASK")
    expect(toolOutput.output).not.toContain("FINAL WAVE APPROVAL GATE")
    cleanupMessageStorage(sessionID)
  })
})

View File

@@ -0,0 +1,47 @@
import { existsSync, readFileSync } from "node:fs"
// Matches a reviewer verdict of APPROVE anywhere in the task output.
const APPROVE_VERDICT_PATTERN = /\bVERDICT:\s*APPROVE\b/i
// Matches the "## Final Verification Wave" section heading in the plan file.
const FINAL_VERIFICATION_HEADING_PATTERN = /^##\s+Final Verification Wave\b/i
// Matches an unchecked markdown checkbox and captures the task text.
const UNCHECKED_TASK_PATTERN = /^\s*[-*]\s*\[\s*\]\s*(.+)$/
// Final-wave tasks are numbered F1., F2., … at the start of the task text.
const FINAL_WAVE_TASK_PATTERN = /^F\d+\./i

/**
 * Decide whether the orchestrator should pause for explicit user approval.
 *
 * Returns true only when the task output carries an APPROVE verdict AND the
 * plan file has exactly one unchecked checkbox left, which is an F-numbered
 * task inside the "Final Verification Wave" section. A missing or unreadable
 * plan file never pauses (fails open to the normal continue flow).
 */
export function shouldPauseForFinalWaveApproval(input: {
  planPath: string
  taskOutput: string
}): boolean {
  if (!APPROVE_VERDICT_PATTERN.test(input.taskOutput)) return false
  if (!existsSync(input.planPath)) return false
  try {
    let insideFinalWave = false
    let totalUnchecked = 0
    let finalWaveUnchecked = 0
    for (const line of readFileSync(input.planPath, "utf-8").split(/\r?\n/)) {
      // Every "##" heading re-evaluates which section we are in.
      if (/^##\s+/.test(line)) {
        insideFinalWave = FINAL_VERIFICATION_HEADING_PATTERN.test(line)
      }
      const unchecked = UNCHECKED_TASK_PATTERN.exec(line)
      if (!unchecked) continue
      totalUnchecked += 1
      if (insideFinalWave && FINAL_WAVE_TASK_PATTERN.test(unchecked[1].trim())) {
        finalWaveUnchecked += 1
      }
    }
    // Exactly one unchecked task overall, and it is the final-wave one.
    return totalUnchecked === 1 && finalWaveUnchecked === 1
  } catch {
    return false
  }
}

View File

@@ -0,0 +1,122 @@
// Tests for the idle-event lineage check: a subagent session is only appended
// to the active boulder when its parent chain reaches a tracked session, so
// unrelated subagents never hijack the boulder continuation flow.
import { afterEach, beforeEach, describe, it } from "bun:test"
import assert from "node:assert/strict"
import { randomUUID } from "node:crypto"
import { existsSync, mkdirSync, rmSync, writeFileSync } from "node:fs"
import { tmpdir } from "node:os"
import { join } from "node:path"
import { clearBoulderState, readBoulderState, writeBoulderState } from "../../features/boulder-state"
import type { BoulderState } from "../../features/boulder-state"
import { _resetForTesting, subagentSessions } from "../../features/claude-code-session-state"
const { createAtlasHook } = await import("./index")

describe("atlas hook idle-event session lineage", () => {
  const MAIN_SESSION_ID = "main-session-123"
  let testDirectory = ""
  // Captures every prompt/promptAsync call the hook issues during idle.
  let promptCalls: Array<unknown> = []
  // Writes a boulder state whose plan still has unchecked tasks, so the idle
  // handler considers the boulder active and may inject a continuation.
  function writeIncompleteBoulder(): void {
    const planPath = join(testDirectory, "test-plan.md")
    writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2")
    const state: BoulderState = {
      active_plan: planPath,
      started_at: "2026-01-02T10:00:00Z",
      session_ids: [MAIN_SESSION_ID],
      plan_name: "test-plan",
    }
    writeBoulderState(testDirectory, state)
  }
  // Builds the hook with a stub client whose session.get resolves parent IDs
  // from the provided map — this is the lineage the walk traverses.
  // NOTE(review): the double cast narrows a partial mock to the full plugin
  // input; fields the hook never touches are intentionally omitted.
  function createHook(parentSessionIDs?: Record<string, string | undefined>) {
    return createAtlasHook({
      directory: testDirectory,
      client: {
        session: {
          get: async (input: { path: { id: string } }) => ({
            data: {
              parentID: parentSessionIDs?.[input.path.id],
            },
          }),
          messages: async () => ({ data: [] }),
          prompt: async (input: unknown) => {
            promptCalls.push(input)
            return { data: {} }
          },
          promptAsync: async (input: unknown) => {
            promptCalls.push(input)
            return { data: {} }
          },
        },
      },
    } as unknown as Parameters<typeof createAtlasHook>[0])
  }
  beforeEach(() => {
    testDirectory = join(tmpdir(), `atlas-idle-lineage-${randomUUID()}`)
    if (!existsSync(testDirectory)) {
      mkdirSync(testDirectory, { recursive: true })
    }
    promptCalls = []
    clearBoulderState(testDirectory)
    // Reset shared module-level session registries between tests.
    _resetForTesting()
    subagentSessions.clear()
  })
  afterEach(() => {
    clearBoulderState(testDirectory)
    if (existsSync(testDirectory)) {
      rmSync(testDirectory, { recursive: true, force: true })
    }
    _resetForTesting()
  })
  it("does not append unrelated subagent sessions during idle", async () => {
    // Subagent whose parent chain never reaches the tracked main session.
    const unrelatedSubagentSessionID = "subagent-session-unrelated"
    const unrelatedParentSessionID = "unrelated-parent-session"
    writeIncompleteBoulder()
    subagentSessions.add(unrelatedSubagentSessionID)
    const hook = createHook({
      [unrelatedSubagentSessionID]: unrelatedParentSessionID,
    })
    await hook.handler({
      event: {
        type: "session.idle",
        properties: { sessionID: unrelatedSubagentSessionID },
      },
    })
    // Not appended to the boulder, and no continuation prompt was sent.
    assert.equal(readBoulderState(testDirectory)?.session_ids.includes(unrelatedSubagentSessionID), false)
    assert.equal(promptCalls.length, 0)
  })
  it("appends boulder-owned subagent sessions during idle when lineage reaches tracked session", async () => {
    // Two-hop lineage: subagent -> intermediate parent -> tracked main.
    const subagentSessionID = "subagent-session-456"
    const intermediateParentSessionID = "subagent-parent-789"
    writeIncompleteBoulder()
    subagentSessions.add(subagentSessionID)
    const hook = createHook({
      [subagentSessionID]: intermediateParentSessionID,
      [intermediateParentSessionID]: MAIN_SESSION_ID,
    })
    await hook.handler({
      event: {
        type: "session.idle",
        properties: { sessionID: subagentSessionID },
      },
    })
    // Appended to the boulder and exactly one continuation prompt was sent.
    assert.equal(readBoulderState(testDirectory)?.session_ids.includes(subagentSessionID), true)
    assert.equal(promptCalls.length, 1)
  })
})

View File

@@ -1,10 +1,9 @@
import type { PluginInput } from "@opencode-ai/plugin"
import { appendSessionId, getPlanProgress, readBoulderState } from "../../features/boulder-state"
import type { BoulderState, PlanProgress } from "../../features/boulder-state"
import { subagentSessions } from "../../features/claude-code-session-state"
import { getPlanProgress, readBoulderState } from "../../features/boulder-state"
import { log } from "../../shared/logger"
import { injectBoulderContinuation } from "./boulder-continuation-injector"
import { HOOK_NAME } from "./hook-name"
import { resolveActiveBoulderSession } from "./resolve-active-boulder-session"
import type { AtlasHookOptions, SessionState } from "./types"
const CONTINUATION_COOLDOWN_MS = 5000
@@ -18,44 +17,6 @@ function hasRunningBackgroundTasks(sessionID: string, options?: AtlasHookOptions
: false
}
function resolveActiveBoulderSession(input: {
directory: string
sessionID: string
}): {
boulderState: BoulderState
progress: PlanProgress
appendedSession: boolean
} | null {
const boulderState = readBoulderState(input.directory)
if (!boulderState) {
return null
}
const progress = getPlanProgress(boulderState.active_plan)
if (progress.isComplete) {
return { boulderState, progress, appendedSession: false }
}
if (boulderState.session_ids.includes(input.sessionID)) {
return { boulderState, progress, appendedSession: false }
}
if (!subagentSessions.has(input.sessionID)) {
return null
}
const updatedBoulderState = appendSessionId(input.directory, input.sessionID)
if (!updatedBoulderState?.session_ids.includes(input.sessionID)) {
return null
}
return {
boulderState: updatedBoulderState,
progress,
appendedSession: true,
}
}
async function injectContinuation(input: {
ctx: PluginInput
sessionID: string
@@ -102,6 +63,7 @@ function scheduleRetry(input: {
sessionState.pendingRetryTimer = undefined
if (sessionState.promptFailureCount >= 2) return
if (sessionState.waitingForFinalWaveApproval) return
const currentBoulder = readBoulderState(ctx.directory)
if (!currentBoulder) return
@@ -136,7 +98,8 @@ export async function handleAtlasSessionIdle(input: {
log(`[${HOOK_NAME}] session.idle`, { sessionID })
const activeBoulderSession = resolveActiveBoulderSession({
const activeBoulderSession = await resolveActiveBoulderSession({
client: ctx.client,
directory: ctx.directory,
sessionID,
})
@@ -161,6 +124,11 @@ export async function handleAtlasSessionIdle(input: {
const sessionState = getState(sessionID)
const now = Date.now()
if (sessionState.waitingForFinalWaveApproval) {
log(`[${HOOK_NAME}] Skipped: waiting for explicit final-wave approval`, { sessionID })
return
}
if (sessionState.lastEventWasAbortError) {
sessionState.lastEventWasAbortError = false
log(`[${HOOK_NAME}] Skipped: abort error immediately before idle`, { sessionID })

View File

@@ -45,6 +45,7 @@ describe("atlas hook", () => {
directory: TEST_DIR,
client: {
session: {
get: async () => ({ data: { parentID: "main-session-123" } }),
prompt: promptMock,
promptAsync: promptMock,
},

View File

@@ -0,0 +1,53 @@
import type { PluginInput } from "@opencode-ai/plugin"
import { appendSessionId, getPlanProgress, readBoulderState } from "../../features/boulder-state"
import type { BoulderState, PlanProgress } from "../../features/boulder-state"
import { subagentSessions } from "../../features/claude-code-session-state"
import { isSessionInBoulderLineage } from "./boulder-session-lineage"
export async function resolveActiveBoulderSession(input: {
client: PluginInput["client"]
directory: string
sessionID: string
}): Promise<{
boulderState: BoulderState
progress: PlanProgress
appendedSession: boolean
} | null> {
const boulderState = readBoulderState(input.directory)
if (!boulderState) {
return null
}
const progress = getPlanProgress(boulderState.active_plan)
if (progress.isComplete) {
return { boulderState, progress, appendedSession: false }
}
if (boulderState.session_ids.includes(input.sessionID)) {
return { boulderState, progress, appendedSession: false }
}
if (!subagentSessions.has(input.sessionID)) {
return null
}
const belongsToActiveBoulder = await isSessionInBoulderLineage({
client: input.client,
sessionID: input.sessionID,
boulderSessionIDs: boulderState.session_ids,
})
if (!belongsToActiveBoulder) {
return null
}
const updatedBoulderState = appendSessionId(input.directory, input.sessionID)
if (!updatedBoulderState?.session_ids.includes(input.sessionID)) {
return null
}
return {
boulderState: updatedBoulderState,
progress,
appendedSession: true,
}
}

View File

@@ -3,20 +3,28 @@ import { appendSessionId, getPlanProgress, readBoulderState } from "../../featur
import { log } from "../../shared/logger"
import { isCallerOrchestrator } from "../../shared/session-utils"
import { collectGitDiffStats, formatFileChanges } from "../../shared/git-worktree"
import { shouldPauseForFinalWaveApproval } from "./final-wave-approval-gate"
import { HOOK_NAME } from "./hook-name"
import { DIRECT_WORK_REMINDER } from "./system-reminder-templates"
import { isSisyphusPath } from "./sisyphus-path"
import { extractSessionIdFromOutput } from "./subagent-session-id"
import { buildCompletionGate, buildOrchestratorReminder, buildStandaloneVerificationReminder } from "./verification-reminders"
import {
buildCompletionGate,
buildFinalWaveApprovalReminder,
buildOrchestratorReminder,
buildStandaloneVerificationReminder,
} from "./verification-reminders"
import { isWriteOrEditToolName } from "./write-edit-tool-policy"
import type { SessionState } from "./types"
import type { ToolExecuteAfterInput, ToolExecuteAfterOutput } from "./types"
export function createToolExecuteAfterHandler(input: {
ctx: PluginInput
pendingFilePaths: Map<string, string>
autoCommit: boolean
}): (toolInput: ToolExecuteAfterInput, toolOutput: ToolExecuteAfterOutput) => Promise<void> {
const { ctx, pendingFilePaths, autoCommit } = input
getState: (sessionID: string) => SessionState
}): (toolInput: ToolExecuteAfterInput, toolOutput: ToolExecuteAfterOutput) => Promise<void> {
const { ctx, pendingFilePaths, autoCommit, getState } = input
return async (toolInput, toolOutput): Promise<void> => {
// Guard against undefined output (e.g., from /review command - see issue #1035)
if (!toolOutput) {
@@ -75,10 +83,31 @@ export function createToolExecuteAfterHandler(input: {
// Preserve original subagent response - critical for debugging failed tasks
const originalResponse = toolOutput.output
const shouldPauseForApproval = shouldPauseForFinalWaveApproval({
planPath: boulderState.active_plan,
taskOutput: originalResponse,
})
if (toolInput.sessionID) {
const sessionState = getState(toolInput.sessionID)
sessionState.waitingForFinalWaveApproval = shouldPauseForApproval
if (shouldPauseForApproval && sessionState.pendingRetryTimer) {
clearTimeout(sessionState.pendingRetryTimer)
sessionState.pendingRetryTimer = undefined
}
}
const leadReminder = shouldPauseForApproval
? buildFinalWaveApprovalReminder(boulderState.plan_name, progress, subagentSessionId)
: buildCompletionGate(boulderState.plan_name, subagentSessionId)
const followupReminder = shouldPauseForApproval
? null
: buildOrchestratorReminder(boulderState.plan_name, progress, subagentSessionId, autoCommit, false)
toolOutput.output = `
<system-reminder>
${buildCompletionGate(boulderState.plan_name, subagentSessionId)}
${leadReminder}
</system-reminder>
## SUBAGENT WORK COMPLETED
@@ -91,13 +120,16 @@ ${fileChanges}
${originalResponse}
<system-reminder>
${buildOrchestratorReminder(boulderState.plan_name, progress, subagentSessionId, autoCommit, false)}
</system-reminder>`
${
followupReminder === null
? ""
: `<system-reminder>\n${followupReminder}\n</system-reminder>`
}`
log(`[${HOOK_NAME}] Output transformed for orchestrator mode (boulder)`, {
plan: boulderState.plan_name,
progress: `${progress.completed}/${progress.total}`,
fileCount: gitStats.length,
waitingForFinalWaveApproval: shouldPauseForApproval,
})
} else {
toolOutput.output += `\n<system-reminder>\n${buildStandaloneVerificationReminder(subagentSessionId)}\n</system-reminder>`

View File

@@ -0,0 +1,9 @@
{
"extends": "../../../tsconfig.json",
"compilerOptions": {
"noEmit": true,
"types": ["bun-types"]
},
"include": ["./**/*.ts", "./**/*.d.ts"],
"exclude": []
}

View File

@@ -31,4 +31,5 @@ export interface SessionState {
promptFailureCount: number
lastFailureAt?: number
pendingRetryTimer?: ReturnType<typeof setTimeout>
waitingForFinalWaveApproval?: boolean
}

View File

@@ -108,6 +108,45 @@ ${commitStep}
**${remaining} tasks remain. Keep bouldering.**`
}
/**
 * Build the system reminder injected after the Final Verification Wave passes.
 *
 * Unlike the regular orchestrator reminder, this one instructs the model to
 * STOP and wait for explicit user approval before the last final-wave
 * checkbox may be marked complete.
 *
 * @param planName - Plan file stem; interpolated into `.sisyphus/plans/<planName>.md`.
 * @param progress - Total vs. completed task counts, shown in the banner line.
 * @param sessionId - Forwarded to buildVerificationReminder for its section.
 * @returns Markdown reminder text. NOTE: the template uses trailing `\`
 *          line continuations, so each continued pair renders as one line.
 */
export function buildFinalWaveApprovalReminder(
planName: string,
progress: { total: number; completed: number },
sessionId: string
): string {
// Tasks still unchecked in the plan; displayed in the banner.
const remaining = progress.total - progress.completed
return `
---
**BOULDER STATE:** Plan: \
\`${planName}\` | ${progress.completed}/${progress.total} done | ${remaining} remaining
---
${buildVerificationReminder(sessionId)}
**FINAL WAVE APPROVAL GATE**
The last Final Verification Wave result just passed.
This is the ONLY point where approval-style user interaction is required.
1. Read \
\`.sisyphus/plans/${planName}.md\` again and confirm the remaining unchecked item is the last final-wave task.
2. Consolidate the F1-F4 verdicts into a short summary for the user.
3. Tell the user all final reviewers approved.
4. Ask for explicit user approval before editing the last final-wave checkbox or marking the plan complete.
5. Wait for the user's explicit approval. Do NOT auto-continue. Do NOT call \
\`task()\` again unless the user rejects and requests fixes.
If the user rejects or requests changes:
- delegate the required fix
- re-run the affected final-wave reviewer
- present the updated results again
- wait again for explicit user approval
**DO NOT mark the final-wave checkbox complete until the user explicitly says okay.**`
}
export function buildStandaloneVerificationReminder(sessionId: string): string {
return `
---

View File

@@ -0,0 +1,142 @@
import { beforeEach, describe, expect, it, mock, spyOn } from "bun:test"
import { AUTO_SLASH_COMMAND_TAG_OPEN } from "./constants"
import type {
AutoSlashCommandHookInput,
AutoSlashCommandHookOutput,
CommandExecuteBeforeInput,
CommandExecuteBeforeOutput,
} from "./types"
import * as shared from "../../shared"
// Stub the executor so tests can count invocations without running real
// slash-command execution; it reports success and echoes `raw` back as the
// replacement text.
const executeSlashCommandMock = mock(
async (parsed: { command: string; args: string; raw: string }) => ({
success: true,
replacementText: parsed.raw,
})
)
// Route the hook's "./executor" import to the stub (bun:test module mock).
mock.module("./executor", () => ({
executeSlashCommand: executeSlashCommandMock,
}))
// Silence shared logging while keeping the spy available for assertions.
const logMock = spyOn(shared, "log").mockImplementation(() => {})
// Import AFTER mock.module so the hook module binds to the stubbed executor.
const { createAutoSlashCommandHook } = await import("./hook")
/** Build a minimal chat-message hook input for the given session/message pair. */
function createChatInput(sessionID: string, messageID: string): AutoSlashCommandHookInput {
  const input: AutoSlashCommandHookInput = { sessionID, messageID }
  return input
}
/** Wrap raw text in the chat hook-output shape: empty message, one text part. */
function createChatOutput(text: string): AutoSlashCommandHookOutput {
  const textPart = { type: "text", text }
  return { message: {}, parts: [textPart] }
}
/** Build a command.execute.before input for `command` with empty arguments. */
function createCommandInput(sessionID: string, command: string): CommandExecuteBeforeInput {
  return { sessionID, command, arguments: "" }
}
/** Build a command output holding a single text part. */
function createCommandOutput(text: string): CommandExecuteBeforeOutput {
  const textPart = { type: "text", text }
  return { parts: [textPart] }
}
// Regression suite for the per-hook processed-command stores: dedup keys must
// not grow without bound, must be releasable per session, and dispose() must
// fully reset both stores.
describe("createAutoSlashCommandHook leak prevention", () => {
beforeEach(() => {
executeSlashCommandMock.mockClear()
logMock.mockClear()
})
// Same (session, command, args) key twice → only the first call executes.
describe("#given hook with sessionProcessedCommandExecutions", () => {
describe("#when same command executed twice for same session", () => {
it("#then second execution is deduplicated", async () => {
const hook = createAutoSlashCommandHook()
const input = createCommandInput("session-dedup", "leak-test-command")
const firstOutput = createCommandOutput("first")
const secondOutput = createCommandOutput("second")
await hook["command.execute.before"](input, firstOutput)
await hook["command.execute.before"](input, secondOutput)
expect(executeSlashCommandMock).toHaveBeenCalledTimes(1)
// First call was transformed; the deduped second call is left untouched.
expect(firstOutput.parts[0].text).toContain(AUTO_SLASH_COMMAND_TAG_OPEN)
expect(secondOutput.parts[0].text).toBe("second")
})
})
})
// dispose() must clear BOTH stores so previously-seen keys run again.
describe("#given hook with entries from multiple sessions", () => {
describe("#when dispose() is called", () => {
it("#then both Sets are empty", async () => {
const hook = createAutoSlashCommandHook()
await hook["chat.message"](
createChatInput("session-chat", "message-chat"),
createChatOutput("/leak-chat")
)
await hook["command.execute.before"](
createCommandInput("session-command", "leak-command"),
createCommandOutput("before")
)
executeSlashCommandMock.mockClear()
hook.dispose()
const chatOutputAfterDispose = createChatOutput("/leak-chat")
const commandOutputAfterDispose = createCommandOutput("after")
await hook["chat.message"](
createChatInput("session-chat", "message-chat"),
chatOutputAfterDispose
)
await hook["command.execute.before"](
createCommandInput("session-command", "leak-command"),
commandOutputAfterDispose
)
// Both re-sent keys executed again → dispose really emptied the stores.
expect(executeSlashCommandMock).toHaveBeenCalledTimes(2)
expect(chatOutputAfterDispose.parts[0].text).toContain(AUTO_SLASH_COMMAND_TAG_OPEN)
expect(commandOutputAfterDispose.parts[0].text).toContain(
AUTO_SLASH_COMMAND_TAG_OPEN
)
})
})
})
// Capacity eviction: past 10000 entries the oldest half is shed, so the
// oldest key re-executes while the newest stays deduplicated.
describe("#given Set with more than 10000 entries", () => {
describe("#when new entry added", () => {
it("#then Set size is reduced", async () => {
const hook = createAutoSlashCommandHook()
const oldestInput = createChatInput("session-oldest", "message-oldest")
await hook["chat.message"](oldestInput, createChatOutput("/leak-oldest"))
for (let index = 0; index < 10000; index += 1) {
await hook["chat.message"](
createChatInput(`session-${index}`, `message-${index}`),
createChatOutput(`/leak-${index}`)
)
}
const newestInput = createChatInput("session-newest", "message-newest")
await hook["chat.message"](newestInput, createChatOutput("/leak-newest"))
executeSlashCommandMock.mockClear()
const oldestRetryOutput = createChatOutput("/leak-oldest")
const newestRetryOutput = createChatOutput("/leak-newest")
await hook["chat.message"](oldestInput, oldestRetryOutput)
await hook["chat.message"](newestInput, newestRetryOutput)
// Only the evicted (oldest) key runs again.
expect(executeSlashCommandMock).toHaveBeenCalledTimes(1)
expect(oldestRetryOutput.parts[0].text).toContain(AUTO_SLASH_COMMAND_TAG_OPEN)
expect(newestRetryOutput.parts[0].text).toBe("/leak-newest")
})
})
})
})

View File

@@ -1,86 +1,25 @@
import { existsSync, readdirSync, readFileSync } from "fs"
import { join, basename, dirname } from "path"
import { dirname } from "path"
import {
parseFrontmatter,
resolveCommandsInText,
resolveFileReferencesInText,
sanitizeModelField,
getClaudeConfigDir,
getOpenCodeConfigDir,
discoverPluginCommandDefinitions,
} from "../../shared"
import { loadBuiltinCommands } from "../../features/builtin-commands"
import type { CommandFrontmatter } from "../../features/claude-code-command-loader/types"
import { isMarkdownFile } from "../../shared/file-utils"
import { discoverAllSkills, type LoadedSkill, type LazyContentLoader } from "../../features/opencode-skill-loader"
import { discoverCommandsSync } from "../../tools/slashcommand"
import type { CommandInfo as DiscoveredCommandInfo, CommandMetadata } from "../../tools/slashcommand/types"
import type { ParsedSlashCommand } from "./types"
interface CommandScope {
type: "user" | "project" | "opencode" | "opencode-project" | "skill" | "builtin" | "plugin"
}
interface CommandMetadata {
name: string
description: string
argumentHint?: string
model?: string
agent?: string
subtask?: boolean
}
interface CommandInfo {
interface SkillCommandInfo {
name: string
path?: string
metadata: CommandMetadata
content?: string
scope: CommandScope["type"]
scope: "skill"
lazyContentLoader?: LazyContentLoader
}
function discoverCommandsFromDir(commandsDir: string, scope: CommandScope["type"]): CommandInfo[] {
if (!existsSync(commandsDir)) {
return []
}
type CommandInfo = DiscoveredCommandInfo | SkillCommandInfo
const entries = readdirSync(commandsDir, { withFileTypes: true })
const commands: CommandInfo[] = []
for (const entry of entries) {
if (!isMarkdownFile(entry)) continue
const commandPath = join(commandsDir, entry.name)
const commandName = basename(entry.name, ".md")
try {
const content = readFileSync(commandPath, "utf-8")
const { data, body } = parseFrontmatter<CommandFrontmatter>(content)
const isOpencodeSource = scope === "opencode" || scope === "opencode-project"
const metadata: CommandMetadata = {
name: commandName,
description: data.description || "",
argumentHint: data["argument-hint"],
model: sanitizeModelField(data.model, isOpencodeSource ? "opencode" : "claude-code"),
agent: data.agent,
subtask: Boolean(data.subtask),
}
commands.push({
name: commandName,
path: commandPath,
metadata,
content: body,
scope,
})
} catch {
continue
}
}
return commands
}
function skillToCommandInfo(skill: LoadedSkill): CommandInfo {
function skillToCommandInfo(skill: LoadedSkill): SkillCommandInfo {
return {
name: skill.name,
path: skill.path,
@@ -104,60 +43,30 @@ export interface ExecutorOptions {
enabledPluginsOverride?: Record<string, boolean>
}
function discoverPluginCommands(options?: ExecutorOptions): CommandInfo[] {
const pluginDefinitions = discoverPluginCommandDefinitions(options)
return Object.entries(pluginDefinitions).map(([name, definition]) => ({
name,
metadata: {
name,
description: definition.description || "",
model: definition.model,
agent: definition.agent,
subtask: definition.subtask,
},
content: definition.template,
scope: "plugin",
}))
/**
 * Keep only the commands whose scope matches the requested one.
 * Pure: the input array is not mutated and relative order is preserved.
 */
function filterDiscoveredCommandsByScope(
  commands: DiscoveredCommandInfo[],
  scope: DiscoveredCommandInfo["scope"],
): DiscoveredCommandInfo[] {
  const matching: DiscoveredCommandInfo[] = []
  for (const command of commands) {
    if (command.scope === scope) {
      matching.push(command)
    }
  }
  return matching
}
async function discoverAllCommands(options?: ExecutorOptions): Promise<CommandInfo[]> {
const configDir = getOpenCodeConfigDir({ binary: "opencode" })
const userCommandsDir = join(getClaudeConfigDir(), "commands")
const projectCommandsDir = join(process.cwd(), ".claude", "commands")
const opencodeGlobalDir = join(configDir, "command")
const opencodeProjectDir = join(process.cwd(), ".opencode", "command")
const userCommands = discoverCommandsFromDir(userCommandsDir, "user")
const opencodeGlobalCommands = discoverCommandsFromDir(opencodeGlobalDir, "opencode")
const projectCommands = discoverCommandsFromDir(projectCommandsDir, "project")
const opencodeProjectCommands = discoverCommandsFromDir(opencodeProjectDir, "opencode-project")
const builtinCommandsMap = loadBuiltinCommands()
const builtinCommands: CommandInfo[] = Object.values(builtinCommandsMap).map(cmd => ({
name: cmd.name,
metadata: {
name: cmd.name,
description: cmd.description || "",
model: cmd.model,
agent: cmd.agent,
subtask: cmd.subtask,
},
content: cmd.template,
scope: "builtin",
}))
const discoveredCommands = discoverCommandsSync(process.cwd(), {
pluginsEnabled: options?.pluginsEnabled,
enabledPluginsOverride: options?.enabledPluginsOverride,
})
const skills = options?.skills ?? await discoverAllSkills()
const skillCommands = skills.map(skillToCommandInfo)
const pluginCommands = discoverPluginCommands(options)
return [
...builtinCommands,
...opencodeProjectCommands,
...projectCommands,
...opencodeGlobalCommands,
...userCommands,
...filterDiscoveredCommandsByScope(discoveredCommands, "builtin"),
...filterDiscoveredCommandsByScope(discoveredCommands, "opencode-project"),
...filterDiscoveredCommandsByScope(discoveredCommands, "project"),
...filterDiscoveredCommandsByScope(discoveredCommands, "opencode"),
...filterDiscoveredCommandsByScope(discoveredCommands, "user"),
...skillCommands,
...pluginCommands,
...filterDiscoveredCommandsByScope(discoveredCommands, "plugin"),
]
}

View File

@@ -9,6 +9,7 @@ import {
AUTO_SLASH_COMMAND_TAG_CLOSE,
AUTO_SLASH_COMMAND_TAG_OPEN,
} from "./constants"
import { createProcessedCommandStore } from "./processed-command-store"
import type {
AutoSlashCommandHookInput,
AutoSlashCommandHookOutput,
@@ -17,8 +18,22 @@ import type {
} from "./types"
import type { LoadedSkill } from "../../features/opencode-skill-loader"
const sessionProcessedCommands = new Set<string>()
const sessionProcessedCommandExecutions = new Set<string>()
/** Narrow an unknown value to a plain keyed object (non-null, typeof "object"). */
function isRecord(value: unknown): value is Record<string, unknown> {
  return value !== null && typeof value === "object"
}

/**
 * Pull the deleted session's id out of a session.deleted event payload.
 * Expects the shape `{ info: { id: string } }`; any deviation yields null.
 */
function getDeletedSessionID(properties: unknown): string | null {
  if (!isRecord(properties)) {
    return null
  }
  const sessionInfo = properties.info
  if (!isRecord(sessionInfo)) {
    return null
  }
  const { id } = sessionInfo
  return typeof id === "string" ? id : null
}
export interface AutoSlashCommandHookOptions {
skills?: LoadedSkill[]
@@ -32,6 +47,13 @@ export function createAutoSlashCommandHook(options?: AutoSlashCommandHookOptions
pluginsEnabled: options?.pluginsEnabled,
enabledPluginsOverride: options?.enabledPluginsOverride,
}
const sessionProcessedCommands = createProcessedCommandStore()
const sessionProcessedCommandExecutions = createProcessedCommandStore()
const dispose = (): void => {
sessionProcessedCommands.clear()
sessionProcessedCommandExecutions.clear()
}
return {
"chat.message": async (
@@ -61,7 +83,9 @@ export function createAutoSlashCommandHook(options?: AutoSlashCommandHookOptions
return
}
const commandKey = `${input.sessionID}:${input.messageID}:${parsed.command}`
const commandKey = input.messageID
? `${input.sessionID}:${input.messageID}:${parsed.command}`
: `${input.sessionID}:${parsed.command}`
if (sessionProcessedCommands.has(commandKey)) {
return
}
@@ -101,7 +125,7 @@ export function createAutoSlashCommandHook(options?: AutoSlashCommandHookOptions
input: CommandExecuteBeforeInput,
output: CommandExecuteBeforeOutput
): Promise<void> => {
const commandKey = `${input.sessionID}:${input.command}:${Date.now()}`
const commandKey = `${input.sessionID}:${input.command.toLowerCase()}:${input.arguments || ""}`
if (sessionProcessedCommandExecutions.has(commandKey)) {
return
}
@@ -145,5 +169,23 @@ export function createAutoSlashCommandHook(options?: AutoSlashCommandHookOptions
command: input.command,
})
},
event: async ({
event,
}: {
event: { type: string; properties?: unknown }
}): Promise<void> => {
if (event.type !== "session.deleted") {
return
}
const sessionID = getDeletedSessionID(event.properties)
if (!sessionID) {
return
}
sessionProcessedCommands.cleanupSession(sessionID)
sessionProcessedCommandExecutions.cleanupSession(sessionID)
},
dispose,
}
}

View File

@@ -1,4 +1,5 @@
import { describe, expect, it, beforeEach, mock, spyOn } from "bun:test"
import type { LoadedSkill } from "../../features/opencode-skill-loader/types"
import type {
AutoSlashCommandHookInput,
AutoSlashCommandHookOutput,
@@ -328,5 +329,89 @@ describe("createAutoSlashCommandHook", () => {
})
)
})
})
// Skills exposed as slash commands: both entry points (chat.message and
// command.execute.before) must substitute the skill template, including
// lazily-loaded template content.
describe("skills as slash commands", () => {
// Minimal LoadedSkill fixture with an inline template.
function createTestSkill(name: string, template: string): LoadedSkill {
return {
name,
path: `/test/skills/${name}/SKILL.md`,
definition: {
name,
description: `Test skill: ${name}`,
template,
},
scope: "user",
}
}
it("should replace message with skill template when skill is used as slash command via chat.message", async () => {
// given a hook with a skill
const skill = createTestSkill("my-test-skill", "This is the skill template content")
const hook = createAutoSlashCommandHook({ skills: [skill] })
const sessionID = `test-session-skill-chat-${Date.now()}`
const input = createMockInput(sessionID)
const output = createMockOutput("/my-test-skill some arguments")
// when hook processes the message
await hook["chat.message"](input, output)
// then should replace message with skill template
expect(output.parts[0].text).toContain("<auto-slash-command>")
expect(output.parts[0].text).toContain("/my-test-skill Command")
expect(output.parts[0].text).toContain("This is the skill template content")
})
it("should inject skill template via command.execute.before", async () => {
// given a hook with a skill
const skill = createTestSkill("my-test-skill", "Skill template for command execute")
const hook = createAutoSlashCommandHook({ skills: [skill] })
// Unique sessionID per run so the hook's dedup store never skips this call.
const input: CommandExecuteBeforeInput = {
command: "my-test-skill",
sessionID: `test-session-skill-cmd-${Date.now()}-${Math.random()}`,
arguments: "extra args",
}
const output: CommandExecuteBeforeOutput = {
parts: [{ type: "text", text: "original" }],
}
// when hook processes the command
await hook["command.execute.before"](input, output)
// then should inject skill template
expect(output.parts[0].text).toContain("<auto-slash-command>")
expect(output.parts[0].text).toContain("/my-test-skill Command")
expect(output.parts[0].text).toContain("Skill template for command execute")
expect(output.parts[0].text).toContain("extra args")
})
it("should handle skill with lazy content loader", async () => {
// given a skill with lazy content (no inline template)
const skill: LoadedSkill = {
name: "lazy-skill",
path: "/test/skills/lazy-skill/SKILL.md",
definition: {
name: "lazy-skill",
description: "A lazy-loaded skill",
template: "",
},
scope: "user",
lazyContent: {
loaded: false,
load: async () => "Lazy loaded skill content here",
},
}
const hook = createAutoSlashCommandHook({ skills: [skill] })
const sessionID = `test-session-lazy-skill-${Date.now()}`
const input = createMockInput(sessionID)
const output = createMockOutput("/lazy-skill")
// when hook processes the message
await hook["chat.message"](input, output)
// then should replace message with lazily loaded content
expect(output.parts[0].text).toContain("<auto-slash-command>")
expect(output.parts[0].text).toContain("Lazy loaded skill content here")
})
})
})

View File

@@ -0,0 +1,41 @@
// Hard cap on remembered command keys; beyond this the store sheds history.
const MAX_PROCESSED_ENTRY_COUNT = 10_000

/**
 * Enforce the size cap on a processed-key set.
 * Returns the set unchanged while within the cap; otherwise returns a NEW Set
 * holding only the newer half (Sets iterate in insertion order, so the first
 * half of iteration is the oldest entries).
 */
function trimProcessedEntries(entries: Set<string>): Set<string> {
  if (entries.size <= MAX_PROCESSED_ENTRY_COUNT) {
    return entries
  }
  const ordered = Array.from(entries)
  const firstKept = Math.floor(ordered.length / 2)
  return new Set(ordered.slice(firstKept))
}
/**
 * Return a copy of `entries` without any key belonging to `sessionID`.
 * Keys are namespaced as "<sessionID>:..." so a prefix match suffices.
 * The input set is never mutated.
 */
function removeSessionEntries(entries: Set<string>, sessionID: string): Set<string> {
  const prefix = `${sessionID}:`
  const kept = new Set<string>()
  for (const entry of entries) {
    if (!entry.startsWith(prefix)) {
      kept.add(entry)
    }
  }
  return kept
}
/**
 * Bounded, session-aware dedup store for processed slash-command keys.
 * Keys are expected to be prefixed "<sessionID>:" so cleanupSession can drop
 * a whole session's entries at once.
 */
export interface ProcessedCommandStore {
  has(commandKey: string): boolean
  add(commandKey: string): void
  cleanupSession(sessionID: string): void
  clear(): void
}

/** Create a store backed by an insertion-ordered Set, capped via trimProcessedEntries. */
export function createProcessedCommandStore(): ProcessedCommandStore {
  let entries = new Set<string>()
  const has = (commandKey: string): boolean => entries.has(commandKey)
  const add = (commandKey: string): void => {
    entries.add(commandKey)
    // Re-assign: trimming may replace the backing Set with a smaller one.
    entries = trimProcessedEntries(entries)
  }
  const cleanupSession = (sessionID: string): void => {
    entries = removeSessionEntries(entries, sessionID)
  }
  const clear = (): void => {
    entries.clear()
  }
  return { has, add, cleanupSession, clear }
}

View File

@@ -3,7 +3,8 @@ export { getLocalDevVersion } from "./checker/local-dev-version"
export { findPluginEntry } from "./checker/plugin-entry"
export type { PluginEntryInfo } from "./checker/plugin-entry"
export { getCachedVersion } from "./checker/cached-version"
export { updatePinnedVersion, revertPinnedVersion } from "./checker/pinned-version-updater"
export { updatePinnedVersion } from "./checker/pinned-version-updater"
export { getLatestVersion } from "./checker/latest-version"
export { checkForUpdate } from "./checker/check-for-update"
export { syncCachePackageJsonToIntent } from "./checker/sync-package-json"
export type { SyncResult } from "./checker/sync-package-json"

View File

@@ -11,9 +11,7 @@ export interface PluginEntryInfo {
configPath: string
}
function isExplicitVersionPin(pinnedVersion: string): boolean {
return /^\d+\.\d+\.\d+/.test(pinnedVersion)
}
const EXACT_SEMVER_REGEX = /^\d+\.\d+\.\d+(-[0-9A-Za-z-]+(\.[0-9A-Za-z-]+)*)?(\+[0-9A-Za-z-]+(\.[0-9A-Za-z-]+)*)?$/
export function findPluginEntry(directory: string): PluginEntryInfo | null {
for (const configPath of getConfigPaths(directory)) {
@@ -29,7 +27,7 @@ export function findPluginEntry(directory: string): PluginEntryInfo | null {
}
if (entry.startsWith(`${PACKAGE_NAME}@`)) {
const pinnedVersion = entry.slice(PACKAGE_NAME.length + 1)
const isPinned = isExplicitVersionPin(pinnedVersion)
const isPinned = EXACT_SEMVER_REGEX.test(pinnedVersion.trim())
return { entry, isPinned, pinnedVersion, configPath }
}
}

View File

@@ -8,6 +8,14 @@ const TEST_CACHE_DIR = join(import.meta.dir, "__test-sync-cache__")
mock.module("../constants", () => ({
CACHE_DIR: TEST_CACHE_DIR,
PACKAGE_NAME: "oh-my-opencode",
NPM_REGISTRY_URL: "https://registry.npmjs.org/-/package/oh-my-opencode/dist-tags",
NPM_FETCH_TIMEOUT: 5000,
VERSION_FILE: join(TEST_CACHE_DIR, "version"),
USER_CONFIG_DIR: "/tmp/opencode-config",
USER_OPENCODE_CONFIG: "/tmp/opencode-config/opencode.json",
USER_OPENCODE_CONFIG_JSONC: "/tmp/opencode-config/opencode.jsonc",
INSTALLED_PACKAGE_JSON: join(TEST_CACHE_DIR, "node_modules", "oh-my-opencode", "package.json"),
getWindowsAppdataDir: () => null,
}))
mock.module("../../../shared/logger", () => ({
@@ -59,11 +67,10 @@ describe("syncCachePackageJsonToIntent", () => {
configPath: "/tmp/opencode.json",
}
//#when
const result = syncCachePackageJsonToIntent(pluginInfo)
//#then
expect(result).toBe(true)
expect(result.synced).toBe(true)
expect(result.error).toBeNull()
expect(readCachePackageJsonVersion()).toBe("latest")
})
})
@@ -79,11 +86,10 @@ describe("syncCachePackageJsonToIntent", () => {
configPath: "/tmp/opencode.json",
}
//#when
const result = syncCachePackageJsonToIntent(pluginInfo)
//#then
expect(result).toBe(true)
expect(result.synced).toBe(true)
expect(result.error).toBeNull()
expect(readCachePackageJsonVersion()).toBe("next")
})
})
@@ -99,19 +105,17 @@ describe("syncCachePackageJsonToIntent", () => {
configPath: "/tmp/opencode.json",
}
//#when
const result = syncCachePackageJsonToIntent(pluginInfo)
//#then
expect(result).toBe(true)
expect(result.synced).toBe(true)
expect(result.error).toBeNull()
expect(readCachePackageJsonVersion()).toBe("latest")
})
})
})
describe("#given cache package.json already matches intent", () => {
it("#then returns false without modifying package.json", async () => {
//#given
it("#then returns synced false with no error", async () => {
resetTestCache("latest")
const { syncCachePackageJsonToIntent } = await import("./sync-package-json")
@@ -122,18 +126,16 @@ describe("syncCachePackageJsonToIntent", () => {
configPath: "/tmp/opencode.json",
}
//#when
const result = syncCachePackageJsonToIntent(pluginInfo)
//#then
expect(result).toBe(false)
expect(result.synced).toBe(false)
expect(result.error).toBeNull()
expect(readCachePackageJsonVersion()).toBe("latest")
})
})
describe("#given cache package.json does not exist", () => {
it("#then returns false", async () => {
//#given
it("#then returns file_not_found error", async () => {
cleanupTestCache()
const { syncCachePackageJsonToIntent } = await import("./sync-package-json")
@@ -144,17 +146,15 @@ describe("syncCachePackageJsonToIntent", () => {
configPath: "/tmp/opencode.json",
}
//#when
const result = syncCachePackageJsonToIntent(pluginInfo)
//#then
expect(result).toBe(false)
expect(result.synced).toBe(false)
expect(result.error).toBe("file_not_found")
})
})
describe("#given plugin not in cache package.json dependencies", () => {
it("#then returns false", async () => {
//#given
it("#then returns plugin_not_in_deps error", async () => {
cleanupTestCache()
mkdirSync(TEST_CACHE_DIR, { recursive: true })
writeFileSync(
@@ -171,17 +171,15 @@ describe("syncCachePackageJsonToIntent", () => {
configPath: "/tmp/opencode.json",
}
//#when
const result = syncCachePackageJsonToIntent(pluginInfo)
//#then
expect(result).toBe(false)
expect(result.synced).toBe(false)
expect(result.error).toBe("plugin_not_in_deps")
})
})
describe("#given user explicitly pinned a different semver", () => {
describe("#given user explicitly changed from one semver to another", () => {
it("#then updates package.json to new version", async () => {
//#given
resetTestCache("3.9.0")
const { syncCachePackageJsonToIntent } = await import("./sync-package-json")
@@ -192,18 +190,16 @@ describe("syncCachePackageJsonToIntent", () => {
configPath: "/tmp/opencode.json",
}
//#when
const result = syncCachePackageJsonToIntent(pluginInfo)
//#then
expect(result).toBe(true)
expect(result.synced).toBe(true)
expect(result.error).toBeNull()
expect(readCachePackageJsonVersion()).toBe("3.10.0")
})
})
describe("#given other dependencies exist in cache package.json", () => {
it("#then preserves other dependencies while updating the plugin", async () => {
//#given
describe("#given cache package.json with other dependencies", () => {
it("#then other dependencies are preserved when updating plugin version", async () => {
const { syncCachePackageJsonToIntent } = await import("./sync-package-json")
const pluginInfo: PluginEntryInfo = {
@@ -213,14 +209,133 @@ describe("syncCachePackageJsonToIntent", () => {
configPath: "/tmp/opencode.json",
}
//#when
syncCachePackageJsonToIntent(pluginInfo)
const result = syncCachePackageJsonToIntent(pluginInfo)
expect(result.synced).toBe(true)
expect(result.error).toBeNull()
//#then
const content = readFileSync(join(TEST_CACHE_DIR, "package.json"), "utf-8")
const pkg = JSON.parse(content) as { dependencies?: Record<string, string> }
expect(pkg.dependencies?.other).toBe("1.0.0")
expect(pkg.dependencies?.["oh-my-opencode"]).toBe("latest")
expect(pkg.dependencies?.["other"]).toBe("1.0.0")
})
})
// Error-path coverage for syncCachePackageJsonToIntent's SyncResult contract:
// parse_error on malformed JSON, write_error on failed write/rename, and
// temp-file cleanup after a failed rename.
describe("#given malformed JSON in cache package.json", () => {
it("#then returns parse_error", async () => {
cleanupTestCache()
mkdirSync(TEST_CACHE_DIR, { recursive: true })
writeFileSync(join(TEST_CACHE_DIR, "package.json"), "{ invalid json }")
const { syncCachePackageJsonToIntent } = await import("./sync-package-json")
const pluginInfo: PluginEntryInfo = {
entry: "oh-my-opencode@latest",
isPinned: false,
pinnedVersion: "latest",
configPath: "/tmp/opencode.json",
}
const result = syncCachePackageJsonToIntent(pluginInfo)
expect(result.synced).toBe(false)
expect(result.error).toBe("parse_error")
})
})
describe("#given write permission denied", () => {
it("#then returns write_error", async () => {
cleanupTestCache()
mkdirSync(TEST_CACHE_DIR, { recursive: true })
writeFileSync(
join(TEST_CACHE_DIR, "package.json"),
JSON.stringify({ dependencies: { "oh-my-opencode": "3.10.0" } }, null, 2)
)
const fs = await import("node:fs")
const originalWriteFileSync = fs.writeFileSync
const originalRenameSync = fs.renameSync
// Fail the temp-file write; leave rename untouched.
// NOTE(review): relies on bun's mock.module patching a module that the SUT
// imports; verify the dynamic import below is not served from an earlier
// cached instance bound to the real fs.
mock.module("node:fs", () => ({
...fs,
writeFileSync: mock(() => {
throw new Error("EACCES: permission denied")
}),
renameSync: fs.renameSync,
}))
try {
const { syncCachePackageJsonToIntent } = await import("./sync-package-json")
const pluginInfo: PluginEntryInfo = {
entry: "oh-my-opencode@latest",
isPinned: false,
pinnedVersion: "latest",
configPath: "/tmp/opencode.json",
}
const result = syncCachePackageJsonToIntent(pluginInfo)
expect(result.synced).toBe(false)
expect(result.error).toBe("write_error")
} finally {
// Restore the real fs functions via a fresh module mock.
mock.module("node:fs", () => ({
...fs,
writeFileSync: originalWriteFileSync,
renameSync: originalRenameSync,
}))
}
})
})
describe("#given rename fails after successful write", () => {
it("#then returns write_error and cleans up temp file", async () => {
cleanupTestCache()
mkdirSync(TEST_CACHE_DIR, { recursive: true })
writeFileSync(
join(TEST_CACHE_DIR, "package.json"),
JSON.stringify({ dependencies: { "oh-my-opencode": "3.10.0" } }, null, 2)
)
const fs = await import("node:fs")
const originalWriteFileSync = fs.writeFileSync
const originalRenameSync = fs.renameSync
// Capture the temp path the SUT writes to, then make the atomic rename fail.
let tempFilePath: string | null = null
mock.module("node:fs", () => ({
...fs,
writeFileSync: mock((path: string, data: string) => {
tempFilePath = path
return originalWriteFileSync(path, data)
}),
renameSync: mock(() => {
throw new Error("EXDEV: cross-device link not permitted")
}),
}))
try {
const { syncCachePackageJsonToIntent } = await import("./sync-package-json")
const pluginInfo: PluginEntryInfo = {
entry: "oh-my-opencode@latest",
isPinned: false,
pinnedVersion: "latest",
configPath: "/tmp/opencode.json",
}
const result = syncCachePackageJsonToIntent(pluginInfo)
expect(result.synced).toBe(false)
expect(result.error).toBe("write_error")
// The temp file must have been created and then unlinked by safeUnlink.
expect(tempFilePath).not.toBeNull()
expect(existsSync(tempFilePath!)).toBe(false)
} finally {
mock.module("node:fs", () => ({
...fs,
writeFileSync: originalWriteFileSync,
renameSync: originalRenameSync,
}))
}
})
})
})

View File

@@ -1,3 +1,4 @@
import * as crypto from "node:crypto"
import * as fs from "node:fs"
import * as path from "node:path"
import { CACHE_DIR, PACKAGE_NAME } from "../constants"
@@ -8,6 +9,22 @@ interface CachePackageJson {
dependencies?: Record<string, string>
}
/**
 * Outcome of syncing the cache package.json to the opencode.json intent.
 * `synced` is true only when the file was actually rewritten; `error` is a
 * machine-readable failure category (null on success or no-op); `message` is
 * an optional human-readable detail for logging.
 */
export interface SyncResult {
synced: boolean
error: "file_not_found" | "plugin_not_in_deps" | "parse_error" | "write_error" | null
message?: string
}
// Matches an exact semver (major.minor.patch with optional pre-release and
// build metadata), fully anchored — dist-tags like "latest"/"next" do NOT match.
const EXACT_SEMVER_REGEX = /^\d+\.\d+\.\d+(-[0-9A-Za-z-]+(\.[0-9A-Za-z-]+)*)?(\+[0-9A-Za-z-]+(\.[0-9A-Za-z-]+)*)?$/
/**
 * Best-effort removal of a temporary file.
 * Never throws: failures (e.g. the file is already gone) are only logged,
 * because this runs on error paths where a second throw would mask the
 * original failure.
 */
function safeUnlink(filePath: string): void {
try {
fs.unlinkSync(filePath)
} catch (err) {
log(`[auto-update-checker] Failed to cleanup temp file: ${filePath}`, err)
}
}
function getIntentVersion(pluginInfo: PluginEntryInfo): string {
if (!pluginInfo.pinnedVersion) {
return "latest"
@@ -15,49 +32,67 @@ function getIntentVersion(pluginInfo: PluginEntryInfo): string {
return pluginInfo.pinnedVersion
}
/**
* Sync cache package.json to match opencode.json plugin intent before bun install.
*
* OpenCode pins resolved versions in cache package.json (e.g., "3.11.0" instead of "latest").
* When auto-update detects a newer version and runs `bun install`, it re-resolves the pinned
* version instead of the user's declared tag, causing updates to silently fail.
*
* @returns true if package.json was updated, false otherwise
*/
export function syncCachePackageJsonToIntent(pluginInfo: PluginEntryInfo): boolean {
/**
 * Reconciles the cache `package.json`'s pinned plugin version with the version
 * the user declared in opencode.json (the "intent"), so a later `bun install`
 * resolves the intended version instead of a stale one.
 *
 * NOTE(review): the original span was a merged diff with `+`/`-` markers
 * stripped — both the old boolean-returning body and the new SyncResult body
 * were interleaved, leaving unreachable statements (`return false` directly
 * before each object return, and a duplicated try-block). This is the
 * reconstructed final version in which every statement is reachable and the
 * SyncResult return type is honored.
 *
 * @param pluginInfo - entry info for this plugin from the resolved config.
 * @returns a SyncResult: `synced: true` only when the file was rewritten;
 *   otherwise `error` distinguishes file_not_found / parse_error /
 *   plugin_not_in_deps / write_error, with `error: null` for benign no-ops.
 */
export function syncCachePackageJsonToIntent(pluginInfo: PluginEntryInfo): SyncResult {
  const cachePackageJsonPath = path.join(CACHE_DIR, "package.json")
  if (!fs.existsSync(cachePackageJsonPath)) {
    log("[auto-update-checker] Cache package.json not found, nothing to sync")
    return { synced: false, error: "file_not_found", message: "Cache package.json not found" }
  }
  let content: string
  let pkgJson: CachePackageJson
  // Read and parse in separate try-blocks so the returned error message
  // distinguishes an unreadable file from malformed JSON.
  try {
    content = fs.readFileSync(cachePackageJsonPath, "utf-8")
  } catch (err) {
    log("[auto-update-checker] Failed to read cache package.json:", err)
    return { synced: false, error: "parse_error", message: "Failed to read cache package.json" }
  }
  try {
    pkgJson = JSON.parse(content) as CachePackageJson
  } catch (err) {
    log("[auto-update-checker] Failed to parse cache package.json:", err)
    return { synced: false, error: "parse_error", message: "Failed to parse cache package.json (malformed JSON)" }
  }
  if (!pkgJson || !pkgJson.dependencies?.[PACKAGE_NAME]) {
    log("[auto-update-checker] Plugin not in cache package.json dependencies, nothing to sync")
    return { synced: false, error: "plugin_not_in_deps", message: "Plugin not in cache package.json dependencies" }
  }
  const currentVersion = pkgJson.dependencies[PACKAGE_NAME]
  const intentVersion = getIntentVersion(pluginInfo)
  if (currentVersion === intentVersion) {
    log("[auto-update-checker] Cache package.json already matches intent:", intentVersion)
    return { synced: false, error: null, message: `Already matches intent: ${intentVersion}` }
  }
  // Distinguish the "user moved from a pinned semver to a dist-tag" case in
  // the log so the sync reason is auditable; the mutation below is identical
  // either way.
  const intentIsTag = !EXACT_SEMVER_REGEX.test(intentVersion.trim())
  const currentIsSemver = EXACT_SEMVER_REGEX.test(String(currentVersion).trim())
  if (intentIsTag && currentIsSemver) {
    log(
      `[auto-update-checker] Syncing cache package.json: "${currentVersion}" → "${intentVersion}" (opencode.json intent)`
    )
  } else {
    log(
      `[auto-update-checker] Updating cache package.json: "${currentVersion}" → "${intentVersion}"`
    )
  }
  pkgJson.dependencies[PACKAGE_NAME] = intentVersion
  // Atomic replace: write to a unique temp file, then rename over the
  // original so a crash mid-write never leaves a truncated package.json.
  const tmpPath = `${cachePackageJsonPath}.${crypto.randomUUID()}`
  try {
    fs.writeFileSync(tmpPath, JSON.stringify(pkgJson, null, 2))
    fs.renameSync(tmpPath, cachePackageJsonPath)
    return { synced: true, error: null, message: `Updated: "${currentVersion}" → "${intentVersion}"` }
  } catch (err) {
    log("[auto-update-checker] Failed to write cache package.json:", err)
    safeUnlink(tmpPath)
    return { synced: false, error: "write_error", message: "Failed to write cache package.json" }
  }
}

View File

@@ -1,12 +1,6 @@
/// <reference types="bun-types" />
import type { BunInstallResult } from "../../../cli/config-manager"
import type { PluginInput } from "@opencode-ai/plugin"
import { beforeEach, describe, expect, it, mock } from "bun:test"
type PluginInput = {
directory: string
}
type PluginEntry = {
entry: string
isPinned: boolean
@@ -30,14 +24,8 @@ const mockFindPluginEntry = mock((_directory: string): PluginEntry | null => cre
const mockGetCachedVersion = mock((): string | null => "3.4.0")
const mockGetLatestVersion = mock(async (): Promise<string | null> => "3.5.0")
const mockExtractChannel = mock(() => "latest")
const operationOrder: string[] = []
const mockSyncCachePackageJsonToIntent = mock((_pluginEntry: PluginEntry) => {
operationOrder.push("sync")
})
const mockInvalidatePackage = mock((_packageName: string) => {
operationOrder.push("invalidate")
})
const mockRunBunInstallWithDetails = mock(async (): Promise<BunInstallResult> => ({ success: true }))
const mockInvalidatePackage = mock(() => {})
const mockRunBunInstallWithDetails = mock(async () => ({ success: true }))
const mockShowUpdateAvailableToast = mock(
async (_ctx: PluginInput, _latestVersion: string, _getToastMessage: ToastMessageGetter): Promise<void> => {}
)
@@ -45,6 +33,8 @@ const mockShowAutoUpdatedToast = mock(
async (_ctx: PluginInput, _fromVersion: string, _toVersion: string): Promise<void> => {}
)
const mockSyncCachePackageJsonToIntent = mock(() => false)
mock.module("../checker", () => ({
findPluginEntry: mockFindPluginEntry,
getCachedVersion: mockGetCachedVersion,
@@ -64,89 +54,85 @@ mock.module("../../../shared/logger", () => ({ log: () => {} }))
const modulePath = "./background-update-check?test"
const { runBackgroundUpdateCheck } = await import(modulePath)
const mockContext = { directory: "/test" } as PluginInput
const getToastMessage: ToastMessageGetter = (isUpdate, version) =>
isUpdate ? `Update to ${version}` : "Up to date"
async function runCheck(autoUpdate = true): Promise<void> {
await runBackgroundUpdateCheck(mockContext, autoUpdate, getToastMessage)
}
function expectNoUpdateEffects(): void {
expect(mockShowUpdateAvailableToast).not.toHaveBeenCalled()
expect(mockShowAutoUpdatedToast).not.toHaveBeenCalled()
expect(mockRunBunInstallWithDetails).not.toHaveBeenCalled()
expect(mockSyncCachePackageJsonToIntent).not.toHaveBeenCalled()
expect(mockInvalidatePackage).not.toHaveBeenCalled()
}
describe("runBackgroundUpdateCheck", () => {
let pluginEntry: PluginEntry
const mockCtx = { directory: "/test" } as PluginInput
const getToastMessage: ToastMessageGetter = (isUpdate, version) =>
isUpdate ? `Update to ${version}` : "Up to date"
beforeEach(() => {
mockFindPluginEntry.mockReset()
mockGetCachedVersion.mockReset()
mockGetLatestVersion.mockReset()
mockExtractChannel.mockReset()
mockSyncCachePackageJsonToIntent.mockReset()
mockInvalidatePackage.mockReset()
mockRunBunInstallWithDetails.mockReset()
mockShowUpdateAvailableToast.mockReset()
mockShowAutoUpdatedToast.mockReset()
mockSyncCachePackageJsonToIntent.mockReset()
operationOrder.length = 0
mockSyncCachePackageJsonToIntent.mockImplementation((_pluginEntry: PluginEntry) => {
operationOrder.push("sync")
})
mockInvalidatePackage.mockImplementation((_packageName: string) => {
operationOrder.push("invalidate")
})
pluginEntry = createPluginEntry()
mockFindPluginEntry.mockReturnValue(pluginEntry)
mockFindPluginEntry.mockReturnValue(createPluginEntry())
mockGetCachedVersion.mockReturnValue("3.4.0")
mockGetLatestVersion.mockResolvedValue("3.5.0")
mockExtractChannel.mockReturnValue("latest")
mockRunBunInstallWithDetails.mockResolvedValue({ success: true })
mockSyncCachePackageJsonToIntent.mockReturnValue({ synced: true, error: null })
})
describe("#given no-op scenarios", () => {
it.each([
{
name: "plugin entry is missing",
setup: () => {
mockFindPluginEntry.mockReturnValue(null)
},
},
{
name: "no cached or pinned version exists",
setup: () => {
mockFindPluginEntry.mockReturnValue(createPluginEntry({ entry: "oh-my-opencode" }))
mockGetCachedVersion.mockReturnValue(null)
},
},
{
name: "latest version lookup fails",
setup: () => {
mockGetLatestVersion.mockResolvedValue(null)
},
},
{
name: "current version is already latest",
setup: () => {
mockGetLatestVersion.mockResolvedValue("3.4.0")
},
},
])("returns without user-visible update effects when $name", async ({ setup }) => {
describe("#given no plugin entry found", () => {
it("returns early without showing any toast", async () => {
//#given
setup()
mockFindPluginEntry.mockReturnValue(null)
//#when
await runCheck()
await runBackgroundUpdateCheck(mockCtx, true, getToastMessage)
//#then
expectNoUpdateEffects()
expect(mockFindPluginEntry).toHaveBeenCalledTimes(1)
expect(mockShowUpdateAvailableToast).not.toHaveBeenCalled()
expect(mockShowAutoUpdatedToast).not.toHaveBeenCalled()
expect(mockRunBunInstallWithDetails).not.toHaveBeenCalled()
})
})
describe("#given no version available", () => {
it("returns early when neither cached nor pinned version exists", async () => {
//#given
mockFindPluginEntry.mockReturnValue(createPluginEntry({ entry: "oh-my-opencode" }))
mockGetCachedVersion.mockReturnValue(null)
//#when
await runBackgroundUpdateCheck(mockCtx, true, getToastMessage)
//#then
expect(mockGetCachedVersion).toHaveBeenCalledTimes(1)
expect(mockGetLatestVersion).not.toHaveBeenCalled()
expect(mockShowUpdateAvailableToast).not.toHaveBeenCalled()
expect(mockShowAutoUpdatedToast).not.toHaveBeenCalled()
})
})
describe("#given latest version fetch fails", () => {
it("returns early without toasts", async () => {
//#given
mockGetLatestVersion.mockResolvedValue(null)
//#when
await runBackgroundUpdateCheck(mockCtx, true, getToastMessage)
//#then
expect(mockGetLatestVersion).toHaveBeenCalledWith("latest")
expect(mockRunBunInstallWithDetails).not.toHaveBeenCalled()
expect(mockShowUpdateAvailableToast).not.toHaveBeenCalled()
expect(mockShowAutoUpdatedToast).not.toHaveBeenCalled()
})
})
describe("#given already on latest version", () => {
it("returns early without any action", async () => {
//#given
mockGetCachedVersion.mockReturnValue("3.4.0")
mockGetLatestVersion.mockResolvedValue("3.4.0")
//#when
await runBackgroundUpdateCheck(mockCtx, true, getToastMessage)
//#then
expect(mockGetLatestVersion).toHaveBeenCalledTimes(1)
expect(mockRunBunInstallWithDetails).not.toHaveBeenCalled()
expect(mockShowUpdateAvailableToast).not.toHaveBeenCalled()
expect(mockShowAutoUpdatedToast).not.toHaveBeenCalled()
})
})
@@ -155,12 +141,11 @@ describe("runBackgroundUpdateCheck", () => {
//#given
const autoUpdate = false
//#when
await runCheck(autoUpdate)
await runBackgroundUpdateCheck(mockCtx, autoUpdate, getToastMessage)
//#then
expect(mockShowUpdateAvailableToast).toHaveBeenCalledWith(mockContext, "3.5.0", getToastMessage)
expect(mockShowUpdateAvailableToast).toHaveBeenCalledWith(mockCtx, "3.5.0", getToastMessage)
expect(mockRunBunInstallWithDetails).not.toHaveBeenCalled()
expect(mockShowAutoUpdatedToast).not.toHaveBeenCalled()
expect(operationOrder).toEqual([])
})
})
@@ -169,7 +154,7 @@ describe("runBackgroundUpdateCheck", () => {
//#given
mockFindPluginEntry.mockReturnValue(createPluginEntry({ isPinned: true, pinnedVersion: "3.4.0" }))
//#when
await runCheck()
await runBackgroundUpdateCheck(mockCtx, true, getToastMessage)
//#then
expect(mockShowUpdateAvailableToast).toHaveBeenCalledTimes(1)
expect(mockRunBunInstallWithDetails).not.toHaveBeenCalled()
@@ -186,7 +171,7 @@ describe("runBackgroundUpdateCheck", () => {
}
)
//#when
await runCheck()
await runBackgroundUpdateCheck(mockCtx, true, getToastMessage)
//#then
expect(mockShowUpdateAvailableToast).toHaveBeenCalledTimes(1)
expect(capturedToastMessage).toBeDefined()
@@ -200,35 +185,126 @@ describe("runBackgroundUpdateCheck", () => {
})
describe("#given unpinned with auto-update and install succeeds", () => {
it("invalidates cache, installs, and shows auto-updated toast", async () => {
it("syncs cache, invalidates, installs, and shows auto-updated toast", async () => {
//#given
mockRunBunInstallWithDetails.mockResolvedValue({ success: true })
//#when
await runCheck()
await runBackgroundUpdateCheck(mockCtx, true, getToastMessage)
//#then
expect(mockSyncCachePackageJsonToIntent).toHaveBeenCalledWith(pluginEntry)
expect(mockSyncCachePackageJsonToIntent).toHaveBeenCalledTimes(1)
expect(mockInvalidatePackage).toHaveBeenCalledTimes(1)
expect(mockRunBunInstallWithDetails).toHaveBeenCalledTimes(1)
expect(mockRunBunInstallWithDetails).toHaveBeenCalledWith({ outputMode: "pipe" })
expect(mockShowAutoUpdatedToast).toHaveBeenCalledWith(mockContext, "3.4.0", "3.5.0")
expect(mockShowAutoUpdatedToast).toHaveBeenCalledWith(mockCtx, "3.4.0", "3.5.0")
expect(mockShowUpdateAvailableToast).not.toHaveBeenCalled()
expect(operationOrder).toEqual(["sync", "invalidate"])
})
it("syncs before invalidate and install (correct order)", async () => {
//#given
const callOrder: string[] = []
mockSyncCachePackageJsonToIntent.mockImplementation(() => {
callOrder.push("sync")
return { synced: true, error: null }
})
mockInvalidatePackage.mockImplementation(() => {
callOrder.push("invalidate")
})
mockRunBunInstallWithDetails.mockImplementation(async () => {
callOrder.push("install")
return { success: true }
})
//#when
await runBackgroundUpdateCheck(mockCtx, true, getToastMessage)
//#then
expect(callOrder).toEqual(["sync", "invalidate", "install"])
})
})
describe("#given unpinned with auto-update and install fails", () => {
it("falls back to notification-only toast", async () => {
//#given
mockRunBunInstallWithDetails.mockResolvedValue({ success: false, error: "install failed" })
mockRunBunInstallWithDetails.mockResolvedValue({ success: false })
//#when
await runCheck()
await runBackgroundUpdateCheck(mockCtx, true, getToastMessage)
//#then
expect(mockRunBunInstallWithDetails).toHaveBeenCalledTimes(1)
expect(mockRunBunInstallWithDetails).toHaveBeenCalledWith({ outputMode: "pipe" })
expect(mockSyncCachePackageJsonToIntent).toHaveBeenCalledWith(pluginEntry)
expect(mockShowUpdateAvailableToast).toHaveBeenCalledWith(mockContext, "3.5.0", getToastMessage)
expect(mockShowUpdateAvailableToast).toHaveBeenCalledWith(mockCtx, "3.5.0", getToastMessage)
expect(mockShowAutoUpdatedToast).not.toHaveBeenCalled()
})
})
describe("#given sync fails with file_not_found", () => {
it("aborts update and shows notification-only toast", async () => {
//#given
mockSyncCachePackageJsonToIntent.mockReturnValue({
synced: false,
error: "file_not_found",
message: "Cache package.json not found",
})
//#when
await runBackgroundUpdateCheck(mockCtx, true, getToastMessage)
//#then
expect(mockSyncCachePackageJsonToIntent).toHaveBeenCalledTimes(1)
expect(mockInvalidatePackage).not.toHaveBeenCalled()
expect(mockRunBunInstallWithDetails).not.toHaveBeenCalled()
expect(mockShowUpdateAvailableToast).toHaveBeenCalledWith(mockCtx, "3.5.0", getToastMessage)
expect(mockShowAutoUpdatedToast).not.toHaveBeenCalled()
})
})
describe("#given sync fails with plugin_not_in_deps", () => {
it("aborts update and shows notification-only toast", async () => {
//#given
mockSyncCachePackageJsonToIntent.mockReturnValue({
synced: false,
error: "plugin_not_in_deps",
message: "Plugin not in cache package.json dependencies",
})
//#when
await runBackgroundUpdateCheck(mockCtx, true, getToastMessage)
//#then
expect(mockSyncCachePackageJsonToIntent).toHaveBeenCalledTimes(1)
expect(mockInvalidatePackage).not.toHaveBeenCalled()
expect(mockRunBunInstallWithDetails).not.toHaveBeenCalled()
expect(mockShowUpdateAvailableToast).toHaveBeenCalledWith(mockCtx, "3.5.0", getToastMessage)
expect(mockShowAutoUpdatedToast).not.toHaveBeenCalled()
})
})
describe("#given sync fails with parse_error", () => {
it("aborts update and shows notification-only toast", async () => {
//#given
mockSyncCachePackageJsonToIntent.mockReturnValue({
synced: false,
error: "parse_error",
message: "Failed to parse cache package.json (malformed JSON)",
})
//#when
await runBackgroundUpdateCheck(mockCtx, true, getToastMessage)
//#then
expect(mockSyncCachePackageJsonToIntent).toHaveBeenCalledTimes(1)
expect(mockInvalidatePackage).not.toHaveBeenCalled()
expect(mockRunBunInstallWithDetails).not.toHaveBeenCalled()
expect(mockShowUpdateAvailableToast).toHaveBeenCalledWith(mockCtx, "3.5.0", getToastMessage)
expect(mockShowAutoUpdatedToast).not.toHaveBeenCalled()
})
})
describe("#given sync fails with write_error", () => {
it("aborts update and shows notification-only toast", async () => {
//#given
mockSyncCachePackageJsonToIntent.mockReturnValue({
synced: false,
error: "write_error",
message: "Failed to write cache package.json",
})
//#when
await runBackgroundUpdateCheck(mockCtx, true, getToastMessage)
//#then
expect(mockSyncCachePackageJsonToIntent).toHaveBeenCalledTimes(1)
expect(mockInvalidatePackage).not.toHaveBeenCalled()
expect(mockRunBunInstallWithDetails).not.toHaveBeenCalled()
expect(mockShowUpdateAvailableToast).toHaveBeenCalledWith(mockCtx, "3.5.0", getToastMessage)
expect(mockShowAutoUpdatedToast).not.toHaveBeenCalled()
expect(operationOrder).toEqual(["sync", "invalidate"])
})
})
})

View File

@@ -4,7 +4,7 @@ import { log } from "../../../shared/logger"
import { invalidatePackage } from "../cache"
import { PACKAGE_NAME } from "../constants"
import { extractChannel } from "../version-channel"
import { findPluginEntry, getCachedVersion, getLatestVersion, revertPinnedVersion, syncCachePackageJsonToIntent } from "../checker"
import { findPluginEntry, getCachedVersion, getLatestVersion, syncCachePackageJsonToIntent } from "../checker"
import { showAutoUpdatedToast, showUpdateAvailableToast } from "./update-toasts"
function getPinnedVersionToastMessage(latestVersion: string): string {
@@ -15,9 +15,8 @@ async function runBunInstallSafe(): Promise<boolean> {
try {
const result = await runBunInstallWithDetails({ outputMode: "pipe" })
if (!result.success && result.error) {
log("[auto-update-checker] bun install failed:", result.error)
log("[auto-update-checker] bun install error:", result.error)
}
return result.success
} catch (err) {
const errorMessage = err instanceof Error ? err.message : String(err)
@@ -70,7 +69,17 @@ export async function runBackgroundUpdateCheck(
return
}
syncCachePackageJsonToIntent(pluginInfo)
// Sync cache package.json to match opencode.json intent before updating
// This handles the case where user switched from pinned version to tag (e.g., 3.10.0 -> @latest)
const syncResult = syncCachePackageJsonToIntent(pluginInfo)
// Abort on ANY sync error to prevent corrupting a bad state further
if (syncResult.error) {
log(`[auto-update-checker] Sync failed with error: ${syncResult.error}`, syncResult.message)
await showUpdateAvailableToast(ctx, latestVersion, getToastMessage)
return
}
invalidatePackage(PACKAGE_NAME)
const installSuccess = await runBunInstallSafe()
@@ -81,11 +90,6 @@ export async function runBackgroundUpdateCheck(
return
}
if (pluginInfo.isPinned) {
revertPinnedVersion(pluginInfo.configPath, latestVersion, pluginInfo.entry)
log("[auto-update-checker] Config reverted due to install failure")
}
await showUpdateAvailableToast(ctx, latestVersion, getToastMessage)
log("[auto-update-checker] bun install failed; update not installed (falling back to notification-only)")
}

View File

@@ -0,0 +1,24 @@
/** Subagent types whose delegated prompts receive the English-only directive. */
export const TARGET_SUBAGENT_TYPES = ["explore", "librarian", "oracle", "plan"] as const
/** Directive appended to delegated prompts to force English reasoning and output. */
export const ENGLISH_DIRECTIVE =
  "**YOU MUST ALWAYS THINK, REASON, AND RESPOND IN ENGLISH REGARDLESS OF THE USER'S QUERY LANGUAGE.**"
/**
 * Creates a plugin hook that intercepts `task` tool calls and appends
 * {@link ENGLISH_DIRECTIVE} to the prompt for the targeted subagent types.
 *
 * @returns an object with a `tool.execute.before` handler that mutates
 *   `input.input.prompt` in place; all other calls pass through untouched.
 */
export function createDelegateTaskEnglishDirectiveHook() {
  // Widened readonly view of the tuple so `.includes` accepts an arbitrary
  // string without a per-call type assertion.
  const targetTypes: readonly string[] = TARGET_SUBAGENT_TYPES
  return {
    "tool.execute.before": async (
      input: { tool: string; sessionID: string; callID: string; input: Record<string, unknown> },
      _output: { title: string; output: string; metadata: unknown }
    ) => {
      // Case-insensitive match on the tool name ("task" / "Task").
      if (input.tool.toLowerCase() !== "task") return
      const args = input.input
      const subagentType = args.subagent_type
      if (typeof subagentType !== "string") return
      if (!targetTypes.includes(subagentType)) return
      // Only append when a string prompt is present; never fabricate one.
      if (typeof args.prompt === "string") {
        args.prompt = `${args.prompt}\n\n${ENGLISH_DIRECTIVE}`
      }
    },
  }
}

Some files were not shown because too many files have changed in this diff Show More