Compare commits
38 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a06e656565 | ||
|
|
30ed086c40 | ||
|
|
7c15b06da7 | ||
|
|
0e7ee2ac30 | ||
|
|
ca93d2f0fe | ||
|
|
3ab4529bc7 | ||
|
|
9d3e152b19 | ||
|
|
68c8f3dda7 | ||
|
|
03f6e72c9b | ||
|
|
4fd9f0fd04 | ||
|
|
4413336724 | ||
|
|
895f366a11 | ||
|
|
acc19fcd41 | ||
|
|
68e0a32183 | ||
|
|
dee89c1556 | ||
|
|
315c75c51e | ||
|
|
3dd80889a5 | ||
|
|
8f6ed5b20f | ||
|
|
01500f1ebe | ||
|
|
48f6c5e06d | ||
|
|
3e32afe646 | ||
|
|
d11c4a1f81 | ||
|
|
5558ddf468 | ||
|
|
aa03d9b811 | ||
|
|
28a0dd06c7 | ||
|
|
995b7751af | ||
|
|
5087788f66 | ||
|
|
19524c8a27 | ||
|
|
fbb4d46945 | ||
|
|
5dc8d577a4 | ||
|
|
c249763d7e | ||
|
|
b2d618e851 | ||
|
|
6f348a8a5c | ||
|
|
1da0adcbe8 | ||
|
|
8a9d966a3d | ||
|
|
76f8c500cb | ||
|
|
388516bcc5 | ||
|
|
8dff875929 |
2
.github/ISSUE_TEMPLATE/bug_report.yml
vendored
2
.github/ISSUE_TEMPLATE/bug_report.yml
vendored
@@ -14,6 +14,8 @@ body:
|
||||
label: Prerequisites
|
||||
description: Please confirm the following before submitting
|
||||
options:
|
||||
- label: I will write this issue in English (see our [Language Policy](https://github.com/code-yeongyu/oh-my-opencode/blob/dev/CONTRIBUTING.md#language-policy))
|
||||
required: true
|
||||
- label: I have searched existing issues to avoid duplicates
|
||||
required: true
|
||||
- label: I am using the latest version of oh-my-opencode
|
||||
|
||||
2
.github/ISSUE_TEMPLATE/feature_request.yml
vendored
2
.github/ISSUE_TEMPLATE/feature_request.yml
vendored
@@ -14,6 +14,8 @@ body:
|
||||
label: Prerequisites
|
||||
description: Please confirm the following before submitting
|
||||
options:
|
||||
- label: I will write this issue in English (see our [Language Policy](https://github.com/code-yeongyu/oh-my-opencode/blob/dev/CONTRIBUTING.md#language-policy))
|
||||
required: true
|
||||
- label: I have searched existing issues and discussions to avoid duplicates
|
||||
required: true
|
||||
- label: This feature request is specific to oh-my-opencode (not OpenCode core)
|
||||
|
||||
2
.github/ISSUE_TEMPLATE/general.yml
vendored
2
.github/ISSUE_TEMPLATE/general.yml
vendored
@@ -14,6 +14,8 @@ body:
|
||||
label: Prerequisites
|
||||
description: Please confirm the following before submitting
|
||||
options:
|
||||
- label: I will write this issue in English (see our [Language Policy](https://github.com/code-yeongyu/oh-my-opencode/blob/dev/CONTRIBUTING.md#language-policy))
|
||||
required: true
|
||||
- label: I have searched existing issues and discussions
|
||||
required: true
|
||||
- label: I have read the [documentation](https://github.com/code-yeongyu/oh-my-opencode#readme)
|
||||
|
||||
30
.github/workflows/ci.yml
vendored
30
.github/workflows/ci.yml
vendored
@@ -44,8 +44,34 @@ jobs:
|
||||
env:
|
||||
BUN_INSTALL_ALLOW_SCRIPTS: "@ast-grep/napi"
|
||||
|
||||
- name: Run tests
|
||||
run: bun test
|
||||
- name: Run mock-heavy tests (isolated)
|
||||
run: |
|
||||
# These files use mock.module() which pollutes module cache
|
||||
# Run them in separate processes to prevent cross-file contamination
|
||||
bun test src/plugin-handlers
|
||||
bun test src/hooks/atlas
|
||||
bun test src/hooks/compaction-context-injector
|
||||
bun test src/features/tmux-subagent
|
||||
|
||||
- name: Run remaining tests
|
||||
run: |
|
||||
# Run all other tests (the mock-heavy directories from the isolated step are intentionally not listed here; any directory missing from this list is not run)
|
||||
bun test bin script src/cli src/config src/mcp src/index.test.ts \
|
||||
src/agents src/tools src/shared \
|
||||
src/hooks/anthropic-context-window-limit-recovery \
|
||||
src/hooks/claude-code-compatibility \
|
||||
src/hooks/context-injection \
|
||||
src/hooks/provider-toast \
|
||||
src/hooks/session-notification \
|
||||
src/hooks/sisyphus \
|
||||
src/hooks/todo-continuation-enforcer \
|
||||
src/features/background-agent \
|
||||
src/features/builtin-commands \
|
||||
src/features/builtin-skills \
|
||||
src/features/claude-code-session-state \
|
||||
src/features/hook-message-injector \
|
||||
src/features/opencode-skill-loader \
|
||||
src/features/skill-mcp-manager
|
||||
|
||||
typecheck:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
2
.github/workflows/cla.yml
vendored
2
.github/workflows/cla.yml
vendored
@@ -25,7 +25,7 @@ jobs:
|
||||
path-to-signatures: 'signatures/cla.json'
|
||||
path-to-document: 'https://github.com/code-yeongyu/oh-my-opencode/blob/master/CLA.md'
|
||||
branch: 'dev'
|
||||
allowlist: bot*,dependabot*,github-actions*,*[bot],sisyphus-dev-ai
|
||||
allowlist: code-yeongyu,bot*,dependabot*,github-actions*,*[bot],sisyphus-dev-ai
|
||||
custom-notsigned-prcomment: |
|
||||
Thank you for your contribution! Before we can merge this PR, we need you to sign our [Contributor License Agreement (CLA)](https://github.com/code-yeongyu/oh-my-opencode/blob/master/CLA.md).
|
||||
|
||||
|
||||
36
.github/workflows/publish.yml
vendored
36
.github/workflows/publish.yml
vendored
@@ -45,16 +45,34 @@ jobs:
|
||||
env:
|
||||
BUN_INSTALL_ALLOW_SCRIPTS: "@ast-grep/napi"
|
||||
|
||||
- name: Run tests
|
||||
- name: Run mock-heavy tests (isolated)
|
||||
run: |
|
||||
# Run tests that use mock.module() in isolated processes first
|
||||
bun test src/plugin-handlers/config-handler.test.ts
|
||||
bun test src/hooks/compaction-context-injector/index.test.ts
|
||||
# Run remaining tests (find all test files, exclude mock-heavy ones, run in single batch)
|
||||
find src -name '*.test.ts' \
|
||||
! -path '**/config-handler.test.ts' \
|
||||
! -path '**/compaction-context-injector/index.test.ts' \
|
||||
| xargs bun test
|
||||
# These files use mock.module() which pollutes module cache
|
||||
# Run them in separate processes to prevent cross-file contamination
|
||||
bun test src/plugin-handlers
|
||||
bun test src/hooks/atlas
|
||||
bun test src/hooks/compaction-context-injector
|
||||
bun test src/features/tmux-subagent
|
||||
|
||||
- name: Run remaining tests
|
||||
run: |
|
||||
# Run all other tests (the mock-heavy directories from the isolated step are intentionally not listed here; any directory missing from this list is not run)
|
||||
bun test bin script src/cli src/config src/mcp src/index.test.ts \
|
||||
src/agents src/tools src/shared \
|
||||
src/hooks/anthropic-context-window-limit-recovery \
|
||||
src/hooks/claude-code-compatibility \
|
||||
src/hooks/context-injection \
|
||||
src/hooks/provider-toast \
|
||||
src/hooks/session-notification \
|
||||
src/hooks/sisyphus \
|
||||
src/hooks/todo-continuation-enforcer \
|
||||
src/features/background-agent \
|
||||
src/features/builtin-commands \
|
||||
src/features/builtin-skills \
|
||||
src/features/claude-code-session-state \
|
||||
src/features/hook-message-injector \
|
||||
src/features/opencode-skill-loader \
|
||||
src/features/skill-mcp-manager
|
||||
|
||||
typecheck:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
@@ -2768,7 +2768,8 @@
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"playwright",
|
||||
"agent-browser"
|
||||
"agent-browser",
|
||||
"dev-browser"
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
@@ -85,6 +85,66 @@ When both `oh-my-opencode.jsonc` and `oh-my-opencode.json` files exist, `.jsonc`
|
||||
|
||||
**Recommended**: For Google Gemini authentication, install the [`opencode-antigravity-auth`](https://github.com/NoeFabris/opencode-antigravity-auth) plugin (`@latest`). It provides multi-account load balancing, variant-based thinking levels, dual quota system (Antigravity + Gemini CLI), and active maintenance. See [Installation > Google Gemini](docs/guide/installation.md#google-gemini-antigravity-oauth).
|
||||
|
||||
## Ollama Provider
|
||||
|
||||
**IMPORTANT**: When using Ollama as a provider, you **must** disable streaming to avoid JSON parsing errors.
|
||||
|
||||
### Required Configuration
|
||||
|
||||
```json
|
||||
{
|
||||
"agents": {
|
||||
"explore": {
|
||||
"model": "ollama/qwen3-coder",
|
||||
"stream": false
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Why `stream: false` is Required
|
||||
|
||||
Ollama returns NDJSON (newline-delimited JSON) when streaming is enabled, but Claude Code SDK expects a single JSON object. This causes `JSON Parse error: Unexpected EOF` when agents attempt tool calls.
|
||||
|
||||
**Example of the problem**:
|
||||
```jsonc
|
||||
// Ollama streaming response (NDJSON - multiple lines)
|
||||
{"message":{"tool_calls":[...]}, "done":false}
|
||||
{"message":{"content":""}, "done":true}
|
||||
|
||||
// Claude Code SDK expects (single JSON object)
|
||||
{"message":{"tool_calls":[...], "content":""}, "done":true}
|
||||
```
|
||||
|
||||
### Supported Models
|
||||
|
||||
Common Ollama models that work with oh-my-opencode:
|
||||
|
||||
| Model | Best For | Configuration |
|
||||
|-------|----------|---------------|
|
||||
| `ollama/qwen3-coder` | Code generation, build fixes | `{"model": "ollama/qwen3-coder", "stream": false}` |
|
||||
| `ollama/ministral-3:14b` | Exploration, codebase search | `{"model": "ollama/ministral-3:14b", "stream": false}` |
|
||||
| `ollama/lfm2.5-thinking` | Documentation, writing | `{"model": "ollama/lfm2.5-thinking", "stream": false}` |
|
||||
|
||||
### Troubleshooting
|
||||
|
||||
If you encounter `JSON Parse error: Unexpected EOF`:
|
||||
|
||||
1. **Verify `stream: false` is set** in your agent configuration
|
||||
2. **Check Ollama is running**: `curl http://localhost:11434/api/tags`
|
||||
3. **Test with curl**:
|
||||
```bash
|
||||
curl -s http://localhost:11434/api/chat \
|
||||
-d '{"model": "qwen3-coder", "messages": [{"role": "user", "content": "Hello"}], "stream": false}'
|
||||
```
|
||||
4. **See detailed troubleshooting**: [docs/troubleshooting/ollama-streaming-issue.md](troubleshooting/ollama-streaming-issue.md)
|
||||
|
||||
### Future SDK Fix
|
||||
|
||||
The proper long-term fix requires Claude Code SDK to parse NDJSON responses correctly. Until then, use `stream: false` as a workaround.
|
||||
|
||||
**Tracking**: https://github.com/code-yeongyu/oh-my-opencode/issues/1124
|
||||
|
||||
## Agents
|
||||
|
||||
Override built-in agent settings:
|
||||
@@ -768,6 +828,8 @@ Disable specific built-in hooks via `disabled_hooks` in `~/.config/opencode/oh-m
|
||||
|
||||
Available hooks: `todo-continuation-enforcer`, `context-window-monitor`, `session-recovery`, `session-notification`, `comment-checker`, `grep-output-truncator`, `tool-output-truncator`, `directory-agents-injector`, `directory-readme-injector`, `empty-task-response-detector`, `think-mode`, `anthropic-context-window-limit-recovery`, `rules-injector`, `background-notification`, `auto-update-checker`, `startup-toast`, `keyword-detector`, `agent-usage-reminder`, `non-interactive-env`, `interactive-bash-session`, `compaction-context-injector`, `thinking-block-validator`, `claude-code-hooks`, `ralph-loop`, `preemptive-compaction`
|
||||
|
||||
**Note on `directory-agents-injector`**: This hook is **automatically disabled** when running on OpenCode 1.1.37+ because OpenCode now has native support for dynamically resolving AGENTS.md files from subdirectories (PR #10678). This prevents duplicate AGENTS.md injection. For older OpenCode versions, the hook remains active to provide the same functionality.
|
||||
|
||||
**Note on `auto-update-checker` and `startup-toast`**: The `startup-toast` hook is a sub-feature of `auto-update-checker`. To disable only the startup toast notification while keeping update checking enabled, add `"startup-toast"` to `disabled_hooks`. To disable all update checking features (including the toast), add `"auto-update-checker"` to `disabled_hooks`.
|
||||
|
||||
## MCPs
|
||||
|
||||
@@ -320,7 +320,7 @@ Hooks intercept and modify behavior at key points in the agent lifecycle.
|
||||
|
||||
| Hook | Event | Description |
|
||||
|------|-------|-------------|
|
||||
| **directory-agents-injector** | PostToolUse | Auto-injects AGENTS.md when reading files. Walks from file to project root, collecting all AGENTS.md files. |
|
||||
| **directory-agents-injector** | PostToolUse | Auto-injects AGENTS.md when reading files. Walks from file to project root, collecting all AGENTS.md files. **Deprecated for OpenCode 1.1.37+** - Auto-disabled when native AGENTS.md injection is available. |
|
||||
| **directory-readme-injector** | PostToolUse | Auto-injects README.md for directory context. |
|
||||
| **rules-injector** | PostToolUse | Injects rules from `.claude/rules/` when conditions match. Supports globs and alwaysApply. |
|
||||
| **compaction-context-injector** | Stop | Preserves critical context during session compaction. |
|
||||
|
||||
126
docs/troubleshooting/ollama-streaming-issue.md
Normal file
126
docs/troubleshooting/ollama-streaming-issue.md
Normal file
@@ -0,0 +1,126 @@
|
||||
# Ollama Streaming Issue - JSON Parse Error
|
||||
|
||||
## Problem
|
||||
|
||||
When using Ollama as a provider with oh-my-opencode agents, you may encounter:
|
||||
|
||||
```
|
||||
JSON Parse error: Unexpected EOF
|
||||
```
|
||||
|
||||
This occurs when agents attempt tool calls (e.g., `explore` agent using `mcp_grep_search`).
|
||||
|
||||
## Root Cause
|
||||
|
||||
Ollama returns **NDJSON** (newline-delimited JSON) when `stream: true` is used in API requests:
|
||||
|
||||
```json
|
||||
{"message":{"tool_calls":[{"function":{"name":"read","arguments":{"filePath":"README.md"}}}]}, "done":false}
|
||||
{"message":{"content":""}, "done":true}
|
||||
```
|
||||
|
||||
Claude Code SDK expects a single JSON object, not multiple NDJSON lines, causing the parse error.
|
||||
|
||||
### Why This Happens
|
||||
|
||||
- **Ollama API**: Returns streaming responses as NDJSON by design
|
||||
- **Claude Code SDK**: Doesn't properly handle NDJSON responses for tool calls
|
||||
- **oh-my-opencode**: Passes through the SDK's behavior (can't fix at this layer)
|
||||
|
||||
## Solutions
|
||||
|
||||
### Option 1: Disable Streaming (Recommended - Immediate Fix)
|
||||
|
||||
Configure your Ollama provider to use `stream: false`:
|
||||
|
||||
```json
|
||||
{
|
||||
"provider": "ollama",
|
||||
"model": "qwen3-coder",
|
||||
"stream": false
|
||||
}
|
||||
```
|
||||
|
||||
**Pros:**
|
||||
- Works immediately
|
||||
- No code changes needed
|
||||
- Simple configuration
|
||||
|
||||
**Cons:**
|
||||
- Slightly slower response time (no streaming)
|
||||
- Less interactive feedback
|
||||
|
||||
### Option 2: Use Non-Tool Agents Only
|
||||
|
||||
If you need streaming, avoid agents that use tools:
|
||||
|
||||
- ✅ **Safe**: Simple text generation, non-tool tasks
|
||||
- ❌ **Problematic**: Any agent with tool calls (explore, librarian, etc.)
|
||||
|
||||
### Option 3: Wait for SDK Fix (Long-term)
|
||||
|
||||
The proper fix requires Claude Code SDK to:
|
||||
|
||||
1. Detect NDJSON responses
|
||||
2. Parse each line separately
|
||||
3. Merge `tool_calls` from multiple lines
|
||||
4. Return a single merged response
|
||||
|
||||
**Tracking**: https://github.com/code-yeongyu/oh-my-opencode/issues/1124
|
||||
|
||||
## Workaround Implementation
|
||||
|
||||
Until the SDK is fixed, here's how to implement NDJSON parsing (for SDK maintainers):
|
||||
|
||||
```typescript
|
||||
/**
 * Merge an Ollama NDJSON (newline-delimited JSON) response into one message.
 *
 * Every non-blank line of `response` is parsed as its own JSON document.
 * `message.tool_calls` arrays are appended in arrival order and
 * `message.content` fragments are concatenated in arrival order, so text
 * that is spread over several chunks is not lost. Lines that cannot be
 * processed are logged with `console.warn` and skipped.
 *
 * @param response - Raw response body, one JSON document per line.
 * @returns The merged message. `tool_calls` is always present; `content` is
 *   present only when at least one chunk carried non-empty content.
 */
async function parseOllamaStreamResponse(response: string): Promise<object> {
  // `content` is declared up front: assigning a property that the inferred
  // literal type does not have fails type-checking, and an untyped `[]`
  // would be inferred as `never[]`, rejecting every push.
  const mergedMessage: { tool_calls: unknown[]; content?: string } = { tool_calls: [] };

  for (const line of response.split('\n')) {
    // Blank separators and the trailing newline carry no document.
    if (!line.trim()) continue;

    try {
      const json = JSON.parse(line);
      if (json.message?.tool_calls) {
        mergedMessage.tool_calls.push(...json.message.tool_calls);
      }
      if (json.message?.content) {
        // Accumulate instead of overwrite: each chunk holds only a fragment.
        mergedMessage.content = (mergedMessage.content ?? '') + json.message.content;
      }
    } catch (e) {
      // Skip malformed lines
      console.warn('Skipping malformed NDJSON line:', line);
    }
  }

  return mergedMessage;
}
|
||||
```
|
||||
|
||||
## Testing
|
||||
|
||||
To verify the fix works:
|
||||
|
||||
```bash
|
||||
# Test with curl (should work with stream: false)
|
||||
curl -s http://localhost:11434/api/chat \
|
||||
-d '{
|
||||
"model": "qwen3-coder",
|
||||
"messages": [{"role": "user", "content": "Read file README.md"}],
|
||||
"stream": false,
|
||||
"tools": [{"type": "function", "function": {"name": "read", "description": "Read a file", "parameters": {"type": "object", "properties": {"filePath": {"type": "string"}}, "required": ["filePath"]}}}]
|
||||
}'
|
||||
```
|
||||
|
||||
## Related Issues
|
||||
|
||||
- **oh-my-opencode**: https://github.com/code-yeongyu/oh-my-opencode/issues/1124
|
||||
- **Ollama API Docs**: https://github.com/ollama/ollama/blob/main/docs/api.md
|
||||
|
||||
## Getting Help
|
||||
|
||||
If you encounter this issue:
|
||||
|
||||
1. Check your Ollama provider configuration
|
||||
2. Set `stream: false` as a workaround
|
||||
3. Report any additional errors to the issue tracker
|
||||
4. Provide your configuration (without secrets) for debugging
|
||||
16
package.json
16
package.json
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "oh-my-opencode",
|
||||
"version": "3.1.3",
|
||||
"version": "3.1.6",
|
||||
"description": "The Best AI Agent Harness - Batteries-Included OpenCode Plugin with Multi-Model Orchestration, Parallel Background Agents, and Crafted LSP/AST Tools",
|
||||
"main": "dist/index.js",
|
||||
"types": "dist/index.d.ts",
|
||||
@@ -73,13 +73,13 @@
|
||||
"typescript": "^5.7.3"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"oh-my-opencode-darwin-arm64": "3.1.3",
|
||||
"oh-my-opencode-darwin-x64": "3.1.3",
|
||||
"oh-my-opencode-linux-arm64": "3.1.3",
|
||||
"oh-my-opencode-linux-arm64-musl": "3.1.3",
|
||||
"oh-my-opencode-linux-x64": "3.1.3",
|
||||
"oh-my-opencode-linux-x64-musl": "3.1.3",
|
||||
"oh-my-opencode-windows-x64": "3.1.3"
|
||||
"oh-my-opencode-darwin-arm64": "3.1.6",
|
||||
"oh-my-opencode-darwin-x64": "3.1.6",
|
||||
"oh-my-opencode-linux-arm64": "3.1.6",
|
||||
"oh-my-opencode-linux-arm64-musl": "3.1.6",
|
||||
"oh-my-opencode-linux-x64": "3.1.6",
|
||||
"oh-my-opencode-linux-x64-musl": "3.1.6",
|
||||
"oh-my-opencode-windows-x64": "3.1.6"
|
||||
},
|
||||
"trustedDependencies": [
|
||||
"@ast-grep/cli",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "oh-my-opencode-darwin-arm64",
|
||||
"version": "3.1.3",
|
||||
"version": "3.1.6",
|
||||
"description": "Platform-specific binary for oh-my-opencode (darwin-arm64)",
|
||||
"license": "MIT",
|
||||
"repository": {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "oh-my-opencode-darwin-x64",
|
||||
"version": "3.1.3",
|
||||
"version": "3.1.6",
|
||||
"description": "Platform-specific binary for oh-my-opencode (darwin-x64)",
|
||||
"license": "MIT",
|
||||
"repository": {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "oh-my-opencode-linux-arm64-musl",
|
||||
"version": "3.1.3",
|
||||
"version": "3.1.6",
|
||||
"description": "Platform-specific binary for oh-my-opencode (linux-arm64-musl)",
|
||||
"license": "MIT",
|
||||
"repository": {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "oh-my-opencode-linux-arm64",
|
||||
"version": "3.1.3",
|
||||
"version": "3.1.6",
|
||||
"description": "Platform-specific binary for oh-my-opencode (linux-arm64)",
|
||||
"license": "MIT",
|
||||
"repository": {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "oh-my-opencode-linux-x64-musl",
|
||||
"version": "3.1.3",
|
||||
"version": "3.1.6",
|
||||
"description": "Platform-specific binary for oh-my-opencode (linux-x64-musl)",
|
||||
"license": "MIT",
|
||||
"repository": {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "oh-my-opencode-linux-x64",
|
||||
"version": "3.1.3",
|
||||
"version": "3.1.6",
|
||||
"description": "Platform-specific binary for oh-my-opencode (linux-x64)",
|
||||
"license": "MIT",
|
||||
"repository": {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "oh-my-opencode-windows-x64",
|
||||
"version": "3.1.3",
|
||||
"version": "3.1.6",
|
||||
"description": "Platform-specific binary for oh-my-opencode (windows-x64)",
|
||||
"license": "MIT",
|
||||
"repository": {
|
||||
|
||||
@@ -911,6 +911,46 @@
|
||||
"created_at": "2026-01-27T12:36:21Z",
|
||||
"repoId": 1108837393,
|
||||
"pullRequestNo": 1179
|
||||
},
|
||||
{
|
||||
"name": "zycaskevin",
|
||||
"id": 223135116,
|
||||
"comment_id": 3806137669,
|
||||
"created_at": "2026-01-27T16:20:38Z",
|
||||
"repoId": 1108837393,
|
||||
"pullRequestNo": 1184
|
||||
},
|
||||
{
|
||||
"name": "agno01",
|
||||
"id": 4479380,
|
||||
"comment_id": 3808373433,
|
||||
"created_at": "2026-01-28T01:02:02Z",
|
||||
"repoId": 1108837393,
|
||||
"pullRequestNo": 1188
|
||||
},
|
||||
{
|
||||
"name": "rooftop-Owl",
|
||||
"id": 254422872,
|
||||
"comment_id": 3809867225,
|
||||
"created_at": "2026-01-28T08:46:58Z",
|
||||
"repoId": 1108837393,
|
||||
"pullRequestNo": 1197
|
||||
},
|
||||
{
|
||||
"name": "youming-ai",
|
||||
"id": 173424537,
|
||||
"comment_id": 3811195276,
|
||||
"created_at": "2026-01-28T13:04:16Z",
|
||||
"repoId": 1108837393,
|
||||
"pullRequestNo": 1203
|
||||
},
|
||||
{
|
||||
"name": "KennyDizi",
|
||||
"id": 16578966,
|
||||
"comment_id": 3811619818,
|
||||
"created_at": "2026-01-28T14:26:10Z",
|
||||
"repoId": 1108837393,
|
||||
"pullRequestNo": 1214
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -230,6 +230,8 @@ call_omo_agent(subagent_type="librarian", prompt="Find OSS implementations of Z.
|
||||
- [Risk 2]: [Mitigation]
|
||||
|
||||
## Directives for Prometheus
|
||||
|
||||
### Core Directives
|
||||
- MUST: [Required action]
|
||||
- MUST: [Required action]
|
||||
- MUST NOT: [Forbidden action]
|
||||
@@ -237,6 +239,29 @@ call_omo_agent(subagent_type="librarian", prompt="Find OSS implementations of Z.
|
||||
- PATTERN: Follow \`[file:lines]\`
|
||||
- TOOL: Use \`[specific tool]\` for [purpose]
|
||||
|
||||
### QA/Acceptance Criteria Directives (MANDATORY)
|
||||
> **ZERO USER INTERVENTION PRINCIPLE**: All acceptance criteria MUST be executable by agents.
|
||||
|
||||
- MUST: Write acceptance criteria as executable commands (curl, bun test, playwright actions)
|
||||
- MUST: Include exact expected outputs, not vague descriptions
|
||||
- MUST: Specify verification tool for each deliverable type (playwright for UI, curl for API, etc.)
|
||||
- MUST NOT: Create criteria requiring "user manually tests..."
|
||||
- MUST NOT: Create criteria requiring "user visually confirms..."
|
||||
- MUST NOT: Create criteria requiring "user clicks/interacts..."
|
||||
- MUST NOT: Use placeholders without concrete examples (bad: "[endpoint]", good: "/api/users")
|
||||
|
||||
Example of GOOD acceptance criteria:
|
||||
\`\`\`
|
||||
curl -s http://localhost:3000/api/health | jq '.status'
|
||||
# Assert: Output is "ok"
|
||||
\`\`\`
|
||||
|
||||
Example of BAD acceptance criteria (FORBIDDEN):
|
||||
\`\`\`
|
||||
User opens browser and checks if the page loads correctly.
|
||||
User confirms the button works as expected.
|
||||
\`\`\`
|
||||
|
||||
## Recommended Approach
|
||||
[1-2 sentence summary of how to proceed]
|
||||
\`\`\`
|
||||
@@ -263,12 +288,16 @@ call_omo_agent(subagent_type="librarian", prompt="Find OSS implementations of Z.
|
||||
- Ask generic questions ("What's the scope?")
|
||||
- Proceed without addressing ambiguity
|
||||
- Make assumptions about user's codebase
|
||||
- Suggest acceptance criteria requiring user intervention ("user manually tests", "user confirms", "user clicks")
|
||||
- Leave QA/acceptance criteria vague or placeholder-heavy
|
||||
|
||||
**ALWAYS**:
|
||||
- Classify intent FIRST
|
||||
- Be specific ("Should this change UserService only, or also AuthService?")
|
||||
- Explore before asking (for Build/Research intents)
|
||||
- Provide actionable directives for Prometheus
|
||||
- Include QA automation directives in every output
|
||||
- Ensure acceptance criteria are agent-executable (commands, not human actions)
|
||||
`
|
||||
|
||||
const metisRestrictions = createAgentToolRestrictions([
|
||||
|
||||
@@ -953,27 +953,37 @@ Each TODO follows RED-GREEN-REFACTOR:
|
||||
- Example: Create \`src/__tests__/example.test.ts\`
|
||||
- Verify: \`bun test\` → 1 test passes
|
||||
|
||||
### If Manual QA Only
|
||||
### If Automated Verification Only (NO User Intervention)
|
||||
|
||||
**CRITICAL**: Without automated tests, manual verification MUST be exhaustive.
|
||||
> **CRITICAL PRINCIPLE: ZERO USER INTERVENTION**
|
||||
>
|
||||
> **NEVER** create acceptance criteria that require:
|
||||
> - "User manually tests..." / "사용자가 직접 테스트..."
|
||||
> - "User visually confirms..." / "사용자가 눈으로 확인..."
|
||||
> - "User interacts with..." / "사용자가 직접 조작..."
|
||||
> - "Ask user to verify..." / "사용자에게 확인 요청..."
|
||||
> - ANY step that requires a human to perform an action
|
||||
>
|
||||
> **ALL verification MUST be automated and executable by the agent.**
|
||||
> If a verification cannot be automated, find an automated alternative or explicitly note it as a known limitation.
|
||||
|
||||
Each TODO includes detailed verification procedures:
|
||||
Each TODO includes EXECUTABLE verification procedures that agents can run directly:
|
||||
|
||||
**By Deliverable Type:**
|
||||
|
||||
| Type | Verification Tool | Procedure |
|
||||
|------|------------------|-----------|
|
||||
| **Frontend/UI** | Playwright browser | Navigate, interact, screenshot |
|
||||
| **TUI/CLI** | interactive_bash (tmux) | Run command, verify output |
|
||||
| **API/Backend** | curl / httpie | Send request, verify response |
|
||||
| **Library/Module** | Node/Python REPL | Import, call, verify |
|
||||
| **Config/Infra** | Shell commands | Apply, verify state |
|
||||
| Type | Verification Tool | Automated Procedure |
|
||||
|------|------------------|---------------------|
|
||||
| **Frontend/UI** | Playwright browser via playwright skill | Agent navigates, clicks, screenshots, asserts DOM state |
|
||||
| **TUI/CLI** | interactive_bash (tmux) | Agent runs command, captures output, validates expected strings |
|
||||
| **API/Backend** | curl / httpie via Bash | Agent sends request, parses response, validates JSON fields |
|
||||
| **Library/Module** | Node/Python REPL via Bash | Agent imports, calls function, compares output |
|
||||
| **Config/Infra** | Shell commands via Bash | Agent applies config, runs state check, validates output |
|
||||
|
||||
**Evidence Required:**
|
||||
- Commands run with actual output
|
||||
- Screenshots for visual changes
|
||||
- Response bodies for API changes
|
||||
- Terminal output for CLI changes
|
||||
**Evidence Requirements (Agent-Executable):**
|
||||
- Command output captured and compared against expected patterns
|
||||
- Screenshots saved to .sisyphus/evidence/ for visual verification
|
||||
- JSON response fields validated with specific assertions
|
||||
- Exit codes checked (0 = success)
|
||||
|
||||
---
|
||||
|
||||
@@ -1083,53 +1093,76 @@ Parallel Speedup: ~40% faster than sequential
|
||||
|
||||
**Acceptance Criteria**:
|
||||
|
||||
> CRITICAL: Acceptance = EXECUTION, not just "it should work".
|
||||
> The executor MUST run these commands and verify output.
|
||||
> **CRITICAL: AGENT-EXECUTABLE VERIFICATION ONLY**
|
||||
>
|
||||
> - Acceptance = EXECUTION by the agent, not "user checks if it works"
|
||||
> - Every criterion MUST be verifiable by running a command or using a tool
|
||||
> - NO steps like "user opens browser", "user clicks", "user confirms"
|
||||
> - If you write "[placeholder]" - REPLACE IT with actual values based on task context
|
||||
|
||||
**If TDD (tests enabled):**
|
||||
- [ ] Test file created: \`[path].test.ts\`
|
||||
- [ ] Test covers: [specific scenario]
|
||||
- [ ] \`bun test [file]\` → PASS (N tests, 0 failures)
|
||||
- [ ] Test file created: src/auth/login.test.ts
|
||||
- [ ] Test covers: successful login returns JWT token
|
||||
- [ ] bun test src/auth/login.test.ts → PASS (3 tests, 0 failures)
|
||||
|
||||
**Manual Execution Verification (ALWAYS include, even with tests):**
|
||||
**Automated Verification (ALWAYS include, choose by deliverable type):**
|
||||
|
||||
*Choose based on deliverable type:*
|
||||
**For Frontend/UI changes** (using playwright skill):
|
||||
\\\`\\\`\\\`
|
||||
# Agent executes via playwright browser automation:
|
||||
1. Navigate to: http://localhost:3000/login
|
||||
2. Fill: input[name="email"] with "test@example.com"
|
||||
3. Fill: input[name="password"] with "password123"
|
||||
4. Click: button[type="submit"]
|
||||
5. Wait for: selector ".dashboard-welcome" to be visible
|
||||
6. Assert: text "Welcome back" appears on page
|
||||
7. Screenshot: .sisyphus/evidence/task-1-login-success.png
|
||||
\\\`\\\`\\\`
|
||||
|
||||
**For Frontend/UI changes:**
|
||||
- [ ] Using playwright browser automation:
|
||||
- Navigate to: \`http://localhost:[port]/[path]\`
|
||||
- Action: [click X, fill Y, scroll to Z]
|
||||
- Verify: [visual element appears, animation completes, state changes]
|
||||
- Screenshot: Save evidence to \`.sisyphus/evidence/[task-id]-[step].png\`
|
||||
**For TUI/CLI changes** (using interactive_bash):
|
||||
\\\`\\\`\\\`
|
||||
# Agent executes via tmux session:
|
||||
1. Command: ./my-cli --config test.yaml
|
||||
2. Wait for: "Configuration loaded" in output
|
||||
3. Send keys: "q" to quit
|
||||
4. Assert: Exit code 0
|
||||
5. Assert: Output contains "Goodbye"
|
||||
\\\`\\\`\\\`
|
||||
|
||||
**For TUI/CLI changes:**
|
||||
- [ ] Using interactive_bash (tmux session):
|
||||
- Command: \`[exact command to run]\`
|
||||
- Input sequence: [if interactive, list inputs]
|
||||
- Expected output contains: \`[expected string or pattern]\`
|
||||
- Exit code: [0 for success, specific code if relevant]
|
||||
**For API/Backend changes** (using Bash curl):
|
||||
\\\`\\\`\\\`bash
|
||||
# Agent runs:
|
||||
curl -s -X POST http://localhost:8080/api/users \\
|
||||
-H "Content-Type: application/json" \\
|
||||
-d '{"email":"new@test.com","name":"Test User"}' \\
|
||||
| jq '.id'
|
||||
# Assert: Returns non-empty UUID
|
||||
# Assert: HTTP status 201
|
||||
\\\`\\\`\\\`
|
||||
|
||||
**For API/Backend changes:**
|
||||
- [ ] Request: \`curl -X [METHOD] http://localhost:[port]/[endpoint] -H "Content-Type: application/json" -d '[body]'\`
|
||||
- [ ] Response status: [200/201/etc]
|
||||
- [ ] Response body contains: \`{"key": "expected_value"}\`
|
||||
**For Library/Module changes** (using Bash node/bun):
|
||||
\\\`\\\`\\\`bash
|
||||
# Agent runs:
|
||||
bun -e "import { validateEmail } from './src/utils/validate'; console.log(validateEmail('test@example.com'))"
|
||||
# Assert: Output is "true"
|
||||
|
||||
bun -e "import { validateEmail } from './src/utils/validate'; console.log(validateEmail('invalid'))"
|
||||
# Assert: Output is "false"
|
||||
\\\`\\\`\\\`
|
||||
|
||||
**For Library/Module changes:**
|
||||
- [ ] REPL verification:
|
||||
\`\`\`
|
||||
> import { [function] } from '[module]'
|
||||
> [function]([args])
|
||||
Expected: [output]
|
||||
\`\`\`
|
||||
**For Config/Infra changes** (using Bash):
|
||||
\\\`\\\`\\\`bash
|
||||
# Agent runs:
|
||||
docker compose up -d
|
||||
# Wait 5s for containers
|
||||
docker compose ps --format json | jq '.[].State'
|
||||
# Assert: All states are "running"
|
||||
\\\`\\\`\\\`
|
||||
|
||||
**For Config/Infra changes:**
|
||||
- [ ] Apply: \`[command to apply config]\`
|
||||
- [ ] Verify state: \`[command to check state]\` → \`[expected output]\`
|
||||
|
||||
**Evidence Required:**
|
||||
- [ ] Command output captured (copy-paste actual terminal output)
|
||||
- [ ] Screenshot saved (for visual changes)
|
||||
- [ ] Response body logged (for API changes)
|
||||
**Evidence to Capture:**
|
||||
- [ ] Terminal output from verification commands (actual output, not expected)
|
||||
- [ ] Screenshot files in .sisyphus/evidence/ for UI changes
|
||||
- [ ] JSON response bodies for API changes
|
||||
|
||||
**Commit**: YES | NO (groups with N)
|
||||
- Message: \`type(scope): desc\`
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
import { describe, test, expect, beforeEach } from "bun:test"
|
||||
import { describe, test, expect, beforeEach, spyOn, afterEach } from "bun:test"
|
||||
import { createBuiltinAgents } from "./utils"
|
||||
import type { AgentConfig } from "@opencode-ai/sdk"
|
||||
import { clearSkillCache } from "../features/opencode-skill-loader/skill-content"
|
||||
import * as connectedProvidersCache from "../shared/connected-providers-cache"
|
||||
|
||||
const TEST_DEFAULT_MODEL = "anthropic/claude-opus-4-5"
|
||||
|
||||
@@ -46,17 +47,31 @@ describe("createBuiltinAgents with model overrides", () => {
|
||||
expect(agents.sisyphus.reasoningEffort).toBeUndefined()
|
||||
})
|
||||
|
||||
test("Oracle uses first fallback entry when no availableModels provided (no cache scenario)", async () => {
|
||||
// #given - no available models simulates CI without model cache
|
||||
test("Oracle falls back to system default when availableModels is empty (even with connected cache)", async () => {
|
||||
// #given
|
||||
const cacheSpy = spyOn(connectedProvidersCache, "readConnectedProvidersCache").mockReturnValue(["openai"])
|
||||
|
||||
// #when
|
||||
const agents = await createBuiltinAgents([], {}, undefined, TEST_DEFAULT_MODEL)
|
||||
|
||||
// #then - uses first fallback entry (openai/gpt-5.2) instead of system default
|
||||
expect(agents.oracle.model).toBe("openai/gpt-5.2")
|
||||
expect(agents.oracle.reasoningEffort).toBe("medium")
|
||||
expect(agents.oracle.textVerbosity).toBe("high")
|
||||
expect(agents.oracle.thinking).toBeUndefined()
|
||||
// #then
|
||||
expect(agents.oracle.model).toBe(TEST_DEFAULT_MODEL)
|
||||
expect(agents.oracle.thinking).toEqual({ type: "enabled", budgetTokens: 32000 })
|
||||
expect(agents.oracle.reasoningEffort).toBeUndefined()
|
||||
cacheSpy.mockRestore()
|
||||
})
|
||||
|
||||
test("Oracle created without model field when no cache exists (first run scenario)", async () => {
|
||||
// #given - no cache at all (first run)
|
||||
const cacheSpy = spyOn(connectedProvidersCache, "readConnectedProvidersCache").mockReturnValue(null)
|
||||
|
||||
// #when
|
||||
const agents = await createBuiltinAgents([], {}, undefined, TEST_DEFAULT_MODEL)
|
||||
|
||||
// #then - oracle should be created with system default model (fallback to systemDefaultModel)
|
||||
expect(agents.oracle).toBeDefined()
|
||||
expect(agents.oracle.model).toBe(TEST_DEFAULT_MODEL)
|
||||
cacheSpy.mockRestore()
|
||||
})
|
||||
|
||||
test("Oracle with GPT model override has reasoningEffort, no thinking", async () => {
|
||||
@@ -107,26 +122,40 @@ describe("createBuiltinAgents with model overrides", () => {
|
||||
})
|
||||
|
||||
describe("createBuiltinAgents without systemDefaultModel", () => {
|
||||
test("creates agents successfully without systemDefaultModel", async () => {
|
||||
// #given - no systemDefaultModel provided
|
||||
test("agents NOT created when availableModels empty and no systemDefaultModel", async () => {
|
||||
// #given
|
||||
const cacheSpy = spyOn(connectedProvidersCache, "readConnectedProvidersCache").mockReturnValue(["openai"])
|
||||
|
||||
// #when
|
||||
const agents = await createBuiltinAgents([], {}, undefined, undefined)
|
||||
|
||||
// #then - agents should still be created using fallback chain
|
||||
expect(agents.oracle).toBeDefined()
|
||||
expect(agents.oracle.model).toBe("openai/gpt-5.2")
|
||||
// #then
|
||||
expect(agents.oracle).toBeUndefined()
|
||||
cacheSpy.mockRestore()
|
||||
})
|
||||
|
||||
test("sisyphus uses fallback chain when systemDefaultModel undefined", async () => {
|
||||
// #given - no systemDefaultModel
|
||||
test("agents NOT created when no cache and no systemDefaultModel (first run without defaults)", async () => {
|
||||
// #given
|
||||
const cacheSpy = spyOn(connectedProvidersCache, "readConnectedProvidersCache").mockReturnValue(null)
|
||||
|
||||
// #when
|
||||
const agents = await createBuiltinAgents([], {}, undefined, undefined)
|
||||
|
||||
// #then - sisyphus should use its fallback chain
|
||||
expect(agents.sisyphus).toBeDefined()
|
||||
expect(agents.sisyphus.model).toBe("anthropic/claude-opus-4-5")
|
||||
// #then
|
||||
expect(agents.oracle).toBeUndefined()
|
||||
cacheSpy.mockRestore()
|
||||
})
|
||||
|
||||
test("sisyphus NOT created when availableModels empty and no systemDefaultModel", async () => {
|
||||
// #given
|
||||
const cacheSpy = spyOn(connectedProvidersCache, "readConnectedProvidersCache").mockReturnValue(["anthropic"])
|
||||
|
||||
// #when
|
||||
const agents = await createBuiltinAgents([], {}, undefined, undefined)
|
||||
|
||||
// #then
|
||||
expect(agents.sisyphus).toBeUndefined()
|
||||
cacheSpy.mockRestore()
|
||||
})
|
||||
})
|
||||
|
||||
|
||||
@@ -313,13 +313,14 @@ export const GitMasterConfigSchema = z.object({
|
||||
include_co_authored_by: z.boolean().default(true),
|
||||
})
|
||||
|
||||
export const BrowserAutomationProviderSchema = z.enum(["playwright", "agent-browser"])
|
||||
export const BrowserAutomationProviderSchema = z.enum(["playwright", "agent-browser", "dev-browser"])
|
||||
|
||||
export const BrowserAutomationConfigSchema = z.object({
|
||||
/**
|
||||
* Browser automation provider to use for the "playwright" skill.
|
||||
* - "playwright": Uses Playwright MCP server (@playwright/mcp) - default
|
||||
* - "agent-browser": Uses Vercel's agent-browser CLI (requires: bun add -g agent-browser)
|
||||
* - "dev-browser": Uses dev-browser skill with persistent browser state
|
||||
*/
|
||||
provider: BrowserAutomationProviderSchema.default("playwright"),
|
||||
})
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import { describe, test, expect, beforeEach } from "bun:test"
|
||||
import { describe, test, expect, beforeEach, afterEach } from "bun:test"
|
||||
import {
|
||||
setSessionAgent,
|
||||
getSessionAgent,
|
||||
@@ -13,9 +13,11 @@ describe("claude-code-session-state", () => {
|
||||
beforeEach(() => {
|
||||
// #given - clean state before each test
|
||||
_resetForTesting()
|
||||
clearSessionAgent("test-session-1")
|
||||
clearSessionAgent("test-session-2")
|
||||
clearSessionAgent("test-prometheus-session")
|
||||
})
|
||||
|
||||
afterEach(() => {
|
||||
// #then - cleanup after each test to prevent pollution
|
||||
_resetForTesting()
|
||||
})
|
||||
|
||||
describe("setSessionAgent", () => {
|
||||
@@ -92,9 +94,9 @@ describe("claude-code-session-state", () => {
|
||||
expect(getMainSessionID()).toBe(mainID)
|
||||
})
|
||||
|
||||
test.skip("should return undefined when not set", () => {
|
||||
// #given - not set
|
||||
// TODO: Fix flaky test - parallel test execution causes state pollution
|
||||
test("should return undefined when not set", () => {
|
||||
// #given - explicit reset to ensure clean state (parallel test isolation)
|
||||
_resetForTesting()
|
||||
// #then
|
||||
expect(getMainSessionID()).toBeUndefined()
|
||||
})
|
||||
|
||||
@@ -14,6 +14,7 @@ export function getMainSessionID(): string | undefined {
|
||||
export function _resetForTesting(): void {
|
||||
_mainSessionID = undefined
|
||||
subagentSessions.clear()
|
||||
sessionAgentMap.clear()
|
||||
}
|
||||
|
||||
const sessionAgentMap = new Map<string, string>()
|
||||
|
||||
@@ -128,8 +128,15 @@ $ARGUMENTS
|
||||
}
|
||||
}
|
||||
|
||||
function parseAllowedTools(allowedTools: string | undefined): string[] | undefined {
|
||||
function parseAllowedTools(allowedTools: string | string[] | undefined): string[] | undefined {
|
||||
if (!allowedTools) return undefined
|
||||
|
||||
// Handle YAML array format: already parsed as string[]
|
||||
if (Array.isArray(allowedTools)) {
|
||||
return allowedTools.map(t => t.trim()).filter(Boolean)
|
||||
}
|
||||
|
||||
// Handle space-separated string format: "Read Write Edit Bash"
|
||||
return allowedTools.split(/\s+/).filter(Boolean)
|
||||
}
|
||||
|
||||
|
||||
@@ -268,6 +268,123 @@ Skill body.
|
||||
} finally {
|
||||
process.chdir(originalCwd)
|
||||
}
|
||||
})
|
||||
})
|
||||
|
||||
describe("allowed-tools parsing", () => {
|
||||
it("parses space-separated allowed-tools string", async () => {
|
||||
// #given
|
||||
const skillContent = `---
|
||||
name: space-separated-tools
|
||||
description: Skill with space-separated allowed-tools
|
||||
allowed-tools: Read Write Edit Bash
|
||||
---
|
||||
Skill body.
|
||||
`
|
||||
createTestSkill("space-separated-tools", skillContent)
|
||||
|
||||
// #when
|
||||
const { discoverSkills } = await import("./loader")
|
||||
const originalCwd = process.cwd()
|
||||
process.chdir(TEST_DIR)
|
||||
|
||||
try {
|
||||
const skills = await discoverSkills({ includeClaudeCodePaths: false })
|
||||
const skill = skills.find(s => s.name === "space-separated-tools")
|
||||
|
||||
// #then
|
||||
expect(skill).toBeDefined()
|
||||
expect(skill?.allowedTools).toEqual(["Read", "Write", "Edit", "Bash"])
|
||||
} finally {
|
||||
process.chdir(originalCwd)
|
||||
}
|
||||
})
|
||||
|
||||
it("parses YAML inline array allowed-tools", async () => {
|
||||
// #given
|
||||
const skillContent = `---
|
||||
name: yaml-inline-array
|
||||
description: Skill with YAML inline array allowed-tools
|
||||
allowed-tools: [Read, Write, Edit, Bash]
|
||||
---
|
||||
Skill body.
|
||||
`
|
||||
createTestSkill("yaml-inline-array", skillContent)
|
||||
|
||||
// #when
|
||||
const { discoverSkills } = await import("./loader")
|
||||
const originalCwd = process.cwd()
|
||||
process.chdir(TEST_DIR)
|
||||
|
||||
try {
|
||||
const skills = await discoverSkills({ includeClaudeCodePaths: false })
|
||||
const skill = skills.find(s => s.name === "yaml-inline-array")
|
||||
|
||||
// #then
|
||||
expect(skill).toBeDefined()
|
||||
expect(skill?.allowedTools).toEqual(["Read", "Write", "Edit", "Bash"])
|
||||
} finally {
|
||||
process.chdir(originalCwd)
|
||||
}
|
||||
})
|
||||
|
||||
it("parses YAML multi-line array allowed-tools", async () => {
|
||||
// #given
|
||||
const skillContent = `---
|
||||
name: yaml-multiline-array
|
||||
description: Skill with YAML multi-line array allowed-tools
|
||||
allowed-tools:
|
||||
- Read
|
||||
- Write
|
||||
- Edit
|
||||
- Bash
|
||||
---
|
||||
Skill body.
|
||||
`
|
||||
createTestSkill("yaml-multiline-array", skillContent)
|
||||
|
||||
// #when
|
||||
const { discoverSkills } = await import("./loader")
|
||||
const originalCwd = process.cwd()
|
||||
process.chdir(TEST_DIR)
|
||||
|
||||
try {
|
||||
const skills = await discoverSkills({ includeClaudeCodePaths: false })
|
||||
const skill = skills.find(s => s.name === "yaml-multiline-array")
|
||||
|
||||
// #then
|
||||
expect(skill).toBeDefined()
|
||||
expect(skill?.allowedTools).toEqual(["Read", "Write", "Edit", "Bash"])
|
||||
} finally {
|
||||
process.chdir(originalCwd)
|
||||
}
|
||||
})
|
||||
|
||||
it("returns undefined for skill without allowed-tools", async () => {
|
||||
// #given
|
||||
const skillContent = `---
|
||||
name: no-allowed-tools
|
||||
description: Skill without allowed-tools field
|
||||
---
|
||||
Skill body.
|
||||
`
|
||||
createTestSkill("no-allowed-tools", skillContent)
|
||||
|
||||
// #when
|
||||
const { discoverSkills } = await import("./loader")
|
||||
const originalCwd = process.cwd()
|
||||
process.chdir(TEST_DIR)
|
||||
|
||||
try {
|
||||
const skills = await discoverSkills({ includeClaudeCodePaths: false })
|
||||
const skill = skills.find(s => s.name === "no-allowed-tools")
|
||||
|
||||
// #then
|
||||
expect(skill).toBeDefined()
|
||||
expect(skill?.allowedTools).toBeUndefined()
|
||||
} finally {
|
||||
process.chdir(originalCwd)
|
||||
}
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
@@ -50,8 +50,15 @@ async function loadMcpJsonFromDir(skillDir: string): Promise<SkillMcpConfig | un
|
||||
return undefined
|
||||
}
|
||||
|
||||
function parseAllowedTools(allowedTools: string | undefined): string[] | undefined {
|
||||
function parseAllowedTools(allowedTools: string | string[] | undefined): string[] | undefined {
|
||||
if (!allowedTools) return undefined
|
||||
|
||||
// Handle YAML array format: already parsed as string[]
|
||||
if (Array.isArray(allowedTools)) {
|
||||
return allowedTools.map(t => t.trim()).filter(Boolean)
|
||||
}
|
||||
|
||||
// Handle space-separated string format: "Read Write Edit Bash"
|
||||
return allowedTools.split(/\s+/).filter(Boolean)
|
||||
}
|
||||
|
||||
|
||||
@@ -9,6 +9,14 @@ import { parseFrontmatter } from "../../shared/frontmatter"
|
||||
import { sanitizeModelField } from "../../shared/model-sanitizer"
|
||||
import { deepMerge } from "../../shared/deep-merge"
|
||||
|
||||
function parseAllowedToolsFromMetadata(allowedTools: string | string[] | undefined): string[] | undefined {
|
||||
if (!allowedTools) return undefined
|
||||
if (Array.isArray(allowedTools)) {
|
||||
return allowedTools.map(t => t.trim()).filter(Boolean)
|
||||
}
|
||||
return allowedTools.split(/\s+/).filter(Boolean)
|
||||
}
|
||||
|
||||
const SCOPE_PRIORITY: Record<SkillScope, number> = {
|
||||
builtin: 1,
|
||||
config: 2,
|
||||
@@ -119,7 +127,7 @@ $ARGUMENTS
|
||||
}
|
||||
|
||||
const allowedTools = entry["allowed-tools"] ||
|
||||
(fileMetadata["allowed-tools"] ? fileMetadata["allowed-tools"].split(/\s+/).filter(Boolean) : undefined)
|
||||
(fileMetadata["allowed-tools"] ? parseAllowedToolsFromMetadata(fileMetadata["allowed-tools"]) : undefined)
|
||||
|
||||
return {
|
||||
name,
|
||||
|
||||
@@ -13,7 +13,7 @@ export interface SkillMetadata {
|
||||
license?: string
|
||||
compatibility?: string
|
||||
metadata?: Record<string, string>
|
||||
"allowed-tools"?: string
|
||||
"allowed-tools"?: string | string[]
|
||||
mcp?: SkillMcpConfig
|
||||
}
|
||||
|
||||
|
||||
@@ -66,6 +66,20 @@ describe("atlas hook", () => {
|
||||
})
|
||||
|
||||
describe("tool.execute.after handler", () => {
|
||||
test("should handle undefined output gracefully (issue #1035)", async () => {
|
||||
// #given - hook and undefined output (e.g., from /review command)
|
||||
const hook = createAtlasHook(createMockPluginInput())
|
||||
|
||||
// #when - calling with undefined output
|
||||
const result = await hook["tool.execute.after"](
|
||||
{ tool: "delegate_task", sessionID: "session-123" },
|
||||
undefined as unknown as { title: string; output: string; metadata: Record<string, unknown> }
|
||||
)
|
||||
|
||||
// #then - returns undefined without throwing
|
||||
expect(result).toBeUndefined()
|
||||
})
|
||||
|
||||
test("should ignore non-delegate_task tools", async () => {
|
||||
// #given - hook and non-delegate_task tool
|
||||
const hook = createAtlasHook(createMockPluginInput())
|
||||
|
||||
@@ -663,6 +663,11 @@ export function createAtlasHook(
|
||||
input: ToolExecuteAfterInput,
|
||||
output: ToolExecuteAfterOutput
|
||||
): Promise<void> => {
|
||||
// Guard against undefined output (e.g., from /review command - see issue #1035)
|
||||
if (!output) {
|
||||
return
|
||||
}
|
||||
|
||||
if (!isCallerOrchestrator(input.sessionID)) {
|
||||
return
|
||||
}
|
||||
|
||||
@@ -170,6 +170,20 @@ export function getCachedVersion(): string | null {
|
||||
log("[auto-update-checker] Failed to resolve version from current directory:", err)
|
||||
}
|
||||
|
||||
// Fallback for compiled binaries (npm global install)
|
||||
// process.execPath points to the actual binary location
|
||||
try {
|
||||
const execDir = path.dirname(fs.realpathSync(process.execPath))
|
||||
const pkgPath = findPackageJsonUp(execDir)
|
||||
if (pkgPath) {
|
||||
const content = fs.readFileSync(pkgPath, "utf-8")
|
||||
const pkg = JSON.parse(content) as PackageJson
|
||||
if (pkg.version) return pkg.version
|
||||
}
|
||||
} catch (err) {
|
||||
log("[auto-update-checker] Failed to resolve version from execPath:", err)
|
||||
}
|
||||
|
||||
return null
|
||||
}
|
||||
|
||||
|
||||
@@ -237,6 +237,11 @@ export function createClaudeCodeHooksHook(
|
||||
input: { tool: string; sessionID: string; callID: string },
|
||||
output: { title: string; output: string; metadata: unknown }
|
||||
): Promise<void> => {
|
||||
// Guard against undefined output (e.g., from /review command - see issue #1035)
|
||||
if (!output) {
|
||||
return
|
||||
}
|
||||
|
||||
const claudeConfig = await loadClaudeHooksConfig()
|
||||
const extendedConfig = await loadPluginExtendedConfig()
|
||||
|
||||
|
||||
@@ -123,7 +123,7 @@ export async function executePostToolUseHooks(
|
||||
|
||||
if (result.exitCode === 0 && result.stdout) {
|
||||
try {
|
||||
const output = JSON.parse(result.stdout) as PostToolUseOutput
|
||||
const output = JSON.parse(result.stdout || "{}") as PostToolUseOutput
|
||||
if (output.decision === "block") {
|
||||
return {
|
||||
block: true,
|
||||
|
||||
@@ -73,7 +73,7 @@ export async function executePreCompactHooks(
|
||||
|
||||
if (result.stdout) {
|
||||
try {
|
||||
const output = JSON.parse(result.stdout) as PreCompactOutput
|
||||
const output = JSON.parse(result.stdout || "{}") as PreCompactOutput
|
||||
|
||||
if (output.hookSpecificOutput?.additionalContext) {
|
||||
collectedContext.push(...output.hookSpecificOutput.additionalContext)
|
||||
|
||||
@@ -117,7 +117,7 @@ export async function executePreToolUseHooks(
|
||||
|
||||
if (result.stdout) {
|
||||
try {
|
||||
const output = JSON.parse(result.stdout) as PreToolUseOutput
|
||||
const output = JSON.parse(result.stdout || "{}") as PreToolUseOutput
|
||||
|
||||
// Handle deprecated decision/reason fields (Claude Code backward compat)
|
||||
let decision: PermissionDecision | undefined
|
||||
|
||||
@@ -93,7 +93,7 @@ export async function executeStopHooks(
|
||||
|
||||
if (result.stdout) {
|
||||
try {
|
||||
const output = JSON.parse(result.stdout) as StopOutput
|
||||
const output = JSON.parse(result.stdout || "{}") as StopOutput
|
||||
if (output.stop_hook_active !== undefined) {
|
||||
stopHookActiveState.set(ctx.sessionId, output.stop_hook_active)
|
||||
}
|
||||
|
||||
@@ -49,7 +49,81 @@ You ARE the planner. Your job: create bulletproof work plans.
|
||||
- External library APIs and constraints
|
||||
- Similar implementations in OSS (via librarian)
|
||||
|
||||
**NEVER plan blind. Context first, plan second.**`
|
||||
**NEVER plan blind. Context first, plan second.**
|
||||
|
||||
---
|
||||
|
||||
## MANDATORY OUTPUT: PARALLEL TASK GRAPH + TODO LIST
|
||||
|
||||
**YOUR PRIMARY OUTPUT IS A PARALLEL EXECUTION TASK GRAPH.**
|
||||
|
||||
When you finalize a plan, you MUST structure it for maximum parallel execution:
|
||||
|
||||
### 1. Parallel Execution Waves (REQUIRED)
|
||||
|
||||
Analyze task dependencies and group independent tasks into parallel waves:
|
||||
|
||||
\`\`\`
|
||||
Wave 1 (Start Immediately - No Dependencies):
|
||||
├── Task 1: [description] → category: X, skills: [a, b]
|
||||
└── Task 4: [description] → category: Y, skills: [c]
|
||||
|
||||
Wave 2 (After Wave 1 Completes):
|
||||
├── Task 2: [depends: 1] → category: X, skills: [a]
|
||||
├── Task 3: [depends: 1] → category: Z, skills: [d]
|
||||
└── Task 5: [depends: 4] → category: Y, skills: [c]
|
||||
|
||||
Wave 3 (After Wave 2 Completes):
|
||||
└── Task 6: [depends: 2, 3] → category: X, skills: [a, b]
|
||||
|
||||
Critical Path: Task 1 → Task 2 → Task 6
|
||||
Estimated Parallel Speedup: ~40% faster than sequential
|
||||
\`\`\`
|
||||
|
||||
### 2. Dependency Matrix (REQUIRED)
|
||||
|
||||
| Task | Depends On | Blocks | Can Parallelize With |
|
||||
|------|------------|--------|---------------------|
|
||||
| 1 | None | 2, 3 | 4 |
|
||||
| 2 | 1 | 6 | 3, 5 |
|
||||
| 3 | 1 | 6 | 2, 5 |
|
||||
| 4 | None | 5 | 1 |
|
||||
| 5 | 4 | None | 2, 3 |
|
||||
| 6 | 2, 3 | None | None (final) |
|
||||
|
||||
### 3. TODO List Structure (REQUIRED)
|
||||
|
||||
Each TODO item MUST include:
|
||||
|
||||
\`\`\`markdown
|
||||
- [ ] N. [Task Title]
|
||||
|
||||
**What to do**: [Clear steps]
|
||||
|
||||
**Dependencies**: [Task numbers this depends on] | None
|
||||
**Blocks**: [Task numbers that depend on this]
|
||||
**Parallel Group**: Wave N (with Tasks X, Y)
|
||||
|
||||
**Recommended Agent Profile**:
|
||||
- **Category**: \`[visual-engineering | ultrabrain | artistry | quick | unspecified-low | unspecified-high | writing]\`
|
||||
- **Skills**: [\`skill-1\`, \`skill-2\`]
|
||||
|
||||
**Acceptance Criteria**: [Verifiable conditions]
|
||||
\`\`\`
|
||||
|
||||
### 4. Agent Dispatch Summary (REQUIRED)
|
||||
|
||||
| Wave | Tasks | Dispatch Command |
|
||||
|------|-------|------------------|
|
||||
| 1 | 1, 4 | \`delegate_task(category="...", load_skills=[...], run_in_background=true)\` × 2 |
|
||||
| 2 | 2, 3, 5 | \`delegate_task(...)\` × 3 after Wave 1 completes |
|
||||
| 3 | 6 | \`delegate_task(...)\` final integration |
|
||||
|
||||
**WHY PARALLEL TASK GRAPH IS MANDATORY:**
|
||||
- Orchestrator (Sisyphus) executes tasks in parallel waves
|
||||
- Independent tasks run simultaneously via background agents
|
||||
- Proper dependency tracking prevents race conditions
|
||||
- Category + skills ensure optimal model routing per task`
|
||||
|
||||
/**
|
||||
* Determines if the agent is a planner-type agent.
|
||||
@@ -166,52 +240,52 @@ delegate_task(agent="oracle", prompt="Review my approach: [describe plan]")
|
||||
YOU MUST LEVERAGE ALL AVAILABLE AGENTS / **CATEGORY + SKILLS** TO THEIR FULLEST POTENTIAL.
|
||||
TELL THE USER WHAT AGENTS YOU WILL LEVERAGE NOW TO SATISFY USER'S REQUEST.
|
||||
|
||||
## MANDATORY: PROMETHEUS AGENT INVOCATION (NON-NEGOTIABLE)
|
||||
## MANDATORY: PLAN AGENT INVOCATION (NON-NEGOTIABLE)
|
||||
|
||||
**YOU MUST ALWAYS INVOKE PROMETHEUS (THE PLANNER) FOR ANY NON-TRIVIAL TASK.**
|
||||
**YOU MUST ALWAYS INVOKE THE PLAN AGENT FOR ANY NON-TRIVIAL TASK.**
|
||||
|
||||
| Condition | Action |
|
||||
|-----------|--------|
|
||||
| Task has 2+ steps | MUST call Prometheus |
|
||||
| Task scope unclear | MUST call Prometheus |
|
||||
| Implementation required | MUST call Prometheus |
|
||||
| Architecture decision needed | MUST call Prometheus |
|
||||
| Task has 2+ steps | MUST call plan agent |
|
||||
| Task scope unclear | MUST call plan agent |
|
||||
| Implementation required | MUST call plan agent |
|
||||
| Architecture decision needed | MUST call plan agent |
|
||||
|
||||
\`\`\`
|
||||
delegate_task(subagent_type="prometheus", prompt="<gathered context + user request>")
|
||||
delegate_task(subagent_type="plan", prompt="<gathered context + user request>")
|
||||
\`\`\`
|
||||
|
||||
**WHY PROMETHEUS IS MANDATORY:**
|
||||
- Prometheus analyzes dependencies and parallel execution opportunities
|
||||
- Prometheus recommends CATEGORY + SKILLS for each task (in TL;DR + per-task)
|
||||
- Prometheus ensures nothing is missed with structured work plans
|
||||
**WHY PLAN AGENT IS MANDATORY:**
|
||||
- Plan agent analyzes dependencies and parallel execution opportunities
|
||||
- Plan agent outputs a **parallel task graph** with waves and dependencies
|
||||
- Plan agent provides structured TODO list with category + skills per task
|
||||
- YOU are an orchestrator, NOT an implementer
|
||||
|
||||
### SESSION CONTINUITY WITH PROMETHEUS (CRITICAL)
|
||||
### SESSION CONTINUITY WITH PLAN AGENT (CRITICAL)
|
||||
|
||||
**Prometheus returns a session_id. USE IT for follow-up interactions.**
|
||||
**Plan agent returns a session_id. USE IT for follow-up interactions.**
|
||||
|
||||
| Scenario | Action |
|
||||
|----------|--------|
|
||||
| Prometheus asks clarifying questions | \`delegate_task(session_id="{returned_session_id}", prompt="<your answer>")\` |
|
||||
| Plan agent asks clarifying questions | \`delegate_task(session_id="{returned_session_id}", prompt="<your answer>")\` |
|
||||
| Need to refine the plan | \`delegate_task(session_id="{returned_session_id}", prompt="Please adjust: <feedback>")\` |
|
||||
| Plan needs more detail | \`delegate_task(session_id="{returned_session_id}", prompt="Add more detail to Task N")\` |
|
||||
|
||||
**WHY SESSION_ID IS CRITICAL:**
|
||||
- Prometheus retains FULL conversation context
|
||||
- Plan agent retains FULL conversation context
|
||||
- No repeated exploration or context gathering
|
||||
- Saves 70%+ tokens on follow-ups
|
||||
- Maintains interview continuity until plan is finalized
|
||||
|
||||
\`\`\`
|
||||
// WRONG: Starting fresh loses all context
|
||||
delegate_task(subagent_type="prometheus", prompt="Here's more info...")
|
||||
delegate_task(subagent_type="plan", prompt="Here's more info...")
|
||||
|
||||
// CORRECT: Resume preserves everything
|
||||
delegate_task(session_id="ses_abc123", prompt="Here's my answer to your question: ...")
|
||||
\`\`\`
|
||||
|
||||
**FAILURE TO CALL PROMETHEUS = INCOMPLETE WORK.**
|
||||
**FAILURE TO CALL PLAN AGENT = INCOMPLETE WORK.**
|
||||
|
||||
---
|
||||
|
||||
@@ -223,7 +297,7 @@ delegate_task(session_id="ses_abc123", prompt="Here's my answer to your question
|
||||
|-----------|--------|-----|
|
||||
| Codebase exploration | delegate_task(subagent_type="explore", run_in_background=true) | Parallel, context-efficient |
|
||||
| Documentation lookup | delegate_task(subagent_type="librarian", run_in_background=true) | Specialized knowledge |
|
||||
| Planning | delegate_task(subagent_type="plan") | Structured work breakdown |
|
||||
| Planning | delegate_task(subagent_type="plan") | Parallel task graph + structured TODO list |
|
||||
| Architecture/Debugging | delegate_task(subagent_type="oracle") | High-IQ reasoning |
|
||||
| Implementation | delegate_task(category="...", load_skills=[...]) | Domain-optimized models |
|
||||
|
||||
@@ -283,20 +357,20 @@ delegate_task(..., run_in_background=true) // task_id_3
|
||||
delegate_task(subagent_type="librarian", run_in_background=true, prompt="...")
|
||||
\`\`\`
|
||||
|
||||
2. **INVOKE PROMETHEUS** (MANDATORY for non-trivial tasks):
|
||||
2. **INVOKE PLAN AGENT** (MANDATORY for non-trivial tasks):
|
||||
\`\`\`
|
||||
result = delegate_task(subagent_type="prometheus", prompt="<context + request>")
|
||||
result = delegate_task(subagent_type="plan", prompt="<context + request>")
|
||||
// STORE the session_id for follow-ups!
|
||||
prometheus_session_id = result.session_id
|
||||
plan_session_id = result.session_id
|
||||
\`\`\`
|
||||
|
||||
3. **ITERATE WITH PROMETHEUS** (if clarification needed):
|
||||
3. **ITERATE WITH PLAN AGENT** (if clarification needed):
|
||||
\`\`\`
|
||||
// Use session_id to continue the conversation
|
||||
delegate_task(session_id=prometheus_session_id, prompt="<answer to Prometheus's question>")
|
||||
delegate_task(session_id=plan_session_id, prompt="<answer to plan agent's question>")
|
||||
\`\`\`
|
||||
|
||||
4. **EXECUTE VIA DELEGATION** (category + skills from Prometheus's plan):
|
||||
4. **EXECUTE VIA DELEGATION** (category + skills from plan agent's output):
|
||||
\`\`\`
|
||||
delegate_task(category="...", load_skills=[...], prompt="<task from plan>")
|
||||
\`\`\`
|
||||
@@ -375,9 +449,9 @@ Write these criteria explicitly. Share with user if scope is non-trivial.
|
||||
THE USER ASKED FOR X. DELIVER EXACTLY X. NOT A SUBSET. NOT A DEMO. NOT A STARTING POINT.
|
||||
|
||||
1. EXPLORES + LIBRARIANS (background)
|
||||
2. GATHER -> delegate_task(subagent_type="prometheus", prompt="<context + request>")
|
||||
3. ITERATE WITH PROMETHEUS (session_id resume) UNTIL PLAN IS FINALIZED
|
||||
4. WORK BY DELEGATING TO CATEGORY + SKILLS AGENTS (following Prometheus's plan)
|
||||
2. GATHER -> delegate_task(subagent_type="plan", prompt="<context + request>")
|
||||
3. ITERATE WITH PLAN AGENT (session_id resume) UNTIL PLAN IS FINALIZED
|
||||
4. WORK BY DELEGATING TO CATEGORY + SKILLS AGENTS (following plan agent's parallel task graph)
|
||||
|
||||
NOW.
|
||||
|
||||
|
||||
@@ -338,6 +338,197 @@ describe("keyword-detector word boundary", () => {
|
||||
})
|
||||
})
|
||||
|
||||
describe("keyword-detector system-reminder filtering", () => {
|
||||
let logCalls: Array<{ msg: string; data?: unknown }>
|
||||
let logSpy: ReturnType<typeof spyOn>
|
||||
|
||||
beforeEach(() => {
|
||||
setMainSession(undefined)
|
||||
logCalls = []
|
||||
logSpy = spyOn(sharedModule, "log").mockImplementation((msg: string, data?: unknown) => {
|
||||
logCalls.push({ msg, data })
|
||||
})
|
||||
})
|
||||
|
||||
afterEach(() => {
|
||||
logSpy?.mockRestore()
|
||||
setMainSession(undefined)
|
||||
})
|
||||
|
||||
function createMockPluginInput() {
|
||||
return {
|
||||
client: {
|
||||
tui: {
|
||||
showToast: async () => {},
|
||||
},
|
||||
},
|
||||
} as any
|
||||
}
|
||||
|
||||
test("should NOT trigger search mode from keywords inside <system-reminder> tags", async () => {
|
||||
// #given - message contains search keywords only inside system-reminder tags
|
||||
const collector = new ContextCollector()
|
||||
const hook = createKeywordDetectorHook(createMockPluginInput(), collector)
|
||||
const sessionID = "test-session"
|
||||
const output = {
|
||||
message: {} as Record<string, unknown>,
|
||||
parts: [{
|
||||
type: "text",
|
||||
text: `<system-reminder>
|
||||
The system will search for the file and find all occurrences.
|
||||
Please locate and scan the directory.
|
||||
</system-reminder>`
|
||||
}],
|
||||
}
|
||||
|
||||
// #when - keyword detection runs on system-reminder content
|
||||
await hook["chat.message"]({ sessionID }, output)
|
||||
|
||||
// #then - should NOT trigger search mode (text should remain unchanged)
|
||||
const textPart = output.parts.find(p => p.type === "text")
|
||||
expect(textPart).toBeDefined()
|
||||
expect(textPart!.text).not.toContain("[search-mode]")
|
||||
expect(textPart!.text).toContain("<system-reminder>")
|
||||
})
|
||||
|
||||
test("should NOT trigger analyze mode from keywords inside <system-reminder> tags", async () => {
|
||||
// #given - message contains analyze keywords only inside system-reminder tags
|
||||
const collector = new ContextCollector()
|
||||
const hook = createKeywordDetectorHook(createMockPluginInput(), collector)
|
||||
const sessionID = "test-session"
|
||||
const output = {
|
||||
message: {} as Record<string, unknown>,
|
||||
parts: [{
|
||||
type: "text",
|
||||
text: `<system-reminder>
|
||||
You should investigate and examine the code carefully.
|
||||
Research the implementation details.
|
||||
</system-reminder>`
|
||||
}],
|
||||
}
|
||||
|
||||
// #when - keyword detection runs on system-reminder content
|
||||
await hook["chat.message"]({ sessionID }, output)
|
||||
|
||||
// #then - should NOT trigger analyze mode
|
||||
const textPart = output.parts.find(p => p.type === "text")
|
||||
expect(textPart).toBeDefined()
|
||||
expect(textPart!.text).not.toContain("[analyze-mode]")
|
||||
expect(textPart!.text).toContain("<system-reminder>")
|
||||
})
|
||||
|
||||
test("should detect keywords in user text even when system-reminder is present", async () => {
|
||||
// #given - message contains both system-reminder and user search keyword
|
||||
const collector = new ContextCollector()
|
||||
const hook = createKeywordDetectorHook(createMockPluginInput(), collector)
|
||||
const sessionID = "test-session"
|
||||
const output = {
|
||||
message: {} as Record<string, unknown>,
|
||||
parts: [{
|
||||
type: "text",
|
||||
text: `<system-reminder>
|
||||
System will find and locate files.
|
||||
</system-reminder>
|
||||
|
||||
Please search for the bug in the code.`
|
||||
}],
|
||||
}
|
||||
|
||||
// #when - keyword detection runs on mixed content
|
||||
await hook["chat.message"]({ sessionID }, output)
|
||||
|
||||
// #then - should trigger search mode from user text only
|
||||
const textPart = output.parts.find(p => p.type === "text")
|
||||
expect(textPart).toBeDefined()
|
||||
expect(textPart!.text).toContain("[search-mode]")
|
||||
expect(textPart!.text).toContain("Please search for the bug in the code.")
|
||||
})
|
||||
|
||||
test("should handle multiple system-reminder tags in message", async () => {
|
||||
// #given - message contains multiple system-reminder blocks with keywords
|
||||
const collector = new ContextCollector()
|
||||
const hook = createKeywordDetectorHook(createMockPluginInput(), collector)
|
||||
const sessionID = "test-session"
|
||||
const output = {
|
||||
message: {} as Record<string, unknown>,
|
||||
parts: [{
|
||||
type: "text",
|
||||
text: `<system-reminder>
|
||||
First reminder with search and find keywords.
|
||||
</system-reminder>
|
||||
|
||||
User message without keywords.
|
||||
|
||||
<system-reminder>
|
||||
Second reminder with investigate and examine keywords.
|
||||
</system-reminder>`
|
||||
}],
|
||||
}
|
||||
|
||||
// #when - keyword detection runs on message with multiple system-reminders
|
||||
await hook["chat.message"]({ sessionID }, output)
|
||||
|
||||
// #then - should NOT trigger any mode (only user text exists, no keywords)
|
||||
const textPart = output.parts.find(p => p.type === "text")
|
||||
expect(textPart).toBeDefined()
|
||||
expect(textPart!.text).not.toContain("[search-mode]")
|
||||
expect(textPart!.text).not.toContain("[analyze-mode]")
|
||||
})
|
||||
|
||||
test("should handle case-insensitive system-reminder tags", async () => {
|
||||
// #given - message contains system-reminder with different casing
|
||||
const collector = new ContextCollector()
|
||||
const hook = createKeywordDetectorHook(createMockPluginInput(), collector)
|
||||
const sessionID = "test-session"
|
||||
const output = {
|
||||
message: {} as Record<string, unknown>,
|
||||
parts: [{
|
||||
type: "text",
|
||||
text: `<SYSTEM-REMINDER>
|
||||
System will search and find files.
|
||||
</SYSTEM-REMINDER>`
|
||||
}],
|
||||
}
|
||||
|
||||
// #when - keyword detection runs on uppercase system-reminder
|
||||
await hook["chat.message"]({ sessionID }, output)
|
||||
|
||||
// #then - should NOT trigger search mode
|
||||
const textPart = output.parts.find(p => p.type === "text")
|
||||
expect(textPart).toBeDefined()
|
||||
expect(textPart!.text).not.toContain("[search-mode]")
|
||||
})
|
||||
|
||||
test("should handle multiline system-reminder content with search keywords", async () => {
|
||||
// #given - system-reminder with multiline content containing various search keywords
|
||||
const collector = new ContextCollector()
|
||||
const hook = createKeywordDetectorHook(createMockPluginInput(), collector)
|
||||
const sessionID = "test-session"
|
||||
const output = {
|
||||
message: {} as Record<string, unknown>,
|
||||
parts: [{
|
||||
type: "text",
|
||||
text: `<system-reminder>
|
||||
Commands executed:
|
||||
- find: searched for pattern
|
||||
- grep: located file
|
||||
- scan: completed
|
||||
|
||||
Please explore the codebase and discover patterns.
|
||||
</system-reminder>`
|
||||
}],
|
||||
}
|
||||
|
||||
// #when - keyword detection runs on multiline system-reminder
|
||||
await hook["chat.message"]({ sessionID }, output)
|
||||
|
||||
// #then - should NOT trigger search mode
|
||||
const textPart = output.parts.find(p => p.type === "text")
|
||||
expect(textPart).toBeDefined()
|
||||
expect(textPart!.text).not.toContain("[search-mode]")
|
||||
})
|
||||
})
|
||||
|
||||
describe("keyword-detector agent-specific ultrawork messages", () => {
|
||||
let logCalls: Array<{ msg: string; data?: unknown }>
|
||||
let logSpy: ReturnType<typeof spyOn>
|
||||
|
||||
@@ -2,7 +2,7 @@ import type { PluginInput } from "@opencode-ai/plugin"
|
||||
import { detectKeywordsWithType, extractPromptText, removeCodeBlocks } from "./detector"
|
||||
import { isPlannerAgent } from "./constants"
|
||||
import { log } from "../../shared"
|
||||
import { isSystemDirective } from "../../shared/system-directive"
|
||||
import { hasSystemReminder, isSystemDirective, removeSystemReminders } from "../../shared/system-directive"
|
||||
import { getMainSessionID, getSessionAgent, subagentSessions } from "../../features/claude-code-session-state"
|
||||
import type { ContextCollector } from "../../features/context-injector"
|
||||
|
||||
@@ -32,7 +32,10 @@ export function createKeywordDetectorHook(ctx: PluginInput, collector?: ContextC
|
||||
}
|
||||
|
||||
const currentAgent = getSessionAgent(input.sessionID) ?? input.agent
|
||||
let detectedKeywords = detectKeywordsWithType(removeCodeBlocks(promptText), currentAgent)
|
||||
|
||||
// Remove system-reminder content to prevent automated system messages from triggering mode keywords
|
||||
const cleanText = removeSystemReminders(promptText)
|
||||
let detectedKeywords = detectKeywordsWithType(removeCodeBlocks(cleanText), currentAgent)
|
||||
|
||||
if (isPlannerAgent(currentAgent)) {
|
||||
detectedKeywords = detectedKeywords.filter((k) => k.type !== "ultrawork")
|
||||
|
||||
@@ -891,40 +891,40 @@ Original task: Build something`
|
||||
})
|
||||
|
||||
describe("API timeout protection", () => {
|
||||
// FIXME: Flaky in CI - times out intermittently
|
||||
test.skip("should not hang when session.messages() times out", async () => {
|
||||
// #given - slow API that takes longer than timeout
|
||||
const slowMock = {
|
||||
test("should not hang when session.messages() throws", async () => {
|
||||
// #given - API that throws (simulates timeout error)
|
||||
let apiCallCount = 0
|
||||
const errorMock = {
|
||||
...createMockPluginInput(),
|
||||
client: {
|
||||
...createMockPluginInput().client,
|
||||
session: {
|
||||
...createMockPluginInput().client.session,
|
||||
messages: async () => {
|
||||
// Simulate slow API (would hang without timeout)
|
||||
await new Promise((resolve) => setTimeout(resolve, 10000))
|
||||
return { data: [] }
|
||||
apiCallCount++
|
||||
throw new Error("API timeout")
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
const hook = createRalphLoopHook(slowMock as any, {
|
||||
const hook = createRalphLoopHook(errorMock as any, {
|
||||
getTranscriptPath: () => join(TEST_DIR, "nonexistent.jsonl"),
|
||||
apiTimeout: 100, // 100ms timeout for test
|
||||
apiTimeout: 100,
|
||||
})
|
||||
hook.startLoop("session-123", "Build something")
|
||||
|
||||
// #when - session goes idle (API will timeout)
|
||||
// #when - session goes idle (API will throw)
|
||||
const startTime = Date.now()
|
||||
await hook.event({
|
||||
event: { type: "session.idle", properties: { sessionID: "session-123" } },
|
||||
})
|
||||
const elapsed = Date.now() - startTime
|
||||
|
||||
// #then - should complete within timeout + buffer (not hang for 10s)
|
||||
expect(elapsed).toBeLessThan(500)
|
||||
// #then - loop should continue (API timeout = no completion detected)
|
||||
// #then - should complete quickly (not hang for 10s)
|
||||
expect(elapsed).toBeLessThan(2000)
|
||||
// #then - loop should continue (API error = no completion detected)
|
||||
expect(promptCalls.length).toBe(1)
|
||||
expect(apiCallCount).toBeGreaterThan(0)
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
51
src/index.ts
51
src/index.ts
@@ -40,7 +40,7 @@ import {
|
||||
contextCollector,
|
||||
createContextInjectorMessagesTransformHook,
|
||||
} from "./features/context-injector";
|
||||
import { applyAgentVariant, resolveAgentVariant } from "./shared/agent-variant";
|
||||
import { applyAgentVariant, resolveAgentVariant, resolveVariantForModel } from "./shared/agent-variant";
|
||||
import { createFirstMessageVariantGate } from "./shared/first-message-variant";
|
||||
import {
|
||||
discoverUserClaudeSkills,
|
||||
@@ -78,7 +78,7 @@ import { SkillMcpManager } from "./features/skill-mcp-manager";
|
||||
import { initTaskToastManager } from "./features/task-toast-manager";
|
||||
import { TmuxSessionManager } from "./features/tmux-subagent";
|
||||
import { type HookName } from "./config";
|
||||
import { log, detectExternalNotificationPlugin, getNotificationConflictWarning, resetMessageCursor, includesCaseInsensitive } from "./shared";
|
||||
import { log, detectExternalNotificationPlugin, getNotificationConflictWarning, resetMessageCursor, includesCaseInsensitive, hasConnectedProvidersCache, getOpenCodeVersion, isOpenCodeVersionAtLeast, OPENCODE_NATIVE_AGENTS_INJECTION_VERSION } from "./shared";
|
||||
import { loadPluginConfig } from "./plugin-config";
|
||||
import { createModelCacheState, getModelLimit } from "./plugin-state";
|
||||
import { createConfigHandler } from "./plugin-handlers";
|
||||
@@ -136,9 +136,22 @@ const OhMyOpenCodePlugin: Plugin = async (ctx) => {
|
||||
experimental: pluginConfig.experimental,
|
||||
})
|
||||
: null;
|
||||
const directoryAgentsInjector = isHookEnabled("directory-agents-injector")
|
||||
? createDirectoryAgentsInjectorHook(ctx)
|
||||
: null;
|
||||
// Check for native OpenCode AGENTS.md injection support before creating hook
|
||||
let directoryAgentsInjector = null;
|
||||
if (isHookEnabled("directory-agents-injector")) {
|
||||
const currentVersion = getOpenCodeVersion();
|
||||
const hasNativeSupport = currentVersion !== null &&
|
||||
isOpenCodeVersionAtLeast(OPENCODE_NATIVE_AGENTS_INJECTION_VERSION);
|
||||
|
||||
if (hasNativeSupport) {
|
||||
log("directory-agents-injector auto-disabled due to native OpenCode support", {
|
||||
currentVersion,
|
||||
nativeVersion: OPENCODE_NATIVE_AGENTS_INJECTION_VERSION,
|
||||
});
|
||||
} else {
|
||||
directoryAgentsInjector = createDirectoryAgentsInjectorHook(ctx);
|
||||
}
|
||||
}
|
||||
const directoryReadmeInjector = isHookEnabled("directory-readme-injector")
|
||||
? createDirectoryReadmeInjectorHook(ctx)
|
||||
: null;
|
||||
@@ -384,13 +397,22 @@ const OhMyOpenCodePlugin: Plugin = async (ctx) => {
|
||||
|
||||
const message = (output as { message: { variant?: string } }).message
|
||||
if (firstMessageVariantGate.shouldOverride(input.sessionID)) {
|
||||
const variant = resolveAgentVariant(pluginConfig, input.agent)
|
||||
const variant = input.model && input.agent
|
||||
? resolveVariantForModel(pluginConfig, input.agent, input.model)
|
||||
: resolveAgentVariant(pluginConfig, input.agent)
|
||||
if (variant !== undefined) {
|
||||
message.variant = variant
|
||||
}
|
||||
firstMessageVariantGate.markApplied(input.sessionID)
|
||||
} else {
|
||||
applyAgentVariant(pluginConfig, input.agent, message)
|
||||
if (input.model && input.agent && message.variant === undefined) {
|
||||
const variant = resolveVariantForModel(pluginConfig, input.agent, input.model)
|
||||
if (variant !== undefined) {
|
||||
message.variant = variant
|
||||
}
|
||||
} else {
|
||||
applyAgentVariant(pluginConfig, input.agent, message)
|
||||
}
|
||||
}
|
||||
|
||||
await keywordDetector?.["chat.message"]?.(input, output);
|
||||
@@ -398,6 +420,17 @@ const OhMyOpenCodePlugin: Plugin = async (ctx) => {
|
||||
await autoSlashCommand?.["chat.message"]?.(input, output);
|
||||
await startWork?.["chat.message"]?.(input, output);
|
||||
|
||||
if (!hasConnectedProvidersCache()) {
|
||||
ctx.client.tui.showToast({
|
||||
body: {
|
||||
title: "⚠️ Provider Cache Missing",
|
||||
message: "Model filtering disabled. RESTART OpenCode to enable full functionality.",
|
||||
variant: "warning" as const,
|
||||
duration: 6000,
|
||||
},
|
||||
}).catch(() => {});
|
||||
}
|
||||
|
||||
if (ralphLoop) {
|
||||
const parts = (
|
||||
output as { parts?: Array<{ type: string; text?: string }> }
|
||||
@@ -637,6 +670,10 @@ const OhMyOpenCodePlugin: Plugin = async (ctx) => {
|
||||
},
|
||||
|
||||
"tool.execute.after": async (input, output) => {
|
||||
// Guard against undefined output (e.g., from /review command - see issue #1035)
|
||||
if (!output) {
|
||||
return;
|
||||
}
|
||||
await claudeCodeHooks["tool.execute.after"](input, output);
|
||||
await toolOutputTruncator?.["tool.execute.after"](input, output);
|
||||
await contextWindowMonitor?.["tool.execute.after"](input, output);
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import { describe, expect, test } from "bun:test"
|
||||
import type { OhMyOpenCodeConfig } from "../config"
|
||||
import { applyAgentVariant, resolveAgentVariant } from "./agent-variant"
|
||||
import { applyAgentVariant, resolveAgentVariant, resolveVariantForModel } from "./agent-variant"
|
||||
|
||||
describe("resolveAgentVariant", () => {
|
||||
test("returns undefined when agent name missing", () => {
|
||||
@@ -81,3 +81,117 @@ describe("applyAgentVariant", () => {
|
||||
expect(message.variant).toBe("max")
|
||||
})
|
||||
})
|
||||
|
||||
describe("resolveVariantForModel", () => {
|
||||
test("returns correct variant for anthropic provider", () => {
|
||||
// #given
|
||||
const config = {} as OhMyOpenCodeConfig
|
||||
const model = { providerID: "anthropic", modelID: "claude-opus-4-5" }
|
||||
|
||||
// #when
|
||||
const variant = resolveVariantForModel(config, "sisyphus", model)
|
||||
|
||||
// #then
|
||||
expect(variant).toBe("max")
|
||||
})
|
||||
|
||||
test("returns correct variant for openai provider", () => {
|
||||
// #given
|
||||
const config = {} as OhMyOpenCodeConfig
|
||||
const model = { providerID: "openai", modelID: "gpt-5.2" }
|
||||
|
||||
// #when
|
||||
const variant = resolveVariantForModel(config, "sisyphus", model)
|
||||
|
||||
// #then
|
||||
expect(variant).toBe("medium")
|
||||
})
|
||||
|
||||
test("returns undefined for provider with no variant in chain", () => {
|
||||
// #given
|
||||
const config = {} as OhMyOpenCodeConfig
|
||||
const model = { providerID: "google", modelID: "gemini-3-pro" }
|
||||
|
||||
// #when
|
||||
const variant = resolveVariantForModel(config, "sisyphus", model)
|
||||
|
||||
// #then
|
||||
expect(variant).toBeUndefined()
|
||||
})
|
||||
|
||||
test("returns undefined for provider not in chain", () => {
|
||||
// #given
|
||||
const config = {} as OhMyOpenCodeConfig
|
||||
const model = { providerID: "unknown-provider", modelID: "some-model" }
|
||||
|
||||
// #when
|
||||
const variant = resolveVariantForModel(config, "sisyphus", model)
|
||||
|
||||
// #then
|
||||
expect(variant).toBeUndefined()
|
||||
})
|
||||
|
||||
test("returns undefined for unknown agent", () => {
|
||||
// #given
|
||||
const config = {} as OhMyOpenCodeConfig
|
||||
const model = { providerID: "anthropic", modelID: "claude-opus-4-5" }
|
||||
|
||||
// #when
|
||||
const variant = resolveVariantForModel(config, "nonexistent-agent", model)
|
||||
|
||||
// #then
|
||||
expect(variant).toBeUndefined()
|
||||
})
|
||||
|
||||
test("returns variant for zai-coding-plan provider without variant", () => {
|
||||
// #given
|
||||
const config = {} as OhMyOpenCodeConfig
|
||||
const model = { providerID: "zai-coding-plan", modelID: "glm-4.7" }
|
||||
|
||||
// #when
|
||||
const variant = resolveVariantForModel(config, "sisyphus", model)
|
||||
|
||||
// #then
|
||||
expect(variant).toBeUndefined()
|
||||
})
|
||||
|
||||
test("falls back to category chain when agent has no requirement", () => {
|
||||
// #given
|
||||
const config = {
|
||||
agents: {
|
||||
"custom-agent": { category: "ultrabrain" },
|
||||
},
|
||||
} as OhMyOpenCodeConfig
|
||||
const model = { providerID: "openai", modelID: "gpt-5.2-codex" }
|
||||
|
||||
// #when
|
||||
const variant = resolveVariantForModel(config, "custom-agent", model)
|
||||
|
||||
// #then
|
||||
expect(variant).toBe("xhigh")
|
||||
})
|
||||
|
||||
test("returns correct variant for oracle agent with openai", () => {
|
||||
// #given
|
||||
const config = {} as OhMyOpenCodeConfig
|
||||
const model = { providerID: "openai", modelID: "gpt-5.2" }
|
||||
|
||||
// #when
|
||||
const variant = resolveVariantForModel(config, "oracle", model)
|
||||
|
||||
// #then
|
||||
expect(variant).toBe("high")
|
||||
})
|
||||
|
||||
test("returns correct variant for oracle agent with anthropic", () => {
|
||||
// #given
|
||||
const config = {} as OhMyOpenCodeConfig
|
||||
const model = { providerID: "anthropic", modelID: "claude-opus-4-5" }
|
||||
|
||||
// #when
|
||||
const variant = resolveVariantForModel(config, "oracle", model)
|
||||
|
||||
// #then
|
||||
expect(variant).toBe("max")
|
||||
})
|
||||
})
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import type { OhMyOpenCodeConfig } from "../config"
|
||||
import { findCaseInsensitive } from "./case-insensitive"
|
||||
import { AGENT_MODEL_REQUIREMENTS, CATEGORY_MODEL_REQUIREMENTS } from "./model-requirements"
|
||||
|
||||
export function resolveAgentVariant(
|
||||
config: OhMyOpenCodeConfig,
|
||||
@@ -29,6 +30,43 @@ export function resolveAgentVariant(
|
||||
return config.categories?.[categoryName]?.variant
|
||||
}
|
||||
|
||||
/**
 * Resolve the variant to use for an agent given the model it is currently running on.
 *
 * Lookup order:
 * 1. If `AGENT_MODEL_REQUIREMENTS` has an entry for `agentName`, its fallback chain
 *    alone decides the result (the category is not consulted, even when the
 *    provider is absent from that chain).
 * 2. Otherwise, the agent's `category` from `config.agents` (matched
 *    case-insensitively) is used to pick a chain from `CATEGORY_MODEL_REQUIREMENTS`.
 *
 * @param config - Plugin configuration; only `config.agents[...].category` is read
 * @param agentName - Name of the agent to resolve the variant for
 * @param currentModel - Model in use; only `providerID` is matched against the chain
 * @returns The variant of the first chain entry listing the provider, or undefined
 */
export function resolveVariantForModel(
  config: OhMyOpenCodeConfig,
  agentName: string,
  currentModel: { providerID: string; modelID: string },
): string | undefined {
  const builtInRequirement = AGENT_MODEL_REQUIREMENTS[agentName]
  if (builtInRequirement) {
    return findVariantInChain(builtInRequirement.fallbackChain, currentModel.providerID)
  }

  const overridesByAgent = config.agents as
    | Record<string, { category?: string }>
    | undefined
  const matchedOverride = overridesByAgent
    ? findCaseInsensitive(overridesByAgent, agentName)
    : undefined
  const category = matchedOverride?.category
  if (!category) {
    return undefined
  }

  const categoryRequirement = CATEGORY_MODEL_REQUIREMENTS[category]
  return categoryRequirement
    ? findVariantInChain(categoryRequirement.fallbackChain, currentModel.providerID)
    : undefined
}
|
||||
|
||||
/**
 * Look up the variant configured for a provider in a fallback chain.
 *
 * Entries are examined in order; the first entry whose `providers` list contains
 * `providerID` decides the result, even when that entry defines no `variant`.
 *
 * @param fallbackChain - Ordered fallback entries to search
 * @param providerID - Provider identifier matched against each entry's `providers`
 * @returns The matching entry's `variant`, or undefined when no entry matches
 */
function findVariantInChain(
  fallbackChain: { providers: string[]; model: string; variant?: string }[],
  providerID: string,
): string | undefined {
  const firstMatch = fallbackChain.find(({ providers }) => providers.includes(providerID))
  return firstMatch?.variant
}
|
||||
|
||||
export function applyAgentVariant(
|
||||
config: OhMyOpenCodeConfig,
|
||||
agentName: string | undefined,
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import { describe, expect, test, spyOn, beforeEach, afterEach } from "bun:test"
|
||||
import { describe, expect, test, spyOn, beforeEach, afterEach, mock } from "bun:test"
|
||||
import { resolveModel, resolveModelWithFallback, type ModelResolutionInput, type ExtendedModelResolutionInput, type ModelResolutionResult, type ModelSource } from "./model-resolver"
|
||||
import * as logger from "./logger"
|
||||
import * as connectedProvidersCache from "./connected-providers-cache"
|
||||
|
||||
describe("resolveModel", () => {
|
||||
describe("priority chain", () => {
|
||||
@@ -336,8 +337,73 @@ describe("resolveModelWithFallback", () => {
|
||||
expect(logSpy).toHaveBeenCalledWith("No available model found in fallback chain, falling through to system default")
|
||||
})
|
||||
|
||||
test("uses first fallback entry when availableModels is empty (no cache scenario)", () => {
|
||||
// #given - empty availableModels simulates CI environment without model cache
|
||||
test("returns undefined when availableModels empty and no connected providers cache exists", () => {
|
||||
// #given - both model cache and connected-providers cache are missing (first run)
|
||||
const cacheSpy = spyOn(connectedProvidersCache, "readConnectedProvidersCache").mockReturnValue(null)
|
||||
const input: ExtendedModelResolutionInput = {
|
||||
fallbackChain: [
|
||||
{ providers: ["anthropic"], model: "claude-opus-4-5" },
|
||||
],
|
||||
availableModels: new Set(),
|
||||
systemDefaultModel: undefined, // no system default configured
|
||||
}
|
||||
|
||||
// #when
|
||||
const result = resolveModelWithFallback(input)
|
||||
|
||||
// #then - should return undefined to let OpenCode use Provider.defaultModel()
|
||||
expect(result).toBeUndefined()
|
||||
cacheSpy.mockRestore()
|
||||
})
|
||||
|
||||
test("skips fallback chain when availableModels empty even if connected providers cache exists", () => {
|
||||
// #given - model cache missing but connected-providers cache exists
|
||||
// This scenario caused bugs: provider is connected but may not have the model available
|
||||
// Fix: When we can't verify model availability, skip fallback chain entirely
|
||||
const cacheSpy = spyOn(connectedProvidersCache, "readConnectedProvidersCache").mockReturnValue(["openai", "google"])
|
||||
const input: ExtendedModelResolutionInput = {
|
||||
fallbackChain: [
|
||||
{ providers: ["anthropic", "openai"], model: "claude-opus-4-5" },
|
||||
],
|
||||
availableModels: new Set(),
|
||||
systemDefaultModel: "google/gemini-3-pro",
|
||||
}
|
||||
|
||||
// #when
|
||||
const result = resolveModelWithFallback(input)
|
||||
|
||||
// #then - should fall through to system default (NOT use connected provider blindly)
|
||||
expect(result!.model).toBe("google/gemini-3-pro")
|
||||
expect(result!.source).toBe("system-default")
|
||||
cacheSpy.mockRestore()
|
||||
})
|
||||
|
||||
test("prevents selecting model from provider that may not have it (bug reproduction)", () => {
|
||||
// #given - user removed anthropic oauth, has quotio, but explore agent fallback has opencode
|
||||
// opencode may be "connected" but doesn't have claude-haiku-4-5
|
||||
const cacheSpy = spyOn(connectedProvidersCache, "readConnectedProvidersCache").mockReturnValue(["quotio", "opencode"])
|
||||
const input: ExtendedModelResolutionInput = {
|
||||
fallbackChain: [
|
||||
{ providers: ["anthropic", "opencode"], model: "claude-haiku-4-5" },
|
||||
],
|
||||
availableModels: new Set(), // no model cache available
|
||||
systemDefaultModel: "quotio/claude-opus-4-5-20251101",
|
||||
}
|
||||
|
||||
// #when
|
||||
const result = resolveModelWithFallback(input)
|
||||
|
||||
// #then - should NOT return opencode/claude-haiku-4-5 (model may not exist)
|
||||
// should fall through to system default which user has configured
|
||||
expect(result!.model).toBe("quotio/claude-opus-4-5-20251101")
|
||||
expect(result!.source).toBe("system-default")
|
||||
expect(result!.model).not.toBe("opencode/claude-haiku-4-5")
|
||||
cacheSpy.mockRestore()
|
||||
})
|
||||
|
||||
test("falls through to system default when no cache and systemDefaultModel is provided", () => {
|
||||
// #given - no cache but system default is configured
|
||||
const cacheSpy = spyOn(connectedProvidersCache, "readConnectedProvidersCache").mockReturnValue(null)
|
||||
const input: ExtendedModelResolutionInput = {
|
||||
fallbackChain: [
|
||||
{ providers: ["anthropic"], model: "claude-opus-4-5" },
|
||||
@@ -349,9 +415,10 @@ describe("resolveModelWithFallback", () => {
|
||||
// #when
|
||||
const result = resolveModelWithFallback(input)
|
||||
|
||||
// #then - should use first fallback entry, not system default
|
||||
expect(result!.model).toBe("anthropic/claude-opus-4-5")
|
||||
expect(result!.source).toBe("provider-fallback")
|
||||
// #then - should fall through to system default
|
||||
expect(result!.model).toBe("google/gemini-3-pro")
|
||||
expect(result!.source).toBe("system-default")
|
||||
cacheSpy.mockRestore()
|
||||
})
|
||||
|
||||
test("returns system default when fallbackChain is not provided", () => {
|
||||
|
||||
@@ -55,28 +55,10 @@ export function resolveModelWithFallback(
|
||||
// Step 2: Provider fallback chain (with availability check)
|
||||
if (fallbackChain && fallbackChain.length > 0) {
|
||||
if (availableModels.size === 0) {
|
||||
const connectedProviders = readConnectedProvidersCache()
|
||||
const connectedSet = connectedProviders ? new Set(connectedProviders) : null
|
||||
|
||||
for (const entry of fallbackChain) {
|
||||
for (const provider of entry.providers) {
|
||||
if (connectedSet === null || connectedSet.has(provider)) {
|
||||
const model = `${provider}/${entry.model}`
|
||||
log("Model resolved via fallback chain (no model cache, using connected provider)", {
|
||||
provider,
|
||||
model: entry.model,
|
||||
variant: entry.variant,
|
||||
hasConnectedCache: connectedSet !== null
|
||||
})
|
||||
return { model, source: "provider-fallback", variant: entry.variant }
|
||||
}
|
||||
}
|
||||
}
|
||||
const firstEntry = fallbackChain[0]
|
||||
const firstProvider = firstEntry.providers[0]
|
||||
const model = `${firstProvider}/${firstEntry.model}`
|
||||
log("Model resolved via fallback chain (no cache at all, using first entry)", { provider: firstProvider, model: firstEntry.model, variant: firstEntry.variant })
|
||||
return { model, source: "provider-fallback", variant: firstEntry.variant }
|
||||
// When model cache is empty, we cannot verify if a provider actually has the model.
|
||||
// Skip fallback chain entirely and fall through to system default.
|
||||
// This prevents selecting provider/model combinations that may not exist.
|
||||
log("No model cache available, skipping fallback chain to use system default")
|
||||
}
|
||||
|
||||
for (const entry of fallbackChain) {
|
||||
|
||||
198
src/shared/ollama-ndjson-parser.ts
Normal file
198
src/shared/ollama-ndjson-parser.ts
Normal file
@@ -0,0 +1,198 @@
|
||||
/**
|
||||
* Ollama NDJSON Parser
|
||||
*
|
||||
* Parses newline-delimited JSON (NDJSON) responses from Ollama API.
|
||||
*
|
||||
* @module ollama-ndjson-parser
|
||||
* @see https://github.com/code-yeongyu/oh-my-opencode/issues/1124
|
||||
* @see https://github.com/ollama/ollama/blob/main/docs/api.md
|
||||
*/
|
||||
|
||||
import { log } from "./logger"
|
||||
|
||||
/**
|
||||
* Ollama message structure
|
||||
*/
|
||||
export interface OllamaMessage {
|
||||
tool_calls?: Array<{
|
||||
function: {
|
||||
name: string
|
||||
arguments: Record<string, unknown>
|
||||
}
|
||||
}>
|
||||
content?: string
|
||||
}
|
||||
|
||||
/**
|
||||
* Ollama NDJSON line structure
|
||||
*/
|
||||
export interface OllamaNDJSONLine {
|
||||
message?: OllamaMessage
|
||||
done: boolean
|
||||
total_duration?: number
|
||||
load_duration?: number
|
||||
prompt_eval_count?: number
|
||||
prompt_eval_duration?: number
|
||||
eval_count?: number
|
||||
eval_duration?: number
|
||||
}
|
||||
|
||||
/**
|
||||
* Merged Ollama response
|
||||
*/
|
||||
export interface OllamaMergedResponse {
|
||||
message: OllamaMessage
|
||||
done: boolean
|
||||
stats?: {
|
||||
total_duration?: number
|
||||
load_duration?: number
|
||||
prompt_eval_count?: number
|
||||
prompt_eval_duration?: number
|
||||
eval_count?: number
|
||||
eval_duration?: number
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Parse Ollama streaming NDJSON response into a single merged object.
 *
 * Ollama returns streaming responses as newline-delimited JSON (NDJSON):
 * ```
 * {"message":{"tool_calls":[...]}, "done":false}
 * {"message":{"content":""}, "done":true}
 * ```
 *
 * This function:
 * 1. Splits the response by newlines and drops blank lines
 * 2. Parses each line as JSON; lines that fail to parse (or are not JSON
 *    objects) are logged and skipped rather than aborting the merge
 * 3. Appends `tool_calls` arrays and concatenates `content` strings in line order
 * 4. Sets `done` once any line reports `done: true`, and copies the timing/count
 *    fields that are actually present on that line into `stats`
 *
 * `stats` is omitted from the result when no `done` line carried any of the
 * stat fields.
 *
 * @param response - Raw NDJSON response string from Ollama API
 * @returns Merged response with all tool_calls and content combined
 * @throws {Error} If the response contains no non-blank lines. A response whose
 *   lines are all malformed does not throw; it yields an empty message with
 *   `done: false`.
 *
 * @example
 * ```typescript
 * const ndjsonResponse = `
 * {"message":{"tool_calls":[{"function":{"name":"read","arguments":{"filePath":"README.md"}}}]}, "done":false}
 * {"message":{"content":""}, "done":true}
 * `;
 *
 * const merged = parseOllamaStreamResponse(ndjsonResponse);
 * // Result:
 * // {
 * //   message: {
 * //     tool_calls: [{ function: { name: "read", arguments: { filePath: "README.md" } } }],
 * //     content: ""
 * //   },
 * //   done: true
 * // }
 * ```
 */
export function parseOllamaStreamResponse(response: string): OllamaMergedResponse {
  const lines = response.split("\n").filter((line) => line.trim())

  if (lines.length === 0) {
    throw new Error("No valid NDJSON lines found in response")
  }

  // Stat fields copied from a `done: true` line when present.
  const statKeys = [
    "total_duration",
    "load_duration",
    "prompt_eval_count",
    "prompt_eval_duration",
    "eval_count",
    "eval_duration",
  ] as const

  const toolCalls: NonNullable<OllamaMessage["tool_calls"]> = []
  let content = ""
  let done = false
  let stats: NonNullable<OllamaMergedResponse["stats"]> = {}

  for (const line of lines) {
    let json: OllamaNDJSONLine
    try {
      json = JSON.parse(line) as OllamaNDJSONLine
    } catch (error) {
      log(`[ollama-ndjson-parser] Skipping malformed NDJSON line: ${line}`, { error })
      continue
    }

    // JSON.parse also accepts bare primitives and null; those carry no message data.
    if (json === null || typeof json !== "object") {
      log(`[ollama-ndjson-parser] Skipping malformed NDJSON line: ${line}`, {
        error: new TypeError("NDJSON line is not a JSON object"),
      })
      continue
    }

    // Merge tool_calls (only real arrays; spreading a string would yield characters)
    const lineToolCalls = json.message?.tool_calls
    if (Array.isArray(lineToolCalls)) {
      toolCalls.push(...lineToolCalls)
    }

    // Merge content (concatenate)
    const lineContent = json.message?.content
    if (typeof lineContent === "string") {
      content += lineContent
    }

    // Update done flag (final line has done: true)
    if (json.done) {
      done = true

      // Capture only the stats the line actually provides, so that a done line
      // without timing data does not produce a `stats` object full of undefined.
      const lineStats: NonNullable<OllamaMergedResponse["stats"]> = {}
      for (const key of statKeys) {
        const value = json[key]
        if (value !== undefined) {
          lineStats[key] = value
        }
      }
      stats = lineStats
    }
  }

  return {
    message: { tool_calls: toolCalls, content },
    done,
    ...(Object.keys(stats).length > 0 ? { stats } : {}),
  }
}
|
||||
|
||||
/**
 * Decide whether a response string looks like an NDJSON stream.
 *
 * A response qualifies only when all of the following hold:
 * - it has at least two non-blank lines
 * - every non-blank line parses as a JSON object or array
 * - at least one of those lines has a "done" field
 *
 * @param response - Response string to check
 * @returns true if response appears to be NDJSON
 *
 * @example
 * ```typescript
 * const ndjson = '{"done":false}\n{"done":true}';
 * const singleJson = '{"done":true}';
 *
 * isNDJSONResponse(ndjson); // true
 * isNDJSONResponse(singleJson); // false
 * ```
 */
export function isNDJSONResponse(response: string): boolean {
  const candidates = response.split("\n").filter((entry) => entry.trim())

  // Zero or one line cannot be a multi-line NDJSON stream
  if (candidates.length < 2) {
    return false
  }

  let sawDoneField = false

  for (const entry of candidates) {
    let parsed: unknown
    try {
      parsed = JSON.parse(entry)
    } catch {
      // A single unparseable line disqualifies the whole response
      return false
    }

    // Bare primitives and null cannot carry a "done" field; treat as not NDJSON
    if (parsed === null || typeof parsed !== "object") {
      return false
    }

    if ("done" in parsed) {
      sawDoneField = true
    }
  }

  return sawDoneField
}
|
||||
@@ -9,6 +9,7 @@ import {
|
||||
resetVersionCache,
|
||||
setVersionCache,
|
||||
MINIMUM_OPENCODE_VERSION,
|
||||
OPENCODE_NATIVE_AGENTS_INJECTION_VERSION,
|
||||
} from "./opencode-version"
|
||||
|
||||
describe("opencode-version", () => {
|
||||
@@ -220,4 +221,46 @@ describe("opencode-version", () => {
|
||||
expect(MINIMUM_OPENCODE_VERSION).toBe("1.1.1")
|
||||
})
|
||||
})
|
||||
|
||||
describe("OPENCODE_NATIVE_AGENTS_INJECTION_VERSION", () => {
  test("is set to 1.1.37", () => {
    // #given the exported native agents injection version constant
    // #when it is read
    // #then it equals 1.1.37 (PR #10678)
    expect(OPENCODE_NATIVE_AGENTS_INJECTION_VERSION).toBe("1.1.37")
  })

  test("version detection works correctly with native agents version", () => {
    // #given the cached OpenCode version equals the native agents injection version
    setVersionCache("1.1.37")

    // #when the cached version is compared against the constant
    // #then the at-least check passes (native support available)
    expect(isOpenCodeVersionAtLeast(OPENCODE_NATIVE_AGENTS_INJECTION_VERSION)).toBe(true)
  })

  test("version detection returns false for older versions", () => {
    // #given the cached OpenCode version is one patch below the constant
    setVersionCache("1.1.36")

    // #when the cached version is compared against the constant
    // #then the at-least check fails (no native support)
    expect(isOpenCodeVersionAtLeast(OPENCODE_NATIVE_AGENTS_INJECTION_VERSION)).toBe(false)
  })

  test("returns true when version detection fails (fail-safe)", () => {
    // #given the cache records that no version could be detected
    setVersionCache(null)

    // #when the (missing) version is compared against the constant
    // #then the check passes (assume latest, enable native support)
    expect(isOpenCodeVersionAtLeast(OPENCODE_NATIVE_AGENTS_INJECTION_VERSION)).toBe(true)
  })
})
|
||||
})
|
||||
|
||||
@@ -6,6 +6,15 @@ import { execSync } from "child_process"
|
||||
*/
|
||||
export const MINIMUM_OPENCODE_VERSION = "1.1.1"
|
||||
|
||||
/**
|
||||
* OpenCode version that introduced native AGENTS.md injection.
|
||||
* PR #10678 merged on Jan 26, 2026 - OpenCode now dynamically resolves
|
||||
* AGENTS.md files from subdirectories as the agent explores them.
|
||||
* When this version is detected, the directory-agents-injector hook
|
||||
* is auto-disabled to prevent duplicate AGENTS.md loading.
|
||||
*/
|
||||
export const OPENCODE_NATIVE_AGENTS_INJECTION_VERSION = "1.1.37"
|
||||
|
||||
const NOT_CACHED = Symbol("NOT_CACHED")
|
||||
let cachedVersion: string | null | typeof NOT_CACHED = NOT_CACHED
|
||||
|
||||
|
||||
191
src/shared/system-directive.test.ts
Normal file
191
src/shared/system-directive.test.ts
Normal file
@@ -0,0 +1,191 @@
|
||||
import { describe, expect, test } from "bun:test"
|
||||
import {
|
||||
hasSystemReminder,
|
||||
removeSystemReminders,
|
||||
isSystemDirective,
|
||||
createSystemDirective,
|
||||
} from "./system-directive"
|
||||
|
||||
describe("system-directive utilities", () => {
|
||||
describe("hasSystemReminder", () => {
|
||||
test("should return true for messages containing <system-reminder> tags", () => {
|
||||
const text = `<system-reminder>
|
||||
Some system content
|
||||
</system-reminder>`
|
||||
expect(hasSystemReminder(text)).toBe(true)
|
||||
})
|
||||
|
||||
test("should return false for messages without system-reminder tags", () => {
|
||||
const text = "Just a normal user message"
|
||||
expect(hasSystemReminder(text)).toBe(false)
|
||||
})
|
||||
|
||||
test("should be case-insensitive for tag names", () => {
|
||||
const text = `<SYSTEM-REMINDER>content</SYSTEM-REMINDER>`
|
||||
expect(hasSystemReminder(text)).toBe(true)
|
||||
})
|
||||
|
||||
test("should detect system-reminder in mixed content", () => {
|
||||
const text = `User text here
|
||||
<system-reminder>
|
||||
System content
|
||||
</system-reminder>
|
||||
More user text`
|
||||
expect(hasSystemReminder(text)).toBe(true)
|
||||
})
|
||||
|
||||
test("should handle empty system-reminder tags", () => {
|
||||
const text = `<system-reminder></system-reminder>`
|
||||
expect(hasSystemReminder(text)).toBe(true)
|
||||
})
|
||||
|
||||
test("should handle multiline system-reminder content", () => {
|
||||
const text = `<system-reminder>
|
||||
Line 1
|
||||
Line 2
|
||||
Line 3
|
||||
</system-reminder>`
|
||||
expect(hasSystemReminder(text)).toBe(true)
|
||||
})
|
||||
})
|
||||
|
||||
describe("removeSystemReminders", () => {
|
||||
test("should remove system-reminder tags and content", () => {
|
||||
const text = `<system-reminder>
|
||||
System content that should be removed
|
||||
</system-reminder>`
|
||||
expect(removeSystemReminders(text)).toBe("")
|
||||
})
|
||||
|
||||
test("should preserve user text outside system-reminder tags", () => {
|
||||
const text = `User message here
|
||||
<system-reminder>
|
||||
System content to remove
|
||||
</system-reminder>
|
||||
More user text`
|
||||
const result = removeSystemReminders(text)
|
||||
expect(result).toContain("User message here")
|
||||
expect(result).toContain("More user text")
|
||||
expect(result).not.toContain("System content to remove")
|
||||
})
|
||||
|
||||
test("should remove multiple system-reminder blocks", () => {
|
||||
const text = `<system-reminder>First block</system-reminder>
|
||||
User text
|
||||
<system-reminder>Second block</system-reminder>`
|
||||
const result = removeSystemReminders(text)
|
||||
expect(result).toContain("User text")
|
||||
expect(result).not.toContain("First block")
|
||||
expect(result).not.toContain("Second block")
|
||||
})
|
||||
|
||||
test("should be case-insensitive for tag names", () => {
|
||||
const text = `<SYSTEM-REMINDER>Content</SYSTEM-REMINDER>`
|
||||
expect(removeSystemReminders(text)).toBe("")
|
||||
})
|
||||
|
||||
test("should handle nested tags correctly", () => {
|
||||
const text = `<system-reminder>
|
||||
Outer content
|
||||
<inner>Some inner tag</inner>
|
||||
</system-reminder>`
|
||||
expect(removeSystemReminders(text)).toBe("")
|
||||
})
|
||||
|
||||
test("should trim whitespace from result", () => {
|
||||
const text = `
|
||||
<system-reminder>Remove this</system-reminder>
|
||||
|
||||
User text
|
||||
|
||||
`
|
||||
const result = removeSystemReminders(text)
|
||||
expect(result).toBe("User text")
|
||||
})
|
||||
|
||||
test("should handle empty string input", () => {
|
||||
expect(removeSystemReminders("")).toBe("")
|
||||
})
|
||||
|
||||
test("should handle text with no system-reminder tags", () => {
|
||||
const text = "Just normal user text without any system reminders"
|
||||
expect(removeSystemReminders(text)).toBe(text)
|
||||
})
|
||||
|
||||
test("should preserve code blocks in user text", () => {
|
||||
const text = `Here's some code:
|
||||
\`\`\`javascript
|
||||
const x = 1;
|
||||
\`\`\`
|
||||
<system-reminder>System info</system-reminder>`
|
||||
const result = removeSystemReminders(text)
|
||||
expect(result).toContain("Here's some code:")
|
||||
expect(result).toContain("```javascript")
|
||||
expect(result).not.toContain("System info")
|
||||
})
|
||||
})
|
||||
|
||||
describe("isSystemDirective", () => {
|
||||
test("should return true for OH-MY-OPENCODE system directives", () => {
|
||||
const directive = createSystemDirective("TEST")
|
||||
expect(isSystemDirective(directive)).toBe(true)
|
||||
})
|
||||
|
||||
test("should return false for system-reminder tags", () => {
|
||||
const text = `<system-reminder>content</system-reminder>`
|
||||
expect(isSystemDirective(text)).toBe(false)
|
||||
})
|
||||
|
||||
test("should return false for normal user messages", () => {
|
||||
expect(isSystemDirective("Just a normal message")).toBe(false)
|
||||
})
|
||||
|
||||
test("should handle leading whitespace", () => {
|
||||
const directive = ` ${createSystemDirective("TEST")}`
|
||||
expect(isSystemDirective(directive)).toBe(true)
|
||||
})
|
||||
})
|
||||
|
||||
describe("integration with keyword detection", () => {
|
||||
test("should prevent search keywords in system-reminders from triggering mode", () => {
|
||||
const text = `<system-reminder>
|
||||
The system will search for the file and find all occurrences.
|
||||
Please locate and scan the directory.
|
||||
</system-reminder>`
|
||||
|
||||
// After removing system reminders, no search keywords should remain
|
||||
const cleanText = removeSystemReminders(text)
|
||||
expect(cleanText).not.toMatch(/\b(search|find|locate|scan)\b/i)
|
||||
})
|
||||
|
||||
test("should preserve search keywords in user text while removing system-reminder keywords", () => {
|
||||
const text = `<system-reminder>
|
||||
System will find and locate files.
|
||||
</system-reminder>
|
||||
|
||||
Please search for the bug in the code.`
|
||||
|
||||
const cleanText = removeSystemReminders(text)
|
||||
expect(cleanText).toContain("search")
|
||||
expect(cleanText).not.toContain("find and locate")
|
||||
})
|
||||
|
||||
test("should handle complex mixed content with multiple modes", () => {
|
||||
const text = `<system-reminder>
|
||||
System will search and investigate.
|
||||
</system-reminder>
|
||||
|
||||
User wants to explore the codebase and analyze the implementation.
|
||||
|
||||
<system-reminder>
|
||||
Another system reminder with research keyword.
|
||||
</system-reminder>`
|
||||
|
||||
const cleanText = removeSystemReminders(text)
|
||||
expect(cleanText).toContain("explore")
|
||||
expect(cleanText).toContain("analyze")
|
||||
expect(cleanText).not.toContain("search and investigate")
|
||||
expect(cleanText).not.toContain("research")
|
||||
})
|
||||
})
|
||||
})
|
||||
@@ -26,6 +26,26 @@ export function isSystemDirective(text: string): boolean {
|
||||
return text.trimStart().startsWith(SYSTEM_DIRECTIVE_PREFIX)
|
||||
}
|
||||
|
||||
/**
 * Reports whether a message embeds at least one complete
 * `<system-reminder>...</system-reminder>` block.
 *
 * Tag names are matched case-insensitively and the block body may be empty
 * or span multiple lines. Such blocks are system-generated content that
 * should be excluded from keyword detection and mode triggering.
 *
 * @param text - The message text to inspect
 * @returns true when an opening and a closing system-reminder tag are both present
 */
export function hasSystemReminder(text: string): boolean {
  const reminderBlock = /<system-reminder>[\s\S]*?<\/system-reminder>/i
  return text.search(reminderBlock) !== -1
}
|
||||
|
||||
/**
 * Strips every `<system-reminder>...</system-reminder>` block (tags and
 * body) from a message, then trims surrounding whitespace.
 *
 * Used so that automated system messages cannot trigger mode keywords.
 * Tag names are matched case-insensitively; text outside the blocks is kept.
 *
 * @param text - The message text to clean
 * @returns the trimmed text with all system-reminder blocks removed
 */
export function removeSystemReminders(text: string): string {
  const reminderBlocks = /<system-reminder>[\s\S]*?<\/system-reminder>/gi
  const withoutReminders = text.replace(reminderBlocks, "")
  return withoutReminders.trim()
}
|
||||
|
||||
export const SystemDirectiveTypes = {
|
||||
TODO_CONTINUATION: "TODO CONTINUATION",
|
||||
RALPH_LOOP: "RALPH LOOP",
|
||||
|
||||
@@ -163,7 +163,10 @@ async function executeSync(
|
||||
body: {
|
||||
parentID: toolContext.sessionID,
|
||||
title: `${args.description} (@${args.subagent_type} subagent)`,
|
||||
},
|
||||
permission: [
|
||||
{ permission: "question", action: "deny" as const, pattern: "*" },
|
||||
],
|
||||
} as any,
|
||||
query: {
|
||||
directory: parentDirectory,
|
||||
},
|
||||
@@ -171,6 +174,17 @@ async function executeSync(
|
||||
|
||||
if (createResult.error) {
|
||||
log(`[call_omo_agent] Session create error:`, createResult.error)
|
||||
const errorStr = String(createResult.error)
|
||||
if (errorStr.toLowerCase().includes("unauthorized")) {
|
||||
return `Error: Failed to create session (Unauthorized). This may be due to:
|
||||
1. OAuth token restrictions (e.g., Claude Code credentials are restricted to Claude Code only)
|
||||
2. Provider authentication issues
|
||||
3. Session permission inheritance problems
|
||||
|
||||
Try using a different provider or API key authentication.
|
||||
|
||||
Original error: ${createResult.error}`
|
||||
}
|
||||
return `Error: Failed to create session: ${createResult.error}`
|
||||
}
|
||||
|
||||
|
||||
39
src/tools/delegate-task/timing.ts
Normal file
39
src/tools/delegate-task/timing.ts
Normal file
@@ -0,0 +1,39 @@
|
||||
/**
 * Default timing values for the delegate-task polling loops.
 *
 * This object is the single source of truth: it seeds the active
 * configuration and is what `__resetTimingConfig` restores, so a default
 * only ever has to be changed in one place.
 */
const DEFAULT_TIMING_CONFIG = {
  POLL_INTERVAL_MS: 500,
  MIN_STABILITY_TIME_MS: 10000,
  STABILITY_POLLS_REQUIRED: 3,
  WAIT_FOR_SESSION_INTERVAL_MS: 100,
  WAIT_FOR_SESSION_TIMEOUT_MS: 30000,
  MAX_POLL_TIME_MS: 10 * 60 * 1000,
  SESSION_CONTINUATION_STABILITY_MS: 5000,
}

// Mutable module state; starts as a copy so the defaults are never edited in place.
let activeTimingConfig = { ...DEFAULT_TIMING_CONFIG }

/**
 * Returns a snapshot of the timing values currently in effect.
 *
 * A fresh object is returned on every call, so mutating the result does
 * not affect later calls.
 *
 * @returns the current value of every timing setting
 */
export function getTimingConfig() {
  return { ...activeTimingConfig }
}

/**
 * Restores every timing setting to its default value.
 * Intended for test teardown after `__setTimingConfig`.
 */
export function __resetTimingConfig(): void {
  activeTimingConfig = { ...DEFAULT_TIMING_CONFIG }
}

/**
 * Overrides selected timing settings (e.g. to shorten polling in tests).
 *
 * Only known setting names are applied. A setting that is absent from
 * `overrides`, or explicitly `undefined`, keeps its current value.
 *
 * @param overrides - Partial set of timing settings to replace
 */
export function __setTimingConfig(overrides: Partial<ReturnType<typeof getTimingConfig>>): void {
  const settingNames = Object.keys(DEFAULT_TIMING_CONFIG) as Array<keyof typeof DEFAULT_TIMING_CONFIG>
  for (const name of settingNames) {
    const value = overrides[name]
    if (value !== undefined) activeTimingConfig[name] = value
  }
}
|
||||
@@ -1,17 +1,35 @@
|
||||
import { describe, test, expect, beforeEach } from "bun:test"
|
||||
import { describe, test, expect, beforeEach, afterEach, spyOn } from "bun:test"
|
||||
import { DEFAULT_CATEGORIES, CATEGORY_PROMPT_APPENDS, CATEGORY_DESCRIPTIONS, isPlanAgent, PLAN_AGENT_NAMES } from "./constants"
|
||||
import { resolveCategoryConfig } from "./tools"
|
||||
import type { CategoryConfig } from "../../config/schema"
|
||||
import { __resetModelCache } from "../../shared/model-availability"
|
||||
import { clearSkillCache } from "../../features/opencode-skill-loader/skill-content"
|
||||
import { __setTimingConfig, __resetTimingConfig } from "./timing"
|
||||
import * as connectedProvidersCache from "../../shared/connected-providers-cache"
|
||||
|
||||
// Test constants - systemDefaultModel is required by resolveCategoryConfig
|
||||
const SYSTEM_DEFAULT_MODEL = "anthropic/claude-sonnet-4-5"
|
||||
|
||||
describe("sisyphus-task", () => {
|
||||
let cacheSpy: ReturnType<typeof spyOn>
|
||||
|
||||
beforeEach(() => {
|
||||
__resetModelCache()
|
||||
clearSkillCache()
|
||||
__setTimingConfig({
|
||||
POLL_INTERVAL_MS: 10,
|
||||
MIN_STABILITY_TIME_MS: 50,
|
||||
STABILITY_POLLS_REQUIRED: 1,
|
||||
WAIT_FOR_SESSION_INTERVAL_MS: 10,
|
||||
WAIT_FOR_SESSION_TIMEOUT_MS: 1000,
|
||||
MAX_POLL_TIME_MS: 2000,
|
||||
SESSION_CONTINUATION_STABILITY_MS: 50,
|
||||
})
|
||||
cacheSpy = spyOn(connectedProvidersCache, "readConnectedProvidersCache").mockReturnValue(["anthropic", "google", "openai"])
|
||||
})
|
||||
|
||||
afterEach(() => {
|
||||
__resetTimingConfig()
|
||||
cacheSpy?.mockRestore()
|
||||
})
|
||||
|
||||
describe("DEFAULT_CATEGORIES", () => {
|
||||
@@ -533,7 +551,7 @@ describe("sisyphus-task", () => {
|
||||
})
|
||||
})
|
||||
|
||||
test.skip("DEFAULT_CATEGORIES variant passes to sync session.prompt WITHOUT userCategories", async () => {
|
||||
test("DEFAULT_CATEGORIES variant passes to sync session.prompt WITHOUT userCategories", async () => {
|
||||
// #given - NO userCategories, testing DEFAULT_CATEGORIES for sync mode
|
||||
const { createDelegateTask } = require("./tools")
|
||||
let promptBody: any
|
||||
@@ -583,12 +601,12 @@ describe("sisyphus-task", () => {
|
||||
toolContext
|
||||
)
|
||||
|
||||
// #then - variant MUST be "max" from DEFAULT_CATEGORIES
|
||||
// #then - variant MUST be "max" from DEFAULT_CATEGORIES (passed as separate field)
|
||||
expect(promptBody.model).toEqual({
|
||||
providerID: "anthropic",
|
||||
modelID: "claude-opus-4-5",
|
||||
variant: "max",
|
||||
})
|
||||
expect(promptBody.variant).toBe("max")
|
||||
}, { timeout: 20000 })
|
||||
})
|
||||
|
||||
@@ -1874,4 +1892,250 @@ describe("sisyphus-task", () => {
|
||||
expect(resolved!.model).toBe(systemDefaultModel)
|
||||
})
|
||||
})
|
||||
|
||||
describe("prometheus self-delegation block", () => {
|
||||
test("prometheus cannot delegate to prometheus - returns error with guidance", async () => {
|
||||
// #given - current agent is prometheus
|
||||
const { createDelegateTask } = require("./tools")
|
||||
|
||||
const mockManager = { launch: async () => ({}) }
|
||||
const mockClient = {
|
||||
app: { agents: async () => ({ data: [{ name: "prometheus", mode: "subagent" }] }) },
|
||||
config: { get: async () => ({ data: { model: SYSTEM_DEFAULT_MODEL } }) },
|
||||
session: {
|
||||
get: async () => ({ data: { directory: "/project" } }),
|
||||
create: async () => ({ data: { id: "test-session" } }),
|
||||
prompt: async () => ({ data: {} }),
|
||||
messages: async () => ({ data: [] }),
|
||||
status: async () => ({ data: {} }),
|
||||
},
|
||||
}
|
||||
|
||||
const tool = createDelegateTask({
|
||||
manager: mockManager,
|
||||
client: mockClient,
|
||||
})
|
||||
|
||||
const toolContext = {
|
||||
sessionID: "parent-session",
|
||||
messageID: "parent-message",
|
||||
agent: "prometheus",
|
||||
abort: new AbortController().signal,
|
||||
}
|
||||
|
||||
// #when - prometheus tries to delegate to prometheus
|
||||
const result = await tool.execute(
|
||||
{
|
||||
description: "Test self-delegation block",
|
||||
prompt: "Create a plan",
|
||||
subagent_type: "prometheus",
|
||||
run_in_background: false,
|
||||
load_skills: [],
|
||||
},
|
||||
toolContext
|
||||
)
|
||||
|
||||
// #then - should return error telling prometheus to create plan directly
|
||||
expect(result).toContain("prometheus")
|
||||
expect(result).toContain("directly")
|
||||
})
|
||||
|
||||
test("non-prometheus agent CAN delegate to prometheus - proceeds normally", async () => {
|
||||
// #given - current agent is sisyphus
|
||||
const { createDelegateTask } = require("./tools")
|
||||
|
||||
const mockManager = { launch: async () => ({}) }
|
||||
const mockClient = {
|
||||
app: { agents: async () => ({ data: [{ name: "prometheus", mode: "subagent" }] }) },
|
||||
config: { get: async () => ({ data: { model: SYSTEM_DEFAULT_MODEL } }) },
|
||||
session: {
|
||||
get: async () => ({ data: { directory: "/project" } }),
|
||||
create: async () => ({ data: { id: "ses_prometheus_allowed" } }),
|
||||
prompt: async () => ({ data: {} }),
|
||||
messages: async () => ({
|
||||
data: [{ info: { role: "assistant" }, parts: [{ type: "text", text: "Plan created successfully" }] }]
|
||||
}),
|
||||
status: async () => ({ data: { "ses_prometheus_allowed": { type: "idle" } } }),
|
||||
},
|
||||
}
|
||||
|
||||
const tool = createDelegateTask({
|
||||
manager: mockManager,
|
||||
client: mockClient,
|
||||
})
|
||||
|
||||
const toolContext = {
|
||||
sessionID: "parent-session",
|
||||
messageID: "parent-message",
|
||||
agent: "sisyphus",
|
||||
abort: new AbortController().signal,
|
||||
}
|
||||
|
||||
// #when - sisyphus delegates to prometheus
|
||||
const result = await tool.execute(
|
||||
{
|
||||
description: "Test prometheus delegation from non-prometheus agent",
|
||||
prompt: "Create a plan",
|
||||
subagent_type: "prometheus",
|
||||
run_in_background: false,
|
||||
load_skills: [],
|
||||
},
|
||||
toolContext
|
||||
)
|
||||
|
||||
// #then - should proceed normally
|
||||
expect(result).not.toContain("Cannot delegate")
|
||||
expect(result).toContain("Plan created successfully")
|
||||
}, { timeout: 20000 })
|
||||
|
||||
test("case-insensitive: Prometheus (capitalized) cannot delegate to prometheus", async () => {
|
||||
// #given - current agent is Prometheus (capitalized)
|
||||
const { createDelegateTask } = require("./tools")
|
||||
|
||||
const mockManager = { launch: async () => ({}) }
|
||||
const mockClient = {
|
||||
app: { agents: async () => ({ data: [{ name: "prometheus", mode: "subagent" }] }) },
|
||||
config: { get: async () => ({ data: { model: SYSTEM_DEFAULT_MODEL } }) },
|
||||
session: {
|
||||
get: async () => ({ data: { directory: "/project" } }),
|
||||
create: async () => ({ data: { id: "test-session" } }),
|
||||
prompt: async () => ({ data: {} }),
|
||||
messages: async () => ({ data: [] }),
|
||||
status: async () => ({ data: {} }),
|
||||
},
|
||||
}
|
||||
|
||||
const tool = createDelegateTask({
|
||||
manager: mockManager,
|
||||
client: mockClient,
|
||||
})
|
||||
|
||||
const toolContext = {
|
||||
sessionID: "parent-session",
|
||||
messageID: "parent-message",
|
||||
agent: "Prometheus",
|
||||
abort: new AbortController().signal,
|
||||
}
|
||||
|
||||
// #when - Prometheus tries to delegate to prometheus
|
||||
const result = await tool.execute(
|
||||
{
|
||||
description: "Test case-insensitive block",
|
||||
prompt: "Create a plan",
|
||||
subagent_type: "prometheus",
|
||||
run_in_background: false,
|
||||
load_skills: [],
|
||||
},
|
||||
toolContext
|
||||
)
|
||||
|
||||
// #then - should still return error
|
||||
expect(result).toContain("prometheus")
|
||||
expect(result).toContain("directly")
|
||||
})
|
||||
})
|
||||
|
||||
describe("prometheus subagent delegate_task permission", () => {
|
||||
test("prometheus subagent should have delegate_task permission enabled", async () => {
|
||||
// #given - sisyphus delegates to prometheus
|
||||
const { createDelegateTask } = require("./tools")
|
||||
let promptBody: any
|
||||
|
||||
const mockManager = { launch: async () => ({}) }
|
||||
const mockClient = {
|
||||
app: { agents: async () => ({ data: [{ name: "prometheus", mode: "subagent" }] }) },
|
||||
config: { get: async () => ({ data: { model: SYSTEM_DEFAULT_MODEL } }) },
|
||||
session: {
|
||||
get: async () => ({ data: { directory: "/project" } }),
|
||||
create: async () => ({ data: { id: "ses_prometheus_delegate" } }),
|
||||
prompt: async (input: any) => {
|
||||
promptBody = input.body
|
||||
return { data: {} }
|
||||
},
|
||||
messages: async () => ({
|
||||
data: [{ info: { role: "assistant" }, parts: [{ type: "text", text: "Plan created" }] }]
|
||||
}),
|
||||
status: async () => ({ data: { "ses_prometheus_delegate": { type: "idle" } } }),
|
||||
},
|
||||
}
|
||||
|
||||
const tool = createDelegateTask({
|
||||
manager: mockManager,
|
||||
client: mockClient,
|
||||
})
|
||||
|
||||
const toolContext = {
|
||||
sessionID: "parent-session",
|
||||
messageID: "parent-message",
|
||||
agent: "sisyphus",
|
||||
abort: new AbortController().signal,
|
||||
}
|
||||
|
||||
// #when - sisyphus delegates to prometheus
|
||||
await tool.execute(
|
||||
{
|
||||
description: "Test prometheus delegate_task permission",
|
||||
prompt: "Create a plan",
|
||||
subagent_type: "prometheus",
|
||||
run_in_background: false,
|
||||
load_skills: [],
|
||||
},
|
||||
toolContext
|
||||
)
|
||||
|
||||
// #then - prometheus should have delegate_task permission
|
||||
expect(promptBody.tools.delegate_task).toBe(true)
|
||||
}, { timeout: 20000 })
|
||||
|
||||
test("non-prometheus subagent should NOT have delegate_task permission", async () => {
|
||||
// #given - sisyphus delegates to oracle (non-prometheus)
|
||||
const { createDelegateTask } = require("./tools")
|
||||
let promptBody: any
|
||||
|
||||
const mockManager = { launch: async () => ({}) }
|
||||
const mockClient = {
|
||||
app: { agents: async () => ({ data: [{ name: "oracle", mode: "subagent" }] }) },
|
||||
config: { get: async () => ({ data: { model: SYSTEM_DEFAULT_MODEL } }) },
|
||||
session: {
|
||||
get: async () => ({ data: { directory: "/project" } }),
|
||||
create: async () => ({ data: { id: "ses_oracle_no_delegate" } }),
|
||||
prompt: async (input: any) => {
|
||||
promptBody = input.body
|
||||
return { data: {} }
|
||||
},
|
||||
messages: async () => ({
|
||||
data: [{ info: { role: "assistant" }, parts: [{ type: "text", text: "Consultation done" }] }]
|
||||
}),
|
||||
status: async () => ({ data: { "ses_oracle_no_delegate": { type: "idle" } } }),
|
||||
},
|
||||
}
|
||||
|
||||
const tool = createDelegateTask({
|
||||
manager: mockManager,
|
||||
client: mockClient,
|
||||
})
|
||||
|
||||
const toolContext = {
|
||||
sessionID: "parent-session",
|
||||
messageID: "parent-message",
|
||||
agent: "sisyphus",
|
||||
abort: new AbortController().signal,
|
||||
}
|
||||
|
||||
// #when - sisyphus delegates to oracle
|
||||
await tool.execute(
|
||||
{
|
||||
description: "Test oracle no delegate_task permission",
|
||||
prompt: "Consult on architecture",
|
||||
subagent_type: "oracle",
|
||||
run_in_background: false,
|
||||
load_skills: [],
|
||||
},
|
||||
toolContext
|
||||
)
|
||||
|
||||
// #then - oracle should NOT have delegate_task permission
|
||||
expect(promptBody.tools.delegate_task).toBe(false)
|
||||
}, { timeout: 20000 })
|
||||
})
|
||||
})
|
||||
|
||||
@@ -5,6 +5,7 @@ import type { BackgroundManager } from "../../features/background-agent"
|
||||
import type { DelegateTaskArgs } from "./types"
|
||||
import type { CategoryConfig, CategoriesConfig, GitMasterConfig, BrowserAutomationProvider } from "../../config/schema"
|
||||
import { DEFAULT_CATEGORIES, CATEGORY_PROMPT_APPENDS, CATEGORY_DESCRIPTIONS, PLAN_AGENT_SYSTEM_PREPEND, isPlanAgent } from "./constants"
|
||||
import { getTimingConfig } from "./timing"
|
||||
import { findNearestMessageWithFields, findFirstMessageWithAgent, MESSAGE_STORAGE } from "../../features/hook-message-injector"
|
||||
import { resolveMultipleSkillsAsync } from "../../features/opencode-skill-loader/skill-content"
|
||||
import { discoverSkills } from "../../features/opencode-skill-loader"
|
||||
@@ -409,9 +410,10 @@ Use \`background_output\` with task_id="${task.id}" to check progress.`
|
||||
}
|
||||
|
||||
// Wait for message stability after prompt completes
|
||||
const POLL_INTERVAL_MS = 500
|
||||
const MIN_STABILITY_TIME_MS = 5000
|
||||
const STABILITY_POLLS_REQUIRED = 3
|
||||
const timing = getTimingConfig()
|
||||
const POLL_INTERVAL_MS = timing.POLL_INTERVAL_MS
|
||||
const MIN_STABILITY_TIME_MS = timing.SESSION_CONTINUATION_STABILITY_MS
|
||||
const STABILITY_POLLS_REQUIRED = timing.STABILITY_POLLS_REQUIRED
|
||||
const pollStart = Date.now()
|
||||
let lastMsgCount = 0
|
||||
let stablePolls = 0
|
||||
@@ -535,7 +537,7 @@ To continue this session: session_id="${args.session_id}"`
|
||||
}
|
||||
} else {
|
||||
const resolution = resolveModelWithFallback({
|
||||
userModel: userCategories?.[args.category]?.model ?? sisyphusJuniorModel,
|
||||
userModel: userCategories?.[args.category]?.model ?? resolved.model ?? sisyphusJuniorModel,
|
||||
fallbackChain: requirement.fallbackChain,
|
||||
availableModels,
|
||||
systemDefaultModel,
|
||||
@@ -565,7 +567,7 @@ To continue this session: session_id="${args.session_id}"`
|
||||
modelInfo = { model: actualModel, type, source }
|
||||
|
||||
const parsedModel = parseModelString(actualModel)
|
||||
const variantToUse = userCategories?.[args.category]?.variant ?? resolvedVariant
|
||||
const variantToUse = userCategories?.[args.category]?.variant ?? resolvedVariant ?? resolved.config.variant
|
||||
categoryModel = parsedModel
|
||||
? (variantToUse ? { ...parsedModel, variant: variantToUse } : parsedModel)
|
||||
: undefined
|
||||
@@ -662,10 +664,11 @@ Available categories: ${categoryNames.join(", ")}`
|
||||
const startTime = new Date()
|
||||
|
||||
// Poll for completion (same logic as sync mode)
|
||||
const POLL_INTERVAL_MS = 500
|
||||
const MAX_POLL_TIME_MS = 10 * 60 * 1000
|
||||
const MIN_STABILITY_TIME_MS = 10000
|
||||
const STABILITY_POLLS_REQUIRED = 3
|
||||
const timingCfg = getTimingConfig()
|
||||
const POLL_INTERVAL_MS = timingCfg.POLL_INTERVAL_MS
|
||||
const MAX_POLL_TIME_MS = timingCfg.MAX_POLL_TIME_MS
|
||||
const MIN_STABILITY_TIME_MS = timingCfg.MIN_STABILITY_TIME_MS
|
||||
const STABILITY_POLLS_REQUIRED = timingCfg.STABILITY_POLLS_REQUIRED
|
||||
const pollStart = Date.now()
|
||||
let lastMsgCount = 0
|
||||
let stablePolls = 0
|
||||
@@ -765,6 +768,12 @@ To continue this session: session_id="${sessionID}"`
|
||||
Sisyphus-Junior is spawned automatically when you specify a category. Pick the appropriate category for your task domain.`
|
||||
}
|
||||
|
||||
if (isPlanAgent(agentName) && isPlanAgent(parentAgent)) {
|
||||
return `You are prometheus. You cannot delegate to prometheus via delegate_task.
|
||||
|
||||
Create the work plan directly - that's your job as the planning agent.`
|
||||
}
|
||||
|
||||
agentToUse = agentName
|
||||
|
||||
// Validate agent exists and is callable (not a primary agent)
|
||||
@@ -924,6 +933,7 @@ To continue this session: session_id="${task.sessionID}"`
|
||||
})
|
||||
|
||||
try {
|
||||
const allowDelegateTask = isPlanAgent(agentToUse)
|
||||
await client.session.prompt({
|
||||
path: { id: sessionID },
|
||||
body: {
|
||||
@@ -931,7 +941,7 @@ To continue this session: session_id="${task.sessionID}"`
|
||||
system: systemContent,
|
||||
tools: {
|
||||
task: false,
|
||||
delegate_task: false,
|
||||
delegate_task: allowDelegateTask,
|
||||
call_omo_agent: true,
|
||||
question: false,
|
||||
},
|
||||
@@ -965,10 +975,11 @@ To continue this session: session_id="${task.sessionID}"`
|
||||
|
||||
// Poll for session completion with stability detection
|
||||
// The session may show as "idle" before messages appear, so we also check message stability
|
||||
const POLL_INTERVAL_MS = 500
|
||||
const MAX_POLL_TIME_MS = 10 * 60 * 1000
|
||||
const MIN_STABILITY_TIME_MS = 10000 // Minimum 10s before accepting completion
|
||||
const STABILITY_POLLS_REQUIRED = 3
|
||||
const syncTiming = getTimingConfig()
|
||||
const POLL_INTERVAL_MS = syncTiming.POLL_INTERVAL_MS
|
||||
const MAX_POLL_TIME_MS = syncTiming.MAX_POLL_TIME_MS
|
||||
const MIN_STABILITY_TIME_MS = syncTiming.MIN_STABILITY_TIME_MS
|
||||
const STABILITY_POLLS_REQUIRED = syncTiming.STABILITY_POLLS_REQUIRED
|
||||
const pollStart = Date.now()
|
||||
let lastMsgCount = 0
|
||||
let stablePolls = 0
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import { describe, expect, test } from "bun:test"
|
||||
import { normalizeArgs, validateArgs } from "./tools"
|
||||
import { normalizeArgs, validateArgs, createLookAt } from "./tools"
|
||||
|
||||
describe("look-at tool", () => {
|
||||
describe("normalizeArgs", () => {
|
||||
@@ -70,4 +70,80 @@ describe("look-at tool", () => {
|
||||
expect(error).toContain("file_path")
|
||||
})
|
||||
})
|
||||
|
||||
describe("createLookAt error handling", () => {
|
||||
// #given session.prompt throws a JSON parse error
|
||||
// #when the LookAt tool is executed
|
||||
// #then a user-friendly error message is returned
|
||||
test("handles JSON parse error from session.prompt gracefully", async () => {
|
||||
const mockClient = {
|
||||
session: {
|
||||
get: async () => ({ data: { directory: "/project" } }),
|
||||
create: async () => ({ data: { id: "ses_test_json_error" } }),
|
||||
prompt: async () => {
|
||||
throw new Error("JSON Parse error: Unexpected EOF")
|
||||
},
|
||||
messages: async () => ({ data: [] }),
|
||||
},
|
||||
}
|
||||
|
||||
const tool = createLookAt({
|
||||
client: mockClient,
|
||||
directory: "/project",
|
||||
} as any)
|
||||
|
||||
const toolContext = {
|
||||
sessionID: "parent-session",
|
||||
messageID: "parent-message",
|
||||
agent: "sisyphus",
|
||||
abort: new AbortController().signal,
|
||||
}
|
||||
|
||||
const result = await tool.execute(
|
||||
{ file_path: "/test/file.png", goal: "analyze image" },
|
||||
toolContext
|
||||
)
|
||||
|
||||
expect(result).toContain("Error: Failed to analyze file")
|
||||
expect(result).toContain("malformed response")
|
||||
expect(result).toContain("multimodal-looker")
|
||||
expect(result).toContain("image/png")
|
||||
})
|
||||
|
||||
// #given session.prompt throws a generic error
|
||||
// #when the LookAt tool is executed
|
||||
// #then an error containing the original error message is returned
|
||||
test("handles generic prompt error gracefully", async () => {
|
||||
const mockClient = {
|
||||
session: {
|
||||
get: async () => ({ data: { directory: "/project" } }),
|
||||
create: async () => ({ data: { id: "ses_test_generic_error" } }),
|
||||
prompt: async () => {
|
||||
throw new Error("Network connection failed")
|
||||
},
|
||||
messages: async () => ({ data: [] }),
|
||||
},
|
||||
}
|
||||
|
||||
const tool = createLookAt({
|
||||
client: mockClient,
|
||||
directory: "/project",
|
||||
} as any)
|
||||
|
||||
const toolContext = {
|
||||
sessionID: "parent-session",
|
||||
messageID: "parent-message",
|
||||
agent: "sisyphus",
|
||||
abort: new AbortController().signal,
|
||||
}
|
||||
|
||||
const result = await tool.execute(
|
||||
{ file_path: "/test/file.pdf", goal: "extract text" },
|
||||
toolContext
|
||||
)
|
||||
|
||||
expect(result).toContain("Error: Failed to send prompt")
|
||||
expect(result).toContain("Network connection failed")
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
@@ -102,7 +102,10 @@ If the requested information is not found, clearly state what is missing.`
|
||||
body: {
|
||||
parentID: toolContext.sessionID,
|
||||
title: `look_at: ${args.goal.substring(0, 50)}`,
|
||||
},
|
||||
permission: [
|
||||
{ permission: "question", action: "deny" as const, pattern: "*" },
|
||||
],
|
||||
} as any,
|
||||
query: {
|
||||
directory: parentDirectory,
|
||||
},
|
||||
@@ -110,6 +113,17 @@ If the requested information is not found, clearly state what is missing.`
|
||||
|
||||
if (createResult.error) {
|
||||
log(`[look_at] Session create error:`, createResult.error)
|
||||
const errorStr = String(createResult.error)
|
||||
if (errorStr.toLowerCase().includes("unauthorized")) {
|
||||
return `Error: Failed to create session (Unauthorized). This may be due to:
|
||||
1. OAuth token restrictions (e.g., Claude Code credentials are restricted to Claude Code only)
|
||||
2. Provider authentication issues
|
||||
3. Session permission inheritance problems
|
||||
|
||||
Try using a different provider or API key authentication.
|
||||
|
||||
Original error: ${createResult.error}`
|
||||
}
|
||||
return `Error: Failed to create session: ${createResult.error}`
|
||||
}
|
||||
|
||||
@@ -117,22 +131,49 @@ If the requested information is not found, clearly state what is missing.`
|
||||
log(`[look_at] Created session: ${sessionID}`)
|
||||
|
||||
log(`[look_at] Sending prompt with file passthrough to session ${sessionID}`)
|
||||
await ctx.client.session.prompt({
|
||||
path: { id: sessionID },
|
||||
body: {
|
||||
agent: MULTIMODAL_LOOKER_AGENT,
|
||||
tools: {
|
||||
task: false,
|
||||
call_omo_agent: false,
|
||||
look_at: false,
|
||||
read: false,
|
||||
try {
|
||||
await ctx.client.session.prompt({
|
||||
path: { id: sessionID },
|
||||
body: {
|
||||
agent: MULTIMODAL_LOOKER_AGENT,
|
||||
tools: {
|
||||
task: false,
|
||||
call_omo_agent: false,
|
||||
look_at: false,
|
||||
read: false,
|
||||
},
|
||||
parts: [
|
||||
{ type: "text", text: prompt },
|
||||
{ type: "file", mime: mimeType, url: pathToFileURL(args.file_path).href, filename },
|
||||
],
|
||||
},
|
||||
parts: [
|
||||
{ type: "text", text: prompt },
|
||||
{ type: "file", mime: mimeType, url: pathToFileURL(args.file_path).href, filename },
|
||||
],
|
||||
},
|
||||
})
|
||||
})
|
||||
} catch (promptError) {
|
||||
const errorMessage = promptError instanceof Error ? promptError.message : String(promptError)
|
||||
log(`[look_at] Prompt error:`, promptError)
|
||||
|
||||
const isJsonParseError = errorMessage.includes("JSON") && (errorMessage.includes("EOF") || errorMessage.includes("parse"))
|
||||
if (isJsonParseError) {
|
||||
return `Error: Failed to analyze file - received malformed response from multimodal-looker agent.
|
||||
|
||||
This typically occurs when:
|
||||
1. The multimodal-looker model is not available or not connected
|
||||
2. The model does not support this file type (${mimeType})
|
||||
3. The API returned an empty or truncated response
|
||||
|
||||
File: ${args.file_path}
|
||||
MIME type: ${mimeType}
|
||||
|
||||
Try:
|
||||
- Ensure a vision-capable model (e.g., gemini-3-flash, gpt-5.2) is available
|
||||
- Check provider connections in opencode settings
|
||||
- For text files like .md, .txt, use the Read tool instead
|
||||
|
||||
Original error: ${errorMessage}`
|
||||
}
|
||||
|
||||
return `Error: Failed to send prompt to multimodal-looker agent: ${errorMessage}`
|
||||
}
|
||||
|
||||
log(`[look_at] Prompt sent, fetching messages...`)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user