diff --git a/.github/workflows/publish-platform.yml b/.github/workflows/publish-platform.yml index 96f1be24b..3b9d2d001 100644 --- a/.github/workflows/publish-platform.yml +++ b/.github/workflows/publish-platform.yml @@ -56,10 +56,33 @@ jobs: env: BUN_INSTALL_ALLOW_SCRIPTS: "@ast-grep/napi" + - name: Validate release inputs + id: validate + env: + INPUT_VERSION: ${{ inputs.version }} + INPUT_DIST_TAG: ${{ inputs.dist_tag }} + run: | + VERSION="$INPUT_VERSION" + DIST_TAG="$INPUT_DIST_TAG" + + if ! [[ "$VERSION" =~ ^[0-9]+\.[0-9]+\.[0-9]+(-[0-9A-Za-z]+(\.[0-9A-Za-z]+)*)?$ ]]; then + echo "::error::Invalid version: $VERSION" + exit 1 + fi + + if [ -n "$DIST_TAG" ] && ! [[ "$DIST_TAG" =~ ^[a-z][a-z0-9-]*$ ]]; then + echo "::error::Invalid dist_tag: $DIST_TAG" + exit 1 + fi + + echo "version=$VERSION" >> $GITHUB_OUTPUT + echo "dist_tag=$DIST_TAG" >> $GITHUB_OUTPUT + - name: Check if already published id: check + env: + VERSION: ${{ steps.validate.outputs.version }} run: | - VERSION="${{ inputs.version }}" PLATFORM_KEY="${{ matrix.platform }}" PLATFORM_KEY="${PLATFORM_KEY//-/_}" @@ -96,15 +119,18 @@ jobs: - name: Update version in package.json if: steps.check.outputs.skip != 'true' + env: + VERSION: ${{ steps.validate.outputs.version }} run: | - VERSION="${{ inputs.version }}" cd packages/${{ matrix.platform }} jq --arg v "$VERSION" '.version = $v' package.json > tmp.json && mv tmp.json package.json - name: Set root package version if: steps.check.outputs.skip != 'true' + env: + VERSION: ${{ steps.validate.outputs.version }} run: | - jq --arg v "${{ inputs.version }}" '.version = $v' package.json > tmp.json && mv tmp.json package.json + jq --arg v "$VERSION" '.version = $v' package.json > tmp.json && mv tmp.json package.json - name: Pre-download baseline compile target if: steps.check.outputs.skip != 'true' && endsWith(matrix.platform, '-baseline') @@ -226,11 +252,33 @@ jobs: matrix: platform: [darwin-arm64, darwin-x64, darwin-x64-baseline, linux-x64, linux-x64-baseline, 
linux-arm64, linux-x64-musl, linux-x64-musl-baseline, linux-arm64-musl, windows-x64, windows-x64-baseline] steps: + - name: Validate release inputs + id: validate + env: + INPUT_VERSION: ${{ inputs.version }} + INPUT_DIST_TAG: ${{ inputs.dist_tag }} + run: | + VERSION="$INPUT_VERSION" + DIST_TAG="$INPUT_DIST_TAG" + + if ! [[ "$VERSION" =~ ^[0-9]+\.[0-9]+\.[0-9]+(-[0-9A-Za-z]+(\.[0-9A-Za-z]+)*)?$ ]]; then + echo "::error::Invalid version: $VERSION" + exit 1 + fi + + if [ -n "$DIST_TAG" ] && ! [[ "$DIST_TAG" =~ ^[a-z][a-z0-9-]*$ ]]; then + echo "::error::Invalid dist_tag: $DIST_TAG" + exit 1 + fi + + echo "version=$VERSION" >> $GITHUB_OUTPUT + echo "dist_tag=$DIST_TAG" >> $GITHUB_OUTPUT + - name: Check if already published id: check + env: + VERSION: ${{ steps.validate.outputs.version }} run: | - VERSION="${{ inputs.version }}" - OC_STATUS=$(curl -s -o /dev/null -w "%{http_code}" "https://registry.npmjs.org/oh-my-opencode-${{ matrix.platform }}/${VERSION}") OA_STATUS=$(curl -s -o /dev/null -w "%{http_code}" "https://registry.npmjs.org/oh-my-openagent-${{ matrix.platform }}/${VERSION}") @@ -288,38 +336,38 @@ jobs: - name: Publish oh-my-opencode-${{ matrix.platform }} if: steps.check.outputs.skip_opencode != 'true' && steps.download.outcome == 'success' - run: | - cd packages/${{ matrix.platform }} - - TAG_ARG="" - if [ -n "${{ inputs.dist_tag }}" ]; then - TAG_ARG="--tag ${{ inputs.dist_tag }}" - fi - - npm publish --access public --provenance $TAG_ARG env: + DIST_TAG: ${{ steps.validate.outputs.dist_tag }} NODE_AUTH_TOKEN: ${{ secrets.NODE_AUTH_TOKEN }} NPM_CONFIG_PROVENANCE: true + run: | + cd packages/${{ matrix.platform }} + + if [ -n "$DIST_TAG" ]; then + npm publish --access public --provenance --tag "$DIST_TAG" + else + npm publish --access public --provenance + fi timeout-minutes: 15 - name: Publish oh-my-openagent-${{ matrix.platform }} if: steps.check.outputs.skip_openagent != 'true' && steps.download.outcome == 'success' + env: + DIST_TAG: ${{ 
steps.validate.outputs.dist_tag }} + NODE_AUTH_TOKEN: ${{ secrets.NODE_AUTH_TOKEN }} + NPM_CONFIG_PROVENANCE: true run: | cd packages/${{ matrix.platform }} - + # Rename package for oh-my-openagent jq --arg name "oh-my-openagent-${{ matrix.platform }}" \ --arg desc "Platform-specific binary for oh-my-openagent (${{ matrix.platform }})" \ '.name = $name | .description = $desc | .bin = {"oh-my-openagent": (.bin | to_entries | .[0].value)}' \ package.json > tmp.json && mv tmp.json package.json - - TAG_ARG="" - if [ -n "${{ inputs.dist_tag }}" ]; then - TAG_ARG="--tag ${{ inputs.dist_tag }}" + + if [ -n "$DIST_TAG" ]; then + npm publish --access public --provenance --tag "$DIST_TAG" + else + npm publish --access public --provenance fi - - npm publish --access public --provenance $TAG_ARG - env: - NODE_AUTH_TOKEN: ${{ secrets.NODE_AUTH_TOKEN }} - NPM_CONFIG_PROVENANCE: true timeout-minutes: 15 diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 1b3f78f4e..5179cdd32 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -167,33 +167,47 @@ jobs: - name: Calculate version id: version + env: + RAW_VERSION: ${{ inputs.version }} + BUMP: ${{ inputs.bump }} run: | - VERSION="${{ inputs.version }}" + VERSION="$RAW_VERSION" if [ -z "$VERSION" ]; then PREV=$(curl -s https://registry.npmjs.org/oh-my-opencode/latest | jq -r '.version // "0.0.0"') BASE="${PREV%%-*}" IFS='.' read -r MAJOR MINOR PATCH <<< "$BASE" - case "${{ inputs.bump }}" in + case "$BUMP" in major) VERSION="$((MAJOR+1)).0.0" ;; minor) VERSION="${MAJOR}.$((MINOR+1)).0" ;; *) VERSION="${MAJOR}.${MINOR}.$((PATCH+1))" ;; esac fi + + if ! [[ "$VERSION" =~ ^[0-9]+\.[0-9]+\.[0-9]+(-[0-9A-Za-z]+(\.[0-9A-Za-z]+)*)?$ ]]; then + echo "::error::Invalid version: $VERSION" + exit 1 + fi + echo "version=$VERSION" >> $GITHUB_OUTPUT - + if [[ "$VERSION" == *"-"* ]]; then - DIST_TAG=$(echo "$VERSION" | cut -d'-' -f2 | cut -d'.' 
-f1) + DIST_TAG=$(printf '%s' "$VERSION" | cut -d'-' -f2 | cut -d'.' -f1) + if ! [[ "$DIST_TAG" =~ ^[a-z][a-z0-9-]*$ ]]; then + echo "::error::Invalid dist_tag: $DIST_TAG" + exit 1 + fi echo "dist_tag=${DIST_TAG:-next}" >> $GITHUB_OUTPUT else echo "dist_tag=" >> $GITHUB_OUTPUT fi - + echo "Version: $VERSION" - name: Check if already published id: check + env: + VERSION: ${{ steps.version.outputs.version }} run: | - VERSION="${{ steps.version.outputs.version }}" STATUS=$(curl -s -o /dev/null -w "%{http_code}" "https://registry.npmjs.org/oh-my-opencode/${VERSION}") if [ "$STATUS" = "200" ]; then echo "skip=true" >> $GITHUB_OUTPUT @@ -204,15 +218,16 @@ jobs: - name: Update version if: steps.check.outputs.skip != 'true' + env: + VERSION: ${{ steps.version.outputs.version }} run: | - VERSION="${{ steps.version.outputs.version }}" jq --arg v "$VERSION" '.version = $v' package.json > tmp.json && mv tmp.json package.json - + for platform in darwin-arm64 darwin-x64 darwin-x64-baseline linux-x64 linux-x64-baseline linux-arm64 linux-x64-musl linux-x64-musl-baseline linux-arm64-musl windows-x64 windows-x64-baseline; do jq --arg v "$VERSION" '.version = $v' "packages/${platform}/package.json" > tmp.json mv tmp.json "packages/${platform}/package.json" done - + jq --arg v "$VERSION" '.optionalDependencies = (.optionalDependencies | to_entries | map(.value = $v) | from_entries)' package.json > tmp.json && mv tmp.json package.json - name: Build main package @@ -225,33 +240,22 @@ jobs: - name: Publish oh-my-opencode if: steps.check.outputs.skip != 'true' - run: | - TAG_ARG="" - if [ -n "${{ steps.version.outputs.dist_tag }}" ]; then - TAG_ARG="--tag ${{ steps.version.outputs.dist_tag }}" - fi - npm publish --access public --provenance $TAG_ARG env: + DIST_TAG: ${{ steps.version.outputs.dist_tag }} NODE_AUTH_TOKEN: ${{ secrets.NODE_AUTH_TOKEN }} NPM_CONFIG_PROVENANCE: true - - - name: Commit version bump - if: steps.check.outputs.skip != 'true' run: | - git config user.email 
"github-actions[bot]@users.noreply.github.com" - git config user.name "github-actions[bot]" - git add package.json packages/*/package.json - git diff --cached --quiet || git commit -m "release: v${{ steps.version.outputs.version }}" - git tag -f "v${{ steps.version.outputs.version }}" - git push origin --tags --force - git push origin HEAD - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + if [ -n "$DIST_TAG" ]; then + npm publish --access public --provenance --tag "$DIST_TAG" + else + npm publish --access public --provenance + fi - name: Check if oh-my-openagent already published id: check-openagent + env: + VERSION: ${{ steps.version.outputs.version }} run: | - VERSION="${{ steps.version.outputs.version }}" STATUS=$(curl -s -o /dev/null -w "%{http_code}" "https://registry.npmjs.org/oh-my-openagent/${VERSION}") if [ "$STATUS" = "200" ]; then echo "skip=true" >> $GITHUB_OUTPUT @@ -262,9 +266,12 @@ jobs: - name: Publish oh-my-openagent if: steps.check-openagent.outputs.skip != 'true' + env: + VERSION: ${{ steps.version.outputs.version }} + DIST_TAG: ${{ steps.version.outputs.dist_tag }} + NODE_AUTH_TOKEN: ${{ secrets.NODE_AUTH_TOKEN }} + NPM_CONFIG_PROVENANCE: true run: | - VERSION="${{ steps.version.outputs.version }}" - # Update package name, version, and optionalDependencies for oh-my-openagent jq --arg v "$VERSION" ' .name = "oh-my-openagent" | @@ -275,39 +282,31 @@ jobs: from_entries ) ' package.json > tmp.json && mv tmp.json package.json - - TAG_ARG="" - if [ -n "${{ steps.version.outputs.dist_tag }}" ]; then - TAG_ARG="--tag ${{ steps.version.outputs.dist_tag }}" + + if [ -n "$DIST_TAG" ]; then + npm publish --access public --provenance --tag "$DIST_TAG" + else + npm publish --access public --provenance fi - npm publish --access public --provenance $TAG_ARG || echo "::warning::oh-my-openagent publish failed" - env: - NODE_AUTH_TOKEN: ${{ secrets.NODE_AUTH_TOKEN }} - NPM_CONFIG_PROVENANCE: true - name: Restore package.json - if: 
steps.check-openagent.outputs.skip != 'true' + if: always() && steps.check-openagent.outputs.skip != 'true' run: | git checkout -- package.json - trigger-platform: - runs-on: ubuntu-latest + publish-platform: needs: publish-main if: inputs.skip_platform != true - steps: - - name: Trigger platform publish workflow - run: | - gh workflow run publish-platform.yml \ - --repo ${{ github.repository }} \ - --ref ${{ github.ref }} \ - -f version=${{ needs.publish-main.outputs.version }} \ - -f dist_tag=${{ needs.publish-main.outputs.dist_tag }} - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + uses: ./.github/workflows/publish-platform.yml + with: + version: ${{ needs.publish-main.outputs.version }} + dist_tag: ${{ needs.publish-main.outputs.dist_tag }} + secrets: inherit release: runs-on: ubuntu-latest - needs: publish-main + needs: [publish-main, publish-platform] + if: always() && needs.publish-main.result == 'success' && (inputs.skip_platform == true || needs.publish-platform.result == 'success') steps: - uses: actions/checkout@v4 with: @@ -331,13 +330,53 @@ jobs: env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Create GitHub release + - name: Apply release version to source tree + env: + VERSION: ${{ needs.publish-main.outputs.version }} + run: | + jq --arg v "$VERSION" '.version = $v' package.json > tmp.json && mv tmp.json package.json + + for platform in darwin-arm64 darwin-x64 darwin-x64-baseline linux-x64 linux-x64-baseline linux-arm64 linux-x64-musl linux-x64-musl-baseline linux-arm64-musl windows-x64 windows-x64-baseline; do + jq --arg v "$VERSION" '.version = $v' "packages/${platform}/package.json" > tmp.json + mv tmp.json "packages/${platform}/package.json" + done + + jq --arg v "$VERSION" '.optionalDependencies = (.optionalDependencies | to_entries | map(.value = $v) | from_entries)' package.json > tmp.json && mv tmp.json package.json + + - name: Commit version bump + env: + VERSION: ${{ needs.publish-main.outputs.version }} + run: | + git config 
user.email "github-actions[bot]@users.noreply.github.com" + git config user.name "github-actions[bot]" + git add package.json packages/*/package.json + git diff --cached --quiet || git commit -m "release: v${VERSION}" + + - name: Create release tag + env: + VERSION: ${{ needs.publish-main.outputs.version }} + run: | + if git rev-parse "v${VERSION}" >/dev/null 2>&1; then + echo "::error::Tag v${VERSION} already exists" + exit 1 + fi + git tag "v${VERSION}" + + - name: Push release state + env: + VERSION: ${{ needs.publish-main.outputs.version }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + git push origin HEAD + git push origin "v${VERSION}" + + - name: Create GitHub release + env: + VERSION: ${{ needs.publish-main.outputs.version }} + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | - VERSION="${{ needs.publish-main.outputs.version }}" gh release view "v${VERSION}" >/dev/null 2>&1 || \ gh release create "v${VERSION}" --title "v${VERSION}" --notes-file /tmp/changelog.md - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - name: Delete draft release run: gh release delete next --yes 2>/dev/null || true @@ -346,13 +385,13 @@ jobs: - name: Merge to master continue-on-error: true + env: + VERSION: ${{ needs.publish-main.outputs.version }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | git config user.name "github-actions[bot]" git config user.email "github-actions[bot]@users.noreply.github.com" - VERSION="${{ needs.publish-main.outputs.version }}" git stash --include-untracked || true git checkout master git reset --hard "v${VERSION}" git push -f origin master || echo "::warning::Failed to push to master" - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/assets/oh-my-opencode.schema.json b/assets/oh-my-opencode.schema.json index e9c7a9e88..832de4432 100644 --- a/assets/oh-my-opencode.schema.json +++ b/assets/oh-my-opencode.schema.json @@ -4885,6 +4885,11 @@ "additionalProperties": false }, "git_master": { + "default": { + "commit_footer": true, + 
"include_co_authored_by": true, + "git_env_prefix": "GIT_MASTER=1" + }, "type": "object", "properties": { "commit_footer": { @@ -5035,5 +5040,8 @@ } } }, + "required": [ + "git_master" + ], "additionalProperties": false } \ No newline at end of file diff --git a/docs/guide/agent-model-matching.md b/docs/guide/agent-model-matching.md index aed35feca..3924d95bf 100644 --- a/docs/guide/agent-model-matching.md +++ b/docs/guide/agent-model-matching.md @@ -92,8 +92,8 @@ These agents do grep, search, and retrieval. They intentionally use the fastest, | Agent | Role | Fallback Chain | Notes | | --------------------- | ------------------ | ---------------------------------------------- | ----------------------------------------------------- | -| **Explore** | Fast codebase grep | Grok Code Fast → opencode-go/minimax-m2.7 → opencode/minimax-m2.5 → Haiku → GPT-5-Nano | Speed is everything. Fire 10 in parallel. Uses opencode-go/minimax-m2.7 where the provider catalog exposes it, falling back to opencode/minimax-m2.5. | -| **Librarian** | Docs/code search | opencode-go/minimax-m2.7 → opencode/minimax-m2.5 → Haiku → GPT-5-Nano | Doc retrieval doesn't need deep reasoning. Uses opencode-go/minimax-m2.7 where the provider catalog exposes it, falling back to opencode/minimax-m2.5. | +| **Explore** | Fast codebase grep | Grok Code Fast → opencode-go/minimax-m2.7-highspeed → opencode/minimax-m2.7 → Haiku → GPT-5-Nano | Speed is everything. Fire 10 in parallel. Uses the high-speed OpenCode Go MiniMax entry first, then the standard OpenCode Zen MiniMax fallback. | +| **Librarian** | Docs/code search | opencode-go/minimax-m2.7 → opencode/minimax-m2.7-highspeed → Haiku → GPT-5-Nano | Doc retrieval doesn't need deep reasoning. Uses OpenCode Go MiniMax first, then the OpenCode Zen high-speed MiniMax fallback. | | **Multimodal Looker** | Vision/screenshots | GPT-5.4 → opencode-go/kimi-k2.5 → GLM-4.6v → GPT-5-Nano | Uses the first available multimodal-capable fallback. 
| | **Sisyphus-Junior** | Category executor | Claude Sonnet → opencode-go/kimi-k2.5 → GPT-5.4 → MiniMax M2.7 → Big Pickle | Handles delegated category tasks. Sonnet-tier default. | @@ -131,8 +131,8 @@ Principle-driven, explicit reasoning, deep technical capability. Best for agents | **Gemini 3.1 Pro** | Excels at visual/frontend tasks. Different reasoning style. Default for `visual-engineering` and `artistry`. | | **Gemini 3 Flash** | Fast. Good for doc search and light tasks. | | **Grok Code Fast 1** | Blazing fast code grep. Default for Explore agent. | -| **MiniMax M2.7** | Fast and smart. Used where provider catalogs expose the newer MiniMax line, especially through OpenCode Go. | -| **MiniMax M2.5** | Legacy OpenCode catalog entry still used in some fallback chains for compatibility. | +| **MiniMax M2.7** | Fast and smart. Used in OpenCode Go and OpenCode Zen utility fallback chains. | +| **MiniMax M2.7 Highspeed** | High-speed OpenCode catalog entry used in utility fallback chains that prefer the fastest available MiniMax path. | ### OpenCode Go @@ -144,7 +144,8 @@ A premium subscription tier ($10/month) that provides reliable access to Chinese | ------------------------ | --------------------------------------------------------------------- | | **opencode-go/kimi-k2.5** | Vision-capable, Claude-like reasoning. Used by Sisyphus, Atlas, Sisyphus-Junior, Multimodal Looker. | | **opencode-go/glm-5** | Text-only orchestration model. Used by Oracle, Prometheus, Metis, Momus. | -| **opencode-go/minimax-m2.7** | Ultra-cheap, fast responses. Used by Librarian, Explore, Atlas, and Sisyphus-Junior for utility work. | +| **opencode-go/minimax-m2.7** | Ultra-cheap, fast responses. Used by Librarian, Atlas, and Sisyphus-Junior for utility work. | +| **opencode-go/minimax-m2.7-highspeed** | Even faster OpenCode Go MiniMax entry used by Explore when the high-speed catalog entry is available. 
| **When It Gets Used:** @@ -156,7 +157,7 @@ Some model identifiers like `k2p5` (paid Kimi K2.5) and `glm-5` may only be avai ### About Free-Tier Fallbacks -You may see model names like `kimi-k2.5-free`, `minimax-m2.7`, `minimax-m2.5`, or `big-pickle` (GLM 4.6) in the source code or logs. These are provider-specific or speed-optimized entries in fallback chains. The exact MiniMax model can differ by provider catalog. +You may see model names like `kimi-k2.5-free`, `minimax-m2.7`, `minimax-m2.7-highspeed`, or `big-pickle` (GLM 4.6) in the source code or logs. These are provider-specific or speed-optimized entries in fallback chains. You don't need to configure them. The system includes them so it degrades gracefully when you don't have every paid subscription. If you have the paid version, the paid version is always preferred. diff --git a/docs/guide/installation.md b/docs/guide/installation.md index 9fc49f79c..0d0e34438 100644 --- a/docs/guide/installation.md +++ b/docs/guide/installation.md @@ -238,7 +238,7 @@ If Z.ai is your main provider, the most important fallbacks are: #### OpenCode Zen -OpenCode Zen provides access to `opencode/` prefixed models including `opencode/claude-opus-4-6`, `opencode/gpt-5.4`, `opencode/gpt-5.3-codex`, `opencode/gpt-5-nano`, `opencode/glm-5`, `opencode/big-pickle`, and `opencode/minimax-m2.5`. +OpenCode Zen provides access to `opencode/` prefixed models including `opencode/claude-opus-4-6`, `opencode/gpt-5.4`, `opencode/gpt-5.3-codex`, `opencode/gpt-5-nano`, `opencode/glm-5`, `opencode/big-pickle`, `opencode/minimax-m2.7`, and `opencode/minimax-m2.7-highspeed`. 
When OpenCode Zen is the best available provider, these are the most relevant source-backed examples: @@ -250,26 +250,21 @@ When OpenCode Zen is the best available provider, these are the most relevant so ##### Setup -Run the installer and select "Yes" for GitHub Copilot: +Run the installer and select "Yes" for OpenCode Zen: ```bash bunx oh-my-opencode install -# Select your subscriptions (Claude, ChatGPT, Gemini) -# When prompted: "Do you have a GitHub Copilot subscription?" → Select "Yes" +# Select your subscriptions (Claude, ChatGPT, Gemini, OpenCode Zen, etc.) +# When prompted: "Do you have access to OpenCode Zen (opencode/ models)?" → Select "Yes" ``` Or use non-interactive mode: ```bash -bunx oh-my-opencode install --no-tui --claude=no --openai=no --gemini=no --copilot=yes +bunx oh-my-opencode install --no-tui --claude=no --openai=no --gemini=no --opencode-zen=yes ``` -Then authenticate with GitHub: - -```bash -opencode auth login -# Select: GitHub → Authenticate via OAuth -``` +This provider uses the `opencode/` model catalog. If your OpenCode environment prompts for provider authentication, follow the OpenCode provider flow for `opencode/` models instead of reusing the fallback-provider auth steps above. ### Step 5: Understand Your Model Setup @@ -306,8 +301,8 @@ Not all models behave the same way. Understanding which models are "similar" hel | --------------------- | -------------------------------- | ----------------------------------------------------------- | | **Gemini 3.1 Pro** | google, github-copilot, opencode | Excels at visual/frontend tasks. Different reasoning style. | | **Gemini 3 Flash** | google, github-copilot, opencode | Fast, good for doc search and light tasks. | -| **MiniMax M2.7** | venice, opencode-go | Fast and smart. Good for utility tasks where the provider catalog exposes M2.7. | -| **MiniMax M2.5** | opencode | Legacy OpenCode catalog entry still used in some fallback chains for compatibility. 
| +| **MiniMax M2.7** | opencode-go, opencode | Fast and smart. Utility fallbacks use `minimax-m2.7` or `minimax-m2.7-highspeed` depending on the chain. | +| **MiniMax M2.7 Highspeed** | opencode | Faster OpenCode Zen variant used in utility fallback chains where the runtime prefers the high-speed catalog entry. | **Speed-Focused Models**: @@ -315,7 +310,7 @@ Not all models behave the same way. Understanding which models are "similar" hel | ----------------------- | ---------------------- | -------------- | --------------------------------------------------------------------------------------------------------------------------------------------- | | **Grok Code Fast 1** | github-copilot, venice | Very fast | Optimized for code grep/search. Default for Explore. | | **Claude Haiku 4.5** | anthropic, opencode | Fast | Good balance of speed and intelligence. | -| **MiniMax M2.5** | opencode | Very fast | Legacy OpenCode catalog entry that still appears in some utility fallback chains. | +| **MiniMax M2.7 Highspeed** | opencode | Very fast | OpenCode Zen high-speed utility fallback used by runtime chains such as Librarian. | | **GPT-5.3-codex-spark** | openai | Extremely fast | Blazing fast but compacts so aggressively that oh-my-openagent's context management doesn't work well with it. Not recommended for omo agents. | #### What Each Agent Does and Which Model It Got @@ -354,8 +349,8 @@ These agents do search, grep, and retrieval. They intentionally use fast, cheap | Agent | Role | Default Chain | Design Rationale | | --------------------- | ------------------ | ---------------------------------------------------------------------- | -------------------------------------------------------------- | -| **Explore** | Fast codebase grep | Grok Code Fast → OpenCode Go MiniMax M2.7 → OpenCode MiniMax M2.5 → Haiku → GPT-5-Nano | Speed is everything. Grok is blazing fast for grep. 
| -| **Librarian** | Docs/code search | OpenCode Go MiniMax M2.7 → OpenCode MiniMax M2.5 → Haiku → GPT-5-Nano | Doc retrieval doesn't need deep reasoning. MiniMax is fast where the provider catalog supports it. | +| **Explore** | Fast codebase grep | Grok Code Fast → OpenCode Go MiniMax M2.7 Highspeed → OpenCode MiniMax M2.7 → Haiku → GPT-5-Nano | Speed is everything. Grok is blazing fast for grep. | +| **Librarian** | Docs/code search | OpenCode Go MiniMax M2.7 → OpenCode MiniMax M2.7 Highspeed → Haiku → GPT-5-Nano | Doc retrieval doesn't need deep reasoning. MiniMax is fast where the provider catalog supports it. | | **Multimodal Looker** | Vision/screenshots | GPT-5.4 (medium) → Kimi K2.5 → GLM-4.6v → GPT-5-Nano | GPT-5.4 now leads the default vision path when available. | #### Why Different Models Need Different Prompts diff --git a/docs/reference/cli.md b/docs/reference/cli.md index df87c8fac..85296d957 100644 --- a/docs/reference/cli.md +++ b/docs/reference/cli.md @@ -201,10 +201,10 @@ Manages OAuth 2.1 authentication for remote MCP servers. 
bunx oh-my-opencode mcp oauth login --server-url https://api.example.com # Login with explicit client ID and scopes -bunx oh-my-opencode mcp oauth login my-api --server-url https://api.example.com --client-id my-client --scopes "read,write" +bunx oh-my-opencode mcp oauth login my-api --server-url https://api.example.com --client-id my-client --scopes read write # Remove stored OAuth tokens -bunx oh-my-opencode mcp oauth logout +bunx oh-my-opencode mcp oauth logout --server-url https://api.example.com # Check OAuth token status bunx oh-my-opencode mcp oauth status [server-name] @@ -216,7 +216,7 @@ bunx oh-my-opencode mcp oauth status [server-name] | -------------------- | ------------------------------------------------------------------------- | | `--server-url ` | MCP server URL (required for login) | | `--client-id ` | OAuth client ID (optional if server supports Dynamic Client Registration) | -| `--scopes ` | Comma-separated OAuth scopes | +| `--scopes ` | OAuth scopes as separate variadic arguments (for example: `--scopes read write`) | ### Token Storage diff --git a/docs/reference/configuration.md b/docs/reference/configuration.md index a9614b675..cfb2dc46c 100644 --- a/docs/reference/configuration.md +++ b/docs/reference/configuration.md @@ -358,8 +358,8 @@ Capability data comes from provider runtime metadata first. 
OmO also ships bundl | **Sisyphus** | `claude-opus-4-6` | `claude-opus-4-6 (max)` → `kimi-k2.5` via OpenCode Go / Kimi providers → `gpt-5.4 (medium)` → `glm-5` → `big-pickle` | | **Hephaestus** | `gpt-5.4` | `gpt-5.4 (medium)` | | **oracle** | `gpt-5.4` | `gpt-5.4 (high)` → `gemini-3.1-pro (high)` → `claude-opus-4-6 (max)` → `glm-5` | -| **librarian** | `minimax-m2.7` | `opencode-go/minimax-m2.7` → `opencode/minimax-m2.5` → `claude-haiku-4-5` → `gpt-5-nano` | -| **explore** | `grok-code-fast-1` | `grok-code-fast-1` → `opencode-go/minimax-m2.7` → `opencode/minimax-m2.5` → `claude-haiku-4-5` → `gpt-5-nano` | +| **librarian** | `minimax-m2.7` | `opencode-go/minimax-m2.7` → `opencode/minimax-m2.7-highspeed` → `claude-haiku-4-5` → `gpt-5-nano` | +| **explore** | `grok-code-fast-1` | `grok-code-fast-1` → `opencode-go/minimax-m2.7-highspeed` → `opencode/minimax-m2.7` → `claude-haiku-4-5` → `gpt-5-nano` | | **multimodal-looker** | `gpt-5.4` | `gpt-5.4 (medium)` → `kimi-k2.5` → `glm-4.6v` → `gpt-5-nano` | | **Prometheus** | `claude-opus-4-6` | `claude-opus-4-6 (max)` → `gpt-5.4 (high)` → `glm-5` → `gemini-3.1-pro` | | **Metis** | `claude-opus-4-6` | `claude-opus-4-6 (max)` → `gpt-5.4 (high)` → `glm-5` → `k2p5` | @@ -375,9 +375,9 @@ Capability data comes from provider runtime metadata first. 
OmO also ships bundl | **deep** | `gpt-5.3-codex` | `gpt-5.3-codex` → `claude-opus-4-6` → `gemini-3.1-pro` | | **artistry** | `gemini-3.1-pro` | `gemini-3.1-pro` → `claude-opus-4-6` → `gpt-5.4` | | **quick** | `gpt-5.4-mini` | `gpt-5.4-mini` → `claude-haiku-4-5` → `gemini-3-flash` → `minimax-m2.7` → `gpt-5-nano` | -| **unspecified-low** | `claude-sonnet-4-6` | `claude-sonnet-4-6` → `gpt-5.3-codex` → `gemini-3-flash` → `minimax-m2.7` | +| **unspecified-low** | `claude-sonnet-4-6` | `claude-sonnet-4-6` → `gpt-5.3-codex` → `kimi-k2.5` → `gemini-3-flash` → `minimax-m2.7` | | **unspecified-high** | `claude-opus-4-6` | `claude-opus-4-6` → `gpt-5.4 (high)` → `glm-5` → `k2p5` → `kimi-k2.5` | -| **writing** | `gemini-3-flash` | `gemini-3-flash` → `claude-sonnet-4-6` → `minimax-m2.7` | +| **writing** | `gemini-3-flash` | `gemini-3-flash` → `kimi-k2.5` → `claude-sonnet-4-6` → `minimax-m2.7` | Run `bunx oh-my-opencode doctor --verbose` to see effective model resolution for your config. @@ -925,7 +925,7 @@ When enabled, two companion hooks are active: `hashline-read-enhancer` (annotate "aggressive_truncation": false, "auto_resume": false, "disable_omo_env": false, - "task_system": false, + "task_system": true, "dynamic_context_pruning": { "enabled": false, "notification": "detailed", @@ -955,7 +955,7 @@ When enabled, two companion hooks are active: `hashline-read-enhancer` (annotate | `aggressive_truncation` | `false` | Aggressively truncate when token limit exceeded | | `auto_resume` | `false` | Auto-resume after thinking block recovery | | `disable_omo_env` | `false` | Disable auto-injected `<omo-env>` block (date/time/locale). Improves cache hit rate. 
| -| `task_system` | `false` | Enable Sisyphus task system | +| `task_system` | `true` | Enable Sisyphus task system | | `dynamic_context_pruning.enabled` | `false` | Auto-prune old tool outputs to manage context window | | `dynamic_context_pruning.notification` | `detailed` | Pruning notifications: `off` / `minimal` / `detailed` | | `turn_protection.turns` | `3` | Recent turns protected from pruning (1–10) | diff --git a/docs/reference/features.md b/docs/reference/features.md index cf2f94eb4..7fb6a57e6 100644 --- a/docs/reference/features.md +++ b/docs/reference/features.md @@ -13,8 +13,8 @@ Core-agent tab cycling is deterministic via injected runtime order field. The fi | **Sisyphus** | `claude-opus-4-6` | The default orchestrator. Plans, delegates, and executes complex tasks using specialized subagents with aggressive parallel execution. Todo-driven workflow with extended thinking (32k budget). Fallback: `glm-5` → `big-pickle`. | | **Hephaestus** | `gpt-5.4` | The Legitimate Craftsman. Autonomous deep worker inspired by AmpCode's deep mode. Goal-oriented execution with thorough research before action. Explores codebase patterns, completes tasks end-to-end without premature stopping. Named after the Greek god of forge and craftsmanship. Requires a GPT-capable provider. | | **Oracle** | `gpt-5.4` | Architecture decisions, code review, debugging. Read-only consultation with stellar logical reasoning and deep analysis. Inspired by AmpCode. Fallback: `gemini-3.1-pro` → `claude-opus-4-6`. | -| **Librarian** | `minimax-m2.7` | Multi-repo analysis, documentation lookup, OSS implementation examples. Deep codebase understanding with evidence-based answers. Primary OpenCode Go path uses MiniMax M2.7. Other provider catalogs may still fall back to MiniMax M2.5, then `claude-haiku-4-5` and `gpt-5-nano`. | -| **Explore** | `grok-code-fast-1` | Fast codebase exploration and contextual grep. Primary path stays on Grok Code Fast 1. 
MiniMax M2.7 is now used where provider catalogs expose it, while some OpenCode fallback paths still use MiniMax M2.5 for catalog compatibility. | +| **Librarian** | `minimax-m2.7` | Multi-repo analysis, documentation lookup, OSS implementation examples. Deep codebase understanding with evidence-based answers. Primary OpenCode Go path uses MiniMax M2.7, then falls back to OpenCode Zen `minimax-m2.7-highspeed`, then `claude-haiku-4-5` and `gpt-5-nano`. | +| **Explore** | `grok-code-fast-1` | Fast codebase exploration and contextual grep. Primary path stays on Grok Code Fast 1, then uses OpenCode Go `minimax-m2.7-highspeed`, then OpenCode Zen `minimax-m2.7`, before falling through to Haiku and GPT-5-Nano. | | **Multimodal-Looker** | `gpt-5.4` | Visual content specialist. Analyzes PDFs, images, diagrams to extract information. Fallback: `k2p5` → `glm-4.6v` → `gpt-5-nano`. | ### Planning Agents diff --git a/package.json b/package.json index 2c4a4e857..8918cc74c 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "oh-my-opencode", - "version": "3.11.0", + "version": "3.14.0", "description": "The Best AI Agent Harness - Batteries-Included OpenCode Plugin with Multi-Model Orchestration, Parallel Background Agents, and Crafted LSP/AST Tools", "main": "dist/index.js", "types": "dist/index.d.ts", @@ -78,17 +78,17 @@ "typescript": "^5.7.3" }, "optionalDependencies": { - "oh-my-opencode-darwin-arm64": "3.11.0", - "oh-my-opencode-darwin-x64": "3.11.0", - "oh-my-opencode-darwin-x64-baseline": "3.11.0", - "oh-my-opencode-linux-arm64": "3.11.0", - "oh-my-opencode-linux-arm64-musl": "3.11.0", - "oh-my-opencode-linux-x64": "3.11.0", - "oh-my-opencode-linux-x64-baseline": "3.11.0", - "oh-my-opencode-linux-x64-musl": "3.11.0", - "oh-my-opencode-linux-x64-musl-baseline": "3.11.0", - "oh-my-opencode-windows-x64": "3.11.0", - "oh-my-opencode-windows-x64-baseline": "3.11.0" + "oh-my-opencode-darwin-arm64": "3.14.0", + "oh-my-opencode-darwin-x64": "3.14.0", + 
"oh-my-opencode-darwin-x64-baseline": "3.14.0", + "oh-my-opencode-linux-arm64": "3.14.0", + "oh-my-opencode-linux-arm64-musl": "3.14.0", + "oh-my-opencode-linux-x64": "3.14.0", + "oh-my-opencode-linux-x64-baseline": "3.14.0", + "oh-my-opencode-linux-x64-musl": "3.14.0", + "oh-my-opencode-linux-x64-musl-baseline": "3.14.0", + "oh-my-opencode-windows-x64": "3.14.0", + "oh-my-opencode-windows-x64-baseline": "3.14.0" }, "overrides": { "@opencode-ai/sdk": "^1.2.24" diff --git a/packages/darwin-arm64/package.json b/packages/darwin-arm64/package.json index 0aac36062..efe8ae64d 100644 --- a/packages/darwin-arm64/package.json +++ b/packages/darwin-arm64/package.json @@ -1,6 +1,6 @@ { "name": "oh-my-opencode-darwin-arm64", - "version": "3.11.0", + "version": "3.14.0", "description": "Platform-specific binary for oh-my-opencode (darwin-arm64)", "license": "MIT", "repository": { diff --git a/packages/darwin-x64-baseline/package.json b/packages/darwin-x64-baseline/package.json index d0df9abc2..22c3e93bd 100644 --- a/packages/darwin-x64-baseline/package.json +++ b/packages/darwin-x64-baseline/package.json @@ -1,6 +1,6 @@ { "name": "oh-my-opencode-darwin-x64-baseline", - "version": "3.11.0", + "version": "3.14.0", "description": "Platform-specific binary for oh-my-opencode (darwin-x64-baseline, no AVX2)", "license": "MIT", "repository": { diff --git a/packages/darwin-x64/package.json b/packages/darwin-x64/package.json index 07d6abeb3..de6d3e6fa 100644 --- a/packages/darwin-x64/package.json +++ b/packages/darwin-x64/package.json @@ -1,6 +1,6 @@ { "name": "oh-my-opencode-darwin-x64", - "version": "3.11.0", + "version": "3.14.0", "description": "Platform-specific binary for oh-my-opencode (darwin-x64)", "license": "MIT", "repository": { diff --git a/packages/linux-arm64-musl/package.json b/packages/linux-arm64-musl/package.json index 46ea9448d..427a9fa2b 100644 --- a/packages/linux-arm64-musl/package.json +++ b/packages/linux-arm64-musl/package.json @@ -1,6 +1,6 @@ { "name": 
"oh-my-opencode-linux-arm64-musl", - "version": "3.11.0", + "version": "3.14.0", "description": "Platform-specific binary for oh-my-opencode (linux-arm64-musl)", "license": "MIT", "repository": { diff --git a/packages/linux-arm64/package.json b/packages/linux-arm64/package.json index 3f76a1a60..b0bb92483 100644 --- a/packages/linux-arm64/package.json +++ b/packages/linux-arm64/package.json @@ -1,6 +1,6 @@ { "name": "oh-my-opencode-linux-arm64", - "version": "3.11.0", + "version": "3.14.0", "description": "Platform-specific binary for oh-my-opencode (linux-arm64)", "license": "MIT", "repository": { diff --git a/packages/linux-x64-baseline/package.json b/packages/linux-x64-baseline/package.json index 26d6813a5..f34ed7676 100644 --- a/packages/linux-x64-baseline/package.json +++ b/packages/linux-x64-baseline/package.json @@ -1,6 +1,6 @@ { "name": "oh-my-opencode-linux-x64-baseline", - "version": "3.11.0", + "version": "3.14.0", "description": "Platform-specific binary for oh-my-opencode (linux-x64-baseline, no AVX2)", "license": "MIT", "repository": { diff --git a/packages/linux-x64-musl-baseline/package.json b/packages/linux-x64-musl-baseline/package.json index 59e206a9c..60d684b31 100644 --- a/packages/linux-x64-musl-baseline/package.json +++ b/packages/linux-x64-musl-baseline/package.json @@ -1,6 +1,6 @@ { "name": "oh-my-opencode-linux-x64-musl-baseline", - "version": "3.11.0", + "version": "3.14.0", "description": "Platform-specific binary for oh-my-opencode (linux-x64-musl-baseline, no AVX2)", "license": "MIT", "repository": { diff --git a/packages/linux-x64-musl/package.json b/packages/linux-x64-musl/package.json index 29d9fd684..ba9220c7c 100644 --- a/packages/linux-x64-musl/package.json +++ b/packages/linux-x64-musl/package.json @@ -1,6 +1,6 @@ { "name": "oh-my-opencode-linux-x64-musl", - "version": "3.11.0", + "version": "3.14.0", "description": "Platform-specific binary for oh-my-opencode (linux-x64-musl)", "license": "MIT", "repository": { diff --git 
a/packages/linux-x64/package.json b/packages/linux-x64/package.json index 252a1c46c..f8d7f22c4 100644 --- a/packages/linux-x64/package.json +++ b/packages/linux-x64/package.json @@ -1,6 +1,6 @@ { "name": "oh-my-opencode-linux-x64", - "version": "3.11.0", + "version": "3.14.0", "description": "Platform-specific binary for oh-my-opencode (linux-x64)", "license": "MIT", "repository": { diff --git a/packages/windows-x64-baseline/package.json b/packages/windows-x64-baseline/package.json index 70a4142c1..a2a81a344 100644 --- a/packages/windows-x64-baseline/package.json +++ b/packages/windows-x64-baseline/package.json @@ -1,6 +1,6 @@ { "name": "oh-my-opencode-windows-x64-baseline", - "version": "3.11.0", + "version": "3.14.0", "description": "Platform-specific binary for oh-my-opencode (windows-x64-baseline, no AVX2)", "license": "MIT", "repository": { diff --git a/packages/windows-x64/package.json b/packages/windows-x64/package.json index 1483ff42c..18ae57f25 100644 --- a/packages/windows-x64/package.json +++ b/packages/windows-x64/package.json @@ -1,6 +1,6 @@ { "name": "oh-my-opencode-windows-x64", - "version": "3.11.0", + "version": "3.14.0", "description": "Platform-specific binary for oh-my-opencode (windows-x64)", "license": "MIT", "repository": { diff --git a/postinstall.mjs b/postinstall.mjs index 0d78c888f..5fe05f702 100644 --- a/postinstall.mjs +++ b/postinstall.mjs @@ -1,6 +1,7 @@ // postinstall.mjs // Runs after npm install to verify platform binary is available +import { readFileSync } from "node:fs"; import { createRequire } from "node:module"; import { getPlatformPackageCandidates, getBinaryPath } from "./bin/platform.js"; @@ -24,7 +25,7 @@ function getLibcFamily() { function getPackageBaseName() { try { - const packageJson = JSON.parse(readFileSync(new URL("../package.json", import.meta.url), "utf8")); + const packageJson = JSON.parse(readFileSync(new URL("./package.json", import.meta.url), "utf8")); return packageJson.name || "oh-my-opencode"; } catch { 
return "oh-my-opencode"; diff --git a/script/generate-changelog.ts b/script/generate-changelog.ts index bd46352b3..7263d2e43 100644 --- a/script/generate-changelog.ts +++ b/script/generate-changelog.ts @@ -34,6 +34,72 @@ async function generateChangelog(previousTag: string): Promise { return notes } +async function getChangedFiles(previousTag: string): Promise { + try { + const diff = await $`git diff --name-only ${previousTag}..HEAD`.text() + return diff + .split("\n") + .map((line) => line.trim()) + .filter(Boolean) + } catch { + return [] + } +} + +function touchesAnyPath(files: string[], candidates: string[]): boolean { + return files.some((file) => candidates.some((candidate) => file === candidate || file.startsWith(`${candidate}/`))) +} + +function buildReleaseFraming(files: string[]): string[] { + const bullets: string[] = [] + + if ( + touchesAnyPath(files, [ + "src/index.ts", + "src/plugin-config.ts", + "bin/platform.js", + "postinstall.mjs", + "docs", + ]) + ) { + bullets.push("Rename transition updates across package detection, plugin/config compatibility, and install surfaces.") + } + + if (touchesAnyPath(files, ["src/tools/delegate-task", "src/plugin/tool-registry.ts"])) { + bullets.push("Task and tool behavior updates, including delegate-task contract and runtime registration behavior.") + } + + if ( + touchesAnyPath(files, [ + "src/plugin/tool-registry.ts", + "src/plugin-handlers/agent-config-handler.ts", + "src/plugin-handlers/tool-config-handler.ts", + "src/hooks/tasks-todowrite-disabler", + ]) + ) { + bullets.push("Task-system default behavior alignment so omitted configuration behaves consistently across runtime paths.") + } + + if (touchesAnyPath(files, [".github/workflows", "docs/guide/installation.md", "postinstall.mjs"])) { + bullets.push("Install and publish workflow hardening, including safer release sequencing and package/install fixes.") + } + + if (bullets.length === 0) { + return [] + } + + return [ + "## Minor Compatibility and 
Stability Release", + "", + "This release carries compatibility-facing behavior changes and operational hardening. Read the summary below before upgrading or publishing.", + "", + ...bullets.map((bullet) => `- ${bullet}`), + "", + "## Commit Summary", + "", + ] +} + async function getContributors(previousTag: string): Promise { const notes: string[] = [] @@ -78,9 +144,11 @@ async function main() { process.exit(0) } + const changedFiles = await getChangedFiles(previousTag) const changelog = await generateChangelog(previousTag) const contributors = await getContributors(previousTag) - const notes = [...changelog, ...contributors] + const framing = buildReleaseFraming(changedFiles) + const notes = [...framing, ...changelog, ...contributors] if (notes.length === 0) { console.log("No notable changes") diff --git a/src/features/background-agent/manager.test.ts b/src/features/background-agent/manager.test.ts index 014f2328a..b190aff58 100644 --- a/src/features/background-agent/manager.test.ts +++ b/src/features/background-agent/manager.test.ts @@ -3312,6 +3312,9 @@ describe("BackgroundManager.checkAndInterruptStaleTasks", () => { prompt: async () => ({}), promptAsync: async () => ({}), abort: async () => ({}), + get: async () => { + throw new Error("missing") + }, }, } const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput, { staleTimeoutMs: 180_000 }) @@ -3348,6 +3351,9 @@ describe("BackgroundManager.checkAndInterruptStaleTasks", () => { prompt: async () => ({}), promptAsync: async () => ({}), abort: async () => ({}), + get: async () => { + throw new Error("missing") + }, }, } const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput, { staleTimeoutMs: 180_000 }) @@ -3437,6 +3443,7 @@ describe("BackgroundManager.checkAndInterruptStaleTasks", () => { status: "running", startedAt: new Date(Date.now() - 15 * 60 * 1000), progress: undefined, + consecutiveMissedPolls: 2, } 
getTaskMap(manager).set(task.id, task) @@ -3471,6 +3478,7 @@ describe("BackgroundManager.checkAndInterruptStaleTasks", () => { status: "running", startedAt: new Date(Date.now() - 15 * 60 * 1000), progress: undefined, + consecutiveMissedPolls: 2, } getTaskMap(manager).set(task.id, task) diff --git a/src/features/background-agent/task-poller.test.ts b/src/features/background-agent/task-poller.test.ts index 4ea281cf0..cd3d8a9cf 100644 --- a/src/features/background-agent/task-poller.test.ts +++ b/src/features/background-agent/task-poller.test.ts @@ -8,6 +8,7 @@ describe("checkAndInterruptStaleTasks", () => { const mockClient = { session: { abort: mock(() => Promise.resolve()), + get: mock(() => Promise.resolve({ data: { id: "ses-1" } })), }, } const mockConcurrencyManager = { @@ -35,6 +36,11 @@ describe("checkAndInterruptStaleTasks", () => { beforeEach(() => { fixedTime = Date.now() spyOn(globalThis.Date, "now").mockReturnValue(fixedTime) + mockClient.session.abort.mockClear() + mockClient.session.get.mockReset() + mockClient.session.get.mockResolvedValue({ data: { id: "ses-1" } }) + mockConcurrencyManager.release.mockClear() + mockNotify.mockClear() }) afterEach(() => { @@ -288,6 +294,59 @@ describe("checkAndInterruptStaleTasks", () => { expect(task.status).toBe("running") }) + it("should NOT cancel healthy task on first missing status poll", async () => { + //#given — one missing poll should not be enough to declare the session gone + const task = createRunningTask({ + startedAt: new Date(Date.now() - 300_000), + progress: { + toolCalls: 1, + lastUpdate: new Date(Date.now() - 120_000), + }, + }) + + //#when + await checkAndInterruptStaleTasks({ + tasks: [task], + client: mockClient as never, + config: { staleTimeoutMs: 180_000, sessionGoneTimeoutMs: 60_000 }, + concurrencyManager: mockConcurrencyManager as never, + notifyParentSession: mockNotify, + sessionStatuses: {}, + }) + + //#then + expect(task.status).toBe("running") + 
expect(task.consecutiveMissedPolls).toBe(1) + expect(mockClient.session.get).not.toHaveBeenCalled() + }) + + it("should NOT cancel task when session.get confirms the session still exists", async () => { + //#given — repeated missing polls but direct lookup still succeeds + const task = createRunningTask({ + startedAt: new Date(Date.now() - 300_000), + progress: { + toolCalls: 1, + lastUpdate: new Date(Date.now() - 120_000), + }, + consecutiveMissedPolls: 2, + }) + + //#when + await checkAndInterruptStaleTasks({ + tasks: [task], + client: mockClient as never, + config: { staleTimeoutMs: 180_000, sessionGoneTimeoutMs: 60_000 }, + concurrencyManager: mockConcurrencyManager as never, + notifyParentSession: mockNotify, + sessionStatuses: {}, + }) + + //#then + expect(task.status).toBe("running") + expect(task.consecutiveMissedPolls).toBe(0) + expect(mockClient.session.get).toHaveBeenCalledWith({ path: { id: "ses-1" } }) + }) + it("should use session-gone timeout when session is missing from status map (with progress)", async () => { //#given — lastUpdate 2min ago, session completely gone from status const task = createRunningTask({ @@ -296,8 +355,11 @@ describe("checkAndInterruptStaleTasks", () => { toolCalls: 1, lastUpdate: new Date(Date.now() - 120_000), }, + consecutiveMissedPolls: 2, }) + mockClient.session.get.mockRejectedValue(new Error("missing")) + //#when — empty sessionStatuses (session gone), sessionGoneTimeoutMs = 60s await checkAndInterruptStaleTasks({ tasks: [task], @@ -318,8 +380,11 @@ describe("checkAndInterruptStaleTasks", () => { const task = createRunningTask({ startedAt: new Date(Date.now() - 120_000), progress: undefined, + consecutiveMissedPolls: 2, }) + mockClient.session.get.mockRejectedValue(new Error("missing")) + //#when — session gone, sessionGoneTimeoutMs = 60s await checkAndInterruptStaleTasks({ tasks: [task], @@ -343,8 +408,11 @@ describe("checkAndInterruptStaleTasks", () => { toolCalls: 1, lastUpdate: new Date(Date.now() - 120_000), }, + 
consecutiveMissedPolls: 2, }) + mockClient.session.get.mockRejectedValue(new Error("missing")) + //#when — session is idle (present in map), staleTimeoutMs = 180s await checkAndInterruptStaleTasks({ tasks: [task], @@ -367,8 +435,11 @@ describe("checkAndInterruptStaleTasks", () => { toolCalls: 1, lastUpdate: new Date(Date.now() - 120_000), }, + consecutiveMissedPolls: 2, }) + mockClient.session.get.mockRejectedValue(new Error("missing")) + //#when — no config (default sessionGoneTimeoutMs = 60_000) await checkAndInterruptStaleTasks({ tasks: [task], diff --git a/src/features/background-agent/task-poller.ts b/src/features/background-agent/task-poller.ts index 10be9180c..803b0f51a 100644 --- a/src/features/background-agent/task-poller.ts +++ b/src/features/background-agent/task-poller.ts @@ -16,6 +16,8 @@ import { import { removeTaskToastTracking } from "./remove-task-toast-tracking" import { isActiveSessionStatus } from "./session-status-classifier" + +const MIN_SESSION_GONE_POLLS = 3 const TERMINAL_TASK_STATUSES = new Set([ "completed", "error", @@ -97,6 +99,15 @@ export function pruneStaleTasksAndNotifications(args: { export type SessionStatusMap = Record +async function verifySessionExists(client: OpencodeClient, sessionID: string): Promise { + try { + const result = await client.session.get({ path: { id: sessionID } }) + return !!result.data + } catch { + return false + } +} + export async function checkAndInterruptStaleTasks(args: { tasks: Iterable client: OpencodeClient @@ -130,14 +141,28 @@ export async function checkAndInterruptStaleTasks(args: { const sessionStatus = sessionStatuses?.[sessionID]?.type const sessionIsRunning = sessionStatus !== undefined && isActiveSessionStatus(sessionStatus) - const sessionGone = sessionStatuses !== undefined && sessionStatus === undefined + const sessionMissing = sessionStatuses !== undefined && sessionStatus === undefined const runtime = now - startedAt.getTime() + if (sessionMissing) { + task.consecutiveMissedPolls = 
(task.consecutiveMissedPolls ?? 0) + 1 + } else if (sessionStatuses !== undefined) { + task.consecutiveMissedPolls = 0 + } + + const sessionGone = sessionMissing && (task.consecutiveMissedPolls ?? 0) >= MIN_SESSION_GONE_POLLS + if (!task.progress?.lastUpdate) { if (sessionIsRunning) continue + if (sessionMissing && !sessionGone) continue const effectiveTimeout = sessionGone ? sessionGoneTimeoutMs : messageStalenessMs if (runtime <= effectiveTimeout) continue + if (sessionGone && await verifySessionExists(client, sessionID)) { + task.consecutiveMissedPolls = 0 + continue + } + const staleMinutes = Math.round(runtime / 60000) const reason = sessionGone ? "session gone from status registry" : "no activity" task.status = "cancelled" @@ -171,11 +196,16 @@ export async function checkAndInterruptStaleTasks(args: { if (timeSinceLastUpdate <= effectiveStaleTimeout) continue if (task.status !== "running") continue - const staleMinutes = Math.round(timeSinceLastUpdate / 60000) - const reason = sessionGone ? "session gone from status registry" : "no activity" - task.status = "cancelled" - task.error = `Stale timeout (${reason} for ${staleMinutes}min). This is a FINAL cancellation - do NOT create a replacement task. If the timeout is too short, increase 'background_task.${sessionGone ? "sessionGoneTimeoutMs" : "staleTimeoutMs"}' in .opencode/oh-my-opencode.json.` - task.completedAt = new Date() + if (sessionGone && await verifySessionExists(client, sessionID)) { + task.consecutiveMissedPolls = 0 + continue + } + + const staleMinutes = Math.round(timeSinceLastUpdate / 60000) + const reason = sessionGone ? "session gone from status registry" : "no activity" + task.status = "cancelled" + task.error = `Stale timeout (${reason} for ${staleMinutes}min). This is a FINAL cancellation - do NOT create a replacement task. If the timeout is too short, increase 'background_task.${sessionGone ? 
"sessionGoneTimeoutMs" : "staleTimeoutMs"}' in .opencode/oh-my-opencode.json.` + task.completedAt = new Date() if (task.concurrencyKey) { concurrencyManager.release(task.concurrencyKey) diff --git a/src/features/background-agent/types.ts b/src/features/background-agent/types.ts index 3a8af85e9..5edbe102d 100644 --- a/src/features/background-agent/types.ts +++ b/src/features/background-agent/types.ts @@ -66,6 +66,8 @@ export interface BackgroundTask { lastMsgCount?: number /** Number of consecutive polls with stable message count */ stablePolls?: number + /** Number of consecutive polls where session was missing from status map */ + consecutiveMissedPolls?: number } export interface LaunchInput { diff --git a/src/features/opencode-skill-loader/loaded-skill-template-extractor.ts b/src/features/opencode-skill-loader/loaded-skill-template-extractor.ts index ba20552e5..52dc5e5ff 100644 --- a/src/features/opencode-skill-loader/loaded-skill-template-extractor.ts +++ b/src/features/opencode-skill-loader/loaded-skill-template-extractor.ts @@ -3,9 +3,13 @@ import { parseFrontmatter } from "../../shared/frontmatter" import type { LoadedSkill } from "./types" export function extractSkillTemplate(skill: LoadedSkill): string { - if (skill.path) { - const content = readFileSync(skill.path, "utf-8") - const { body } = parseFrontmatter(content) + if (skill.scope === "config" && skill.definition.template) { + return skill.definition.template + } + + if (skill.path) { + const content = readFileSync(skill.path, "utf-8") + const { body } = parseFrontmatter(content) return body.trim() } return skill.definition.template || "" diff --git a/src/hooks/anthropic-context-window-limit-recovery/message-builder.test.ts b/src/hooks/anthropic-context-window-limit-recovery/message-builder.test.ts new file mode 100644 index 000000000..e107aed39 --- /dev/null +++ b/src/hooks/anthropic-context-window-limit-recovery/message-builder.test.ts @@ -0,0 +1,105 @@ +import { afterAll, beforeEach, describe, 
expect, mock, test } from "bun:test" + +const replaceEmptyTextPartsAsync = mock(() => Promise.resolve(false)) +const injectTextPartAsync = mock(() => Promise.resolve(false)) +const findMessagesWithEmptyTextPartsFromSDK = mock(() => Promise.resolve([] as string[])) + +mock.module("../../shared", () => ({ + normalizeSDKResponse: (response: { data?: unknown[] }) => response.data ?? [], +})) + +mock.module("../../shared/logger", () => ({ + log: () => {}, +})) + +mock.module("../../shared/opencode-storage-detection", () => ({ + isSqliteBackend: () => true, +})) + +mock.module("../session-recovery/storage", () => ({ + findEmptyMessages: () => [], + findMessagesWithEmptyTextParts: () => [], + injectTextPart: () => false, + replaceEmptyTextParts: () => false, +})) + +mock.module("../session-recovery/storage/empty-text", () => ({ + replaceEmptyTextPartsAsync, + findMessagesWithEmptyTextPartsFromSDK, +})) + +mock.module("../session-recovery/storage/text-part-injector", () => ({ + injectTextPartAsync, +})) + +async function importFreshMessageBuilder(): Promise { + return import(`./message-builder?test=${Date.now()}-${Math.random()}`) +} + +afterAll(() => { + mock.restore() +}) + +describe("sanitizeEmptyMessagesBeforeSummarize", () => { + beforeEach(() => { + replaceEmptyTextPartsAsync.mockReset() + replaceEmptyTextPartsAsync.mockResolvedValue(false) + injectTextPartAsync.mockReset() + injectTextPartAsync.mockResolvedValue(false) + findMessagesWithEmptyTextPartsFromSDK.mockReset() + findMessagesWithEmptyTextPartsFromSDK.mockResolvedValue([]) + }) + + test("#given sqlite message with tool content and empty text part #when sanitizing #then it fixes the mixed-content message", async () => { + const { sanitizeEmptyMessagesBeforeSummarize, PLACEHOLDER_TEXT } = await importFreshMessageBuilder() + const client = { + session: { + messages: mock(() => Promise.resolve({ + data: [ + { + info: { id: "msg-1" }, + parts: [ + { type: "tool_result", text: "done" }, + { type: "text", text: "" 
}, + ], + }, + ], + })), + }, + } as never + findMessagesWithEmptyTextPartsFromSDK.mockResolvedValue(["msg-1"]) + replaceEmptyTextPartsAsync.mockResolvedValue(true) + + const fixedCount = await sanitizeEmptyMessagesBeforeSummarize("ses-1", client) + + expect(fixedCount).toBe(1) + expect(replaceEmptyTextPartsAsync).toHaveBeenCalledWith(client, "ses-1", "msg-1", PLACEHOLDER_TEXT) + expect(injectTextPartAsync).not.toHaveBeenCalled() + }) + + test("#given sqlite message with mixed content and failed replacement #when sanitizing #then it injects the placeholder text part", async () => { + const { sanitizeEmptyMessagesBeforeSummarize, PLACEHOLDER_TEXT } = await importFreshMessageBuilder() + const client = { + session: { + messages: mock(() => Promise.resolve({ + data: [ + { + info: { id: "msg-2" }, + parts: [ + { type: "tool_use", text: "call" }, + { type: "text", text: "" }, + ], + }, + ], + })), + }, + } as never + findMessagesWithEmptyTextPartsFromSDK.mockResolvedValue(["msg-2"]) + injectTextPartAsync.mockResolvedValue(true) + + const fixedCount = await sanitizeEmptyMessagesBeforeSummarize("ses-2", client) + + expect(fixedCount).toBe(1) + expect(injectTextPartAsync).toHaveBeenCalledWith(client, "ses-2", "msg-2", PLACEHOLDER_TEXT) + }) +}) diff --git a/src/hooks/anthropic-context-window-limit-recovery/message-builder.ts b/src/hooks/anthropic-context-window-limit-recovery/message-builder.ts index 4eff6e513..b2b3fd56c 100644 --- a/src/hooks/anthropic-context-window-limit-recovery/message-builder.ts +++ b/src/hooks/anthropic-context-window-limit-recovery/message-builder.ts @@ -8,7 +8,7 @@ import { injectTextPart, replaceEmptyTextParts, } from "../session-recovery/storage" -import { replaceEmptyTextPartsAsync } from "../session-recovery/storage/empty-text" +import { findMessagesWithEmptyTextPartsFromSDK, replaceEmptyTextPartsAsync } from "../session-recovery/storage/empty-text" import { injectTextPartAsync } from "../session-recovery/storage/text-part-injector" import type 
{ Client } from "./client" @@ -86,12 +86,14 @@ export async function sanitizeEmptyMessagesBeforeSummarize( ): Promise { if (client && isSqliteBackend()) { const emptyMessageIds = await findEmptyMessageIdsFromSDK(client, sessionID) - if (emptyMessageIds.length === 0) { + const emptyTextPartIds = await findMessagesWithEmptyTextPartsFromSDK(client, sessionID) + const allIds = [...new Set([...emptyMessageIds, ...emptyTextPartIds])] + if (allIds.length === 0) { return 0 } let fixedCount = 0 - for (const messageID of emptyMessageIds) { + for (const messageID of allIds) { const replaced = await replaceEmptyTextPartsAsync(client, sessionID, messageID, PLACEHOLDER_TEXT) if (replaced) { fixedCount++ @@ -107,7 +109,7 @@ export async function sanitizeEmptyMessagesBeforeSummarize( log("[auto-compact] pre-summarize sanitization fixed empty messages", { sessionID, fixedCount, - totalEmpty: emptyMessageIds.length, + totalEmpty: allIds.length, }) } diff --git a/src/hooks/atlas/boulder-continuation-injector.ts b/src/hooks/atlas/boulder-continuation-injector.ts index a9ffa5478..f72196f63 100644 --- a/src/hooks/atlas/boulder-continuation-injector.ts +++ b/src/hooks/atlas/boulder-continuation-injector.ts @@ -1,8 +1,9 @@ import type { PluginInput } from "@opencode-ai/plugin" import type { BackgroundManager } from "../../features/background-agent" +import { isAgentRegistered } from "../../features/claude-code-session-state" import { log } from "../../shared/logger" import { createInternalAgentTextPart, resolveInheritedPromptTools } from "../../shared" -import { getAgentDisplayName } from "../../shared/agent-display-names" +import { getAgentConfigKey } from "../../shared/agent-display-names" import { HOOK_NAME } from "./hook-name" import { BOULDER_CONTINUATION_PROMPT } from "./system-reminder-templates" import { resolveRecentPromptContextForSession } from "./recent-model-resolver" @@ -48,24 +49,33 @@ export async function injectBoulderContinuation(input: { const preferredSessionContext = 
preferredTaskSessionId ? `\n\n[Preferred reuse session for current top-level plan task${preferredTaskTitle ? `: ${preferredTaskTitle}` : ""}: ${preferredTaskSessionId}]` : "" - const prompt = - BOULDER_CONTINUATION_PROMPT.replace(/{PLAN_NAME}/g, planName) + - `\n\n[Status: ${total - remaining}/${total} completed, ${remaining} remaining]` + - preferredSessionContext + - worktreeContext + const prompt = + BOULDER_CONTINUATION_PROMPT.replace(/{PLAN_NAME}/g, planName) + + `\n\n[Status: ${total - remaining}/${total} completed, ${remaining} remaining]` + + preferredSessionContext + + worktreeContext + const continuationAgent = agent ?? (isAgentRegistered("atlas") ? "atlas" : undefined) - try { - log(`[${HOOK_NAME}] Injecting boulder continuation`, { sessionID, planName, remaining }) + if (!continuationAgent || !isAgentRegistered(continuationAgent)) { + log(`[${HOOK_NAME}] Skipped injection: continuation agent unavailable`, { + sessionID, + agent: continuationAgent ?? agent ?? "unknown", + }) + return + } + + try { + log(`[${HOOK_NAME}] Injecting boulder continuation`, { sessionID, planName, remaining }) const promptContext = await resolveRecentPromptContextForSession(ctx, sessionID) const inheritedTools = resolveInheritedPromptTools(sessionID, promptContext.tools) - await ctx.client.session.promptAsync({ - path: { id: sessionID }, - body: { - agent: getAgentDisplayName(agent ?? "atlas"), - ...(promptContext.model !== undefined ? { model: promptContext.model } : {}), - ...(inheritedTools ? { tools: inheritedTools } : {}), + await ctx.client.session.promptAsync({ + path: { id: sessionID }, + body: { + agent: getAgentConfigKey(continuationAgent), + ...(promptContext.model !== undefined ? { model: promptContext.model } : {}), + ...(inheritedTools ? 
{ tools: inheritedTools } : {}), parts: [createInternalAgentTextPart(prompt)], }, query: { directory: ctx.directory }, diff --git a/src/hooks/atlas/compaction-agent-filter.test.ts b/src/hooks/atlas/compaction-agent-filter.test.ts index 7e821bd86..7dfbe0d92 100644 --- a/src/hooks/atlas/compaction-agent-filter.test.ts +++ b/src/hooks/atlas/compaction-agent-filter.test.ts @@ -6,7 +6,7 @@ import { join } from "node:path" import { randomUUID } from "node:crypto" import { clearBoulderState, writeBoulderState } from "../../features/boulder-state" -import { _resetForTesting } from "../../features/claude-code-session-state" +import { _resetForTesting, registerAgentName } from "../../features/claude-code-session-state" import type { BoulderState } from "../../features/boulder-state" const TEST_STORAGE_ROOT = join(tmpdir(), `atlas-compaction-storage-${randomUUID()}`) @@ -66,6 +66,8 @@ describe("atlas hook compaction agent filtering", () => { mkdirSync(testDirectory, { recursive: true }) clearBoulderState(testDirectory) _resetForTesting() + registerAgentName("atlas") + registerAgentName("sisyphus") }) afterEach(() => { diff --git a/src/hooks/atlas/idle-event-lineage.test.ts b/src/hooks/atlas/idle-event-lineage.test.ts index 061195a97..112c676b0 100644 --- a/src/hooks/atlas/idle-event-lineage.test.ts +++ b/src/hooks/atlas/idle-event-lineage.test.ts @@ -6,7 +6,7 @@ import { tmpdir } from "node:os" import { join } from "node:path" import { clearBoulderState, readBoulderState, writeBoulderState } from "../../features/boulder-state" import type { BoulderState } from "../../features/boulder-state" -import { _resetForTesting, setSessionAgent, subagentSessions } from "../../features/claude-code-session-state" +import { _resetForTesting, registerAgentName, setSessionAgent, subagentSessions } from "../../features/claude-code-session-state" const { createAtlasHook } = await import("./index") @@ -64,6 +64,8 @@ describe("atlas hook idle-event session lineage", () => { promptCalls = [] 
clearBoulderState(testDirectory) _resetForTesting() + registerAgentName("atlas") + registerAgentName("sisyphus") subagentSessions.clear() }) diff --git a/src/hooks/atlas/idle-event.ts b/src/hooks/atlas/idle-event.ts index 9923d7d0e..55f423bc2 100644 --- a/src/hooks/atlas/idle-event.ts +++ b/src/hooks/atlas/idle-event.ts @@ -5,7 +5,7 @@ import { readBoulderState, readCurrentTopLevelTask, } from "../../features/boulder-state" -import { getSessionAgent, subagentSessions } from "../../features/claude-code-session-state" +import { getSessionAgent, isAgentRegistered, subagentSessions } from "../../features/claude-code-session-state" import { getAgentConfigKey } from "../../shared/agent-display-names" import { log } from "../../shared/logger" import { injectBoulderContinuation } from "./boulder-continuation-injector" @@ -141,7 +141,15 @@ export async function handleAtlasSessionIdle(input: { if (subagentSessions.has(sessionID)) { const sessionAgent = getSessionAgent(sessionID) const agentKey = getAgentConfigKey(sessionAgent ?? "") - const requiredAgentKey = getAgentConfigKey(boulderState.agent ?? "atlas") + const requiredAgentName = boulderState.agent ?? (isAgentRegistered("atlas") ? "atlas" : undefined) + if (!requiredAgentName || !isAgentRegistered(requiredAgentName)) { + log(`[${HOOK_NAME}] Skipped: boulder agent is unavailable for continuation`, { + sessionID, + requiredAgent: boulderState.agent ?? "unknown", + }) + return + } + const requiredAgentKey = getAgentConfigKey(requiredAgentName) const agentMatches = agentKey === requiredAgentKey || (requiredAgentKey === getAgentConfigKey("atlas") && agentKey === getAgentConfigKey("sisyphus")) @@ -149,10 +157,10 @@ export async function handleAtlasSessionIdle(input: { log(`[${HOOK_NAME}] Skipped: subagent agent does not match boulder agent`, { sessionID, agent: sessionAgent ?? "unknown", - requiredAgent: boulderState.agent ?? 
"atlas", - }) - return - } + requiredAgent: requiredAgentName, + }) + return + } } const sessionState = getState(sessionID) diff --git a/src/hooks/atlas/index.test.ts b/src/hooks/atlas/index.test.ts index 4f01547cd..8b19c7539 100644 --- a/src/hooks/atlas/index.test.ts +++ b/src/hooks/atlas/index.test.ts @@ -9,7 +9,7 @@ import { readBoulderState, } from "../../features/boulder-state" import type { BoulderState } from "../../features/boulder-state" -import { _resetForTesting, subagentSessions, updateSessionAgent } from "../../features/claude-code-session-state" +import { _resetForTesting, registerAgentName, subagentSessions, updateSessionAgent } from "../../features/claude-code-session-state" import type { PendingTaskRef } from "./types" const TEST_STORAGE_ROOT = join(tmpdir(), `atlas-message-storage-${randomUUID()}`) @@ -90,6 +90,9 @@ describe("atlas hook", () => { } beforeEach(() => { + _resetForTesting() + registerAgentName("atlas") + registerAgentName("sisyphus") TEST_DIR = join(tmpdir(), `atlas-test-${randomUUID()}`) SISYPHUS_DIR = join(TEST_DIR, ".sisyphus") if (!existsSync(TEST_DIR)) { @@ -102,6 +105,7 @@ describe("atlas hook", () => { }) afterEach(() => { + _resetForTesting() clearBoulderState(TEST_DIR) if (existsSync(TEST_DIR)) { rmSync(TEST_DIR, { recursive: true, force: true }) @@ -1182,9 +1186,11 @@ session_id: ses_untrusted_999 beforeEach(() => { _resetForTesting() - subagentSessions.clear() - setupMessageStorage(MAIN_SESSION_ID, "atlas") - }) + registerAgentName("atlas") + registerAgentName("sisyphus") + subagentSessions.clear() + setupMessageStorage(MAIN_SESSION_ID, "atlas") + }) afterEach(() => { cleanupMessageStorage(MAIN_SESSION_ID) diff --git a/src/hooks/legacy-plugin-toast/auto-migrate.test.ts b/src/hooks/legacy-plugin-toast/auto-migrate.test.ts index d1aa5e745..0ee33cb8c 100644 --- a/src/hooks/legacy-plugin-toast/auto-migrate.test.ts +++ b/src/hooks/legacy-plugin-toast/auto-migrate.test.ts @@ -2,7 +2,9 @@ import { afterEach, beforeEach, describe, 
expect, it } from "bun:test" import { mkdirSync, readFileSync, rmSync, writeFileSync } from "node:fs" import { tmpdir } from "node:os" import { join } from "node:path" -import { autoMigrateLegacyPluginEntry } from "./auto-migrate" +async function importFreshAutoMigrateModule(): Promise { + return import(`./auto-migrate?test=${Date.now()}-${Math.random()}`) +} describe("autoMigrateLegacyPluginEntry", () => { let testConfigDir = "" @@ -17,13 +19,15 @@ describe("autoMigrateLegacyPluginEntry", () => { }) describe("#given opencode.json has a bare legacy plugin entry", () => { - it("#then replaces oh-my-opencode with oh-my-openagent", () => { + it("#then replaces oh-my-opencode with oh-my-openagent", async () => { // given writeFileSync( join(testConfigDir, "opencode.json"), JSON.stringify({ plugin: ["oh-my-opencode"] }, null, 2) + "\n", ) + const { autoMigrateLegacyPluginEntry } = await importFreshAutoMigrateModule() + // when const result = autoMigrateLegacyPluginEntry(testConfigDir) @@ -37,13 +41,15 @@ describe("autoMigrateLegacyPluginEntry", () => { }) describe("#given opencode.json has a version-pinned legacy entry", () => { - it("#then preserves the version suffix", () => { + it("#then preserves the version suffix", async () => { // given writeFileSync( join(testConfigDir, "opencode.json"), JSON.stringify({ plugin: ["oh-my-opencode@3.10.0"] }, null, 2) + "\n", ) + const { autoMigrateLegacyPluginEntry } = await importFreshAutoMigrateModule() + // when const result = autoMigrateLegacyPluginEntry(testConfigDir) @@ -57,13 +63,15 @@ describe("autoMigrateLegacyPluginEntry", () => { }) describe("#given both canonical and legacy entries exist", () => { - it("#then removes legacy entry and keeps canonical", () => { + it("#then removes legacy entry and keeps canonical", async () => { // given writeFileSync( join(testConfigDir, "opencode.json"), JSON.stringify({ plugin: ["oh-my-openagent", "oh-my-opencode"] }, null, 2) + "\n", ) + const { autoMigrateLegacyPluginEntry } = 
await importFreshAutoMigrateModule() + // when const result = autoMigrateLegacyPluginEntry(testConfigDir) @@ -75,8 +83,9 @@ describe("autoMigrateLegacyPluginEntry", () => { }) describe("#given no config file exists", () => { - it("#then returns migrated false", () => { + it("#then returns migrated false", async () => { // given - empty dir + const { autoMigrateLegacyPluginEntry } = await importFreshAutoMigrateModule() // when const result = autoMigrateLegacyPluginEntry(testConfigDir) @@ -88,13 +97,15 @@ describe("autoMigrateLegacyPluginEntry", () => { }) describe("#given opencode.jsonc has comments and a legacy entry", () => { - it("#then preserves comments and replaces entry", () => { + it("#then preserves comments and replaces entry", async () => { // given writeFileSync( join(testConfigDir, "opencode.jsonc"), '{\n // my config\n "plugin": ["oh-my-opencode"]\n}\n', ) + const { autoMigrateLegacyPluginEntry } = await importFreshAutoMigrateModule() + // when const result = autoMigrateLegacyPluginEntry(testConfigDir) @@ -108,11 +119,13 @@ describe("autoMigrateLegacyPluginEntry", () => { }) describe("#given only canonical entry exists", () => { - it("#then returns migrated false and leaves file untouched", () => { + it("#then returns migrated false and leaves file untouched", async () => { // given const original = JSON.stringify({ plugin: ["oh-my-openagent"] }, null, 2) + "\n" writeFileSync(join(testConfigDir, "opencode.json"), original) + const { autoMigrateLegacyPluginEntry } = await importFreshAutoMigrateModule() + // when const result = autoMigrateLegacyPluginEntry(testConfigDir) diff --git a/src/hooks/preemptive-compaction.test.ts b/src/hooks/preemptive-compaction.test.ts index e045de72c..b4b6932a0 100644 --- a/src/hooks/preemptive-compaction.test.ts +++ b/src/hooks/preemptive-compaction.test.ts @@ -669,4 +669,43 @@ describe("preemptive-compaction", () => { expect(ctx.client.session.summarize).toHaveBeenCalled() }) + + it("should ignore stale cached Anthropic 
limits for older models", async () => { + const modelContextLimitsCache = new Map() + modelContextLimitsCache.set("anthropic/claude-sonnet-4-5", 500000) + + const hook = createPreemptiveCompactionHook(ctx as never, {} as never, { + anthropicContext1MEnabled: false, + modelContextLimitsCache, + }) + const sessionID = "ses_old_anthropic_limit" + + await hook.event({ + event: { + type: "message.updated", + properties: { + info: { + role: "assistant", + sessionID, + providerID: "anthropic", + modelID: "claude-sonnet-4-5", + finish: true, + tokens: { + input: 170000, + output: 0, + reasoning: 0, + cache: { read: 10000, write: 0 }, + }, + }, + }, + }, + }) + + await hook["tool.execute.after"]( + { tool: "bash", sessionID, callID: "call_1" }, + { title: "", output: "test", metadata: null } + ) + + expect(ctx.client.session.summarize).toHaveBeenCalled() + }) }) diff --git a/src/hooks/ralph-loop/completion-promise-detector.test.ts b/src/hooks/ralph-loop/completion-promise-detector.test.ts index bbb2206fb..165776d0c 100644 --- a/src/hooks/ralph-loop/completion-promise-detector.test.ts +++ b/src/hooks/ralph-loop/completion-promise-detector.test.ts @@ -186,7 +186,7 @@ describe("detectCompletionInSessionMessages", () => { }) describe("#given semantic completion patterns", () => { - test("#when agent says 'task is complete' #then should detect semantic completion", async () => { + test("#when agent says 'task is complete' without explicit promise #then should NOT detect completion", async () => { // #given const messages: SessionMessage[] = [ { @@ -205,10 +205,10 @@ describe("detectCompletionInSessionMessages", () => { }) // #then - expect(detected).toBe(true) + expect(detected).toBe(false) }) - test("#when agent says 'all items are done' #then should detect semantic completion", async () => { + test("#when agent says 'all items are done' without explicit promise #then should NOT detect completion", async () => { // #given const messages: SessionMessage[] = [ { @@ -227,10 
+227,10 @@ describe("detectCompletionInSessionMessages", () => { }) // #then - expect(detected).toBe(true) + expect(detected).toBe(false) }) - test("#when agent says 'nothing left to do' #then should detect semantic completion", async () => { + test("#when agent says 'nothing left to do' without explicit promise #then should NOT detect completion", async () => { // #given const messages: SessionMessage[] = [ { @@ -249,10 +249,10 @@ describe("detectCompletionInSessionMessages", () => { }) // #then - expect(detected).toBe(true) + expect(detected).toBe(false) }) - test("#when agent says 'successfully completed all' #then should detect semantic completion", async () => { + test("#when agent says 'successfully completed all' without explicit promise #then should NOT detect completion", async () => { // #given const messages: SessionMessage[] = [ { @@ -271,7 +271,7 @@ describe("detectCompletionInSessionMessages", () => { }) // #then - expect(detected).toBe(true) + expect(detected).toBe(false) }) test("#when promise is VERIFIED #then semantic completion should NOT trigger", async () => { @@ -295,6 +295,75 @@ describe("detectCompletionInSessionMessages", () => { // #then expect(detected).toBe(false) }) + + test("#when completion text appears inside a quote #then should NOT detect completion", async () => { + // #given + const messages: SessionMessage[] = [ + { + info: { role: "assistant" }, + parts: [{ type: "text", text: 'The user wrote: "the task is complete". I am still working.' 
}], + }, + ] + const ctx = createPluginInput(messages) + + // #when + const detected = await detectCompletionInSessionMessages(ctx, { + sessionID: "session-quoted", + promise: "DONE", + apiTimeoutMs: 1000, + directory: "/tmp", + }) + + // #then + expect(detected).toBe(false) + }) + + test("#when tool_result says all items are complete #then should NOT detect completion", async () => { + // #given + const messages: SessionMessage[] = [ + { + info: { role: "assistant" }, + parts: [ + { type: "tool_result", text: "Background agent report: all items are complete." }, + { type: "text", text: "Still validating the final behavior." }, + ], + }, + ] + const ctx = createPluginInput(messages) + + // #when + const detected = await detectCompletionInSessionMessages(ctx, { + sessionID: "session-tool-result-semantic", + promise: "DONE", + apiTimeoutMs: 1000, + directory: "/tmp", + }) + + // #then + expect(detected).toBe(false) + }) + + test("#when assistant says complete but not actually done #then should NOT detect completion", async () => { + // #given + const messages: SessionMessage[] = [ + { + info: { role: "assistant" }, + parts: [{ type: "text", text: "The implementation looks complete, but I still need to run the tests." 
}], + }, + ] + const ctx = createPluginInput(messages) + + // #when + const detected = await detectCompletionInSessionMessages(ctx, { + sessionID: "session-not-actually-done", + promise: "DONE", + apiTimeoutMs: 1000, + directory: "/tmp", + }) + + // #then + expect(detected).toBe(false) + }) }) }) diff --git a/src/hooks/ralph-loop/completion-promise-detector.ts b/src/hooks/ralph-loop/completion-promise-detector.ts index 57b85ca9c..93f709166 100644 --- a/src/hooks/ralph-loop/completion-promise-detector.ts +++ b/src/hooks/ralph-loop/completion-promise-detector.ts @@ -39,6 +39,8 @@ const SEMANTIC_COMPLETION_PATTERNS = [ /\bnothing\s+(?:left|more|remaining)\s+to\s+(?:do|implement|fix)\b/i, ] +const SEMANTIC_DONE_FALLBACK_ENABLED = false + export function detectSemanticCompletion(text: string): boolean { return SEMANTIC_COMPLETION_PATTERNS.some((pattern) => pattern.test(text)) } @@ -65,9 +67,8 @@ export function detectCompletionInTranscript( const entryText = extractTranscriptEntryText(entry) if (!entryText) continue if (pattern.test(entryText)) return true - // Fallback: semantic completion only for DONE promise and assistant entries const isAssistantEntry = entry.type === "assistant" || entry.type === "text" - if (promise === "DONE" && isAssistantEntry && detectSemanticCompletion(entryText)) { + if (SEMANTIC_DONE_FALLBACK_ENABLED && promise === "DONE" && isAssistantEntry && detectSemanticCompletion(entryText)) { log("[ralph-loop] WARNING: Semantic completion detected in transcript (agent used natural language instead of DONE)") return true } @@ -135,8 +136,7 @@ export async function detectCompletionInSessionMessages( return true } - // Fallback: semantic completion only for DONE promise - if (options.promise === "DONE" && detectSemanticCompletion(responseText)) { + if (SEMANTIC_DONE_FALLBACK_ENABLED && options.promise === "DONE" && detectSemanticCompletion(responseText)) { log("[ralph-loop] WARNING: Semantic completion detected (agent used natural language instead of 
DONE)", { sessionID: options.sessionID, }) diff --git a/src/hooks/session-recovery/storage/readers-from-sdk.test.ts b/src/hooks/session-recovery/storage/readers-from-sdk.test.ts index e3194576f..4b63cad6b 100644 --- a/src/hooks/session-recovery/storage/readers-from-sdk.test.ts +++ b/src/hooks/session-recovery/storage/readers-from-sdk.test.ts @@ -1,7 +1,13 @@ import { describe, expect, it } from "bun:test" -import { readMessagesFromSDK, readPartsFromSDK } from "../storage" -import { readMessages } from "./messages-reader" -import { readParts } from "./parts-reader" +async function importFreshReaders() { + const token = `${Date.now()}-${Math.random()}` + const [{ readMessagesFromSDK, readMessages }, { readPartsFromSDK, readParts }] = await Promise.all([ + import(`./messages-reader?test=${token}`), + import(`./parts-reader?test=${token}`), + ]) + + return { readMessagesFromSDK, readPartsFromSDK, readMessages, readParts } +} function createMockClient(handlers: { messages?: (sessionID: string) => unknown[] @@ -28,6 +34,7 @@ function createMockClient(handlers: { describe("session-recovery storage SDK readers", () => { it("readPartsFromSDK returns empty array when fetch fails", async () => { //#given a client that throws on request + const { readPartsFromSDK } = await importFreshReaders() const client = createMockClient({}) as Parameters[0] //#when readPartsFromSDK is called @@ -39,6 +46,7 @@ describe("session-recovery storage SDK readers", () => { it("readPartsFromSDK returns stored parts from SDK response", async () => { //#given a client that returns a message with parts + const { readPartsFromSDK } = await importFreshReaders() const sessionID = "ses_test" const messageID = "msg_test" const storedParts = [ @@ -58,6 +66,7 @@ describe("session-recovery storage SDK readers", () => { it("readMessagesFromSDK normalizes and sorts messages", async () => { //#given a client that returns messages list + const { readMessagesFromSDK } = await importFreshReaders() const sessionID 
= "ses_test" const client = createMockClient({ messages: () => [ @@ -78,8 +87,9 @@ describe("session-recovery storage SDK readers", () => { ]) }) - it("readParts returns empty array for nonexistent message", () => { + it("readParts returns empty array for nonexistent message", async () => { //#given a message ID that has no stored parts + const { readParts } = await importFreshReaders() //#when readParts is called const parts = readParts("msg_nonexistent") @@ -87,8 +97,9 @@ describe("session-recovery storage SDK readers", () => { expect(parts).toEqual([]) }) - it("readMessages returns empty array for nonexistent session", () => { + it("readMessages returns empty array for nonexistent session", async () => { //#given a session ID that has no stored messages + const { readMessages } = await importFreshReaders() //#when readMessages is called const messages = readMessages("ses_nonexistent") diff --git a/src/hooks/start-work/index.test.ts b/src/hooks/start-work/index.test.ts index ddea999a0..1b673b7b3 100644 --- a/src/hooks/start-work/index.test.ts +++ b/src/hooks/start-work/index.test.ts @@ -12,7 +12,6 @@ import { import type { BoulderState } from "../../features/boulder-state" import * as sessionState from "../../features/claude-code-session-state" import * as worktreeDetector from "./worktree-detector" -import * as worktreeDetector from "./worktree-detector" describe("start-work hook", () => { let testDir: string @@ -26,6 +25,9 @@ describe("start-work hook", () => { } beforeEach(() => { + sessionState._resetForTesting() + sessionState.registerAgentName("atlas") + sessionState.registerAgentName("sisyphus") testDir = join(tmpdir(), `start-work-test-${randomUUID()}`) sisyphusDir = join(testDir, ".sisyphus") if (!existsSync(testDir)) { @@ -38,6 +40,7 @@ describe("start-work hook", () => { }) afterEach(() => { + sessionState._resetForTesting() clearBoulderState(testDir) if (existsSync(testDir)) { rmSync(testDir, { recursive: true, force: true }) @@ -409,7 +412,7 @@ 
describe("start-work hook", () => { // given const hook = createStartWorkHook(createMockPluginInput()) const output = { - message: {}, + message: {} as Record, parts: [{ type: "text", text: "" }], } @@ -422,6 +425,29 @@ describe("start-work hook", () => { // then expect(output.message.agent).toBe("Atlas (Plan Executor)") }) + + test("should keep the current agent when Atlas is unavailable", async () => { + // given + sessionState._resetForTesting() + sessionState.registerAgentName("sisyphus") + sessionState.updateSessionAgent("ses-prometheus-to-sisyphus", "sisyphus") + + const hook = createStartWorkHook(createMockPluginInput()) + const output = { + message: {} as Record, + parts: [{ type: "text", text: "" }], + } + + // when + await hook["chat.message"]( + { sessionID: "ses-prometheus-to-sisyphus" }, + output + ) + + // then + expect(output.message.agent).toBe("Sisyphus (Ultraworker)") + expect(sessionState.getSessionAgent("ses-prometheus-to-sisyphus")).toBe("sisyphus") + }) }) describe("worktree support", () => { diff --git a/src/hooks/start-work/start-work-hook.ts b/src/hooks/start-work/start-work-hook.ts index 8eb58d2fa..ef41fb3b1 100644 --- a/src/hooks/start-work/start-work-hook.ts +++ b/src/hooks/start-work/start-work-hook.ts @@ -12,7 +12,7 @@ import { } from "../../features/boulder-state" import { log } from "../../shared/logger" import { getAgentDisplayName } from "../../shared/agent-display-names" -import { updateSessionAgent, isAgentRegistered } from "../../features/claude-code-session-state" +import { getSessionAgent, isAgentRegistered, updateSessionAgent } from "../../features/claude-code-session-state" import { detectWorktreePath } from "./worktree-detector" import { parseUserRequest } from "./parse-user-request" @@ -80,14 +80,13 @@ export function createStartWorkHook(ctx: PluginInput) { if (!promptText.includes("")) return log(`[${HOOK_NAME}] Processing start-work command`, { sessionID: input.sessionID }) - const atlasDisplayName = 
getAgentDisplayName("atlas") - if (isAgentRegistered("atlas") || isAgentRegistered(atlasDisplayName)) { - updateSessionAgent(input.sessionID, "atlas") - if (output.message) { - output.message["agent"] = atlasDisplayName - } - } else { - log(`[${HOOK_NAME}] Atlas agent not available, continuing with current agent`, { sessionID: input.sessionID }) + const activeAgent = isAgentRegistered("atlas") + ? "atlas" + : getSessionAgent(input.sessionID) ?? "sisyphus" + const activeAgentDisplayName = getAgentDisplayName(activeAgent) + updateSessionAgent(input.sessionID, activeAgent) + if (output.message) { + output.message["agent"] = activeAgentDisplayName } const existingState = readBoulderState(ctx.directory) @@ -116,7 +115,7 @@ The requested plan "${getPlanName(matchedPlan)}" has been completed. All ${progress.total} tasks are done. Create a new plan with: /plan "your task"` } else { if (existingState) clearBoulderState(ctx.directory) - const newState = createBoulderState(matchedPlan, sessionId, "atlas", worktreePath) + const newState = createBoulderState(matchedPlan, sessionId, activeAgent, worktreePath) writeBoulderState(ctx.directory, newState) contextInfo = ` @@ -223,7 +222,7 @@ All ${plans.length} plan(s) are complete. 
Create a new plan with: /plan "your ta } else if (incompletePlans.length === 1) { const planPath = incompletePlans[0] const progress = getPlanProgress(planPath) - const newState = createBoulderState(planPath, sessionId, "atlas", worktreePath) + const newState = createBoulderState(planPath, sessionId, activeAgent, worktreePath) writeBoulderState(ctx.directory, newState) contextInfo += ` diff --git a/src/hooks/tasks-todowrite-disabler/hook.ts b/src/hooks/tasks-todowrite-disabler/hook.ts index d9f7d1afb..9449cfea8 100644 --- a/src/hooks/tasks-todowrite-disabler/hook.ts +++ b/src/hooks/tasks-todowrite-disabler/hook.ts @@ -9,7 +9,7 @@ export interface TasksTodowriteDisablerConfig { export function createTasksTodowriteDisablerHook( config: TasksTodowriteDisablerConfig, ) { - const isTaskSystemEnabled = config.experimental?.task_system ?? false; + const isTaskSystemEnabled = config.experimental?.task_system ?? true; return { "tool.execute.before": async ( diff --git a/src/hooks/tasks-todowrite-disabler/index.test.ts b/src/hooks/tasks-todowrite-disabler/index.test.ts index d6cd44ad5..ebb7bb798 100644 --- a/src/hooks/tasks-todowrite-disabler/index.test.ts +++ b/src/hooks/tasks-todowrite-disabler/index.test.ts @@ -59,7 +59,7 @@ describe("tasks-todowrite-disabler", () => { }) }) - describe("when experimental.task_system is disabled or undefined", () => { + describe("when experimental.task_system is disabled", () => { test("should not block TodoWrite when flag is false", async () => { // given const hook = createTasksTodowriteDisablerHook({ experimental: { task_system: false } }) @@ -78,7 +78,7 @@ describe("tasks-todowrite-disabler", () => { ).resolves.toBeUndefined() }) - test("should not block TodoWrite when experimental is undefined", async () => { + test("should block TodoWrite when experimental is undefined because task_system defaults to enabled", async () => { // given const hook = createTasksTodowriteDisablerHook({}) const input = { @@ -93,7 +93,7 @@ 
describe("tasks-todowrite-disabler", () => { // when / then await expect( hook["tool.execute.before"](input, output) - ).resolves.toBeUndefined() + ).rejects.toThrow("TodoRead/TodoWrite are DISABLED") }) test("should not block TodoRead when flag is false", async () => { diff --git a/src/plugin-config.test.ts b/src/plugin-config.test.ts index 47e1cf537..242b9cf1a 100644 --- a/src/plugin-config.test.ts +++ b/src/plugin-config.test.ts @@ -246,7 +246,13 @@ describe("parseConfigPartially", () => { const result = parseConfigPartially({}); expect(result).not.toBeNull(); - expect(Object.keys(result!).length).toBe(0); + expect(result).toEqual({ + git_master: { + commit_footer: true, + include_co_authored_by: true, + git_env_prefix: "GIT_MASTER=1", + }, + }); }); }); diff --git a/src/plugin-handlers/agent-config-handler.test.ts b/src/plugin-handlers/agent-config-handler.test.ts index 6cb7514ed..9be307ef4 100644 --- a/src/plugin-handlers/agent-config-handler.test.ts +++ b/src/plugin-handlers/agent-config-handler.test.ts @@ -290,7 +290,7 @@ describe("applyAgentConfig builtin override protection", () => { }) // then - expect(createSisyphusJuniorAgentSpy).toHaveBeenCalledWith(undefined, "openai/gpt-5.4", false) + expect(createSisyphusJuniorAgentSpy).toHaveBeenCalledWith(undefined, "openai/gpt-5.4", true) }) test("includes project and global .agents skills in builtin agent awareness", async () => { diff --git a/src/plugin-handlers/agent-config-handler.ts b/src/plugin-handlers/agent-config-handler.ts index 14993cda3..8f45d7239 100644 --- a/src/plugin-handlers/agent-config-handler.ts +++ b/src/plugin-handlers/agent-config-handler.ts @@ -90,7 +90,7 @@ export async function applyAgentConfig(params: { params.pluginConfig.browser_automation_engine?.provider ?? "playwright"; const currentModel = params.config.model as string | undefined; const disabledSkills = new Set(params.pluginConfig.disabled_skills ?? []); - const useTaskSystem = params.pluginConfig.experimental?.task_system ?? 
false; + const useTaskSystem = params.pluginConfig.experimental?.task_system ?? true; const disableOmoEnv = params.pluginConfig.experimental?.disable_omo_env ?? false; const includeClaudeAgents = params.pluginConfig.claude_code?.agents ?? true; diff --git a/src/plugin-handlers/config-handler.test.ts b/src/plugin-handlers/config-handler.test.ts index c3e7a91d8..4d33d5d9f 100644 --- a/src/plugin-handlers/config-handler.test.ts +++ b/src/plugin-handlers/config-handler.test.ts @@ -1243,7 +1243,7 @@ describe("per-agent todowrite/todoread deny when task_system enabled", () => { expect(agentResult[getAgentDisplayName("hephaestus")]?.permission?.todoread).toBeUndefined() }) - test("does not deny todowrite/todoread when task_system is undefined", async () => { + test("denies todowrite/todoread when task_system is undefined", async () => { //#given const createBuiltinAgentsMock = agents.createBuiltinAgents as unknown as { mockResolvedValue: (value: Record) => void @@ -1271,8 +1271,8 @@ describe("per-agent todowrite/todoread deny when task_system enabled", () => { //#then const agentResult = config.agent as Record }> - expect(agentResult[getAgentDisplayName("sisyphus")]?.permission?.todowrite).toBeUndefined() - expect(agentResult[getAgentDisplayName("sisyphus")]?.permission?.todoread).toBeUndefined() + expect(agentResult[getAgentDisplayName("sisyphus")]?.permission?.todowrite).toBe("deny") + expect(agentResult[getAgentDisplayName("sisyphus")]?.permission?.todoread).toBe("deny") }) }) diff --git a/src/plugin-handlers/tool-config-handler.test.ts b/src/plugin-handlers/tool-config-handler.test.ts index 3eaa8afe3..a868a8d2e 100644 --- a/src/plugin-handlers/tool-config-handler.test.ts +++ b/src/plugin-handlers/tool-config-handler.test.ts @@ -15,7 +15,7 @@ function createParams(overrides: { return { config: { tools: {}, permission: {} } as Record, pluginConfig: { - experimental: { task_system: overrides.taskSystem ?? false }, + experimental: overrides.taskSystem === undefined ? 
undefined : { task_system: overrides.taskSystem }, disabled_tools: overrides.disabledTools, } as OhMyOpenCodeConfig, agentResult: agentResult as Record, @@ -216,6 +216,30 @@ describe("applyToolConfig", () => { }) }) + describe("#given task_system is undefined", () => { + describe("#when applying tool config", () => { + it.each([ + "atlas", + "sisyphus", + "hephaestus", + "prometheus", + "sisyphus-junior", + ])("#then should deny todo tools for %s agent by default", (agentName) => { + const params = createParams({ + agents: [agentName], + }) + + applyToolConfig(params) + + const agent = params.agentResult[agentName] as { + permission: Record + } + expect(agent.permission.todowrite).toBe("deny") + expect(agent.permission.todoread).toBe("deny") + }) + }) + }) + describe("#given disabled_tools includes 'question'", () => { let originalConfigContent: string | undefined let originalCliRunMode: string | undefined diff --git a/src/plugin-handlers/tool-config-handler.ts b/src/plugin-handlers/tool-config-handler.ts index 633d32577..33f30b352 100644 --- a/src/plugin-handlers/tool-config-handler.ts +++ b/src/plugin-handlers/tool-config-handler.ts @@ -15,7 +15,7 @@ function getConfigQuestionPermission(): string | null { } function agentByKey(agentResult: Record, key: string): AgentWithPermission | undefined { - return (agentResult[key] ?? agentResult[getAgentDisplayName(key)]) as + return (agentResult[getAgentDisplayName(key)] ?? agentResult[key]) as | AgentWithPermission | undefined; } @@ -25,7 +25,8 @@ export function applyToolConfig(params: { pluginConfig: OhMyOpenCodeConfig; agentResult: Record; }): void { - const denyTodoTools = params.pluginConfig.experimental?.task_system + const taskSystemEnabled = params.pluginConfig.experimental?.task_system ?? true + const denyTodoTools = taskSystemEnabled ? 
{ todowrite: "deny", todoread: "deny" } : {} @@ -40,7 +41,7 @@ export function applyToolConfig(params: { LspCodeActionResolve: false, "task_*": false, teammate: false, - ...(params.pluginConfig.experimental?.task_system + ...(taskSystemEnabled ? { todowrite: false, todoread: false } : {}), ...(skillDeniedByHost diff --git a/src/plugin/tool-execute-before.test.ts b/src/plugin/tool-execute-before.test.ts index 7383bfb63..06303504e 100644 --- a/src/plugin/tool-execute-before.test.ts +++ b/src/plugin/tool-execute-before.test.ts @@ -1,6 +1,7 @@ const { describe, expect, test } = require("bun:test") const { createToolExecuteBeforeHandler } = require("./tool-execute-before") const { createToolRegistry } = require("./tool-registry") +const { builtinTools } = require("../tools") describe("createToolExecuteBeforeHandler", () => { test("does not execute subagent question blocker hook for question tool", async () => { @@ -268,6 +269,44 @@ describe("createToolRegistry", () => { }) }) }) + + describe("#given max_tools is lower than or equal to builtin tool count", () => { + describe("#when creating the tool registry", () => { + test("#then it trims to the exact configured cap", () => { + const result = createToolRegistry( + createRegistryInput({ + experimental: { max_tools: Object.keys(builtinTools).length }, + }), + ) + + expect(Object.keys(result.filteredTools)).toHaveLength(Object.keys(builtinTools).length) + }) + }) + }) + + describe("#given max_tools is set below the full plugin tool count", () => { + describe("#when creating the tool registry", () => { + test("#then it enforces the exact cap deterministically", () => { + const result = createToolRegistry( + createRegistryInput({ + experimental: { max_tools: 10 }, + }), + ) + + expect(Object.keys(result.filteredTools)).toHaveLength(10) + }) + + test("#then it keeps the task tool when lower-priority tools can satisfy the cap", () => { + const result = createToolRegistry( + createRegistryInput({ + experimental: { max_tools: 
10 }, + }), + ) + + expect(result.filteredTools.task).toBeDefined() + }) + }) + }) }) export {} diff --git a/src/plugin/tool-registry.ts b/src/plugin/tool-registry.ts index f13546977..c29ecfce6 100644 --- a/src/plugin/tool-registry.ts +++ b/src/plugin/tool-registry.ts @@ -40,6 +40,63 @@ export type ToolRegistryResult = { taskSystemEnabled: boolean } +const LOW_PRIORITY_TOOL_ORDER = [ + "session_list", + "session_read", + "session_search", + "session_info", + "interactive_bash", + "look_at", + "call_omo_agent", + "task_create", + "task_get", + "task_list", + "task_update", + "background_output", + "background_cancel", + "hashline_edit", + "ast_grep_replace", + "ast_grep_search", + "glob", + "grep", + "skill_mcp", + "skill", + "task", + "lsp_rename", + "lsp_prepare_rename", + "lsp_find_references", + "lsp_goto_definition", + "lsp_symbols", + "lsp_diagnostics", +] as const + +function trimToolsToCap(filteredTools: ToolsRecord, maxTools: number): void { + const toolNames = Object.keys(filteredTools) + if (toolNames.length <= maxTools) return + + const removableToolNames = [ + ...LOW_PRIORITY_TOOL_ORDER.filter((toolName) => toolNames.includes(toolName)), + ...toolNames + .filter((toolName) => !LOW_PRIORITY_TOOL_ORDER.includes(toolName as (typeof LOW_PRIORITY_TOOL_ORDER)[number])) + .sort(), + ] + + let currentCount = toolNames.length + let removed = 0 + + for (const toolName of removableToolNames) { + if (currentCount <= maxTools) break + if (!filteredTools[toolName]) continue + delete filteredTools[toolName] + currentCount -= 1 + removed += 1 + } + + log( + `[tool-registry] Trimmed ${removed} tools to satisfy max_tools=${maxTools}. 
Final plugin tool count=${currentCount}.`, + ) +} + export function createToolRegistry(args: { ctx: PluginContext pluginConfig: OhMyOpenCodeConfig @@ -158,29 +215,7 @@ export function createToolRegistry(args: { const maxTools = pluginConfig.experimental?.max_tools if (maxTools) { - const estimatedBuiltinTools = 20 - const pluginToolBudget = maxTools - estimatedBuiltinTools - const toolEntries = Object.entries(filteredTools) - if (pluginToolBudget > 0 && toolEntries.length > pluginToolBudget) { - const excess = toolEntries.length - pluginToolBudget - log(`[tool-registry] Tool count (${toolEntries.length} plugin + ~${estimatedBuiltinTools} builtin = ~${toolEntries.length + estimatedBuiltinTools}) exceeds max_tools=${maxTools}. Trimming ${excess} lower-priority tools.`) - const lowPriorityTools = [ - "session_list", "session_read", "session_search", "session_info", - "call_omo_agent", "interactive_bash", "look_at", - "task_create", "task_get", "task_list", "task_update", - ] - let removed = 0 - for (const toolName of lowPriorityTools) { - if (removed >= excess) break - if (filteredTools[toolName]) { - delete filteredTools[toolName] - removed += 1 - } - } - if (removed < excess) { - log(`[tool-registry] WARNING: Could not trim enough tools. 
${toolEntries.length - removed} plugin tools remain.`) - } - } + trimToolsToCap(filteredTools, maxTools) } return { diff --git a/src/shared/context-limit-resolver.test.ts b/src/shared/context-limit-resolver.test.ts index a55209a5e..b6a8f6d9a 100644 --- a/src/shared/context-limit-resolver.test.ts +++ b/src/shared/context-limit-resolver.test.ts @@ -45,7 +45,7 @@ describe("resolveActualContextLimit", () => { expect(actualLimit).toBe(1_000_000) }) - it("returns cached limit for Anthropic models when modelContextLimitsCache has entry", () => { + it("returns default 200K for older Anthropic models when 1M mode is disabled", () => { // given delete process.env[ANTHROPIC_CONTEXT_ENV_KEY] delete process.env[VERTEX_CONTEXT_ENV_KEY] @@ -59,7 +59,7 @@ describe("resolveActualContextLimit", () => { }) // then - expect(actualLimit).toBe(500_000) + expect(actualLimit).toBe(200_000) }) it("returns default 200K for Anthropic models without cached limit and 1M mode disabled", () => { @@ -126,6 +126,40 @@ describe("resolveActualContextLimit", () => { expect(actualLimit).toBe(1_000_000) }) + it("supports Anthropic 4.6 high-variant model IDs without widening older models", () => { + // given + delete process.env[ANTHROPIC_CONTEXT_ENV_KEY] + delete process.env[VERTEX_CONTEXT_ENV_KEY] + const modelContextLimitsCache = new Map() + modelContextLimitsCache.set("anthropic/claude-sonnet-4-6-high", 500_000) + + // when + const actualLimit = resolveActualContextLimit("anthropic", "claude-sonnet-4-6-high", { + anthropicContext1MEnabled: false, + modelContextLimitsCache, + }) + + // then + expect(actualLimit).toBe(500_000) + }) + + it("ignores stale cached limits for older Anthropic models with suffixed IDs", () => { + // given + delete process.env[ANTHROPIC_CONTEXT_ENV_KEY] + delete process.env[VERTEX_CONTEXT_ENV_KEY] + const modelContextLimitsCache = new Map() + modelContextLimitsCache.set("anthropic/claude-sonnet-4-5-high", 500_000) + + // when + const actualLimit = 
resolveActualContextLimit("anthropic", "claude-sonnet-4-5-high", { + anthropicContext1MEnabled: false, + modelContextLimitsCache, + }) + + // then + expect(actualLimit).toBe(200_000) + }) + it("returns null for non-Anthropic providers without a cached limit", () => { // given delete process.env[ANTHROPIC_CONTEXT_ENV_KEY] diff --git a/src/shared/context-limit-resolver.ts b/src/shared/context-limit-resolver.ts index e714989a5..2bf2a8147 100644 --- a/src/shared/context-limit-resolver.ts +++ b/src/shared/context-limit-resolver.ts @@ -19,6 +19,10 @@ function getAnthropicActualLimit(modelCacheState?: ContextLimitModelCacheState): : DEFAULT_ANTHROPIC_ACTUAL_LIMIT } +function supportsCachedAnthropicLimit(modelID: string): boolean { + return /^claude-(opus|sonnet)-4(?:-|\.)6(?:-high)?$/.test(modelID) +} + export function resolveActualContextLimit( providerID: string, modelID: string, @@ -29,7 +33,7 @@ export function resolveActualContextLimit( if (explicit1M === 1_000_000) return explicit1M const cachedLimit = modelCacheState?.modelContextLimitsCache?.get(`${providerID}/${modelID}`) - if (cachedLimit) return cachedLimit + if (cachedLimit && supportsCachedAnthropicLimit(modelID)) return cachedLimit return DEFAULT_ANTHROPIC_ACTUAL_LIMIT } diff --git a/src/shared/migrate-legacy-plugin-entry.test.ts b/src/shared/migrate-legacy-plugin-entry.test.ts index 8a51080d2..544e245bc 100644 --- a/src/shared/migrate-legacy-plugin-entry.test.ts +++ b/src/shared/migrate-legacy-plugin-entry.test.ts @@ -2,7 +2,10 @@ import { afterEach, beforeEach, describe, expect, it } from "bun:test" import { mkdirSync, readFileSync, rmSync, writeFileSync } from "node:fs" import { tmpdir } from "node:os" import { join } from "node:path" -import { migrateLegacyPluginEntry } from "./migrate-legacy-plugin-entry" + +async function importFreshMigrationModule(): Promise { + return import(`./migrate-legacy-plugin-entry?test=${Date.now()}-${Math.random()}`) +} describe("migrateLegacyPluginEntry", () => { let 
testDir = "" @@ -18,9 +21,10 @@ describe("migrateLegacyPluginEntry", () => { describe("#given opencode.json contains oh-my-opencode plugin entry", () => { describe("#when migrating the config", () => { - it("#then replaces oh-my-opencode with oh-my-openagent", () => { + it("#then replaces oh-my-opencode with oh-my-openagent", async () => { const configPath = join(testDir, "opencode.json") writeFileSync(configPath, JSON.stringify({ plugin: ["oh-my-opencode@latest"] }, null, 2)) + const { migrateLegacyPluginEntry } = await importFreshMigrationModule() const result = migrateLegacyPluginEntry(configPath) @@ -34,9 +38,10 @@ describe("migrateLegacyPluginEntry", () => { describe("#given opencode.json contains bare oh-my-opencode entry", () => { describe("#when migrating the config", () => { - it("#then replaces with oh-my-openagent", () => { + it("#then replaces with oh-my-openagent", async () => { const configPath = join(testDir, "opencode.json") writeFileSync(configPath, JSON.stringify({ plugin: ["oh-my-opencode"] }, null, 2)) + const { migrateLegacyPluginEntry } = await importFreshMigrationModule() const result = migrateLegacyPluginEntry(configPath) @@ -50,9 +55,10 @@ describe("migrateLegacyPluginEntry", () => { describe("#given opencode.json contains pinned oh-my-opencode version", () => { describe("#when migrating the config", () => { - it("#then preserves the version pin", () => { + it("#then preserves the version pin", async () => { const configPath = join(testDir, "opencode.json") writeFileSync(configPath, JSON.stringify({ plugin: ["oh-my-opencode@3.11.0"] }, null, 2)) + const { migrateLegacyPluginEntry } = await importFreshMigrationModule() const result = migrateLegacyPluginEntry(configPath) @@ -65,10 +71,11 @@ describe("migrateLegacyPluginEntry", () => { describe("#given opencode.json already uses oh-my-openagent", () => { describe("#when checking for migration", () => { - it("#then returns false and does not modify the file", () => { + it("#then returns false 
and does not modify the file", async () => { const configPath = join(testDir, "opencode.json") const original = JSON.stringify({ plugin: ["oh-my-openagent@latest"] }, null, 2) writeFileSync(configPath, original) + const { migrateLegacyPluginEntry } = await importFreshMigrationModule() const result = migrateLegacyPluginEntry(configPath) @@ -78,9 +85,89 @@ describe("migrateLegacyPluginEntry", () => { }) }) + describe("#given plugin entries contain both canonical and legacy values", () => { + describe("#when migrating the config", () => { + it("#then removes the legacy entry instead of duplicating the canonical one", async () => { + const configPath = join(testDir, "opencode.json") + writeFileSync(configPath, JSON.stringify({ plugin: ["oh-my-openagent", "oh-my-opencode"] }, null, 2)) + const { migrateLegacyPluginEntry } = await importFreshMigrationModule() + + const result = migrateLegacyPluginEntry(configPath) + + expect(result).toBe(true) + const saved = JSON.parse(readFileSync(configPath, "utf-8")) as { plugin: string[] } + expect(saved.plugin).toEqual(["oh-my-openagent"]) + }) + }) + }) + + describe("#given unrelated strings contain the legacy package name", () => { + describe("#when migrating the config", () => { + it("#then rewrites only plugin entries and preserves unrelated fields", async () => { + const configPath = join(testDir, "opencode.json") + writeFileSync( + configPath, + JSON.stringify( + { + plugin: ["oh-my-opencode"], + notes: "keep oh-my-opencode in this text field", + paths: ["/tmp/oh-my-opencode/cache"], + }, + null, + 2, + ), + ) + const { migrateLegacyPluginEntry } = await importFreshMigrationModule() + + const result = migrateLegacyPluginEntry(configPath) + + expect(result).toBe(true) + const saved = JSON.parse(readFileSync(configPath, "utf-8")) as { + plugin: string[] + notes: string + paths: string[] + } + expect(saved.plugin).toEqual(["oh-my-openagent"]) + expect(saved.notes).toBe("keep oh-my-opencode in this text field") + 
expect(saved.paths).toEqual(["/tmp/oh-my-opencode/cache"]) + }) + }) + }) + + describe("#given opencode.jsonc contains a nested plugin key before the top-level plugin array", () => { + describe("#when migrating the config", () => { + it("#then rewrites only the top-level plugin array", async () => { + const configPath = join(testDir, "opencode.jsonc") + writeFileSync( + configPath, + `{ + "nested": { + "plugin": ["oh-my-opencode"] + }, + "plugin": ["oh-my-opencode@latest"] +} +`, + ) + const { migrateLegacyPluginEntry } = await importFreshMigrationModule() + + const result = migrateLegacyPluginEntry(configPath) + + expect(result).toBe(true) + const content = readFileSync(configPath, "utf-8") + expect(content).toContain(`"nested": { + "plugin": ["oh-my-opencode"] + }`) + expect(content).toContain(`"plugin": [ + "oh-my-openagent@latest" + ]`) + }) + }) + }) + describe("#given config file does not exist", () => { describe("#when attempting migration", () => { - it("#then returns false", () => { + it("#then returns false", async () => { + const { migrateLegacyPluginEntry } = await importFreshMigrationModule() const result = migrateLegacyPluginEntry(join(testDir, "nonexistent.json")) expect(result).toBe(false) diff --git a/src/shared/migrate-legacy-plugin-entry.ts b/src/shared/migrate-legacy-plugin-entry.ts index 9d38c9071..1eee6ae2e 100644 --- a/src/shared/migrate-legacy-plugin-entry.ts +++ b/src/shared/migrate-legacy-plugin-entry.ts @@ -1,8 +1,54 @@ import { existsSync, readFileSync, writeFileSync } from "node:fs" +import { applyEdits, modify } from "jsonc-parser" +import { parseJsoncSafe } from "./jsonc-parser" import { log } from "./logger" import { LEGACY_PLUGIN_NAME, PLUGIN_NAME } from "./plugin-identity" +interface OpenCodeConfig { + plugin?: string[] +} + +function isLegacyEntry(entry: string): boolean { + return entry === LEGACY_PLUGIN_NAME || entry.startsWith(`${LEGACY_PLUGIN_NAME}@`) +} + +function isCanonicalEntry(entry: string): boolean { + return entry === 
PLUGIN_NAME || entry.startsWith(`${PLUGIN_NAME}@`) +} + +function toCanonicalEntry(entry: string): string { + if (entry === LEGACY_PLUGIN_NAME) return PLUGIN_NAME + if (entry.startsWith(`${LEGACY_PLUGIN_NAME}@`)) { + return `${PLUGIN_NAME}${entry.slice(LEGACY_PLUGIN_NAME.length)}` + } + return entry +} + +function normalizePluginEntries(entries: string[]): string[] { + const hasCanonical = entries.some(isCanonicalEntry) + + if (hasCanonical) { + return entries.filter((entry) => !isLegacyEntry(entry)) + } + + return entries.map((entry) => (isLegacyEntry(entry) ? toCanonicalEntry(entry) : entry)) +} + +function updateJsoncPluginArray(content: string, pluginEntries: string[]): string | null { + const edits = modify(content, ["plugin"], pluginEntries, { + formattingOptions: { + insertSpaces: true, + tabSize: 2, + eol: "\n", + }, + getInsertionIndex: () => 0, + }) + + if (edits.length === 0) return null + return applyEdits(content, edits) +} + export function migrateLegacyPluginEntry(configPath: string): boolean { if (!existsSync(configPath)) return false @@ -10,8 +56,15 @@ export function migrateLegacyPluginEntry(configPath: string): boolean { const content = readFileSync(configPath, "utf-8") if (!content.includes(LEGACY_PLUGIN_NAME)) return false - const updated = content.replaceAll(LEGACY_PLUGIN_NAME, PLUGIN_NAME) - if (updated === content) return false + const parseResult = parseJsoncSafe(content) + const pluginEntries = parseResult.data?.plugin + if (!pluginEntries || !pluginEntries.some(isLegacyEntry)) return false + + const updatedPluginEntries = normalizePluginEntries(pluginEntries) + const updated = configPath.endsWith(".jsonc") + ? 
updateJsoncPluginArray(content, updatedPluginEntries) + : JSON.stringify({ ...(parseResult.data as OpenCodeConfig), plugin: updatedPluginEntries }, null, 2) + "\n" + if (!updated || updated === content) return false writeFileSync(configPath, updated, "utf-8") log("[migrateLegacyPluginEntry] Auto-migrated opencode.json plugin entry", { diff --git a/src/shared/skill-path-resolver.test.ts b/src/shared/skill-path-resolver.test.ts index da29ee863..a9815b7fd 100644 --- a/src/shared/skill-path-resolver.test.ts +++ b/src/shared/skill-path-resolver.test.ts @@ -125,4 +125,28 @@ describe("resolveSkillPathReferences", () => { //#then expect(result).toBe("/skills/frontend/scripts/search.py") }) + + it("does not resolve traversal paths that escape the base directory", () => { + //#given + const content = "Read @data/../../../../etc/passwd before running" + const basePath = "/skills/frontend" + + //#when + const result = resolveSkillPathReferences(content, basePath) + + //#then + expect(result).toBe("Read @data/../../../../etc/passwd before running") + }) + + it("does not resolve directory traversal with trailing slash", () => { + //#given + const content = "Inspect @data/../../../secret/" + const basePath = "/skills/frontend" + + //#when + const result = resolveSkillPathReferences(content, basePath) + + //#then + expect(result).toBe("Inspect @data/../../../secret/") + }) }) diff --git a/src/shared/skill-path-resolver.ts b/src/shared/skill-path-resolver.ts index 72b8f93a5..6d088171d 100644 --- a/src/shared/skill-path-resolver.ts +++ b/src/shared/skill-path-resolver.ts @@ -1,4 +1,4 @@ -import { join } from "path" +import { isAbsolute, relative, resolve, sep } from "node:path" function looksLikeFilePath(path: string): boolean { if (path.endsWith("/")) return true @@ -6,22 +6,21 @@ function looksLikeFilePath(path: string): boolean { return /\.[a-zA-Z0-9]+$/.test(lastSegment) } -/** - * Resolves @path references in skill content to absolute paths. 
- * - * Matches @references that contain at least one slash (e.g., @scripts/search.py, @data/) - * to avoid false positives with decorators (@param), JSDoc tags (@ts-ignore), etc. - * Also skips npm scoped packages (@scope/package) by requiring a file extension or trailing slash. - * - * Email addresses are excluded since they have alphanumeric characters before @. - */ export function resolveSkillPathReferences(content: string, basePath: string): string { const normalizedBase = basePath.endsWith("/") ? basePath.slice(0, -1) : basePath return content.replace( /(? { if (!looksLikeFilePath(relativePath)) return match - return join(normalizedBase, relativePath) + const resolvedPath = resolve(normalizedBase, relativePath) + const relativePathFromBase = relative(normalizedBase, resolvedPath) + if (relativePathFromBase.startsWith("..") || isAbsolute(relativePathFromBase)) { + return match + } + if (relativePath.endsWith("/") && !resolvedPath.endsWith(sep)) { + return `${resolvedPath}/` + } + return resolvedPath } ) } diff --git a/src/tools/delegate-task/model-selection.test.ts b/src/tools/delegate-task/model-selection.test.ts index 70b71a2b9..3bc7c2c88 100644 --- a/src/tools/delegate-task/model-selection.test.ts +++ b/src/tools/delegate-task/model-selection.test.ts @@ -76,7 +76,9 @@ describe("resolveModelForDelegateTask", () => { }) describe("#when availableModels is empty (cache exists but empty)", () => { - test("#then falls through to category default model (existing behavior)", () => { + test("#then keeps the category default when its provider is connected", () => { + const readConnectedProvidersSpy = spyOn(connectedProvidersCache, "readConnectedProvidersCache").mockReturnValue(["anthropic"]) + const result = resolveModelForDelegateTask({ categoryDefaultModel: "anthropic/claude-sonnet-4-6", fallbackChain: [ @@ -87,6 +89,40 @@ describe("resolveModelForDelegateTask", () => { }) expect(result).toEqual({ model: "anthropic/claude-sonnet-4-6" }) + 
readConnectedProvidersSpy.mockRestore() + }) + + test("#then skips a disconnected category default and resolves via a connected fallback", () => { + const readConnectedProvidersSpy = spyOn(connectedProvidersCache, "readConnectedProvidersCache").mockReturnValue(["openai"]) + + const result = resolveModelForDelegateTask({ + categoryDefaultModel: "anthropic/claude-sonnet-4-6", + fallbackChain: [ + { providers: ["openai"], model: "gpt-5.4", variant: "high" }, + ], + availableModels: new Set(), + systemDefaultModel: "anthropic/claude-sonnet-4-6", + }) + + expect(result).toEqual({ + model: "openai/gpt-5.4", + variant: "high", + fallbackEntry: { providers: ["openai"], model: "gpt-5.4", variant: "high" }, + matchedFallback: true, + }) + readConnectedProvidersSpy.mockRestore() + }) + + test("#then skips disconnected user fallback models and keeps the first connected fallback", () => { + const readConnectedProvidersSpy = spyOn(connectedProvidersCache, "readConnectedProvidersCache").mockReturnValue(["openai"]) + + const result = resolveModelForDelegateTask({ + userFallbackModels: ["anthropic/claude-sonnet-4-6", "openai/gpt-5.4"], + availableModels: new Set(), + }) + + expect(result).toEqual({ model: "openai/gpt-5.4", matchedFallback: true }) + readConnectedProvidersSpy.mockRestore() }) }) @@ -225,16 +261,23 @@ describe("resolveModelForDelegateTask", () => { }) describe("#when availableModels is empty", () => { - test("#then falls through to existing resolution (cache partially ready)", () => { + test("#then uses connected providers to avoid disconnected category defaults", () => { + const readConnectedProvidersSpy = spyOn(connectedProvidersCache, "readConnectedProvidersCache").mockReturnValue(["openai"]) + const result = resolveModelForDelegateTask({ categoryDefaultModel: "anthropic/claude-sonnet-4-6", fallbackChain: [ - { providers: ["anthropic"], model: "claude-sonnet-4-6" }, + { providers: ["openai"], model: "gpt-5.4" }, ], availableModels: new Set(), }) - 
expect(result).toBeDefined() + expect(result).toEqual({ + model: "openai/gpt-5.4", + fallbackEntry: { providers: ["openai"], model: "gpt-5.4" }, + matchedFallback: true, + }) + readConnectedProvidersSpy.mockRestore() }) }) }) diff --git a/src/tools/delegate-task/model-selection.ts b/src/tools/delegate-task/model-selection.ts index d3e33a142..1e7ce2c4e 100644 --- a/src/tools/delegate-task/model-selection.ts +++ b/src/tools/delegate-task/model-selection.ts @@ -59,6 +59,8 @@ export function resolveModelForDelegateTask(input: { return { model: userModel } } + const connectedProviders = input.availableModels.size === 0 ? readConnectedProvidersCache() : null + // Before provider cache is created (first run), skip model resolution entirely. // OpenCode will use its system default model when no model is specified in the prompt. if (input.availableModels.size === 0 && !hasProviderModelsCache() && !hasConnectedProvidersCache()) { @@ -77,7 +79,15 @@ export function resolveModelForDelegateTask(input: { } if (input.availableModels.size === 0) { - return { model: categoryDefault } + const categoryProvider = categoryDefault.includes("/") ? categoryDefault.split("/")[0] : undefined + if (!connectedProviders || !categoryProvider || connectedProviders.includes(categoryProvider)) { + return { model: categoryDefault } + } + + log("[resolveModelForDelegateTask] skipping disconnected category default on cold cache", { + categoryDefault, + connectedProviders, + }) } const parts = categoryDefault.split("/") @@ -95,9 +105,19 @@ export function resolveModelForDelegateTask(input: { const userFallbackModels = input.userFallbackModels if (userFallbackModels && userFallbackModels.length > 0) { if (input.availableModels.size === 0) { - const first = userFallbackModels[0] ? 
parseUserFallbackModel(userFallbackModels[0]) : undefined - if (first) { - return { model: first.baseModel, variant: first.variant, matchedFallback: true } + for (const fallbackModel of userFallbackModels) { + const parsedFallback = parseUserFallbackModel(fallbackModel) + if (!parsedFallback) continue + + if ( + connectedProviders && + parsedFallback.providerHint && + !parsedFallback.providerHint.some((provider) => connectedProviders.includes(provider)) + ) { + continue + } + + return { model: parsedFallback.baseModel, variant: parsedFallback.variant, matchedFallback: true } } } else { for (const fallbackModel of userFallbackModels) { @@ -115,7 +135,6 @@ export function resolveModelForDelegateTask(input: { const fallbackChain = input.fallbackChain if (fallbackChain && fallbackChain.length > 0) { if (input.availableModels.size === 0) { - const connectedProviders = readConnectedProvidersCache() if (connectedProviders) { const connectedSet = new Set(connectedProviders) for (const entry of fallbackChain) { diff --git a/src/tools/delegate-task/tools.ts b/src/tools/delegate-task/tools.ts index a1d69d228..f722f9296 100644 --- a/src/tools/delegate-task/tools.ts +++ b/src/tools/delegate-task/tools.ts @@ -76,13 +76,13 @@ export function createDelegateTask(options: DelegateTaskToolOptions): ToolDefini - category: For task delegation (uses Sisyphus-Junior with category-optimized model) - subagent_type: For direct agent invocation (explore, librarian, oracle, etc.) - **DO NOT provide both.** If category is provided, subagent_type is ignored. + **DO NOT provide both.** category and subagent_type are mutually exclusive. - load_skills: ALWAYS REQUIRED. Pass [] if no skills needed, or ["skill-1", "skill-2"] for category tasks. 
- category: Use predefined category → Spawns Sisyphus-Junior with category config Available categories: ${categoryList} - - subagent_type: Use specific agent directly (explore, librarian, oracle, metis, momus) + - subagent_type: Use a specific callable non-primary agent directly (for example: explore, librarian, oracle, metis, momus) - run_in_background: REQUIRED. true=async (returns task_id), false=sync (waits). Use background=true ONLY for parallel exploration with 5+ independent queries. - session_id: Existing Task session to continue (from previous task output). Continues agent with FULL CONTEXT PRESERVED - saves tokens, maintains continuity. - command: The command that triggered this task (optional, for slash command tracking). @@ -102,7 +102,7 @@ export function createDelegateTask(options: DelegateTaskToolOptions): ToolDefini prompt: tool.schema.string().describe("Full detailed prompt for the agent"), run_in_background: tool.schema.boolean().describe("REQUIRED. true=async (returns task_id), false=sync (waits). Use false for task delegation, true ONLY for parallel exploration."), category: tool.schema.string().optional().describe(`REQUIRED if subagent_type not provided. Do NOT provide both category and subagent_type.`), - subagent_type: tool.schema.string().optional().describe("REQUIRED if category not provided. Do NOT provide both category and subagent_type. Valid values: explore, librarian, oracle, metis, momus"), + subagent_type: tool.schema.string().optional().describe("REQUIRED if category not provided. Do NOT provide both category and subagent_type. 
Must be a callable non-primary agent name returned by app.agents()."), session_id: tool.schema.string().optional().describe("Existing Task session to continue"), command: tool.schema.string().optional().describe("The command that triggered this task"), }, @@ -115,7 +115,7 @@ export function createDelegateTask(options: DelegateTaskToolOptions): ToolDefini ` - You provided: category="${args.category}", subagent_type="${args.subagent_type}"\n` + ` - Use category for task delegation (e.g., category="${categoryExamples.split(", ")[0]}")\n` + ` - Use subagent_type for direct agent invocation (e.g., subagent_type="explore")\n` + - ` - Valid subagent_type values: explore, librarian, oracle, metis, momus` + ` - subagent_type must be a callable non-primary agent name returned by app.agents()` ) } if (args.category) { diff --git a/src/tools/skill/tools.test.ts b/src/tools/skill/tools.test.ts index 281e073d0..871a58382 100644 --- a/src/tools/skill/tools.test.ts +++ b/src/tools/skill/tools.test.ts @@ -616,6 +616,33 @@ describe("skill tool - browserProvider forwarding", () => { }) describe("skill tool - nativeSkills integration", () => { + it("includes native skills in the description even when skills are pre-seeded", async () => { + //#given + const tool = createSkillTool({ + skills: [createMockSkill("seeded-skill")], + nativeSkills: { + async all() { + return [{ + name: "native-visible-skill", + description: "Native skill exposed from config", + location: "/external/skills/native-visible-skill/SKILL.md", + content: "Native visible skill body", + }] + }, + async get() { return undefined }, + async dirs() { return [] }, + }, + }) + + //#when + expect(tool.description).toContain("seeded-skill") + await tool.execute({ name: "native-visible-skill" }, mockContext) + + //#then + expect(tool.description).toContain("seeded-skill") + expect(tool.description).toContain("native-visible-skill") + }) + it("merges native skills exposed by PluginInput.skills.all()", async () => { //#given 
const tool = createSkillTool({ @@ -639,6 +666,6 @@ describe("skill tool - nativeSkills integration", () => { //#then expect(result).toContain("external-plugin-skill") - expect(result).toContain("Test skill body content") + expect(result).toContain("External plugin skill body") }) }) diff --git a/src/tools/skill/tools.ts b/src/tools/skill/tools.ts index 19d146c8d..2274ca17d 100644 --- a/src/tools/skill/tools.ts +++ b/src/tools/skill/tools.ts @@ -105,6 +105,11 @@ async function extractSkillBody(skill: LoadedSkill): Promise { return templateMatch ? templateMatch[1].trim() : fullTemplate } + if (skill.scope === "config" && skill.definition.template) { + const templateMatch = skill.definition.template.match(/([\s\S]*?)<\/skill-instruction>/) + return templateMatch ? templateMatch[1].trim() : skill.definition.template + } + if (skill.path) { return extractSkillTemplate(skill) } @@ -235,11 +240,13 @@ export function createSkillTool(options: SkillLoadOptions = {}): ToolDefinition return cachedDescription } - // Eagerly build description when callers pre-provide skills/commands. if (options.skills !== undefined) { const skillInfos = options.skills.map(loadedSkillToInfo) const commandsForDescription = options.commands ?? [] cachedDescription = formatCombinedDescription(skillInfos, commandsForDescription) + if (options.nativeSkills) { + void buildDescription() + } } else if (options.commands !== undefined) { cachedDescription = formatCombinedDescription([], options.commands) } else { @@ -248,6 +255,9 @@ export function createSkillTool(options: SkillLoadOptions = {}): ToolDefinition return tool({ get description() { + if (cachedDescription === null) { + void buildDescription() + } return cachedDescription ?? 
TOOL_DESCRIPTION_PREFIX }, args: { @@ -259,8 +269,8 @@ export function createSkillTool(options: SkillLoadOptions = {}): ToolDefinition }, async execute(args: SkillArgs, ctx?: { agent?: string }) { const skills = await getSkills() - cachedDescription = null const commands = getCommands() + cachedDescription = formatCombinedDescription(skills.map(loadedSkillToInfo), commands) const requestedName = args.name.replace(/^\//, "")