Merge pull request #2366 from code-yeongyu/fix/issue-2338

fix: honor model-specific context limits for non-Anthropic models
2026-03-11 20:06:44 +09:00
parent 836ce97f07 7de80e6717
commit 3a980c53e6
6 changed files with 189 additions and 20 deletions
--- a/src/hooks/context-window-monitor.model-context-limits.test.ts
+++ b/src/hooks/context-window-monitor.model-context-limits.test.ts
@@ -0,0 +1,93 @@
+/// <reference types="bun-types" />
+
+import { describe, expect, it } from "bun:test"
+import { createContextWindowMonitorHook } from "./context-window-monitor"
+
+function createOutput() { // Builds a fresh tool-output object per test; the hook under test appends to its `output` field in place.
+  return { title: "", output: "original", metadata: null } // "original" is the sentinel text the assertions compare against
+}
+
+describe("context-window-monitor modelContextLimitsCache", () => { // Exercises limits supplied through modelContextLimitsCache for a provider that is not Anthropic.
+  it("does not append reminder below cached non-anthropic threshold", async () => {
+    // given
+    const modelContextLimitsCache = new Map<string, number>()
+    modelContextLimitsCache.set("opencode/kimi-k2.5-free", 262144) // key format is "<providerID>/<modelID>", value is the limit in tokens
+
+    const hook = createContextWindowMonitorHook({} as never, {
+      anthropicContext1MEnabled: false,
+      modelContextLimitsCache,
+    })
+    const sessionID = "ses_non_anthropic_below_threshold"
+
+    await hook.event({ // record this session's token usage through a message.updated event
+      event: {
+        type: "message.updated",
+        properties: {
+          info: {
+            role: "assistant",
+            sessionID,
+            providerID: "opencode",
+            modelID: "kimi-k2.5-free",
+            finish: true,
+            tokens: {
+              input: 150000, // input + cache.read = 160,000, below 70% of 262,144 (~183,501)
+              output: 0,
+              reasoning: 0,
+              cache: { read: 10000, write: 0 },
+            },
+          },
+        },
+      },
+    })
+
+    // when
+    const output = createOutput()
+    await hook["tool.execute.after"]({ tool: "bash", sessionID, callID: "call_1" }, output)
+
+    // then
+    expect(output.output).toBe("original") // untouched output means no reminder was appended
+  })
+
+  it("appends reminder above cached non-anthropic threshold", async () => {
+    // given
+    const modelContextLimitsCache = new Map<string, number>()
+    modelContextLimitsCache.set("opencode/kimi-k2.5-free", 262144) // key format is "<providerID>/<modelID>", value is the limit in tokens
+
+    const hook = createContextWindowMonitorHook({} as never, {
+      anthropicContext1MEnabled: false,
+      modelContextLimitsCache,
+    })
+    const sessionID = "ses_non_anthropic_above_threshold"
+
+    await hook.event({ // record this session's token usage through a message.updated event
+      event: {
+        type: "message.updated",
+        properties: {
+          info: {
+            role: "assistant",
+            sessionID,
+            providerID: "opencode",
+            modelID: "kimi-k2.5-free",
+            finish: true,
+            tokens: {
+              input: 180000, // input + cache.read = 190,000, above 70% of 262,144 (~183,501)
+              output: 0,
+              reasoning: 0,
+              cache: { read: 10000, write: 0 },
+            },
+          },
+        },
+      },
+    })
+
+    // when
+    const output = createOutput()
+    await hook["tool.execute.after"]({ tool: "bash", sessionID, callID: "call_1" }, output)
+
+    // then
+    expect(output.output).toContain("context remaining")
+    expect(output.output).toContain("262,144-token context window") // expects comma digit grouping in the formatted limit
+    expect(output.output).toContain("[Context Status: 72.5% used (190,000/262,144 tokens), 27.5% remaining]") // 190,000 / 262,144 = 72.48%, shown with one decimal
+    expect(output.output).not.toContain("1,000,000") // the former fixed 1,000,000 display limit must not appear
+  })
+})
--- a/src/hooks/context-window-monitor.ts
+++ b/src/hooks/context-window-monitor.ts
@@ -1,12 +1,12 @@
 import type { PluginInput } from "@opencode-ai/plugin"
 import { createSystemDirective, SystemDirectiveTypes } from "../shared/system-directive"

-const ANTHROPIC_DISPLAY_LIMIT = 1_000_000
 const DEFAULT_ANTHROPIC_ACTUAL_LIMIT = 200_000
 const CONTEXT_WARNING_THRESHOLD = 0.70

 type ModelCacheStateLike = {
  anthropicContext1MEnabled: boolean
+  modelContextLimitsCache?: Map<string, number> // optional per-model context limits in tokens, looked up by "<providerID>/<modelID>"
 }

 function getAnthropicActualLimit(modelCacheState?: ModelCacheStateLike): number {
@@ -17,11 +17,15 @@ function getAnthropicActualLimit(modelCacheState?: ModelCacheStateLike): number
    : DEFAULT_ANTHROPIC_ACTUAL_LIMIT
 }

-const CONTEXT_REMINDER = `${createSystemDirective(SystemDirectiveTypes.CONTEXT_WINDOW_MONITOR)}
+function createContextReminder(actualLimit: number): string { // Builds the reminder text, quoting the given context limit (in tokens) with digit grouping.
+  const limitTokens = actualLimit.toLocaleString("en-US") // pin the locale so the text always reads e.g. "262,144", whatever the host's default locale is

-You are using Anthropic Claude with 1M context window.
-You have plenty of context remaining - do NOT rush or skip tasks.
+  return `${createSystemDirective(SystemDirectiveTypes.CONTEXT_WINDOW_MONITOR)}
+
+You are using a ${limitTokens}-token context window.
+You still have context remaining - do NOT rush or skip tasks.
 Complete your work thoroughly and methodically.`
+}

 interface TokenInfo {
  input: number
@@ -32,6 +36,7 @@ interface TokenInfo {

 interface CachedTokenState {
  providerID: string
+  modelID: string // stored as "" when the message info carries no modelID
  tokens: TokenInfo
 }

@@ -57,25 +62,30 @@ export function createContextWindowMonitorHook(
    const cached = tokenCache.get(sessionID)
    if (!cached) return

-    if (!isAnthropicProvider(cached.providerID)) return
+    const cachedLimit = modelCacheState?.modelContextLimitsCache?.get(
+      `${cached.providerID}/${cached.modelID}`
+    )
+    const actualLimit =
+      cachedLimit ??
+      (isAnthropicProvider(cached.providerID) ? getAnthropicActualLimit(modelCacheState) : null)
+
+    if (!actualLimit) return

    const lastTokens = cached.tokens
    const totalInputTokens = (lastTokens?.input ?? 0) + (lastTokens?.cache?.read ?? 0)

-    const actualUsagePercentage =
-      totalInputTokens / getAnthropicActualLimit(modelCacheState)
+    const actualUsagePercentage = totalInputTokens / actualLimit

    if (actualUsagePercentage < CONTEXT_WARNING_THRESHOLD) return

    remindedSessions.add(sessionID)

-    const displayUsagePercentage = totalInputTokens / ANTHROPIC_DISPLAY_LIMIT
-    const usedPct = (displayUsagePercentage * 100).toFixed(1)
-    const remainingPct = ((1 - displayUsagePercentage) * 100).toFixed(1)
+    const usedPct = (actualUsagePercentage * 100).toFixed(1)
+    const remainingPct = ((1 - actualUsagePercentage) * 100).toFixed(1)
    const usedTokens = totalInputTokens.toLocaleString()
-    const limitTokens = ANTHROPIC_DISPLAY_LIMIT.toLocaleString()
+    const limitTokens = actualLimit.toLocaleString()

-    output.output += `\n\n${CONTEXT_REMINDER}
+    output.output += `\n\n${createContextReminder(actualLimit)}
 [Context Status: ${usedPct}% used (${usedTokens}/${limitTokens} tokens), ${remainingPct}% remaining]`
  }

@@ -95,6 +105,7 @@ export function createContextWindowMonitorHook(
        role?: string
        sessionID?: string
        providerID?: string
+        modelID?: string
        finish?: boolean
        tokens?: TokenInfo
      } | undefined
@@ -104,6 +115,7 @@ export function createContextWindowMonitorHook(

      tokenCache.set(info.sessionID, {
        providerID: info.providerID,
+        modelID: info.modelID ?? "",
        tokens: info.tokens,
      })
    }
--- a/src/hooks/tool-output-truncator.test.ts
+++ b/src/hooks/tool-output-truncator.test.ts
@@ -19,6 +19,20 @@ describe("createToolOutputTruncatorHook", () => {
    hook = createToolOutputTruncatorHook({} as never)
  })

+  it("passes modelContextLimitsCache through to createDynamicTruncator", () => { // NOTE(review): truncateSpy is defined outside this hunk; presumably it wraps createDynamicTruncator — confirm
+    const ctx = {} as never
+    const modelContextLimitsCache = new Map<string, number>()
+    const modelCacheState = {
+      anthropicContext1MEnabled: false,
+      modelContextLimitsCache,
+    }
+
+    truncateSpy.mockClear() // reset recorded calls so the assertion only sees the hook creation below
+    createToolOutputTruncatorHook(ctx, { modelCacheState })
+
+    expect(truncateSpy).toHaveBeenLastCalledWith(ctx, modelCacheState) // the state, cache map included, must reach the spy alongside ctx
+  })
+
  describe("tool.execute.after", () => {
    const createInput = (tool: string) => ({
      tool,
--- a/src/hooks/tool-output-truncator.ts
+++ b/src/hooks/tool-output-truncator.ts
@@ -27,7 +27,10 @@ const TOOL_SPECIFIC_MAX_TOKENS: Record<string, number> = {
 }

 interface ToolOutputTruncatorOptions {
-  modelCacheState?: { anthropicContext1MEnabled: boolean }
+  modelCacheState?: {
+    anthropicContext1MEnabled: boolean
+    modelContextLimitsCache?: Map<string, number> // optional per-model context limits; per the accompanying test it is forwarded with the rest of modelCacheState
+  }
  experimental?: ExperimentalConfig
 }

--- a/src/shared/dynamic-truncator.test.ts
+++ b/src/shared/dynamic-truncator.test.ts
@@ -24,7 +24,10 @@ function resetContextLimitEnv(): void {
  }
 }

-function createContextUsageMockContext(inputTokens: number) {
+function createContextUsageMockContext(
+  inputTokens: number,
+  options?: { providerID?: string; modelID?: string; cacheRead?: number }
+) {
  return {
    client: {
      session: {
@@ -33,11 +36,13 @@ function createContextUsageMockContext(inputTokens: number) {
            {
              info: {
                role: "assistant",
+                providerID: options?.providerID ?? "anthropic",
+                modelID: options?.modelID,
                tokens: {
                  input: inputTokens,
                  output: 0,
                  reasoning: 0,
-                  cache: { read: 0, write: 0 },
+                  cache: { read: options?.cacheRead ?? 0, write: 0 },
                },
              },
            },
@@ -99,4 +104,24 @@ describe("getContextWindowUsage", () => {
    expect(usage?.usagePercentage).toBe(0.3)
    expect(usage?.remainingTokens).toBe(700000)
  })
+
+  it("uses model-specific limit for non-anthropic providers when cached", async () => {
+    // given
+    const modelContextLimitsCache = new Map<string, number>()
+    modelContextLimitsCache.set("opencode/kimi-k2.5-free", 262144) // key format is "<providerID>/<modelID>", value is the limit in tokens
+    const ctx = createContextUsageMockContext(180000, { // 180,000 input tokens; cacheRead is omitted, so the mock reports 0 cache reads
+      providerID: "opencode",
+      modelID: "kimi-k2.5-free",
+    })
+
+    // when
+    const usage = await getContextWindowUsage(ctx as never, "ses_model_limit", {
+      anthropicContext1MEnabled: false,
+      modelContextLimitsCache,
+    })
+
+    // then
+    expect(usage?.usagePercentage).toBeCloseTo(180000 / 262144) // used tokens divided by the cached limit, not an Anthropic default
+    expect(usage?.remainingTokens).toBe(82144) // 262,144 - 180,000
+  })
 })
--- a/src/shared/dynamic-truncator.ts
+++ b/src/shared/dynamic-truncator.ts
@@ -7,6 +7,7 @@ const DEFAULT_TARGET_MAX_TOKENS = 50_000;

 type ModelCacheStateLike = {
 	anthropicContext1MEnabled: boolean;
+	modelContextLimitsCache?: Map<string, number>; // optional per-model context limits in tokens, looked up by "<providerID>/<modelID>"
 }

 function getAnthropicActualLimit(modelCacheState?: ModelCacheStateLike): number {
@@ -17,8 +18,14 @@ function getAnthropicActualLimit(modelCacheState?: ModelCacheStateLike): number
 		: DEFAULT_ANTHROPIC_ACTUAL_LIMIT;
 }

+function isAnthropicProvider(providerID: string): boolean { // True only for the two provider IDs that fall back to the Anthropic limit when no cached per-model limit exists.
+	return providerID === "anthropic" || providerID === "google-vertex-anthropic";
+}
+
 interface AssistantMessageInfo {
 	role: "assistant";
+	providerID?: string;
+	modelID?: string;
 	tokens: {
 		input: number;
 		output: number;
@@ -136,20 +143,35 @@ export async function getContextWindowUsage(
 			.map((m) => m.info as AssistantMessageInfo);

 		if (assistantMessages.length === 0) return null;
-
+		
 		const lastAssistant = assistantMessages[assistantMessages.length - 1];
-		const lastTokens = lastAssistant.tokens;
+		const lastTokens = lastAssistant?.tokens;
+		if (!lastAssistant || !lastTokens) return null;
+
+		const cachedLimit =
+			lastAssistant.providerID !== undefined && lastAssistant.modelID !== undefined
+				? modelCacheState?.modelContextLimitsCache?.get(
+					`${lastAssistant.providerID}/${lastAssistant.modelID}`,
+				)
+				: undefined;
+		const actualLimit =
+			cachedLimit ??
+			(lastAssistant.providerID !== undefined && isAnthropicProvider(lastAssistant.providerID)
+				? getAnthropicActualLimit(modelCacheState)
+				: null);
+
+		if (!actualLimit) return null;
+
 		const usedTokens =
 			(lastTokens?.input ?? 0) +
 			(lastTokens?.cache?.read ?? 0) +
 			(lastTokens?.output ?? 0);
-		const anthropicActualLimit = getAnthropicActualLimit(modelCacheState);
-		const remainingTokens = anthropicActualLimit - usedTokens;
+		const remainingTokens = actualLimit - usedTokens;

 		return {
 			usedTokens,
 			remainingTokens,
-			usagePercentage: usedTokens / anthropicActualLimit,
+			usagePercentage: usedTokens / actualLimit,
 		};
 	} catch {
 		return null;