diff --git a/src/hooks/context-window-monitor.model-context-limits.test.ts b/src/hooks/context-window-monitor.model-context-limits.test.ts new file mode 100644 index 000000000..531b87963 --- /dev/null +++ b/src/hooks/context-window-monitor.model-context-limits.test.ts @@ -0,0 +1,93 @@ +/// + +import { describe, expect, it } from "bun:test" +import { createContextWindowMonitorHook } from "./context-window-monitor" + +function createOutput() { + return { title: "", output: "original", metadata: null } +} + +describe("context-window-monitor modelContextLimitsCache", () => { + it("does not append reminder below cached non-anthropic threshold", async () => { + // given + const modelContextLimitsCache = new Map() + modelContextLimitsCache.set("opencode/kimi-k2.5-free", 262144) + + const hook = createContextWindowMonitorHook({} as never, { + anthropicContext1MEnabled: false, + modelContextLimitsCache, + }) + const sessionID = "ses_non_anthropic_below_threshold" + + await hook.event({ + event: { + type: "message.updated", + properties: { + info: { + role: "assistant", + sessionID, + providerID: "opencode", + modelID: "kimi-k2.5-free", + finish: true, + tokens: { + input: 150000, + output: 0, + reasoning: 0, + cache: { read: 10000, write: 0 }, + }, + }, + }, + }, + }) + + // when + const output = createOutput() + await hook["tool.execute.after"]({ tool: "bash", sessionID, callID: "call_1" }, output) + + // then + expect(output.output).toBe("original") + }) + + it("appends reminder above cached non-anthropic threshold", async () => { + // given + const modelContextLimitsCache = new Map() + modelContextLimitsCache.set("opencode/kimi-k2.5-free", 262144) + + const hook = createContextWindowMonitorHook({} as never, { + anthropicContext1MEnabled: false, + modelContextLimitsCache, + }) + const sessionID = "ses_non_anthropic_above_threshold" + + await hook.event({ + event: { + type: "message.updated", + properties: { + info: { + role: "assistant", + sessionID, + providerID: "opencode", + modelID: "kimi-k2.5-free", + finish: true, + tokens: { + input: 180000, + output: 0, + reasoning: 0, + cache: { read: 10000, write: 0 }, + }, + }, + }, + }, + }) + + // when + const output = createOutput() + await hook["tool.execute.after"]({ tool: "bash", sessionID, callID: "call_1" }, output) + + // then + expect(output.output).toContain("context remaining") + expect(output.output).toContain("262,144-token context window") + expect(output.output).toContain("[Context Status: 72.5% used (190,000/262,144 tokens), 27.5% remaining]") + expect(output.output).not.toContain("1,000,000") + }) +}) diff --git a/src/hooks/context-window-monitor.ts b/src/hooks/context-window-monitor.ts index 399c0810c..ec5d93061 100644 --- a/src/hooks/context-window-monitor.ts +++ b/src/hooks/context-window-monitor.ts @@ -1,12 +1,12 @@ import type { PluginInput } from "@opencode-ai/plugin" import { createSystemDirective, SystemDirectiveTypes } from "../shared/system-directive" -const ANTHROPIC_DISPLAY_LIMIT = 1_000_000 const DEFAULT_ANTHROPIC_ACTUAL_LIMIT = 200_000 const CONTEXT_WARNING_THRESHOLD = 0.70 type ModelCacheStateLike = { anthropicContext1MEnabled: boolean + modelContextLimitsCache?: Map } function getAnthropicActualLimit(modelCacheState?: ModelCacheStateLike): number { @@ -17,11 +17,15 @@ function getAnthropicActualLimit(modelCacheState?: ModelCacheStateLike): number : DEFAULT_ANTHROPIC_ACTUAL_LIMIT } -const CONTEXT_REMINDER = `${createSystemDirective(SystemDirectiveTypes.CONTEXT_WINDOW_MONITOR)} +function createContextReminder(actualLimit: number): string { + const limitTokens = actualLimit.toLocaleString() -You are using Anthropic Claude with 1M context window. -You have plenty of context remaining - do NOT rush or skip tasks. + return `${createSystemDirective(SystemDirectiveTypes.CONTEXT_WINDOW_MONITOR)} + +You are using a ${limitTokens}-token context window. +You still have context remaining - do NOT rush or skip tasks. Complete your work thoroughly and methodically.` +} interface TokenInfo { input: number @@ -32,6 +36,7 @@ interface TokenInfo { interface CachedTokenState { providerID: string + modelID: string tokens: TokenInfo } @@ -57,25 +62,30 @@ export function createContextWindowMonitorHook( const cached = tokenCache.get(sessionID) if (!cached) return - if (!isAnthropicProvider(cached.providerID)) return + const cachedLimit = modelCacheState?.modelContextLimitsCache?.get( + `${cached.providerID}/${cached.modelID}` + ) + const actualLimit = + cachedLimit ?? + (isAnthropicProvider(cached.providerID) ? getAnthropicActualLimit(modelCacheState) : null) + + if (!actualLimit) return const lastTokens = cached.tokens const totalInputTokens = (lastTokens?.input ?? 0) + (lastTokens?.cache?.read ?? 0) - const actualUsagePercentage = - totalInputTokens / getAnthropicActualLimit(modelCacheState) + const actualUsagePercentage = totalInputTokens / actualLimit if (actualUsagePercentage < CONTEXT_WARNING_THRESHOLD) return remindedSessions.add(sessionID) - const displayUsagePercentage = totalInputTokens / ANTHROPIC_DISPLAY_LIMIT - const usedPct = (displayUsagePercentage * 100).toFixed(1) - const remainingPct = ((1 - displayUsagePercentage) * 100).toFixed(1) + const usedPct = (actualUsagePercentage * 100).toFixed(1) + const remainingPct = ((1 - actualUsagePercentage) * 100).toFixed(1) const usedTokens = totalInputTokens.toLocaleString() - const limitTokens = ANTHROPIC_DISPLAY_LIMIT.toLocaleString() + const limitTokens = actualLimit.toLocaleString() - output.output += `\n\n${CONTEXT_REMINDER} + output.output += `\n\n${createContextReminder(actualLimit)} [Context Status: ${usedPct}% used (${usedTokens}/${limitTokens} tokens), ${remainingPct}% remaining]` } @@ -95,6 +105,7 @@ export function createContextWindowMonitorHook( role?: string sessionID?: string providerID?: string + modelID?: string finish?: boolean tokens?: TokenInfo } | undefined @@ -104,6 +115,7 @@ export function createContextWindowMonitorHook( tokenCache.set(info.sessionID, { providerID: info.providerID, + modelID: info.modelID ?? "", tokens: info.tokens, }) } diff --git a/src/hooks/tool-output-truncator.test.ts b/src/hooks/tool-output-truncator.test.ts index e38a1c70e..d1d1d573e 100644 --- a/src/hooks/tool-output-truncator.test.ts +++ b/src/hooks/tool-output-truncator.test.ts @@ -19,6 +19,20 @@ describe("createToolOutputTruncatorHook", () => { hook = createToolOutputTruncatorHook({} as never) }) + it("passes modelContextLimitsCache through to createDynamicTruncator", () => { + const ctx = {} as never + const modelContextLimitsCache = new Map() + const modelCacheState = { + anthropicContext1MEnabled: false, + modelContextLimitsCache, + } + + truncateSpy.mockClear() + createToolOutputTruncatorHook(ctx, { modelCacheState }) + + expect(truncateSpy).toHaveBeenLastCalledWith(ctx, modelCacheState) + }) + describe("tool.execute.after", () => { const createInput = (tool: string) => ({ tool, diff --git a/src/hooks/tool-output-truncator.ts b/src/hooks/tool-output-truncator.ts index f47bf199b..c62ab23b5 100644 --- a/src/hooks/tool-output-truncator.ts +++ b/src/hooks/tool-output-truncator.ts @@ -27,7 +27,10 @@ const TOOL_SPECIFIC_MAX_TOKENS: Record = { } interface ToolOutputTruncatorOptions { - modelCacheState?: { anthropicContext1MEnabled: boolean } + modelCacheState?: { + anthropicContext1MEnabled: boolean + modelContextLimitsCache?: Map + } experimental?: ExperimentalConfig } diff --git a/src/shared/dynamic-truncator.test.ts b/src/shared/dynamic-truncator.test.ts index 0a91d7096..a468b8a42 100644 --- a/src/shared/dynamic-truncator.test.ts +++ b/src/shared/dynamic-truncator.test.ts @@ -24,7 +24,10 @@ function resetContextLimitEnv(): void { } } -function createContextUsageMockContext(inputTokens: number) { +function createContextUsageMockContext( + inputTokens: number, + options?: { providerID?: string; modelID?: string; cacheRead?: number } +) { return { client: { session: { @@ -33,11 +36,13 @@ function createContextUsageMockContext(inputTokens: number) { { info: { role: "assistant", + providerID: options?.providerID ?? "anthropic", + modelID: options?.modelID, tokens: { input: inputTokens, output: 0, reasoning: 0, - cache: { read: 0, write: 0 }, + cache: { read: options?.cacheRead ?? 0, write: 0 }, }, }, }, @@ -99,4 +104,24 @@ describe("getContextWindowUsage", () => { expect(usage?.usagePercentage).toBe(0.3) expect(usage?.remainingTokens).toBe(700000) }) + + it("uses model-specific limit for non-anthropic providers when cached", async () => { + // given + const modelContextLimitsCache = new Map() + modelContextLimitsCache.set("opencode/kimi-k2.5-free", 262144) + const ctx = createContextUsageMockContext(180000, { + providerID: "opencode", + modelID: "kimi-k2.5-free", + }) + + // when + const usage = await getContextWindowUsage(ctx as never, "ses_model_limit", { + anthropicContext1MEnabled: false, + modelContextLimitsCache, + }) + + // then + expect(usage?.usagePercentage).toBeCloseTo(180000 / 262144) + expect(usage?.remainingTokens).toBe(82144) + }) }) diff --git a/src/shared/dynamic-truncator.ts b/src/shared/dynamic-truncator.ts index 5236f3e76..1de2321fd 100644 --- a/src/shared/dynamic-truncator.ts +++ b/src/shared/dynamic-truncator.ts @@ -7,6 +7,7 @@ const DEFAULT_TARGET_MAX_TOKENS = 50_000; type ModelCacheStateLike = { anthropicContext1MEnabled: boolean; + modelContextLimitsCache?: Map; } function getAnthropicActualLimit(modelCacheState?: ModelCacheStateLike): number { @@ -17,8 +18,14 @@ function getAnthropicActualLimit(modelCacheState?: ModelCacheStateLike): number : DEFAULT_ANTHROPIC_ACTUAL_LIMIT; } +function isAnthropicProvider(providerID: string): boolean { + return providerID === "anthropic" || providerID === "google-vertex-anthropic"; +} + interface AssistantMessageInfo { role: "assistant"; + providerID?: string; + modelID?: string; tokens: { input: number; output: number; @@ -136,20 +143,35 @@ export async function getContextWindowUsage( .map((m) => m.info as AssistantMessageInfo); if (assistantMessages.length === 0) return null; - + const lastAssistant = assistantMessages[assistantMessages.length - 1]; - const lastTokens = lastAssistant.tokens; + const lastTokens = lastAssistant?.tokens; + if (!lastAssistant || !lastTokens) return null; + + const cachedLimit = + lastAssistant.providerID !== undefined && lastAssistant.modelID !== undefined + ? modelCacheState?.modelContextLimitsCache?.get( + `${lastAssistant.providerID}/${lastAssistant.modelID}`, + ) + : undefined; + const actualLimit = + cachedLimit ?? + (lastAssistant.providerID !== undefined && isAnthropicProvider(lastAssistant.providerID) + ? getAnthropicActualLimit(modelCacheState) + : null); + + if (!actualLimit) return null; + const usedTokens = (lastTokens?.input ?? 0) + (lastTokens?.cache?.read ?? 0) + (lastTokens?.output ?? 0); - const anthropicActualLimit = getAnthropicActualLimit(modelCacheState); - const remainingTokens = anthropicActualLimit - usedTokens; + const remainingTokens = actualLimit - usedTokens; return { usedTokens, remainingTokens, - usagePercentage: usedTokens / anthropicActualLimit, + usagePercentage: usedTokens / actualLimit, }; } catch { return null;