diff --git a/src/hooks/context-window-monitor.model-context-limits.test.ts b/src/hooks/context-window-monitor.model-context-limits.test.ts
new file mode 100644
index 000000000..531b87963
--- /dev/null
+++ b/src/hooks/context-window-monitor.model-context-limits.test.ts
@@ -0,0 +1,93 @@
+/// <reference types="bun-types" />
+
+import { describe, expect, it } from "bun:test"
+import { createContextWindowMonitorHook } from "./context-window-monitor"
+
+function createOutput() {
+ return { title: "", output: "original", metadata: null }
+}
+
+describe("context-window-monitor modelContextLimitsCache", () => {
+ it("does not append reminder below cached non-anthropic threshold", async () => {
+ // given
+ const modelContextLimitsCache = new Map<string, number>()
+ modelContextLimitsCache.set("opencode/kimi-k2.5-free", 262144)
+
+ const hook = createContextWindowMonitorHook({} as never, {
+ anthropicContext1MEnabled: false,
+ modelContextLimitsCache,
+ })
+ const sessionID = "ses_non_anthropic_below_threshold"
+
+ await hook.event({
+ event: {
+ type: "message.updated",
+ properties: {
+ info: {
+ role: "assistant",
+ sessionID,
+ providerID: "opencode",
+ modelID: "kimi-k2.5-free",
+ finish: true,
+ tokens: {
+ input: 150000,
+ output: 0,
+ reasoning: 0,
+ cache: { read: 10000, write: 0 },
+ },
+ },
+ },
+ },
+ })
+
+ // when
+ const output = createOutput()
+ await hook["tool.execute.after"]({ tool: "bash", sessionID, callID: "call_1" }, output)
+
+ // then
+ expect(output.output).toBe("original")
+ })
+
+ it("appends reminder above cached non-anthropic threshold", async () => {
+ // given
+ const modelContextLimitsCache = new Map<string, number>()
+ modelContextLimitsCache.set("opencode/kimi-k2.5-free", 262144)
+
+ const hook = createContextWindowMonitorHook({} as never, {
+ anthropicContext1MEnabled: false,
+ modelContextLimitsCache,
+ })
+ const sessionID = "ses_non_anthropic_above_threshold"
+
+ await hook.event({
+ event: {
+ type: "message.updated",
+ properties: {
+ info: {
+ role: "assistant",
+ sessionID,
+ providerID: "opencode",
+ modelID: "kimi-k2.5-free",
+ finish: true,
+ tokens: {
+ input: 180000,
+ output: 0,
+ reasoning: 0,
+ cache: { read: 10000, write: 0 },
+ },
+ },
+ },
+ },
+ })
+
+ // when
+ const output = createOutput()
+ await hook["tool.execute.after"]({ tool: "bash", sessionID, callID: "call_1" }, output)
+
+ // then
+ expect(output.output).toContain("context remaining")
+ expect(output.output).toContain("262,144-token context window")
+ expect(output.output).toContain("[Context Status: 72.5% used (190,000/262,144 tokens), 27.5% remaining]")
+ expect(output.output).not.toContain("1,000,000")
+ })
+})
diff --git a/src/hooks/context-window-monitor.ts b/src/hooks/context-window-monitor.ts
index 399c0810c..ec5d93061 100644
--- a/src/hooks/context-window-monitor.ts
+++ b/src/hooks/context-window-monitor.ts
@@ -1,12 +1,12 @@
import type { PluginInput } from "@opencode-ai/plugin"
import { createSystemDirective, SystemDirectiveTypes } from "../shared/system-directive"
-const ANTHROPIC_DISPLAY_LIMIT = 1_000_000
const DEFAULT_ANTHROPIC_ACTUAL_LIMIT = 200_000
const CONTEXT_WARNING_THRESHOLD = 0.70
type ModelCacheStateLike = {
anthropicContext1MEnabled: boolean
+ modelContextLimitsCache?: Map<string, number>
}
function getAnthropicActualLimit(modelCacheState?: ModelCacheStateLike): number {
@@ -17,11 +17,15 @@ function getAnthropicActualLimit(modelCacheState?: ModelCacheStateLike): number
: DEFAULT_ANTHROPIC_ACTUAL_LIMIT
}
-const CONTEXT_REMINDER = `${createSystemDirective(SystemDirectiveTypes.CONTEXT_WINDOW_MONITOR)}
+function createContextReminder(actualLimit: number): string {
+ const limitTokens = actualLimit.toLocaleString()
-You are using Anthropic Claude with 1M context window.
-You have plenty of context remaining - do NOT rush or skip tasks.
+ return `${createSystemDirective(SystemDirectiveTypes.CONTEXT_WINDOW_MONITOR)}
+
+You are using a ${limitTokens}-token context window.
+You still have context remaining - do NOT rush or skip tasks.
Complete your work thoroughly and methodically.`
+}
interface TokenInfo {
input: number
@@ -32,6 +36,7 @@ interface TokenInfo {
interface CachedTokenState {
providerID: string
+ modelID: string
tokens: TokenInfo
}
@@ -57,25 +62,30 @@ export function createContextWindowMonitorHook(
const cached = tokenCache.get(sessionID)
if (!cached) return
- if (!isAnthropicProvider(cached.providerID)) return
+ const cachedLimit = modelCacheState?.modelContextLimitsCache?.get(
+ `${cached.providerID}/${cached.modelID}`
+ )
+ const actualLimit =
+ cachedLimit ??
+ (isAnthropicProvider(cached.providerID) ? getAnthropicActualLimit(modelCacheState) : null)
+
+ if (!actualLimit) return
const lastTokens = cached.tokens
const totalInputTokens = (lastTokens?.input ?? 0) + (lastTokens?.cache?.read ?? 0)
- const actualUsagePercentage =
- totalInputTokens / getAnthropicActualLimit(modelCacheState)
+ const actualUsagePercentage = totalInputTokens / actualLimit
if (actualUsagePercentage < CONTEXT_WARNING_THRESHOLD) return
remindedSessions.add(sessionID)
- const displayUsagePercentage = totalInputTokens / ANTHROPIC_DISPLAY_LIMIT
- const usedPct = (displayUsagePercentage * 100).toFixed(1)
- const remainingPct = ((1 - displayUsagePercentage) * 100).toFixed(1)
+ const usedPct = (actualUsagePercentage * 100).toFixed(1)
+ const remainingPct = ((1 - actualUsagePercentage) * 100).toFixed(1)
const usedTokens = totalInputTokens.toLocaleString()
- const limitTokens = ANTHROPIC_DISPLAY_LIMIT.toLocaleString()
+ const limitTokens = actualLimit.toLocaleString()
- output.output += `\n\n${CONTEXT_REMINDER}
+ output.output += `\n\n${createContextReminder(actualLimit)}
[Context Status: ${usedPct}% used (${usedTokens}/${limitTokens} tokens), ${remainingPct}% remaining]`
}
@@ -95,6 +105,7 @@ export function createContextWindowMonitorHook(
role?: string
sessionID?: string
providerID?: string
+ modelID?: string
finish?: boolean
tokens?: TokenInfo
} | undefined
@@ -104,6 +115,7 @@ export function createContextWindowMonitorHook(
tokenCache.set(info.sessionID, {
providerID: info.providerID,
+ modelID: info.modelID ?? "",
tokens: info.tokens,
})
}
diff --git a/src/hooks/tool-output-truncator.test.ts b/src/hooks/tool-output-truncator.test.ts
index e38a1c70e..d1d1d573e 100644
--- a/src/hooks/tool-output-truncator.test.ts
+++ b/src/hooks/tool-output-truncator.test.ts
@@ -19,6 +19,20 @@ describe("createToolOutputTruncatorHook", () => {
hook = createToolOutputTruncatorHook({} as never)
})
+ it("passes modelContextLimitsCache through to createDynamicTruncator", () => {
+ const ctx = {} as never
+ const modelContextLimitsCache = new Map<string, number>()
+ const modelCacheState = {
+ anthropicContext1MEnabled: false,
+ modelContextLimitsCache,
+ }
+
+ truncateSpy.mockClear()
+ createToolOutputTruncatorHook(ctx, { modelCacheState })
+
+ expect(truncateSpy).toHaveBeenLastCalledWith(ctx, modelCacheState)
+ })
+
describe("tool.execute.after", () => {
const createInput = (tool: string) => ({
tool,
diff --git a/src/hooks/tool-output-truncator.ts b/src/hooks/tool-output-truncator.ts
index f47bf199b..c62ab23b5 100644
--- a/src/hooks/tool-output-truncator.ts
+++ b/src/hooks/tool-output-truncator.ts
@@ -27,7 +27,10 @@ const TOOL_SPECIFIC_MAX_TOKENS: Record<string, number> = {
}
interface ToolOutputTruncatorOptions {
- modelCacheState?: { anthropicContext1MEnabled: boolean }
+ modelCacheState?: {
+ anthropicContext1MEnabled: boolean
+ modelContextLimitsCache?: Map<string, number>
+ }
experimental?: ExperimentalConfig
}
diff --git a/src/shared/dynamic-truncator.test.ts b/src/shared/dynamic-truncator.test.ts
index 0a91d7096..a468b8a42 100644
--- a/src/shared/dynamic-truncator.test.ts
+++ b/src/shared/dynamic-truncator.test.ts
@@ -24,7 +24,10 @@ function resetContextLimitEnv(): void {
}
}
-function createContextUsageMockContext(inputTokens: number) {
+function createContextUsageMockContext(
+ inputTokens: number,
+ options?: { providerID?: string; modelID?: string; cacheRead?: number }
+) {
return {
client: {
session: {
@@ -33,11 +36,13 @@ function createContextUsageMockContext(inputTokens: number) {
{
info: {
role: "assistant",
+ providerID: options?.providerID ?? "anthropic",
+ modelID: options?.modelID,
tokens: {
input: inputTokens,
output: 0,
reasoning: 0,
- cache: { read: 0, write: 0 },
+ cache: { read: options?.cacheRead ?? 0, write: 0 },
},
},
},
@@ -99,4 +104,24 @@ describe("getContextWindowUsage", () => {
expect(usage?.usagePercentage).toBe(0.3)
expect(usage?.remainingTokens).toBe(700000)
})
+
+ it("uses model-specific limit for non-anthropic providers when cached", async () => {
+ // given
+ const modelContextLimitsCache = new Map<string, number>()
+ modelContextLimitsCache.set("opencode/kimi-k2.5-free", 262144)
+ const ctx = createContextUsageMockContext(180000, {
+ providerID: "opencode",
+ modelID: "kimi-k2.5-free",
+ })
+
+ // when
+ const usage = await getContextWindowUsage(ctx as never, "ses_model_limit", {
+ anthropicContext1MEnabled: false,
+ modelContextLimitsCache,
+ })
+
+ // then
+ expect(usage?.usagePercentage).toBeCloseTo(180000 / 262144)
+ expect(usage?.remainingTokens).toBe(82144)
+ })
})
diff --git a/src/shared/dynamic-truncator.ts b/src/shared/dynamic-truncator.ts
index 5236f3e76..1de2321fd 100644
--- a/src/shared/dynamic-truncator.ts
+++ b/src/shared/dynamic-truncator.ts
@@ -7,6 +7,7 @@ const DEFAULT_TARGET_MAX_TOKENS = 50_000;
type ModelCacheStateLike = {
anthropicContext1MEnabled: boolean;
+ modelContextLimitsCache?: Map<string, number>;
}
function getAnthropicActualLimit(modelCacheState?: ModelCacheStateLike): number {
@@ -17,8 +18,14 @@ function getAnthropicActualLimit(modelCacheState?: ModelCacheStateLike): number
: DEFAULT_ANTHROPIC_ACTUAL_LIMIT;
}
+function isAnthropicProvider(providerID: string): boolean {
+ return providerID === "anthropic" || providerID === "google-vertex-anthropic";
+}
+
interface AssistantMessageInfo {
role: "assistant";
+ providerID?: string;
+ modelID?: string;
tokens: {
input: number;
output: number;
@@ -136,20 +143,35 @@ export async function getContextWindowUsage(
.map((m) => m.info as AssistantMessageInfo);
if (assistantMessages.length === 0) return null;
-
+
const lastAssistant = assistantMessages[assistantMessages.length - 1];
- const lastTokens = lastAssistant.tokens;
+ const lastTokens = lastAssistant?.tokens;
+ if (!lastAssistant || !lastTokens) return null;
+
+ const cachedLimit =
+ lastAssistant.providerID !== undefined && lastAssistant.modelID !== undefined
+ ? modelCacheState?.modelContextLimitsCache?.get(
+ `${lastAssistant.providerID}/${lastAssistant.modelID}`,
+ )
+ : undefined;
+ const actualLimit =
+ cachedLimit ??
+ (lastAssistant.providerID !== undefined && isAnthropicProvider(lastAssistant.providerID)
+ ? getAnthropicActualLimit(modelCacheState)
+ : null);
+
+ if (!actualLimit) return null;
+
const usedTokens =
(lastTokens?.input ?? 0) +
(lastTokens?.cache?.read ?? 0) +
(lastTokens?.output ?? 0);
- const anthropicActualLimit = getAnthropicActualLimit(modelCacheState);
- const remainingTokens = anthropicActualLimit - usedTokens;
+ const remainingTokens = actualLimit - usedTokens;
return {
usedTokens,
remainingTokens,
- usagePercentage: usedTokens / anthropicActualLimit,
+ usagePercentage: usedTokens / actualLimit,
};
} catch {
return null;