Merge pull request #2366 from code-yeongyu/fix/issue-2338

fix: honor model-specific context limits for non-Anthropic models
This commit is contained in:
YeonGyu-Kim
2026-03-11 20:06:44 +09:00
committed by GitHub
6 changed files with 189 additions and 20 deletions

View File

@@ -0,0 +1,93 @@
/// <reference types="bun-types" />
import { describe, expect, it } from "bun:test"
import { createContextWindowMonitorHook } from "./context-window-monitor"
// Builds a fresh tool-output fixture; tests compare against the sentinel
// "original" text to detect whether the hook appended a reminder.
function createOutput() {
  const fixture = { title: "", output: "original", metadata: null }
  return fixture
}
describe("context-window-monitor modelContextLimitsCache", () => {
  const MODEL_KEY = "opencode/kimi-k2.5-free"
  const MODEL_LIMIT = 262144

  // Creates a hook whose limits cache maps the test model to a 262,144-token window,
  // with the Anthropic 1M context flag disabled.
  function buildHook() {
    const modelContextLimitsCache = new Map<string, number>([[MODEL_KEY, MODEL_LIMIT]])
    return createContextWindowMonitorHook({} as never, {
      anthropicContext1MEnabled: false,
      modelContextLimitsCache,
    })
  }

  // Feeds the hook a finished assistant message for the test model carrying the
  // given input-token count plus a fixed 10,000-token cache read.
  async function reportUsage(
    hook: ReturnType<typeof createContextWindowMonitorHook>,
    sessionID: string,
    input: number,
  ) {
    await hook.event({
      event: {
        type: "message.updated",
        properties: {
          info: {
            role: "assistant",
            sessionID,
            providerID: "opencode",
            modelID: "kimi-k2.5-free",
            finish: true,
            tokens: {
              input,
              output: 0,
              reasoning: 0,
              cache: { read: 10000, write: 0 },
            },
          },
        },
      },
    })
  }

  it("does not append reminder below cached non-anthropic threshold", async () => {
    // given — 150k input + 10k cache read = 160k ≈ 61% of 262,144, below the threshold
    const hook = buildHook()
    const sessionID = "ses_non_anthropic_below_threshold"
    await reportUsage(hook, sessionID, 150000)
    // when
    const output = createOutput()
    await hook["tool.execute.after"]({ tool: "bash", sessionID, callID: "call_1" }, output)
    // then — tool output is left untouched
    expect(output.output).toBe("original")
  })

  it("appends reminder above cached non-anthropic threshold", async () => {
    // given — 180k input + 10k cache read = 190k = 72.5% of 262,144, above the threshold
    const hook = buildHook()
    const sessionID = "ses_non_anthropic_above_threshold"
    await reportUsage(hook, sessionID, 180000)
    // when
    const output = createOutput()
    await hook["tool.execute.after"]({ tool: "bash", sessionID, callID: "call_1" }, output)
    // then — reminder reflects the cached model limit, not Anthropic's 1M display limit
    expect(output.output).toContain("context remaining")
    expect(output.output).toContain("262,144-token context window")
    expect(output.output).toContain("[Context Status: 72.5% used (190,000/262,144 tokens), 27.5% remaining]")
    expect(output.output).not.toContain("1,000,000")
  })
})

View File

@@ -1,12 +1,12 @@
import type { PluginInput } from "@opencode-ai/plugin"
import { createSystemDirective, SystemDirectiveTypes } from "../shared/system-directive"
const ANTHROPIC_DISPLAY_LIMIT = 1_000_000
const DEFAULT_ANTHROPIC_ACTUAL_LIMIT = 200_000
const CONTEXT_WARNING_THRESHOLD = 0.70
type ModelCacheStateLike = {
anthropicContext1MEnabled: boolean
modelContextLimitsCache?: Map<string, number>
}
function getAnthropicActualLimit(modelCacheState?: ModelCacheStateLike): number {
@@ -17,11 +17,15 @@ function getAnthropicActualLimit(modelCacheState?: ModelCacheStateLike): number
: DEFAULT_ANTHROPIC_ACTUAL_LIMIT
}
const CONTEXT_REMINDER = `${createSystemDirective(SystemDirectiveTypes.CONTEXT_WINDOW_MONITOR)}
function createContextReminder(actualLimit: number): string {
const limitTokens = actualLimit.toLocaleString()
You are using Anthropic Claude with 1M context window.
You have plenty of context remaining - do NOT rush or skip tasks.
return `${createSystemDirective(SystemDirectiveTypes.CONTEXT_WINDOW_MONITOR)}
You are using a ${limitTokens}-token context window.
You still have context remaining - do NOT rush or skip tasks.
Complete your work thoroughly and methodically.`
}
interface TokenInfo {
input: number
@@ -32,6 +36,7 @@ interface TokenInfo {
interface CachedTokenState {
providerID: string
modelID: string
tokens: TokenInfo
}
@@ -57,25 +62,30 @@ export function createContextWindowMonitorHook(
const cached = tokenCache.get(sessionID)
if (!cached) return
if (!isAnthropicProvider(cached.providerID)) return
const cachedLimit = modelCacheState?.modelContextLimitsCache?.get(
`${cached.providerID}/${cached.modelID}`
)
const actualLimit =
cachedLimit ??
(isAnthropicProvider(cached.providerID) ? getAnthropicActualLimit(modelCacheState) : null)
if (!actualLimit) return
const lastTokens = cached.tokens
const totalInputTokens = (lastTokens?.input ?? 0) + (lastTokens?.cache?.read ?? 0)
const actualUsagePercentage =
totalInputTokens / getAnthropicActualLimit(modelCacheState)
const actualUsagePercentage = totalInputTokens / actualLimit
if (actualUsagePercentage < CONTEXT_WARNING_THRESHOLD) return
remindedSessions.add(sessionID)
const displayUsagePercentage = totalInputTokens / ANTHROPIC_DISPLAY_LIMIT
const usedPct = (displayUsagePercentage * 100).toFixed(1)
const remainingPct = ((1 - displayUsagePercentage) * 100).toFixed(1)
const usedPct = (actualUsagePercentage * 100).toFixed(1)
const remainingPct = ((1 - actualUsagePercentage) * 100).toFixed(1)
const usedTokens = totalInputTokens.toLocaleString()
const limitTokens = ANTHROPIC_DISPLAY_LIMIT.toLocaleString()
const limitTokens = actualLimit.toLocaleString()
output.output += `\n\n${CONTEXT_REMINDER}
output.output += `\n\n${createContextReminder(actualLimit)}
[Context Status: ${usedPct}% used (${usedTokens}/${limitTokens} tokens), ${remainingPct}% remaining]`
}
@@ -95,6 +105,7 @@ export function createContextWindowMonitorHook(
role?: string
sessionID?: string
providerID?: string
modelID?: string
finish?: boolean
tokens?: TokenInfo
} | undefined
@@ -104,6 +115,7 @@ export function createContextWindowMonitorHook(
tokenCache.set(info.sessionID, {
providerID: info.providerID,
modelID: info.modelID ?? "",
tokens: info.tokens,
})
}

View File

@@ -19,6 +19,20 @@ describe("createToolOutputTruncatorHook", () => {
hook = createToolOutputTruncatorHook({} as never)
})
it("passes modelContextLimitsCache through to createDynamicTruncator", () => {
const ctx = {} as never
const modelContextLimitsCache = new Map<string, number>()
const modelCacheState = {
anthropicContext1MEnabled: false,
modelContextLimitsCache,
}
truncateSpy.mockClear()
createToolOutputTruncatorHook(ctx, { modelCacheState })
expect(truncateSpy).toHaveBeenLastCalledWith(ctx, modelCacheState)
})
describe("tool.execute.after", () => {
const createInput = (tool: string) => ({
tool,

View File

@@ -27,7 +27,10 @@ const TOOL_SPECIFIC_MAX_TOKENS: Record<string, number> = {
}
interface ToolOutputTruncatorOptions {
modelCacheState?: { anthropicContext1MEnabled: boolean }
modelCacheState?: {
anthropicContext1MEnabled: boolean
modelContextLimitsCache?: Map<string, number>
}
experimental?: ExperimentalConfig
}

View File

@@ -24,7 +24,10 @@ function resetContextLimitEnv(): void {
}
}
function createContextUsageMockContext(inputTokens: number) {
function createContextUsageMockContext(
inputTokens: number,
options?: { providerID?: string; modelID?: string; cacheRead?: number }
) {
return {
client: {
session: {
@@ -33,11 +36,13 @@ function createContextUsageMockContext(inputTokens: number) {
{
info: {
role: "assistant",
providerID: options?.providerID ?? "anthropic",
modelID: options?.modelID,
tokens: {
input: inputTokens,
output: 0,
reasoning: 0,
cache: { read: 0, write: 0 },
cache: { read: options?.cacheRead ?? 0, write: 0 },
},
},
},
@@ -99,4 +104,24 @@ describe("getContextWindowUsage", () => {
expect(usage?.usagePercentage).toBe(0.3)
expect(usage?.remainingTokens).toBe(700000)
})
it("uses model-specific limit for non-anthropic providers when cached", async () => {
// given
const modelContextLimitsCache = new Map<string, number>()
modelContextLimitsCache.set("opencode/kimi-k2.5-free", 262144)
const ctx = createContextUsageMockContext(180000, {
providerID: "opencode",
modelID: "kimi-k2.5-free",
})
// when
const usage = await getContextWindowUsage(ctx as never, "ses_model_limit", {
anthropicContext1MEnabled: false,
modelContextLimitsCache,
})
// then
expect(usage?.usagePercentage).toBeCloseTo(180000 / 262144)
expect(usage?.remainingTokens).toBe(82144)
})
})

View File

@@ -7,6 +7,7 @@ const DEFAULT_TARGET_MAX_TOKENS = 50_000;
type ModelCacheStateLike = {
anthropicContext1MEnabled: boolean;
modelContextLimitsCache?: Map<string, number>;
}
function getAnthropicActualLimit(modelCacheState?: ModelCacheStateLike): number {
@@ -17,8 +18,14 @@ function getAnthropicActualLimit(modelCacheState?: ModelCacheStateLike): number
: DEFAULT_ANTHROPIC_ACTUAL_LIMIT;
}
function isAnthropicProvider(providerID: string): boolean {
return providerID === "anthropic" || providerID === "google-vertex-anthropic";
}
interface AssistantMessageInfo {
role: "assistant";
providerID?: string;
modelID?: string;
tokens: {
input: number;
output: number;
@@ -136,20 +143,35 @@ export async function getContextWindowUsage(
.map((m) => m.info as AssistantMessageInfo);
if (assistantMessages.length === 0) return null;
const lastAssistant = assistantMessages[assistantMessages.length - 1];
const lastTokens = lastAssistant.tokens;
const lastTokens = lastAssistant?.tokens;
if (!lastAssistant || !lastTokens) return null;
const cachedLimit =
lastAssistant.providerID !== undefined && lastAssistant.modelID !== undefined
? modelCacheState?.modelContextLimitsCache?.get(
`${lastAssistant.providerID}/${lastAssistant.modelID}`,
)
: undefined;
const actualLimit =
cachedLimit ??
(lastAssistant.providerID !== undefined && isAnthropicProvider(lastAssistant.providerID)
? getAnthropicActualLimit(modelCacheState)
: null);
if (!actualLimit) return null;
const usedTokens =
(lastTokens?.input ?? 0) +
(lastTokens?.cache?.read ?? 0) +
(lastTokens?.output ?? 0);
const anthropicActualLimit = getAnthropicActualLimit(modelCacheState);
const remainingTokens = anthropicActualLimit - usedTokens;
const remainingTokens = actualLimit - usedTokens;
return {
usedTokens,
remainingTokens,
usagePercentage: usedTokens / anthropicActualLimit,
usagePercentage: usedTokens / actualLimit,
};
} catch {
return null;