Merge pull request #2366 from code-yeongyu/fix/issue-2338
fix: honor model-specific context limits for non-Anthropic models
This commit is contained in:
@@ -0,0 +1,93 @@
|
||||
/// <reference types="bun-types" />
|
||||
|
||||
import { describe, expect, it } from "bun:test"
|
||||
import { createContextWindowMonitorHook } from "./context-window-monitor"
|
||||
|
||||
/** Builds a fresh tool-output stub whose `output` text starts as "original". */
function createOutput() {
  const freshOutput = { title: "", output: "original", metadata: null }
  return freshOutput
}
|
||||
|
||||
describe("context-window-monitor modelContextLimitsCache", () => {
  type MonitorHook = ReturnType<typeof createContextWindowMonitorHook>

  // Creates a monitor hook whose limits cache holds a single 262144-token
  // entry for "opencode/kimi-k2.5-free", with the Anthropic 1M flag turned off.
  function createHookWithCachedLimit(): MonitorHook {
    const limitsByModel = new Map<string, number>()
    limitsByModel.set("opencode/kimi-k2.5-free", 262144)

    return createContextWindowMonitorHook({} as never, {
      anthropicContext1MEnabled: false,
      modelContextLimitsCache: limitsByModel,
    })
  }

  // Feeds the hook a finished assistant "message.updated" event for the cached
  // model, reporting `inputTokens` input tokens plus 10000 cache-read tokens.
  async function reportAssistantUsage(
    hook: MonitorHook,
    sessionID: string,
    inputTokens: number
  ): Promise<void> {
    await hook.event({
      event: {
        type: "message.updated",
        properties: {
          info: {
            role: "assistant",
            sessionID,
            providerID: "opencode",
            modelID: "kimi-k2.5-free",
            finish: true,
            tokens: {
              input: inputTokens,
              output: 0,
              reasoning: 0,
              cache: { read: 10000, write: 0 },
            },
          },
        },
      },
    })
  }

  // Runs the "tool.execute.after" handler for a bash call and hands back the
  // output object the handler may have appended to.
  async function runBashToolAfter(hook: MonitorHook, sessionID: string) {
    const toolOutput = createOutput()
    await hook["tool.execute.after"]({ tool: "bash", sessionID, callID: "call_1" }, toolOutput)
    return toolOutput
  }

  it("does not append reminder below cached non-anthropic threshold", async () => {
    // given: 150000 input + 10000 cache-read tokens against a 262144 limit
    const hook = createHookWithCachedLimit()
    const sessionID = "ses_non_anthropic_below_threshold"
    await reportAssistantUsage(hook, sessionID, 150000)

    // when
    const output = await runBashToolAfter(hook, sessionID)

    // then
    expect(output.output).toBe("original")
  })

  it("appends reminder above cached non-anthropic threshold", async () => {
    // given: 180000 input + 10000 cache-read tokens against a 262144 limit
    const hook = createHookWithCachedLimit()
    const sessionID = "ses_non_anthropic_above_threshold"
    await reportAssistantUsage(hook, sessionID, 180000)

    // when
    const output = await runBashToolAfter(hook, sessionID)

    // then
    const appended = output.output
    expect(appended).toContain("context remaining")
    expect(appended).toContain("262,144-token context window")
    expect(appended).toContain("[Context Status: 72.5% used (190,000/262,144 tokens), 27.5% remaining]")
    expect(appended).not.toContain("1,000,000")
  })
})
|
||||
@@ -1,12 +1,12 @@
|
||||
import type { PluginInput } from "@opencode-ai/plugin"
|
||||
import { createSystemDirective, SystemDirectiveTypes } from "../shared/system-directive"
|
||||
|
||||
/** Token count of the 1M display window (presumably the Anthropic 1M-context figure; confirm against usages). */
const ANTHROPIC_DISPLAY_LIMIT = 1_000_000

/** Fallback Anthropic context limit, in tokens, returned by getAnthropicActualLimit. */
const DEFAULT_ANTHROPIC_ACTUAL_LIMIT = 200_000

/** Fraction of the context limit below which no reminder is appended. */
const CONTEXT_WARNING_THRESHOLD = 0.7
|
||||
|
||||
/**
 * Minimal shape of the shared model cache state read by the context window
 * monitor.
 */
type ModelCacheStateLike = {
  /** Flag consulted when resolving the Anthropic limit (NOTE(review): exact effect lives in getAnthropicActualLimit; confirm there). */
  anthropicContext1MEnabled: boolean
  /** Optional context limits in tokens, looked up by "<providerID>/<modelID>". */
  modelContextLimitsCache?: Map<string, number>
}
|
||||
|
||||
function getAnthropicActualLimit(modelCacheState?: ModelCacheStateLike): number {
|
||||
@@ -17,11 +17,15 @@ function getAnthropicActualLimit(modelCacheState?: ModelCacheStateLike): number
|
||||
: DEFAULT_ANTHROPIC_ACTUAL_LIMIT
|
||||
}
|
||||
|
||||
/**
 * Builds the reminder text that is appended to tool output once context usage
 * reaches the warning threshold.
 *
 * The limit is formatted with an explicit "en-US" locale so the digit grouping
 * (e.g. "262,144") does not change with the host's default locale.
 *
 * @param actualLimit - Context window size, in tokens, of the active model.
 * @returns The context-window-monitor system directive followed by the reminder text.
 */
function createContextReminder(actualLimit: number): string {
  const limitTokens = actualLimit.toLocaleString("en-US")

  return `${createSystemDirective(SystemDirectiveTypes.CONTEXT_WINDOW_MONITOR)}

You are using a ${limitTokens}-token context window.
You still have context remaining - do NOT rush or skip tasks.
Complete your work thoroughly and methodically.`
}
|
||||
|
||||
interface TokenInfo {
|
||||
input: number
|
||||
@@ -32,6 +36,7 @@ interface TokenInfo {
|
||||
|
||||
/** Per-session snapshot kept in the token cache: which model produced the last usage figures, and the figures themselves. */
interface CachedTokenState {
  /** Provider part of the "<providerID>/<modelID>" key used for limit lookups. */
  providerID: string
  /** Model part of the "<providerID>/<modelID>" key used for limit lookups. */
  modelID: string
  /** Token counts reported for that message. */
  tokens: TokenInfo
}
|
||||
|
||||
@@ -57,25 +62,30 @@ export function createContextWindowMonitorHook(
|
||||
const cached = tokenCache.get(sessionID)
|
||||
if (!cached) return
|
||||
|
||||
if (!isAnthropicProvider(cached.providerID)) return
|
||||
const cachedLimit = modelCacheState?.modelContextLimitsCache?.get(
|
||||
`${cached.providerID}/${cached.modelID}`
|
||||
)
|
||||
const actualLimit =
|
||||
cachedLimit ??
|
||||
(isAnthropicProvider(cached.providerID) ? getAnthropicActualLimit(modelCacheState) : null)
|
||||
|
||||
if (!actualLimit) return
|
||||
|
||||
const lastTokens = cached.tokens
|
||||
const totalInputTokens = (lastTokens?.input ?? 0) + (lastTokens?.cache?.read ?? 0)
|
||||
|
||||
const actualUsagePercentage =
|
||||
totalInputTokens / getAnthropicActualLimit(modelCacheState)
|
||||
const actualUsagePercentage = totalInputTokens / actualLimit
|
||||
|
||||
if (actualUsagePercentage < CONTEXT_WARNING_THRESHOLD) return
|
||||
|
||||
remindedSessions.add(sessionID)
|
||||
|
||||
const displayUsagePercentage = totalInputTokens / ANTHROPIC_DISPLAY_LIMIT
|
||||
const usedPct = (displayUsagePercentage * 100).toFixed(1)
|
||||
const remainingPct = ((1 - displayUsagePercentage) * 100).toFixed(1)
|
||||
const usedPct = (actualUsagePercentage * 100).toFixed(1)
|
||||
const remainingPct = ((1 - actualUsagePercentage) * 100).toFixed(1)
|
||||
const usedTokens = totalInputTokens.toLocaleString()
|
||||
const limitTokens = ANTHROPIC_DISPLAY_LIMIT.toLocaleString()
|
||||
const limitTokens = actualLimit.toLocaleString()
|
||||
|
||||
output.output += `\n\n${CONTEXT_REMINDER}
|
||||
output.output += `\n\n${createContextReminder(actualLimit)}
|
||||
[Context Status: ${usedPct}% used (${usedTokens}/${limitTokens} tokens), ${remainingPct}% remaining]`
|
||||
}
|
||||
|
||||
@@ -95,6 +105,7 @@ export function createContextWindowMonitorHook(
|
||||
role?: string
|
||||
sessionID?: string
|
||||
providerID?: string
|
||||
modelID?: string
|
||||
finish?: boolean
|
||||
tokens?: TokenInfo
|
||||
} | undefined
|
||||
@@ -104,6 +115,7 @@ export function createContextWindowMonitorHook(
|
||||
|
||||
tokenCache.set(info.sessionID, {
|
||||
providerID: info.providerID,
|
||||
modelID: info.modelID ?? "",
|
||||
tokens: info.tokens,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -19,6 +19,20 @@ describe("createToolOutputTruncatorHook", () => {
|
||||
hook = createToolOutputTruncatorHook({} as never)
|
||||
})
|
||||
|
||||
it("passes modelContextLimitsCache through to createDynamicTruncator", () => {
  // given: a cache state that carries an (empty) per-model limits map
  const ctx = {} as never
  const modelCacheState = {
    anthropicContext1MEnabled: false,
    modelContextLimitsCache: new Map<string, number>(),
  }
  // Drop calls recorded by earlier hook constructions so only this one is inspected.
  truncateSpy.mockClear()

  // when
  createToolOutputTruncatorHook(ctx, { modelCacheState })

  // then: the same state object reaches the spied factory
  expect(truncateSpy).toHaveBeenLastCalledWith(ctx, modelCacheState)
})
|
||||
|
||||
describe("tool.execute.after", () => {
|
||||
const createInput = (tool: string) => ({
|
||||
tool,
|
||||
|
||||
@@ -27,7 +27,10 @@ const TOOL_SPECIFIC_MAX_TOKENS: Record<string, number> = {
|
||||
}
|
||||
|
||||
/** Options bag passed as the second argument to createToolOutputTruncatorHook. */
interface ToolOutputTruncatorOptions {
  /**
   * Shared model cache state. Declared once, in its widened form that also
   * carries the optional per-model context limits map
   * (presumably keyed by "<providerID>/<modelID>"; verify against the cache writer).
   */
  modelCacheState?: {
    anthropicContext1MEnabled: boolean
    modelContextLimitsCache?: Map<string, number>
  }
  /** Experimental configuration; its shape is declared outside this block. */
  experimental?: ExperimentalConfig
}
|
||||
|
||||
|
||||
@@ -24,7 +24,10 @@ function resetContextLimitEnv(): void {
|
||||
}
|
||||
}
|
||||
|
||||
function createContextUsageMockContext(inputTokens: number) {
|
||||
function createContextUsageMockContext(
|
||||
inputTokens: number,
|
||||
options?: { providerID?: string; modelID?: string; cacheRead?: number }
|
||||
) {
|
||||
return {
|
||||
client: {
|
||||
session: {
|
||||
@@ -33,11 +36,13 @@ function createContextUsageMockContext(inputTokens: number) {
|
||||
{
|
||||
info: {
|
||||
role: "assistant",
|
||||
providerID: options?.providerID ?? "anthropic",
|
||||
modelID: options?.modelID,
|
||||
tokens: {
|
||||
input: inputTokens,
|
||||
output: 0,
|
||||
reasoning: 0,
|
||||
cache: { read: 0, write: 0 },
|
||||
cache: { read: options?.cacheRead ?? 0, write: 0 },
|
||||
},
|
||||
},
|
||||
},
|
||||
@@ -99,4 +104,24 @@ describe("getContextWindowUsage", () => {
|
||||
expect(usage?.usagePercentage).toBe(0.3)
|
||||
expect(usage?.remainingTokens).toBe(700000)
|
||||
})
|
||||
|
||||
it("uses model-specific limit for non-anthropic providers when cached", async () => {
  // given: a cached 262144-token limit for the non-Anthropic model that
  // produced the last assistant message
  const modelLimit = 262144
  const inputTokens = 180000
  const modelCacheState = {
    anthropicContext1MEnabled: false,
    modelContextLimitsCache: new Map<string, number>([["opencode/kimi-k2.5-free", modelLimit]]),
  }
  const ctx = createContextUsageMockContext(inputTokens, {
    providerID: "opencode",
    modelID: "kimi-k2.5-free",
  })

  // when
  const usage = await getContextWindowUsage(ctx as never, "ses_model_limit", modelCacheState)

  // then: percentage and remainder are measured against the cached limit
  expect(usage?.usagePercentage).toBeCloseTo(inputTokens / modelLimit)
  expect(usage?.remainingTokens).toBe(82144)
})
|
||||
})
|
||||
|
||||
@@ -7,6 +7,7 @@ const DEFAULT_TARGET_MAX_TOKENS = 50_000;
|
||||
|
||||
/**
 * Minimal shape of the shared model cache state read when computing context
 * window usage.
 */
type ModelCacheStateLike = {
  /** Flag consulted when resolving the Anthropic limit (NOTE(review): exact effect lives in getAnthropicActualLimit; confirm there). */
  anthropicContext1MEnabled: boolean;
  /** Optional context limits in tokens, looked up by "<providerID>/<modelID>". */
  modelContextLimitsCache?: Map<string, number>;
};
|
||||
|
||||
function getAnthropicActualLimit(modelCacheState?: ModelCacheStateLike): number {
|
||||
@@ -17,8 +18,14 @@ function getAnthropicActualLimit(modelCacheState?: ModelCacheStateLike): number
|
||||
: DEFAULT_ANTHROPIC_ACTUAL_LIMIT;
|
||||
}
|
||||
|
||||
/**
 * Tells whether a provider ID is one of the two Anthropic-backed providers
 * recognised here.
 *
 * @param providerID - Provider identifier taken from an assistant message.
 * @returns `true` only for "anthropic" or "google-vertex-anthropic" (exact match).
 */
function isAnthropicProvider(providerID: string): boolean {
  switch (providerID) {
    case "anthropic":
    case "google-vertex-anthropic":
      return true;
    default:
      return false;
  }
}
|
||||
|
||||
interface AssistantMessageInfo {
|
||||
role: "assistant";
|
||||
providerID?: string;
|
||||
modelID?: string;
|
||||
tokens: {
|
||||
input: number;
|
||||
output: number;
|
||||
@@ -136,20 +143,35 @@ export async function getContextWindowUsage(
|
||||
.map((m) => m.info as AssistantMessageInfo);
|
||||
|
||||
if (assistantMessages.length === 0) return null;
|
||||
|
||||
|
||||
const lastAssistant = assistantMessages[assistantMessages.length - 1];
|
||||
const lastTokens = lastAssistant.tokens;
|
||||
const lastTokens = lastAssistant?.tokens;
|
||||
if (!lastAssistant || !lastTokens) return null;
|
||||
|
||||
const cachedLimit =
|
||||
lastAssistant.providerID !== undefined && lastAssistant.modelID !== undefined
|
||||
? modelCacheState?.modelContextLimitsCache?.get(
|
||||
`${lastAssistant.providerID}/${lastAssistant.modelID}`,
|
||||
)
|
||||
: undefined;
|
||||
const actualLimit =
|
||||
cachedLimit ??
|
||||
(lastAssistant.providerID !== undefined && isAnthropicProvider(lastAssistant.providerID)
|
||||
? getAnthropicActualLimit(modelCacheState)
|
||||
: null);
|
||||
|
||||
if (!actualLimit) return null;
|
||||
|
||||
const usedTokens =
|
||||
(lastTokens?.input ?? 0) +
|
||||
(lastTokens?.cache?.read ?? 0) +
|
||||
(lastTokens?.output ?? 0);
|
||||
const anthropicActualLimit = getAnthropicActualLimit(modelCacheState);
|
||||
const remainingTokens = anthropicActualLimit - usedTokens;
|
||||
const remainingTokens = actualLimit - usedTokens;
|
||||
|
||||
return {
|
||||
usedTokens,
|
||||
remainingTokens,
|
||||
usagePercentage: usedTokens / anthropicActualLimit,
|
||||
usagePercentage: usedTokens / actualLimit,
|
||||
};
|
||||
} catch {
|
||||
return null;
|
||||
|
||||
Reference in New Issue
Block a user