fix(dynamic-truncator): use provider-aware context limits

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
This commit is contained in:
YeonGyu-Kim
2026-03-08 02:10:48 +09:00
parent 17707ee835
commit fdabebe889
2 changed files with 54 additions and 7 deletions

View File

@@ -24,7 +24,10 @@ function resetContextLimitEnv(): void {
}
}
function createContextUsageMockContext(inputTokens: number) {
function createContextUsageMockContext(
inputTokens: number,
options?: { providerID?: string; modelID?: string; cacheRead?: number }
) {
return {
client: {
session: {
@@ -33,11 +36,13 @@ function createContextUsageMockContext(inputTokens: number) {
{
info: {
role: "assistant",
providerID: options?.providerID ?? "anthropic",
modelID: options?.modelID,
tokens: {
input: inputTokens,
output: 0,
reasoning: 0,
cache: { read: 0, write: 0 },
cache: { read: options?.cacheRead ?? 0, write: 0 },
},
},
},
@@ -99,4 +104,24 @@ describe("getContextWindowUsage", () => {
expect(usage?.usagePercentage).toBe(0.3)
expect(usage?.remainingTokens).toBe(700000)
})
// Verifies that getContextWindowUsage prefers a cached per-model context
// limit over the Anthropic default when the provider is not Anthropic.
it("uses model-specific limit for non-anthropic providers when cached", async () => {
// given
// Seed the limits cache with a known "providerID/modelID" entry.
const modelContextLimitsCache = new Map<string, number>()
modelContextLimitsCache.set("opencode/kimi-k2.5-free", 262144)
const ctx = createContextUsageMockContext(180000, {
providerID: "opencode",
modelID: "kimi-k2.5-free",
})
// when
const usage = await getContextWindowUsage(ctx as never, "ses_model_limit", {
anthropicContext1MEnabled: false,
modelContextLimitsCache,
})
// then
// 180000 used of the cached 262144 limit; remaining = 262144 - 180000 = 82144.
expect(usage?.usagePercentage).toBeCloseTo(180000 / 262144)
expect(usage?.remainingTokens).toBe(82144)
})
})

View File

@@ -7,6 +7,7 @@ const DEFAULT_TARGET_MAX_TOKENS = 50_000;
// Minimal shape of the model-cache state the truncator reads: the
// Anthropic 1M-context flag, plus an optional cache of per-model context
// limits keyed by "providerID/modelID" (see the lookup in
// getContextWindowUsage).
type ModelCacheStateLike = {
anthropicContext1MEnabled: boolean;
modelContextLimitsCache?: Map<string, number>;
}
function getAnthropicActualLimit(modelCacheState?: ModelCacheStateLike): number {
@@ -17,8 +18,14 @@ function getAnthropicActualLimit(modelCacheState?: ModelCacheStateLike): number
: DEFAULT_ANTHROPIC_ACTUAL_LIMIT;
}
/**
 * Checks whether the given provider ID is one of the known
 * Anthropic-backed providers (direct Anthropic, or Anthropic served
 * through Google Vertex).
 */
function isAnthropicProvider(providerID: string): boolean {
  const anthropicProviderIds = ["anthropic", "google-vertex-anthropic"];
  return anthropicProviderIds.includes(providerID);
}
interface AssistantMessageInfo {
role: "assistant";
providerID?: string;
modelID?: string;
tokens: {
input: number;
output: number;
@@ -136,20 +143,35 @@ export async function getContextWindowUsage(
.map((m) => m.info as AssistantMessageInfo);
if (assistantMessages.length === 0) return null;
const lastAssistant = assistantMessages[assistantMessages.length - 1];
const lastTokens = lastAssistant.tokens;
const lastTokens = lastAssistant?.tokens;
if (!lastAssistant || !lastTokens) return null;
const cachedLimit =
lastAssistant.providerID !== undefined && lastAssistant.modelID !== undefined
? modelCacheState?.modelContextLimitsCache?.get(
`${lastAssistant.providerID}/${lastAssistant.modelID}`,
)
: undefined;
const actualLimit =
cachedLimit ??
(lastAssistant.providerID !== undefined && isAnthropicProvider(lastAssistant.providerID)
? getAnthropicActualLimit(modelCacheState)
: null);
if (!actualLimit) return null;
const usedTokens =
(lastTokens?.input ?? 0) +
(lastTokens?.cache?.read ?? 0) +
(lastTokens?.output ?? 0);
const anthropicActualLimit = getAnthropicActualLimit(modelCacheState);
const remainingTokens = anthropicActualLimit - usedTokens;
const remainingTokens = actualLimit - usedTokens;
return {
usedTokens,
remainingTokens,
usagePercentage: usedTokens / anthropicActualLimit,
usagePercentage: usedTokens / actualLimit,
};
} catch {
return null;