fix(dynamic-truncator): use provider-aware context limits

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
This commit is contained in:
YeonGyu-Kim
2026-03-08 02:10:48 +09:00
parent 17707ee835
commit fdabebe889
2 changed files with 54 additions and 7 deletions

View File

@@ -24,7 +24,10 @@ function resetContextLimitEnv(): void {
}
}
function createContextUsageMockContext(inputTokens: number) {
function createContextUsageMockContext(
inputTokens: number,
options?: { providerID?: string; modelID?: string; cacheRead?: number }
) {
return {
client: {
session: {
@@ -33,11 +36,13 @@ function createContextUsageMockContext(inputTokens: number) {
{
info: {
role: "assistant",
providerID: options?.providerID ?? "anthropic",
modelID: options?.modelID,
tokens: {
input: inputTokens,
output: 0,
reasoning: 0,
cache: { read: 0, write: 0 },
cache: { read: options?.cacheRead ?? 0, write: 0 },
},
},
},
@@ -99,4 +104,24 @@ describe("getContextWindowUsage", () => {
expect(usage?.usagePercentage).toBe(0.3)
expect(usage?.remainingTokens).toBe(700000)
})
// Verifies that getContextWindowUsage prefers a cached per-model context
// limit over the Anthropic default when the provider is not Anthropic.
it("uses model-specific limit for non-anthropic providers when cached", async () => {
// given
// Seed the limits cache with a known "providerID/modelID" entry.
const modelContextLimitsCache = new Map<string, number>()
modelContextLimitsCache.set("opencode/kimi-k2.5-free", 262144)
const ctx = createContextUsageMockContext(180000, {
providerID: "opencode",
modelID: "kimi-k2.5-free",
})
// when
const usage = await getContextWindowUsage(ctx as never, "ses_model_limit", {
anthropicContext1MEnabled: false,
modelContextLimitsCache,
})
// then
// 180000 used of the cached 262144 limit; remaining = 262144 - 180000 = 82144.
expect(usage?.usagePercentage).toBeCloseTo(180000 / 262144)
expect(usage?.remainingTokens).toBe(82144)
})
})

View File

@@ -7,6 +7,7 @@ const DEFAULT_TARGET_MAX_TOKENS = 50_000;
// Minimal shape of the model-cache state the truncator reads: the
// Anthropic 1M-context flag, plus an optional cache of per-model context
// limits keyed by "providerID/modelID" (see the lookup in
// getContextWindowUsage).
type ModelCacheStateLike = {
anthropicContext1MEnabled: boolean;
modelContextLimitsCache?: Map<string, number>;
}
function getAnthropicActualLimit(modelCacheState?: ModelCacheStateLike): number {
@@ -17,8 +18,14 @@ function getAnthropicActualLimit(modelCacheState?: ModelCacheStateLike): number
: DEFAULT_ANTHROPIC_ACTUAL_LIMIT;
}
/**
 * Checks whether the given provider ID is one of the known
 * Anthropic-backed providers (direct Anthropic, or Anthropic served
 * through Google Vertex).
 */
function isAnthropicProvider(providerID: string): boolean {
  const anthropicProviderIds = ["anthropic", "google-vertex-anthropic"];
  return anthropicProviderIds.includes(providerID);
}
interface AssistantMessageInfo {
role: "assistant";
providerID?: string;
modelID?: string;
tokens: {
input: number;
output: number;
@@ -136,20 +143,35 @@ export async function getContextWindowUsage(
.map((m) => m.info as AssistantMessageInfo);
if (assistantMessages.length === 0) return null;
const lastAssistant = assistantMessages[assistantMessages.length - 1];
const lastTokens = lastAssistant.tokens;
const lastTokens = lastAssistant?.tokens;
if (!lastAssistant || !lastTokens) return null;
const cachedLimit =
lastAssistant.providerID !== undefined && lastAssistant.modelID !== undefined
? modelCacheState?.modelContextLimitsCache?.get(
`${lastAssistant.providerID}/${lastAssistant.modelID}`,
)
: undefined;
const actualLimit =
cachedLimit ??
(lastAssistant.providerID !== undefined && isAnthropicProvider(lastAssistant.providerID)
? getAnthropicActualLimit(modelCacheState)
: null);
if (!actualLimit) return null;
const usedTokens =
(lastTokens?.input ?? 0) +
(lastTokens?.cache?.read ?? 0) +
(lastTokens?.output ?? 0);
const anthropicActualLimit = getAnthropicActualLimit(modelCacheState);
const remainingTokens = anthropicActualLimit - usedTokens;
const remainingTokens = actualLimit - usedTokens;
return {
usedTokens,
remainingTokens,
usagePercentage: usedTokens / anthropicActualLimit,
usagePercentage: usedTokens / actualLimit,
};
} catch {
return null;