Root cause: DEFAULT_TARGET_MAX_TOKENS (50k tokens ~200k chars) was too high for webfetch outputs. Web pages can be large but most content doesn't exceed this limit, so truncation rarely triggered. Changes: - Add WEBFETCH_MAX_TOKENS = 10k tokens (~40k chars) for web content - Introduce TOOL_SPECIFIC_MAX_TOKENS map for per-tool limits - webfetch/WebFetch now use aggressive 10k token limit - Other tools continue using default 50k token limit - Add comprehensive tests for truncation behavior Fixes #195 Co-authored-by: sisyphus-dev-ai <sisyphus-dev-ai@users.noreply.github.com>
169 lines
5.8 KiB
TypeScript
169 lines
5.8 KiB
TypeScript
import { describe, it, expect, beforeEach, mock, spyOn } from "bun:test"
|
|
import { createToolOutputTruncatorHook } from "./tool-output-truncator"
|
|
import * as dynamicTruncator from "../shared/dynamic-truncator"
|
|
|
|
describe("createToolOutputTruncatorHook", () => {
|
|
let hook: ReturnType<typeof createToolOutputTruncatorHook>
|
|
let truncateSpy: ReturnType<typeof spyOn>
|
|
|
|
beforeEach(() => {
|
|
truncateSpy = spyOn(dynamicTruncator, "createDynamicTruncator").mockReturnValue({
|
|
truncate: mock(async (_sessionID: string, output: string, options?: { targetMaxTokens?: number }) => ({
|
|
result: output,
|
|
truncated: false,
|
|
targetMaxTokens: options?.targetMaxTokens,
|
|
})),
|
|
getUsage: mock(async () => null),
|
|
truncateSync: mock(() => ({ result: "", truncated: false })),
|
|
})
|
|
hook = createToolOutputTruncatorHook({} as never)
|
|
})
|
|
|
|
describe("tool.execute.after", () => {
|
|
const createInput = (tool: string) => ({
|
|
tool,
|
|
sessionID: "test-session",
|
|
callID: "test-call-id",
|
|
})
|
|
|
|
const createOutput = (outputText: string) => ({
|
|
title: "Result",
|
|
output: outputText,
|
|
metadata: {},
|
|
})
|
|
|
|
describe("#given webfetch tool", () => {
|
|
describe("#when output is processed", () => {
|
|
it("#then should use aggressive truncation limit (10k tokens)", async () => {
|
|
const truncateMock = mock(async (_sessionID: string, _output: string, options?: { targetMaxTokens?: number }) => ({
|
|
result: "truncated",
|
|
truncated: true,
|
|
targetMaxTokens: options?.targetMaxTokens,
|
|
}))
|
|
truncateSpy.mockReturnValue({
|
|
truncate: truncateMock,
|
|
getUsage: mock(async () => null),
|
|
truncateSync: mock(() => ({ result: "", truncated: false })),
|
|
})
|
|
hook = createToolOutputTruncatorHook({} as never)
|
|
|
|
const input = createInput("webfetch")
|
|
const output = createOutput("large content")
|
|
|
|
await hook["tool.execute.after"](input, output)
|
|
|
|
expect(truncateMock).toHaveBeenCalledWith(
|
|
"test-session",
|
|
"large content",
|
|
{ targetMaxTokens: 10_000 }
|
|
)
|
|
})
|
|
})
|
|
|
|
describe("#when using WebFetch variant", () => {
|
|
it("#then should also use aggressive truncation limit", async () => {
|
|
const truncateMock = mock(async (_sessionID: string, _output: string, options?: { targetMaxTokens?: number }) => ({
|
|
result: "truncated",
|
|
truncated: true,
|
|
}))
|
|
truncateSpy.mockReturnValue({
|
|
truncate: truncateMock,
|
|
getUsage: mock(async () => null),
|
|
truncateSync: mock(() => ({ result: "", truncated: false })),
|
|
})
|
|
hook = createToolOutputTruncatorHook({} as never)
|
|
|
|
const input = createInput("WebFetch")
|
|
const output = createOutput("large content")
|
|
|
|
await hook["tool.execute.after"](input, output)
|
|
|
|
expect(truncateMock).toHaveBeenCalledWith(
|
|
"test-session",
|
|
"large content",
|
|
{ targetMaxTokens: 10_000 }
|
|
)
|
|
})
|
|
})
|
|
})
|
|
|
|
describe("#given grep tool", () => {
|
|
describe("#when output is processed", () => {
|
|
it("#then should use default truncation limit (50k tokens)", async () => {
|
|
const truncateMock = mock(async (_sessionID: string, _output: string, options?: { targetMaxTokens?: number }) => ({
|
|
result: "truncated",
|
|
truncated: true,
|
|
}))
|
|
truncateSpy.mockReturnValue({
|
|
truncate: truncateMock,
|
|
getUsage: mock(async () => null),
|
|
truncateSync: mock(() => ({ result: "", truncated: false })),
|
|
})
|
|
hook = createToolOutputTruncatorHook({} as never)
|
|
|
|
const input = createInput("grep")
|
|
const output = createOutput("grep output")
|
|
|
|
await hook["tool.execute.after"](input, output)
|
|
|
|
expect(truncateMock).toHaveBeenCalledWith(
|
|
"test-session",
|
|
"grep output",
|
|
{ targetMaxTokens: 50_000 }
|
|
)
|
|
})
|
|
})
|
|
})
|
|
|
|
describe("#given non-truncatable tool", () => {
|
|
describe("#when tool is not in TRUNCATABLE_TOOLS list", () => {
|
|
it("#then should not call truncator", async () => {
|
|
const truncateMock = mock(async () => ({
|
|
result: "truncated",
|
|
truncated: true,
|
|
}))
|
|
truncateSpy.mockReturnValue({
|
|
truncate: truncateMock,
|
|
getUsage: mock(async () => null),
|
|
truncateSync: mock(() => ({ result: "", truncated: false })),
|
|
})
|
|
hook = createToolOutputTruncatorHook({} as never)
|
|
|
|
const input = createInput("Read")
|
|
const output = createOutput("file content")
|
|
|
|
await hook["tool.execute.after"](input, output)
|
|
|
|
expect(truncateMock).not.toHaveBeenCalled()
|
|
})
|
|
})
|
|
})
|
|
|
|
describe("#given truncate_all_tool_outputs enabled", () => {
|
|
describe("#when any tool output is processed", () => {
|
|
it("#then should truncate non-listed tools too", async () => {
|
|
const truncateMock = mock(async (_sessionID: string, _output: string, options?: { targetMaxTokens?: number }) => ({
|
|
result: "truncated",
|
|
truncated: true,
|
|
}))
|
|
truncateSpy.mockReturnValue({
|
|
truncate: truncateMock,
|
|
getUsage: mock(async () => null),
|
|
truncateSync: mock(() => ({ result: "", truncated: false })),
|
|
})
|
|
hook = createToolOutputTruncatorHook({} as never, {
|
|
experimental: { truncate_all_tool_outputs: true },
|
|
})
|
|
|
|
const input = createInput("Read")
|
|
const output = createOutput("file content")
|
|
|
|
await hook["tool.execute.after"](input, output)
|
|
|
|
expect(truncateMock).toHaveBeenCalled()
|
|
})
|
|
})
|
|
})
|
|
})
|
|
})
|