慣性聚合 高效追讀感興趣之博客、新聞、科技資訊
閱原文 以慣性聚合開啟

推薦訂閱源

博客园 - 司徒正美
V
V2EX
T
Tailwind CSS Blog
有赞技术团队
有赞技术团队
aimingoo的专栏
aimingoo的专栏
Apple Machine Learning Research
Apple Machine Learning Research
IT之家
IT之家
Blog — PlanetScale
Blog — PlanetScale
A
About on SuperTechFans
月光博客
月光博客
T
The Blog of Author Tim Ferriss
宝玉的分享
宝玉的分享
Martin Fowler
Martin Fowler
博客园 - 聂微东
The GitHub Blog
The GitHub Blog
V
Visual Studio Blog
WordPress大学
WordPress大学
酷 壳 – CoolShell
酷 壳 – CoolShell
Engineering at Meta
Engineering at Meta
GbyAI
GbyAI

Recent Commits to openclaw:main

test: merge chat side-result checks · openclaw/openclaw@ddd2c2a test: merge cron history checks · openclaw/openclaw@f7eb746 test: merge responsive navigation shell checks · openclaw/openclaw@c2e4b47 docs(changelog): add codex oauth fixes · openclaw/openclaw@628e6cd test: merge navigation routing cases · openclaw/openclaw@5d8cecb Tests: mock channel registry bundled fallback · openclaw/openclaw@2b08233 Secrets: avoid broad web search discovery for single plugin config · openclaw/openclaw@a464f59 test: merge config view browser checks · openclaw/openclaw@20cf511 fix(status): align oauth health with runtime · openclaw/openclaw@eed7116 feat: add macOS screen snapshots for monitor preview (#67954) thanks … · openclaw/openclaw@f377db1 fix: report shared auth scopes in hello-ok (#67810) thanks @BunsDev · openclaw/openclaw@0b6c39b Auto-reply: avoid eager bundled route fallback · openclaw/openclaw@3ea1bf4 Tests: narrow session binding contract setup · openclaw/openclaw@54e4e16 fix(macOS): enable undo/redo in webchat composer text input (#34962) · openclaw/openclaw@00951dc Tests: speed up channel setup promotion · openclaw/openclaw@82b529a Docs: refresh agent instructions · openclaw/openclaw@5775fe2 fix(auth): serialize OAuth refresh across agents to fix #26322 (#67876) · openclaw/openclaw@8e79080 test: allow ollama public surface boundary test · openclaw/openclaw@7d4f1a6 Docs: add test performance guardrails · openclaw/openclaw@89706d3 Tests: restore context-engine usage proof · openclaw/openclaw@e4c4f95 Tests: slim context engine runtime coverage · openclaw/openclaw@74c198f ci: retry failed custom checkouts · openclaw/openclaw@0ee5baf test: trim duplicate provider auth onboarding cases · openclaw/openclaw@1ffc02e matrix: fix sessions_spawn --thread subagent session spawning (#67643) · openclaw/openclaw@1ce2596 test: reduce auth choice fixture churn · openclaw/openclaw@857b9cd test: mock health status config boundaries · openclaw/openclaw@9d5ab4a test: mock onboard config io boundary · openclaw/openclaw@299694d test: mock legacy state plugin boundaries · openclaw/openclaw@2713089 test: mock channel install boundaries · openclaw/openclaw@b945248 test: mock doctor preview channel boundaries · openclaw/openclaw@b1a3ad4 test: trim doctor command hotspots · openclaw/openclaw@c66f16a test: isolate agent auth and spawn hotspots · openclaw/openclaw@9285935 test: stabilize MCP startup disposal race · openclaw/openclaw@dd9d2eb test: merge browser contract server suites · openclaw/openclaw@5817a76 test: narrow ollama provider discovery setup · openclaw/openclaw@a0d9598 build: declare qa-lab aimock runtime dependency · openclaw/openclaw@24431e5 test: speed up safe-bins exec harness · openclaw/openclaw@ee856ab test: preserve tool helpers in embedded runner mocks · openclaw/openclaw@acd86a0 refactor: move memory embeddings into provider plugins · openclaw/openclaw@77e6e4c test: reuse system-run temp fixtures · openclaw/openclaw@7e9ff0f test: trim hotspot wait overhead · openclaw/openclaw@12a59b0 Check: avoid duplicate boundary prep · openclaw/openclaw@baf11b8 test: reduce hotspot fixture overhead · openclaw/openclaw@3a59edd feat(ui): overhaul settings and slash command UX (#67819) thanks @Bun… · openclaw/openclaw@2cfb660 QA Matrix: exit cleanly on failure · openclaw/openclaw@42805d2 QA Matrix: isolate scenario coverage · openclaw/openclaw@7e659e1 Matrix: refresh crypto bootstrap state · openclaw/openclaw@94081d8 QA Lab: add provider registry · openclaw/openclaw@bb7e982 Matrix: add plugin changelog · openclaw/openclaw@4acab55 test: trim more hotspot overhead · openclaw/openclaw@f485311
修(诸)器:制代理之代偿,限载后之成数(clamp proxy completions caps after payload shaping (#85889) · openclaw/openclaw@1d1a7c2)
rendrag-git · 2026-05-24 · via Recent Commits to openclaw:main

@@ -4766,6 +4766,248 @@ describe("openai transport stream", () => {

47664766

expect(params).not.toHaveProperty("max_completion_tokens");

47674767

});

476847684769+

it("clamps max_completion_tokens to the remaining context budget for proxy-like endpoints when prompt + output would exceed contextWindow (covers #83086)", () => {

4770+

// StepFun-style shape: large context window, max_tokens equal to context,

4771+

// and a substantial prompt that should leave well under the context budget.

4772+

// 200_000 ASCII chars -> estimated 62_500 input tokens (chars/4 * 1.25).

4773+

// That leaves remaining budget of 262_144 - 62_500 - 1 = 199_643 tokens.

4774+

const systemPrompt = "x".repeat(200_000);

4775+

const params = buildOpenAICompletionsParams(

4776+

{

4777+

id: "step-router-v1",

4778+

name: "StepFun step-router-v1",

4779+

api: "openai-completions",

4780+

provider: "stepfun-plan",

4781+

baseUrl: "https://api.stepfun.com/v1",

4782+

reasoning: false,

4783+

input: ["text"],

4784+

cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },

4785+

contextWindow: 262_144,

4786+

maxTokens: 262_144,

4787+

} satisfies Model<"openai-completions">,

4788+

{

4789+

systemPrompt,

4790+

messages: [],

4791+

tools: [],

4792+

} as never,

4793+

undefined,

4794+

);

4795+4796+

expect(typeof params.max_completion_tokens).toBe("number");

4797+

const cap = params.max_completion_tokens as number;

4798+

const estimatedInputTokens = Math.ceil((systemPrompt.length / 4) * 1.25);

4799+

expect(cap).toBe(262_144 - estimatedInputTokens - 1);

4800+

expect(cap).toBeLessThan(262_144);

4801+

});

4802+4803+

it("uses CJK-aware input estimates when clamping proxy-like completions output budgets", () => {

4804+

const cjkPrompt = "你好世界".repeat(1_000);

4805+

const params = buildOpenAICompletionsParams(

4806+

{

4807+

id: "kimi-k2.6",

4808+

name: "Kimi K2.6",

4809+

api: "openai-completions",

4810+

provider: "dashscope",

4811+

baseUrl: "https://dashscope.aliyuncs.com/compatible-mode/v1",

4812+

reasoning: false,

4813+

input: ["text"],

4814+

cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },

4815+

contextWindow: 10_000,

4816+

maxTokens: 10_000,

4817+

} satisfies Model<"openai-completions">,

4818+

{

4819+

systemPrompt: cjkPrompt,

4820+

messages: [],

4821+

tools: [],

4822+

} as never,

4823+

undefined,

4824+

);

4825+4826+

// 4,000 CJK chars count as 16,000 adjusted chars, then chars/4 * 1.25.

4827+

expect(params.max_completion_tokens).toBe(10_000 - 5_000 - 1);

4828+

});

4829+4830+

it("rounds proxy-like completions input estimates after summing message content", () => {

4831+

const messages = Array.from({ length: 4_000 }, () => ({

4832+

role: "user",

4833+

content: "x",

4834+

}));

4835+

const params = buildOpenAICompletionsParams(

4836+

{

4837+

id: "qwen3-5-122b-a10b-nvfp4",

4838+

name: "qwen3-5-122b-a10b-nvfp4",

4839+

api: "openai-completions",

4840+

provider: "vllm",

4841+

baseUrl: "http://localhost:8000/v1",

4842+

reasoning: false,

4843+

input: ["text"],

4844+

cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },

4845+

contextWindow: 10_000,

4846+

maxTokens: 10_000,

4847+

} satisfies Model<"openai-completions">,

4848+

{

4849+

systemPrompt: undefined,

4850+

messages,

4851+

tools: [],

4852+

} as never,

4853+

undefined,

4854+

);

4855+4856+

expect(params.max_completion_tokens).toBe(10_000 - 1_250 - 1);

4857+

});

4858+4859+

it("estimates proxy-like completions input from the final outbound messages after compat transforms", () => {

4860+

const userText = "ok";

4861+

const params = buildOpenAICompletionsParams(

4862+

{

4863+

id: "qwen3-5-122b-a10b-nvfp4",

4864+

name: "qwen3-5-122b-a10b-nvfp4",

4865+

api: "openai-completions",

4866+

provider: "vllm",

4867+

baseUrl: "http://localhost:8000/v1",

4868+

reasoning: false,

4869+

input: ["text"],

4870+

cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },

4871+

contextWindow: 10_000,

4872+

maxTokens: 10_000,

4873+

} satisfies Model<"openai-completions">,

4874+

{

4875+

messages: [

4876+

{ role: "user", content: userText, timestamp: 1 },

4877+

{

4878+

role: "assistant",

4879+

content: [{ type: "text", text: "x".repeat(20_000) }],

4880+

api: "openai-completions",

4881+

provider: "vllm",

4882+

model: "qwen3-5-122b-a10b-nvfp4",

4883+

usage: {

4884+

input: 0,

4885+

output: 0,

4886+

cacheRead: 0,

4887+

cacheWrite: 0,

4888+

totalTokens: 0,

4889+

cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },

4890+

},

4891+

stopReason: "aborted",

4892+

timestamp: 2,

4893+

},

4894+

],

4895+

tools: [],

4896+

} as never,

4897+

undefined,

4898+

);

4899+4900+

const estimatedInputTokens = Math.ceil((userText.length / 4) * 1.25);

4901+

expect(params.max_completion_tokens).toBe(10_000 - estimatedInputTokens - 1);

4902+

});

4903+4904+

it("clamps proxy-like completions output budgets against contextTokens before contextWindow", () => {

4905+

const params = buildOpenAICompletionsParams(

4906+

{

4907+

id: "qwen3-5-122b-a10b-nvfp4",

4908+

name: "qwen3-5-122b-a10b-nvfp4",

4909+

api: "openai-completions",

4910+

provider: "vllm",

4911+

baseUrl: "http://localhost:8000/v1",

4912+

reasoning: false,

4913+

input: ["text"],

4914+

cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },

4915+

contextWindow: 131_072,

4916+

contextTokens: 4_096,

4917+

maxTokens: 200_000,

4918+

} as unknown as Model<"openai-completions">,

4919+

{

4920+

systemPrompt: "system",

4921+

messages: [],

4922+

tools: [],

4923+

} as never,

4924+

undefined,

4925+

);

4926+4927+

expect(params.max_completion_tokens).toBe(4_096 - 2 - 1);

4928+

});

4929+4930+

it("clamps max_completion_tokens for proxy-like endpoints when configured maxTokens >= contextWindow and prompt is small", () => {

4931+

// Misconfig case: tiny prompt, but configured maxTokens still exceeds the

4932+

// model's contextWindow. Clamp should land just under the window.

4933+

const params = buildOpenAICompletionsParams(

4934+

{

4935+

id: "qwen3-5-122b-a10b-nvfp4",

4936+

name: "qwen3-5-122b-a10b-nvfp4",

4937+

api: "openai-completions",

4938+

provider: "vllm",

4939+

baseUrl: "http://localhost:8000/v1",

4940+

reasoning: false,

4941+

input: ["text"],

4942+

cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },

4943+

contextWindow: 131_072,

4944+

maxTokens: 200_000,

4945+

} satisfies Model<"openai-completions">,

4946+

{

4947+

systemPrompt: "system",

4948+

messages: [],

4949+

tools: [],

4950+

} as never,

4951+

undefined,

4952+

);

4953+4954+

expect(typeof params.max_completion_tokens).toBe("number");

4955+

const cap = params.max_completion_tokens as number;

4956+

expect(cap).toBeLessThan(131_072);

4957+

// Small prompt → cap is essentially contextWindow - 1 - tiny_input_estimate.

4958+

expect(cap).toBeGreaterThanOrEqual(131_000);

4959+

});

4960+4961+

it("does not clamp max_completion_tokens for proxy-like endpoints when maxTokens fits the context window", () => {

4962+

const params = buildOpenAICompletionsParams(

4963+

{

4964+

id: "qwen3-5-122b-a10b-nvfp4",

4965+

name: "qwen3-5-122b-a10b-nvfp4",

4966+

api: "openai-completions",

4967+

provider: "vllm",

4968+

baseUrl: "http://localhost:8000/v1",

4969+

reasoning: false,

4970+

input: ["text"],

4971+

cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },

4972+

contextWindow: 131_072,

4973+

maxTokens: 8192,

4974+

} satisfies Model<"openai-completions">,

4975+

{

4976+

systemPrompt: "system",

4977+

messages: [],

4978+

tools: [],

4979+

} as never,

4980+

undefined,

4981+

);

4982+4983+

expect(params.max_completion_tokens).toBe(8192);

4984+

});

4985+4986+

it("preserves the configured maxTokens for native openai-completions endpoints even when it equals or exceeds contextWindow", () => {

4987+

const params = buildOpenAICompletionsParams(

4988+

{

4989+

id: "gpt-5.4",

4990+

name: "GPT-5.4",

4991+

api: "openai-completions",

4992+

provider: "openai",

4993+

baseUrl: "https://api.openai.com/v1",

4994+

reasoning: false,

4995+

input: ["text"],

4996+

cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },

4997+

contextWindow: 100_000,

4998+

maxTokens: 200_000,

4999+

} satisfies Model<"openai-completions">,

5000+

{

5001+

systemPrompt: "system",

5002+

messages: [],

5003+

tools: [],

5004+

} as never,

5005+

undefined,

5006+

);

5007+5008+

expect(params.max_completion_tokens).toBe(200_000);

5009+

});

5010+47695011

it("omits strict tool shaping for Z.ai default-route completions providers", () => {

47705012

const params = buildOpenAICompletionsParams(

47715013

{