
























@@ -21,8 +21,121 @@ import {
2121const log = createSubsystemLogger("bedrock-discovery");
22222323const DEFAULT_REFRESH_INTERVAL_SECONDS = 3600;
24-const DEFAULT_CONTEXT_WINDOW = 32000;
24+const DEFAULT_CONTEXT_WINDOW = 32_000;
2525const DEFAULT_MAX_TOKENS = 4096;
26+27+// ---------------------------------------------------------------------------
28+// Known model context windows (Bedrock API does not expose token limits)
29+// ---------------------------------------------------------------------------
/**
 * Context window sizes (in tokens) for known Bedrock models, keyed by base
 * model ID.
 *
 * Bedrock's ListFoundationModels and GetFoundationModel APIs return no token
 * limit information — only model ID, name, modalities, and lifecycle status.
 * There is currently no Bedrock API to discover context windows or max output
 * tokens programmatically.
 *
 * This map provides context window values for known models so that session
 * management, compaction thresholds, and context overflow detection work
 * correctly. If AWS adds token metadata to the API in the future, this table
 * should become a fallback rather than the primary source.
 *
 * Keys are base model IDs. `resolveKnownContextWindow` strips inference
 * profile prefixes (us., eu., ap., apac., au., jp., global.) and retries
 * without a trailing ":0" before giving up, so neither needs a separate entry.
 *
 * Sources: https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html
 *          https://platform.claude.com/docs/en/about-claude/models
 */
const KNOWN_CONTEXT_WINDOWS: Readonly<Record<string, number>> = {
  // Anthropic Claude
  "anthropic.claude-3-7-sonnet-20250219-v1:0": 200_000,
  "anthropic.claude-opus-4-7": 1_000_000,
  "anthropic.claude-opus-4-6-v1": 1_000_000,
  "anthropic.claude-opus-4-6-v1:0": 1_000_000,
  "anthropic.claude-sonnet-4-6": 1_000_000,
  "anthropic.claude-sonnet-4-6-v1:0": 1_000_000,
  "anthropic.claude-sonnet-4-5-20250929-v1:0": 200_000,
  "anthropic.claude-sonnet-4-20250514-v1:0": 200_000,
  "anthropic.claude-opus-4-5-20251101-v1:0": 200_000,
  "anthropic.claude-opus-4-1-20250805-v1:0": 200_000,
  "anthropic.claude-haiku-4-5-20251001-v1:0": 200_000,
  "anthropic.claude-3-5-haiku-20241022-v1:0": 200_000,
  "anthropic.claude-3-haiku-20240307-v1:0": 200_000,

  // Amazon Nova
  "amazon.nova-premier-v1:0": 1_000_000,
  "amazon.nova-pro-v1:0": 300_000,
  "amazon.nova-lite-v1:0": 300_000,
  "amazon.nova-micro-v1:0": 128_000,
  "amazon.nova-2-lite-v1:0": 300_000,

  // MiniMax
  "minimax.minimax-m2.5": 1_000_000,
  "minimax.minimax-m2.1": 1_000_000,
  "minimax.minimax-m2": 1_000_000,

  // Meta Llama 4
  "meta.llama4-maverick-17b-instruct-v1:0": 1_000_000,
  "meta.llama4-scout-17b-instruct-v1:0": 512_000,

  // Meta Llama 3
  "meta.llama3-3-70b-instruct-v1:0": 128_000,
  "meta.llama3-2-90b-instruct-v1:0": 128_000,
  "meta.llama3-2-11b-instruct-v1:0": 128_000,
  "meta.llama3-2-3b-instruct-v1:0": 128_000,
  "meta.llama3-2-1b-instruct-v1:0": 128_000,
  "meta.llama3-1-405b-instruct-v1:0": 128_000,
  "meta.llama3-1-70b-instruct-v1:0": 128_000,
  "meta.llama3-1-8b-instruct-v1:0": 128_000,

  // NVIDIA Nemotron
  "nvidia.nemotron-super-3-120b": 256_000,
  "nvidia.nemotron-nano-3-30b": 128_000,
  "nvidia.nemotron-nano-12b-v2": 128_000,
  "nvidia.nemotron-nano-9b-v2": 128_000,

  // Mistral
  "mistral.mistral-large-3-675b-instruct": 128_000,
  "mistral.mistral-large-2407-v1:0": 128_000,
  "mistral.mistral-small-2402-v1:0": 32_000,

  // DeepSeek
  "deepseek.r1-v1:0": 128_000,
  "deepseek.v3.2": 128_000,

  // Cohere
  "cohere.command-r-plus-v1:0": 128_000,
  "cohere.command-r-v1:0": 128_000,

  // AI21
  "ai21.jamba-1-5-large-v1:0": 256_000,
  "ai21.jamba-1-5-mini-v1:0": 256_000,

  // Google Gemma
  "google.gemma-3-27b-it": 128_000,
  "google.gemma-3-12b-it": 128_000,
  "google.gemma-3-4b-it": 128_000,

  // GLM
  "zai.glm-5": 128_000,
  "zai.glm-4.7": 128_000,
  "zai.glm-4.7-flash": 128_000,

  // Qwen
  "qwen.qwen3-coder-next": 256_000,
  "qwen.qwen3-coder-30b-a3b-v1:0": 256_000,
  "qwen.qwen3-32b-v1:0": 128_000,
  "qwen.qwen3-vl-235b-a22b": 128_000,
};
/**
 * Resolve the known context window for a Bedrock model or inference-profile ID.
 *
 * Candidates are tried in this order, returning the first table hit:
 *   1. `modelId` exactly as given,
 *   2. `modelId` without a trailing ":0",
 *   3. `modelId` with a leading inference-profile prefix
 *      (us., eu., ap., apac., au., jp., global.) removed,
 *   4. the prefix-stripped ID without a trailing ":0".
 *
 * @param modelId Bedrock model ID or system-defined inference profile ID.
 * @returns The context window in tokens, or `undefined` when the model is not
 *   in KNOWN_CONTEXT_WINDOWS so callers can fall back to their defaults.
 */
function resolveKnownContextWindow(modelId: string): number | undefined {
  // Own-property lookup only: a bare `table[key]` would also find inherited
  // members such as "constructor" or "toString" and return a non-number.
  const lookup = (key: string): number | undefined =>
    Object.prototype.hasOwnProperty.call(KNOWN_CONTEXT_WINDOWS, key)
      ? KNOWN_CONTEXT_WINDOWS[key]
      : undefined;

  // Case-insensitive, matching how resolveBaseModelId parses profile prefixes.
  const stripped = modelId.replace(/^(?:us|eu|ap|apac|au|jp|global)\./i, "");
  // Skip the second pass when there was no prefix to strip.
  const candidates = stripped === modelId ? [modelId] : [modelId, stripped];

  for (const candidate of candidates) {
    const exact = lookup(candidate);
    if (exact !== undefined) {
      return exact;
    }

    // Some table entries are stored without the ":0" version suffix.
    const withoutVersionSuffix = candidate.replace(/:0$/, "");
    if (withoutVersionSuffix !== candidate) {
      const unversioned = lookup(withoutVersionSuffix);
      if (unversioned !== undefined) {
        return unversioned;
      }
    }
  }
  return undefined;
}
138+26139const DEFAULT_COST = {
27140input: 0,
28141output: 0,
@@ -163,7 +276,7 @@ function toModelDefinition(
163276reasoning: inferReasoningSupport(summary),
164277input: mapInputModalities(summary),
165278cost: DEFAULT_COST,
166-contextWindow: defaults.contextWindow,
279+contextWindow: resolveKnownContextWindow(id) ?? defaults.contextWindow,
167280maxTokens: defaults.maxTokens,
168281};
169282}
@@ -192,7 +305,7 @@ function resolveBaseModelId(profile: InferenceProfileSummary): string | undefine
192305}
193306if (profile.type === "SYSTEM_DEFINED") {
194307const id = profile.inferenceProfileId ?? "";
195-const prefixMatch = /^(?:us|eu|ap|jp|global)\.(.+)$/i.exec(id);
308+const prefixMatch = /^(?:us|eu|ap|apac|au|jp|global)\.(.+)$/i.exec(id);
196309if (prefixMatch) {
197310return prefixMatch[1];
198311}
@@ -282,7 +395,9 @@ function resolveInferenceProfiles(
282395reasoning: baseModel?.reasoning ?? false,
283396input: baseModel?.input ?? ["text"],
284397cost: baseModel?.cost ?? DEFAULT_COST,
285-contextWindow: baseModel?.contextWindow ?? defaults.contextWindow,
398+contextWindow: baseModel?.contextWindow
399+?? resolveKnownContextWindow(baseModelId ?? profile.inferenceProfileId ?? "")
400+?? defaults.contextWindow,
286401maxTokens: baseModel?.maxTokens ?? defaults.maxTokens,
287402});
288403}
此内容由惯性聚合(RSS阅读器)自动聚合整理,仅供阅读参考。 原文来自 — 版权归原作者所有。