






















好久没写了,今天遇到个好玩的,记下来
WebGPU + WebAssembly + 量化模型——浏览器端运行大语言模型(LLM)的主流方案
| 组件 | 作用 | 关键特性 |
|---|---|---|
WebLLM (MLC)——专为 LLM 优化的 WebGPU 原生方案,支持 Llama、Phi、Gemma 等
安装依赖
npm install @mlc-ai/web-llm
test.html
<!DOCTYPE html>
<html>
<head>
<!-- The script below contains CJK and emoji literals; declare the encoding explicitly. -->
<meta charset="UTF-8" />
<script type="module">
  // Minimal WebLLM smoke test: loads one model, sends one prompt, and logs
  // everything to the browser console (the page itself stays blank).
  import { CreateMLCEngine } from "https://esm.run/@mlc-ai/web-llm";

  const modelId = "Qwen2.5-0.5B-Instruct-q4f32_1-MLC";

  /**
   * Loads the model, requests a single completion and logs the result.
   * All failures are caught and reported here, so the returned promise
   * does not reject.
   */
  async function run() {
    try {
      // 1. Initialise the engine; progress text is forwarded to the console.
      const engine = await CreateMLCEngine(modelId, {
        initProgressCallback: (p) => console.log("[加载进度]", p.text),
      });
      console.log("🤖", "模型加载完成,准备生成...");

      // 2. Send the request. `stream: false` is set explicitly so the call
      //    resolves to one complete response object that can be logged directly.
      const res = await engine.chat.completions.create({
        messages: [
          { role: "user", content: "用Python写一个冒泡排序,并解释代码。" },
        ],
        stream: false,
        max_tokens: 500, // cap the generation length
      });

      // 3. Read the reply defensively: any missing level yields undefined.
      const content = res.choices?.[0]?.message?.content;
      if (content) {
        console.log("✅ 生成结果:", content);
      } else {
        console.warn("⚠️ 模型返回为空或格式异常", res);
      }
    } catch (err) {
      // 4. Report the failure.
      console.error("❌ 发生错误:", err);

      // A rejected value is not guaranteed to be an Error; normalise it first
      // so this handler cannot itself throw while inspecting the message.
      const message = err instanceof Error ? err.message : String(err);
      if (message.includes("SharedArrayBuffer")) {
        alert("错误:检测到跨域隔离问题。请确保服务器配置了 COOP/COEP 响应头,或使用支持 WebGPU 的正确环境。");
      }
    }
  }

  run();
</script>
</head>
<body></body>
</html>
第一次加载模型会很慢;加载进度和生成结果都只输出在浏览器的控制台(console)里,页面本身是空白的。
现在改进下
test1.html
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>WebLLM Qwen2.5 测试</title>
<style>
  * { box-sizing: border-box; }
  body { font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif; max-width: 900px; margin: 2rem auto; padding: 0 1rem; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); min-height: 100vh; }
  .container { background: white; border-radius: 16px; box-shadow: 0 10px 40px rgba(0,0,0,0.2); overflow: hidden; }
  .header { background: linear-gradient(135deg, #2563eb 0%, #1d4ed8 100%); color: white; padding: 1.5rem; text-align: center; }
  .header h2 { margin: 0; font-size: 1.5rem; }
  .header p { margin: 0.5rem 0 0 0; opacity: 0.9; font-size: 0.9rem; }
  .status-bar { padding: 0.75rem 1rem; background: #f8fafc; border-bottom: 1px solid #e2e8f0; font-size: 0.85rem; display: flex; align-items: center; gap: 8px; }
  .status-icon { width: 8px; height: 8px; border-radius: 50%; background: #fbbf24; animation: pulse 1.5s ease-in-out infinite; }
  .status-icon.ready { background: #22c55e; animation: none; }
  .status-icon.error { background: #ef4444; animation: none; }
  @keyframes pulse { 0%, 100% { opacity: 1; transform: scale(1); } 50% { opacity: 0.5; transform: scale(1.2); } }
  .progress-bar { flex: 1; height: 6px; background: #e2e8f0; border-radius: 3px; overflow: hidden; }
  .progress-fill { height: 100%; background: linear-gradient(90deg, #2563eb, #1d4ed8); width: 0%; transition: width 0.3s ease; border-radius: 3px; }
  .chat-box { height: 450px; overflow-y: auto; padding: 1.5rem; display: flex; flex-direction: column; gap: 12px; background: #f8fafc; }
  .chat-box::-webkit-scrollbar { width: 8px; }
  .chat-box::-webkit-scrollbar-track { background: #f1f1f1; }
  .chat-box::-webkit-scrollbar-thumb { background: #c1c1c1; border-radius: 4px; }
  .message { padding: 12px 16px; border-radius: 12px; max-width: 85%; line-height: 1.6; animation: slideIn 0.3s ease; word-wrap: break-word; white-space: pre-wrap; }
  @keyframes slideIn { from { opacity: 0; transform: translateY(10px); } to { opacity: 1; transform: translateY(0); } }
  .user { align-self: flex-end; background: linear-gradient(135deg, #2563eb 0%, #1d4ed8 100%); color: white; border-bottom-right-radius: 4px; }
  .bot { align-self: flex-start; background: white; color: #333; border: 1px solid #e2e8f0; border-bottom-left-radius: 4px; }
  .typing-indicator { display: inline-flex; gap: 4px; align-items: center; }
  .typing-dot { width: 6px; height: 6px; border-radius: 50%; background: #9ca3af; animation: typing 1.4s ease-in-out infinite; }
  .typing-dot:nth-child(2) { animation-delay: 0.2s; }
  .typing-dot:nth-child(3) { animation-delay: 0.4s; }
  @keyframes typing { 0%, 100% { transform: translateY(0); opacity: 0.4; } 50% { transform: translateY(-4px); opacity: 1; } }
  .input-area { display: flex; padding: 1rem 1.5rem; border-top: 1px solid #e2e8f0; gap: 12px; background: white; }
  .input-wrapper { flex: 1; position: relative; }
  input { width: 100%; padding: 12px 16px; border: 2px solid #e2e8f0; border-radius: 12px; outline: none; font-size: 0.95rem; transition: all 0.2s; }
  input:focus { border-color: #2563eb; box-shadow: 0 0 0 3px rgba(37, 99, 235, 0.1); }
  input:disabled { background: #f3f4f6; cursor: not-allowed; }
  button { padding: 12px 24px; background: linear-gradient(135deg, #2563eb 0%, #1d4ed8 100%); color: white; border: none; border-radius: 12px; cursor: pointer; font-weight: 600; font-size: 0.95rem; transition: all 0.2s; display: flex; align-items: center; gap: 6px; }
  button:hover:not(:disabled) { transform: translateY(-2px); box-shadow: 0 4px 12px rgba(37, 99, 235, 0.4); }
  button:active:not(:disabled) { transform: translateY(0); }
  button:disabled { background: #d1d5db; cursor: not-allowed; transform: none; }
  .log { font-size: 0.75rem; color: #6b7280; padding: 0.5rem 1rem; background: #f9fafb; border-top: 1px solid #e5e7eb; min-height: 24px; }
  .welcome-msg { text-align: center; color: #6b7280; font-size: 0.9rem; padding: 2rem 1rem; }
  .welcome-msg svg { width: 48px; height: 48px; margin-bottom: 1rem; opacity: 0.5; }
  .toast { position: fixed; bottom: 20px; left: 50%; transform: translateX(-50%) translateY(100px); background: #1f2937; color: white; padding: 12px 24px; border-radius: 8px; font-size: 0.9rem; opacity: 0; transition: all 0.3s ease; z-index: 1000; }
  .toast.show { transform: translateX(-50%) translateY(0); opacity: 1; }
</style>
</head>
<body>
<div class="container">
  <div class="header">
    <h2>🤖 Qwen2.5-0.5B WebLLM</h2>
    <p>本地运行的 AI 助手 · 流式响应</p>
  </div>
  <div id="status" class="status-bar">
    <div id="statusIcon" class="status-icon"></div>
    <span id="statusText">正在初始化...</span>
    <div class="progress-bar">
      <div id="progressFill" class="progress-fill"></div>
    </div>
  </div>
  <div class="chat-box" id="chatBox">
    <div class="welcome-msg">
      <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
        <path d="M21 15a2 2 0 0 1-2 2H7l-4 4V5a2 2 0 0 1 2-2h14a2 2 0 0 1 2 2z"></path>
      </svg>
      <div>正在加载模型,请稍候...</div>
    </div>
  </div>
  <div class="input-area">
    <div class="input-wrapper">
      <input type="text" id="userInput" placeholder="等待模型加载..." disabled />
    </div>
    <button id="sendBtn" disabled>
      <span>发送</span>
      <svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
        <line x1="22" y1="2" x2="11" y2="13"></line>
        <polygon points="22 2 15 22 11 13 2 9 22 2"></polygon>
      </svg>
    </button>
  </div>
  <div id="log" class="log"></div>
</div>
<div id="toast" class="toast"></div>

<script type="module">
  // Single-page chat UI on top of WebLLM: loads one model in the browser,
  // then answers each message with a non-streamed completion that is
  // revealed with a typewriter animation.
  import { CreateMLCEngine } from "https://esm.run/@mlc-ai/web-llm";

  const modelId = "Qwen2.5-0.5B-Instruct-q4f32_1-MLC";

  const chatBox = document.getElementById("chatBox");
  const userInput = document.getElementById("userInput");
  const sendBtn = document.getElementById("sendBtn");
  const statusText = document.getElementById("statusText");
  const statusIcon = document.getElementById("statusIcon");
  const progressFill = document.getElementById("progressFill");
  const logDiv = document.getElementById("log");
  const toastEl = document.getElementById("toast");

  let engine = null;
  let isGenerating = false;
  // Handle of the pending "hide toast" timer. Declared before the WebGPU
  // check below because that check already calls showToast().
  let toastTimer = null;

  // Abort early when the browser exposes no WebGPU entry point.
  if (!navigator.gpu) {
    statusIcon.className = "status-icon error";
    statusText.textContent = "❌ 浏览器不支持 WebGPU";
    statusText.style.color = "#ef4444";
    logDiv.textContent = "请使用最新版 Chrome 或 Edge 浏览器";
    showToast("错误:WebGPU 不支持");
    throw new Error("WebGPU not supported");
  }

  /**
   * Turns any thrown/rejected value into display text.
   * @param {unknown} err - Value received by a catch clause.
   * @returns {string} `err.message` for Error instances, otherwise `String(err)`.
   */
  function describeError(err) {
    return err instanceof Error ? err.message : String(err);
  }

  /**
   * Shows a transient toast at the bottom of the page.
   * @param {string} message - Text to display.
   * @param {number} [duration=3000] - Milliseconds before the toast hides.
   */
  function showToast(message, duration = 3000) {
    // Cancel the previous hide timer; otherwise it would fire while this
    // newer toast is still meant to be visible and hide it early.
    if (toastTimer !== null) clearTimeout(toastTimer);
    toastEl.textContent = message;
    toastEl.classList.add("show");
    toastTimer = setTimeout(() => {
      toastEl.classList.remove("show");
      toastTimer = null;
    }, duration);
  }

  /**
   * Updates the status bar text, indicator dot and progress bar.
   * @param {string} text - Status text.
   * @param {number} [progress=0] - Percentage, clamped to 0..100; only applied in the loading state.
   * @param {boolean} [isReady=false] - Marks the ready state and fills the bar.
   * @param {boolean} [isError=false] - Marks the error state (progress bar untouched).
   */
  function updateStatus(text, progress = 0, isReady = false, isError = false) {
    statusText.textContent = text;
    if (isReady) {
      statusIcon.className = "status-icon ready";
      progressFill.style.width = "100%";
    } else if (isError) {
      statusIcon.className = "status-icon error";
      statusText.style.color = "#ef4444";
    } else {
      statusIcon.className = "status-icon";
      progressFill.style.width = `${Math.min(100, Math.max(0, progress))}%`;
    }
  }

  /**
   * Appends a chat bubble and scrolls it into view. Also removes the
   * welcome/loading placeholder if it is still present.
   * @param {string} text - Bubble text (assigned via textContent, never parsed as HTML).
   * @param {string} type - CSS class suffix; the page uses "user" and "bot".
   * @returns {HTMLDivElement} The created bubble.
   */
  function addMessage(text, type) {
    const welcomeMsg = chatBox.querySelector(".welcome-msg");
    if (welcomeMsg) welcomeMsg.remove();

    const div = document.createElement("div");
    div.className = `message ${type}`;
    div.textContent = text;
    chatBox.appendChild(div);
    chatBox.scrollTop = chatBox.scrollHeight;
    return div;
  }

  /**
   * Appends the animated three-dot "typing" bubble.
   * @returns {HTMLDivElement} The indicator element.
   */
  function createTypingMessage() {
    const div = document.createElement("div");
    div.className = "message bot";
    div.id = "typing-msg";
    // Static markup only; no user or model text is interpolated here.
    div.innerHTML = '<div class="typing-indicator"><div class="typing-dot"></div><div class="typing-dot"></div><div class="typing-dot"></div></div>';
    chatBox.appendChild(div);
    chatBox.scrollTop = chatBox.scrollHeight;
    return div;
  }

  /**
   * Reveals an already complete reply one character at a time.
   * @param {string} text - Full reply text.
   * @param {HTMLElement} messageEl - Bubble that receives the text.
   * @returns {Promise<void>} Resolves once the whole text is shown.
   */
  async function streamResponse(text, messageEl) {
    // Array.from iterates by code point, so surrogate pairs (emoji etc.)
    // are appended whole rather than as two broken halves.
    const chars = Array.from(text);
    messageEl.textContent = "";
    for (const ch of chars) {
      messageEl.textContent += ch;
      chatBox.scrollTop = chatBox.scrollHeight;
      await new Promise((resolve) => setTimeout(resolve, 30 + Math.random() * 20));
    }
  }

  /**
   * Downloads/loads the model, mirrors progress into the status bar, and
   * enables the input once the engine is ready. Failures are reported in
   * the UI and the console; the returned promise does not reject.
   */
  async function initEngine() {
    try {
      updateStatus("正在加载模型...", 0);
      logDiv.textContent = "开始下载模型 (约 300-500MB)...";
      showToast("开始加载模型,请耐心等待...");

      engine = await CreateMLCEngine(modelId, {
        initProgressCallback: (progress) => {
          const percent = progress.progress ? Math.round(progress.progress * 100) : 0;
          const text = progress.text || "";
          if (text.includes("Loading")) {
            updateStatus(`加载模型中... ${percent}%`, percent);
          } else if (text.includes("Download")) {
            updateStatus(`下载中... ${percent}%`, percent);
          } else {
            updateStatus(text, percent);
          }
          logDiv.textContent = text;
        },
      });

      updateStatus("✅ 已就绪", 100, true);
      logDiv.textContent = "模型加载完成,可以开始对话";

      // addMessage() also clears the loading placeholder.
      addMessage("你好!我是 Qwen2.5,有什么可以帮你?", "bot");

      userInput.disabled = false;
      sendBtn.disabled = false;
      userInput.placeholder = "输入消息...";
      userInput.focus();
      showToast("✅ 模型加载完成!", 2000);
      console.log("🟢 引擎初始化成功");
    } catch (err) {
      const reason = describeError(err);
      updateStatus("加载失败", 0, false, true);
      logDiv.textContent = `错误:${reason}`;
      showToast(`❌ 加载失败:${reason}`);
      console.error(err);
    }
  }

  /**
   * Sends the current input to the model, renders the reply, and logs
   * timing statistics to the console. Does nothing when the input is
   * empty, a reply is already in progress, or the engine is not loaded.
   * Each request carries only the system prompt and the current message.
   */
  async function sendMessage() {
    const text = userInput.value.trim();
    if (!text || isGenerating || !engine) return;

    isGenerating = true;
    userInput.disabled = true;
    sendBtn.disabled = true;
    addMessage(text, "user");
    userInput.value = "";
    // Keep a direct reference instead of looking the element up by id later.
    const typingEl = createTypingMessage();

    const startTime = performance.now();
    try {
      const response = await engine.chat.completions.create({
        messages: [
          { role: "system", content: "你是一个有用的助手,回答简洁明了。" },
          { role: "user", content: text },
        ],
        temperature: 0.7,
        max_tokens: 512,
      });
      const botReply = response.choices?.[0]?.message?.content || "抱歉,我无法回答这个问题。";

      // Time from sending until the complete reply was available.
      const responseTime = (performance.now() - startTime) / 1000;

      typingEl.remove();
      const botMessageEl = addMessage("", "bot");

      // Time spent on the typewriter animation.
      const renderStart = performance.now();
      await streamResponse(botReply, botMessageEl);
      const renderTime = (performance.now() - renderStart) / 1000;
      const totalTime = (performance.now() - startTime) / 1000;
      const charsPerSecond = botReply.length / renderTime;

      console.log("\n" + "=".repeat(50));
      console.log("📊 性能统计");
      console.log("=".repeat(50));
      console.log(`📤 用户问题:${text}`);
      console.log(`📄 回复长度:${botReply.length} 字符`);
      console.log(`⏱️ 响应时间:${responseTime.toFixed(2)}s (从发送到拿到完整回复)`);
      console.log(`🎨 渲染时间:${renderTime.toFixed(2)}s (流式显示完成)`);
      console.log(`🕐 总耗时:${totalTime.toFixed(2)}s (从发送到显示完毕)`);
      console.log(`⚡ 每秒字符:${charsPerSecond.toFixed(1)} chars/s`);
      console.log("=".repeat(50) + "\n");
    } catch (err) {
      console.error(err);
      const reason = describeError(err);
      // Only reuse the indicator bubble if it is still in the document
      // (the failure may have happened after it was removed).
      if (typingEl.isConnected) {
        typingEl.textContent = `❌ 错误:${reason}`;
        typingEl.style.color = "#ef4444";
        // This bubble now stays as an error message; drop the id so it is
        // not mistaken for (or duplicated by) the next typing indicator.
        typingEl.removeAttribute("id");
      }
      showToast(`❌ 请求失败:${reason}`);
      console.log("\n❌ 请求失败");
      console.log(`错误信息:${reason}`);
    } finally {
      isGenerating = false;
      userInput.disabled = false;
      sendBtn.disabled = false;
      userInput.focus();
    }
  }

  // Event wiring.
  sendBtn.addEventListener("click", sendMessage);
  userInput.addEventListener("keydown", (e) => {
    // Skip Enter presses that belong to an IME composition (e.g. confirming
    // a pinyin candidate) so they do not submit a half-typed message.
    if (e.key === "Enter" && !e.isComposing) sendMessage();
  });

  // Start loading the model.
  initEngine();
</script>
</body>
</html>
在 VS Code 里安装 Live Server 扩展。
在编辑器中右键点击 HTML 文件,选择「Open with Live Server」,即可通过本地 HTTP 服务器在浏览器中打开页面。

https://webllm.mlc.ai/
此内容由惯性聚合(RSS阅读器)自动聚合整理,仅供阅读参考。 原文来自 — 版权归原作者所有。