在浏览器跑 Qwen2.5

博客园 - 慕尘

使用 WSL 在 Windows 上安装 Linux LangExtract pgvector 向量数据库 Faiss Goose trafilatura unstructured python里使用Playwright python的jieba MinGW nomic-embed-text 解析非结构化数据 LangChain 的 DocumentLoader 能够使用require但不能使用import ChromaDB nvm-windows 使用js实现文字转语音 pyttsx3 Ollama笔记

在浏览器跑 Qwen2.5

慕尘 · 2026-04-10 · via 博客园 - 慕尘

好久没写了，今天遇到个好玩的，记下来

WebGPU + WebAssembly + 量化模型——浏览器端运行大语言模型（LLM）的主流方案

组件	作用	关键特性
WebGPU	GPU 加速计算	提供底层 GPU 计算着色器访问，比 WebGL 更高效
WebAssembly (WASM)	CPU 回退执行	多线程支持，接近原生性能，作为 WebGPU 不可用时的降级方案
量化模型	模型压缩	INT4/INT8/FP16 精度，相比 FP32 可减少约 50%-87.5% 模型体积

WebLLM (MLC)——专为 LLM 优化的 WebGPU 原生方案，支持 Llama、Qwen、Phi、Gemma 等

安装依赖

npm install @mlc-ai/web-llm

test.html

<!DOCTYPE html>
<html>
<head>
  <script type="module">
    import { CreateMLCEngine } from "https://esm.run/@mlc-ai/web-llm";
    
    const modelId = "Qwen2.5-0.5B-Instruct-q4f32_1-MLC";
    
    /**
     * Load the model, request one non-streaming chat completion and log the reply.
     *
     * Everything is reported through the browser console. Failures are caught and
     * logged; when the failure text mentions SharedArrayBuffer an alert with a
     * hint about COOP/COEP headers is shown as well.
     *
     * @returns {Promise<void>} Resolves once the reply (or the error) has been logged.
     */
    async function run() {
      try {
        // 1. Initialise the engine; each progress report's text is logged.
        const engine = await CreateMLCEngine(modelId, {
          initProgressCallback: (p) => console.log("[加载进度]", p.text),
        });

        console.log("🤖", "模型加载完成，准备生成...");

        // 2. Ask for one complete reply. `stream: false` makes the call resolve to a
        //    single response object; with `stream: true` it would resolve to an
        //    AsyncIterable of chunks that cannot be logged directly.
        const res = await engine.chat.completions.create({
          messages: [
            { role: "user", content: "用Python写一个冒泡排序，并解释代码。" },
          ],
          stream: false,
          max_tokens: 500, // cap the reply length
        });

        // 3. Read the reply defensively: any missing level yields undefined.
        const content = res.choices?.[0]?.message?.content;
        if (content) {
          console.log("✅ 生成结果:", content);
        } else {
          console.warn("⚠️ 模型返回为空或格式异常", res);
        }
      } catch (err) {
        // 4. Log the failure.
        console.error("❌ 发生错误:", err);

        // A rejection value is not guaranteed to be an Error (it may be a string or
        // have no `message`), so normalise it before searching; otherwise this
        // handler would itself throw and turn into an unhandled rejection.
        const reason = String(err?.message ?? err);
        if (reason.includes("SharedArrayBuffer")) {
          alert("错误：检测到跨域隔离问题。请确保服务器配置了 COOP/COEP 响应头，或使用支持 WebGPU 的正确环境。");
        }
      }
    }
    
    run();
  </script>
</head>
<body></body>
</html>

第一次加载模型会很慢（需要先下载模型文件）；加载进度和生成结果都输出在浏览器的控制台（console）里。

现在改进下

test1.html

<!DOCTYPE html>
<html lang="zh-CN">
<head>
  <meta charset="UTF-8" />
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
  <title>WebLLM Qwen2.5 测试</title>
  <style>
    /* ---- Page shell: global box model and the gradient page background ---- */
    * { box-sizing: border-box; }
    body {
      font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
      max-width: 900px;
      margin: 2rem auto;
      padding: 0 1rem;
      background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
      min-height: 100vh;
    }
    /* White card that wraps the whole chat widget */
    .container {
      background: white;
      border-radius: 16px;
      box-shadow: 0 10px 40px rgba(0,0,0,0.2);
      overflow: hidden;
    }
    /* ---- Title banner ---- */
    .header {
      background: linear-gradient(135deg, #2563eb 0%, #1d4ed8 100%);
      color: white;
      padding: 1.5rem;
      text-align: center;
    }
    .header h2 { margin: 0; font-size: 1.5rem; }
    .header p { margin: 0.5rem 0 0 0; opacity: 0.9; font-size: 0.9rem; }
    /* ---- Status bar: indicator dot, status text and progress bar in one row ---- */
    .status-bar {
      padding: 0.75rem 1rem;
      background: #f8fafc;
      border-bottom: 1px solid #e2e8f0;
      font-size: 0.85rem;
      display: flex;
      align-items: center;
      gap: 8px;
    }
    /* Indicator dot: pulsing amber by default; the script swaps in .ready / .error */
    .status-icon {
      width: 8px;
      height: 8px;
      border-radius: 50%;
      background: #fbbf24;
      animation: pulse 1.5s ease-in-out infinite;
    }
    .status-icon.ready { background: #22c55e; animation: none; }
    .status-icon.error { background: #ef4444; animation: none; }
    @keyframes pulse {
      0%, 100% { opacity: 1; transform: scale(1); }
      50% { opacity: 0.5; transform: scale(1.2); }
    }
    /* Progress track; takes the remaining width of the status bar */
    .progress-bar {
      flex: 1;
      height: 6px;
      background: #e2e8f0;
      border-radius: 3px;
      overflow: hidden;
    }
    /* Progress fill; its width is set inline by the script and animated here */
    .progress-fill {
      height: 100%;
      background: linear-gradient(90deg, #2563eb, #1d4ed8);
      width: 0%;
      transition: width 0.3s ease;
      border-radius: 3px;
    }
    /* ---- Chat area: fixed-height, vertically scrolling column of bubbles ---- */
    .chat-box {
      height: 450px;
      overflow-y: auto;
      padding: 1.5rem;
      display: flex;
      flex-direction: column;
      gap: 12px;
      background: #f8fafc;
    }
    /* Scrollbar styling (WebKit-prefixed pseudo-elements only) */
    .chat-box::-webkit-scrollbar { width: 8px; }
    .chat-box::-webkit-scrollbar-track { background: #f1f1f1; }
    .chat-box::-webkit-scrollbar-thumb { background: #c1c1c1; border-radius: 4px; }
    /* Shared bubble style; pre-wrap keeps line breaks in the message text */
    .message {
      padding: 12px 16px;
      border-radius: 12px;
      max-width: 85%;
      line-height: 1.6;
      animation: slideIn 0.3s ease;
      word-wrap: break-word;
      white-space: pre-wrap;
    }
    @keyframes slideIn {
      from { opacity: 0; transform: translateY(10px); }
      to { opacity: 1; transform: translateY(0); }
    }
    /* User bubbles: right-aligned, blue */
    .user {
      align-self: flex-end;
      background: linear-gradient(135deg, #2563eb 0%, #1d4ed8 100%);
      color: white;
      border-bottom-right-radius: 4px;
    }
    /* Bot bubbles: left-aligned, white with a border */
    .bot {
      align-self: flex-start;
      background: white;
      color: #333;
      border: 1px solid #e2e8f0;
      border-bottom-left-radius: 4px;
    }
    /* Three bouncing dots shown while a reply is pending; delays stagger the dots */
    .typing-indicator {
      display: inline-flex;
      gap: 4px;
      align-items: center;
    }
    .typing-dot {
      width: 6px;
      height: 6px;
      border-radius: 50%;
      background: #9ca3af;
      animation: typing 1.4s ease-in-out infinite;
    }
    .typing-dot:nth-child(2) { animation-delay: 0.2s; }
    .typing-dot:nth-child(3) { animation-delay: 0.4s; }
    @keyframes typing {
      0%, 100% { transform: translateY(0); opacity: 0.4; }
      50% { transform: translateY(-4px); opacity: 1; }
    }
    /* ---- Input row: text field plus send button ---- */
    .input-area {
      display: flex;
      padding: 1rem 1.5rem;
      border-top: 1px solid #e2e8f0;
      gap: 12px;
      background: white;
    }
    .input-wrapper { flex: 1; position: relative; }
    input {
      width: 100%;
      padding: 12px 16px;
      border: 2px solid #e2e8f0;
      border-radius: 12px;
      outline: none;
      font-size: 0.95rem;
      transition: all 0.2s;
    }
    /* The default outline is removed above; this ring is the visible focus style */
    input:focus {
      border-color: #2563eb;
      box-shadow: 0 0 0 3px rgba(37, 99, 235, 0.1);
    }
    input:disabled { background: #f3f4f6; cursor: not-allowed; }
    button {
      padding: 12px 24px;
      background: linear-gradient(135deg, #2563eb 0%, #1d4ed8 100%);
      color: white;
      border: none;
      border-radius: 12px;
      cursor: pointer;
      font-weight: 600;
      font-size: 0.95rem;
      transition: all 0.2s;
      display: flex;
      align-items: center;
      gap: 6px;
    }
    /* Hover / press feedback applies only while the button is enabled */
    button:hover:not(:disabled) {
      transform: translateY(-2px);
      box-shadow: 0 4px 12px rgba(37, 99, 235, 0.4);
    }
    button:active:not(:disabled) { transform: translateY(0); }
    button:disabled { background: #d1d5db; cursor: not-allowed; transform: none; }
    /* ---- Small log line under the input row ---- */
    .log {
      font-size: 0.75rem;
      color: #6b7280;
      padding: 0.5rem 1rem;
      background: #f9fafb;
      border-top: 1px solid #e5e7eb;
      min-height: 24px;
    }
    /* ---- Placeholder shown in the chat area before the first message ---- */
    .welcome-msg {
      text-align: center;
      color: #6b7280;
      font-size: 0.9rem;
      padding: 2rem 1rem;
    }
    .welcome-msg svg {
      width: 48px;
      height: 48px;
      margin-bottom: 1rem;
      opacity: 0.5;
    }
    /* ---- Toast: parked below its resting position and transparent until .show is added ---- */
    .toast {
      position: fixed;
      bottom: 20px;
      left: 50%;
      transform: translateX(-50%) translateY(100px);
      background: #1f2937;
      color: white;
      padding: 12px 24px;
      border-radius: 8px;
      font-size: 0.9rem;
      opacity: 0;
      transition: all 0.3s ease;
      z-index: 1000;
    }
    .toast.show {
      transform: translateX(-50%) translateY(0);
      opacity: 1;
    }
  </style>
</head>
<body>
  <!-- Chat widget card: header, status bar, chat area, input row and log line -->
  <div class="container">
    <div class="header">
      <h2>🤖 Qwen2.5-0.5B WebLLM</h2>
      <p>本地运行的 AI 助手 · 流式响应</p>
    </div>
    
    <!-- Status bar: #statusIcon, #statusText and #progressFill are updated by the script -->
    <div id="status" class="status-bar">
      <div id="statusIcon" class="status-icon"></div>
      <span id="statusText">正在初始化...</span>
      <div class="progress-bar">
        <div id="progressFill" class="progress-fill"></div>
      </div>
    </div>

    <!-- Chat area: starts with a loading placeholder that the script removes before adding messages -->
    <div class="chat-box" id="chatBox">
      <div class="welcome-msg">
        <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
          <path d="M21 15a2 2 0 0 1-2 2H7l-4 4V5a2 2 0 0 1 2-2h14a2 2 0 0 1 2 2z"></path>
        </svg>
        <div>正在加载模型，请稍候...</div>
      </div>
    </div>

    <!-- Input row: both controls start disabled and are enabled by the script once the model is ready -->
    <div class="input-area">
      <div class="input-wrapper">
        <input type="text" id="userInput" placeholder="等待模型加载..." disabled />
      </div>
      <button id="sendBtn" disabled>
        <span>发送</span>
        <svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
          <line x1="22" y1="2" x2="11" y2="13"></line>
          <polygon points="22 2 15 22 11 13 2 9 22 2"></polygon>
        </svg>
      </button>
    </div>
    
    <!-- Log line: the script writes progress and error text here -->
    <div id="log" class="log"></div>
  </div>

  <!-- Toast element: text and the "show" class are set by showToast() -->
  <div id="toast" class="toast"></div>

  <script type="module">
    import { CreateMLCEngine } from "https://esm.run/@mlc-ai/web-llm";

    // Identifier of the model that is passed to CreateMLCEngine.
    const modelId = "Qwen2.5-0.5B-Instruct-q4f32_1-MLC";
    // Cached references to the page elements this script reads and updates.
    const chatBox = document.getElementById("chatBox");
    const userInput = document.getElementById("userInput");
    const sendBtn = document.getElementById("sendBtn");
    const statusText = document.getElementById("statusText");
    const statusIcon = document.getElementById("statusIcon");
    const progressFill = document.getElementById("progressFill");
    const logDiv = document.getElementById("log");
    const toastEl = document.getElementById("toast");

    // Engine instance; stays null until initEngine() assigns it.
    let engine = null;
    // True while sendMessage() is handling a request; used to block re-entry.
    let isGenerating = false;
    // Incremented once by initEngine(); not read anywhere in this script.
    let messageCount = 0;

    // WebGPU guard: when the browser does not expose navigator.gpu, report the
    // problem in the log line, status bar and toast, then abort the module by
    // throwing so that nothing below this point runs.
    if (!navigator.gpu) {
      logDiv.textContent = "请使用最新版 Chrome 或 Edge 浏览器";
      statusText.textContent = "❌ 浏览器不支持 WebGPU";
      statusText.style.color = "#ef4444";
      statusIcon.className = "status-icon error";
      showToast("错误：WebGPU 不支持");
      throw new Error("WebGPU not supported");
    }

    /**
     * Show a transient toast with the given text and hide it after `duration` ms.
     *
     * Calling it again while a toast is visible replaces the text and restarts
     * the countdown, so an earlier toast's timer can no longer hide a newer one
     * ahead of time.
     *
     * @param {string} message - Text to display.
     * @param {number} [duration=3000] - Time in milliseconds before the toast hides.
     */
    function showToast(message, duration = 3000) {
      // The pending timer id is kept on the function object rather than in a
      // `let` binding: this function is hoisted and is called by the WebGPU guard
      // above its definition, where a later `let` would not be initialised yet.
      clearTimeout(showToast.hideTimerId);

      toastEl.textContent = message;
      toastEl.classList.add("show");
      showToast.hideTimerId = setTimeout(() => {
        toastEl.classList.remove("show");
      }, duration);
    }

    /**
     * Update the status bar text, indicator dot and progress bar.
     *
     * @param {string} text - Status text to display.
     * @param {number} [progress=0] - Progress percentage; clamped to 0..100.
     * @param {boolean} [isReady=false] - Ready state: green dot, full progress bar.
     * @param {boolean} [isError=false] - Error state: red dot and red text; only
     *   consulted when `isReady` is false. The progress bar is left untouched.
     */
    function updateStatus(text, progress = 0, isReady = false, isError = false) {
      statusText.textContent = text;

      if (isReady) {
        statusIcon.className = "status-icon ready";
        progressFill.style.width = "100%";
        return;
      }

      if (isError) {
        statusIcon.className = "status-icon error";
        statusText.style.color = "#ef4444";
        return;
      }

      // In-progress state: default dot plus a clamped progress width.
      const clamped = Math.max(0, Math.min(100, progress));
      statusIcon.className = "status-icon";
      progressFill.style.width = `${clamped}%`;
    }

    /**
     * Append a chat bubble to the chat box and scroll it into view.
     *
     * The welcome placeholder, if still present, is removed first.
     *
     * @param {string} text - Message text (assigned as plain text, not HTML).
     * @param {string} type - Extra class for the bubble, e.g. "user" or "bot".
     * @returns {HTMLDivElement} The bubble element that was appended.
     */
    function addMessage(text, type) {
      chatBox.querySelector(".welcome-msg")?.remove();

      const bubble = Object.assign(document.createElement("div"), {
        className: `message ${type}`,
        textContent: text,
      });

      chatBox.appendChild(bubble);
      chatBox.scrollTop = chatBox.scrollHeight;
      return bubble;
    }

    /**
     * Append a bot bubble containing the three-dot typing indicator.
     *
     * The bubble gets the id "typing-msg" and the chat box is scrolled to the bottom.
     *
     * @returns {HTMLDivElement} The indicator bubble that was appended.
     */
    function createTypingMessage() {
      const dot = '<div class="typing-dot"></div>';

      const indicator = document.createElement("div");
      indicator.className = "message bot";
      indicator.id = "typing-msg";
      // Static markup only; no user-provided text is inserted here.
      indicator.innerHTML = `<div class="typing-indicator">${dot.repeat(3)}</div>`;

      chatBox.appendChild(indicator);
      chatBox.scrollTop = chatBox.scrollHeight;
      return indicator;
    }

    /**
     * Reveal `text` inside `messageEl` one character at a time (typewriter effect).
     *
     * This only animates text that is already complete. Each step waits a random
     * 30-50 ms and keeps the chat box scrolled to the bottom.
     *
     * @param {string} text - Full text to reveal.
     * @param {HTMLElement} messageEl - Element whose textContent is filled in.
     * @returns {Promise<void>} Resolves once the whole text is displayed.
     */
    async function streamResponse(text, messageEl) {
      messageEl.textContent = "";

      // Iterating a string yields whole code points, so emoji and other
      // characters outside the BMP are appended in one step instead of being
      // shown as a lone surrogate for one frame.
      for (const char of text) {
        messageEl.textContent += char;
        chatBox.scrollTop = chatBox.scrollHeight;
        await new Promise((resolve) => setTimeout(resolve, 30 + Math.random() * 20));
      }
    }

    /**
     * Create the engine for `modelId` and unlock the chat UI once it is ready.
     *
     * Progress reports from the engine are mirrored into the status bar and the
     * log line. On success the greeting bubble is added and the input controls
     * are enabled; on failure the error is shown in the status bar, log line,
     * toast and console.
     *
     * @returns {Promise<void>}
     */
    async function initEngine() {
      // Translate one progress report into a status label and a log line.
      const reportProgress = (report) => {
        const percent = report.progress ? Math.round(report.progress * 100) : 0;
        const detail = report.text || "";

        let label = detail;
        if (detail.includes("Loading")) {
          label = `加载模型中... ${percent}%`;
        } else if (detail.includes("Download")) {
          label = `下载中... ${percent}%`;
        }

        updateStatus(label, percent);
        logDiv.textContent = detail;
      };

      try {
        updateStatus("正在加载模型...", 0);
        logDiv.textContent = "开始下载模型 (约 300-500MB)...";
        showToast("开始加载模型，请耐心等待...");

        engine = await CreateMLCEngine(modelId, {
          initProgressCallback: reportProgress,
        });

        updateStatus("✅ 已就绪", 100, true);
        logDiv.textContent = "模型加载完成，可以开始对话";

        // Drop the loading placeholder, then greet the user.
        chatBox.querySelector(".welcome-msg")?.remove();
        addMessage("你好！我是 Qwen2.5，有什么可以帮你？", "bot");

        // Unlock the input controls and move focus to the text field.
        userInput.disabled = false;
        sendBtn.disabled = false;
        userInput.placeholder = "输入消息...";
        userInput.focus();

        messageCount += 1;
        showToast("✅ 模型加载完成！", 2000);
        console.log("🟢 引擎初始化成功");
      } catch (err) {
        updateStatus("加载失败", 0, false, true);
        const reason = err.message;
        logDiv.textContent = `错误：${reason}`;
        showToast(`❌ 加载失败：${reason}`);
        console.error(err);
      }
    }

    /**
     * Send the current input to the model, render the reply and log timing stats.
     *
     * Does nothing when the input is blank, a request is already running, or the
     * engine has not been created yet. While a request runs the input controls
     * are disabled; they are always re-enabled afterwards.
     *
     * Each request sends only the system prompt and the current user message;
     * earlier turns are not included.
     *
     * @returns {Promise<void>}
     */
    async function sendMessage() {
      const text = userInput.value.trim();
      if (!text || isGenerating || !engine) return;

      isGenerating = true;
      userInput.disabled = true;
      sendBtn.disabled = true;
      addMessage(text, "user");
      userInput.value = "";

      // Keep a direct handle on THIS request's indicator. Looking it up by id is
      // unreliable: after a failed request the error bubble used to keep the same
      // id, so the next lookup removed that old bubble and left the new indicator
      // on screen forever.
      let typingMsg = createTypingMessage();

      // Timing starts once the request is about to be sent.
      const startTime = performance.now();

      try {
        // Request a complete (non-streamed) reply.
        const response = await engine.chat.completions.create({
          messages: [
            { role: "system", content: "你是一个有用的助手，回答简洁明了。" },
            { role: "user", content: text },
          ],
          temperature: 0.7,
          max_tokens: 512,
        });

        // `||` on purpose: an empty reply also falls back to the apology text.
        const botReply = response.choices?.[0]?.message?.content || "抱歉，我无法回答这个问题。";

        // Response time: from sending until the full reply is available.
        const responseTime = (performance.now() - startTime) / 1000;

        typingMsg.remove();
        typingMsg = null;

        const botMessageEl = addMessage("", "bot");

        // Render time: duration of the typewriter animation.
        const renderStart = performance.now();
        await streamResponse(botReply, botMessageEl);
        const renderTime = (performance.now() - renderStart) / 1000;

        const totalTime = (performance.now() - startTime) / 1000;
        // Guard the division so an instantaneous render reports 0 instead of Infinity.
        const charsPerSecond = renderTime > 0 ? botReply.length / renderTime : 0;

        // Print the performance summary to the console.
        console.log("\n" + "=".repeat(50));
        console.log("📊 性能统计");
        console.log("=".repeat(50));
        console.log(`📤 用户问题：${text}`);
        console.log(`📄 回复长度：${botReply.length} 字符`);
        console.log(`⏱️  响应时间：${responseTime.toFixed(2)}s  (从发送到拿到完整回复)`);
        console.log(`🎨 渲染时间：${renderTime.toFixed(2)}s  (流式显示完成)`);
        console.log(`🕐 总耗时：${totalTime.toFixed(2)}s  (从发送到显示完毕)`);
        console.log(`⚡ 每秒字符：${charsPerSecond.toFixed(1)} chars/s`);
        console.log("=".repeat(50) + "\n");
      } catch (err) {
        console.error(err);
        // A rejection value is not guaranteed to be an Error with a message.
        const reason = err?.message ?? String(err);

        if (typingMsg) {
          // Turn the indicator into a permanent error bubble and release the id
          // so it is never mistaken for a live indicator later.
          typingMsg.textContent = `❌ 错误：${reason}`;
          typingMsg.style.color = "#ef4444";
          typingMsg.removeAttribute("id");
        }
        showToast(`❌ 请求失败：${reason}`);

        console.log("\n❌ 请求失败");
        console.log(`错误信息：${reason}`);
      } finally {
        isGenerating = false;
        userInput.disabled = false;
        sendBtn.disabled = false;
        userInput.focus();
      }
    }

    // Event wiring: the send button and the Enter key both submit the message.
    sendBtn.addEventListener("click", sendMessage);
    userInput.addEventListener("keydown", (e) => {
      // `keypress` is deprecated, so listen on `keydown` instead. Unlike keypress,
      // keydown also fires for the Enter that confirms an IME composition (e.g.
      // Chinese input); those presses must not submit. keyCode 229 covers browsers
      // that report the confirming key with isComposing already reset.
      if (e.key !== "Enter" || e.isComposing || e.keyCode === 229) return;
      sendMessage();
    });

    // Start loading the model as soon as the module runs.
    initEngine();
  </script>
</body>
</html>

在 VS Code 里安装 Live Server 扩展

在 HTML 文件上右键，选择 Open with Live Server，通过本地服务器打开页面

https://webllm.mlc.ai/

此内容由惯性聚合(RSS阅读器)自动聚合整理，仅供阅读参考。原文来自 — 版权归原作者所有。

推荐订阅源

博客园 - 慕尘