




























@@ -179,6 +179,247 @@ describe("searchKeyword trigram fallback", () => {
179179});
180180});
181181182+describe("searchKeyword FTS MATCH fallback", () => {
183+const { DatabaseSync } = requireNodeSqlite();
/**
 * Probes whether the memory index schema can be created with FTS enabled.
 *
 * Opens a throwaway in-memory database, asks `ensureMemoryIndexSchema` to set
 * up the `chunks_fts` table, and reports the `ftsAvailable` flag it returns.
 * The probe database is closed before returning, whether or not setup throws.
 *
 * @returns the `ftsAvailable` value reported by `ensureMemoryIndexSchema`.
 */
function supportsFts(): boolean {
  const probeDb = new DatabaseSync(":memory:");
  try {
    const { ftsAvailable } = ensureMemoryIndexSchema({
      db: probeDb,
      embeddingCacheTable: "embedding_cache",
      cacheEnabled: false,
      ftsTable: "chunks_fts",
      ftsEnabled: true,
    });
    return ftsAvailable;
  } finally {
    probeDb.close();
  }
}
/**
 * Opens a fresh in-memory database with the FTS-enabled memory index schema.
 *
 * The caller owns the returned handle and must close it. On any failure the
 * handle is closed here before the error propagates, so a failed setup does
 * not leak an open database.
 *
 * @returns an open database on which `ensureMemoryIndexSchema` reported
 *   `ftsAvailable`.
 * @throws Error with an "FTS5 unavailable: ..." message when the schema helper
 *   reports that FTS is not available; any error thrown by the schema helper
 *   itself is rethrown unchanged.
 */
function createFtsDb() {
  const db = new DatabaseSync(":memory:");
  try {
    const result = ensureMemoryIndexSchema({
      db,
      embeddingCacheTable: "embedding_cache",
      cacheEnabled: false,
      ftsTable: "chunks_fts",
      ftsEnabled: true,
    });
    if (!result.ftsAvailable) {
      throw new Error(`FTS5 unavailable: ${result.ftsError ?? "unknown error"}`);
    }
    return db;
  } catch (err) {
    // Release the handle on every failure path, including a throw from the
    // schema helper, then surface the original error.
    db.close();
    throw err;
  }
}
// Register the FTS cases with `it` when the probe reports FTS as available;
// otherwise register them with `it.skip`.
const ftsProbeOk = supportsFts();
const itWithFts = ftsProbeOk ? it : it.skip;
// Case: the query builder hands back a MATCH expression that is expected to
// fail, and the search is expected to still return the row containing "Agent".
itWithFts("falls back to LIKE search when FTS MATCH throws", async () => {
  const db = createFtsDb();
  try {
    const insertChunk = db.prepare(
      "INSERT INTO chunks_fts (text, id, path, source, model, start_line, end_line) VALUES (?, ?, ?, ?, ?, ?, ?)",
    );
    // Column order: text, id, path, source, model, start_line, end_line.
    const seedRows = [
      ["The Agent framework handles API calls and cron jobs", "1", "doc.md", "sessions", "mock-embed", 1, 5],
      ["Deploy the database cluster on Hetzner", "2", "ops.md", "sessions", "mock-embed", 1, 3],
    ];
    for (const row of seedRows) {
      insertChunk.run(...row);
    }

    // Stand-in for buildFtsQuery that always yields a broken MATCH expression.
    const alwaysBrokenQuery = () => "BROKEN_QUERY_SYNTAX <<<";

    const hits = await searchKeyword({
      db,
      ftsTable: "chunks_fts",
      providerModel: "mock-embed",
      query: "Agent",
      ftsTokenizer: "unicode61",
      limit: 10,
      snippetMaxChars: 200,
      sourceFilter: { sql: "", params: [] },
      buildFtsQuery: alwaysBrokenQuery,
      bm25RankToScore,
    });

    // The fallback is expected to surface the first seeded row.
    const topHit = hits[0];
    expect(hits.length).toBeGreaterThan(0);
    expect(topHit?.id).toBe("1");
    // Without BM25 ranking, the fallback result is expected to carry textScore 1.
    expect(topHit?.textScore).toBe(1);
  } finally {
    db.close();
  }
});
// Case: with the real query builder, the single seeded row is expected back
// with a score strictly between 0 and 1.
itWithFts("returns BM25-scored results when FTS MATCH succeeds", async () => {
  const db = createFtsDb();
  try {
    db.prepare(
      "INSERT INTO chunks_fts (text, id, path, source, model, start_line, end_line) VALUES (?, ?, ?, ?, ?, ?, ?)",
    ).run("The Transformer architecture powers modern LLMs", "1", "ml.md", "memory", "mock-embed", 1, 3);

    const searchArgs = {
      db,
      ftsTable: "chunks_fts",
      providerModel: "mock-embed",
      query: "Transformer",
      ftsTokenizer: "unicode61",
      limit: 10,
      snippetMaxChars: 200,
      sourceFilter: { sql: "", params: [] },
      buildFtsQuery,
      bm25RankToScore,
    };
    const hits = await searchKeyword(searchArgs);

    const onlyHit = hits[0];
    expect(hits.length).toBe(1);
    expect(onlyHit?.id).toBe("1");
    // A score of exactly 1 is what the fallback case asserts, so require a
    // value strictly inside (0, 1) here.
    expect(onlyHit?.textScore).toBeGreaterThan(0);
    expect(onlyHit?.textScore).toBeLessThan(1);
  } finally {
    db.close();
  }
});
// Case: two rows both mention "Agent" but differ in `source`; with a broken
// MATCH expression and a filter on "sessions", only the sessions row is expected.
itWithFts("applies source filter in LIKE fallback", async () => {
  const db = createFtsDb();
  try {
    const insertChunk = db.prepare(
      "INSERT INTO chunks_fts (text, id, path, source, model, start_line, end_line) VALUES (?, ?, ?, ?, ?, ?, ?)",
    );
    // Column order: text, id, path, source, model, start_line, end_line.
    const seedRows = [
      ["Agent handles API calls", "1", "doc.md", "sessions", "mock-embed", 1, 3],
      ["Agent design patterns", "2", "notes.md", "memory", "mock-embed", 1, 3],
    ];
    for (const row of seedRows) {
      insertChunk.run(...row);
    }

    const sessionsOnly = { sql: " AND source IN (?)", params: ["sessions"] };
    const hits = await searchKeyword({
      db,
      ftsTable: "chunks_fts",
      providerModel: "mock-embed",
      query: "Agent",
      ftsTokenizer: "unicode61",
      limit: 10,
      snippetMaxChars: 200,
      sourceFilter: sessionsOnly,
      buildFtsQuery: () => "BROKEN <<<",
      bm25RankToScore,
    });

    const onlyHit = hits[0];
    expect(hits.length).toBe(1);
    expect(onlyHit?.id).toBe("1");
    expect(onlyHit?.source).toBe("sessions");
  } finally {
    db.close();
  }
});
// Case: the query "Agent cron" with a broken MATCH expression. Only the row
// that contains both words (non-adjacent) is expected to come back.
itWithFts("splits multi-word query into per-token LIKE clauses in fallback", async () => {
  const db = createFtsDb();
  try {
    const insertChunk = db.prepare(
      "INSERT INTO chunks_fts (text, id, path, source, model, start_line, end_line) VALUES (?, ?, ?, ?, ?, ?, ?)",
    );
    // Column order: text, id, path, source, model, start_line, end_line.
    const seedRows = [
      // Contains both "Agent" and "cron", separated by other words.
      ["The Agent framework handles API calls and cron jobs", "1", "doc.md", "sessions", "mock-embed", 1, 5],
      // Contains "Agent" only.
      ["Agent design patterns for microservices", "2", "arch.md", "sessions", "mock-embed", 1, 3],
    ];
    for (const row of seedRows) {
      insertChunk.run(...row);
    }

    // Matching the whole query as one substring ('%Agent cron%') would miss
    // row 1 because the words are not adjacent; matching each token
    // separately should find it.
    const alwaysBrokenQuery = () => "BROKEN <<<";
    const hits = await searchKeyword({
      db,
      ftsTable: "chunks_fts",
      providerModel: "mock-embed",
      query: "Agent cron",
      ftsTokenizer: "unicode61",
      limit: 10,
      snippetMaxChars: 200,
      sourceFilter: { sql: "", params: [] },
      buildFtsQuery: alwaysBrokenQuery,
      bm25RankToScore,
    });

    // Both tokens must match, which excludes row 2.
    expect(hits.length).toBe(1);
    expect(hits[0]?.id).toBe("1");
  } finally {
    db.close();
  }
});
// Case: with a broken MATCH expression, a console.warn call mentioning the
// LIKE fallback is expected. console.warn is stubbed for the duration of the
// test and restored afterwards.
itWithFts("logs warning when MATCH fallback is used", async () => {
  const db = createFtsDb();
  const consoleWarn = vi.spyOn(console, "warn").mockImplementation(() => {});
  try {
    db.prepare(
      "INSERT INTO chunks_fts (text, id, path, source, model, start_line, end_line) VALUES (?, ?, ?, ?, ?, ?, ?)",
    ).run("test content", "1", "doc.md", "sessions", "mock-embed", 1, 1);

    const alwaysBrokenQuery = () => "BROKEN <<<";
    await searchKeyword({
      db,
      ftsTable: "chunks_fts",
      providerModel: "mock-embed",
      query: "test",
      ftsTokenizer: "unicode61",
      limit: 10,
      snippetMaxChars: 200,
      sourceFilter: { sql: "", params: [] },
      buildFtsQuery: alwaysBrokenQuery,
      bm25RankToScore,
    });

    const fallbackNotice = expect.stringContaining("FTS5 MATCH failed, falling back to LIKE");
    expect(consoleWarn).toHaveBeenCalledWith(fallbackNotice);
  } finally {
    consoleWarn.mockRestore();
    db.close();
  }
});
421+});
422+182423describe("searchVector sqlite-vec KNN", () => {
183424const { DatabaseSync } = requireNodeSqlite();
184425此内容由惯性聚合(RSS阅读器)自动聚合整理,仅供阅读参考。 原文来自 — 版权归原作者所有。