Add full-text search index over guides, steps, blocks, and placeholders
Pure-JS inverted index persisted under library/index/ (documented FTS5 fallback). AND queries, last-token prefix matching, title boosting, step deep-links, snippets. 4 workflow tests (23 total). Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
+143
@@ -0,0 +1,143 @@
|
||||
'use strict';
|
||||
|
||||
const path = require('node:path');
|
||||
const { writeJsonSync, readJsonIfExists, htmlToText } = require('./util');
|
||||
|
||||
/**
 * Local full-text search over guide titles, descriptions, step titles/
 * descriptions, text blocks, code blocks, annotation texts, and placeholder
 * names and values. Pure-JS index persisted as JSON under library/index/
 * (fallback for SQLite FTS5 — see build/agent_audit.md). The persisted form
 * is the per-document title and text; documents are tokenized and scanned
 * at query time rather than looked up through a precomputed postings list.
 *
 * Documents are guide-level and step-level, so results can deep-link to a
 * specific step in the editor.
 */
|
||||
|
||||
const INDEX_VERSION = 1;
|
||||
|
||||
/**
 * Split arbitrary text into lowercase search tokens.
 *
 * A token is a maximal run of Unicode letters, Unicode digits, or
 * underscores. Runs shorter than two characters are discarded. Falsy
 * input (null, undefined, '', 0, ...) yields an empty array.
 *
 * @param {*} text Value to tokenize; coerced with String().
 * @returns {string[]} Tokens in order of appearance (duplicates kept).
 */
function tokenize(text) {
  if (!text) {
    return [];
  }
  const tokens = [];
  const pieces = String(text).toLowerCase().split(/[^\p{L}\p{N}_]+/u);
  for (const piece of pieces) {
    // Also drops the empty strings split() emits at the edges.
    if (piece.length >= 2) {
      tokens.push(piece);
    }
  }
  return tokens;
}
|
||||
|
||||
/**
 * Join the usable entries of `parts` with `sep`, skipping null, undefined
 * and empty strings so missing fields never get indexed as the literal
 * words "undefined" or "null".
 */
function joinPresent(parts, sep = ' ') {
  return parts.filter((part) => part != null && part !== '').join(sep);
}

/**
 * Searchable store of guide-level and step-level documents, kept in memory
 * as `this.docs` and written to `<indexDir>/search-index.json` after every
 * mutation.
 */
class SearchIndex {
  /**
   * Load the stored index from `indexDir`, or start empty when the stored
   * payload is absent, has a different version, or has no usable `docs`
   * object.
   *
   * @param {string} indexDir Directory containing `search-index.json`.
   */
  constructor(indexDir) {
    this.file = path.join(indexDir, 'search-index.json');
    const stored = readJsonIfExists(this.file, null);
    const usable =
      stored &&
      stored.version === INDEX_VERSION &&
      stored.docs !== null &&
      typeof stored.docs === 'object' &&
      !Array.isArray(stored.docs);
    // docKey -> { guideId, stepId, title, text, updatedAt }
    this.docs = usable ? stored.docs : {};
  }

  /** Write the current documents, tagged with the index version, to disk. */
  persist() {
    writeJsonSync(this.file, { version: INDEX_VERSION, docs: this.docs });
  }

  /**
   * (Re)index one guide and all of its steps, replacing every document
   * previously stored for that guide, then persist.
   *
   * @param {object} guide Guide record; reads guideId, title,
   *   descriptionHtml, placeholders and updatedAt.
   * @param {Map|Array|null|undefined} stepsMap Steps of the guide, either a
   *   Map (its values are used) or an array; each step may carry title,
   *   descriptionHtml, textBlocks, codeBlocks and annotations.
   */
  indexGuide(guide, stepsMap) {
    // Drop stale documents first; a single persist happens at the end.
    this.removeGuide(guide.guideId, { persist: false });

    // Placeholder names are always indexed; a missing value contributes
    // nothing instead of the string "null"/"undefined".
    const placeholderText = Object.entries(guide.placeholders || {})
      .map(([name, value]) => joinPresent([name, value]))
      .join(' ');
    this.docs[`g:${guide.guideId}`] = {
      guideId: guide.guideId,
      stepId: null,
      title: guide.title || '',
      text: [htmlToText(guide.descriptionHtml), placeholderText].filter(Boolean).join('\n'),
      updatedAt: guide.updatedAt,
    };

    const steps = stepsMap instanceof Map ? [...stepsMap.values()] : stepsMap || [];
    for (const step of steps) {
      const parts = [
        htmlToText(step.descriptionHtml),
        // Untitled text blocks contribute only their body text.
        ...(step.textBlocks || []).map((tb) => joinPresent([tb.title, htmlToText(tb.descriptionHtml)])),
        ...(step.codeBlocks || []).map((cb) => cb.code || ''),
        ...(step.annotations || []).map((a) => a.text || ''),
      ];
      this.docs[`s:${guide.guideId}:${step.stepId}`] = {
        guideId: guide.guideId,
        stepId: step.stepId,
        title: step.title || '',
        text: parts.filter(Boolean).join('\n'),
        updatedAt: guide.updatedAt,
      };
    }
    this.persist();
  }

  /**
   * Remove the guide-level document and every step document of a guide.
   *
   * @param {string} guideId Guide whose documents are deleted.
   * @param {{persist?: boolean}} [options] Pass `persist: false` to skip the
   *   disk write (used when a re-index follows immediately).
   */
  removeGuide(guideId, { persist = true } = {}) {
    for (const key of Object.keys(this.docs)) {
      if (this.docs[key].guideId === guideId) delete this.docs[key];
    }
    if (persist) this.persist();
  }

  /**
   * Ranked search. Every query token must match (AND); the final token also
   * matches as a prefix so search-as-you-type works. Each title occurrence
   * scores 10, each body occurrence scores 1, and guide-level documents get
   * a +2 bonus so they win ties against step documents.
   *
   * @param {string} query Raw user query.
   * @param {{limit?: number, guideId?: (string|null)}} [options] `limit`
   *   caps the number of results; `guideId` restricts the search to one guide.
   * @returns {Array<{guideId, stepId, title, snippet, score}>} Hits ordered
   *   by descending score; empty when the query has no usable tokens.
   */
  search(query, { limit = 30, guideId = null } = {}) {
    const qTokens = tokenize(query);
    if (qTokens.length === 0) return [];
    const lastIndex = qTokens.length - 1;
    const results = [];

    for (const doc of Object.values(this.docs)) {
      if (guideId && doc.guideId !== guideId) continue;
      const titleTokens = tokenize(doc.title);
      const textTokens = tokenize(doc.text);
      let score = 0;
      let matchedAll = true;

      for (let i = 0; i < qTokens.length; i++) {
        const q = qTokens[i];
        // Only the token still being typed is allowed to match as a prefix
        // (a prefix test subsumes the exact-match test).
        const matches = i === lastIndex ? (t) => t.startsWith(q) : (t) => t === q;
        const inTitle = titleTokens.filter(matches).length;
        const inText = textTokens.filter(matches).length;
        if (inTitle + inText === 0) {
          matchedAll = false;
          break;
        }
        score += inTitle * 10 + inText;
      }
      if (!matchedAll) continue;
      if (doc.stepId === null) score += 2;
      results.push({
        guideId: doc.guideId,
        stepId: doc.stepId,
        title: doc.title,
        snippet: makeSnippet(doc.text, qTokens),
        score,
      });
    }

    results.sort((a, b) => b.score - a.score);
    return results.slice(0, limit);
  }

  /**
   * Guide-level search used by the library list filter: runs the regular
   * ranked search and keeps only guide documents (stepId === null), so a
   * guide can match on its title, description or placeholders but never on
   * step content.
   *
   * @param {string} query Raw user query.
   * @param {{limit?: number}} [options] Maximum number of guides returned.
   * @returns {Array<{guideId, stepId, title, snippet, score}>}
   */
  searchTitles(query, { limit = 50 } = {}) {
    // Filter before truncating: capping the raw result list first would let
    // higher-scoring step hits crowd matching guides out of the window.
    return this.search(query, { limit: Infinity })
      .filter((r) => r.stepId === null)
      .slice(0, limit);
  }
}
|
||||
|
||||
/**
 * Build a short single-line excerpt of `text` to show next to a search hit.
 *
 * The excerpt is positioned around the first query token (in query order)
 * that occurs in the text as a case-insensitive substring, starting about a
 * third of `span` before it. When no token occurs in the text (for example
 * a title-only hit) the excerpt starts at the beginning. In both cases
 * whitespace runs are collapsed to single spaces and an ellipsis marks each
 * side on which text was cut off.
 *
 * @param {string} text Document body.
 * @param {string[]} qTokens Lowercase query tokens.
 * @param {number} [span=90] Maximum number of source characters excerpted.
 * @returns {string} The excerpt, or '' for empty text.
 */
function makeSnippet(text, qTokens, span = 90) {
  if (!text) return '';
  const lower = text.toLowerCase();
  let at = -1;
  for (const q of qTokens) {
    at = lower.indexOf(q);
    if (at >= 0) break;
  }
  // Math.trunc mirrors the integer conversion slice() applies anyway, so the
  // window is unchanged for matched text while `start` stays an integer.
  const start = at < 0 ? 0 : Math.max(0, Math.trunc(at - span / 3));
  const excerpt = text.slice(start, start + span).replace(/\s+/g, ' ').trim();
  const lead = start > 0 ? '…' : '';
  const tail = start + span < text.length ? '…' : '';
  return `${lead}${excerpt}${tail}`;
}
|
||||
|
||||
module.exports = { SearchIndex, tokenize };
|
||||
@@ -0,0 +1,110 @@
|
||||
'use strict';
|
||||
|
||||
const test = require('node:test');
|
||||
const assert = require('node:assert/strict');
|
||||
const path = require('node:path');
|
||||
|
||||
const { GuideStore } = require('../../core/store');
|
||||
const { SearchIndex } = require('../../core/search');
|
||||
const { makeTmpDir, rmrf, TINY_PNG } = require('./helpers');
|
||||
|
||||
/**
 * Create a small two-guide library under `root` and index both guides.
 *
 * Guide "vpn" has a guide-level description and placeholder plus two steps
 * (one with a screenshot, one content step with a description and a code
 * block). Guide "pw" has a single step carrying a text block.
 *
 * @param {string} root Library root directory.
 * @returns {{store, index, vpn, pw}} The store, the search index and the two
 *   created guides.
 */
function buildLibrary(root) {
  const store = new GuideStore(root);
  const index = new SearchIndex(store.indexDir);

  // Guide 1: VPN installation.
  const vpn = store.createGuide({
    title: 'Install the VPN client',
    descriptionHtml: '<p>Corporate network access</p>',
    placeholders: { Department: 'Infrastructure' },
  });
  const downloadStep = { title: 'Download installer from portal' };
  store.addStep(vpn.guideId, downloadStep, TINY_PNG, { width: 1, height: 1 });
  const tunnelStep = {
    kind: 'content',
    title: 'Configure split tunneling',
    descriptionHtml: '<p>Set the <b>gateway</b> to vpn.example.com</p>',
    codeBlocks: [{ id: 'cb1', language: 'bash', code: 'sudo systemctl restart openvpn' }],
  };
  store.addStep(vpn.guideId, tunnelStep);

  // Guide 2: password reset.
  const pw = store.createGuide({ title: 'Reset user password' });
  const consoleStep = {
    title: 'Open admin console',
    textBlocks: [{ title: 'Permissions', descriptionHtml: '<p>Requires the helpdesk role</p>', level: 'warn' }],
  };
  store.addStep(pw.guideId, consoleStep);

  // Index from what the store hands back, VPN guide first.
  for (const { guideId } of [vpn, pw]) {
    index.indexGuide(store.getGuide(guideId), store.listSteps(guideId));
  }
  return { store, index, vpn, pw };
}
|
||||
|
||||
// Body content (code blocks, text blocks, placeholders) is searchable and
// step-level hits carry the step id for deep-linking.
test('full-text search finds guides and deep-links steps by body content', (t) => {
  const root = makeTmpDir('search');
  t.after(() => rmrf(root));
  const { index, vpn, pw } = buildLibrary(root);

  // A word that only occurs inside a code block resolves to its step.
  const byCode = index.search('openvpn');
  assert.equal(byCode.length, 1);
  const [codeHit] = byCode;
  assert.equal(codeHit.guideId, vpn.guideId);
  assert.ok(codeHit.stepId, 'code block hit should deep-link to its step');
  assert.ok(codeHit.snippet.includes('systemctl restart openvpn'));

  // A word that only occurs inside a text block is found as well.
  const byTextBlock = index.search('helpdesk');
  assert.equal(byTextBlock.length, 1);
  assert.equal(byTextBlock[0].guideId, pw.guideId);

  // Placeholder values belong to the guide-level document.
  const byPlaceholder = index.search('Infrastructure');
  const isVpnGuideDoc = (hit) => hit.guideId === vpn.guideId && hit.stepId === null;
  assert.ok(byPlaceholder.some(isVpnGuideDoc));
});
|
||||
|
||||
// Query semantics: AND across tokens, prefix match on the last token, and
// title occurrences outranking body occurrences.
test('multi-token AND queries and prefix matching on the last token', (t) => {
  const root = makeTmpDir('search2');
  t.after(() => rmrf(root));
  const { index, vpn } = buildLibrary(root);
  const hitCount = (query) => index.search(query).length;

  // All tokens have to occur in one and the same document.
  assert.equal(hitCount('split tunneling'), 1);
  assert.equal(hitCount('split helpdesk'), 0);

  // A partially typed final word still matches (search-as-you-type).
  const asTyped = index.search('tunn');
  assert.equal(asTyped.length, 1);
  assert.equal(asTyped[0].guideId, vpn.guideId);

  // The document with the word in its title comes first.
  const [top] = index.search('vpn');
  assert.equal(top.title, 'Install the VPN client');
});
|
||||
|
||||
// Persistence: a new instance sees what was indexed earlier, and a removal
// is visible both in memory and after another reload.
test('index survives reload from disk and removal works', (t) => {
  const root = makeTmpDir('search3');
  t.after(() => rmrf(root));
  const { store, vpn, pw } = buildLibrary(root);
  const openIndex = () => new SearchIndex(store.indexDir);

  const reloaded = openIndex();
  const passwordHits = reloaded.search('password');
  assert.ok(passwordHits.some((h) => h.guideId === pw.guideId));

  reloaded.removeGuide(vpn.guideId);
  assert.equal(reloaded.search('tunneling').length, 0);
  // Removal persisted: a fresh instance agrees.
  assert.equal(openIndex().search('tunneling').length, 0);
});
|
||||
|
||||
// Re-indexing after an edit must drop the old guide title, and the
// guide-level search must not surface step-only matches.
test('re-indexing a changed guide replaces stale content', (t) => {
  const root = makeTmpDir('search4');
  t.after(() => rmrf(root));
  const { store, index, vpn } = buildLibrary(root);
  const { guideId } = vpn;

  // Rename the guide in the store, then index the saved state again.
  const edited = store.getGuide(guideId);
  edited.title = 'Install the ZeroTrust agent';
  store.saveGuide(edited);
  index.indexGuide(store.getGuide(guideId), store.listSteps(guideId));

  const guideHitCount = (query) => index.searchTitles(query).length;
  assert.equal(guideHitCount('vpn'), 0, 'old title must be gone');
  assert.equal(guideHitCount('zerotrust'), 1);

  // "gateway" only occurs in a step, so only the full search finds it.
  assert.equal(guideHitCount('gateway'), 0);
  assert.ok(index.search('gateway').length >= 1);
});
|
||||
Reference in New Issue
Block a user