Add full-text search index over guides, steps, blocks, and placeholders

Pure-JS inverted index persisted under library/index/ (documented FTS5
fallback). AND queries, last-token prefix matching, title boosting,
step deep-links, snippets. 4 workflow tests (23 total).

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
Iisyourdad
2026-06-10 16:39:49 -05:00
parent 3c51cf6f81
commit 0edcc38886
2 changed files with 253 additions and 0 deletions
+143
View File
@@ -0,0 +1,143 @@
'use strict';
const path = require('node:path');
const { writeJsonSync, readJsonIfExists, htmlToText } = require('./util');
/**
* Local full-text search over guide titles, descriptions, step titles/
* descriptions, text blocks, code blocks, annotation texts, and placeholder
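* values. Pure-JS index persisted as JSON under library/index/ (fallback for
* SQLite FTS5 — see build/agent_audit.md). Documents are stored whole and are
* tokenized and scanned linearly at query time; no posting lists are kept.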
*
* Documents are guide-level and step-level, so results can deep-link to a
* specific step in the editor.
*/
// Schema version written into the persisted index file. A stored file whose
// version differs is ignored on load and the index starts out empty.
const INDEX_VERSION = 1;
/**
 * Split text into lowercase search tokens.
 *
 * The text is lowercased and NFC-normalized so composed and decomposed
 * spellings of the same word yield the same token. A token is a maximal run
 * of Unicode letters, combining marks, digits, and underscores; runs shorter
 * than two UTF-16 code units are discarded.
 *
 * @param {*} text - Value to tokenize; any falsy value yields no tokens.
 * @returns {string[]} Tokens in order of appearance (duplicates are kept).
 */
function tokenize(text) {
  if (!text) return [];
  return String(text)
    .toLowerCase()
    .normalize('NFC')
    // \p{M} keeps combining marks inside a token. Without it, scripts that
    // write vowels as marks (e.g. Devanagari) and decomposed accents are
    // chopped into fragments that no query can match.
    .split(/[^\p{L}\p{M}\p{N}_]+/u)
    .filter((t) => t.length >= 2);
}
/**
 * Search index over guide-level and step-level documents, persisted as a
 * single JSON file. Documents live in `this.docs`, keyed `g:<guideId>` for a
 * guide and `s:<guideId>:<stepId>` for a step.
 */
class SearchIndex {
  /**
   * Load the stored index from `<indexDir>/search-index.json` if present.
   *
   * @param {string} indexDir - Directory that holds the index file.
   */
  constructor(indexDir) {
    this.file = path.join(indexDir, 'search-index.json');
    const stored = readJsonIfExists(this.file, null);
    const docs = stored && stored.version === INDEX_VERSION ? stored.docs : null;
    // docKey -> { guideId, stepId, title, text, updatedAt }
    // A version mismatch or a malformed `docs` payload starts from an empty
    // index instead of leaving `this.docs` in a shape the methods cannot use.
    this.docs = docs && typeof docs === 'object' && !Array.isArray(docs) ? docs : {};
  }

  /** Write the current documents and the index version to the index file. */
  persist() {
    writeJsonSync(this.file, { version: INDEX_VERSION, docs: this.docs });
  }

  /**
   * (Re)index one guide and all of its steps, then persist.
   *
   * Existing documents of the guide are dropped first so removed steps do not
   * linger in the index.
   *
   * @param {object} guide - Guide record; reads `guideId`, `title`,
   *   `descriptionHtml`, `placeholders`, and `updatedAt`.
   * @param {Map|Array} [stepsMap] - Steps of the guide, as a Map (its values
   *   are used) or as an array.
   */
  indexGuide(guide, stepsMap) {
    this.removeGuide(guide.guideId, { persist: false });

    // Nullish placeholder values are skipped rather than interpolated, so the
    // literal words "null"/"undefined" never become searchable.
    const placeholderText = Object.entries(guide.placeholders || {})
      .map(([k, v]) => `${k} ${v ?? ''}`.trim())
      .join(' ');

    this.docs[`g:${guide.guideId}`] = {
      guideId: guide.guideId,
      stepId: null,
      title: guide.title || '',
      text: [htmlToText(guide.descriptionHtml), placeholderText].filter(Boolean).join('\n'),
      updatedAt: guide.updatedAt,
    };

    const steps = stepsMap instanceof Map ? [...stepsMap.values()] : stepsMap || [];
    for (const step of steps) {
      const parts = [
        htmlToText(step.descriptionHtml),
        // A text block without a title contributes only its description.
        ...(step.textBlocks || []).map((tb) =>
          [tb.title, htmlToText(tb.descriptionHtml)].filter(Boolean).join(' ')),
        ...(step.codeBlocks || []).map((cb) => cb.code || ''),
        ...(step.annotations || []).map((a) => a.text || ''),
      ];
      this.docs[`s:${guide.guideId}:${step.stepId}`] = {
        guideId: guide.guideId,
        stepId: step.stepId,
        title: step.title || '',
        text: parts.filter(Boolean).join('\n'),
        updatedAt: guide.updatedAt,
      };
    }
    this.persist();
  }

  /**
   * Remove the guide document and every step document of one guide.
   *
   * @param {string} guideId - Guide whose documents are removed.
   * @param {object} [options]
   * @param {boolean} [options.persist=true] - Write the index file afterwards.
   */
  removeGuide(guideId, { persist = true } = {}) {
    for (const key of Object.keys(this.docs)) {
      if (this.docs[key].guideId === guideId) delete this.docs[key];
    }
    if (persist) this.persist();
  }

  /**
   * Ranked search. Every query token must match (AND); the final token also
   * matches as a prefix so search-as-you-type works. Title hits rank above
   * body hits; guide docs rank above step docs on ties.
   *
   * @param {string} query - Free-text query.
   * @param {object} [options]
   * @param {number} [options.limit=30] - Maximum number of results.
   * @param {?string} [options.guideId=null] - Restrict to one guide.
   * @param {boolean} [options.guidesOnly=false] - Skip step-level documents.
   * @returns {Array<{guideId, stepId, title, snippet, score}>} Best first.
   */
  search(query, { limit = 30, guideId = null, guidesOnly = false } = {}) {
    const qTokens = tokenize(query);
    if (qTokens.length === 0) return [];

    const lastIdx = qTokens.length - 1;
    const countHits = (tokens, q, prefixOk) =>
      tokens.reduce((n, t) => (t === q || (prefixOk && t.startsWith(q)) ? n + 1 : n), 0);

    const matches = [];
    for (const doc of Object.values(this.docs)) {
      if (guideId && doc.guideId !== guideId) continue;
      if (guidesOnly && doc.stepId !== null) continue;

      const titleTokens = tokenize(doc.title);
      const textTokens = tokenize(doc.text);
      let score = 0;
      let matchedAll = true;
      for (let i = 0; i < qTokens.length; i++) {
        const prefixOk = i === lastIdx;
        const inTitle = countHits(titleTokens, qTokens[i], prefixOk);
        const inText = countHits(textTokens, qTokens[i], prefixOk);
        if (inTitle + inText === 0) { matchedAll = false; break; }
        score += inTitle * 10 + inText;
      }
      if (!matchedAll) continue;
      if (doc.stepId === null) score += 2;
      matches.push({ doc, score });
    }

    matches.sort((a, b) => b.score - a.score);
    // Snippets are built only for the results actually returned.
    return matches.slice(0, limit).map(({ doc, score }) => ({
      guideId: doc.guideId,
      stepId: doc.stepId,
      title: doc.title,
      snippet: makeSnippet(doc.text, qTokens),
      score,
    }));
  }

  /**
   * Guide-level search used by the library list filter: same matching and
   * ranking as `search`, but only guide documents (title, description,
   * placeholders) are considered; step documents never appear.
   *
   * Step documents are excluded before the limit is applied, so a guide hit
   * cannot be crowded out by higher-scoring step hits.
   *
   * @param {string} query - Free-text query.
   * @param {object} [options]
   * @param {number} [options.limit=50] - Maximum number of results.
   * @returns {Array<{guideId, stepId, title, snippet, score}>} Best first.
   */
  searchTitles(query, { limit = 50 } = {}) {
    return this.search(query, { limit, guidesOnly: true });
  }
}
/**
 * Build a short single-line excerpt of `text` for a search result.
 *
 * The excerpt is positioned around the first query token (in query order)
 * that occurs in the text as a case-insensitive substring, starting about a
 * third of `span` before the hit. When no token occurs (e.g. the match was
 * in the title only) the excerpt starts at the beginning of the text. In
 * both cases whitespace runs are collapsed to single spaces and an ellipsis
 * marks each side on which text was cut off.
 *
 * Offsets are found in the lowercased text; for the rare characters whose
 * lowercase form has a different length the window may be slightly shifted.
 *
 * @param {string} text - Document body.
 * @param {string[]} qTokens - Lowercase query tokens.
 * @param {number} [span=90] - Excerpt length in UTF-16 code units, before
 *   whitespace collapsing.
 * @returns {string} The excerpt, or '' for empty text.
 */
function makeSnippet(text, qTokens, span = 90) {
  if (!text) return '';
  const source = String(text);
  const lower = source.toLowerCase();
  let at = -1;
  for (const q of qTokens) {
    at = lower.indexOf(q);
    if (at >= 0) break;
  }
  // Floor keeps `start` an integer so the leading-ellipsis test below agrees
  // with where slice() really begins when span is not divisible by 3.
  const start = at < 0 ? 0 : Math.max(0, Math.floor(at - span / 3));
  const out = source.slice(start, start + span).replace(/\s+/g, ' ').trim();
  return (start > 0 ? '…' : '') + out + (start + span < source.length ? '…' : '');
}
// Public surface: the index class plus the tokenizer it applies to both
// queries and document text.
module.exports = { SearchIndex, tokenize };
+110
View File
@@ -0,0 +1,110 @@
'use strict';
const test = require('node:test');
const assert = require('node:assert/strict');
const path = require('node:path');
const { GuideStore } = require('../../core/store');
const { SearchIndex } = require('../../core/search');
const { makeTmpDir, rmrf, TINY_PNG } = require('./helpers');
/**
 * Create a two-guide library under `root` and index both guides.
 *
 * Guide "vpn" has a placeholder and two steps (one with a code block);
 * guide "pw" has a single step carrying a text block.
 *
 * @param {string} root - Library root directory.
 * @returns {{store, index, vpn, pw}} The store, the search index, and the
 *   two created guides.
 */
function buildLibrary(root) {
  const store = new GuideStore(root);
  const index = new SearchIndex(store.indexDir);

  // Guide 1: VPN installation.
  const vpnGuide = store.createGuide({
    title: 'Install the VPN client',
    descriptionHtml: '<p>Corporate network access</p>',
    placeholders: { Department: 'Infrastructure' },
  });
  const pixelSize = { width: 1, height: 1 };
  store.addStep(vpnGuide.guideId, { title: 'Download installer from portal' }, TINY_PNG, pixelSize);
  const tunnelingStep = {
    kind: 'content',
    title: 'Configure split tunneling',
    descriptionHtml: '<p>Set the <b>gateway</b> to vpn.example.com</p>',
    codeBlocks: [{ id: 'cb1', language: 'bash', code: 'sudo systemctl restart openvpn' }],
  };
  store.addStep(vpnGuide.guideId, tunnelingStep);

  // Guide 2: password reset.
  const passwordGuide = store.createGuide({ title: 'Reset user password' });
  const consoleStep = {
    title: 'Open admin console',
    textBlocks: [{ title: 'Permissions', descriptionHtml: '<p>Requires the helpdesk role</p>', level: 'warn' }],
  };
  store.addStep(passwordGuide.guideId, consoleStep);

  // Index from the stored state, VPN guide first.
  for (const { guideId } of [vpnGuide, passwordGuide]) {
    index.indexGuide(store.getGuide(guideId), store.listSteps(guideId));
  }

  return { store, index, vpn: vpnGuide, pw: passwordGuide };
}
// Body content of steps and guide-level placeholders must be searchable,
// with step hits carrying the stepId needed for a deep link.
test('full-text search finds guides and deep-links steps by body content', (t) => {
  const tmpRoot = makeTmpDir('search');
  t.after(() => rmrf(tmpRoot));
  const { index, vpn, pw } = buildLibrary(tmpRoot);

  // A word that only occurs inside a code block resolves to that step.
  const fromCode = index.search('openvpn');
  assert.equal(fromCode.length, 1);
  const [codeHit] = fromCode;
  assert.equal(codeHit.guideId, vpn.guideId);
  assert.ok(codeHit.stepId, 'code block hit should deep-link to its step');
  assert.ok(codeHit.snippet.includes('systemctl restart openvpn'));

  // A word that only occurs inside a text block is found as well.
  const fromTextBlock = index.search('helpdesk');
  assert.equal(fromTextBlock.length, 1);
  assert.equal(fromTextBlock[0].guideId, pw.guideId);

  // Placeholder values belong to the guide-level document.
  const isVpnGuideDoc = (hit) => hit.guideId === vpn.guideId && hit.stepId === null;
  const fromPlaceholder = index.search('Infrastructure');
  assert.ok(fromPlaceholder.some(isVpnGuideDoc));
});
// AND semantics across tokens, prefix matching for the trailing token, and
// title-over-body ranking.
test('multi-token AND queries and prefix matching on the last token', (t) => {
  const tmpRoot = makeTmpDir('search2');
  t.after(() => rmrf(tmpRoot));
  const { index, vpn } = buildLibrary(tmpRoot);

  // All tokens have to be found in one and the same document.
  const bothInOneDoc = index.search('split tunneling');
  assert.equal(bothInOneDoc.length, 1);
  const spreadAcrossDocs = index.search('split helpdesk');
  assert.equal(spreadAcrossDocs.length, 0);

  // A partially typed last word still matches (search-as-you-type).
  const partial = index.search('tunn');
  assert.equal(partial.length, 1);
  assert.equal(partial[0].guideId, vpn.guideId);

  // The document with the term in its title comes first.
  const byRank = index.search('vpn');
  assert.equal(byRank[0].title, 'Install the VPN client');
});
// The index written by buildLibrary must be readable by a new instance, and
// removing a guide must be visible both in memory and after another reload.
test('index survives reload from disk and removal works', (t) => {
  const tmpRoot = makeTmpDir('search3');
  t.after(() => rmrf(tmpRoot));
  const { store, vpn, pw } = buildLibrary(tmpRoot);

  const fromDisk = new SearchIndex(store.indexDir);
  const passwordHits = fromDisk.search('password');
  assert.ok(passwordHits.some((hit) => hit.guideId === pw.guideId));

  fromDisk.removeGuide(vpn.guideId);
  assert.equal(fromDisk.search('tunneling').length, 0);

  // The removal was written out: yet another instance sees it too.
  const afterRemoval = new SearchIndex(store.indexDir);
  assert.equal(afterRemoval.search('tunneling').length, 0);
});
// Re-indexing after a title change must drop the old title, and the
// guide-only search must ignore matches that exist only in step documents.
test('re-indexing a changed guide replaces stale content', (t) => {
  const tmpRoot = makeTmpDir('search4');
  t.after(() => rmrf(tmpRoot));
  const { store, index, vpn } = buildLibrary(tmpRoot);

  // Rename the guide in the store, then index its fresh state.
  const renamed = store.getGuide(vpn.guideId);
  renamed.title = 'Install the ZeroTrust agent';
  store.saveGuide(renamed);
  index.indexGuide(store.getGuide(vpn.guideId), store.listSteps(vpn.guideId));

  const oldTitleHits = index.searchTitles('vpn');
  assert.equal(oldTitleHits.length, 0, 'old title must be gone');
  const newTitleHits = index.searchTitles('zerotrust');
  assert.equal(newTitleHits.length, 1);

  // "gateway" lives only in a step, so the guide-only search misses it
  // while the full search finds it.
  const guideOnlyHits = index.searchTitles('gateway');
  assert.equal(guideOnlyHits.length, 0);
  const allHits = index.search('gateway');
  assert.ok(allHits.length >= 1);
});