From 0edcc3888662171cad6cf26ea6375284d786938b Mon Sep 17 00:00:00 2001
From: Iisyourdad
Date: Wed, 10 Jun 2026 16:39:49 -0500
Subject: [PATCH] Add full-text search index over guides, steps, blocks, and
 placeholders

Pure-JS inverted index persisted under library/index/ (documented FTS5
fallback). AND queries, last-token prefix matching, title boosting, step
deep-links, snippets. 4 workflow tests (23 total).

Co-Authored-By: Claude Fable 5
---
 core/search.js            | 207 ++++++++++++++++++++++++++++++++++++++
 tests/unit/search.test.js | 110 ++++++++++++++++++++
 2 files changed, 317 insertions(+)
 create mode 100644 core/search.js
 create mode 100644 tests/unit/search.test.js

diff --git a/core/search.js b/core/search.js
new file mode 100644
index 0000000..3cf3fd8
--- /dev/null
+++ b/core/search.js
@@ -0,0 +1,207 @@
+'use strict';
+
+const path = require('node:path');
+const { writeJsonSync, readJsonIfExists, htmlToText } = require('./util');
+
+/**
+ * Local full-text search over guide titles, descriptions, step titles/
+ * descriptions, text blocks, code blocks, annotation texts, and placeholder
+ * values. Pure-JS index persisted as JSON under library/index/ (fallback for
+ * SQLite FTS5 — see build/agent_audit.md). Stored documents are tokenized and
+ * scanned at query time.
+ *
+ * Documents are guide-level and step-level, so results can deep-link to a
+ * specific step in the editor.
+ */
+
+const INDEX_VERSION = 1;
+
+/**
+ * Split text into lowercase search tokens.
+ *
+ * Tokens are runs of Unicode letters, digits, and underscores; tokens shorter
+ * than two characters are dropped.
+ *
+ * @param {*} text Value to tokenize; falsy values yield no tokens.
+ * @returns {string[]} Tokens in order of appearance.
+ */
+function tokenize(text) {
+  if (!text) return [];
+  return String(text)
+    .toLowerCase()
+    .split(/[^\p{L}\p{N}_]+/u)
+    .filter((t) => t.length >= 2);
+}
+
+class SearchIndex {
+  /**
+   * @param {string} indexDir Directory that holds search-index.json.
+   */
+  constructor(indexDir) {
+    this.file = path.join(indexDir, 'search-index.json');
+    const stored = readJsonIfExists(this.file, null);
+    // Start empty unless the file has the current version AND a plain docs
+    // object; a malformed file must not leave this.docs undefined.
+    const usable = Boolean(stored) && stored.version === INDEX_VERSION
+      && Boolean(stored.docs) && typeof stored.docs === 'object'
+      && !Array.isArray(stored.docs);
+    // docKey -> { guideId, stepId, title, text, updatedAt }
+    this.docs = usable ? stored.docs : {};
+  }
+
+  /** Write the current documents to the index file. */
+  persist() {
+    writeJsonSync(this.file, { version: INDEX_VERSION, docs: this.docs });
+  }
+
+  /**
+   * (Re)index one guide and all of its steps, then persist.
+   *
+   * @param {object} guide Guide record; reads guideId, title, descriptionHtml,
+   *   placeholders, and updatedAt.
+   * @param {Map|Array} [stepsMap] Steps as a Map (its values are used) or an
+   *   array.
+   */
+  indexGuide(guide, stepsMap) {
+    this.removeGuide(guide.guideId, { persist: false });
+
+    // Join only the pieces that exist, so a missing title or value is not
+    // indexed as the literal word "undefined" or "null".
+    const joinPresent = (values) => values.filter((v) => v != null && v !== '').join(' ');
+
+    const placeholderText = Object.entries(guide.placeholders || {})
+      .map((entry) => joinPresent(entry))
+      .join(' ');
+    this.docs[`g:${guide.guideId}`] = {
+      guideId: guide.guideId,
+      stepId: null,
+      title: guide.title || '',
+      text: [htmlToText(guide.descriptionHtml), placeholderText].filter(Boolean).join('\n'),
+      updatedAt: guide.updatedAt,
+    };
+
+    const steps = stepsMap instanceof Map ? [...stepsMap.values()] : stepsMap || [];
+    for (const step of steps) {
+      const parts = [
+        htmlToText(step.descriptionHtml),
+        ...(step.textBlocks || []).map((tb) => joinPresent([tb.title, htmlToText(tb.descriptionHtml)])),
+        ...(step.codeBlocks || []).map((cb) => cb.code || ''),
+        ...(step.annotations || []).map((a) => a.text || ''),
+      ];
+      this.docs[`s:${guide.guideId}:${step.stepId}`] = {
+        guideId: guide.guideId,
+        stepId: step.stepId,
+        title: step.title || '',
+        text: parts.filter(Boolean).join('\n'),
+        updatedAt: guide.updatedAt,
+      };
+    }
+    this.persist();
+  }
+
+  /**
+   * Remove a guide's document and all of its step documents.
+   *
+   * @param {*} guideId Identifier of the guide to drop.
+   * @param {{ persist?: boolean }} [options] Pass persist: false to skip the
+   *   write, e.g. when the caller persists right afterwards.
+   */
+  removeGuide(guideId, { persist = true } = {}) {
+    for (const key of Object.keys(this.docs)) {
+      if (this.docs[key].guideId === guideId) delete this.docs[key];
+    }
+    if (persist) this.persist();
+  }
+
+  /**
+   * Ranked search. Every query token must match (AND); the final token also
+   * matches as a prefix so search-as-you-type works. Title hits rank above
+   * body hits, and guide docs get a small bonus over step docs.
+   *
+   * @param {string} query Free-text query.
+   * @param {{ limit?: number, guideId?: * }} [options] limit caps the number
+   *   of results; guideId restricts the search to one guide.
+   * @returns {object[]} Hits as { guideId, stepId, title, snippet, score },
+   *   highest score first.
+   */
+  search(query, { limit = 30, guideId = null } = {}) {
+    const qTokens = tokenize(query);
+    if (qTokens.length === 0) return [];
+    const results = [];
+
+    for (const doc of Object.values(this.docs)) {
+      if (guideId && doc.guideId !== guideId) continue;
+      const titleTokens = tokenize(doc.title);
+      const textTokens = tokenize(doc.text);
+      let score = 0;
+      let matchedAll = true;
+
+      for (let i = 0; i < qTokens.length; i++) {
+        const q = qTokens[i];
+        const prefixOk = i === qTokens.length - 1;
+        const matches = (t) => t === q || (prefixOk && t.startsWith(q));
+        const inTitle = titleTokens.filter(matches).length;
+        const inText = textTokens.filter(matches).length;
+        if (inTitle + inText === 0) { matchedAll = false; break; }
+        score += inTitle * 10 + inText; // a title hit counts as ten body hits
+      }
+      if (!matchedAll) continue;
+      if (doc.stepId === null) score += 2; // guide-level bonus
+      results.push({
+        guideId: doc.guideId,
+        stepId: doc.stepId,
+        title: doc.title,
+        snippet: makeSnippet(doc.text, qTokens),
+        score,
+      });
+    }
+
+    results.sort((a, b) => b.score - a.score);
+    return results.slice(0, limit);
+  }
+
+  /**
+   * Guide-level search used by the library list filter. Returns only guide
+   * documents (stepId === null); a guide matches on its title, description,
+   * or placeholder text, and step-level matches are excluded.
+   *
+   * @param {string} query Free-text query.
+   * @param {{ limit?: number }} [options] limit caps the number of guides.
+   * @returns {object[]} Guide hits in the same shape as search().
+   */
+  searchTitles(query, { limit = 50 } = {}) {
+    // Filter before truncating: cutting the mixed list first could drop guide
+    // hits whenever enough step documents score higher.
+    return this.search(query, { limit: Infinity })
+      .filter((r) => r.stepId === null)
+      .slice(0, limit);
+  }
+}
+
+/**
+ * Build a short excerpt of text around the first query token found in it.
+ *
+ * @param {string} text Document body.
+ * @param {string[]} qTokens Lowercase query tokens.
+ * @param {number} [span=90] Excerpt length in characters.
+ * @returns {string} When a token is found: the surrounding window with
+ *   whitespace collapsed and '…' marking cut ends. Otherwise the first span
+ *   characters of text, or '' for empty text.
+ */
+function makeSnippet(text, qTokens, span = 90) {
+  if (!text) return '';
+  const lower = text.toLowerCase();
+  let at = -1;
+  for (const q of qTokens) {
+    at = lower.indexOf(q);
+    if (at >= 0) break;
+  }
+  if (at < 0) return text.slice(0, span);
+  // Keep about a third of the window before the hit; floor so the offset
+  // stays a whole number when span is not a multiple of three.
+  const start = Math.max(0, at - Math.floor(span / 3));
+  const out = text.slice(start, start + span).replace(/\s+/g, ' ').trim();
+  return (start > 0 ? '…' : '') + out + (start + span < text.length ? '…' : '');
+}
+
+module.exports = { SearchIndex, tokenize };
diff --git a/tests/unit/search.test.js b/tests/unit/search.test.js
new file mode 100644
index 0000000..7cc90f3
--- /dev/null
+++ b/tests/unit/search.test.js
@@ -0,0 +1,110 @@
+'use strict';
+
+const test = require('node:test');
+const assert = require('node:assert/strict');
+const path = require('node:path');
+
+const { GuideStore } = require('../../core/store');
+const { SearchIndex } = require('../../core/search');
+const { makeTmpDir, rmrf, TINY_PNG } = require('./helpers');
+
+function buildLibrary(root) {
+  const store = new GuideStore(root);
+  const index = new SearchIndex(store.indexDir);
+
+  const vpn = store.createGuide({
+    title: 'Install the VPN client',
+    descriptionHtml: '<p>Corporate network access</p>',
+    placeholders: { Department: 'Infrastructure' },
+  });
+  store.addStep(vpn.guideId, { title: 'Download installer from portal' }, TINY_PNG, { width: 1, height: 1 });
+  store.addStep(vpn.guideId, {
+    kind: 'content',
+    title: 'Configure split tunneling',
+    descriptionHtml: '<p>Set the gateway to vpn.example.com</p>',
+    codeBlocks: [{ id: 'cb1', language: 'bash', code: 'sudo systemctl restart openvpn' }],
+  });
+
+  const pw = store.createGuide({ title: 'Reset user password' });
+  store.addStep(pw.guideId, {
+    title: 'Open admin console',
+    textBlocks: [{ title: 'Permissions', descriptionHtml: '<p>Requires the helpdesk role</p>', level: 'warn' }],
+  });
+
+  index.indexGuide(store.getGuide(vpn.guideId), store.listSteps(vpn.guideId));
+  index.indexGuide(store.getGuide(pw.guideId), store.listSteps(pw.guideId));
+  return { store, index, vpn, pw };
+}
+
+test('full-text search finds guides and deep-links steps by body content', (t) => {
+  const root = makeTmpDir('search');
+  t.after(() => rmrf(root));
+  const { index, vpn, pw } = buildLibrary(root);
+
+  // Body text inside a code block is searchable and points at the step.
+  const codeHits = index.search('openvpn');
+  assert.equal(codeHits.length, 1);
+  assert.equal(codeHits[0].guideId, vpn.guideId);
+  assert.ok(codeHits[0].stepId, 'code block hit should deep-link to its step');
+  assert.ok(codeHits[0].snippet.includes('systemctl restart openvpn'));
+
+  // Text block content is searchable too.
+  const tbHits = index.search('helpdesk');
+  assert.equal(tbHits.length, 1);
+  assert.equal(tbHits[0].guideId, pw.guideId);
+
+  // Placeholder values are indexed at guide level.
+  const phHits = index.search('Infrastructure');
+  assert.ok(phHits.some((h) => h.guideId === vpn.guideId && h.stepId === null));
+});
+
+test('multi-token AND queries and prefix matching on the last token', (t) => {
+  const root = makeTmpDir('search2');
+  t.after(() => rmrf(root));
+  const { index, vpn } = buildLibrary(root);
+
+  // Both tokens must match the same document.
+  assert.equal(index.search('split tunneling').length, 1);
+  assert.equal(index.search('split helpdesk').length, 0);
+
+  // Search-as-you-type: trailing token matches as a prefix.
+  const typed = index.search('tunn');
+  assert.equal(typed.length, 1);
+  assert.equal(typed[0].guideId, vpn.guideId);
+
+  // Title hits outrank body hits.
+  const ranked = index.search('vpn');
+  assert.equal(ranked[0].title, 'Install the VPN client');
+});
+
+test('index survives reload from disk and removal works', (t) => {
+  const root = makeTmpDir('search3');
+  t.after(() => rmrf(root));
+  const { store, vpn, pw } = buildLibrary(root);
+
+  const reloaded = new SearchIndex(store.indexDir);
+  assert.ok(reloaded.search('password').some((h) => h.guideId === pw.guideId));
+
+  reloaded.removeGuide(vpn.guideId);
+  assert.equal(reloaded.search('tunneling').length, 0);
+  // Removal persisted: a fresh instance agrees.
+  assert.equal(new SearchIndex(store.indexDir).search('tunneling').length, 0);
+});
+
+test('re-indexing a changed guide replaces stale content', (t) => {
+  const root = makeTmpDir('search4');
+  t.after(() => rmrf(root));
+  const { store, index, vpn } = buildLibrary(root);
+
+  const guide = store.getGuide(vpn.guideId);
+  guide.title = 'Install the ZeroTrust agent';
+  store.saveGuide(guide);
+  index.indexGuide(store.getGuide(vpn.guideId), store.listSteps(vpn.guideId));
+
+  assert.equal(index.searchTitles('vpn').length, 0, 'old title must be gone');
+  assert.equal(index.searchTitles('zerotrust').length, 1);
+
+  // titles-only search excludes step-level matches.
+  assert.equal(index.searchTitles('gateway').length, 0);
+  assert.ok(index.search('gateway').length >= 1);
+});