Skip to content

Commit 0ddefb6

Browse files
clydin authored and alan-agius4 committed
refactor(@angular/cli): enhance example search with structured data
This commit refactors the `find_examples` MCP tool and its associated database generator to leverage the structured YAML front matter present in the example markdown files.

Key changes:

- The SQLite database schema is now relational, with dedicated columns for `title`, `summary`, and `keywords`.
- An FTS5 virtual table indexes these structured fields, improving search relevance by allowing queries to target specific metadata.
- The build-time database generator (`tools/example_db_generator.js`) now parses and validates the front matter of each example file using Zod. The build will fail if an example is missing a required field (`title`, `summary`), ensuring data integrity.
- The runtime tool (`packages/.../examples.ts`) uses the same parsing logic but will warn and skip invalid files to be more resilient.

This change provides a more robust and accurate foundation for the example search feature, enabling more precise results and paving the way for future enhancements like semantic search.
1 parent e6a3b55 commit 0ddefb6

File tree

5 files changed

+235
-19
lines changed

5 files changed

+235
-19
lines changed

package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,7 @@
145145
"verdaccio": "6.1.6",
146146
"verdaccio-auth-memory": "^10.0.0",
147147
"yargs-parser": "22.0.0",
148+
"zod": "4.1.5",
148149
"zone.js": "^0.15.0"
149150
},
150151
"dependenciesMeta": {

packages/angular/cli/src/commands/mcp/tools/examples.ts

Lines changed: 112 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,9 @@ async function createFindExampleHandler({ exampleDatabasePath }: McpToolContext)
114114
db = new DatabaseSync(exampleDatabasePath, { readOnly: true });
115115
}
116116
if (!queryStatement) {
117-
queryStatement = db.prepare('SELECT * from examples WHERE examples MATCH ? ORDER BY rank;');
117+
queryStatement = db.prepare(
118+
'SELECT content from examples_fts WHERE examples_fts MATCH ? ORDER BY rank;',
119+
);
118120
}
119121

120122
const sanitizedQuery = escapeSearchQuery(query);
@@ -218,24 +220,128 @@ function suppressSqliteWarning() {
218220
};
219221
}
220222

223+
/**
 * A simple YAML front matter parser.
 *
 * Extracts the block enclosed by `---` lines at the very beginning of `content`
 * and converts it into a plain object. This is intentionally NOT a full YAML
 * parser; it understands only:
 *
 * - top-level `key: value` scalars (always returned as strings),
 * - block lists (`key:` followed by `- item` lines), returned as string arrays,
 * - single- or double-quoted scalars, whose surrounding quotes are removed,
 * - blank lines and `#` comment lines, which are ignored.
 *
 * Nested mappings, inline lists (`[a, b]`), multi-line scalars and escape
 * sequences inside quotes are not supported. Lines with an empty key are ignored.
 *
 * @param content The string content to parse.
 * @returns A record containing the parsed front matter data, or an empty
 *   record when `content` does not start with a front matter block.
 */
function parseFrontmatter(content: string): Record<string, unknown> {
  const match = content.match(/^---\r?\n(.*?)\r?\n---/s);
  if (!match) {
    return {};
  }

  // Removes one pair of matching surrounding quotes. The value is left untouched
  // when the same quote character also appears inside it (e.g. `"a" and "b"`),
  // because that is not a single quoted scalar.
  const unquote = (raw: string): string => {
    const text = raw.trim();
    const quote = text[0];
    const isQuoted =
      text.length >= 2 &&
      (quote === '"' || quote === "'") &&
      text.endsWith(quote) &&
      !text.slice(1, -1).includes(quote);

    return isQuoted ? text.slice(1, -1) : text;
  };

  const data: Record<string, unknown> = {};
  // The list currently being filled, or `undefined` when the last key held a scalar.
  let currentArray: string[] | undefined;

  for (const line of match[1].split(/\r?\n/)) {
    // Skip blank lines and comments so that `# note: text` is not mistaken for a key.
    if (/^\s*(#.*)?$/.test(line)) {
      continue;
    }

    // List items must be recognised BEFORE key/value pairs; otherwise an item that
    // contains a colon (`- a: b`, `- https://example.com`) would be parsed as a key.
    if (currentArray) {
      const arrayItemMatch = line.match(/^\s*-\s*(.*)/);
      if (arrayItemMatch) {
        currentArray.push(unquote(arrayItemMatch[1]));
        continue;
      }
    }

    const keyValueMatch = line.match(/^([^:]+):\s*(.*)/);
    if (!keyValueMatch) {
      continue;
    }

    const [, rawKey, rawValue] = keyValueMatch;
    const key = rawKey.trim();
    const value = rawValue.trim();

    if (key === '') {
      currentArray = undefined;
      continue;
    }

    if (value === '') {
      // A key without an inline value starts a list; items are appended as they are read.
      currentArray = [];
      data[key] = currentArray;
    } else {
      currentArray = undefined;
      data[key] = unquote(value);
    }
  }

  return data;
}
276+
221277
async function setupRuntimeExamples(
222278
examplesPath: string,
223279
): Promise<import('node:sqlite').DatabaseSync> {
224280
const { DatabaseSync } = await import('node:sqlite');
225281
const db = new DatabaseSync(':memory:');
226282

227-
db.exec(`CREATE VIRTUAL TABLE examples USING fts5(content, tokenize = 'porter ascii');`);
283+
// Create a relational table to store the structured example data.
284+
db.exec(`
285+
CREATE TABLE examples (
286+
id INTEGER PRIMARY KEY,
287+
title TEXT NOT NULL,
288+
summary TEXT NOT NULL,
289+
keywords TEXT,
290+
content TEXT NOT NULL
291+
);
292+
`);
293+
294+
// Create an FTS5 virtual table to provide full-text search capabilities.
295+
// It indexes the title, summary, keywords, and the full content.
296+
db.exec(`
297+
CREATE VIRTUAL TABLE examples_fts USING fts5(
298+
title,
299+
summary,
300+
keywords,
301+
content,
302+
content='examples',
303+
content_rowid='id',
304+
tokenize = 'porter ascii'
305+
);
306+
`);
228307

229-
const insertStatement = db.prepare('INSERT INTO examples(content) VALUES(?);');
308+
// Create triggers to keep the FTS table synchronized with the examples table.
309+
db.exec(`
310+
CREATE TRIGGER examples_after_insert AFTER INSERT ON examples BEGIN
311+
INSERT INTO examples_fts(rowid, title, summary, keywords, content)
312+
VALUES (new.id, new.title, new.summary, new.keywords, new.content);
313+
END;
314+
`);
315+
316+
const insertStatement = db.prepare(
317+
'INSERT INTO examples(title, summary, keywords, content) VALUES(?, ?, ?, ?);',
318+
);
319+
320+
const frontmatterSchema = z.object({
321+
title: z.string(),
322+
summary: z.string(),
323+
keywords: z.array(z.string()).optional(),
324+
});
230325

231326
db.exec('BEGIN TRANSACTION');
232-
for await (const entry of glob('*.md', { cwd: examplesPath, withFileTypes: true })) {
327+
for await (const entry of glob('**/*.md', { cwd: examplesPath, withFileTypes: true })) {
233328
if (!entry.isFile()) {
234329
continue;
235330
}
236331

237-
const example = await readFile(path.join(entry.parentPath, entry.name), 'utf-8');
238-
insertStatement.run(example);
332+
const content = await readFile(path.join(entry.parentPath, entry.name), 'utf-8');
333+
const frontmatter = parseFrontmatter(content);
334+
335+
const validation = frontmatterSchema.safeParse(frontmatter);
336+
if (!validation.success) {
337+
// eslint-disable-next-line no-console
338+
console.warn(`Skipping invalid example file ${entry.name}:`, validation.error.issues);
339+
continue;
340+
}
341+
342+
const { title, summary, keywords } = validation.data;
343+
344+
insertStatement.run(title, summary, JSON.stringify(keywords ?? []), content);
239345
}
240346
db.exec('END TRANSACTION');
241347

pnpm-lock.yaml

Lines changed: 8 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tools/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ js_binary(
3535
name = "ng_example_db",
3636
data = [
3737
"example_db_generator.js",
38+
"//:node_modules/zod",
3839
],
3940
entry_point = "example_db_generator.js",
4041
)

tools/example_db_generator.js

Lines changed: 113 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -6,22 +6,66 @@
66
* found in the LICENSE file at https://angular.dev/license
77
*/
88

9-
const { readdirSync, readFileSync, mkdirSync, existsSync, rmSync } = require('node:fs');
10-
const { resolve, dirname } = require('node:path');
9+
const { globSync, readdirSync, readFileSync, mkdirSync, existsSync, rmSync } = require('node:fs');
10+
const { resolve, dirname, join } = require('node:path');
1111
const { DatabaseSync } = require('node:sqlite');
12+
const { z } = require('zod');
1213

13-
function generate(inPath, outPath) {
14-
const examples = [];
14+
/**
15+
* A simple YAML front matter parser.
16+
*
17+
* This function extracts the YAML block enclosed by `---` at the beginning of a string
18+
* and parses it into a JavaScript object. It is not a full YAML parser and only
19+
* supports simple key-value pairs and string arrays.
20+
*
21+
* @param content The string content to parse.
22+
* @returns A record containing the parsed front matter data.
23+
*/
24+
function parseFrontmatter(content) {
25+
const match = content.match(/^---\r?\n(.*?)\r?\n---/s);
26+
if (!match) {
27+
return {};
28+
}
1529

16-
const entries = readdirSync(resolve(inPath), { withFileTypes: true });
17-
for (const entry of entries) {
18-
if (!entry.isFile()) {
19-
continue;
30+
const frontmatter = match[1];
31+
const data = {};
32+
const lines = frontmatter.split(/\r?\n/);
33+
34+
let currentKey = '';
35+
let isArray = false;
36+
const arrayValues = [];
37+
38+
for (const line of lines) {
39+
const keyValueMatch = line.match(/^([^:]+):\s*(.*)/);
40+
if (keyValueMatch) {
41+
if (currentKey && isArray) {
42+
data[currentKey] = arrayValues.slice();
43+
arrayValues.length = 0;
44+
}
45+
46+
const [, key, value] = keyValueMatch;
47+
currentKey = key.trim();
48+
isArray = value.trim() === '';
49+
50+
if (!isArray) {
51+
data[currentKey] = value.trim();
52+
}
53+
} else {
54+
const arrayItemMatch = line.match(/^\s*-\s*(.*)/);
55+
if (arrayItemMatch && currentKey && isArray) {
56+
arrayValues.push(arrayItemMatch[1].trim());
57+
}
2058
}
59+
}
2160

22-
examples.push(readFileSync(resolve(inPath, entry.name), 'utf-8'));
61+
if (currentKey && isArray) {
62+
data[currentKey] = arrayValues;
2363
}
2464

65+
return data;
66+
}
67+
68+
function generate(inPath, outPath) {
2569
const dbPath = outPath;
2670
mkdirSync(dirname(outPath), { recursive: true });
2771

@@ -30,13 +74,69 @@ function generate(inPath, outPath) {
3074
}
3175
const db = new DatabaseSync(dbPath);
3276

33-
db.exec(`CREATE VIRTUAL TABLE examples USING fts5(content, tokenize = 'porter ascii');`);
77+
// Create a relational table to store the structured example data.
78+
db.exec(`
79+
CREATE TABLE examples (
80+
id INTEGER PRIMARY KEY,
81+
title TEXT NOT NULL,
82+
summary TEXT NOT NULL,
83+
keywords TEXT,
84+
content TEXT NOT NULL
85+
);
86+
`);
87+
88+
// Create an FTS5 virtual table to provide full-text search capabilities.
89+
db.exec(`
90+
CREATE VIRTUAL TABLE examples_fts USING fts5(
91+
title,
92+
summary,
93+
keywords,
94+
content,
95+
content='examples',
96+
content_rowid='id',
97+
tokenize = 'porter ascii'
98+
);
99+
`);
34100

35-
const insertStatement = db.prepare('INSERT INTO examples(content) VALUES(?);');
101+
// Create triggers to keep the FTS table synchronized with the examples table.
102+
db.exec(`
103+
CREATE TRIGGER examples_after_insert AFTER INSERT ON examples BEGIN
104+
INSERT INTO examples_fts(rowid, title, summary, keywords, content)
105+
VALUES (new.id, new.title, new.summary, new.keywords, new.content);
106+
END;
107+
`);
108+
109+
const insertStatement = db.prepare(
110+
'INSERT INTO examples(title, summary, keywords, content) VALUES(?, ?, ?, ?);',
111+
);
112+
113+
const frontmatterSchema = z.object({
114+
title: z.string(),
115+
summary: z.string(),
116+
keywords: z.array(z.string()).optional(),
117+
});
36118

37119
db.exec('BEGIN TRANSACTION');
38-
for (const example of examples) {
39-
insertStatement.run(example);
120+
const entries = globSync
121+
? globSync('**/*.md', { cwd: resolve(inPath), withFileTypes: true })
122+
: readdirSync(resolve(inPath), { withFileTypes: true });
123+
for (const entry of entries) {
124+
if (!entry.isFile() || !entry.name.endsWith('.md')) {
125+
continue;
126+
}
127+
128+
const content = readFileSync(join(entry.parentPath, entry.name), 'utf-8');
129+
const frontmatter = parseFrontmatter(content);
130+
131+
const validation = frontmatterSchema.safeParse(frontmatter);
132+
if (!validation.success) {
133+
console.error(`Validation failed for example file: ${entry.name}`);
134+
console.error('Issues:', validation.error.issues);
135+
throw new Error(`Invalid front matter in ${entry.name}`);
136+
}
137+
138+
const { title, summary, keywords } = validation.data;
139+
insertStatement.run(title, summary, JSON.stringify(keywords ?? []), content);
40140
}
41141
db.exec('END TRANSACTION');
42142

0 commit comments

Comments
 (0)