Files
awesome-copilot/eng/generate-website-data.mjs
Aaron Powell dc1b93354c Combine llms.txt generation with website data generation
- Move llms.txt generation into generate-website-data.mjs
- Remove standalone generate-llms-txt.mjs script
- Remove llms:generate npm script
- llms.txt now outputs to website/public/data/ alongside other generated files
2026-02-03 14:30:19 +11:00

909 lines
24 KiB
JavaScript

#!/usr/bin/env node
/**
* Generate JSON metadata files for the GitHub Pages website.
* This script extracts metadata from agents, prompts, instructions, skills, and collections
* and writes them to website/data/ for client-side search and display.
*/
import fs from "fs";
import path, { dirname } from "path";
import { fileURLToPath } from "url";
import {
AGENTS_DIR,
COLLECTIONS_DIR,
COOKBOOK_DIR,
INSTRUCTIONS_DIR,
PROMPTS_DIR,
ROOT_FOLDER,
SKILLS_DIR,
} from "./constants.mjs";
import {
parseCollectionYaml,
parseFrontmatter,
parseSkillMetadata,
parseYamlFile,
} from "./yaml-parser.mjs";
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
const WEBSITE_DATA_DIR = path.join(ROOT_FOLDER, "website", "public", "data");
const WEBSITE_SOURCE_DATA_DIR = path.join(ROOT_FOLDER, "website", "data");
/**
* Ensure the output directory exists
*/
function ensureDataDir() {
if (!fs.existsSync(WEBSITE_DATA_DIR)) {
fs.mkdirSync(WEBSITE_DATA_DIR, { recursive: true });
}
}
/**
* Extract title from filename or frontmatter
*/
function extractTitle(filePath, frontmatter) {
if (frontmatter?.title) return frontmatter.title;
if (frontmatter?.name) {
return frontmatter.name
.split("-")
.map((word) => word.charAt(0).toUpperCase() + word.slice(1))
.join(" ");
}
// Fallback to filename
const basename = path.basename(filePath);
const name = basename
.replace(/\.(agent|prompt|instructions)\.md$/, "")
.replace(/\.md$/, "");
return name
.split("-")
.map((word) => word.charAt(0).toUpperCase() + word.slice(1))
.join(" ");
}
/**
* Generate agents metadata
*/
function generateAgentsData() {
const agents = [];
const files = fs
.readdirSync(AGENTS_DIR)
.filter((f) => f.endsWith(".agent.md"));
// Track all unique values for filters
const allModels = new Set();
const allTools = new Set();
for (const file of files) {
const filePath = path.join(AGENTS_DIR, file);
const frontmatter = parseFrontmatter(filePath);
const relativePath = path
.relative(ROOT_FOLDER, filePath)
.replace(/\\/g, "/");
const model = frontmatter?.model || null;
const tools = frontmatter?.tools || [];
const handoffs = frontmatter?.handoffs || [];
// Track unique values
if (model) allModels.add(model);
tools.forEach((t) => allTools.add(t));
agents.push({
id: file.replace(".agent.md", ""),
title: extractTitle(filePath, frontmatter),
description: frontmatter?.description || "",
model: model,
tools: tools,
hasHandoffs: handoffs.length > 0,
handoffs: handoffs.map((h) => ({
label: h.label || "",
agent: h.agent || "",
})),
mcpServers: frontmatter?.["mcp-servers"]
? Object.keys(frontmatter["mcp-servers"])
: [],
path: relativePath,
filename: file,
});
}
// Sort and return with filter metadata
const sortedAgents = agents.sort((a, b) => a.title.localeCompare(b.title));
return {
items: sortedAgents,
filters: {
models: ["(none)", ...Array.from(allModels).sort()],
tools: Array.from(allTools).sort(),
},
};
}
/**
* Generate prompts metadata
*/
function generatePromptsData() {
const prompts = [];
const files = fs
.readdirSync(PROMPTS_DIR)
.filter((f) => f.endsWith(".prompt.md"));
// Track all unique tools for filters
const allTools = new Set();
for (const file of files) {
const filePath = path.join(PROMPTS_DIR, file);
const frontmatter = parseFrontmatter(filePath);
const relativePath = path
.relative(ROOT_FOLDER, filePath)
.replace(/\\/g, "/");
const tools = frontmatter?.tools || [];
tools.forEach((t) => allTools.add(t));
prompts.push({
id: file.replace(".prompt.md", ""),
title: extractTitle(filePath, frontmatter),
description: frontmatter?.description || "",
agent: frontmatter?.agent || null,
model: frontmatter?.model || null,
tools: tools,
path: relativePath,
filename: file,
});
}
const sortedPrompts = prompts.sort((a, b) => a.title.localeCompare(b.title));
return {
items: sortedPrompts,
filters: {
tools: Array.from(allTools).sort(),
},
};
}
/**
* Parse applyTo field into an array of patterns
*/
function parseApplyToPatterns(applyTo) {
if (!applyTo) return [];
// Handle array format
if (Array.isArray(applyTo)) {
return applyTo.map((p) => p.trim()).filter((p) => p.length > 0);
}
// Handle string format (comma-separated)
if (typeof applyTo === "string") {
return applyTo
.split(",")
.map((p) => p.trim())
.filter((p) => p.length > 0);
}
return [];
}
/**
* Extract file extension from a glob pattern
*/
function extractExtensionFromPattern(pattern) {
// Match patterns like **.ts, **/*.js, *.py, etc.
const match = pattern.match(/\*\.(\w+)$/);
if (match) return `.${match[1]}`;
// Match patterns like **/*.{ts,tsx}
const braceMatch = pattern.match(/\*\.\{([^}]+)\}$/);
if (braceMatch) {
return braceMatch[1].split(",").map((ext) => `.${ext.trim()}`);
}
return null;
}
/**
* Generate instructions metadata
*/
function generateInstructionsData() {
const instructions = [];
const files = fs
.readdirSync(INSTRUCTIONS_DIR)
.filter((f) => f.endsWith(".instructions.md"));
// Track all unique patterns and extensions for filters
const allPatterns = new Set();
const allExtensions = new Set();
for (const file of files) {
const filePath = path.join(INSTRUCTIONS_DIR, file);
const frontmatter = parseFrontmatter(filePath);
const relativePath = path
.relative(ROOT_FOLDER, filePath)
.replace(/\\/g, "/");
const applyToRaw = frontmatter?.applyTo || null;
const applyToPatterns = parseApplyToPatterns(applyToRaw);
// Extract extensions from patterns
const extensions = [];
for (const pattern of applyToPatterns) {
allPatterns.add(pattern);
const ext = extractExtensionFromPattern(pattern);
if (ext) {
if (Array.isArray(ext)) {
ext.forEach((e) => {
extensions.push(e);
allExtensions.add(e);
});
} else {
extensions.push(ext);
allExtensions.add(ext);
}
}
}
instructions.push({
id: file.replace(".instructions.md", ""),
title: extractTitle(filePath, frontmatter),
description: frontmatter?.description || "",
applyTo: applyToRaw,
applyToPatterns: applyToPatterns,
extensions: [...new Set(extensions)],
path: relativePath,
filename: file,
});
}
const sortedInstructions = instructions.sort((a, b) =>
a.title.localeCompare(b.title)
);
return {
items: sortedInstructions,
filters: {
patterns: Array.from(allPatterns).sort(),
extensions: ["(none)", ...Array.from(allExtensions).sort()],
},
};
}
/**
* Categorize a skill based on its name and description
*/
function categorizeSkill(name, description) {
const text = `${name} ${description}`.toLowerCase();
if (text.includes("azure") || text.includes("appinsights")) return "Azure";
if (
text.includes("github") ||
text.includes("gh-cli") ||
text.includes("git-commit") ||
text.includes("git ")
)
return "Git & GitHub";
if (text.includes("vscode") || text.includes("vs code")) return "VS Code";
if (
text.includes("test") ||
text.includes("qa") ||
text.includes("playwright")
)
return "Testing";
if (
text.includes("microsoft") ||
text.includes("m365") ||
text.includes("workiq")
)
return "Microsoft";
if (text.includes("cli") || text.includes("command")) return "CLI Tools";
if (
text.includes("diagram") ||
text.includes("plantuml") ||
text.includes("visual")
)
return "Diagrams";
if (
text.includes("nuget") ||
text.includes("dotnet") ||
text.includes(".net")
)
return ".NET";
return "Other";
}
/**
* Generate skills metadata
*/
function generateSkillsData() {
const skills = [];
if (!fs.existsSync(SKILLS_DIR)) {
return { items: [], filters: { categories: [], hasAssets: ["Yes", "No"] } };
}
const folders = fs
.readdirSync(SKILLS_DIR)
.filter((f) => fs.statSync(path.join(SKILLS_DIR, f)).isDirectory());
const allCategories = new Set();
for (const folder of folders) {
const skillPath = path.join(SKILLS_DIR, folder);
const metadata = parseSkillMetadata(skillPath);
if (metadata) {
const relativePath = path
.relative(ROOT_FOLDER, skillPath)
.replace(/\\/g, "/");
const category = categorizeSkill(metadata.name, metadata.description);
allCategories.add(category);
// Get all files in the skill folder recursively
const files = getSkillFiles(skillPath, relativePath);
skills.push({
id: folder,
name: metadata.name,
title: metadata.name
.split("-")
.map((word) => word.charAt(0).toUpperCase() + word.slice(1))
.join(" "),
description: metadata.description,
assets: metadata.assets,
hasAssets: metadata.assets.length > 0,
assetCount: metadata.assets.length,
category: category,
path: relativePath,
skillFile: `${relativePath}/SKILL.md`,
files: files,
});
}
}
const sortedSkills = skills.sort((a, b) => a.title.localeCompare(b.title));
return {
items: sortedSkills,
filters: {
categories: Array.from(allCategories).sort(),
hasAssets: ["Yes", "No"],
},
};
}
/**
* Get all files in a skill folder recursively
*/
function getSkillFiles(skillPath, relativePath) {
const files = [];
function walkDir(dir, relDir) {
const entries = fs.readdirSync(dir, { withFileTypes: true });
for (const entry of entries) {
const fullPath = path.join(dir, entry.name);
const relPath = relDir ? `${relDir}/${entry.name}` : entry.name;
if (entry.isDirectory()) {
walkDir(fullPath, relPath);
} else {
// Get file size
const stats = fs.statSync(fullPath);
files.push({
path: `${relativePath}/${relPath}`,
name: relPath,
size: stats.size,
});
}
}
}
walkDir(skillPath, "");
return files;
}
/**
* Generate collections metadata
*/
function generateCollectionsData() {
const collections = [];
if (!fs.existsSync(COLLECTIONS_DIR)) {
return collections;
}
const files = fs
.readdirSync(COLLECTIONS_DIR)
.filter((f) => f.endsWith(".collection.yml"));
// Track all unique tags
const allTags = new Set();
for (const file of files) {
const filePath = path.join(COLLECTIONS_DIR, file);
const data = parseCollectionYaml(filePath);
const relativePath = path
.relative(ROOT_FOLDER, filePath)
.replace(/\\/g, "/");
if (data) {
const tags = data.tags || [];
tags.forEach((t) => allTags.add(t));
// featured can be at top level or nested under display
const featured = data.featured || data.display?.featured || false;
collections.push({
id: file.replace(".collection.yml", ""),
name: data.name || file.replace(".collection.yml", ""),
description: data.description || "",
tags: tags,
featured: featured,
items: (data.items || []).map((item) => ({
path: item.path,
kind: item.kind,
usage: item.usage || null,
})),
path: relativePath,
filename: file,
});
}
}
// Sort with featured first, then alphabetically
const sortedCollections = collections.sort((a, b) => {
if (a.featured && !b.featured) return -1;
if (!a.featured && b.featured) return 1;
return a.name.localeCompare(b.name);
});
return {
items: sortedCollections,
filters: {
tags: Array.from(allTags).sort(),
},
};
}
/**
* Generate tools metadata from website/data/tools.yml
*/
function generateToolsData() {
const toolsFile = path.join(WEBSITE_SOURCE_DATA_DIR, "tools.yml");
if (!fs.existsSync(toolsFile)) {
console.warn("No tools.yml file found at", toolsFile);
return { items: [], filters: { categories: [], tags: [] } };
}
const data = parseYamlFile(toolsFile);
if (!data || !data.tools) {
return { items: [], filters: { categories: [], tags: [] } };
}
const allCategories = new Set();
const allTags = new Set();
const tools = data.tools.map((tool) => {
const category = tool.category || "Other";
allCategories.add(category);
const tags = tool.tags || [];
tags.forEach((t) => allTags.add(t));
return {
id: tool.id,
name: tool.name,
description: tool.description || "",
category: category,
featured: tool.featured || false,
requirements: tool.requirements || [],
features: tool.features || [],
links: tool.links || {},
configuration: tool.configuration || null,
tags: tags,
};
});
// Sort with featured first, then alphabetically
const sortedTools = tools.sort((a, b) => {
if (a.featured && !b.featured) return -1;
if (!a.featured && b.featured) return 1;
return a.name.localeCompare(b.name);
});
return {
items: sortedTools,
filters: {
categories: Array.from(allCategories).sort(),
tags: Array.from(allTags).sort(),
},
};
}
/**
* Generate a combined index for search
*/
function generateSearchIndex(
agents,
prompts,
instructions,
skills,
collections
) {
const index = [];
for (const agent of agents) {
index.push({
type: "agent",
id: agent.id,
title: agent.title,
description: agent.description,
path: agent.path,
searchText: `${agent.title} ${agent.description} ${agent.tools.join(
" "
)}`.toLowerCase(),
});
}
for (const prompt of prompts) {
index.push({
type: "prompt",
id: prompt.id,
title: prompt.title,
description: prompt.description,
path: prompt.path,
searchText: `${prompt.title} ${prompt.description}`.toLowerCase(),
});
}
for (const instruction of instructions) {
index.push({
type: "instruction",
id: instruction.id,
title: instruction.title,
description: instruction.description,
path: instruction.path,
searchText: `${instruction.title} ${instruction.description} ${
instruction.applyTo || ""
}`.toLowerCase(),
});
}
for (const skill of skills) {
index.push({
type: "skill",
id: skill.id,
title: skill.title,
description: skill.description,
path: skill.skillFile,
searchText: `${skill.title} ${skill.description}`.toLowerCase(),
});
}
for (const collection of collections) {
index.push({
type: "collection",
id: collection.id,
title: collection.name,
description: collection.description,
path: collection.path,
tags: collection.tags,
searchText: `${collection.name} ${
collection.description
} ${collection.tags.join(" ")}`.toLowerCase(),
});
}
return index;
}
/**
* Generate samples/cookbook data from cookbook.yml
*/
function generateSamplesData() {
const cookbookYamlPath = path.join(COOKBOOK_DIR, "cookbook.yml");
if (!fs.existsSync(cookbookYamlPath)) {
console.warn(
"Warning: cookbook/cookbook.yml not found, skipping samples generation"
);
return {
cookbooks: [],
totalRecipes: 0,
totalCookbooks: 0,
filters: { languages: [], tags: [] },
};
}
const cookbookManifest = parseYamlFile(cookbookYamlPath);
if (!cookbookManifest || !cookbookManifest.cookbooks) {
console.warn("Warning: Invalid cookbook.yml format");
return {
cookbooks: [],
totalRecipes: 0,
totalCookbooks: 0,
filters: { languages: [], tags: [] },
};
}
const allLanguages = new Set();
const allTags = new Set();
let totalRecipes = 0;
const cookbooks = cookbookManifest.cookbooks.map((cookbook) => {
// Collect languages
cookbook.languages.forEach((lang) => allLanguages.add(lang.id));
// Process recipes and add file paths
const recipes = cookbook.recipes.map((recipe) => {
// Collect tags
if (recipe.tags) {
recipe.tags.forEach((tag) => allTags.add(tag));
}
// Build variants with file paths for each language
const variants = {};
cookbook.languages.forEach((lang) => {
const docPath = `${cookbook.path}/${lang.id}/${recipe.id}.md`;
const examplePath = `${cookbook.path}/${lang.id}/recipe/${recipe.id}${lang.extension}`;
// Check if files exist
const docFullPath = path.join(ROOT_FOLDER, docPath);
const exampleFullPath = path.join(ROOT_FOLDER, examplePath);
if (fs.existsSync(docFullPath)) {
variants[lang.id] = {
doc: docPath,
example: fs.existsSync(exampleFullPath) ? examplePath : null,
};
}
});
totalRecipes++;
return {
id: recipe.id,
name: recipe.name,
description: recipe.description,
tags: recipe.tags || [],
variants,
};
});
return {
id: cookbook.id,
name: cookbook.name,
description: cookbook.description,
path: cookbook.path,
featured: cookbook.featured || false,
languages: cookbook.languages,
recipes,
};
});
return {
cookbooks,
totalRecipes,
totalCookbooks: cookbooks.length,
filters: {
languages: Array.from(allLanguages).sort(),
tags: Array.from(allTags).sort(),
},
};
}
/**
* Generate llms.txt content according to the llmstxt.org specification
*/
function generateLlmsTxt(agents, prompts, instructions, skills) {
let content = "";
// H1 header (required)
content += "# Awesome GitHub Copilot\n\n";
// Summary blockquote (optional but recommended)
content +=
"> A community-driven collection of custom agents, prompts, instructions, and skills to enhance GitHub Copilot experiences across various domains, languages, and use cases.\n\n";
// Add overview section
content += "## Overview\n\n";
content +=
"This repository provides resources to customize and enhance GitHub Copilot:\n\n";
content +=
"- **Agents**: Specialized GitHub Copilot agents that integrate with MCP servers\n";
content +=
"- **Prompts**: Task-specific prompts for code generation and problem-solving\n";
content +=
"- **Instructions**: Coding standards and best practices applied to specific file patterns\n";
content +=
"- **Skills**: Self-contained folders with instructions and bundled resources for specialized tasks\n\n";
// Process Agents
content += "## Agents\n\n";
for (const agent of agents) {
const description = (agent.description || "No description available").replace(/\s+/g, " ").trim();
content += `- [${agent.title}](${agent.path}): ${description}\n`;
}
content += "\n";
// Process Prompts
content += "## Prompts\n\n";
for (const prompt of prompts) {
const description = (prompt.description || "No description available").replace(/\s+/g, " ").trim();
content += `- [${prompt.title}](${prompt.path}): ${description}\n`;
}
content += "\n";
// Process Instructions
content += "## Instructions\n\n";
for (const instruction of instructions) {
const description = (instruction.description || "No description available").replace(/\s+/g, " ").trim();
content += `- [${instruction.title}](${instruction.path}): ${description}\n`;
}
content += "\n";
// Process Skills
content += "## Skills\n\n";
for (const skill of skills) {
const description = (skill.description || "No description available").replace(/\s+/g, " ").trim();
content += `- [${skill.title}](${skill.skillFile}): ${description}\n`;
}
content += "\n";
// Add documentation links
content += "## Documentation\n\n";
content +=
"- [README.md](README.md): Main documentation and getting started guide\n";
content +=
"- [CONTRIBUTING.md](CONTRIBUTING.md): Guidelines for contributing to this repository\n";
content +=
"- [CODE_OF_CONDUCT.md](CODE_OF_CONDUCT.md): Community standards and expectations\n";
content += "- [SECURITY.md](SECURITY.md): Security policies and reporting\n";
content += "- [AGENTS.md](AGENTS.md): Project overview and setup commands\n\n";
// Add repository information
content += "## Repository\n\n";
content += "- **GitHub**: https://github.com/github/awesome-copilot\n";
content += "- **License**: MIT\n";
content += "- **Website**: https://github.github.io/awesome-copilot\n";
return content;
}
/**
* Main function
*/
async function main() {
console.log("Generating website data...\n");
ensureDataDir();
// Generate all data
const agentsData = generateAgentsData();
const agents = agentsData.items;
console.log(
`✓ Generated ${agents.length} agents (${agentsData.filters.models.length} models, ${agentsData.filters.tools.length} tools)`
);
const promptsData = generatePromptsData();
const prompts = promptsData.items;
console.log(
`✓ Generated ${prompts.length} prompts (${promptsData.filters.tools.length} tools)`
);
const instructionsData = generateInstructionsData();
const instructions = instructionsData.items;
console.log(
`✓ Generated ${instructions.length} instructions (${instructionsData.filters.extensions.length} extensions)`
);
const skillsData = generateSkillsData();
const skills = skillsData.items;
console.log(
`✓ Generated ${skills.length} skills (${skillsData.filters.categories.length} categories)`
);
const collectionsData = generateCollectionsData();
const collections = collectionsData.items;
console.log(
`✓ Generated ${collections.length} collections (${collectionsData.filters.tags.length} tags)`
);
const toolsData = generateToolsData();
const tools = toolsData.items;
console.log(
`✓ Generated ${tools.length} tools (${toolsData.filters.categories.length} categories)`
);
const samplesData = generateSamplesData();
console.log(
`✓ Generated ${samplesData.totalRecipes} recipes in ${samplesData.totalCookbooks} cookbooks (${samplesData.filters.languages.length} languages, ${samplesData.filters.tags.length} tags)`
);
const searchIndex = generateSearchIndex(
agents,
prompts,
instructions,
skills,
collections
);
console.log(`✓ Generated search index with ${searchIndex.length} items`);
// Write JSON files
fs.writeFileSync(
path.join(WEBSITE_DATA_DIR, "agents.json"),
JSON.stringify(agentsData, null, 2)
);
fs.writeFileSync(
path.join(WEBSITE_DATA_DIR, "prompts.json"),
JSON.stringify(promptsData, null, 2)
);
fs.writeFileSync(
path.join(WEBSITE_DATA_DIR, "instructions.json"),
JSON.stringify(instructionsData, null, 2)
);
fs.writeFileSync(
path.join(WEBSITE_DATA_DIR, "skills.json"),
JSON.stringify(skillsData, null, 2)
);
fs.writeFileSync(
path.join(WEBSITE_DATA_DIR, "collections.json"),
JSON.stringify(collectionsData, null, 2)
);
fs.writeFileSync(
path.join(WEBSITE_DATA_DIR, "tools.json"),
JSON.stringify(toolsData, null, 2)
);
fs.writeFileSync(
path.join(WEBSITE_DATA_DIR, "samples.json"),
JSON.stringify(samplesData, null, 2)
);
fs.writeFileSync(
path.join(WEBSITE_DATA_DIR, "search-index.json"),
JSON.stringify(searchIndex, null, 2)
);
// Generate a manifest with counts and timestamps
const manifest = {
generated: new Date().toISOString(),
counts: {
agents: agents.length,
prompts: prompts.length,
instructions: instructions.length,
skills: skills.length,
collections: collections.length,
tools: tools.length,
samples: samplesData.totalRecipes,
total: searchIndex.length,
},
};
fs.writeFileSync(
path.join(WEBSITE_DATA_DIR, "manifest.json"),
JSON.stringify(manifest, null, 2)
);
console.log(`\n✓ All data written to website/public/data/`);
// Generate llms.txt
const llmsTxtContent = generateLlmsTxt(agents, prompts, instructions, skills);
fs.writeFileSync(path.join(WEBSITE_DATA_DIR, "llms.txt"), llmsTxtContent, "utf8");
console.log(`✓ llms.txt generated with ${agents.length} agents, ${prompts.length} prompts, ${instructions.length} instructions, ${skills.length} skills`);
}
main().catch((err) => {
console.error("Error generating website data:", err);
process.exit(1);
});