Files
awesome-copilot/skills/code-tour/scripts/generate_from_docs.py
Srinivas Vaddi 09049e3b78 feat: add code-tour skill — AI-generated CodeTour walkthroughs (#1277)
* feat: add code-tour skill for AI-generated CodeTour walkthroughs

* fix: trim SKILL.md from 645 to 432 lines (under 500 limit)

Reduce persona table to top 10, condense verbose examples and notes,
trim redundant anti-patterns, compress step type docs and PR recipe.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* fix: run npm run build to update README with code-tour skill

Addresses review feedback from @aaronpowell

* fix: add missing scripts/ and references/ files referenced in SKILL.md

Addresses reviewer feedback — SKILL.md referenced bundled files
(validate_tour.py, generate_from_docs.py, codetour-schema.json,
examples.md) that were not included in the PR.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix: run npm run build to update skills README with new assets

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-13 09:52:59 +10:00

287 lines
10 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
"""
Generate a tour skeleton from repo documentation (README, CONTRIBUTING, docs/).
Reads README.md (and optionally CONTRIBUTING.md, docs/) to extract:
- File and directory references
- Architecture / structure sections
- Setup instructions (become an orientation step)
- External links (become uri steps)
Outputs a skeleton .tour JSON that the code-tour skill fills in with descriptions.
The skill reads this skeleton and enriches it — it does NOT replace the skill's judgment.
Usage:
python generate_from_docs.py [--repo-root <path>] [--persona <persona>] [--output <file>]
Examples:
python generate_from_docs.py
python generate_from_docs.py --persona new-joiner --output .tours/from-readme.tour
python generate_from_docs.py --repo-root /path/to/repo --persona vibecoder
"""
import json
import re
import sys
import os
from pathlib import Path
from typing import Optional
# ── Markdown extraction helpers ──────────────────────────────────────────────
# Captures the contents of a single-backtick inline code span that is 2-80
# characters long. The pattern itself does not decide whether the span is a
# path; callers filter the captures afterwards.
_CODE_PATH = re.compile(r"`([^`]{2,80})`")
# Matches level 1-3 headings ("#" to "###") at the start of a line.
# Group 1 is the run of "#" characters, group 2 is the heading text.
_HEADING = re.compile(r"^(#{1,3})\s+(.+)$", re.MULTILINE)
# Matches markdown links with an http(s) target: [text](url).
# Group 1 is the link text, group 2 is the URL. The character before "[" is
# not inspected, so the link part of image syntax ![alt](url) matches as well.
_LINK = re.compile(r"\[([^\]]+)\]\((https?://[^)]+)\)")
# Patterns that suggest a path. Three alternatives:
#   1. slash-separated segments with an optional leading dot ("src/app.py"),
#   2. anything starting with "./",
#   3. a bare "name.ext" with a 1-5 letter lowercase extension ("setup.py").
_LOOKS_LIKE_PATH = re.compile(r"^\.?[\w\-]+(/[\w\-\.]+)+$|^\./|^[\w]+\.[a-z]{1,5}$")
# Heading keywords that mark an architecture / structure / setup section.
# Matched as whole words, case-insensitively. The "." in "getting.started" and
# "quick.start" is a regex wildcard, so any single character between the two
# words (space, hyphen, ...) is accepted.
_STRUCT_KEYWORDS = re.compile(
    r"\b(structure|architecture|layout|overview|directory|folder|module|component|"
    r"design|system|organization|getting.started|quick.start|setup|installation)\b",
    re.IGNORECASE,
)
def _extract_paths_from_text(text: str, repo_root: Path) -> list[str]:
    """Extract inline code spans that name real files or directories.

    Args:
        text: Markdown text to scan for single-backtick inline code spans.
        repo_root: Directory that candidate paths are resolved against.

    Returns:
        Repo-relative paths that exist under ``repo_root``, in order of
        appearance. Leading "./" and "/" prefixes and trailing "/" are
        removed. Duplicates are not removed; callers de-duplicate.
    """
    found = []
    for raw in _CODE_PATH.findall(text):
        candidate = raw.strip()
        # Remove only literal "./" and "/" prefixes. str.lstrip("./") strips a
        # *set* of characters, which used to turn ".github/workflows" into
        # "github/workflows" and "../sibling" into "sibling".
        while candidate.startswith(("./", "/")):
            candidate = candidate[2:] if candidate.startswith("./") else candidate[1:]
        if not candidate:
            continue
        # Cheap pre-filter before touching the filesystem: a candidate must
        # contain a "/" or a "." (everything _LOOKS_LIKE_PATH accepts does).
        if not _LOOKS_LIKE_PATH.match(candidate) and "/" not in candidate and "." not in candidate:
            continue
        # Normalise "src/" to "src" only after the pre-filter (which relies on
        # the slash), so the same directory is not reported under two
        # spellings and step titles do not end in "//".
        candidate = candidate.rstrip("/")
        # Never report the repo root itself or anything that climbs out of it.
        if not candidate or candidate == "." or ".." in candidate.split("/"):
            continue
        # Keep only references that actually exist in the repo.
        if (repo_root / candidate).exists():
            found.append(candidate)
    return found
def _extract_external_links(text: str) -> list[tuple[str, str]]:
    """Extract (label, url) pairs from markdown links, for URI steps.

    Image embeds, badge-style links that wrap an image, links whose URL path
    ends in an image extension, and links with a generic label ("here",
    "click", ...) are skipped.

    Args:
        text: Markdown text to scan.

    Returns:
        ``(label, url)`` tuples in order of appearance, label unmodified.
    """
    image_suffixes = (".png", ".jpg", ".jpeg", ".gif", ".svg", ".webp")
    generic_labels = {"here", "this", "link", "click", "see"}

    links = []
    for match in _LINK.finditer(text):
        label, url = match.group(1), match.group(2)
        # "![alt](url)" is an image embed, not a link; _LINK does not look at
        # the character in front of "[".
        if match.start() > 0 and text[match.start() - 1] == "!":
            continue
        # "[![badge](img)](target)": the pattern captures the inner image with
        # a label that starts with "![".
        if label.startswith("!["):
            continue
        # Compare the extension case-insensitively and ignore any query string
        # or fragment ("logo.PNG", "diagram.svg?raw=true").
        url_path = url.split("#", 1)[0].split("?", 1)[0]
        if url_path.lower().endswith(image_suffixes):
            continue
        if label.strip().lower() in generic_labels:
            continue
        links.append((label, url))
    return links
def _split_into_sections(text: str) -> list[tuple[str, str]]:
    """Split markdown into (heading, body) pairs.

    Only headings matched by ``_HEADING`` start a section. Lines inside fenced
    code blocks are never treated as headings, so a "# comment" in a shell or
    Python snippet stays part of the surrounding section's body. Text before
    the first heading is not returned.

    Args:
        text: Markdown source.

    Returns:
        ``(heading_text, body_text)`` tuples in document order, both stripped.
    """
    # Record the start offset of every line that sits inside a fenced code
    # block. Fence tracking is deliberately simple: a line starting with ``` or
    # ~~~ opens a fence, and the next line starting with the same three
    # characters closes it.
    fenced_line_starts = set()
    fence_char = ""
    offset = 0
    for line in text.splitlines(keepends=True):
        stripped = line.lstrip()
        if fence_char:
            fenced_line_starts.add(offset)
            if stripped.startswith(fence_char * 3):
                fence_char = ""
        elif stripped.startswith(("```", "~~~")):
            fence_char = stripped[0]
        offset += len(line)

    # _HEADING is anchored at line starts, so match.start() is a line offset.
    headings = [
        match
        for match in _HEADING.finditer(text)
        if match.start() not in fenced_line_starts
    ]

    sections = []
    for index, match in enumerate(headings):
        heading = match.group(2).strip()
        body_start = match.end()
        # A section's body runs up to the next real heading, or to end of text.
        if index + 1 < len(headings):
            body_end = headings[index + 1].start()
        else:
            body_end = len(text)
        sections.append((heading, text[body_start:body_end].strip()))
    return sections
def _is_structure_section(heading: str) -> bool:
    """Return True when ``heading`` contains any structure/setup keyword."""
    return _STRUCT_KEYWORDS.search(heading) is not None
# ── Step builders ─────────────────────────────────────────────────────────────
def _make_content_step(title: str, hint: str) -> dict:
return {
"title": title,
"description": f"[TODO: {hint}]",
}
def _make_file_step(path: str, hint: str = "") -> dict:
step = {
"file": path,
"title": f"[TODO: title for {path}]",
"description": f"[TODO: {hint or 'explain this file for the persona'}]",
}
return step
def _make_dir_step(path: str, hint: str = "") -> dict:
return {
"directory": path,
"title": f"[TODO: title for {path}/]",
"description": f"[TODO: {hint or 'explain what lives here'}]",
}
def _make_uri_step(url: str, label: str) -> dict:
return {
"uri": url,
"title": label,
"description": "[TODO: explain why this link is relevant and what the reader should notice]",
}
# ── Core generator ────────────────────────────────────────────────────────────
def _add_path_steps(
    repo: Path,
    rel_paths: list[str],
    hint: str,
    seen_paths: set[str],
    steps: list[dict],
) -> None:
    """Append a directory or file step for each not-yet-seen path in ``rel_paths``."""
    for rel in rel_paths:
        if rel in seen_paths:
            continue
        seen_paths.add(rel)
        target = repo / rel
        if target.is_dir():
            steps.append(_make_dir_step(rel, hint))
        elif target.is_file():
            steps.append(_make_file_step(rel, hint))


def generate_skeleton(repo_root: str = ".", persona: str = "new-joiner") -> dict:
    """Build a skeleton CodeTour dict from the repo's documentation.

    Steps are collected in this order: a "Welcome" content step, paths
    mentioned in structure-like README sections, any other existing paths
    mentioned in the README or the extra docs, a top-level directory scan when
    fewer than three file/directory steps were found, up to three external
    links from the README, and a closing content step.

    Args:
        repo_root: Path to the repository to scan.
        persona: Persona name interpolated into the TODO placeholders.

    Returns:
        A JSON-serialisable dict with ``$schema``, ``title``, ``description``,
        the two ``_skeleton_*``/``_instructions`` marker fields and ``steps``.

    Raises:
        FileNotFoundError: If ``repo_root`` does not exist.
        NotADirectoryError: If ``repo_root`` is not a directory.
    """
    repo = Path(repo_root).resolve()
    # Fail up front with a clear message instead of from inside iterdir() below.
    if not repo.exists():
        raise FileNotFoundError(f"Repo root does not exist: {repo}")
    if not repo.is_dir():
        raise NotADirectoryError(f"Repo root is not a directory: {repo}")

    # ── Read documentation files ─────────────────────────────────────────
    readme_names = ("README.md", "readme.md", "Readme.md")
    extra_names = ("CONTRIBUTING.md", "ARCHITECTURE.md", "docs/architecture.md", "docs/README.md")

    # "utf-8-sig" decodes plain UTF-8 and also drops a leading BOM, which would
    # otherwise hide a heading on the first line; an explicit encoding keeps
    # the result independent of the platform's locale.
    readme_text = ""
    for name in readme_names:
        doc = repo / name
        if doc.is_file():
            readme_text = doc.read_text(encoding="utf-8-sig", errors="replace")
            break

    extra_texts = [
        (repo / name).read_text(encoding="utf-8-sig", errors="replace")
        for name in extra_names
        if (repo / name).is_file()
    ]

    # Join with newlines between *all* documents so the end of the README can
    # never merge with the first line of the next file.
    all_text = "\n".join([readme_text, *extra_texts])

    # ── Collect steps ─────────────────────────────────────────────────────
    seen_paths: set[str] = set()

    # 1. Intro step
    steps = [
        _make_content_step(
            "Welcome",
            f"Introduce the repo: what it does, who this {persona} tour is for, what they'll understand after finishing.",
        )
    ]

    # 2. Paths mentioned in structure / architecture / setup sections of the README
    if readme_text:
        for heading, body in _split_into_sections(readme_text):
            if not _is_structure_section(heading):
                continue
            _add_path_steps(
                repo,
                _extract_paths_from_text(body, repo),
                f"mentioned under '{heading}' in README",
                seen_paths,
                steps,
            )

    # 3. Any other file/dir references in the README or extra docs
    _add_path_steps(repo, _extract_paths_from_text(all_text, repo), "", seen_paths, steps)

    # 4. If very few path steps were found, fall back to a top-level scan
    path_step_count = sum(1 for s in steps if "file" in s or "directory" in s)
    if path_step_count < 3:
        source_suffixes = (".ts", ".js", ".py", ".go", ".rs", ".java", ".rb")
        for item in sorted(repo.iterdir()):
            # Hidden entries (including .git) and dependency/cache dirs are noise.
            if item.name.startswith(".") or item.name in ("node_modules", "__pycache__"):
                continue
            rel = item.name
            if rel in seen_paths:
                continue
            seen_paths.add(rel)
            if item.is_dir():
                steps.append(_make_dir_step(rel, "top-level directory"))
            elif item.is_file() and item.suffix in source_suffixes:
                steps.append(_make_file_step(rel, "top-level source file"))

    # 5. URI steps from the first external links in the README.
    #    No topical filtering is applied; the list is only capped at 3.
    for label, url in _extract_external_links(readme_text)[:3]:
        steps.append(_make_uri_step(url, label))

    # 6. Closing step ("2-3": the range separator was missing from the hint)
    steps.append(
        _make_content_step(
            "What to Explore Next",
            "Summarize what the reader now understands. List 2-3 follow-up tours they should read next.",
        )
    )

    # Deduplicate steps by their file / directory / uri value (title otherwise),
    # keeping the first occurrence.
    seen_keys: set = set()
    deduped = []
    for step in steps:
        key = step.get("file") or step.get("directory") or step.get("uri") or step.get("title")
        if key in seen_keys:
            continue
        seen_keys.add(key)
        deduped.append(step)

    return {
        "$schema": "https://aka.ms/codetour-schema",
        "title": f"[TODO: descriptive title for {persona} tour]",
        "description": "[TODO: one sentence — who this is for and what they'll understand]",
        "_skeleton_generated_by": "generate_from_docs.py",
        "_instructions": (
            "This is a skeleton. Fill in every [TODO: ...] with real content. "
            "Read each referenced file before writing its description. "
            "Remove this _skeleton_generated_by and _instructions field before saving."
        ),
        "steps": deduped,
    }
def main():
    """Command-line entry point.

    Recognised flags: ``--repo-root <path>``, ``--persona <persona>`` and
    ``--output <file>``; ``--help`` / ``-h`` prints the module docstring and
    exits with status 0. The skeleton JSON is written to ``--output`` when
    given (parent directories are created), otherwise printed to stdout.
    Unrecognised arguments are skipped with a warning on stderr.
    """
    args = sys.argv[1:]
    if "--help" in args or "-h" in args:
        print(__doc__)
        sys.exit(0)

    options: dict[str, Optional[str]] = {
        "--repo-root": ".",
        "--persona": "new-joiner",
        "--output": None,
    }
    i = 0
    while i < len(args):
        arg = args[i]
        if arg in options and i + 1 < len(args):
            options[arg] = args[i + 1]
            i += 2
            continue
        # Unknown arguments and flags without a value are still skipped, but
        # no longer silently: a typo should not go unnoticed. The warning goes
        # to stderr so the JSON on stdout stays clean.
        print(f"warning: ignoring argument {arg!r}", file=sys.stderr)
        i += 1

    skeleton = generate_skeleton(options["--repo-root"], options["--persona"])
    out_json = json.dumps(skeleton, indent=2)

    output = options["--output"]
    if output:
        out_path = Path(output)
        out_path.parent.mkdir(parents=True, exist_ok=True)
        # Explicit encoding keeps the file independent of the platform locale.
        out_path.write_text(out_json, encoding="utf-8")
        print(f"✅ Skeleton written to {output}")
        print(f" {len(skeleton['steps'])} steps generated from docs")
        print(" Fill in all [TODO: ...] entries before sharing")
    else:
        print(out_json)
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()