mirror of
https://github.com/github/awesome-copilot.git
synced 2026-04-13 11:45:56 +00:00
feat: add code-tour skill — AI-generated CodeTour walkthroughs (#1277)
* feat: add code-tour skill for AI-generated CodeTour walkthroughs * fix: trim SKILL.md from 645 to 432 lines (under 500 limit) Reduce persona table to top 10, condense verbose examples and notes, trim redundant anti-patterns, compress step type docs and PR recipe. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * fix: run npm run build to update README with code-tour skill Addresses review feedback from @aaronpowell * fix: add missing scripts/ and references/ files referenced in SKILL.md Addresses reviewer feedback — SKILL.md referenced bundled files (validate_tour.py, generate_from_docs.py, codetour-schema.json, examples.md) that were not included in the PR. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> * fix: run npm run build to update skills README with new assets Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
286
skills/code-tour/scripts/generate_from_docs.py
Normal file
286
skills/code-tour/scripts/generate_from_docs.py
Normal file
@@ -0,0 +1,286 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Generate a tour skeleton from repo documentation (README, CONTRIBUTING, docs/).
|
||||
|
||||
Reads README.md (and optionally CONTRIBUTING.md, docs/) to extract:
|
||||
- File and directory references
|
||||
- Architecture / structure sections
|
||||
- Setup instructions (becomes an orientation step)
|
||||
- External links (becomes uri steps)
|
||||
|
||||
Outputs a skeleton .tour JSON that the code-tour skill fills in with descriptions.
|
||||
The skill reads this skeleton and enriches it — it does NOT replace the skill's judgment.
|
||||
|
||||
Usage:
|
||||
python generate_from_docs.py [--repo-root <path>] [--persona <persona>] [--output <file>]
|
||||
|
||||
Examples:
|
||||
python generate_from_docs.py
|
||||
python generate_from_docs.py --persona new-joiner --output .tours/from-readme.tour
|
||||
python generate_from_docs.py --repo-root /path/to/repo --persona vibecoder
|
||||
"""
|
||||
|
||||
import json
|
||||
import re
|
||||
import sys
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
|
||||
# ── Markdown extraction helpers ──────────────────────────────────────────────
|
||||
|
||||
# Matches inline code that looks like a file/directory path
|
||||
_CODE_PATH = re.compile(r"`([^`]{2,80})`")
|
||||
# Matches headings
|
||||
_HEADING = re.compile(r"^(#{1,3})\s+(.+)$", re.MULTILINE)
|
||||
# Matches markdown links: [text](url)
|
||||
_LINK = re.compile(r"\[([^\]]+)\]\((https?://[^)]+)\)")
|
||||
# Patterns that suggest a path (contains / or . with extension)
|
||||
_LOOKS_LIKE_PATH = re.compile(r"^\.?[\w\-]+(/[\w\-\.]+)+$|^\./|^[\w]+\.[a-z]{1,5}$")
|
||||
# Architecture / structure section keywords
|
||||
_STRUCT_KEYWORDS = re.compile(
|
||||
r"\b(structure|architecture|layout|overview|directory|folder|module|component|"
|
||||
r"design|system|organization|getting.started|quick.start|setup|installation)\b",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
|
||||
def _extract_paths_from_text(text: str, repo_root: Path) -> list[str]:
|
||||
"""Extract inline code that looks like real file/directory paths."""
|
||||
candidates = _CODE_PATH.findall(text)
|
||||
found = []
|
||||
for c in candidates:
|
||||
c = c.strip().lstrip("./")
|
||||
if not c:
|
||||
continue
|
||||
if not _LOOKS_LIKE_PATH.match(c) and "/" not in c and "." not in c:
|
||||
continue
|
||||
# check if path actually exists
|
||||
full = repo_root / c
|
||||
if full.exists():
|
||||
found.append(c)
|
||||
return found
|
||||
|
||||
|
||||
def _extract_external_links(text: str) -> list[tuple[str, str]]:
|
||||
"""Extract [label](url) pairs for URI steps."""
|
||||
links = _LINK.findall(text)
|
||||
# filter out image links and very generic anchors
|
||||
return [
|
||||
(label, url)
|
||||
for label, url in links
|
||||
if not url.endswith((".png", ".jpg", ".gif", ".svg"))
|
||||
and label.lower() not in ("here", "this", "link", "click", "see")
|
||||
]
|
||||
|
||||
|
||||
def _split_into_sections(text: str) -> list[tuple[str, str]]:
|
||||
"""Split markdown into (heading, body) pairs."""
|
||||
headings = list(_HEADING.finditer(text))
|
||||
sections = []
|
||||
for i, m in enumerate(headings):
|
||||
heading = m.group(2).strip()
|
||||
start = m.end()
|
||||
end = headings[i + 1].start() if i + 1 < len(headings) else len(text)
|
||||
body = text[start:end].strip()
|
||||
sections.append((heading, body))
|
||||
return sections
|
||||
|
||||
|
||||
def _is_structure_section(heading: str) -> bool:
|
||||
return bool(_STRUCT_KEYWORDS.search(heading))
|
||||
|
||||
|
||||
# ── Step builders ─────────────────────────────────────────────────────────────
|
||||
|
||||
def _make_content_step(title: str, hint: str) -> dict:
|
||||
return {
|
||||
"title": title,
|
||||
"description": f"[TODO: {hint}]",
|
||||
}
|
||||
|
||||
|
||||
def _make_file_step(path: str, hint: str = "") -> dict:
|
||||
step = {
|
||||
"file": path,
|
||||
"title": f"[TODO: title for {path}]",
|
||||
"description": f"[TODO: {hint or 'explain this file for the persona'}]",
|
||||
}
|
||||
return step
|
||||
|
||||
|
||||
def _make_dir_step(path: str, hint: str = "") -> dict:
|
||||
return {
|
||||
"directory": path,
|
||||
"title": f"[TODO: title for {path}/]",
|
||||
"description": f"[TODO: {hint or 'explain what lives here'}]",
|
||||
}
|
||||
|
||||
|
||||
def _make_uri_step(url: str, label: str) -> dict:
|
||||
return {
|
||||
"uri": url,
|
||||
"title": label,
|
||||
"description": "[TODO: explain why this link is relevant and what the reader should notice]",
|
||||
}
|
||||
|
||||
|
||||
# ── Core generator ────────────────────────────────────────────────────────────
|
||||
|
||||
def generate_skeleton(repo_root: str = ".", persona: str = "new-joiner") -> dict:
|
||||
repo = Path(repo_root).resolve()
|
||||
|
||||
# ── Read documentation files ─────────────────────────────────────────
|
||||
doc_files = ["README.md", "readme.md", "Readme.md"]
|
||||
extra_docs = ["CONTRIBUTING.md", "ARCHITECTURE.md", "docs/architecture.md", "docs/README.md"]
|
||||
|
||||
readme_text = ""
|
||||
for name in doc_files:
|
||||
p = repo / name
|
||||
if p.exists():
|
||||
readme_text = p.read_text(errors="replace")
|
||||
break
|
||||
|
||||
extra_texts = []
|
||||
for name in extra_docs:
|
||||
p = repo / name
|
||||
if p.exists():
|
||||
extra_texts.append((name, p.read_text(errors="replace")))
|
||||
|
||||
all_text = readme_text + "\n".join(t for _, t in extra_texts)
|
||||
|
||||
# ── Collect steps ─────────────────────────────────────────────────────
|
||||
steps = []
|
||||
seen_paths: set[str] = set()
|
||||
|
||||
# 1. Intro step
|
||||
steps.append(
|
||||
_make_content_step(
|
||||
"Welcome",
|
||||
f"Introduce the repo: what it does, who this {persona} tour is for, what they'll understand after finishing.",
|
||||
)
|
||||
)
|
||||
|
||||
# 2. Parse README sections
|
||||
if readme_text:
|
||||
sections = _split_into_sections(readme_text)
|
||||
for heading, body in sections:
|
||||
# structure / architecture sections → directory steps
|
||||
if _is_structure_section(heading):
|
||||
paths = _extract_paths_from_text(body, repo)
|
||||
for p in paths:
|
||||
if p in seen_paths:
|
||||
continue
|
||||
seen_paths.add(p)
|
||||
full = repo / p
|
||||
if full.is_dir():
|
||||
steps.append(_make_dir_step(p, f"mentioned under '{heading}' in README"))
|
||||
elif full.is_file():
|
||||
steps.append(_make_file_step(p, f"mentioned under '{heading}' in README"))
|
||||
|
||||
# 3. Scan all text for file/dir references not yet captured
|
||||
all_paths = _extract_paths_from_text(all_text, repo)
|
||||
for p in all_paths:
|
||||
if p in seen_paths:
|
||||
continue
|
||||
seen_paths.add(p)
|
||||
full = repo / p
|
||||
if full.is_dir():
|
||||
steps.append(_make_dir_step(p))
|
||||
elif full.is_file():
|
||||
steps.append(_make_file_step(p))
|
||||
|
||||
# 4. If very few file steps found, fall back to top-level directory scan
|
||||
file_and_dir_steps = [s for s in steps if "file" in s or "directory" in s]
|
||||
if len(file_and_dir_steps) < 3:
|
||||
# add top-level directories
|
||||
for item in sorted(repo.iterdir()):
|
||||
if item.name.startswith(".") or item.name in ("node_modules", "__pycache__", ".git"):
|
||||
continue
|
||||
rel = str(item.relative_to(repo))
|
||||
if rel in seen_paths:
|
||||
continue
|
||||
seen_paths.add(rel)
|
||||
if item.is_dir():
|
||||
steps.append(_make_dir_step(rel, "top-level directory"))
|
||||
elif item.is_file() and item.suffix in (".ts", ".js", ".py", ".go", ".rs", ".java", ".rb"):
|
||||
steps.append(_make_file_step(rel, "top-level source file"))
|
||||
|
||||
# 5. URI steps from external links in README
|
||||
links = _extract_external_links(readme_text)
|
||||
# Only include links that look like architecture / design references
|
||||
for label, url in links[:3]: # cap at 3 to avoid noise
|
||||
steps.append(_make_uri_step(url, label))
|
||||
|
||||
# 6. Closing step
|
||||
steps.append(
|
||||
_make_content_step(
|
||||
"What to Explore Next",
|
||||
"Summarize what the reader now understands. List 2–3 follow-up tours they should read next.",
|
||||
)
|
||||
)
|
||||
|
||||
# Deduplicate steps by (file/directory/uri key)
|
||||
seen_keys: set = set()
|
||||
deduped = []
|
||||
for s in steps:
|
||||
key = s.get("file") or s.get("directory") or s.get("uri") or s.get("title")
|
||||
if key in seen_keys:
|
||||
continue
|
||||
seen_keys.add(key)
|
||||
deduped.append(s)
|
||||
|
||||
return {
|
||||
"$schema": "https://aka.ms/codetour-schema",
|
||||
"title": f"[TODO: descriptive title for {persona} tour]",
|
||||
"description": f"[TODO: one sentence — who this is for and what they'll understand]",
|
||||
"_skeleton_generated_by": "generate_from_docs.py",
|
||||
"_instructions": (
|
||||
"This is a skeleton. Fill in every [TODO: ...] with real content. "
|
||||
"Read each referenced file before writing its description. "
|
||||
"Remove this _skeleton_generated_by and _instructions field before saving."
|
||||
),
|
||||
"steps": deduped,
|
||||
}
|
||||
|
||||
|
||||
def main():
|
||||
args = sys.argv[1:]
|
||||
if "--help" in args or "-h" in args:
|
||||
print(__doc__)
|
||||
sys.exit(0)
|
||||
|
||||
repo_root = "."
|
||||
persona = "new-joiner"
|
||||
output: Optional[str] = None
|
||||
|
||||
i = 0
|
||||
while i < len(args):
|
||||
if args[i] == "--repo-root" and i + 1 < len(args):
|
||||
repo_root = args[i + 1]
|
||||
i += 2
|
||||
elif args[i] == "--persona" and i + 1 < len(args):
|
||||
persona = args[i + 1]
|
||||
i += 2
|
||||
elif args[i] == "--output" and i + 1 < len(args):
|
||||
output = args[i + 1]
|
||||
i += 2
|
||||
else:
|
||||
i += 1
|
||||
|
||||
skeleton = generate_skeleton(repo_root, persona)
|
||||
out_json = json.dumps(skeleton, indent=2)
|
||||
|
||||
if output:
|
||||
Path(output).parent.mkdir(parents=True, exist_ok=True)
|
||||
Path(output).write_text(out_json)
|
||||
print(f"✅ Skeleton written to {output}")
|
||||
print(f" {len(skeleton['steps'])} steps generated from docs")
|
||||
print(f" Fill in all [TODO: ...] entries before sharing")
|
||||
else:
|
||||
print(out_json)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user