feat: add code-tour skill — AI-generated CodeTour walkthroughs (#1277)

* feat: add code-tour skill for AI-generated CodeTour walkthroughs

* fix: trim SKILL.md from 645 to 432 lines (under 500 limit)

Reduce persona table to top 10, condense verbose examples and notes,
trim redundant anti-patterns, compress step type docs and PR recipe.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* fix: run npm run build to update README with code-tour skill

Addresses review feedback from @aaronpowell

* fix: add missing scripts/ and references/ files referenced in SKILL.md

Addresses reviewer feedback — SKILL.md referenced bundled files
(validate_tour.py, generate_from_docs.py, codetour-schema.json,
examples.md) that were not included in the PR.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix: run npm run build to update skills README with new assets

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Srinivas Vaddi
2026-04-12 19:52:59 -04:00
committed by GitHub
parent 10fda505b7
commit 09049e3b78
6 changed files with 1376 additions and 0 deletions

View File

@@ -0,0 +1,286 @@
#!/usr/bin/env python3
"""
Generate a tour skeleton from repo documentation (README, CONTRIBUTING, docs/).
Reads README.md (and optionally CONTRIBUTING.md, docs/) to extract:
- File and directory references
- Architecture / structure sections
- Setup instructions (becomes an orientation step)
- External links (becomes uri steps)
Outputs a skeleton .tour JSON that the code-tour skill fills in with descriptions.
The skill reads this skeleton and enriches it — it does NOT replace the skill's judgment.
Usage:
python generate_from_docs.py [--repo-root <path>] [--persona <persona>] [--output <file>]
Examples:
python generate_from_docs.py
python generate_from_docs.py --persona new-joiner --output .tours/from-readme.tour
python generate_from_docs.py --repo-root /path/to/repo --persona vibecoder
"""
import json
import re
import sys
import os
from pathlib import Path
from typing import Optional
# ── Markdown extraction helpers ──────────────────────────────────────────────
# Captures the contents of any inline-code span: 2-80 non-backtick characters
# between a pair of backticks. It does not decide whether the span is a path;
# callers filter the captures afterwards.
_CODE_PATH = re.compile(r"`([^`]{2,80})`")
# Matches level 1-3 ATX headings. Group 1 is the run of '#', group 2 the
# heading text. MULTILINE makes ^ and $ anchor at every line, so a line that
# starts with '# ' anywhere in the text matches.
_HEADING = re.compile(r"^(#{1,3})\s+(.+)$", re.MULTILINE)
# Matches markdown links [text](url) whose URL starts with http:// or https://.
# Group 1 is the label, group 2 the URL. The pattern does not look at the
# character before '[', so the bracket part of image syntax ![alt](url) also
# matches.
_LINK = re.compile(r"\[([^\]]+)\]\((https?://[^)]+)\)")
# Heuristic for "this string is shaped like a path". Three alternatives:
#   1. one or more '/'-separated segments after a first segment, with an
#      optional leading dot (e.g. src/app.py, .github/workflows)
#   2. anything that starts with './'
#   3. a bare name.ext where ext is 1-5 lowercase letters (e.g. setup.py)
_LOOKS_LIKE_PATH = re.compile(r"^\.?[\w\-]+(/[\w\-\.]+)+$|^\./|^[\w]+\.[a-z]{1,5}$")
# Whole-word, case-insensitive keywords that mark a heading as describing
# structure or setup. The '.' in getting.started / quick.start is a regex
# wildcard, so "Getting Started", "getting-started" etc. all match.
_STRUCT_KEYWORDS = re.compile(
    r"\b(structure|architecture|layout|overview|directory|folder|module|component|"
    r"design|system|organization|getting.started|quick.start|setup|installation)\b",
    re.IGNORECASE,
)
def _extract_paths_from_text(text: str, repo_root: Path) -> list[str]:
    """Return repo-relative paths that appear as inline code in *text*.

    Every backtick-quoted span is normalised (surrounding whitespace, leading
    ``./`` or ``/`` prefixes and trailing ``/`` removed), filtered by shape,
    and kept only if it exists under *repo_root*.

    Args:
        text: Markdown source to scan.
        repo_root: Directory the candidate paths are resolved against.

    Returns:
        Normalised relative paths in order of appearance. The same path may
        appear more than once; callers de-duplicate.
    """
    found: list[str] = []
    for raw in _CODE_PATH.findall(text):
        candidate = raw.strip()
        # Peel off "./" and "/" *prefixes*. str.lstrip("./") strips a character
        # set instead, which mangles hidden paths (".github" -> "github").
        while candidate.startswith(("./", "/")):
            candidate = candidate[2:] if candidate.startswith("./") else candidate[1:]
        # "src/" and "src" are the same directory; normalise so they dedupe.
        candidate = candidate.rstrip("/")
        if candidate in ("", "."):
            continue
        if (
            not _LOOKS_LIKE_PATH.match(candidate)
            and "/" not in candidate
            and "." not in candidate
        ):
            continue
        # Never follow references that could point outside the repository.
        if ".." in candidate.split("/") or Path(candidate).is_absolute():
            continue
        if (repo_root / candidate).exists():
            found.append(candidate)
    return found
def _extract_external_links(text: str) -> list[tuple[str, str]]:
    """Extract ``(label, url)`` pairs from markdown links for uri steps.

    Skipped on purpose:
      * images written as ``![alt](url)`` and badge images nested inside a
        link (label starting with ``![``), whatever the URL looks like;
      * URLs whose path ends in an image extension, compared
        case-insensitively and ignoring any ``?query`` or ``#fragment``;
      * links whose label is a generic anchor word such as "here".

    Args:
        text: Markdown source to scan.

    Returns:
        The remaining ``(label, url)`` pairs in order of appearance.
    """
    image_suffixes = (".png", ".jpg", ".jpeg", ".gif", ".svg", ".webp")
    generic_labels = ("here", "this", "link", "click", "see")

    links: list[tuple[str, str]] = []
    for match in _LINK.finditer(text):
        label, url = match.group(1), match.group(2)
        # _LINK also matches the "[alt](url)" tail of image syntax; the "!"
        # sits just before the match, so check it explicitly.
        if match.start() > 0 and text[match.start() - 1] == "!":
            continue
        # "[![badge](img)](target)" yields a label that starts with "![".
        if label.lstrip().startswith("!["):
            continue
        bare_url = url.split("#", 1)[0].split("?", 1)[0].lower()
        if bare_url.endswith(image_suffixes):
            continue
        if label.strip().lower() in generic_labels:
            continue
        links.append((label, url))
    return links
def _split_into_sections(text: str) -> list[tuple[str, str]]:
    """Break markdown *text* into ``(heading, body)`` pairs.

    A body runs from the end of its heading line to the start of the next
    heading (or to the end of the text) and is stripped of surrounding
    whitespace. Text before the first heading is not returned.
    """
    matches = list(_HEADING.finditer(text))
    # Section i stops where heading i+1 starts; the last one stops at EOF.
    stops = [nxt.start() for nxt in matches[1:]] + [len(text)]
    return [
        (found.group(2).strip(), text[found.end():stop].strip())
        for found, stop in zip(matches, stops)
    ]
def _is_structure_section(heading: str) -> bool:
    """Tell whether *heading* contains one of the structure/setup keywords."""
    return _STRUCT_KEYWORDS.search(heading) is not None
# ── Step builders ─────────────────────────────────────────────────────────────
def _make_content_step(title: str, hint: str) -> dict:
    """Build a content-only step (no file, directory or uri anchor).

    The description is a ``[TODO: ...]`` placeholder wrapping *hint*.
    """
    placeholder = f"[TODO: {hint}]"
    return dict(title=title, description=placeholder)
def _make_file_step(path: str, hint: str = "") -> dict:
    """Build a step anchored to the file at *path*.

    Title and description are ``[TODO: ...]`` placeholders; an empty *hint*
    falls back to a generic instruction.
    """
    note = hint if hint else "explain this file for the persona"
    return {
        "file": path,
        "title": f"[TODO: title for {path}]",
        "description": f"[TODO: {note}]",
    }
def _make_dir_step(path: str, hint: str = "") -> dict:
    """Build a step anchored to the directory at *path*.

    Title and description are ``[TODO: ...]`` placeholders; an empty *hint*
    falls back to a generic instruction.
    """
    note = hint if hint else "explain what lives here"
    step = {"directory": path}
    step["title"] = f"[TODO: title for {path}/]"
    step["description"] = f"[TODO: {note}]"
    return step
def _make_uri_step(url: str, label: str) -> dict:
    """Build a step that points at the external *url*, titled with *label*."""
    todo = "[TODO: explain why this link is relevant and what the reader should notice]"
    return dict(uri=url, title=label, description=todo)
# ── Core generator ────────────────────────────────────────────────────────────
def generate_skeleton(repo_root: str = ".", persona: str = "new-joiner") -> dict:
    """Build a skeleton CodeTour dict from the repository's documentation.

    Steps are assembled in this order:
      1. a content-only "Welcome" step;
      2. file/directory steps for existing paths mentioned under README
         headings that look like structure or setup sections;
      3. file/directory steps for any other existing path mentioned in the
         README or in CONTRIBUTING.md, ARCHITECTURE.md, docs/architecture.md
         or docs/README.md;
      4. if fewer than three file/directory steps exist by then, the repo's
         top-level directories and top-level source files;
      5. up to three distinct external links from the README as uri steps;
      6. a content-only "What to Explore Next" step.

    Args:
        repo_root: Path of the repository to scan.
        persona: Persona name interpolated into the placeholder texts.

    Returns:
        A dict with ``$schema``, placeholder ``title`` and ``description``,
        two underscore-prefixed bookkeeping fields, and the ``steps`` list.

    Raises:
        OSError: If the top-level fallback scan runs and *repo_root* cannot
            be listed (for example because it does not exist).
    """
    repo = Path(repo_root).resolve()

    def read_doc(path: Path) -> str:
        # Docs are read as UTF-8 regardless of the platform's locale encoding;
        # undecodable bytes are replaced rather than aborting the run.
        return path.read_text(encoding="utf-8", errors="replace")

    # ── Read documentation files ─────────────────────────────────────────
    readme_text = ""
    for name in ("README.md", "readme.md", "Readme.md"):
        candidate = repo / name
        if candidate.is_file():
            readme_text = read_doc(candidate)
            break

    extra_docs = ("CONTRIBUTING.md", "ARCHITECTURE.md", "docs/architecture.md", "docs/README.md")
    extra_texts = [read_doc(repo / name) for name in extra_docs if (repo / name).is_file()]

    # Newline between every document so inline code at the end of one file
    # cannot fuse with the start of the next.
    all_text = "\n".join([readme_text, *extra_texts])

    # ── Collect steps ─────────────────────────────────────────────────────
    steps: list[dict] = []
    seen_paths: set[str] = set()

    def add_path_step(rel: str, hint: str = "") -> None:
        """Append a file or directory step for *rel* unless already covered."""
        if rel in seen_paths:
            return
        seen_paths.add(rel)
        target = repo / rel
        if target.is_dir():
            steps.append(_make_dir_step(rel, hint))
        elif target.is_file():
            steps.append(_make_file_step(rel, hint))

    # 1. Intro step
    steps.append(
        _make_content_step(
            "Welcome",
            f"Introduce the repo: what it does, who this {persona} tour is for, what they'll understand after finishing.",
        )
    )

    # 2. Paths mentioned under structure / architecture headings come first
    if readme_text:
        for heading, body in _split_into_sections(readme_text):
            if not _is_structure_section(heading):
                continue
            for rel in _extract_paths_from_text(body, repo):
                add_path_step(rel, f"mentioned under '{heading}' in README")

    # 3. Any other file/dir reference in the README or the extra docs
    for rel in _extract_paths_from_text(all_text, repo):
        add_path_step(rel)

    # 4. If very few anchored steps were found, fall back to a top-level scan
    anchored = [s for s in steps if "file" in s or "directory" in s]
    if len(anchored) < 3:
        skipped_names = ("node_modules", "__pycache__")
        source_suffixes = (".ts", ".js", ".py", ".go", ".rs", ".java", ".rb")
        for item in sorted(repo.iterdir()):
            # Hidden entries (including .git) and dependency/cache dirs are noise.
            if item.name.startswith(".") or item.name in skipped_names:
                continue
            rel = str(item.relative_to(repo))
            if rel in seen_paths:
                continue
            seen_paths.add(rel)
            if item.is_dir():
                steps.append(_make_dir_step(rel, "top-level directory"))
            elif item.is_file() and item.suffix in source_suffixes:
                steps.append(_make_file_step(rel, "top-level source file"))

    # 5. URI steps: the first three distinct README links. No relevance
    #    filtering is attempted here; the cap just keeps the skeleton short.
    seen_urls: set[str] = set()
    for label, url in _extract_external_links(readme_text):
        if url in seen_urls:
            continue
        seen_urls.add(url)
        steps.append(_make_uri_step(url, label))
        if len(seen_urls) == 3:
            break

    # 6. Closing step
    steps.append(
        _make_content_step(
            "What to Explore Next",
            "Summarize what the reader now understands. List 2-3 follow-up tours they should read next.",
        )
    )

    # Paths and URLs are de-duplicated at insertion time in separate key
    # spaces, so a directory named "Welcome" can no longer be dropped for
    # colliding with the intro step's title.
    return {
        "$schema": "https://aka.ms/codetour-schema",
        "title": f"[TODO: descriptive title for {persona} tour]",
        "description": "[TODO: one sentence — who this is for and what they'll understand]",
        "_skeleton_generated_by": "generate_from_docs.py",
        "_instructions": (
            "This is a skeleton. Fill in every [TODO: ...] with real content. "
            "Read each referenced file before writing its description. "
            "Remove this _skeleton_generated_by and _instructions field before saving."
        ),
        "steps": steps,
    }
def main():
    """Command-line entry point: build a skeleton tour and print or save it.

    Options:
        --repo-root <path>   repository to scan (default: current directory)
        --persona <persona>  persona named in the placeholders (default: new-joiner)
        --output <file>      write the JSON there instead of printing it

    Unknown options and options missing their value are rejected with a usage
    error instead of being silently ignored.
    """
    # Imported locally so this entry point carries its own dependency.
    import argparse

    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        allow_abbrev=False,
    )
    parser.add_argument("--repo-root", default=".", metavar="<path>",
                        help="repository to scan (default: current directory)")
    parser.add_argument("--persona", default="new-joiner", metavar="<persona>",
                        help="persona the tour is written for (default: new-joiner)")
    parser.add_argument("--output", default=None, metavar="<file>",
                        help="write the skeleton to this file instead of stdout")
    options = parser.parse_args()

    skeleton = generate_skeleton(options.repo_root, options.persona)
    out_json = json.dumps(skeleton, indent=2)

    if options.output:
        destination = Path(options.output)
        destination.parent.mkdir(parents=True, exist_ok=True)
        destination.write_text(out_json, encoding="utf-8")
        print(f"✅ Skeleton written to {options.output}")
        print(f" {len(skeleton['steps'])} steps generated from docs")
        print(" Fill in all [TODO: ...] entries before sharing")
    else:
        print(out_json)


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,346 @@
#!/usr/bin/env python3
"""
CodeTour validator — bundled with the code-tour skill.
Checks a .tour file for:
- Valid JSON
- Required fields (title, steps, description per step)
- File paths that actually exist in the repo
- Line numbers within file bounds
- Selection ranges within file bounds
- Directory paths that exist
- Pattern regexes that compile AND match at least one line
- URI format (must start with http:// or https://)
- nextTour matches an existing tour title in .tours/
- Content-only step count (max 2 recommended)
- Narrative arc (first step should orient, last step should close)
Usage:
python validate_tour.py <tour_file> [--repo-root <path>]
Examples:
python validate_tour.py .tours/new-joiner.tour
python validate_tour.py .tours/new-joiner.tour --repo-root /path/to/repo
"""
import json
import re
import sys
import os
from pathlib import Path
# ANSI escape sequences used by print_report to colour and style terminal
# output. RESET ends whatever styling the preceding sequence started.
RESET = "\033[0m"
RED = "\033[31m"
YELLOW = "\033[33m"
GREEN = "\033[32m"
BOLD = "\033[1m"
DIM = "\033[2m"
def _line_count(path: Path) -> int:
    """Count the lines of the text file at *path*.

    Undecodable bytes are replaced rather than raising. Any failure to open
    or read the file yields 0.
    """
    total = 0
    try:
        with open(path, errors="replace") as handle:
            for _line in handle:
                total += 1
    except Exception:
        return 0
    return total
def _file_content(path: Path) -> str:
    """Read the whole text file at *path*, or return "" if that fails.

    Undecodable bytes are replaced rather than raising.
    """
    try:
        text = path.read_text(errors="replace")
    except Exception:
        text = ""
    return text
def _is_int(value) -> bool:
    """True for real integers; bool is excluded although it subclasses int."""
    return isinstance(value, int) and not isinstance(value, bool)


def _validate_selection(sel, line_total, label, errors) -> None:
    """Check that a step's selection has integer lines inside the file."""
    if not isinstance(sel, dict):
        errors.append(f"{label}: 'selection' must be an object with 'start' and 'end'")
        return

    lines = {}
    for edge in ("start", "end"):
        position = sel.get(edge)
        line = position.get("line") if isinstance(position, dict) else None
        if not _is_int(line):
            errors.append(f"{label}: Selection {edge} must have an integer 'line'")
        elif line < 1:
            errors.append(f"{label}: Selection {edge} line must be >= 1, got {line}")
        else:
            lines[edge] = line

    if "start" in lines and lines["start"] > line_total:
        errors.append(
            f"{label}: Selection start line {lines['start']} exceeds file length ({line_total})"
        )
    if "end" in lines and lines["end"] > line_total:
        errors.append(
            f"{label}: Selection end line {lines['end']} exceeds file length ({line_total})"
        )
    if len(lines) == 2 and lines["start"] > lines["end"]:
        errors.append(
            f"{label}: Selection start ({lines['start']}) is after end ({lines['end']})"
        )


def _validate_file_step(step, label, repo, errors, warnings) -> None:
    """Check the file path, line, selection and pattern of one file step."""
    raw_path = step["file"]
    if not isinstance(raw_path, str):
        errors.append(f"{label}: 'file' must be a string, got {raw_path!r}")
        return

    # must be relative — no leading slash, no ./
    if raw_path.startswith("/"):
        errors.append(f"{label}: File path must be relative (no leading /): {raw_path!r}")
    elif raw_path.startswith("./"):
        warnings.append(f"{label}: File path should not start with './': {raw_path!r}")

    file_path = repo / raw_path
    if not file_path.exists():
        errors.append(f"{label}: File does not exist: {raw_path!r}")
        return
    if not file_path.is_file():
        errors.append(f"{label}: Path is not a file: {raw_path!r}")
        return

    line_total = _line_count(file_path)

    if "line" in step:
        line = step["line"]
        if not _is_int(line):
            errors.append(f"{label}: 'line' must be an integer, got {line!r}")
        elif line < 1:
            errors.append(f"{label}: Line number must be >= 1, got {line}")
        elif line > line_total:
            errors.append(
                f"{label}: Line {line} exceeds file length ({line_total} lines): {raw_path!r}"
            )

    if "selection" in step:
        _validate_selection(step["selection"], line_total, label, errors)

    if "pattern" in step:
        pattern = step["pattern"]
        try:
            compiled = re.compile(pattern, re.MULTILINE)
        except (re.error, TypeError) as exc:
            # TypeError covers a pattern that is not a string at all.
            errors.append(f"{label}: Invalid regex pattern: {exc}")
        else:
            if not compiled.search(_file_content(file_path)):
                errors.append(
                    f"{label}: Pattern {pattern!r} matches nothing in {raw_path!r}"
                )


def validate_tour(tour_path: str, repo_root: str = ".") -> dict:
    """Validate one .tour file against the repository at *repo_root*.

    Malformed input (a non-object top level, non-object steps, wrongly typed
    fields) is reported as an error in the result rather than raised.

    Args:
        tour_path: Path of the .tour JSON file.
        repo_root: Directory that file and directory steps are resolved
            against.

    Returns:
        A dict with ``passed`` (True when there are no errors), ``errors``,
        ``warnings``, ``info`` (lists of messages) and ``stats`` (step counts
        by kind; empty when validation stopped before the steps were
        inspected).
    """
    repo = Path(repo_root).resolve()
    errors = []
    warnings = []
    info = []

    def failed() -> dict:
        return {"passed": False, "errors": errors, "warnings": warnings, "info": info, "stats": {}}

    # ── 1. JSON validity ────────────────────────────────────────────────────
    try:
        with open(tour_path, encoding="utf-8", errors="replace") as f:
            tour = json.load(f)
    except json.JSONDecodeError as e:
        errors.append(f"Invalid JSON: {e}")
        return failed()
    except FileNotFoundError:
        errors.append(f"File not found: {tour_path}")
        return failed()
    except OSError as e:
        # e.g. the path is a directory or is not readable
        errors.append(f"Cannot read {tour_path}: {e}")
        return failed()

    if not isinstance(tour, dict):
        errors.append("Tour must be a JSON object at the top level")
        return failed()

    # ── 2. Required top-level fields ────────────────────────────────────────
    if "title" not in tour:
        errors.append("Missing required field: 'title'")
    if "steps" not in tour:
        errors.append("Missing required field: 'steps'")
        return failed()

    steps = tour["steps"]
    if not isinstance(steps, list):
        errors.append("'steps' must be an array")
        return failed()
    if len(steps) == 0:
        errors.append("Tour has no steps")
        return failed()

    # ── 3. Tour-level optional fields ───────────────────────────────────────
    if "nextTour" in tour:
        this_tour = Path(tour_path).resolve()
        next_title = tour["nextTour"]
        found_next = False
        for tf in Path(tour_path).parent.glob("*.tour"):
            if tf.resolve() == this_tour:
                continue
            try:
                other = json.loads(tf.read_text(encoding="utf-8", errors="replace"))
            except (OSError, ValueError):
                # A broken sibling tour is not this tour's problem.
                continue
            if isinstance(other, dict) and other.get("title") == next_title:
                found_next = True
                break
        if not found_next:
            warnings.append(
                f"nextTour '{next_title}' — no .tour file in .tours/ has a matching title"
            )

    # ── 4. Per-step validation ───────────────────────────────────────────────
    content_only_count = 0
    file_step_count = 0
    dir_step_count = 0
    uri_step_count = 0

    for i, step in enumerate(steps):
        label = f"Step {i + 1}"
        if not isinstance(step, dict):
            errors.append(f"{label}: Step must be an object, got {step!r}")
            continue
        if "title" in step:
            label += f" — {step['title']!r}"

        # description required on every step
        if "description" not in step:
            errors.append(f"{label}: Missing required field 'description'")

        has_file = "file" in step
        has_dir = "directory" in step
        has_uri = "uri" in step
        if not has_file and not has_dir and not has_uri:
            content_only_count += 1

        # ── file ──────────────────────────────────────────────────────────
        if has_file:
            file_step_count += 1
            _validate_file_step(step, label, repo, errors, warnings)

        # ── directory ─────────────────────────────────────────────────────
        if has_dir:
            dir_step_count += 1
            raw_dir = step["directory"]
            if not isinstance(raw_dir, str):
                errors.append(f"{label}: 'directory' must be a string, got {raw_dir!r}")
            else:
                dir_path = repo / raw_dir
                if not dir_path.exists():
                    errors.append(f"{label}: Directory does not exist: {raw_dir!r}")
                elif not dir_path.is_dir():
                    errors.append(f"{label}: Path is not a directory: {raw_dir!r}")

        # ── uri ───────────────────────────────────────────────────────────
        if has_uri:
            uri_step_count += 1
            uri = step["uri"]
            if not isinstance(uri, str):
                errors.append(f"{label}: 'uri' must be a string, got {uri!r}")
            # NOTE(review): http:// is accepted silently although the message
            # below recommends https:// — confirm which one is intended.
            elif not uri.startswith(("https://", "http://")):
                warnings.append(f"{label}: URI should start with https://: {uri!r}")

        # ── commands ──────────────────────────────────────────────────────
        if "commands" in step:
            if not isinstance(step["commands"], list):
                errors.append(f"{label}: 'commands' must be an array")
            else:
                for cmd in step["commands"]:
                    if not isinstance(cmd, str):
                        errors.append(f"{label}: Each command must be a string, got {cmd!r}")

    # ── 5. Content-only step count ──────────────────────────────────────────
    if content_only_count > 2:
        warnings.append(
            f"{content_only_count} content-only steps (no file/dir/uri). "
            f"Recommended max: 2 (intro + closing)."
        )

    # ── 6. Narrative arc checks ─────────────────────────────────────────────
    first = steps[0]
    last = steps[-1]
    # Malformed (non-object) steps were already reported above; skip the hints.
    if isinstance(first, dict):
        first_is_orient = "file" not in first and "directory" not in first and "uri" not in first
        if not first_is_orient and "directory" not in first:
            info.append(
                "First step is a file/uri step — consider starting with a content or directory "
                "orientation step."
            )
    if isinstance(last, dict):
        last_is_closing = "file" not in last and "directory" not in last and "uri" not in last
        if not last_is_closing:
            info.append(
                "Last step is not a content step — consider ending with a closing/summary step."
            )

    stats = {
        "total_steps": len(steps),
        "file_steps": file_step_count,
        "directory_steps": dir_step_count,
        "content_steps": content_only_count,
        "uri_steps": uri_step_count,
    }
    return {
        "passed": len(errors) == 0,
        "errors": errors,
        "warnings": warnings,
        "info": info,
        "stats": stats,
    }
def print_report(tour_path: str, result: dict) -> None:
    """Print a coloured, human-readable report for one validated tour.

    Args:
        tour_path: Path shown as the report heading.
        result: The dict returned by validate_tour; it must contain
            ``passed`` and may contain ``stats``, ``errors``, ``warnings``
            and ``info``.
    """
    title = f"{BOLD}{tour_path}{RESET}"
    print(f"\n{title}")
    # Horizontal rule under the heading. The previous `"" * 60` repeated an
    # empty string and therefore only ever printed a blank line.
    print("─" * 60)

    stats = result.get("stats", {})
    if stats:
        parts = [
            f"{stats.get('total_steps', 0)} steps",
            f"{stats.get('file_steps', 0)} file",
            f"{stats.get('directory_steps', 0)} dir",
            f"{stats.get('content_steps', 0)} content",
            f"{stats.get('uri_steps', 0)} uri",
        ]
        print(f"{DIM} {' · '.join(parts)}{RESET}")

    errors = result.get("errors", [])
    warnings = result.get("warnings", [])
    info = result.get("info", [])

    for message in errors:
        print(f" {RED}{message}{RESET}")
    for message in warnings:
        print(f" {YELLOW}{message}{RESET}")
    for message in info:
        print(f" {DIM} {message}{RESET}")

    # Warnings do not fail a tour, but they downgrade the success banner.
    if result["passed"] and not warnings:
        print(f" {GREEN}✓ All checks passed{RESET}")
    elif result["passed"]:
        print(f" {GREEN}✓ Passed{RESET} {YELLOW}(with warnings){RESET}")
    else:
        print(f" {RED}✗ Failed — {len(errors)} error(s){RESET}")
    print()
def main():
    """Command-line entry point: validate one or more .tour files.

    Every argument other than ``--repo-root <path>`` is taken as a tour file.
    When only ``--repo-root`` is given, all ``*.tour`` files in ``.tours/``
    under the current working directory are validated. Exits with status 0
    when every tour passes, 1 when any fails or none is found, and 2 on a
    usage error.
    """
    args = sys.argv[1:]
    # Help is honoured wherever it appears, not only as the first argument.
    if not args or "-h" in args or "--help" in args:
        print(__doc__)
        sys.exit(0)

    repo_root = "."
    tour_files = []
    remaining = iter(args)
    for arg in remaining:
        if arg == "--repo-root":
            value = next(remaining, None)
            if value is None:
                # Previously a trailing --repo-root was treated as a tour file.
                print("--repo-root requires a path argument", file=sys.stderr)
                sys.exit(2)
            repo_root = value
        else:
            tour_files.append(arg)

    if not tour_files:
        # validate all tours in .tours/
        tours_dir = Path(".tours")
        if tours_dir.exists():
            tour_files = [str(p) for p in sorted(tours_dir.glob("*.tour"))]
        if not tour_files:
            print("No .tour files found. Pass a file path or run from a repo with a .tours/ directory.")
            sys.exit(1)

    all_passed = True
    for tf in tour_files:
        result = validate_tour(tf, repo_root)
        print_report(tf, result)
        if not result["passed"]:
            all_passed = False
    sys.exit(0 if all_passed else 1)


if __name__ == "__main__":
    main()