feat: add code-tour skill — AI-generated CodeTour walkthroughs (#1277)

* feat: add code-tour skill for AI-generated CodeTour walkthroughs * fix: trim SKILL.md from 645 to 432 lines (under 500 limit) Reduce persona table to top 10, condense verbose examples and notes, trim redundant anti-patterns, compress step type docs and PR recipe. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * fix: run npm run build to update README with code-tour skill Addresses review feedback from @aaronpowell * fix: add missing scripts/ and references/ files referenced in SKILL.md Addresses reviewer feedback — SKILL.md referenced bundled files (validate_tour.py, generate_from_docs.py, codetour-schema.json, examples.md) that were not included in the PR. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> * fix: run npm run build to update skills README with new assets Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-29 18:11:45 +00:00 · 2026-04-12 19:52:59 -04:00
parent 10fda505b7
commit 09049e3b78
6 changed files with 1376 additions and 0 deletions
@@ -0,0 +1,286 @@
+#!/usr/bin/env python3
+"""
+Generate a tour skeleton from repo documentation (README, CONTRIBUTING, docs/).
+
+Reads README.md (and optionally CONTRIBUTING.md, docs/) to extract:
+  - File and directory references
+  - Architecture / structure sections
+  - Setup / getting-started sections (scanned for file and directory references)
+  - External links (become uri steps; at most the first three from the README)
+
+Outputs a skeleton .tour JSON that the code-tour skill fills in with descriptions.
+The skill reads this skeleton and enriches it — it does NOT replace the skill's judgment.
+
+Usage:
+    python generate_from_docs.py [--repo-root <path>] [--persona <persona>] [--output <file>]
+
+Examples:
+    python generate_from_docs.py
+    python generate_from_docs.py --persona new-joiner --output .tours/from-readme.tour
+    python generate_from_docs.py --repo-root /path/to/repo --persona vibecoder
+"""
+
+import json
+import re
+import sys
+import os
+from pathlib import Path
+from typing import Optional
+
+
+# ── Markdown extraction helpers ──────────────────────────────────────────────
+
+# Matches any inline code span of 2-80 characters; group 1 is the text between
+# the backticks. It does not check that the span is a path; callers do that.
+_CODE_PATH = re.compile(r"`([^`]{2,80})`")
+# Matches level 1-3 "#" headings at the start of a line (MULTILINE): group 1 is
+# the "#" run, group 2 the heading text.
+_HEADING = re.compile(r"^(#{1,3})\s+(.+)$", re.MULTILINE)
+# Matches markdown links with an http(s) target: group 1 is the label, group 2
+# the URL. Relative links are not matched.
+_LINK = re.compile(r"\[([^\]]+)\]\((https?://[^)]+)\)")
+# Shapes that suggest a path: a slash-separated path with an optional leading
+# dot, anything starting with "./", or a bare "name.ext" whose extension is
+# 1-5 lowercase letters.
+_LOOKS_LIKE_PATH = re.compile(r"^\.?[\w\-]+(/[\w\-\.]+)+$|^\./|^[\w]+\.[a-z]{1,5}$")
+# Heading keywords that mark an architecture / structure / setup section.
+# The "." in "getting.started" and "quick.start" matches any single character,
+# so both the space and the hyphen spellings are accepted. Case-insensitive.
+_STRUCT_KEYWORDS = re.compile(
+    r"\b(structure|architecture|layout|overview|directory|folder|module|component|"
+    r"design|system|organization|getting.started|quick.start|setup|installation)\b",
+    re.IGNORECASE,
+)
+
+
+def _extract_paths_from_text(text: str, repo_root: Path) -> list[str]:
+    """Extract inline code that looks like real file/directory paths."""
+    candidates = _CODE_PATH.findall(text)
+    found = []
+    for c in candidates:
+        c = c.strip().lstrip("./")
+        if not c:
+            continue
+        if not _LOOKS_LIKE_PATH.match(c) and "/" not in c and "." not in c:
+            continue
+        # check if path actually exists
+        full = repo_root / c
+        if full.exists():
+            found.append(c)
+    return found
+
+
+def _extract_external_links(text: str) -> list[tuple[str, str]]:
+    """Extract [label](url) pairs for URI steps."""
+    links = _LINK.findall(text)
+    # filter out image links and very generic anchors
+    return [
+        (label, url)
+        for label, url in links
+        if not url.endswith((".png", ".jpg", ".gif", ".svg"))
+        and label.lower() not in ("here", "this", "link", "click", "see")
+    ]
+
+
+def _split_into_sections(text: str) -> list[tuple[str, str]]:
+    """Split markdown into (heading, body) pairs."""
+    headings = list(_HEADING.finditer(text))
+    sections = []
+    for i, m in enumerate(headings):
+        heading = m.group(2).strip()
+        start = m.end()
+        end = headings[i + 1].start() if i + 1 < len(headings) else len(text)
+        body = text[start:end].strip()
+        sections.append((heading, body))
+    return sections
+
+
+def _is_structure_section(heading: str) -> bool:
+    return bool(_STRUCT_KEYWORDS.search(heading))
+
+
+# ── Step builders ─────────────────────────────────────────────────────────────
+
+def _make_content_step(title: str, hint: str) -> dict:
+    return {
+        "title": title,
+        "description": f"[TODO: {hint}]",
+    }
+
+
+def _make_file_step(path: str, hint: str = "") -> dict:
+    step = {
+        "file": path,
+        "title": f"[TODO: title for {path}]",
+        "description": f"[TODO: {hint or 'explain this file for the persona'}]",
+    }
+    return step
+
+
+def _make_dir_step(path: str, hint: str = "") -> dict:
+    return {
+        "directory": path,
+        "title": f"[TODO: title for {path}/]",
+        "description": f"[TODO: {hint or 'explain what lives here'}]",
+    }
+
+
+def _make_uri_step(url: str, label: str) -> dict:
+    return {
+        "uri": url,
+        "title": label,
+        "description": "[TODO: explain why this link is relevant and what the reader should notice]",
+    }
+
+
+# ── Core generator ────────────────────────────────────────────────────────────
+
+def generate_skeleton(repo_root: str = ".", persona: str = "new-joiner") -> dict:
+    repo = Path(repo_root).resolve()
+
+    # ── Read documentation files ─────────────────────────────────────────
+    doc_files = ["README.md", "readme.md", "Readme.md"]
+    extra_docs = ["CONTRIBUTING.md", "ARCHITECTURE.md", "docs/architecture.md", "docs/README.md"]
+
+    readme_text = ""
+    for name in doc_files:
+        p = repo / name
+        if p.exists():
+            readme_text = p.read_text(errors="replace")
+            break
+
+    extra_texts = []
+    for name in extra_docs:
+        p = repo / name
+        if p.exists():
+            extra_texts.append((name, p.read_text(errors="replace")))
+
+    all_text = readme_text + "\n".join(t for _, t in extra_texts)
+
+    # ── Collect steps ─────────────────────────────────────────────────────
+    steps = []
+    seen_paths: set[str] = set()
+
+    # 1. Intro step
+    steps.append(
+        _make_content_step(
+            "Welcome",
+            f"Introduce the repo: what it does, who this {persona} tour is for, what they'll understand after finishing.",
+        )
+    )
+
+    # 2. Parse README sections
+    if readme_text:
+        sections = _split_into_sections(readme_text)
+        for heading, body in sections:
+            # structure / architecture sections → directory steps
+            if _is_structure_section(heading):
+                paths = _extract_paths_from_text(body, repo)
+                for p in paths:
+                    if p in seen_paths:
+                        continue
+                    seen_paths.add(p)
+                    full = repo / p
+                    if full.is_dir():
+                        steps.append(_make_dir_step(p, f"mentioned under '{heading}' in README"))
+                    elif full.is_file():
+                        steps.append(_make_file_step(p, f"mentioned under '{heading}' in README"))
+
+    # 3. Scan all text for file/dir references not yet captured
+    all_paths = _extract_paths_from_text(all_text, repo)
+    for p in all_paths:
+        if p in seen_paths:
+            continue
+        seen_paths.add(p)
+        full = repo / p
+        if full.is_dir():
+            steps.append(_make_dir_step(p))
+        elif full.is_file():
+            steps.append(_make_file_step(p))
+
+    # 4. If very few file steps found, fall back to top-level directory scan
+    file_and_dir_steps = [s for s in steps if "file" in s or "directory" in s]
+    if len(file_and_dir_steps) < 3:
+        # add top-level directories
+        for item in sorted(repo.iterdir()):
+            if item.name.startswith(".") or item.name in ("node_modules", "__pycache__", ".git"):
+                continue
+            rel = str(item.relative_to(repo))
+            if rel in seen_paths:
+                continue
+            seen_paths.add(rel)
+            if item.is_dir():
+                steps.append(_make_dir_step(rel, "top-level directory"))
+            elif item.is_file() and item.suffix in (".ts", ".js", ".py", ".go", ".rs", ".java", ".rb"):
+                steps.append(_make_file_step(rel, "top-level source file"))
+
+    # 5. URI steps from external links in README
+    links = _extract_external_links(readme_text)
+    # Only include links that look like architecture / design references
+    for label, url in links[:3]:  # cap at 3 to avoid noise
+        steps.append(_make_uri_step(url, label))
+
+    # 6. Closing step
+    steps.append(
+        _make_content_step(
+            "What to Explore Next",
+            "Summarize what the reader now understands. List 2–3 follow-up tours they should read next.",
+        )
+    )
+
+    # Deduplicate steps by (file/directory/uri key)
+    seen_keys: set = set()
+    deduped = []
+    for s in steps:
+        key = s.get("file") or s.get("directory") or s.get("uri") or s.get("title")
+        if key in seen_keys:
+            continue
+        seen_keys.add(key)
+        deduped.append(s)
+
+    return {
+        "$schema": "https://aka.ms/codetour-schema",
+        "title": f"[TODO: descriptive title for {persona} tour]",
+        "description": f"[TODO: one sentence — who this is for and what they'll understand]",
+        "_skeleton_generated_by": "generate_from_docs.py",
+        "_instructions": (
+            "This is a skeleton. Fill in every [TODO: ...] with real content. "
+            "Read each referenced file before writing its description. "
+            "Remove this _skeleton_generated_by and _instructions field before saving."
+        ),
+        "steps": deduped,
+    }
+
+
+def main():
+    args = sys.argv[1:]
+    if "--help" in args or "-h" in args:
+        print(__doc__)
+        sys.exit(0)
+
+    repo_root = "."
+    persona = "new-joiner"
+    output: Optional[str] = None
+
+    i = 0
+    while i < len(args):
+        if args[i] == "--repo-root" and i + 1 < len(args):
+            repo_root = args[i + 1]
+            i += 2
+        elif args[i] == "--persona" and i + 1 < len(args):
+            persona = args[i + 1]
+            i += 2
+        elif args[i] == "--output" and i + 1 < len(args):
+            output = args[i + 1]
+            i += 2
+        else:
+            i += 1
+
+    skeleton = generate_skeleton(repo_root, persona)
+    out_json = json.dumps(skeleton, indent=2)
+
+    if output:
+        Path(output).parent.mkdir(parents=True, exist_ok=True)
+        Path(output).write_text(out_json)
+        print(f"✅ Skeleton written to {output}")
+        print(f"   {len(skeleton['steps'])} steps generated from docs")
+        print(f"   Fill in all [TODO: ...] entries before sharing")
+    else:
+        print(out_json)
+
+
+# Run the CLI only when this file is executed as a script, not when imported.
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,346 @@
+#!/usr/bin/env python3
+"""
+CodeTour validator — bundled with the code-tour skill.
+
+Checks a .tour file for:
+  - Valid JSON
+  - Required fields (title, steps, description per step)
+  - File paths that actually exist in the repo
+  - Line numbers within file bounds
+  - Selection ranges within file bounds
+  - Directory paths that exist
+  - Pattern regexes that compile AND match at least one line
+  - URI format (should start with http:// or https://; reported as a warning)
+  - nextTour matches the title of another .tour file in the same directory
+  - Content-only step count (max 2 recommended)
+  - Narrative arc (first step should orient, last step should close)
+
+Usage:
+    python validate_tour.py <tour_file> [--repo-root <path>]
+
+Examples:
+    python validate_tour.py .tours/new-joiner.tour
+    python validate_tour.py .tours/new-joiner.tour --repo-root /path/to/repo
+"""
+
+import json
+import re
+import sys
+import os
+from pathlib import Path
+
+
+# ANSI escape sequences used to color and style the terminal report.
+RESET = "\033[0m"  # clear all attributes
+RED = "\033[31m"
+YELLOW = "\033[33m"
+GREEN = "\033[32m"
+BOLD = "\033[1m"
+DIM = "\033[2m"
+
+
+def _line_count(path: Path) -> int:
+    try:
+        with open(path, errors="replace") as f:
+            return sum(1 for _ in f)
+    except Exception:
+        return 0
+
+
+def _file_content(path: Path) -> str:
+    try:
+        return path.read_text(errors="replace")
+    except Exception:
+        return ""
+
+
+def validate_tour(tour_path: str, repo_root: str = ".") -> dict:
+    repo = Path(repo_root).resolve()
+    errors = []
+    warnings = []
+    info = []
+
+    # ── 1. JSON validity ────────────────────────────────────────────────────
+    try:
+        with open(tour_path, errors="replace") as f:
+            tour = json.load(f)
+    except json.JSONDecodeError as e:
+        return {
+            "passed": False,
+            "errors": [f"Invalid JSON: {e}"],
+            "warnings": [],
+            "info": [],
+            "stats": {},
+        }
+    except FileNotFoundError:
+        return {
+            "passed": False,
+            "errors": [f"File not found: {tour_path}"],
+            "warnings": [],
+            "info": [],
+            "stats": {},
+        }
+
+    # ── 2. Required top-level fields ────────────────────────────────────────
+    if "title" not in tour:
+        errors.append("Missing required field: 'title'")
+    if "steps" not in tour:
+        errors.append("Missing required field: 'steps'")
+        return {"passed": False, "errors": errors, "warnings": warnings, "info": info, "stats": {}}
+
+    steps = tour["steps"]
+    if not isinstance(steps, list):
+        errors.append("'steps' must be an array")
+        return {"passed": False, "errors": errors, "warnings": warnings, "info": info, "stats": {}}
+
+    if len(steps) == 0:
+        errors.append("Tour has no steps")
+        return {"passed": False, "errors": errors, "warnings": warnings, "info": info, "stats": {}}
+
+    # ── 3. Tour-level optional fields ───────────────────────────────────────
+    if "nextTour" in tour:
+        tours_dir = Path(tour_path).parent
+        next_title = tour["nextTour"]
+        found_next = False
+        for tf in tours_dir.glob("*.tour"):
+            if tf.resolve() == Path(tour_path).resolve():
+                continue
+            try:
+                other = json.loads(tf.read_text())
+                if other.get("title") == next_title:
+                    found_next = True
+                    break
+            except Exception:
+                pass
+        if not found_next:
+            warnings.append(
+                f"nextTour '{next_title}' — no .tour file in .tours/ has a matching title"
+            )
+
+    # ── 4. Per-step validation ───────────────────────────────────────────────
+    content_only_count = 0
+    file_step_count = 0
+    dir_step_count = 0
+    uri_step_count = 0
+
+    for i, step in enumerate(steps):
+        label = f"Step {i + 1}"
+        if "title" in step:
+            label += f" — {step['title']!r}"
+
+        # description required on every step
+        if "description" not in step:
+            errors.append(f"{label}: Missing required field 'description'")
+
+        has_file = "file" in step
+        has_dir = "directory" in step
+        has_uri = "uri" in step
+        has_selection = "selection" in step
+
+        if not has_file and not has_dir and not has_uri:
+            content_only_count += 1
+
+        # ── file ──────────────────────────────────────────────────────────
+        if has_file:
+            file_step_count += 1
+            raw_path = step["file"]
+
+            # must be relative — no leading slash, no ./
+            if raw_path.startswith("/"):
+                errors.append(f"{label}: File path must be relative (no leading /): {raw_path!r}")
+            elif raw_path.startswith("./"):
+                warnings.append(f"{label}: File path should not start with './': {raw_path!r}")
+
+            file_path = repo / raw_path
+            if not file_path.exists():
+                errors.append(f"{label}: File does not exist: {raw_path!r}")
+            elif not file_path.is_file():
+                errors.append(f"{label}: Path is not a file: {raw_path!r}")
+            else:
+                lc = _line_count(file_path)
+
+                # line number
+                if "line" in step:
+                    ln = step["line"]
+                    if not isinstance(ln, int):
+                        errors.append(f"{label}: 'line' must be an integer, got {ln!r}")
+                    elif ln < 1:
+                        errors.append(f"{label}: Line number must be >= 1, got {ln}")
+                    elif ln > lc:
+                        errors.append(
+                            f"{label}: Line {ln} exceeds file length ({lc} lines): {raw_path!r}"
+                        )
+
+                # selection
+                if has_selection:
+                    sel = step["selection"]
+                    start = sel.get("start", {})
+                    end = sel.get("end", {})
+                    s_line = start.get("line", 0)
+                    e_line = end.get("line", 0)
+                    if s_line > lc:
+                        errors.append(
+                            f"{label}: Selection start line {s_line} exceeds file length ({lc})"
+                        )
+                    if e_line > lc:
+                        errors.append(
+                            f"{label}: Selection end line {e_line} exceeds file length ({lc})"
+                        )
+                    if s_line > e_line:
+                        errors.append(
+                            f"{label}: Selection start ({s_line}) is after end ({e_line})"
+                        )
+
+                # pattern
+                if "pattern" in step:
+                    try:
+                        compiled = re.compile(step["pattern"], re.MULTILINE)
+                        content = _file_content(file_path)
+                        if not compiled.search(content):
+                            errors.append(
+                                f"{label}: Pattern {step['pattern']!r} matches nothing in {raw_path!r}"
+                            )
+                    except re.error as e:
+                        errors.append(f"{label}: Invalid regex pattern: {e}")
+
+        # ── directory ─────────────────────────────────────────────────────
+        if has_dir:
+            dir_step_count += 1
+            raw_dir = step["directory"]
+            dir_path = repo / raw_dir
+            if not dir_path.exists():
+                errors.append(f"{label}: Directory does not exist: {raw_dir!r}")
+            elif not dir_path.is_dir():
+                errors.append(f"{label}: Path is not a directory: {raw_dir!r}")
+
+        # ── uri ───────────────────────────────────────────────────────────
+        if has_uri:
+            uri_step_count += 1
+            uri = step["uri"]
+            if not uri.startswith("https://") and not uri.startswith("http://"):
+                warnings.append(f"{label}: URI should start with https://: {uri!r}")
+
+        # ── commands ──────────────────────────────────────────────────────
+        if "commands" in step:
+            if not isinstance(step["commands"], list):
+                errors.append(f"{label}: 'commands' must be an array")
+            else:
+                for cmd in step["commands"]:
+                    if not isinstance(cmd, str):
+                        errors.append(f"{label}: Each command must be a string, got {cmd!r}")
+
+    # ── 5. Content-only step count ──────────────────────────────────────────
+    if content_only_count > 2:
+        warnings.append(
+            f"{content_only_count} content-only steps (no file/dir/uri). "
+            f"Recommended max: 2 (intro + closing)."
+        )
+
+    # ── 6. Narrative arc checks ─────────────────────────────────────────────
+    first = steps[0]
+    last = steps[-1]
+    first_is_orient = "file" not in first and "directory" not in first and "uri" not in first
+    last_is_closing = "file" not in last and "directory" not in last and "uri" not in last
+
+    if not first_is_orient and "directory" not in first:
+        info.append(
+            "First step is a file/uri step — consider starting with a content or directory "
+            "orientation step."
+        )
+    if not last_is_closing:
+        info.append(
+            "Last step is not a content step — consider ending with a closing/summary step."
+        )
+
+    stats = {
+        "total_steps": len(steps),
+        "file_steps": file_step_count,
+        "directory_steps": dir_step_count,
+        "content_steps": content_only_count,
+        "uri_steps": uri_step_count,
+    }
+
+    return {
+        "passed": len(errors) == 0,
+        "errors": errors,
+        "warnings": warnings,
+        "info": info,
+        "stats": stats,
+    }
+
+
+def print_report(tour_path: str, result: dict) -> None:
+    title = f"{BOLD}{tour_path}{RESET}"
+    print(f"\n{title}")
+    print("─" * 60)
+
+    stats = result.get("stats", {})
+    if stats:
+        parts = [
+            f"{stats.get('total_steps', 0)} steps",
+            f"{stats.get('file_steps', 0)} file",
+            f"{stats.get('directory_steps', 0)} dir",
+            f"{stats.get('content_steps', 0)} content",
+            f"{stats.get('uri_steps', 0)} uri",
+        ]
+        print(f"{DIM}  {' · '.join(parts)}{RESET}")
+
+    errors = result.get("errors", [])
+    warnings = result.get("warnings", [])
+    info = result.get("info", [])
+
+    for e in errors:
+        print(f"  {RED}✗ {e}{RESET}")
+    for w in warnings:
+        print(f"  {YELLOW}⚠ {w}{RESET}")
+    for i in info:
+        print(f"  {DIM}ℹ {i}{RESET}")
+
+    if result["passed"] and not warnings:
+        print(f"  {GREEN}✓ All checks passed{RESET}")
+    elif result["passed"]:
+        print(f"  {GREEN}✓ Passed{RESET} {YELLOW}(with warnings){RESET}")
+    else:
+        print(f"  {RED}✗ Failed — {len(errors)} error(s){RESET}")
+
+    print()
+
+
+def main():
+    args = sys.argv[1:]
+    if not args or args[0] in ("-h", "--help"):
+        print(__doc__)
+        sys.exit(0)
+
+    repo_root = "."
+    tour_files = []
+
+    i = 0
+    while i < len(args):
+        if args[i] == "--repo-root" and i + 1 < len(args):
+            repo_root = args[i + 1]
+            i += 2
+        else:
+            tour_files.append(args[i])
+            i += 1
+
+    if not tour_files:
+        # validate all tours in .tours/
+        tours_dir = Path(".tours")
+        if tours_dir.exists():
+            tour_files = [str(p) for p in sorted(tours_dir.glob("*.tour"))]
+        if not tour_files:
+            print("No .tour files found. Pass a file path or run from a repo with a .tours/ directory.")
+            sys.exit(1)
+
+    all_passed = True
+    for tf in tour_files:
+        result = validate_tour(tf, repo_root)
+        print_report(tf, result)
+        if not result["passed"]:
+            all_passed = False
+
+    sys.exit(0 if all_passed else 1)
+
+
+# Run the CLI only when this file is executed as a script, not when imported.
+if __name__ == "__main__":
+    main()