awesome-copilot/eng/contributor-report.mjs
Ashley Childress 1322aa2dde feat(eng): add contributor reporting and management scripts
- Add eng/README.md documenting maintainer utilities
- Add eng/contributor-report.mjs for generating contributor reports
- Add eng/add-missing-contributors.mjs for automating contributor additions
- Add eng/utils/graceful-shutdown.mjs for script lifecycle management
- Update eng/update-readme.mjs with minor fixes
- Update package.json with new contributor scripts

Generated-by: GitHub Copilot <copilot@github.com>
Signed-off-by: Ashley Childress <6563688+anchildress1@users.noreply.github.com>
2025-12-19 20:43:27 -05:00


/**
* Generate human-readable reports about missing contributors.
* This module queries merged PRs via the GitHub CLI (`gh`) and produces a markdown report for maintainers.
*/
import { execFileSync, execSync } from 'node:child_process';
import fs from 'node:fs';
import path from 'node:path';
import { fileURLToPath } from 'node:url';
import { setupGracefulShutdown } from './utils/graceful-shutdown.mjs';
const DEFAULT_CMD_TIMEOUT = 30_000; // 30s
setupGracefulShutdown('contributor-report');
/**
* Patterns that represent generated files; contributors should not be credited
* for these files because they are not substantive authored content.
*/
export const AUTO_GENERATED_PATTERNS = [
'README.md',
'README.*.md',
'collections/*.md',
'collections/*.collection.md',
'docs/README.*.md',
'docs/*.generated.md'
];
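// Illustrative examples (hypothetical paths): 'README.md', 'README.es.md', and
// 'collections/frontend-web-dev.md' all match these patterns and are excluded from credit,
// while 'prompts/code-review.prompt.md' does not and still counts.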
/**
* File globs used to infer contribution types from file paths.
*/
export const TYPE_PATTERNS = {
instructions: [
'instructions/*.instructions.md'
],
prompts: [
'prompts/*.prompt.md'
],
agents: [
'chatmodes/*.chatmode.md',
'agents/*.agent.md'
],
collections: [
'collections/*.collection.yml'
],
doc: [
'docs/**/*.md',
'.github/**/*.md',
'CONTRIBUTING.md',
'SECURITY.md',
'SUPPORT.md',
'LICENSE.md',
'CHANGELOG.md',
'*.md'
],
infra: [
'.github/workflows/**/*.yml',
'.github/workflows/**/*.yaml',
'**/*.yml',
'**/*.yaml'
],
maintenance: [
'package*.json',
'*.config.js',
'tsconfig*.json'
],
code: [
// globToRegExp below does not support brace expansion, so list the extensions individually.
'**/*.js',
'**/*.ts',
'**/*.mjs',
'**/*.cjs',
'**/*.py'
]
};
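// Illustrative mappings (hypothetical paths): 'prompts/code-review.prompt.md' → 'prompts',
// '.github/workflows/validate.yml' → 'infra', 'eng/contributor-report.mjs' → 'code'.
// getFileContributionType() below checks these buckets in insertion order, so the more
// specific buckets win over the broad doc / infra / code fallbacks.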
const globCache = new Map();
/**
* Convert a simple glob (with *, **) to a RegExp.
* This is intentionally small and deterministic for our repo patterns.
* @param {string} pattern
* @returns {RegExp}
*/
export const globToRegExp = (pattern) => {
const DOUBLE_WILDCARD_PLACEHOLDER = '§§DOUBLE§§';
const replacements = [
{ pattern: /\\/g, replacement: '/' },
{ pattern: /\./g, replacement: String.raw`\.` },
{ pattern: /\*\*/g, replacement: DOUBLE_WILDCARD_PLACEHOLDER },
{ pattern: /\*/g, replacement: '[^/]*' },
{ pattern: new RegExp(DOUBLE_WILDCARD_PLACEHOLDER, 'g'), replacement: '.*' },
{ pattern: /\?/g, replacement: '.' },
{ pattern: /\//g, replacement: String.raw`\/` }
];
const normalized = replacements.reduce((acc, { pattern, replacement }) => acc.replace(pattern, replacement), String(pattern));
return new RegExp(`^${normalized}$`);
};
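// Illustrative conversions: globToRegExp('prompts/*.prompt.md') yields
// /^prompts\/[^\/]*\.prompt\.md$/ (a single '*' never crosses a path separator), while
// globToRegExp('docs/**/*.md') yields /^docs\/.*\/[^\/]*\.md$/ ('**' may span directories).
// One known limitation: the '**/' form still requires at least one separator, so a
// top-level 'docs/intro.md' would not match 'docs/**/*.md'.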
/**
* Test whether a file path matches a glob pattern.
* @param {string} filePath
* @param {string} pattern
* @returns {boolean}
*/
export const matchGlob = (filePath, pattern) => {
if (!globCache.has(pattern)) {
try {
globCache.set(pattern, globToRegExp(pattern));
} catch {
globCache.set(pattern, null);
}
}
const regexp = globCache.get(pattern);
if (!regexp) {
return false;
}
const normalized = filePath.replace(/\\/g, '/');
return regexp.test(normalized);
};
/**
* Return true if the given path matches one of the known auto-generated patterns.
* @param {string} filePath
* @returns {boolean}
*/
export const isAutoGeneratedFile = (filePath) => {
return AUTO_GENERATED_PATTERNS.some((pattern) => matchGlob(filePath, pattern));
};
/**
* Infer a contribution type string (e.g. 'prompts', 'agents', 'doc') for a file path.
* Returns null if no specific type matched.
* @param {string} filePath
* @returns {string|null}
*/
export const getFileContributionType = (filePath) => {
const normalized = filePath.replace(/\\/g, '/');
for (const [type, patterns] of Object.entries(TYPE_PATTERNS)) {
if (patterns.some((pattern) => matchGlob(normalized, pattern))) {
return type;
}
}
return null;
};
/**
* Derive a comma-separated list of contribution type identifiers from a list of files.
* Auto-generated files are ignored. Returns '' when there are no files left to process.
* @param {string[]} files
* @returns {string}
*/
export const getContributionTypes = (files) => {
const types = new Set();
let processed = 0;
for (const file of files) {
if (isAutoGeneratedFile(file)) {
continue;
}
processed += 1;
const type = getFileContributionType(file);
if (type) {
types.add(type);
}
}
if (processed === 0) {
return '';
}
if (types.size === 0) {
types.add('code');
}
return Array.from(types).sort((a, b) => a.localeCompare(b)).join(',');
};
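// Illustrative usage (hypothetical file lists):
//   getContributionTypes(['prompts/code-review.prompt.md', 'README.md']) → 'prompts'
//   getContributionTypes(['README.md']) → '' (every file was auto-generated)
//   getContributionTypes(['eng/notes.txt']) → 'code' (processed, but no bucket matched)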
/**
* Check .all-contributors output to discover missing contributors.
* This is the canonical implementation used by contributor tooling.
* @returns {string[]}
*/
export const getMissingContributors = () => {
try {
console.log('🔍 Checking for missing contributors...');
const configPath = path.join(process.cwd(), '.all-contributorsrc');
const config = JSON.parse(fs.readFileSync(configPath, 'utf-8'));
const ignoreEntries = config.ignoreList || config.ignore || [];
const ignoreSet = new Set(ignoreEntries.map((entry) => entry.toLowerCase()));
if (ignoreSet.size > 0) {
console.log(`📋 Loaded ignore list: ${Array.from(ignoreSet).join(', ')}`);
}
const output = execSync('npx all-contributors check', {
encoding: 'utf8',
stdio: ['pipe', 'pipe', 'pipe'],
timeout: DEFAULT_CMD_TIMEOUT
});
const lines = output.split('\n');
const headerLineIndex = lines.findIndex(line =>
line.includes('Missing contributors in .all-contributorsrc:')
);
if (headerLineIndex === -1) {
console.log('✅ No missing contributors found');
return [];
}
let contributorsLine = '';
for (let i = headerLineIndex + 1; i < lines.length; i++) {
const line = lines[i].trim();
if (line.includes('Unknown contributors') || line.includes('✨')) {
break;
}
if (line && !line.startsWith('⠙') && !line.startsWith('✨')) {
contributorsLine = line;
break;
}
}
if (!contributorsLine) {
console.log('✅ No missing contributors found');
return [];
}
const allUsernames = contributorsLine
.split(',')
.map(username => username.trim())
.filter(username => username.length > 0);
const filteredUsernames = allUsernames.filter(username => {
const lowerUsername = username.toLowerCase();
if (ignoreSet.has(lowerUsername)) {
console.log(`⏭️ FILTERED: ${username} is in ignore list`);
return false;
}
return true;
});
console.log(`📋 Found ${filteredUsernames.length} missing contributors after filtering: ${filteredUsernames.join(', ')}`);
return filteredUsernames;
} catch (error) {
console.error('❌ Error checking for missing contributors:', error.message);
if (error.message.includes('command not found') || error.message.includes('not recognized')) {
console.error('💡 Make sure all-contributors-cli is installed: npm install all-contributors-cli');
}
return [];
}
};
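// Parsing assumption: `all-contributors check` prints a header line containing
// "Missing contributors in .all-contributorsrc:" followed by a comma-separated list of
// usernames (e.g. "octocat, hubot"); spinner glyphs (⠙) and the trailing ✨ summary are
// skipped. If the CLI changes its output format, this function reports no missing
// contributors rather than failing.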
// --- REPORT GENERATION LOGIC ---
/**
* Get the current GitHub repository in owner/repo format.
* Tries upstream first, then origin.
* @returns {string}
*/
const getGitHubRepo = () => {
try {
const upstreamUrl = execSync('git config --get remote.upstream.url', {
encoding: 'utf8',
stdio: ['pipe', 'pipe', 'pipe']
}).trim();
if (upstreamUrl) {
// Accept both SSH (git@github.com:owner/repo) and HTTPS (https://github.com/owner/repo) remote URLs.
const match = upstreamUrl.match(/github\.com[:/]([^/]+)\/([^/]+?)(?:\.git)?$/);
if (match) return `${match[1]}/${match[2]}`;
}
} catch (e) {
console.debug('upstream not found, trying origin');
}
try {
const originUrl = execSync('git config --get remote.origin.url', {
encoding: 'utf8',
stdio: ['pipe', 'pipe', 'pipe']
}).trim();
const match = originUrl.match(/github\.com[:/]([^/]+)\/([^/]+?)(?:\.git)?$/);
if (match) return `${match[1]}/${match[2]}`;
} catch (e) {
console.debug('origin not found, using default');
}
return 'github/awesome-copilot';
};
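// Illustrative resolution (hypothetical remotes): an upstream of
// 'git@github.com:github/awesome-copilot.git' or 'https://github.com/github/awesome-copilot.git'
// resolves to 'github/awesome-copilot'; when neither remote parses, the hard-coded default
// above is used.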
const CONTRIBUTION_TYPE_MAP = {
'instructions': { symbol: '🧭', description: 'The big AI prompt recipes (Copilot instruction sets)' },
'prompts': { symbol: '⌨️', description: 'One-shot or reusable user-level prompts' },
'agents': { symbol: '🎭', description: 'Defined Copilot personalities / roles' },
'collections': { symbol: '🎁', description: 'Bundled thematic sets (e.g., "Copilot for Docs")' }
};
/**
* Fetch merged PRs for a GitHub username using the GH CLI and filter files.
* @param {string} username
* @param {{includeAllFiles?:boolean}} [opts]
* @returns {Array<object>} Array of PR objects
*/
export const fetchContributorMergedPrs = (username, { includeAllFiles = false } = {}) => {
try {
const repo = getGitHubRepo();
const result = execSync(
`gh pr list --repo ${repo} --state merged --author ${username} --json number,title,mergedAt,files,url --limit 100`,
{ encoding: 'utf8', stdio: ['pipe', 'pipe', 'pipe'], timeout: DEFAULT_CMD_TIMEOUT }
);
const prs = JSON.parse(result);
if (includeAllFiles) {
return prs;
}
return prs.filter(pr => {
const hasNonConfigFiles = pr.files.some(file =>
!isAutoGeneratedFile(file.path)
);
return hasNonConfigFiles;
});
} catch (error) {
console.error(`Failed to fetch PRs for ${username}:`, error.message);
return [];
}
};
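// Shape assumption: each PR object from `gh pr list --json number,title,mergedAt,files,url`
// looks roughly like { number, title, mergedAt, url, files: [{ path, additions, deletions }] };
// only those fields are read by the report logic below.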
/**
* Convert a PR object into a normalized report entry with types and file details.
* @param {{login:string}} contributor
* @param {object} pr
* @param {{includeAllFiles?:boolean}} [opts]
* @returns {object|null}
*/
const generatePRReport = (contributor, pr, { includeAllFiles = false } = {}) => {
const types = new Set();
const fileDetails = [];
for (const file of pr.files) {
if (!file?.path) {
continue;
}
// Include generated files only if includeAllFiles is true
if (!includeAllFiles && isAutoGeneratedFile(file.path)) {
continue;
}
// Files outside every known bucket are credited under 'ideas'.
const type = getFileContributionType(file.path) || 'ideas';
types.add(type);
fileDetails.push({
path: file.path,
type,
additions: file.additions,
deletions: file.deletions
});
}
// If no non-filtered files contributed to types, and we're not asked for all files, skip this PR
if (types.size === 0 && !includeAllFiles) {
return null;
}
// Fallback to 'code' if no types detected
if (types.size === 0) {
types.add('code');
}
const typeList = Array.from(types);
return {
prNumber: pr.number,
prTitle: pr.title,
prUrl: pr.url,
mergedAt: pr.mergedAt,
contributionTypes: typeList,
files: fileDetails,
commentSnippet: `@all-contributors please add @${contributor.login} for ${typeList.join(', ')}`
};
};
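// Illustrative result (hypothetical PR): a merged PR touching only
// 'prompts/security-review.prompt.md' produces contributionTypes ['prompts'] and the
// snippet '@all-contributors please add @octocat for prompts' (for a contributor login
// of 'octocat').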
/**
* Build a contributor report by inspecting merged PRs and mapping files to types.
* Returns null when no relevant PRs were found (unless includeAllFiles is true).
* @param {string} username
* @param {{includeAllFiles?:boolean}} [opts]
* @returns {object|null}
*/
export const generateContributorReport = (username, { includeAllFiles = false } = {}) => {
console.log(`Inspecting ${username}...`);
const prs = fetchContributorMergedPrs(username, { includeAllFiles });
const prReports = prs
.map(pr => generatePRReport({ login: username }, pr, { includeAllFiles }))
.filter(report => report !== null);
// If no relevant PR reports and not explicitly including all files, skip the contributor entirely
if (prReports.length === 0 && !includeAllFiles) {
return null;
}
return {
username,
totalPRs: prs.length,
prs: prReports
};
};
/**
* Render a set of contributor reports as markdown for human review.
* @param {Array<object>} reports
* @param {number} missingCount - number of missing contributors detected
* @returns {string}
*/
export const generateMarkdownReport = (reports, missingCount = 0) => {
// The report is intentionally minimal: a single list of affected PRs and
// a single copy/paste command maintainers can run locally.
// No timestamps, per-file breakdowns, or duplicated metadata.
if (!missingCount) {
return 'No missing contributors detected.\n';
}
// 1) Single list of affected PRs (deduped).
const prEntries = new Map(); // key=prNumber or url, value={number,url,mergedAt}
for (const report of reports) {
for (const pr of report.prs) {
const key = pr.prUrl || String(pr.prNumber);
if (!prEntries.has(key)) {
prEntries.set(key, {
number: pr.prNumber,
url: pr.prUrl,
mergedAt: pr.mergedAt
});
}
}
}
const prList = Array.from(prEntries.values()).sort((a, b) => {
// Prefer chronological sort for stable “what happened” review.
const aTime = a.mergedAt ? Date.parse(a.mergedAt) : 0;
const bTime = b.mergedAt ? Date.parse(b.mergedAt) : 0;
if (aTime !== bTime) return aTime - bTime;
return (a.number ?? 0) - (b.number ?? 0);
});
// 2) One command (one line). If multiple users are missing, chain them.
const commandParts = [];
for (const report of reports) {
const typeSet = new Set();
for (const pr of report.prs) {
for (const type of pr.contributionTypes || []) {
typeSet.add(type);
}
}
const types = Array.from(typeSet).filter(Boolean).sort((a, b) => a.localeCompare(b));
const typesArg = types.length > 0 ? types.join(',') : 'code';
commandParts.push(`npx all-contributors add ${report.username} ${typesArg}`);
}
let markdown = '';
markdown += prList.map((pr) => `- #${pr.number} ${pr.url}`).join('\n');
markdown += '\n\n';
markdown += commandParts.join(' && ');
markdown += '\n';
return markdown;
};
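// Illustrative output (hypothetical PRs and users):
//   - #101 https://github.com/github/awesome-copilot/pull/101
//   - #102 https://github.com/github/awesome-copilot/pull/102
//
//   npx all-contributors add octocat prompts && npx all-contributors add hubot doc,infra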
/**
* Check whether a PR already contains an all-contributors bot comment.
* @param {number} prNumber
* @returns {boolean}
*/
export const hasExistingAllContributorsComment = (prNumber) => {
try {
const repo = getGitHubRepo();
const json = execSync(`gh pr view ${prNumber} --repo ${repo} --json comments`, {
encoding: 'utf8',
stdio: ['pipe', 'pipe', 'pipe'],
timeout: DEFAULT_CMD_TIMEOUT
});
const data = JSON.parse(json);
const comments = data?.comments?.nodes || data?.comments || [];
return comments.some((comment) => comment?.body?.includes(`@all-contributors`));
} catch (error) {
console.warn(`⚠️ Unable to inspect comments for PR #${prNumber}: ${error.message}`);
return false;
}
};
/**
* Post a comment to a PR using the GH CLI.
* @param {number} prNumber
* @param {string} body
* @returns {boolean}
*/
export const postCommentOnPr = (prNumber, body) => {
try {
const repo = getGitHubRepo();
// Pass the body as a separate argument (no shell string interpolation) so backticks,
// quotes, and '$' in the comment text reach GitHub verbatim.
execFileSync('gh', ['pr', 'comment', String(prNumber), '--repo', repo, '--body', body], {
encoding: 'utf8',
stdio: ['pipe', 'inherit', 'inherit'],
timeout: DEFAULT_CMD_TIMEOUT
});
console.log(`💬 Posted recommendation comment on PR #${prNumber}`);
return true;
} catch (error) {
console.warn(`⚠️ Failed to post comment on PR #${prNumber}: ${error.message}`);
return false;
}
};
/**
* Post suggested all-contributors comments to PRs for a collection of reports.
* @param {Array<object>} reports
*/
export const autoAddCommentsToReports = (reports) => {
for (const report of reports) {
for (const pr of report.prs) {
if (hasExistingAllContributorsComment(pr.prNumber)) {
console.log(`💬 Skipping PR #${pr.prNumber} for @${report.username} — comment already present`);
continue;
}
const types = pr.contributionTypes.map(t => '`' + t + '`').join(', ');
const commentLines = [
`Thanks for the contribution @${report.username}!`,
'',
`We detected contribution categories for this PR: ${types || '`code`'}.`,
'',
`@all-contributors please add @${report.username} for ${pr.contributionTypes.join(', ')}`
];
const body = commentLines.join('\n');
postCommentOnPr(pr.prNumber, body);
}
}
};
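// CLI usage (requires the GitHub CLI plus a GITHUB_TOKEN or PRIVATE_TOKEN):
//   node eng/contributor-report.mjs
//   node eng/contributor-report.mjs --include-all-pr-files
//   node eng/contributor-report.mjs --auto-add-pr-comments
// The flags map directly onto the argument parsing in main() below.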
const main = () => {
try {
const ghToken = process.env.GITHUB_TOKEN || process.env.PRIVATE_TOKEN;
if (!ghToken) {
console.error('❌ GITHUB_TOKEN or PRIVATE_TOKEN environment variable is required for GitHub CLI operations');
process.exit(1);
}
// gh CLI only reads GITHUB_TOKEN or GH_TOKEN, so ensure it's set
if (process.env.PRIVATE_TOKEN && !process.env.GITHUB_TOKEN) {
process.env.GITHUB_TOKEN = process.env.PRIVATE_TOKEN;
}
const args = new Set(process.argv.slice(2));
const autoAdd = args.has('--auto-add-pr-comments');
const includeAllFiles = args.has('--include-all-pr-files');
const contributors = getMissingContributors();
console.log(`Inspecting ${contributors.length} missing contributors...\n`);
const reports = [];
for (const contributor of contributors) {
const report = generateContributorReport(contributor, { includeAllFiles });
reports.push(report || { username: contributor, totalPRs: 0, prs: [] });
}
const markdown = generateMarkdownReport(reports, contributors.length);
const outputPath = path.join(process.cwd(), 'reports', 'contributor-report.md');
// Ensure the reports/ directory exists before writing.
fs.mkdirSync(path.dirname(outputPath), { recursive: true });
fs.writeFileSync(outputPath, markdown);
console.log(`Report saved to: ${outputPath}`);
if (autoAdd) {
autoAddCommentsToReports(reports);
}
} catch (error) {
console.error('Error generating report:', error);
process.exit(1);
}
};
// Run main() only when this module is executed directly (robust across platforms).
if (process.argv[1] && path.resolve(process.argv[1]) === fileURLToPath(import.meta.url)) {
main();
}