From 2c17006ce181af773d58c072e10727d16ad9d8d2 Mon Sep 17 00:00:00 2001 From: xcrong Date: Sat, 21 Feb 2026 09:16:07 +0800 Subject: [PATCH] Convert Chinese comments and text to English in check_links.py --- check_links.py | 118 ++++++++++++++++++++++++------------------------- 1 file changed, 59 insertions(+), 59 deletions(-) diff --git a/check_links.py b/check_links.py index 3fb0a7f..11aa53c 100644 --- a/check_links.py +++ b/check_links.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 """ -检测 awesome-openclaw-skills README.md 中链接的有效性。 -使用 HEAD 请求检测状态码,支持通过 GITHUB_TOKEN 环境变量提高 GitHub API 并发限制。 +Check the validity of links in awesome-openclaw-skills README.md. +Uses HEAD requests to check status codes, supports increasing GitHub API rate limits via GITHUB_TOKEN environment variable. """ import argparse @@ -21,7 +21,7 @@ from urllib.parse import urlparse @dataclass class LinkResult: - """链接检测结果""" + """Link check result""" name: str url: str line_num: int @@ -33,9 +33,9 @@ class LinkResult: def extract_links_from_readme(filepath: str) -> list[tuple[str, str, int, str]]: """ - 从 README.md 中提取所有技能链接。 + Extract all skill links from README.md. - 返回: [(skill_name, url, line_num, original_line), ...] + Returns: [(skill_name, url, line_num, original_line), ...] """ pattern = re.compile(r'-\s+\[([^\]]+)\]\((https://github\.com/openclaw/skills/[^\)]+)\)') @@ -52,26 +52,26 @@ def extract_links_from_readme(filepath: str) -> list[tuple[str, str, int, str]]: def check_link(name: str, url: str, github_token: Optional[str], timeout: int = 10) -> tuple[Optional[int], Optional[str], bool]: """ - 使用 HEAD 请求检测单个链接的有效性。 + Check a single link's validity using HEAD request. - 对于 GitHub 链接,使用 GITHUB_TOKEN 进行认证以提高 API 限制。 - GitHub API 限制: - - 未认证:60 次/小时 - - 认证后:5000 次/小时 + For GitHub links, use GITHUB_TOKEN for authentication to increase API limits. + GitHub API limits: + - Unauthenticated: 60 requests/hour + - Authenticated: 5000 requests/hour - 返回: (status_code, error_msg, is_valid) + Returns: (status_code, error_msg, is_valid) """ - # 构建请求 + # Build request parsed = urlparse(url) - # 将 github.com 链接转换为 API 调用以获取更准确的状态 - # 例如:https://github.com/openclaw/skills/tree/main/skills/xxx/SKILL.md - # 转换为:https://api.github.com/repos/openclaw/skills/contents/skills/xxx/SKILL.md?ref=main + # Convert github.com links to API calls for more accurate status + # e.g.: https://github.com/openclaw/skills/tree/main/skills/xxx/SKILL.md + # -> https://api.github.com/repos/openclaw/skills/contents/skills/xxx/SKILL.md?ref=main is_github = parsed.netloc == 'github.com' if is_github: - # 解析 GitHub URL 路径 + # Parse GitHub URL path path_parts = parsed.path.split('/') # /openclaw/skills/tree/main/skills/author/skill-name/SKILL.md if len(path_parts) >= 6 and path_parts[3] == 'tree': @@ -80,7 +80,7 @@ def check_link(name: str, url: str, github_token: Optional[str], timeout: int = branch = path_parts[4] file_path = '/'.join(path_parts[5:]) - # 构建 GitHub API URL + # Build GitHub API URL api_url = f"https://api.github.com/repos/{repo_owner}/{repo_name}/contents/{file_path}?ref={branch}" check_url = api_url else: @@ -88,23 +88,23 @@ def check_link(name: str, url: str, github_token: Optional[str], timeout: int = else: check_url = url - # 创建请求 + # Create request req = urllib.request.Request(check_url, method='HEAD') - # 设置请求头 + # Set headers req.add_header('User-Agent', 'awesome-openclaw-skills-link-checker/1.0') if is_github and github_token: req.add_header('Authorization', f'token {github_token}') - # GitHub API 需要 Accept 头 + # GitHub API requires Accept header req.add_header('Accept', 'application/vnd.github.v3+json') - # 创建 SSL 上下文 + # Create SSL context ssl_context = ssl.create_default_context() try: with urllib.request.urlopen(req, timeout=timeout, context=ssl_context) as response: - # 对于 GitHub API,HEAD 请求可能不支持,需要处理 + # For GitHub API, HEAD requests may not be supported, handle it if isinstance(response, HTTPResponse): status_code = response.status else: @@ -123,11 +123,11 @@ def check_link(name: str, url: str, github_token: Optional[str], timeout: int = is_valid = False elif status_code == 403: error_msg = "Forbidden (rate limited?)" - # 速率限制表示资源存在,只是暂时无法访问 + # Rate limiting means the resource exists but is temporarily inaccessible is_valid = True elif status_code == 429: error_msg = "Too Many Requests" - # 速率限制表示资源存在,只是暂时无法访问 + # Rate limiting means the resource exists but is temporarily inaccessible is_valid = True else: error_msg = f"HTTP {status_code}" @@ -152,26 +152,26 @@ def check_all_links( rate_limit_delay: float = 0.1 ) -> list[LinkResult]: """ - 并发检测所有链接。 + Check all links concurrently. - 参数: + Args: links: [(name, url, line_num, original_line), ...] - github_token: GitHub 个人访问令牌 - max_workers: 最大并发数 - rate_limit_delay: 每次请求之间的延迟(秒) + github_token: GitHub personal access token + max_workers: Maximum number of concurrent workers + rate_limit_delay: Delay between requests (in seconds) """ results = [] total = len(links) - print(f"开始检测 {total} 个链接...") - print(f"并发数: {max_workers}") - print(f"GITHUB_TOKEN: {'已设置' if github_token else '未设置 (限制: 60次/小时)'}") + print(f"Checking {total} links...") + print(f"Concurrency: {max_workers}") + print(f"GITHUB_TOKEN: {'set' if github_token else 'not set (limit: 60/hour)'}") print("-" * 60) def check_with_delay(link_tuple): name, url, line_num, original_line = link_tuple status_code, error, is_valid = check_link(name, url, github_token) - time.sleep(rate_limit_delay) # 添加延迟以避免触发速率限制 + time.sleep(rate_limit_delay) # Add delay to avoid triggering rate limits return LinkResult( name=name, url=url, @@ -194,7 +194,7 @@ def check_all_links( result = future.result() results.append(result) - # 显示进度(始终打印 URL) + # Show progress (always print URL) status_icon = "✓" if result.is_valid else "✗" if result.is_valid: print(f"[{completed}/{total}] {status_icon} {result.name}") @@ -209,27 +209,27 @@ def check_all_links( def delete_invalid_lines(readme_path: str, results: list[LinkResult]) -> int: """ - 删除 README.md 中无效链接所在的行。 + Delete lines with invalid links from README.md. - 返回: 删除的行数 + Returns: Number of deleted lines """ - # 收集需要删除的行号 + # Collect line numbers to delete invalid_lines = {r.line_num for r in results if not r.is_valid} if not invalid_lines: return 0 - # 读取所有行 + # Read all lines with open(readme_path, 'r', encoding='utf-8') as f: lines = f.readlines() - # 过滤掉无效行 + # Filter out invalid lines new_lines = [ line for line_num, line in enumerate(lines, 1) if line_num not in invalid_lines ] - # 写回文件 + # Write back to file with open(readme_path, 'w', encoding='utf-8') as f: f.writelines(new_lines) @@ -237,35 +237,35 @@ def delete_invalid_lines(readme_path: str, results: list[LinkResult]) -> int: def main(): - # 解析命令行参数 - parser = argparse.ArgumentParser(description='检测 README.md 中链接的有效性') - parser.add_argument('--delete', action='store_true', help='删除无效链接所在的行') + # Parse command line arguments + parser = argparse.ArgumentParser(description='Check the validity of links in README.md') + parser.add_argument('--delete', action='store_true', help='Delete lines with invalid links') args = parser.parse_args() - # 获取 README.md 路径 + # Get README.md path script_dir = os.path.dirname(os.path.abspath(__file__)) readme_path = os.path.join(script_dir, "README.md") if not os.path.exists(readme_path): - print(f"错误: 找不到 README.md 文件: {readme_path}") + print(f"Error: Cannot find README.md file: {readme_path}") sys.exit(1) - # 获取 GITHUB_TOKEN + # Get GITHUB_TOKEN github_token = os.environ.get("GITHUB_TOKEN") - # 提取链接 - print(f"正在读取 {readme_path}...") + # Extract links + print(f"Reading {readme_path}...") links = extract_links_from_readme(readme_path) - print(f"找到 {len(links)} 个链接") + print(f"Found {len(links)} links") print() if not links: - print("没有找到任何链接") + print("No links found") sys.exit(0) - # 检测链接 - # 对于 GitHub API,如果使用 token,可以更高并发 - # 未使用 token 时,降低并发以避免触发速率限制 + # Check links + # For GitHub API, higher concurrency is possible with token + # Without token, reduce concurrency to avoid triggering rate limits max_workers = 20 if github_token else 5 rate_limit_delay = 0.05 if github_token else 0.5 @@ -276,21 +276,21 @@ def main(): rate_limit_delay=rate_limit_delay ) - # 统计结果 + # Print summary print() print("=" * 60) valid_count = sum(1 for r in results if r.is_valid) invalid_count = len(results) - valid_count - print(f"检测完成: 有效 {valid_count}, 无效 {invalid_count}") + print(f"Check complete: {valid_count} valid, {invalid_count} invalid") - # 如果需要,删除无效行 + # Delete invalid lines if requested if args.delete and invalid_count > 0: print() - print("正在删除无效链接...") + print("Deleting invalid links...") deleted = delete_invalid_lines(readme_path, results) - print(f"已删除 {deleted} 行") + print(f"Deleted {deleted} lines") - # 返回退出码 + # Return exit code if invalid_count > 0: sys.exit(1) sys.exit(0)