Convert Chinese comments and text to English in check_links.py

This commit is contained in:
xcrong
2026-02-21 09:16:07 +08:00
parent 4335e775c0
commit 2c17006ce1

View File

@@ -1,7 +1,7 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
""" """
检测 awesome-openclaw-skills README.md 中链接的有效性。 Check the validity of links in awesome-openclaw-skills README.md.
使用 HEAD 请求检测状态码,支持通过 GITHUB_TOKEN 环境变量提高 GitHub API 并发限制。 Uses HEAD requests to check status codes, supports increasing GitHub API rate limits via GITHUB_TOKEN environment variable.
""" """
import argparse import argparse
@@ -21,7 +21,7 @@ from urllib.parse import urlparse
@dataclass @dataclass
class LinkResult: class LinkResult:
"""链接检测结果""" """Link check result"""
name: str name: str
url: str url: str
line_num: int line_num: int
@@ -33,9 +33,9 @@ class LinkResult:
def extract_links_from_readme(filepath: str) -> list[tuple[str, str, int, str]]: def extract_links_from_readme(filepath: str) -> list[tuple[str, str, int, str]]:
""" """
从 README.md 中提取所有技能链接。 Extract all skill links from README.md.
返回: [(skill_name, url, line_num, original_line), ...] Returns: [(skill_name, url, line_num, original_line), ...]
""" """
pattern = re.compile(r'-\s+\[([^\]]+)\]\((https://github\.com/openclaw/skills/[^\)]+)\)') pattern = re.compile(r'-\s+\[([^\]]+)\]\((https://github\.com/openclaw/skills/[^\)]+)\)')
@@ -52,26 +52,26 @@ def extract_links_from_readme(filepath: str) -> list[tuple[str, str, int, str]]:
def check_link(name: str, url: str, github_token: Optional[str], timeout: int = 10) -> tuple[Optional[int], Optional[str], bool]: def check_link(name: str, url: str, github_token: Optional[str], timeout: int = 10) -> tuple[Optional[int], Optional[str], bool]:
""" """
使用 HEAD 请求检测单个链接的有效性。 Check a single link's validity using HEAD request.
对于 GitHub 链接,使用 GITHUB_TOKEN 进行认证以提高 API 限制。 For GitHub links, use GITHUB_TOKEN for authentication to increase API limits.
GitHub API 限制: GitHub API limits:
- 未认证:60 次/小时 - Unauthenticated: 60 requests/hour
- 认证后:5000 次/小时 - Authenticated: 5000 requests/hour
返回: (status_code, error_msg, is_valid) Returns: (status_code, error_msg, is_valid)
""" """
# 构建请求 # Build request
parsed = urlparse(url) parsed = urlparse(url)
# github.com 链接转换为 API 调用以获取更准确的状态 # Convert github.com links to API calls for more accurate status
# 例如:https://github.com/openclaw/skills/tree/main/skills/xxx/SKILL.md # e.g.: https://github.com/openclaw/skills/tree/main/skills/xxx/SKILL.md
# 转换为:https://api.github.com/repos/openclaw/skills/contents/skills/xxx/SKILL.md?ref=main # -> https://api.github.com/repos/openclaw/skills/contents/skills/xxx/SKILL.md?ref=main
is_github = parsed.netloc == 'github.com' is_github = parsed.netloc == 'github.com'
if is_github: if is_github:
# 解析 GitHub URL 路径 # Parse GitHub URL path
path_parts = parsed.path.split('/') path_parts = parsed.path.split('/')
# /openclaw/skills/tree/main/skills/author/skill-name/SKILL.md # /openclaw/skills/tree/main/skills/author/skill-name/SKILL.md
if len(path_parts) >= 6 and path_parts[3] == 'tree': if len(path_parts) >= 6 and path_parts[3] == 'tree':
@@ -80,7 +80,7 @@ def check_link(name: str, url: str, github_token: Optional[str], timeout: int =
branch = path_parts[4] branch = path_parts[4]
file_path = '/'.join(path_parts[5:]) file_path = '/'.join(path_parts[5:])
# 构建 GitHub API URL # Build GitHub API URL
api_url = f"https://api.github.com/repos/{repo_owner}/{repo_name}/contents/{file_path}?ref={branch}" api_url = f"https://api.github.com/repos/{repo_owner}/{repo_name}/contents/{file_path}?ref={branch}"
check_url = api_url check_url = api_url
else: else:
@@ -88,23 +88,23 @@ def check_link(name: str, url: str, github_token: Optional[str], timeout: int =
else: else:
check_url = url check_url = url
# 创建请求 # Create request
req = urllib.request.Request(check_url, method='HEAD') req = urllib.request.Request(check_url, method='HEAD')
# 设置请求头 # Set headers
req.add_header('User-Agent', 'awesome-openclaw-skills-link-checker/1.0') req.add_header('User-Agent', 'awesome-openclaw-skills-link-checker/1.0')
if is_github and github_token: if is_github and github_token:
req.add_header('Authorization', f'token {github_token}') req.add_header('Authorization', f'token {github_token}')
# GitHub API 需要 Accept # GitHub API requires Accept header
req.add_header('Accept', 'application/vnd.github.v3+json') req.add_header('Accept', 'application/vnd.github.v3+json')
# 创建 SSL 上下文 # Create SSL context
ssl_context = ssl.create_default_context() ssl_context = ssl.create_default_context()
try: try:
with urllib.request.urlopen(req, timeout=timeout, context=ssl_context) as response: with urllib.request.urlopen(req, timeout=timeout, context=ssl_context) as response:
# 对于 GitHub API,HEAD 请求可能不支持,需要处理 # For GitHub API, HEAD requests may not be supported, handle it
if isinstance(response, HTTPResponse): if isinstance(response, HTTPResponse):
status_code = response.status status_code = response.status
else: else:
@@ -123,11 +123,11 @@ def check_link(name: str, url: str, github_token: Optional[str], timeout: int =
is_valid = False is_valid = False
elif status_code == 403: elif status_code == 403:
error_msg = "Forbidden (rate limited?)" error_msg = "Forbidden (rate limited?)"
# 速率限制表示资源存在,只是暂时无法访问 # Rate limiting means the resource exists but is temporarily inaccessible
is_valid = True is_valid = True
elif status_code == 429: elif status_code == 429:
error_msg = "Too Many Requests" error_msg = "Too Many Requests"
# 速率限制表示资源存在,只是暂时无法访问 # Rate limiting means the resource exists but is temporarily inaccessible
is_valid = True is_valid = True
else: else:
error_msg = f"HTTP {status_code}" error_msg = f"HTTP {status_code}"
@@ -152,26 +152,26 @@ def check_all_links(
rate_limit_delay: float = 0.1 rate_limit_delay: float = 0.1
) -> list[LinkResult]: ) -> list[LinkResult]:
""" """
并发检测所有链接。 Check all links concurrently.
参数: Args:
links: [(name, url, line_num, original_line), ...] links: [(name, url, line_num, original_line), ...]
github_token: GitHub 个人访问令牌 github_token: GitHub personal access token
max_workers: 最大并发数 max_workers: Maximum number of concurrent workers
rate_limit_delay: 每次请求之间的延迟(秒) rate_limit_delay: Delay between requests (in seconds)
""" """
results = [] results = []
total = len(links) total = len(links)
print(f"开始检测 {total} 个链接...") print(f"Checking {total} links...")
print(f"并发数: {max_workers}") print(f"Concurrency: {max_workers}")
print(f"GITHUB_TOKEN: {'已设置' if github_token else '未设置 (限制: 60次/小时)'}") print(f"GITHUB_TOKEN: {'set' if github_token else 'not set (limit: 60/hour)'}")
print("-" * 60) print("-" * 60)
def check_with_delay(link_tuple): def check_with_delay(link_tuple):
name, url, line_num, original_line = link_tuple name, url, line_num, original_line = link_tuple
status_code, error, is_valid = check_link(name, url, github_token) status_code, error, is_valid = check_link(name, url, github_token)
time.sleep(rate_limit_delay) # 添加延迟以避免触发速率限制 time.sleep(rate_limit_delay) # Add delay to avoid triggering rate limits
return LinkResult( return LinkResult(
name=name, name=name,
url=url, url=url,
@@ -194,7 +194,7 @@ def check_all_links(
result = future.result() result = future.result()
results.append(result) results.append(result)
# 显示进度(始终打印 URL) # Show progress (always print URL)
status_icon = "" if result.is_valid else "" status_icon = "" if result.is_valid else ""
if result.is_valid: if result.is_valid:
print(f"[{completed}/{total}] {status_icon} {result.name}") print(f"[{completed}/{total}] {status_icon} {result.name}")
@@ -209,27 +209,27 @@ def check_all_links(
def delete_invalid_lines(readme_path: str, results: list[LinkResult]) -> int: def delete_invalid_lines(readme_path: str, results: list[LinkResult]) -> int:
""" """
删除 README.md 中无效链接所在的行。 Delete lines with invalid links from README.md.
返回: 删除的行数 Returns: Number of deleted lines
""" """
# 收集需要删除的行号 # Collect line numbers to delete
invalid_lines = {r.line_num for r in results if not r.is_valid} invalid_lines = {r.line_num for r in results if not r.is_valid}
if not invalid_lines: if not invalid_lines:
return 0 return 0
# 读取所有行 # Read all lines
with open(readme_path, 'r', encoding='utf-8') as f: with open(readme_path, 'r', encoding='utf-8') as f:
lines = f.readlines() lines = f.readlines()
# 过滤掉无效行 # Filter out invalid lines
new_lines = [ new_lines = [
line for line_num, line in enumerate(lines, 1) line for line_num, line in enumerate(lines, 1)
if line_num not in invalid_lines if line_num not in invalid_lines
] ]
# 写回文件 # Write back to file
with open(readme_path, 'w', encoding='utf-8') as f: with open(readme_path, 'w', encoding='utf-8') as f:
f.writelines(new_lines) f.writelines(new_lines)
@@ -237,35 +237,35 @@ def delete_invalid_lines(readme_path: str, results: list[LinkResult]) -> int:
def main(): def main():
# 解析命令行参数 # Parse command line arguments
parser = argparse.ArgumentParser(description='检测 README.md 中链接的有效性') parser = argparse.ArgumentParser(description='Check the validity of links in README.md')
parser.add_argument('--delete', action='store_true', help='删除无效链接所在的行') parser.add_argument('--delete', action='store_true', help='Delete lines with invalid links')
args = parser.parse_args() args = parser.parse_args()
# 获取 README.md 路径 # Get README.md path
script_dir = os.path.dirname(os.path.abspath(__file__)) script_dir = os.path.dirname(os.path.abspath(__file__))
readme_path = os.path.join(script_dir, "README.md") readme_path = os.path.join(script_dir, "README.md")
if not os.path.exists(readme_path): if not os.path.exists(readme_path):
print(f"错误: 找不到 README.md 文件: {readme_path}") print(f"Error: Cannot find README.md file: {readme_path}")
sys.exit(1) sys.exit(1)
# 获取 GITHUB_TOKEN # Get GITHUB_TOKEN
github_token = os.environ.get("GITHUB_TOKEN") github_token = os.environ.get("GITHUB_TOKEN")
# 提取链接 # Extract links
print(f"正在读取 {readme_path}...") print(f"Reading {readme_path}...")
links = extract_links_from_readme(readme_path) links = extract_links_from_readme(readme_path)
print(f"找到 {len(links)} 个链接") print(f"Found {len(links)} links")
print() print()
if not links: if not links:
print("没有找到任何链接") print("No links found")
sys.exit(0) sys.exit(0)
# 检测链接 # Check links
# 对于 GitHub API,如果使用 token,可以更高并发 # For GitHub API, higher concurrency is possible with token
# 未使用 token 时,降低并发以避免触发速率限制 # Without token, reduce concurrency to avoid triggering rate limits
max_workers = 20 if github_token else 5 max_workers = 20 if github_token else 5
rate_limit_delay = 0.05 if github_token else 0.5 rate_limit_delay = 0.05 if github_token else 0.5
@@ -276,21 +276,21 @@ def main():
rate_limit_delay=rate_limit_delay rate_limit_delay=rate_limit_delay
) )
# 统计结果 # Print summary
print() print()
print("=" * 60) print("=" * 60)
valid_count = sum(1 for r in results if r.is_valid) valid_count = sum(1 for r in results if r.is_valid)
invalid_count = len(results) - valid_count invalid_count = len(results) - valid_count
print(f"检测完成: 有效 {valid_count}, 无效 {invalid_count}") print(f"Check complete: {valid_count} valid, {invalid_count} invalid")
# 如果需要,删除无效行 # Delete invalid lines if requested
if args.delete and invalid_count > 0: if args.delete and invalid_count > 0:
print() print()
print("正在删除无效链接...") print("Deleting invalid links...")
deleted = delete_invalid_lines(readme_path, results) deleted = delete_invalid_lines(readme_path, results)
print(f"已删除 {deleted} ") print(f"Deleted {deleted} lines")
# 返回退出码 # Return exit code
if invalid_count > 0: if invalid_count > 0:
sys.exit(1) sys.exit(1)
sys.exit(0) sys.exit(0)