mirror of
https://github.com/VoltAgent/awesome-openclaw-skills.git
synced 2026-03-14 06:35:13 +00:00
Convert Chinese comments and text to English in check_links.py
This commit is contained in:
118
check_links.py
118
check_links.py
@@ -1,7 +1,7 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
"""
|
"""
|
||||||
检测 awesome-openclaw-skills README.md 中链接的有效性。
|
Check the validity of links in awesome-openclaw-skills README.md.
|
||||||
使用 HEAD 请求检测状态码,支持通过 GITHUB_TOKEN 环境变量提高 GitHub API 并发限制。
|
Uses HEAD requests to check status codes; setting the GITHUB_TOKEN environment variable raises the GitHub API rate limit.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
@@ -21,7 +21,7 @@ from urllib.parse import urlparse
|
|||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class LinkResult:
|
class LinkResult:
|
||||||
"""链接检测结果"""
|
"""Link check result"""
|
||||||
name: str
|
name: str
|
||||||
url: str
|
url: str
|
||||||
line_num: int
|
line_num: int
|
||||||
@@ -33,9 +33,9 @@ class LinkResult:
|
|||||||
|
|
||||||
def extract_links_from_readme(filepath: str) -> list[tuple[str, str, int, str]]:
|
def extract_links_from_readme(filepath: str) -> list[tuple[str, str, int, str]]:
|
||||||
"""
|
"""
|
||||||
从 README.md 中提取所有技能链接。
|
Extract all skill links from README.md.
|
||||||
|
|
||||||
返回: [(skill_name, url, line_num, original_line), ...]
|
Returns: [(skill_name, url, line_num, original_line), ...]
|
||||||
"""
|
"""
|
||||||
pattern = re.compile(r'-\s+\[([^\]]+)\]\((https://github\.com/openclaw/skills/[^\)]+)\)')
|
pattern = re.compile(r'-\s+\[([^\]]+)\]\((https://github\.com/openclaw/skills/[^\)]+)\)')
|
||||||
|
|
||||||
@@ -52,26 +52,26 @@ def extract_links_from_readme(filepath: str) -> list[tuple[str, str, int, str]]:
|
|||||||
|
|
||||||
def check_link(name: str, url: str, github_token: Optional[str], timeout: int = 10) -> tuple[Optional[int], Optional[str], bool]:
|
def check_link(name: str, url: str, github_token: Optional[str], timeout: int = 10) -> tuple[Optional[int], Optional[str], bool]:
|
||||||
"""
|
"""
|
||||||
使用 HEAD 请求检测单个链接的有效性。
|
Check a single link's validity using a HEAD request.
|
||||||
|
|
||||||
对于 GitHub 链接,使用 GITHUB_TOKEN 进行认证以提高 API 限制。
|
For GitHub links, use GITHUB_TOKEN for authentication to increase API limits.
|
||||||
GitHub API 限制:
|
GitHub API limits:
|
||||||
- 未认证:60 次/小时
|
- Unauthenticated: 60 requests/hour
|
||||||
- 认证后:5000 次/小时
|
- Authenticated: 5000 requests/hour
|
||||||
|
|
||||||
返回: (status_code, error_msg, is_valid)
|
Returns: (status_code, error_msg, is_valid)
|
||||||
"""
|
"""
|
||||||
# 构建请求
|
# Build request
|
||||||
parsed = urlparse(url)
|
parsed = urlparse(url)
|
||||||
|
|
||||||
# 将 github.com 链接转换为 API 调用以获取更准确的状态
|
# Convert github.com links to API calls for more accurate status
|
||||||
# 例如:https://github.com/openclaw/skills/tree/main/skills/xxx/SKILL.md
|
# e.g.: https://github.com/openclaw/skills/tree/main/skills/xxx/SKILL.md
|
||||||
# 转换为:https://api.github.com/repos/openclaw/skills/contents/skills/xxx/SKILL.md?ref=main
|
# -> https://api.github.com/repos/openclaw/skills/contents/skills/xxx/SKILL.md?ref=main
|
||||||
|
|
||||||
is_github = parsed.netloc == 'github.com'
|
is_github = parsed.netloc == 'github.com'
|
||||||
|
|
||||||
if is_github:
|
if is_github:
|
||||||
# 解析 GitHub URL 路径
|
# Parse GitHub URL path
|
||||||
path_parts = parsed.path.split('/')
|
path_parts = parsed.path.split('/')
|
||||||
# /openclaw/skills/tree/main/skills/author/skill-name/SKILL.md
|
# /openclaw/skills/tree/main/skills/author/skill-name/SKILL.md
|
||||||
if len(path_parts) >= 6 and path_parts[3] == 'tree':
|
if len(path_parts) >= 6 and path_parts[3] == 'tree':
|
||||||
@@ -80,7 +80,7 @@ def check_link(name: str, url: str, github_token: Optional[str], timeout: int =
|
|||||||
branch = path_parts[4]
|
branch = path_parts[4]
|
||||||
file_path = '/'.join(path_parts[5:])
|
file_path = '/'.join(path_parts[5:])
|
||||||
|
|
||||||
# 构建 GitHub API URL
|
# Build GitHub API URL
|
||||||
api_url = f"https://api.github.com/repos/{repo_owner}/{repo_name}/contents/{file_path}?ref={branch}"
|
api_url = f"https://api.github.com/repos/{repo_owner}/{repo_name}/contents/{file_path}?ref={branch}"
|
||||||
check_url = api_url
|
check_url = api_url
|
||||||
else:
|
else:
|
||||||
@@ -88,23 +88,23 @@ def check_link(name: str, url: str, github_token: Optional[str], timeout: int =
|
|||||||
else:
|
else:
|
||||||
check_url = url
|
check_url = url
|
||||||
|
|
||||||
# 创建请求
|
# Create request
|
||||||
req = urllib.request.Request(check_url, method='HEAD')
|
req = urllib.request.Request(check_url, method='HEAD')
|
||||||
|
|
||||||
# 设置请求头
|
# Set headers
|
||||||
req.add_header('User-Agent', 'awesome-openclaw-skills-link-checker/1.0')
|
req.add_header('User-Agent', 'awesome-openclaw-skills-link-checker/1.0')
|
||||||
|
|
||||||
if is_github and github_token:
|
if is_github and github_token:
|
||||||
req.add_header('Authorization', f'token {github_token}')
|
req.add_header('Authorization', f'token {github_token}')
|
||||||
# GitHub API 需要 Accept 头
|
# GitHub API requires Accept header
|
||||||
req.add_header('Accept', 'application/vnd.github.v3+json')
|
req.add_header('Accept', 'application/vnd.github.v3+json')
|
||||||
|
|
||||||
# 创建 SSL 上下文
|
# Create SSL context
|
||||||
ssl_context = ssl.create_default_context()
|
ssl_context = ssl.create_default_context()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with urllib.request.urlopen(req, timeout=timeout, context=ssl_context) as response:
|
with urllib.request.urlopen(req, timeout=timeout, context=ssl_context) as response:
|
||||||
# 对于 GitHub API,HEAD 请求可能不支持,需要处理
|
# The GitHub API may not support HEAD requests, so handle that case
|
||||||
if isinstance(response, HTTPResponse):
|
if isinstance(response, HTTPResponse):
|
||||||
status_code = response.status
|
status_code = response.status
|
||||||
else:
|
else:
|
||||||
@@ -123,11 +123,11 @@ def check_link(name: str, url: str, github_token: Optional[str], timeout: int =
|
|||||||
is_valid = False
|
is_valid = False
|
||||||
elif status_code == 403:
|
elif status_code == 403:
|
||||||
error_msg = "Forbidden (rate limited?)"
|
error_msg = "Forbidden (rate limited?)"
|
||||||
# 速率限制表示资源存在,只是暂时无法访问
|
# A 403 is assumed to be rate limiting rather than a missing resource, so the link is still counted as valid
|
||||||
is_valid = True
|
is_valid = True
|
||||||
elif status_code == 429:
|
elif status_code == 429:
|
||||||
error_msg = "Too Many Requests"
|
error_msg = "Too Many Requests"
|
||||||
# 速率限制表示资源存在,只是暂时无法访问
|
# A 429 means the request was rate limited, not that the link is broken, so the link is still counted as valid
|
||||||
is_valid = True
|
is_valid = True
|
||||||
else:
|
else:
|
||||||
error_msg = f"HTTP {status_code}"
|
error_msg = f"HTTP {status_code}"
|
||||||
@@ -152,26 +152,26 @@ def check_all_links(
|
|||||||
rate_limit_delay: float = 0.1
|
rate_limit_delay: float = 0.1
|
||||||
) -> list[LinkResult]:
|
) -> list[LinkResult]:
|
||||||
"""
|
"""
|
||||||
并发检测所有链接。
|
Check all links concurrently.
|
||||||
|
|
||||||
参数:
|
Args:
|
||||||
links: [(name, url, line_num, original_line), ...]
|
links: [(name, url, line_num, original_line), ...]
|
||||||
github_token: GitHub 个人访问令牌
|
github_token: GitHub personal access token
|
||||||
max_workers: 最大并发数
|
max_workers: Maximum number of concurrent workers
|
||||||
rate_limit_delay: 每次请求之间的延迟(秒)
|
rate_limit_delay: Delay between requests (in seconds)
|
||||||
"""
|
"""
|
||||||
results = []
|
results = []
|
||||||
total = len(links)
|
total = len(links)
|
||||||
|
|
||||||
print(f"开始检测 {total} 个链接...")
|
print(f"Checking {total} links...")
|
||||||
print(f"并发数: {max_workers}")
|
print(f"Concurrency: {max_workers}")
|
||||||
print(f"GITHUB_TOKEN: {'已设置' if github_token else '未设置 (限制: 60次/小时)'}")
|
print(f"GITHUB_TOKEN: {'set' if github_token else 'not set (limit: 60/hour)'}")
|
||||||
print("-" * 60)
|
print("-" * 60)
|
||||||
|
|
||||||
def check_with_delay(link_tuple):
|
def check_with_delay(link_tuple):
|
||||||
name, url, line_num, original_line = link_tuple
|
name, url, line_num, original_line = link_tuple
|
||||||
status_code, error, is_valid = check_link(name, url, github_token)
|
status_code, error, is_valid = check_link(name, url, github_token)
|
||||||
time.sleep(rate_limit_delay) # 添加延迟以避免触发速率限制
|
time.sleep(rate_limit_delay) # Add delay to avoid triggering rate limits
|
||||||
return LinkResult(
|
return LinkResult(
|
||||||
name=name,
|
name=name,
|
||||||
url=url,
|
url=url,
|
||||||
@@ -194,7 +194,7 @@ def check_all_links(
|
|||||||
result = future.result()
|
result = future.result()
|
||||||
results.append(result)
|
results.append(result)
|
||||||
|
|
||||||
# 显示进度(始终打印 URL)
|
# Show progress (always print URL)
|
||||||
status_icon = "✓" if result.is_valid else "✗"
|
status_icon = "✓" if result.is_valid else "✗"
|
||||||
if result.is_valid:
|
if result.is_valid:
|
||||||
print(f"[{completed}/{total}] {status_icon} {result.name}")
|
print(f"[{completed}/{total}] {status_icon} {result.name}")
|
||||||
@@ -209,27 +209,27 @@ def check_all_links(
|
|||||||
|
|
||||||
def delete_invalid_lines(readme_path: str, results: list[LinkResult]) -> int:
|
def delete_invalid_lines(readme_path: str, results: list[LinkResult]) -> int:
|
||||||
"""
|
"""
|
||||||
删除 README.md 中无效链接所在的行。
|
Delete lines with invalid links from README.md.
|
||||||
|
|
||||||
返回: 删除的行数
|
Returns: Number of deleted lines
|
||||||
"""
|
"""
|
||||||
# 收集需要删除的行号
|
# Collect line numbers to delete
|
||||||
invalid_lines = {r.line_num for r in results if not r.is_valid}
|
invalid_lines = {r.line_num for r in results if not r.is_valid}
|
||||||
|
|
||||||
if not invalid_lines:
|
if not invalid_lines:
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
# 读取所有行
|
# Read all lines
|
||||||
with open(readme_path, 'r', encoding='utf-8') as f:
|
with open(readme_path, 'r', encoding='utf-8') as f:
|
||||||
lines = f.readlines()
|
lines = f.readlines()
|
||||||
|
|
||||||
# 过滤掉无效行
|
# Filter out invalid lines
|
||||||
new_lines = [
|
new_lines = [
|
||||||
line for line_num, line in enumerate(lines, 1)
|
line for line_num, line in enumerate(lines, 1)
|
||||||
if line_num not in invalid_lines
|
if line_num not in invalid_lines
|
||||||
]
|
]
|
||||||
|
|
||||||
# 写回文件
|
# Write back to file
|
||||||
with open(readme_path, 'w', encoding='utf-8') as f:
|
with open(readme_path, 'w', encoding='utf-8') as f:
|
||||||
f.writelines(new_lines)
|
f.writelines(new_lines)
|
||||||
|
|
||||||
@@ -237,35 +237,35 @@ def delete_invalid_lines(readme_path: str, results: list[LinkResult]) -> int:
|
|||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
# 解析命令行参数
|
# Parse command line arguments
|
||||||
parser = argparse.ArgumentParser(description='检测 README.md 中链接的有效性')
|
parser = argparse.ArgumentParser(description='Check the validity of links in README.md')
|
||||||
parser.add_argument('--delete', action='store_true', help='删除无效链接所在的行')
|
parser.add_argument('--delete', action='store_true', help='Delete lines with invalid links')
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
# 获取 README.md 路径
|
# Get README.md path
|
||||||
script_dir = os.path.dirname(os.path.abspath(__file__))
|
script_dir = os.path.dirname(os.path.abspath(__file__))
|
||||||
readme_path = os.path.join(script_dir, "README.md")
|
readme_path = os.path.join(script_dir, "README.md")
|
||||||
|
|
||||||
if not os.path.exists(readme_path):
|
if not os.path.exists(readme_path):
|
||||||
print(f"错误: 找不到 README.md 文件: {readme_path}")
|
print(f"Error: Cannot find README.md file: {readme_path}")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
# 获取 GITHUB_TOKEN
|
# Get GITHUB_TOKEN
|
||||||
github_token = os.environ.get("GITHUB_TOKEN")
|
github_token = os.environ.get("GITHUB_TOKEN")
|
||||||
|
|
||||||
# 提取链接
|
# Extract links
|
||||||
print(f"正在读取 {readme_path}...")
|
print(f"Reading {readme_path}...")
|
||||||
links = extract_links_from_readme(readme_path)
|
links = extract_links_from_readme(readme_path)
|
||||||
print(f"找到 {len(links)} 个链接")
|
print(f"Found {len(links)} links")
|
||||||
print()
|
print()
|
||||||
|
|
||||||
if not links:
|
if not links:
|
||||||
print("没有找到任何链接")
|
print("No links found")
|
||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
|
|
||||||
# 检测链接
|
# Check links
|
||||||
# 对于 GitHub API,如果使用 token,可以更高并发
|
# With a token, the GitHub API allows higher concurrency
|
||||||
# 未使用 token 时,降低并发以避免触发速率限制
|
# Without a token, reduce concurrency to avoid triggering rate limits
|
||||||
max_workers = 20 if github_token else 5
|
max_workers = 20 if github_token else 5
|
||||||
rate_limit_delay = 0.05 if github_token else 0.5
|
rate_limit_delay = 0.05 if github_token else 0.5
|
||||||
|
|
||||||
@@ -276,21 +276,21 @@ def main():
|
|||||||
rate_limit_delay=rate_limit_delay
|
rate_limit_delay=rate_limit_delay
|
||||||
)
|
)
|
||||||
|
|
||||||
# 统计结果
|
# Print summary
|
||||||
print()
|
print()
|
||||||
print("=" * 60)
|
print("=" * 60)
|
||||||
valid_count = sum(1 for r in results if r.is_valid)
|
valid_count = sum(1 for r in results if r.is_valid)
|
||||||
invalid_count = len(results) - valid_count
|
invalid_count = len(results) - valid_count
|
||||||
print(f"检测完成: 有效 {valid_count}, 无效 {invalid_count}")
|
print(f"Check complete: {valid_count} valid, {invalid_count} invalid")
|
||||||
|
|
||||||
# 如果需要,删除无效行
|
# Delete invalid lines if requested
|
||||||
if args.delete and invalid_count > 0:
|
if args.delete and invalid_count > 0:
|
||||||
print()
|
print()
|
||||||
print("正在删除无效链接...")
|
print("Deleting invalid links...")
|
||||||
deleted = delete_invalid_lines(readme_path, results)
|
deleted = delete_invalid_lines(readme_path, results)
|
||||||
print(f"已删除 {deleted} 行")
|
print(f"Deleted {deleted} lines")
|
||||||
|
|
||||||
# 返回退出码
|
# Return exit code
|
||||||
if invalid_count > 0:
|
if invalid_count > 0:
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
|
|||||||
Reference in New Issue
Block a user