diff --git a/scripts/update_readme.py b/scripts/update_readme.py new file mode 100644 index 0000000..22fb59a --- /dev/null +++ b/scripts/update_readme.py @@ -0,0 +1,121 @@ +import feedparser +import os +import re + +# ================= 配置区域 ================= +RSS_URL = "https://mrxn.net/rss.php" +README_PATH = "README.md" + +# 关键词列表 +WEB_KEYWORDS = [ + 'rce', 'sql', 'xss', 'csrf', 'upload', 'injection', 'web', 'cms', + '文件上传', '文件读取', 'sql注入', '信息泄露', '命令执行', + '目录遍历', '目录穿越', 'xxe', 'bypass', 'auth' +] + +# README定位标记 (请确保这些标记在你README的HTML源码中完全匹配) +START_MARKER_REGEX = r'id="head4">Web APP' +END_MARKER_REGEX = r'id="head5">' +# =========================================== + +def fetch_rss_entries(): + """获取 RSS 并返回解析后的数据列表""" + print(f"Fetching RSS from {RSS_URL}...") + try: + feed = feedparser.parse(RSS_URL) + entries = [] + for entry in feed.entries: + entries.append({ + "title": entry.title, + "link": entry.link + }) + return entries + except Exception as e: + print(f"Error fetching RSS: {e}") + return [] + +def is_relevant(title): + """关键词过滤""" + title_lower = title.lower() + if any(keyword in title_lower for keyword in WEB_KEYWORDS): + return True + return False + +def get_existing_urls(content_lines): + """提取现有链接用于去重""" + urls = set() + link_pattern = re.compile(r'\]\((http[s]?://.*?)\)') + for line in content_lines: + found = link_pattern.findall(line) + for url in found: + urls.add(url.strip()) + return urls + +def update_readme(): + if not os.path.exists(README_PATH): + print(f"Error: {README_PATH} not found.") + return + + with open(README_PATH, 'r', encoding='utf-8') as f: + content = f.read() + + lines = content.splitlines() + + # 1. 定位区块 + start_index = -1 + end_index = -1 + + for i, line in enumerate(lines): + if re.search(START_MARKER_REGEX, line): + start_index = i + elif start_index != -1 and re.search(END_MARKER_REGEX, line): + end_index = i + break + + if start_index == -1 or end_index == -1: + print("Error: Markers not found in README.md") + return + + # 2. 获取该区块内现有的 URL (去重) + existing_urls = get_existing_urls(lines[start_index:end_index]) + print(f"Existing links count: {len(existing_urls)}") + + # 3. 处理 RSS 数据 + rss_data = fetch_rss_entries() + entries_to_add = [] + + for item in rss_data: + title = item['title'] + link = item['link'] + + if not is_relevant(title): + continue + + if link.strip() in existing_urls: + print(f"Skipping duplicate: {title}") + continue + + # 格式化 + entries_to_add.append(f"- [{title}]({link})") + + if not entries_to_add: + print("No new entries.") + return + + print(f"Adding {len(entries_to_add)} new entries...") + + # 4. 插入位置:end_index 之前的最后一个非空行之后 + insert_pos = end_index + while insert_pos > start_index and lines[insert_pos-1].strip() == "": + insert_pos -= 1 + + # 倒序插入,保持RSS顺序 + for entry in reversed(entries_to_add): + lines.insert(insert_pos, entry) + + with open(README_PATH, 'w', encoding='utf-8') as f: + f.write("\n".join(lines)) + print("Update successful.") + +if __name__ == "__main__": + update_readme()