Rename to hkt.sh

This commit is contained in:
mango
2026-03-21 01:10:53 +08:00
parent 76a263d0f9
commit 8f1171fe99
6676 changed files with 1724268 additions and 0 deletions

146
projects/news-bot/README.md Normal file
View File

@@ -0,0 +1,146 @@
# 📰 财经快讯 Telegram 机器人
自动抓取金十、华尔街见闻、36氪、新浪财经快讯,AI 评分过滤,推送到 Telegram。
---
## 功能特性
- 🔴 **重磅即时推送** — 评分 ≥8 立即发送
- 🟡 **普通汇总推送** — 评分 6-7,每 N 分钟汇总一条
- 📊 **定时总结** — 08:00 / 11:30 / 20:00 三次 AI 总结
- 🔑 **关键词过滤** — 自定义关注词,精准推送
- 📡 **多源管理** — 一键开关各信息源
- ⚙️ **灵活设置** — 推送阈值、汇总频率均可调节
- 🔄 **去重** — 标题相似度 >70% 自动合并
---
## 环境变量
| 变量 | 说明 | 必填 |
|------|------|------|
| `BOT_TOKEN` | Telegram Bot Token(@BotFather 获取) | ✅ |
| `ADMIN_ID` | 管理员 Telegram 用户 ID | ❌(默认 165067365) |
| `DATA_DIR` | 数据目录 | ❌(默认 `/opt/news-bot/data`) |
---
## 快速部署
### 方法一:直接运行
```bash
# 1. 进入项目目录
cd /path/to/news-bot
# 2. 安装依赖
pip3 install -r requirements.txt
# 3. 创建数据目录
mkdir -p /opt/news-bot/data
# 4. 启动
export BOT_TOKEN="your_bot_token_here"
export ADMIN_ID="165067365"
python3 bot.py
```
### 方法二:systemd 服务(Linux)
```ini
# /etc/systemd/system/news-bot.service
[Unit]
Description=Telegram 财经快讯机器人
After=network.target
[Service]
Type=simple
WorkingDirectory=/opt/news-bot
ExecStart=/usr/bin/python3 /opt/news-bot/bot.py
Environment=BOT_TOKEN=your_bot_token_here
Environment=ADMIN_ID=165067365
Environment=DATA_DIR=/opt/news-bot/data
Restart=always
RestartSec=10
StandardOutput=journal
StandardError=journal
[Install]
WantedBy=multi-user.target
```
```bash
sudo systemctl daemon-reload
sudo systemctl enable news-bot
sudo systemctl start news-bot
sudo systemctl status news-bot
```
### 方法三:部署到服务器(38.76.204.161)
```bash
# 上传文件
scp -r news-bot/ root@38.76.204.161:/opt/news-bot/
# SSH 登录
ssh root@38.76.204.161
# 安装依赖
pip3 install -r /opt/news-bot/requirements.txt
# 创建数据目录
mkdir -p /opt/news-bot/data
# 配置 systemd(见上方),然后启动
systemctl start news-bot
```
---
## 文件结构
```
news-bot/
├── bot.py # 主入口:Telegram bot + 调度器
├── sources.py # 信息源抓取(金十/华尔街/36氪/新浪)
├── scorer.py # 评分引擎 + 去重
├── storage.py # JSON 数据持久化
├── summarizer.py # 新闻总结生成
├── requirements.txt
├── README.md
└── data/ # 运行时数据(settings.json / news.json / pushed.json)
```
---
## 命令菜单
| 命令 | 功能 |
|------|------|
| `/start` | 欢迎页 + 主菜单按钮 |
| `/news` | 最新快讯(分页浏览) |
| `/summary` | 手动触发总结 |
| `/sources` | 订阅源开关管理 |
| `/keywords` | 关键词添加/删除 |
| `/settings` | 推送阈值/频率设置 |
---
## 评分规则
| 分数 | 含义 | 处理 |
|------|------|------|
| ≥ 8 | 重磅/突发 | 🔥 即时推送 |
| 6-7 | 值得关注 | 🟡 汇总推送 |
| < 6 | 普通资讯 | ❌ 不推送 |
评分依据:关键词权重 + 来源重要性标记 + 规则引擎(无需外部 AI API)
---
## 数据文件说明
- `data/settings.json` — 用户配置(关键词、源开关、阈值)
- `data/news.json` — 新闻历史(最近 2000 条)
- `data/pushed.json` — 已推送 ID 集合(防重复,最近 5000 条)

776
projects/news-bot/bot.py Normal file
View File

@@ -0,0 +1,776 @@
"""
Telegram 财经快讯机器人 - 主入口
功能:自动抓取财经/科技快讯AI 评分过滤,推送到 Telegram
"""
import os
import time
import logging
import asyncio
from datetime import datetime
from telegram import Update, InlineKeyboardButton, InlineKeyboardMarkup
from telegram.ext import (
Application, CommandHandler, CallbackQueryHandler,
ContextTypes, MessageHandler, filters,
)
from telegram.constants import ParseMode
import sources
import scorer
import storage
import summarizer
# Logging configuration
logging.basicConfig(
format="%(asctime)s [%(name)s] %(levelname)s: %(message)s",
level=logging.INFO,
)
logger = logging.getLogger(__name__)
# Configuration, read from environment variables (ADMIN_ID falls back to a built-in default)
BOT_TOKEN = os.environ.get("BOT_TOKEN", "")
ADMIN_ID = int(os.environ.get("ADMIN_ID", "165067365"))
# Pagination state cache
_page_cache = {}
# ========== 工具函数 ==========
def _check_admin(user_id: int) -> bool:
    """Return True when ``user_id`` equals the configured ``ADMIN_ID``."""
    is_admin = ADMIN_ID == user_id
    return is_admin
def _format_news_item(item: dict) -> str:
    """Render one news item as a Telegram-HTML snippet.

    Args:
        item: News dict; the keys read are ``score``, ``source_name``,
            ``time_str``, ``title`` and ``url`` (all optional).

    Returns:
        Two lines of text: an emoji plus the (optionally linked) title, then
        source, time and score. Every interpolated value is HTML-escaped so
        that characters such as ``&``, ``<`` or ``"`` in a title, source name
        or URL cannot break the markup sent with ``ParseMode.HTML``.
    """
    import html as html_mod

    score = item.get("score", 0)
    if score >= 9:
        emoji = "🔥"
    elif score >= 8:
        # Same marker the instant push uses for the ">= 8" tier.
        emoji = "⚡"
    elif score >= 7:
        emoji = "📌"
    elif score >= 6:
        emoji = "🔹"
    else:
        emoji = ""

    src = html_mod.escape(str(item.get("source_name", "")))
    ts = html_mod.escape(str(item.get("time_str", "")))
    title_safe = html_mod.escape(item.get("title", ""))
    url = item.get("url", "")
    if url:
        # The URL sits inside a double-quoted attribute, so quotes and
        # ampersands must be escaped as well (html.escape does both).
        url_safe = html_mod.escape(url, quote=True)
        title_display = f'<a href="{url_safe}">{title_safe}</a>'
    else:
        title_display = title_safe
    return f"{emoji} {title_display}\n 📡 {src} 🕐 {ts} 评分:{score}"
# ========== /start 命令 ==========
async def cmd_start(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Handle /start: show the welcome text with the main-menu keyboard.

    Users other than the admin get a refusal message instead.
    """
    if not _check_admin(update.effective_user.id):
        await update.message.reply_text("⛔ 仅限管理员使用")
        return

    news_btn = InlineKeyboardButton("📰 最新快讯", callback_data="news_0")
    summary_btn = InlineKeyboardButton("📊 新闻总结", callback_data="summary_menu")
    sources_btn = InlineKeyboardButton("📡 订阅源管理", callback_data="sources_menu")
    keywords_btn = InlineKeyboardButton("🔑 关键词管理", callback_data="keywords_menu")
    settings_btn = InlineKeyboardButton("⚙️ 设置", callback_data="settings_menu")
    markup = InlineKeyboardMarkup([
        [news_btn, summary_btn],
        [sources_btn, keywords_btn],
        [settings_btn],
    ])

    welcome = "".join([
        "👋 <b>欢迎使用财经快讯机器人</b>\n\n",
        "🤖 自动抓取金十、华尔街见闻、36氪、新浪财经快讯\n",
        "📊 AI 评分过滤,只推送相关内容\n",
        "⏰ 定时总结,不错过重要新闻\n\n",
        "请选择功能:",
    ])
    await update.message.reply_text(
        welcome,
        parse_mode=ParseMode.HTML,
        reply_markup=markup,
    )
# ========== /news 命令 ==========
async def cmd_news(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Handle /news: show the first page of recent news (admin only)."""
    if _check_admin(update.effective_user.id):
        await _show_news_page(update.message, 0)
async def _show_news_page(message_or_query, page: int):
    """Render one page of stored news, either as a new reply or as an edit.

    Args:
        message_or_query: An object exposing ``edit_message_text`` (the
            message is edited in place) or otherwise ``reply_text`` (a new
            message is sent).
        page: Zero-based page index; clamped to the valid range.

    The stored news is re-scored with the current keywords, items scoring
    below 4 are hidden, and the rest is shown newest first, 10 per page.
    """
    settings = storage.get_settings()
    news_list = storage.get_news(limit=100)
    # Re-score with the current keyword list.
    scorer.score_and_filter(news_list, settings.get("keywords", []))
    # Only items scoring >= 4 are listed.
    news_list = [n for n in news_list if n.get("score", 0) >= 4]
    news_list.sort(key=lambda x: x.get("timestamp", 0), reverse=True)

    page_size = 10
    total_pages = max(1, (len(news_list) + page_size - 1) // page_size)
    page = max(0, min(page, total_pages - 1))
    start = page * page_size
    page_items = news_list[start:start + page_size]

    if not page_items:
        text = "📭 暂无快讯,等待抓取中..."
    else:
        lines = [f"📰 <b>最新快讯</b> (第{page+1}/{total_pages}页)\n"]
        lines.extend(_format_news_item(item) for item in page_items)
        text = "\n".join(lines)

    # Pagination buttons
    buttons = []
    if page > 0:
        buttons.append(InlineKeyboardButton("⬅️ 上一页", callback_data=f"news_{page-1}"))
    buttons.append(InlineKeyboardButton("🔄 刷新", callback_data=f"news_{page}"))
    if page < total_pages - 1:
        buttons.append(InlineKeyboardButton("➡️ 下一页", callback_data=f"news_{page+1}"))
    markup = InlineKeyboardMarkup(
        [buttons, [InlineKeyboardButton("🏠 主菜单", callback_data="main_menu")]]
    )

    if not hasattr(message_or_query, "edit_message_text"):
        await message_or_query.reply_text(
            text, parse_mode=ParseMode.HTML, reply_markup=markup,
        )
        return

    try:
        await message_or_query.edit_message_text(
            text, parse_mode=ParseMode.HTML, reply_markup=markup,
        )
    except Exception as e:
        # Telegram rejects an edit whose content is unchanged (typical for
        # "refresh" with no new items). Falling back to plain text in that
        # case would replace the message with raw HTML tags, so just stop.
        if "message is not modified" in str(e).lower():
            return
        logger.warning(f"news page HTML edit failed, retrying as plain text: {e}")
        await message_or_query.edit_message_text(text, reply_markup=markup)
# ========== /summary 命令 ==========
async def cmd_summary(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Handle /summary: open the summary period picker (admin only)."""
    if _check_admin(update.effective_user.id):
        await _show_summary_menu(update.message)
async def _show_summary_menu(message_or_query):
    """Show the keyboard for choosing the time range of a summary.

    Edits in place when the argument exposes ``edit_message_text``;
    otherwise replies with a new message.
    """
    periods = (
        ("⏱ 最近1小时", "最近1小时"),
        ("🌅 上午", "上午"),
        ("📅 全天", "全天"),
    )
    rows = [
        [InlineKeyboardButton(label, callback_data=f"summary_{period}")]
        for label, period in periods
    ]
    rows.append([InlineKeyboardButton("🏠 主菜单", callback_data="main_menu")])
    markup = InlineKeyboardMarkup(rows)
    prompt = "📊 <b>新闻总结</b>\n\n请选择时间范围:"

    if hasattr(message_or_query, "edit_message_text"):
        send = message_or_query.edit_message_text
    else:
        send = message_or_query.reply_text
    await send(prompt, parse_mode=ParseMode.HTML, reply_markup=markup)
# ========== /sources 命令 ==========
async def cmd_sources(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Handle /sources: open the source on/off menu (admin only)."""
    if _check_admin(update.effective_user.id):
        await _show_sources_menu(update.message)
async def _show_sources_menu(message_or_query):
    """Show one toggle button per news source, marked with its on/off state.

    A source missing from the stored ``sources`` setting counts as enabled.
    Edits in place when the argument exposes ``edit_message_text``;
    otherwise replies with a new message.
    """
    settings = storage.get_settings()
    srcs = settings.get("sources", {})
    source_info = {
        "jin10": "金十数据",
        "wallstreet": "华尔街见闻",
        "kr36": "36氪",
        "sina": "新浪财经",
    }
    keyboard = []
    for key, name in source_info.items():
        enabled = srcs.get(key, True)
        # The state marker must differ between on and off, otherwise the
        # toggle is invisible; same symbols as the push switch in settings.
        status = "✅" if enabled else "❌"
        keyboard.append([InlineKeyboardButton(
            f"{status} {name}", callback_data=f"toggle_src_{key}"
        )])
    keyboard.append([InlineKeyboardButton("🏠 主菜单", callback_data="main_menu")])
    markup = InlineKeyboardMarkup(keyboard)
    text = "📡 <b>订阅源管理</b>\n\n点击切换开/关:"

    if hasattr(message_or_query, "edit_message_text"):
        await message_or_query.edit_message_text(
            text, parse_mode=ParseMode.HTML, reply_markup=markup,
        )
    else:
        await message_or_query.reply_text(
            text, parse_mode=ParseMode.HTML, reply_markup=markup,
        )
# ========== /keywords 命令 ==========
async def cmd_keywords(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Handle /keywords: open the keyword management menu (admin only)."""
    if _check_admin(update.effective_user.id):
        await _show_keywords_menu(update.message)
async def _show_keywords_menu(message_or_query):
    """Show the keyword list as delete buttons plus an "add" button.

    At most the first 30 keywords get a button, three per row. Edits in
    place when the argument exposes ``edit_message_text``; otherwise replies
    with a new message.
    """
    kw_list = storage.get_settings().get("keywords", [])

    shown = kw_list[:30]
    per_row = 3
    keyboard = [
        [
            InlineKeyboardButton(f"{kw}", callback_data=f"del_kw_{kw}")
            for kw in shown[offset:offset + per_row]
        ]
        for offset in range(0, len(shown), per_row)
    ]
    keyboard.append([InlineKeyboardButton(" 添加关键词", callback_data="add_kw_prompt")])
    keyboard.append([InlineKeyboardButton("🏠 主菜单", callback_data="main_menu")])
    markup = InlineKeyboardMarkup(keyboard)

    text = f"🔑 <b>关键词管理</b>\n\n当前 {len(kw_list)} 个关键词\n(点击关键词可删除)"
    if hasattr(message_or_query, "edit_message_text"):
        send = message_or_query.edit_message_text
    else:
        send = message_or_query.reply_text
    await send(text, parse_mode=ParseMode.HTML, reply_markup=markup)
# ========== /settings 命令 ==========
async def cmd_settings(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Handle /settings: open the settings menu (admin only)."""
    if _check_admin(update.effective_user.id):
        await _show_settings_menu(update.message)
async def _show_settings_menu(message_or_query):
    """Show the current push settings with buttons to change them.

    Reads ``push_enabled``, ``min_score``, ``instant_score`` and
    ``batch_interval`` from storage (defaults: True, 6, 8, 30). Edits in
    place when the argument exposes ``edit_message_text``; otherwise replies
    with a new message.
    """
    settings = storage.get_settings()
    push_on = settings.get("push_enabled", True)
    min_score = settings.get("min_score", 6)
    instant_score = settings.get("instant_score", 8)
    interval = settings.get("batch_interval", 30)
    push_txt = "✅ 推送已开" if push_on else "❌ 推送已关"

    # Each row holds a "decrease" and an "increase" button; the labels carry
    # a minus/plus marker so the two can be told apart.
    keyboard = [
        [InlineKeyboardButton(push_txt, callback_data="toggle_push")],
        [
            InlineKeyboardButton(f"最低评分: {min_score} ➖", callback_data="score_min_down"),
            InlineKeyboardButton(f"最低评分: {min_score} ➕", callback_data="score_min_up"),
        ],
        [
            InlineKeyboardButton(f"即时阈值: {instant_score} ➖", callback_data="score_instant_down"),
            InlineKeyboardButton(f"即时阈值: {instant_score} ➕", callback_data="score_instant_up"),
        ],
        [
            InlineKeyboardButton(f"汇总间隔: {interval}min ➖", callback_data="interval_down"),
            InlineKeyboardButton(f"汇总间隔: {interval}min ➕", callback_data="interval_up"),
        ],
        [InlineKeyboardButton("🏠 主菜单", callback_data="main_menu")],
    ]
    markup = InlineKeyboardMarkup(keyboard)
    text = (
        f"⚙️ <b>设置</b>\n\n"
        f"推送状态: {'开启' if push_on else '关闭'}\n"
        f"最低推送评分: {min_score}\n"
        f"即时推送阈值: {instant_score}\n"
        f"汇总推送间隔: {interval} 分钟"
    )
    if hasattr(message_or_query, "edit_message_text"):
        await message_or_query.edit_message_text(
            text, parse_mode=ParseMode.HTML, reply_markup=markup,
        )
    else:
        await message_or_query.reply_text(
            text, parse_mode=ParseMode.HTML, reply_markup=markup,
        )
# ========== CallbackQuery 处理器 ==========
# IDs of users whose next plain-text message is treated as keyword input
_waiting_kw_add = set()
async def handle_callback(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Dispatch every inline-keyboard button press.

    The callback data selects the action: menu navigation, news paging
    (``news_<page>``), summaries (``summary_<period>``), source toggles
    (``toggle_src_<key>``), keyword deletion (``del_kw_<kw>``), the
    add-keyword prompt, and the settings adjustments.

    Each callback query is answered exactly once. The toggle and delete
    actions answer with a toast text; everything else answers silently.
    Non-admin presses are answered and otherwise ignored.
    """
    query = update.callback_query
    data = query.data
    user_id = query.from_user.id
    if not _check_admin(user_id):
        await query.answer()
        return

    # Any button other than the add-keyword prompt (including its "cancel"
    # button) ends keyword-input mode, so a later text message is not
    # silently stored as a keyword.
    if data != "add_kw_prompt":
        _waiting_kw_add.discard(user_id)

    # ---------- actions that answer with a toast ----------
    if data.startswith("toggle_src_"):
        src = data[len("toggle_src_"):]
        new_state = storage.toggle_source(src)
        state_txt = "开启" if new_state else "关闭"
        await query.answer(f"{state_txt} {sources.SOURCE_NAMES.get(src, src)}", show_alert=False)
        await _show_sources_menu(query)
        return
    if data.startswith("del_kw_"):
        kw = data[len("del_kw_"):]
        storage.remove_keyword(kw)
        await query.answer(f"已删除关键词: {kw}", show_alert=False)
        await _show_keywords_menu(query)
        return

    # Everything below needs no toast: acknowledge now so the client stops
    # showing its loading indicator before any slower work starts.
    await query.answer()

    # callback data -> (settings key, default, step, lower bound, upper bound)
    adjustments = {
        "score_min_up": ("min_score", 6, 1, 1, 10),
        "score_min_down": ("min_score", 6, -1, 1, 10),
        "score_instant_up": ("instant_score", 8, 1, 1, 10),
        "score_instant_down": ("instant_score", 8, -1, 1, 10),
        "interval_up": ("batch_interval", 30, 5, 5, 120),
        "interval_down": ("batch_interval", 30, -5, 5, 120),
    }

    # ---------- main menu ----------
    if data == "main_menu":
        keyboard = [
            [InlineKeyboardButton("📰 最新快讯", callback_data="news_0"),
             InlineKeyboardButton("📊 新闻总结", callback_data="summary_menu")],
            [InlineKeyboardButton("📡 订阅源管理", callback_data="sources_menu"),
             InlineKeyboardButton("🔑 关键词管理", callback_data="keywords_menu")],
            [InlineKeyboardButton("⚙️ 设置", callback_data="settings_menu")],
        ]
        await query.edit_message_text(
            "🏠 <b>主菜单</b>\n\n请选择功能:",
            parse_mode=ParseMode.HTML,
            reply_markup=InlineKeyboardMarkup(keyboard),
        )
    # ---------- news paging ----------
    elif data.startswith("news_"):
        page = int(data.split("_", 1)[1])
        await _show_news_page(query, page)
    # ---------- summaries ----------
    elif data == "summary_menu":
        await _show_summary_menu(query)
    elif data.startswith("summary_"):
        period = data[len("summary_"):]
        start_ts, end_ts = summarizer.get_period_range(period)
        settings = storage.get_settings()
        news_list = storage.get_news(limit=500, since_ts=start_ts)
        news_list = [n for n in news_list if n.get("timestamp", 0) <= end_ts]
        scorer.score_and_filter(news_list, settings.get("keywords", []))
        news_list = [n for n in news_list if n.get("score", 0) >= settings.get("min_score", 6)]
        text = summarizer.generate_summary(news_list, period)
        back_btn = InlineKeyboardMarkup([[
            InlineKeyboardButton("🔙 返回", callback_data="summary_menu"),
            InlineKeyboardButton("🏠 主菜单", callback_data="main_menu"),
        ]])
        try:
            await query.edit_message_text(text, parse_mode=ParseMode.HTML, reply_markup=back_btn)
        except Exception:
            # Retry without HTML parsing in case the summary markup is rejected.
            await query.edit_message_text(text, reply_markup=back_btn)
    # ---------- sources ----------
    elif data == "sources_menu":
        await _show_sources_menu(query)
    # ---------- keywords ----------
    elif data == "keywords_menu":
        await _show_keywords_menu(query)
    elif data == "add_kw_prompt":
        _waiting_kw_add.add(user_id)
        await query.edit_message_text(
            "🔑 <b>添加关键词</b>\n\n请直接发送关键词文字(可空格分隔多个):",
            parse_mode=ParseMode.HTML,
            reply_markup=InlineKeyboardMarkup([[
                InlineKeyboardButton("取消", callback_data="keywords_menu")
            ]]),
        )
    # ---------- settings ----------
    elif data == "settings_menu":
        await _show_settings_menu(query)
    elif data == "toggle_push":
        settings = storage.get_settings()
        storage.update_settings({"push_enabled": not settings.get("push_enabled", True)})
        await _show_settings_menu(query)
    elif data in adjustments:
        key, default, step, lower, upper = adjustments[data]
        current = storage.get_settings().get(key, default)
        storage.update_settings({key: max(lower, min(upper, current + step))})
        await _show_settings_menu(query)
async def handle_text(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Store the keywords sent by an admin who was prompted for them.

    Only acts when the sender is the admin and is in keyword-input mode
    (``_waiting_kw_add``); that mode is consumed by this one message. The
    message is split on whitespace and every token is added as a keyword.
    """
    user_id = update.effective_user.id
    if not _check_admin(user_id):
        return
    if user_id not in _waiting_kw_add:
        return
    _waiting_kw_add.discard(user_id)

    added = []
    for kw in update.message.text.split():
        # Each keyword later gets a delete button whose callback data is
        # "del_kw_<keyword>". Telegram limits callback data to 64 bytes, and
        # a single over-long keyword would make the keyword menu fail to
        # render, so such keywords are treated as invalid.
        if len(f"del_kw_{kw}".encode("utf-8")) > 64:
            continue
        if storage.add_keyword(kw):
            added.append(kw)

    if added:
        await update.message.reply_text(
            f"✅ 已添加关键词: {', '.join(added)}",
            reply_markup=InlineKeyboardMarkup([[
                InlineKeyboardButton("🔑 关键词管理", callback_data="keywords_menu")
            ]]),
        )
    else:
        await update.message.reply_text("⚠️ 关键词已存在或无效")
# ========== 后台抓取任务 ==========
# Digest queue: mid-score news (6-7 with the default thresholds) waits here until the next digest push
_batch_queue: list = []
_last_batch_push: float = 0.0
_pushed_in_memory: set = set() # in-memory dedup of pushed IDs, meant to prevent duplicate pushes from concurrent runs
async def _fetch_and_process(app: Application):
    """Run one fetch cycle: fetch, score, de-duplicate, store and push.

    Items scoring at least ``instant_score`` are pushed immediately; items
    from ``min_score`` up to (but below) ``instant_score`` are queued for the
    next digest. Nothing is pushed or marked when pushing is disabled.

    Args:
        app: The running Application whose bot sends the messages.
    """
    settings = storage.get_settings()
    enabled = settings.get("sources", {})
    push_on = settings.get("push_enabled", True)
    min_score = settings.get("min_score", 6)
    instant_score = settings.get("instant_score", 8)

    logger.info("开始抓取新闻...")
    try:
        all_news = await sources.fetch_all(enabled)
    except Exception as e:
        logger.error(f"fetch_all 异常: {e}")
        return
    if not all_news:
        return

    # Score, then drop near-duplicate titles.
    scorer.score_and_filter(all_news, settings.get("keywords", []))
    all_news = scorer.dedup_news(all_news)

    # Skip what was already pushed (persistent set plus in-memory set).
    pushed_ids = storage.get_pushed_ids() | _pushed_in_memory
    new_items = [
        n for n in all_news
        if n["id"] not in pushed_ids and n.get("score", 0) >= min_score
    ]
    if not new_items:
        return

    storage.save_news(new_items)
    if not push_on:
        return

    instant_items = [n for n in new_items if n.get("score", 0) >= instant_score]
    batch_items = [n for n in new_items if min_score <= n.get("score", 0) < instant_score]

    # Reserve the IDs before the first await below. The fast and slow loops
    # both run this coroutine and can overlap; if the IDs were recorded only
    # after the pushes, an overlapping run would not see them and would push
    # the same items again.
    pushed = [n["id"] for n in instant_items + batch_items]
    _pushed_in_memory.update(pushed)

    # Instant pushes
    for item in instant_items:
        await _push_instant(app, item)

    # Queue the rest for the digest, skipping anything already queued.
    existing_ids = {n["id"] for n in _batch_queue}
    for item in batch_items:
        if item["id"] not in existing_ids:
            _batch_queue.append(item)
            existing_ids.add(item["id"])

    # Persist the pushed markers (instant and queued alike).
    storage.mark_pushed(pushed)
async def _push_instant(app: Application, item: dict):
    """Send a single high-score news item to the admin right away.

    The message is sent as Telegram HTML first; if that fails, a plain-text
    version is attempted. Failures are logged, never raised.

    Args:
        app: The running Application whose bot sends the message.
        item: News dict; reads ``score``, ``source_name``, ``title``,
            ``time_str`` and ``url``.
    """
    import html as html_mod

    score = item.get("score", 0)
    if score >= 9:
        emoji = "🔥🔥🔥"
    elif score >= 8:
        emoji = "⚡ 重磅"
    else:
        emoji = "📌"
    src = item.get("source_name", "")
    title = item.get("title", "")
    ts = item.get("time_str", "")
    url = item.get("url", "")

    # Everything interpolated into the HTML message is escaped; the URL sits
    # inside a quoted attribute, so quotes and ampersands matter there too.
    title_safe = html_mod.escape(title)
    src_safe = html_mod.escape(str(src))
    ts_safe = html_mod.escape(str(ts))
    if url:
        title_display = f'<a href="{html_mod.escape(url, quote=True)}">{title_safe}</a>'
    else:
        title_display = f"<b>{title_safe}</b>"
    text = f"{emoji}\n\n{title_display}\n\n📡 {src_safe} 🕐 {ts_safe} 评分:{score}"

    try:
        await app.bot.send_message(
            chat_id=ADMIN_ID, text=text, parse_mode=ParseMode.HTML,
        )
    except Exception as e:
        logger.error(f"即时推送失败: {e}")
        try:
            # Plain-text fallback: no markup, so the raw values are used.
            await app.bot.send_message(chat_id=ADMIN_ID, text=f"{emoji}\n\n{title}\n\n{src} {ts}")
        except Exception as e2:
            logger.error(f"即时推送备用失败: {e2}")
async def _push_batch(app: Application):
    """Send the queued mid-score news as one digest message.

    Does nothing while the queue is empty or while less than
    ``batch_interval`` minutes (default 30) have passed since the last
    digest. Otherwise the queue is drained, grouped by source (highest score
    first, at most 10 titles per source) and sent to the admin. Send
    failures are logged, never raised.

    Args:
        app: The running Application whose bot sends the message.
    """
    global _last_batch_push
    if not _batch_queue:
        return
    settings = storage.get_settings()
    interval = settings.get("batch_interval", 30) * 60
    now = time.time()
    if now - _last_batch_push < interval:
        return

    # Drain the queue (mutated in place, so no ``global`` is needed for it).
    items = _batch_queue[:]
    _batch_queue.clear()
    _last_batch_push = now
    storage.mark_pushed([n["id"] for n in items])

    import html as html_mod
    from collections import defaultdict

    # Group by source, best-scored items first within each group.
    source_map = defaultdict(list)
    for item in sorted(items, key=lambda x: x.get("score", 0), reverse=True):
        source_map[item.get("source_name", "未知")].append(item)

    source_emoji = {
        "金十数据": "💰", "华尔街见闻": "📈", "36氪": "🚀",
        "新浪财经": "📊", "Google News": "🌐",
        "Finviz": "📉", "TechCrunch": "💻",
    }
    lines = [f"🟡 <b>快讯汇总</b>({len(items)}条)"]
    lines.append("")
    for src, src_items in source_map.items():
        # Google News items carry names like "Google News (Publisher)", so
        # fall back to the part before " (" when the full name is unknown.
        emoji = source_emoji.get(src) or source_emoji.get(src.split(" (")[0], "📰")
        lines.append(f"{emoji} <b>{html_mod.escape(src)}</b>({len(src_items)}条)")
        for item in src_items[:10]:
            title_safe = html_mod.escape(item.get("title", ""))
            url = item.get("url", "")
            if url:
                url_safe = html_mod.escape(url, quote=True)
                lines.append(f" • <a href=\"{url_safe}\">{title_safe}</a>")
            else:
                lines.append(f" • {title_safe}")
        if len(src_items) > 10:
            lines.append(f" ... 还有{len(src_items) - 10}")
        lines.append("")
    lines.append(f"{datetime.now().strftime('%H:%M')}")
    text = "\n".join(lines)

    try:
        await app.bot.send_message(
            chat_id=ADMIN_ID, text=text, parse_mode=ParseMode.HTML,
        )
    except Exception as e:
        logger.error(f"汇总推送失败: {e}")
        try:
            # Shortened plain-text fallback (first 15 lines only).
            await app.bot.send_message(chat_id=ADMIN_ID, text="\n".join(lines[:15]))
        except Exception as e2:
            logger.error(f"汇总推送备用失败: {e2}")
async def _send_scheduled_summary(app: Application, period: str):
    """Build the summary for ``period`` and send it to the admin.

    News inside the period's time range is re-scored with the current
    keywords, and only items scoring at least ``min_score`` (default 6) are
    summarised. The message is sent as HTML first and as plain text if that
    fails; failures are logged, never raised.

    Args:
        app: The running Application whose bot sends the message.
        period: Period label understood by ``summarizer.get_period_range``.
    """
    settings = storage.get_settings()
    start_ts, end_ts = summarizer.get_period_range(period)
    news_list = storage.get_news(limit=500, since_ts=start_ts)
    news_list = [n for n in news_list if n.get("timestamp", 0) <= end_ts]
    scorer.score_and_filter(news_list, settings.get("keywords", []))
    news_list = [n for n in news_list if n.get("score", 0) >= settings.get("min_score", 6)]
    text = summarizer.generate_summary(news_list, period)
    try:
        await app.bot.send_message(
            chat_id=ADMIN_ID, text=text, parse_mode=ParseMode.HTML,
        )
    except Exception as e:
        logger.error(f"定时总结推送失败: {e}")
        try:
            await app.bot.send_message(chat_id=ADMIN_ID, text=text)
        except Exception as e2:
            # Log instead of swallowing, matching the other push helpers.
            logger.error(f"定时总结推送备用失败: {e2}")
# ========== 调度器 ==========
class Scheduler:
    """Lightweight asyncio-based scheduler for the bot's background loops."""

    def __init__(self, app: Application):
        self.app = app
        self._tasks: list[asyncio.Task] = []

    def start(self):
        """Create one asyncio task per background loop."""
        loops = (
            self._fast_loop,
            self._slow_loop,
            self._batch_loop,
            self._summary_loop,
        )
        for loop_fn in loops:
            self._tasks.append(asyncio.create_task(loop_fn()))
        logger.info("调度器已启动")

    async def _repeat(self, job, label, interval, initial_delay=0):
        """Call ``job(self.app)`` forever, sleeping ``interval`` seconds between runs.

        Exceptions raised by the job are logged under ``label`` and do not
        stop the loop. A non-zero ``initial_delay`` is slept once up front.
        """
        if initial_delay:
            await asyncio.sleep(initial_delay)
        while True:
            try:
                await job(self.app)
            except Exception as e:
                logger.error(f"{label} 异常: {e}")
            await asyncio.sleep(interval)

    async def _fast_loop(self):
        """Run a fetch cycle every 2 minutes."""
        await self._repeat(_fetch_and_process, "fast_loop", 120)

    async def _slow_loop(self):
        """Run an additional fetch cycle every 5 minutes, first run delayed by 60 s."""
        await self._repeat(_fetch_and_process, "slow_loop", 300, initial_delay=60)

    async def _batch_loop(self):
        """Check once a minute whether a digest push is due, first check after 30 s."""
        await self._repeat(_push_batch, "batch_loop", 60, initial_delay=30)

    async def _summary_loop(self):
        """Send the scheduled summaries at 08:00, 11:30 and 20:00."""
        schedule = (
            (8, 0, "昨晚到今早"),
            (11, 30, "上午"),
            (20, 0, "全天"),
        )
        fired = set()
        while True:
            now = datetime.now()
            today = f"{now.date()}"
            for hour, minute, period in schedule:
                tid = f"{today}_{hour}_{minute}"
                if tid in fired:
                    continue
                # Fire when within 2 minutes of the target time, once per day.
                target = now.replace(hour=hour, minute=minute, second=0)
                if abs((now - target).total_seconds()) > 120:
                    continue
                fired.add(tid)
                try:
                    await _send_scheduled_summary(self.app, period)
                except Exception as e:
                    logger.error(f"定时总结异常: {e}")
            # Check again in a minute.
            await asyncio.sleep(60)

    def stop(self):
        """Cancel every task created by ``start``."""
        for task in self._tasks:
            task.cancel()
# ========== 主函数 ==========
def main():
    """Build the Telegram application, register handlers and start polling.

    Returns early with an error log when ``BOT_TOKEN`` is not set.
    """
    if not BOT_TOKEN:
        logger.error("请设置 BOT_TOKEN 环境变量")
        return
    logger.info(f"机器人启动ADMIN_ID={ADMIN_ID}")
    app = Application.builder().token(BOT_TOKEN).build()

    # Slash commands, registered in this order.
    commands = (
        ("start", cmd_start),
        ("news", cmd_news),
        ("summary", cmd_summary),
        ("sources", cmd_sources),
        ("keywords", cmd_keywords),
        ("settings", cmd_settings),
    )
    for name, callback in commands:
        app.add_handler(CommandHandler(name, callback))
    # Inline-button presses
    app.add_handler(CallbackQueryHandler(handle_callback))
    # Plain text messages (used for keyword input)
    app.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, handle_text))

    # The scheduler is started from post_init so that the event loop is
    # already running when its tasks are created.
    scheduler = Scheduler(app)

    async def post_init(application: Application):
        scheduler.start()
        logger.info("调度器已在 post_init 中启动")

    app.post_init = post_init
    logger.info("开始 polling...")
    app.run_polling(drop_pending_updates=True)


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,3 @@
python-telegram-bot==21.3
httpx==0.27.0
apscheduler==3.10.4

122
projects/news-bot/scorer.py Normal file
View File

@@ -0,0 +1,122 @@
"""
评分和过滤模块 - 基于关键词权重 + 规则引擎
不调用外部 AI API纯本地规则评分
"""
import re
import logging
logger = logging.getLogger(__name__)
# High-weight keywords: each one found in a title adds its weight to the score
HIGH_WEIGHT_KEYWORDS = {
# AI / compute
"英伟达": 3, "NVIDIA": 3, "AMD": 2, "算力": 2, "AI": 2,
"光模块": 2, "存储": 1, "芯片": 2, "GPU": 2, "大模型": 2,
"ChatGPT": 2, "OpenAI": 2, "Anthropic": 2, "DeepSeek": 2,
# Chinese tech companies
"华为": 2, "腾讯": 2, "阿里": 2, "字节": 2, "小米": 2,
"百度": 1, "京东": 1, "美团": 1, "拼多多": 1, "比亚迪": 2,
# Major events
"收购": 2, "突破": 2, "泄露": 3, "内幕": 3, "传闻": 2,
"重磅": 3, "突发": 3, "暴涨": 2, "暴跌": 2, "崩盘": 3,
"熔断": 3, "制裁": 2, "禁令": 2, "封锁": 2,
# Stock market
"A股": 1, "港股": 1, "美股": 1, "涨停": 2, "跌停": 2,
"IPO": 2, "上市": 1, "退市": 2, "回购": 1,
# Macro
"降息": 2, "加息": 2, "美联储": 2, "央行": 1, "GDP": 1,
"CPI": 1, "非农": 2, "失业率": 1,
}
# Low-value patterns (regular expressions): each match lowers the score
LOW_VALUE_PATTERNS = [
r"广告", r"推广", r"优惠券", r"免费领",
r"点击.*查看", r"扫码", r"关注.*公众号",
]
def score_news(news_item: dict, user_keywords: list = None) -> int:
    """Score a single news item on a 0-10 scale using local rules only.

    Rules, applied to the item's ``title``:
      - base score 3;
      - plus the weight of every ``HIGH_WEIGHT_KEYWORDS`` entry contained in
        the title (case-insensitive substring match);
      - plus 1 for every user keyword contained in the title
        (case-insensitive);
      - plus 2 when the item is flagged ``important``;
      - minus 3 for every ``LOW_VALUE_PATTERNS`` regex that matches;
      - minus 2 when the title is shorter than 10 characters;
      - the result is clamped to 0..10.

    Args:
        news_item: News dict; reads ``title`` and ``important``.
        user_keywords: Optional extra keywords. Blank or non-string entries
            are ignored (an empty string would otherwise match every title).

    Returns:
        The clamped score, or 0 when the item has no title.
    """
    title = news_item.get("title", "")
    if not title:
        return 0

    # Lower-case once instead of once per keyword.
    title_lower = title.lower()
    score = 3  # base score

    # 1. Built-in weighted keywords
    score += sum(
        weight
        for keyword, weight in HIGH_WEIGHT_KEYWORDS.items()
        if keyword.lower() in title_lower
    )

    # 2. User-defined keywords
    for kw in user_keywords or ():
        if isinstance(kw, str) and kw and kw.lower() in title_lower:
            score += 1

    # 3. Source flagged the item as important
    if news_item.get("important"):
        score += 2

    # 4. Low-value content
    for pattern in LOW_VALUE_PATTERNS:
        if re.search(pattern, title):
            score -= 3

    # 5. Very short titles are likely not real news
    if len(title) < 10:
        score -= 2

    return max(0, min(10, score))
def is_similar(title1: str, title2: str, threshold: float = 0.7) -> bool:
    """Tell whether two titles are near-duplicates.

    Both titles are stripped of every non-word character, split into
    overlapping 2-character chunks, and compared by Jaccard similarity
    (shared chunks / all chunks). Titles too short to yield any chunk are
    compared for exact equality instead.

    Args:
        title1: First title.
        title2: Second title.
        threshold: Minimum similarity (inclusive) to count as similar.

    Returns:
        True when the titles are similar; False when either title is empty
        or consists only of non-word characters.
    """
    if not (title1 and title2):
        return False

    # Drop punctuation and whitespace.
    norm_a, norm_b = (re.sub(r"[^\w]", "", t) for t in (title1, title2))
    if not (norm_a and norm_b):
        return False

    # Sets of 2-grams
    grams_a = {norm_a[i:i + 2] for i in range(len(norm_a) - 1)}
    grams_b = {norm_b[i:i + 2] for i in range(len(norm_b) - 1)}
    if not (grams_a and grams_b):
        return norm_a == norm_b

    shared = len(grams_a & grams_b)
    total = len(grams_a | grams_b)
    return shared / total >= threshold
def dedup_news(news_list: list) -> list:
    """Drop items whose title is similar to an earlier item's title.

    Items are visited in order and the first of each group of similar
    titles is kept.
    """
    kept = []
    for candidate in news_list:
        candidate_title = candidate.get("title", "")
        already_seen = any(
            is_similar(candidate_title, earlier.get("title", ""))
            for earlier in kept
        )
        if not already_seen:
            kept.append(candidate)
    return kept
def score_and_filter(news_list: list, user_keywords: list = None) -> list:
    """Store a ``score`` on every item in place and return the same list.

    No item is removed here; callers filter on ``score`` themselves.
    """
    for entry in news_list:
        entry.update(score=score_news(entry, user_keywords))
    return news_list

View File

@@ -0,0 +1,374 @@
"""
信息源抓取模块 - 支持金十、华尔街见闻、36氪、新浪财经、Google News、Finviz、TechCrunch
"""
import re
import time
import hashlib
import logging
import httpx
import xml.etree.ElementTree as ET
from datetime import datetime
from email.utils import parsedate_to_datetime
logger = logging.getLogger(__name__)
# Request timeout passed to httpx.AsyncClient by the fetchers
TIMEOUT = 10
# Translation cache, to avoid translating the same text repeatedly
_translate_cache = {}
async def translate_to_zh(text: str) -> str:
    """Translate English text to Simplified Chinese via Google's free endpoint.

    Args:
        text: Text to translate; only the first 500 characters are sent.

    Returns:
        The translation, or ``text`` unchanged when it is empty, already
        mostly Chinese (more than 30% CJK characters), or when the request
        fails or yields nothing.
    """
    if not text:
        return text
    # Skip text that is already mostly Chinese.
    zh_count = sum(1 for c in text if '\u4e00' <= c <= '\u9fff')
    if zh_count > len(text) * 0.3:
        return text

    # Key the cache on exactly what is sent, so two different texts that
    # merely share a prefix cannot receive each other's translation.
    query = text[:500]
    cached = _translate_cache.get(query)
    if cached is not None:
        return cached

    try:
        async with httpx.AsyncClient(timeout=8) as client:
            resp = await client.get(
                "https://translate.googleapis.com/translate_a/single",
                params={"client": "gtx", "sl": "en", "tl": "zh-CN", "dt": "t", "q": query},
            )
            resp.raise_for_status()
            result = resp.json()
        translated = "".join(seg[0] for seg in result[0] if seg[0])
        if not translated:
            # An empty result would blank the title; keep the original.
            return text
        _translate_cache[query] = translated
        # Bound the cache: once past 500 entries, drop the 200 oldest.
        if len(_translate_cache) > 500:
            for stale in list(_translate_cache)[:200]:
                del _translate_cache[stale]
        return translated
    except Exception as e:
        logger.error(f"翻译失败: {e}")
        return text
def _make_id(source: str, title: str) -> str:
    """Build a news ID: the first 16 hex characters of md5("<source>:<title>")."""
    payload = f"{source}:{title}".encode()
    return hashlib.md5(payload).hexdigest()[:16]
def _safe_ts(val, default=0) -> int:
    """Convert a loosely-typed timestamp value to epoch seconds.

    Accepted inputs:
      - int/float: taken as epoch seconds, or as milliseconds when the value
        is 2000000000 or larger;
      - str of 10 or more ASCII digits: treated like the number above;
      - str starting with ``YYYY-MM-DD HH:MM:SS`` or ``YYYY-MM-DDTHH:MM:SS``
        (anything after the seconds is ignored), read as local time.

    Args:
        val: The value to convert.
        default: Returned when ``val`` cannot be converted; when falsy, the
            current time is returned instead.

    Returns:
        Epoch seconds as an int.
    """
    try:
        if isinstance(val, str):
            text = val.strip()
            # A digits-only string is an epoch value sent as text. Require
            # 10+ digits so compact dates like "20240101" are not mistaken
            # for an epoch.
            if text.isascii() and text.isdigit() and len(text) >= 10:
                val = int(text)
            else:
                for fmt in ("%Y-%m-%d %H:%M:%S", "%Y-%m-%dT%H:%M:%S"):
                    try:
                        return int(datetime.strptime(text[:19], fmt).timestamp())
                    except ValueError:
                        continue
        if isinstance(val, (int, float)):
            # Millisecond timestamps are converted to seconds.
            return int(val) if val < 2000000000 else int(val / 1000)
    except Exception:
        pass
    return default or int(time.time())
async def fetch_jin10() -> list:
    """Fetch the Jin10 flash-news list.

    Returns:
        A list of news dicts (``id``, ``source``, ``source_name``, ``title``,
        ``url``, ``timestamp``, ``time_str``, ``important``). On any error
        the failure is logged and the items collected so far are returned.
    """
    endpoint = "https://flash-api.jin10.com/get_flash_list"
    query = {"channel": "-8200", "vip": "1", "max_time": "", "t": "1"}
    req_headers = {"x-app-id": "bVBF4FyRTn5NJF5n", "x-version": "1.0.0"}
    results = []
    try:
        async with httpx.AsyncClient(timeout=TIMEOUT) as client:
            resp = await client.get(endpoint, params=query, headers=req_headers)
            for entry in resp.json().get("data", []):
                # The "data" field may be a dict or plain text.
                payload = entry.get("data")
                if isinstance(payload, dict):
                    content = payload.get("content", "") or payload.get("title", "")
                elif isinstance(payload, str):
                    content = payload
                else:
                    content = ""
                # Fall back to the top-level "content" field.
                content = content or entry.get("content", "")
                if not content:
                    continue
                # Strip HTML tags.
                content = re.sub(r"<[^>]+>", "", content).strip()
                if not content:
                    continue
                raw_time = entry.get("time", "")
                news_id = entry.get("id", "")
                if news_id:
                    news_url = f"https://www.jin10.com/flash_detail/{news_id}.html"
                else:
                    news_url = ""
                results.append({
                    "id": _make_id("jin10", content[:80]),
                    "source": "jin10",
                    "source_name": "金十数据",
                    "title": content[:200],
                    "url": news_url,
                    "timestamp": _safe_ts(raw_time),
                    "time_str": raw_time,
                    "important": entry.get("important", 0) == 1,
                })
    except Exception as e:
        logger.error(f"金十数据抓取失败: {e}")
    return results
async def fetch_wallstreet() -> list:
    """Fetch the Wallstreetcn live-news feed.

    Returns:
        A list of news dicts (``id``, ``source``, ``source_name``, ``title``,
        ``url``, ``timestamp``, ``time_str``, ``important``). On any error
        the failure is logged and the items collected so far are returned.
    """
    endpoint = "https://api-one.wallstcn.com/apiv1/content/lives"
    query = {"channel": "global-channel", "limit": "20"}
    results = []
    try:
        async with httpx.AsyncClient(timeout=TIMEOUT) as client:
            resp = await client.get(endpoint, params=query)
            for entry in resp.json().get("data", {}).get("items", []):
                headline = entry.get("content_text", "") or entry.get("title", "")
                if not headline:
                    continue
                # Strip HTML tags.
                headline = re.sub(r"<[^>]+>", "", headline).strip()
                if not headline:
                    continue
                ts = _safe_ts(entry.get("display_time", 0))
                if ts:
                    clock = datetime.fromtimestamp(ts).strftime("%H:%M:%S")
                else:
                    clock = ""
                results.append({
                    "id": _make_id("wallstreet", headline[:80]),
                    "source": "wallstreet",
                    "source_name": "华尔街见闻",
                    "title": headline[:200],
                    "url": entry.get("uri", "") or "",
                    "timestamp": ts,
                    "time_str": clock,
                    "important": entry.get("is_important", False),
                })
    except Exception as e:
        logger.error(f"华尔街见闻抓取失败: {e}")
    return results
async def fetch_kr36() -> list:
    """Fetch the 36Kr newsflash feed.

    Returns:
        A list of news dicts (``id``, ``source``, ``source_name``, ``title``,
        ``url``, ``timestamp``, ``time_str``, ``important``). On any error
        the failure is logged and the items collected so far are returned.
    """
    endpoint = "https://36kr.com/api/newsflash"
    query = {"per_page": "20"}
    results = []
    try:
        async with httpx.AsyncClient(timeout=TIMEOUT) as client:
            resp = await client.get(endpoint, params=query)
            for entry in resp.json().get("data", {}).get("items", []):
                headline = entry.get("title", "") or entry.get("entity_name", "")
                if not headline:
                    continue
                ts = _safe_ts(entry.get("published_at", ""))
                news_id = entry.get("id", "")
                if news_id:
                    news_url = f"https://36kr.com/newsflashes/{news_id}"
                else:
                    news_url = ""
                if ts:
                    clock = datetime.fromtimestamp(ts).strftime("%H:%M:%S")
                else:
                    clock = ""
                results.append({
                    "id": _make_id("kr36", headline[:80]),
                    "source": "kr36",
                    "source_name": "36氪",
                    "title": headline[:200],
                    "url": news_url,
                    "timestamp": ts,
                    "time_str": clock,
                    "important": False,
                })
    except Exception as e:
        logger.error(f"36氪抓取失败: {e}")
    return results
async def fetch_sina() -> list:
    """Fetch the Sina Finance rolling-news feed.

    Returns:
        A list of news dicts (``id``, ``source``, ``source_name``, ``title``,
        ``url``, ``timestamp``, ``time_str``, ``important``). On any error
        the failure is logged and the items collected so far are returned.
    """
    endpoint = "https://feed.mix.sina.com.cn/api/roll/get"
    query = {"pageid": "153", "lid": "2516", "k": "", "num": "20", "page": "1"}
    results = []
    try:
        async with httpx.AsyncClient(timeout=TIMEOUT) as client:
            resp = await client.get(endpoint, params=query)
            for entry in resp.json().get("result", {}).get("data", []):
                headline = entry.get("title", "")
                if not headline:
                    continue
                # Strip HTML tags.
                headline = re.sub(r"<[^>]+>", "", headline).strip()
                if not headline:
                    continue
                ts = _safe_ts(entry.get("ctime", entry.get("createtime", 0)))
                if ts:
                    clock = datetime.fromtimestamp(ts).strftime("%H:%M:%S")
                else:
                    clock = ""
                results.append({
                    "id": _make_id("sina", headline[:80]),
                    "source": "sina",
                    "source_name": "新浪财经",
                    "title": headline[:200],
                    "url": entry.get("url", "") or entry.get("link", "") or "",
                    "timestamp": ts,
                    "time_str": clock,
                    "important": False,
                })
    except Exception as e:
        logger.error(f"新浪财经抓取失败: {e}")
    return results
async def fetch_google_news() -> list:
    """Fetch the Google News technology-topic RSS feed with translated headlines.

    At most the first 20 ``<item>`` elements are processed. The trailing
    " - <publisher>" part of each title is split off: the publisher goes into
    ``source_name`` and only the remaining headline is translated and stored
    as ``title``.

    Returns:
        A list of news dicts with the keys ``id``, ``source``,
        ``source_name``, ``title``, ``url``, ``timestamp``, ``time_str`` and
        ``important`` (always ``False`` for this source). Any exception raised
        while fetching or parsing is logged and the items collected up to that
        point are returned.
    """
    url = "https://news.google.com/rss/topics/CAAqJggKIiBDQkFTRWdvSUwyMHZNRGRqTVhZU0FtVnVHZ0pWVXlnQVAB"
    params = {"hl": "en-US", "gl": "US", "ceid": "US:en"}
    results = []
    try:
        async with httpx.AsyncClient(timeout=TIMEOUT) as client:
            resp = await client.get(url, params=params, headers={"User-Agent": "Mozilla/5.0"})
            root = ET.fromstring(resp.text)
            for item in root.findall(".//item")[:20]:
                title = item.findtext("title", "").strip()
                if not title:
                    continue
                # Strip the publisher suffix (e.g. " - Bloomberg.com") from the
                # headline; the publisher is still reported through source_name.
                headline, sep, source_tag = title.rpartition(" - ")
                if not sep:
                    headline, source_tag = title, ""
                # Never end up with an empty headline: fall back to the full title.
                headline = headline.strip() or title
                news_url = item.findtext("link", "").strip()
                ts = 0
                pub = item.findtext("pubDate", "")
                if pub:
                    try:
                        ts = int(parsedate_to_datetime(pub).timestamp())
                    except Exception:
                        # Unparseable date: fall back to the current time.
                        ts = int(time.time())
                title_zh = await translate_to_zh(headline)
                results.append({
                    # The id is still derived from the full original title so
                    # that ids generated before the suffix was stripped match.
                    "id": _make_id("google", title[:80]),
                    "source": "google",
                    "source_name": f"Google News ({source_tag})" if source_tag else "Google News",
                    "title": title_zh[:200],
                    "url": news_url,
                    "timestamp": ts,
                    "time_str": datetime.fromtimestamp(ts).strftime("%H:%M:%S") if ts else "",
                    "important": False,
                })
    except Exception as e:
        logger.error(f"Google News 抓取失败: {e}")
    return results
async def fetch_finviz() -> list:
    """Scrape headlines from the Finviz news page and translate them.

    Links and titles are pulled out of the HTML with a regular expression;
    when that finds nothing, a second pattern extracts titles only and the
    URL is left empty. At most 20 entries are processed, and every entry is
    stamped with the time of the scrape.

    Returns:
        A list of news dicts with the keys ``id``, ``source``,
        ``source_name``, ``title``, ``url``, ``timestamp``, ``time_str`` and
        ``important`` (always ``False`` for this source). Any exception raised
        while fetching or parsing is logged and the items collected up to that
        point are returned.
    """
    collected = []
    try:
        async with httpx.AsyncClient(timeout=TIMEOUT) as client:
            response = await client.get(
                "https://finviz.com/news.ashx",
                headers={"User-Agent": "Mozilla/5.0"},
            )
            # Primary pattern: capture both the href and the link text.
            pairs = re.findall(r'class="nn-tab-link"[^>]*href="([^"]*)"[^>]*>([^<]+)', response.text)
            if not pairs:
                # Fallback pattern: link text only, paired with an empty URL.
                pairs = [
                    ("", text)
                    for text in re.findall(r'class="nn-tab-link"[^>]*>([^<]+)', response.text)
                ]
            for href, raw_title in pairs[:20]:
                headline = raw_title.strip()
                if not headline:
                    continue
                translated = await translate_to_zh(headline)
                collected.append({
                    # The id is derived from the untranslated headline.
                    "id": _make_id("finviz", headline[:80]),
                    "source": "finviz",
                    "source_name": "Finviz",
                    "title": translated[:200],
                    "url": href or "",
                    "timestamp": int(time.time()),
                    "time_str": datetime.now().strftime("%H:%M:%S"),
                    "important": False,
                })
    except Exception as exc:
        logger.error(f"Finviz 抓取失败: {exc}")
    return collected
async def fetch_techcrunch() -> list:
    """Fetch the TechCrunch RSS feed and translate its headlines.

    At most the first 20 ``<item>`` elements are processed.

    Returns:
        A list of news dicts with the keys ``id``, ``source``,
        ``source_name``, ``title``, ``url``, ``timestamp``, ``time_str`` and
        ``important`` (always ``False`` for this source). Any exception raised
        while fetching or parsing is logged and the items collected up to that
        point are returned.
    """
    collected = []
    try:
        async with httpx.AsyncClient(timeout=TIMEOUT) as client:
            response = await client.get(
                "https://techcrunch.com/feed/",
                headers={"User-Agent": "Mozilla/5.0"},
            )
            feed = ET.fromstring(response.text)
            for node in feed.findall(".//item")[:20]:
                headline = node.findtext("title", "").strip()
                if not headline:
                    continue
                link = node.findtext("link", "").strip()
                # 0 means "no publication date"; time_str is then left empty.
                published = 0
                raw_date = node.findtext("pubDate", "")
                if raw_date:
                    try:
                        published = int(parsedate_to_datetime(raw_date).timestamp())
                    except Exception:
                        # Unparseable date: fall back to the current time.
                        published = int(time.time())
                translated = await translate_to_zh(headline)
                if published:
                    clock = datetime.fromtimestamp(published).strftime("%H:%M:%S")
                else:
                    clock = ""
                collected.append({
                    # The id is derived from the untranslated headline.
                    "id": _make_id("techcrunch", headline[:80]),
                    "source": "techcrunch",
                    "source_name": "TechCrunch",
                    "title": translated[:200],
                    "url": link,
                    "timestamp": published,
                    "time_str": clock,
                    "important": False,
                })
    except Exception as exc:
        logger.error(f"TechCrunch 抓取失败: {exc}")
    return collected
# Source key → fetcher coroutine function. fetch_all() walks this mapping in
# order and calls the fetcher of every enabled source.
SOURCE_FETCHERS = {
"jin10": fetch_jin10,
"wallstreet": fetch_wallstreet,
"kr36": fetch_kr36,
"sina": fetch_sina,
"google": fetch_google_news,
"finviz": fetch_finviz,
"techcrunch": fetch_techcrunch,
}
# Source key → human-readable source name (same keys as SOURCE_FETCHERS).
SOURCE_NAMES = {
"jin10": "金十数据",
"wallstreet": "华尔街见闻",
"kr36": "36氪",
"sina": "新浪财经",
"google": "Google News",
"finviz": "Finviz",
"techcrunch": "TechCrunch",
}
async def fetch_all(enabled_sources: dict = None) -> list:
    """Run every enabled fetcher concurrently and merge their results.

    Args:
        enabled_sources: Mapping of source key to an on/off flag. A source
            missing from the mapping counts as enabled; ``None`` enables
            every source in SOURCE_FETCHERS.

    Returns:
        All fetched news dicts in one list, sorted by ``timestamp`` with the
        newest first. A fetcher that raised is logged and contributes nothing.
    """
    import asyncio

    if enabled_sources is None:
        enabled_sources = dict.fromkeys(SOURCE_FETCHERS, True)

    pending = [
        fetch()
        for key, fetch in SOURCE_FETCHERS.items()
        if enabled_sources.get(key, True)
    ]
    # return_exceptions=True keeps one failing source from cancelling the rest.
    outcomes = await asyncio.gather(*pending, return_exceptions=True)

    merged = []
    for outcome in outcomes:
        if isinstance(outcome, Exception):
            logger.error(f"抓取异常: {outcome}")
        elif isinstance(outcome, list):
            merged.extend(outcome)

    # Newest first.
    merged.sort(key=lambda entry: entry.get("timestamp", 0), reverse=True)
    return merged

View File

@@ -0,0 +1,187 @@
"""
数据存储模块 - 使用 JSON 文件持久化
"""
import json
import os
import time
import logging
from pathlib import Path
from threading import Lock
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Directory holding the JSON data files; overridable through the DATA_DIR
# environment variable.
DATA_DIR = os.environ.get("DATA_DIR", "/opt/news-bot/data")
# Named locks handed out by _get_lock(), used to guard against concurrent writes.
_locks = {}
def _get_lock(name: str) -> Lock:
    """Return the lock registered under *name*, creating it on first use."""
    try:
        return _locks[name]
    except KeyError:
        created = _locks[name] = Lock()
        return created
def _ensure_dir():
    """Create DATA_DIR, including missing parents, if it does not exist yet."""
    data_root = Path(DATA_DIR)
    data_root.mkdir(parents=True, exist_ok=True)
def _read_json(filename: str, default=None):
    """Load and return the JSON content of DATA_DIR/filename.

    Args:
        filename: File name relative to DATA_DIR.
        default: Value returned when the file is missing or cannot be read;
            ``None`` is replaced by a fresh empty dict.

    Returns:
        The parsed JSON value, or the fallback described above. Read and
        parse errors are logged, never raised.
    """
    fallback = {} if default is None else default
    target = os.path.join(DATA_DIR, filename)
    try:
        if not os.path.exists(target):
            return fallback
        with open(target, "r", encoding="utf-8") as handle:
            return json.load(handle)
    except Exception as exc:
        logger.error(f"读取 {filename} 失败: {exc}")
        return fallback
def _write_json(filename: str, data):
    """Serialize *data* as JSON into DATA_DIR/filename.

    The payload is first written to a temporary sibling file and then moved
    over the target with ``os.replace``. Opening the target itself with "w"
    would truncate it immediately, so a failure part-way through
    ``json.dump`` (for example a non-serializable value) would destroy the
    previously stored data; with the temporary file the old content stays
    intact unless the new content was written completely.

    Args:
        filename: File name relative to DATA_DIR.
        data: Any JSON-serializable value.

    Write errors are logged, never raised.
    """
    _ensure_dir()
    filepath = os.path.join(DATA_DIR, filename)
    tmp_path = f"{filepath}.tmp"
    try:
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(data, f, ensure_ascii=False, indent=2)
        os.replace(tmp_path, filepath)
    except Exception as e:
        logger.error(f"写入 {filename} 失败: {e}")
        # Best-effort cleanup of the partial temporary file; it may not exist
        # if open() itself failed, which is fine.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
# ========== 新闻存储 ==========
def save_news(news_list: list):
    """Append new items to news.json, skipping ids that are already stored.

    Items without an ``id`` cannot be de-duplicated and are skipped with a
    warning (previously such an item raised ``KeyError`` and the whole batch
    was lost). Only the most recent 2000 stored items are kept.

    Args:
        news_list: News dicts to store.

    Returns:
        The number of items that were actually added.
    """
    with _get_lock("news"):
        existing = _read_json("news.json", [])
        existing_ids = {n.get("id") for n in existing}
        added = 0
        skipped = 0
        for item in news_list:
            news_id = item.get("id")
            if news_id is None:
                skipped += 1
                continue
            if news_id in existing_ids:
                continue
            existing.append(item)
            existing_ids.add(news_id)
            added += 1
        if skipped:
            logger.warning(f"跳过 {skipped} 条缺少 id 的新闻")
        # Keep only the most recent 2000 items.
        if len(existing) > 2000:
            existing = existing[-2000:]
        _write_json("news.json", existing)
    return added
def get_news(limit=50, since_ts=0) -> list:
    """Return the last *limit* stored news items.

    Args:
        limit: Number of items taken from the end of the stored list.
        since_ts: When truthy, only items whose ``timestamp`` is at least
            this value are considered.
    """
    stored = _read_json("news.json", [])
    if not since_ts:
        return stored[-limit:]
    recent = [entry for entry in stored if entry.get("timestamp", 0) >= since_ts]
    return recent[-limit:]
def get_news_by_score(min_score=6, since_ts=0) -> list:
    """Return stored news items whose ``score`` is at least *min_score*.

    Looks at no more than the last 500 items returned by get_news() for the
    given *since_ts*; items without a score count as 0.
    """
    selected = []
    for entry in get_news(limit=500, since_ts=since_ts):
        if entry.get("score", 0) >= min_score:
            selected.append(entry)
    return selected
# ========== 已推送记录 ==========
def mark_pushed(news_ids: list):
    """Record *news_ids* as pushed, keeping only the 5000 most recent ids.

    The stored list is kept in insertion order. The previous implementation
    round-tripped through a ``set`` before slicing, and because sets are
    unordered the ``[-5000:]`` cut discarded arbitrary ids instead of the
    oldest ones, so recently pushed news could be forgotten and pushed again.

    Args:
        news_ids: Ids of the news items that have just been pushed.
    """
    with _get_lock("pushed"):
        previous = _read_json("pushed.json", [])
        # De-duplicate the incoming ids while preserving their order.
        incoming = list(dict.fromkeys(news_ids))
        incoming_set = set(incoming)
        # Ids that are marked again move to the end so they count as recent.
        merged = [pid for pid in previous if pid not in incoming_set]
        merged.extend(incoming)
        # Keep only the most recent 5000 ids.
        _write_json("pushed.json", merged[-5000:])
def is_pushed(news_id: str) -> bool:
    """Return True when *news_id* is recorded in pushed.json."""
    return news_id in get_pushed_ids()
def get_pushed_ids() -> set:
    """Return every id stored in pushed.json as a set."""
    stored_ids = _read_json("pushed.json", [])
    return {*stored_ids}
# ========== 用户设置 ==========
# Default values; get_settings() uses them to fill in any key that is missing
# from settings.json.
DEFAULT_SETTINGS = {
"push_enabled": True,
"batch_interval": 30, # Interval between digest pushes, in minutes
"min_score": 6, # Minimum score for a news item to be pushed
"instant_score": 8, # Score threshold for an instant push
# Per-source on/off switches. toggle_source() treats a source that is missing
# from this mapping as currently enabled.
"sources": {
"jin10": True,
"wallstreet": True,
"kr36": True,
"sina": True,
},
# Keywords of interest; edited through add_keyword() / remove_keyword().
"keywords": [
"AI", "英伟达", "NVIDIA", "AMD", "存储", "光模块", "算力",
"华为", "腾讯", "阿里", "字节", "小米",
"收购", "突破", "泄露", "内幕", "传闻", "重磅",
"A股", "港股", "美股",
],
}
def get_settings() -> dict:
    """Return the user settings with every missing key filled from the defaults.

    Default values are deep-copied before they are handed out. Previously the
    nested ``sources`` dict and ``keywords`` list were shared with
    DEFAULT_SETTINGS, so toggle_source() / add_keyword() silently modified the
    defaults themselves whenever settings.json lacked those keys.

    The former ``if settings is None`` branch is gone: _read_json() replaces a
    ``None`` default with an empty dict, so that branch could never run. This
    function therefore only reads; persisting is left to the update helpers,
    which write while holding the settings lock.

    Returns:
        A settings dict that the caller may freely modify.
    """
    import copy

    settings = _read_json("settings.json", None)
    # A missing, unreadable or malformed file is treated as "no settings yet".
    if not isinstance(settings, dict):
        settings = {}
    # Fill in missing fields.
    for key, default_value in DEFAULT_SETTINGS.items():
        if key not in settings:
            settings[key] = copy.deepcopy(default_value)
    return settings
def update_settings(updates: dict):
    """Merge *updates* into the stored settings, persist and return them.

    The merge is a top-level ``dict.update``: a key present in *updates*
    replaces the stored value for that key as a whole.
    """
    lock = _get_lock("settings")
    with lock:
        merged = get_settings()
        merged.update(updates)
        _write_json("settings.json", merged)
    return merged
def toggle_source(source_name: str) -> bool:
    """Flip the on/off switch of *source_name* and return its new state.

    A source that has no stored switch yet is treated as currently enabled,
    so its first toggle turns it off.
    """
    with _get_lock("settings"):
        config = get_settings()
        switches = config.setdefault("sources", {})
        enabled_now = not switches.get(source_name, True)
        switches[source_name] = enabled_now
        _write_json("settings.json", config)
        return enabled_now
def add_keyword(keyword: str) -> bool:
    """Add *keyword* to the keyword list.

    Returns:
        True when the keyword was added and saved, False when it was already
        present (nothing is written in that case).
    """
    with _get_lock("settings"):
        config = get_settings()
        keywords = config.get("keywords", [])
        if keyword not in keywords:
            keywords.append(keyword)
            config["keywords"] = keywords
            _write_json("settings.json", config)
            return True
        return False
def remove_keyword(keyword: str) -> bool:
    """Remove *keyword* from the keyword list.

    Returns:
        True when the keyword was removed and the change saved, False when it
        was not in the list (nothing is written in that case).
    """
    with _get_lock("settings"):
        config = get_settings()
        keywords = config.get("keywords", [])
        if keyword in keywords:
            keywords.remove(keyword)
            config["keywords"] = keywords
            _write_json("settings.json", config)
            return True
        return False

View File

@@ -0,0 +1,97 @@
"""
新闻总结生成模块
使用规则引擎生成中文总结(不调用外部 AI API)
"""
import logging
from datetime import datetime, timedelta
from collections import defaultdict
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
def generate_summary(news_list: list, period: str = "全天") -> str:
    """Build a text digest of *news_list* for the given period label.

    The digest lists up to 5 items scoring 8 or more, up to 8 items scoring
    6 to 7, a per-source item count, and the generation time.

    Args:
        news_list: News dicts; ``score`` and ``source_name`` are optional,
            ``title`` is required for items that get listed.
        period: Label inserted into the heading, e.g. "最近1小时", "上午",
            "全天" or "昨晚到今早".

    Returns:
        The digest as a newline-joined string, or a short "nothing to report"
        line when *news_list* is empty.
    """
    if not news_list:
        return f"📭 {period}暂无重要新闻"

    # Highest score first.
    ranked = sorted(news_list, key=lambda item: item.get("score", 0), reverse=True)

    # Number of items per source.
    per_source = defaultdict(int)
    for item in ranked:
        per_source[item.get("source_name", "未知")] += 1

    out = [
        f"📊 *{period}新闻总结*",
        "━━━━━━━━━━━━━━━",
        f"📰 共 {len(ranked)} 条快讯",
        "",
    ]

    # Top stories: score >= 8.
    top_items = [item for item in ranked if item.get("score", 0) >= 8]
    if top_items:
        out.append("🔴 *重磅快讯*")
        out.extend(
            f" {_score_emoji(item.get('score', 0))} {item['title']}"
            for item in top_items[:5]
        )
        out.append("")

    # Worth watching: score between 6 and 7 inclusive.
    notable = [item for item in ranked if 6 <= item.get("score", 0) <= 7]
    if notable:
        out.append("🟡 *值得关注*")
        out.extend(f"{item['title']}" for item in notable[:8])
        out.append("")

    # Source breakdown, most items first.
    out.append("📡 *来源分布*")
    by_volume = sorted(per_source.items(), key=lambda pair: pair[1], reverse=True)
    out.extend(f" {name}: {count}" for name, count in by_volume)
    out.append("")
    out.append(f"⏰ 生成时间: {datetime.now().strftime('%H:%M')}")
    return "\n".join(out)
def _score_emoji(score: int) -> str:
"""根据评分返回 emoji"""
if score >= 9:
return "🔥"
elif score >= 8:
return ""
elif score >= 7:
return "📌"
elif score >= 6:
return "📎"
else:
return ""
def get_period_range(period: str) -> tuple:
    """Translate a period label into a ``(start_ts, end_ts)`` pair.

    Both values are integer Unix timestamps computed from the local clock:

    * "最近1小时": the last hour up to now.
    * "上午": today 06:00 up to now, capped at today 12:00.
    * "全天": today 00:00 up to now.
    * "昨晚到今早": yesterday 20:00 up to today 08:00.
    * anything else: the last three hours up to now.
    """
    current = datetime.now()
    midnight = current.replace(hour=0, minute=0, second=0, microsecond=0)
    # The window ends "now" unless a branch below sets a fixed end.
    end = current

    if period == "最近1小时":
        begin = current - timedelta(hours=1)
    elif period == "上午":
        begin = midnight.replace(hour=6)
        end = min(current, midnight.replace(hour=12))
    elif period == "全天":
        begin = midnight
    elif period == "昨晚到今早":
        begin = (midnight - timedelta(days=1)).replace(hour=20)
        end = midnight.replace(hour=8)
    else:
        begin = current - timedelta(hours=3)

    return int(begin.timestamp()), int(end.timestamp())