188 lines
5.2 KiB
Python
188 lines
5.2 KiB
Python
|
|
"""
|
||
|
|
数据存储模块 - 使用 JSON 文件持久化
|
||
|
|
"""
|
||
|
|
import json
|
||
|
|
import os
|
||
|
|
import time
|
||
|
|
import logging
|
||
|
|
from pathlib import Path
|
||
|
|
from threading import Lock
|
||
|
|
|
||
|
|
logger = logging.getLogger(__name__)

# Directory holding the JSON files; the DATA_DIR environment variable
# overrides the built-in default path.
DATA_DIR = os.getenv("DATA_DIR", "/opt/news-bot/data")

# Registry of per-name locks, used to prevent concurrent writes.
_locks = {}
|
||
|
|
|
||
|
|
|
||
|
|
def _get_lock(name: str) -> Lock:
    """Return the lock registered under ``name``, creating it on first use.

    Args:
        name: Key identifying the resource to guard.

    Returns:
        The ``Lock`` shared by every caller that passes the same ``name``.
    """
    lock = _locks.get(name)
    if lock is None:
        # A separate "check, then assign" would let two threads racing on the
        # first call each install their own Lock, defeating the mutual
        # exclusion. setdefault() inserts at most once and returns whichever
        # Lock ended up stored (relies on dict.setdefault being a single
        # atomic operation for str keys under CPython's GIL).
        lock = _locks.setdefault(name, Lock())
    return lock
|
||
|
|
|
||
|
|
|
||
|
|
def _ensure_dir():
    """Create the data directory (and any missing parents) if needed."""
    data_dir = Path(DATA_DIR)
    data_dir.mkdir(parents=True, exist_ok=True)
|
||
|
|
|
||
|
|
|
||
|
|
def _read_json(filename: str, default=None):
    """Load a JSON file from the data directory.

    Args:
        filename: File name relative to ``DATA_DIR``.
        default: Value returned when the file does not exist or cannot be
            read or parsed. ``None`` is replaced by an empty dict.

    Returns:
        The parsed JSON content, or the fallback value described above.
    """
    fallback = {} if default is None else default
    path = os.path.join(DATA_DIR, filename)
    try:
        if not os.path.exists(path):
            return fallback
        with open(path, "r", encoding="utf-8") as fh:
            return json.load(fh)
    except Exception as e:
        # Best effort: a broken or unreadable file is logged, not raised.
        logger.error(f"读取 {filename} 失败: {e}")
    return fallback
|
||
|
|
|
||
|
|
|
||
|
|
def _write_json(filename: str, data):
    """Serialize ``data`` as JSON into a file in the data directory.

    The content is first written to a sibling ``.tmp`` file and then moved
    over the target with ``os.replace``. Opening the target directly in "w"
    mode truncates it immediately, so a serialization or I/O error part-way
    through would destroy the previously stored data.

    Args:
        filename: File name relative to ``DATA_DIR``.
        data: JSON-serializable object to store.

    Failures while writing are logged and swallowed (best effort), in which
    case the existing file is left untouched.
    """
    _ensure_dir()
    filepath = os.path.join(DATA_DIR, filename)
    tmp_path = f"{filepath}.tmp"
    try:
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(data, f, ensure_ascii=False, indent=2)
        # Swap the finished file into place in one step.
        os.replace(tmp_path, filepath)
    except Exception as e:
        logger.error(f"写入 {filename} 失败: {e}")
        # Drop the partial temp file; it may not exist if open() itself failed.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
|
||
|
|
|
||
|
|
|
||
|
|
# ========== 新闻存储 ==========
|
||
|
|
|
||
|
|
def save_news(news_list: list):
    """Append news items to ``news.json``, skipping ids already stored.

    Args:
        news_list: Dicts describing news items; each is identified by the
            value of its ``"id"`` key.

    Returns:
        The number of items that were actually added.
    """
    with _get_lock("news"):
        stored = _read_json("news.json", [])
        seen_ids = {entry.get("id") for entry in stored}
        added = 0
        for item in news_list:
            # Read the id once with .get() so an item lacking "id" cannot
            # raise KeyError half-way through the batch; such items share the
            # id None for de-duplication purposes.
            item_id = item.get("id")
            if item_id in seen_ids:
                continue
            stored.append(item)
            seen_ids.add(item_id)
            added += 1
        # Keep only the last 2000 entries.
        if len(stored) > 2000:
            stored = stored[-2000:]
        _write_json("news.json", stored)
    return added
|
||
|
|
|
||
|
|
|
||
|
|
def get_news(limit=50, since_ts=0) -> list:
    """Return the last stored news items, optionally filtered by time.

    Args:
        limit: Maximum number of items to return, taken from the end of the
            stored list. Zero or a negative value yields an empty list.
        since_ts: When truthy, only items whose ``"timestamp"`` is greater
            than or equal to this value are kept; items without a timestamp
            count as 0.

    Returns:
        At most ``limit`` items, in stored order.
    """
    if limit <= 0:
        # news[-0:] is news[0:], i.e. the whole list, and a negative limit
        # would slice from the front; neither means "at most `limit` items".
        return []
    news = _read_json("news.json", [])
    if since_ts:
        news = [n for n in news if n.get("timestamp", 0) >= since_ts]
    return news[-limit:]
|
||
|
|
|
||
|
|
|
||
|
|
def get_news_by_score(min_score=6, since_ts=0) -> list:
    """Return news items whose score is at least ``min_score``.

    Only the items returned by ``get_news(limit=500, since_ts=since_ts)``
    are examined; an item without a ``"score"`` counts as 0.
    """
    selected = []
    for entry in get_news(limit=500, since_ts=since_ts):
        if entry.get("score", 0) >= min_score:
            selected.append(entry)
    return selected
|
||
|
|
|
||
|
|
|
||
|
|
# ========== 已推送记录 ==========
|
||
|
|
|
||
|
|
def mark_pushed(news_ids: list):
    """Record the given news ids as pushed in ``pushed.json``.

    Args:
        news_ids: Ids to add to the pushed record.
    """
    with _get_lock("pushed"):
        recorded = _read_json("pushed.json", [])
        # dict.fromkeys() de-duplicates while keeping insertion order. Going
        # through a set would scramble the order, so the cut below would
        # discard arbitrary ids instead of the oldest ones.
        ordered = list(dict.fromkeys([*recorded, *news_ids]))
        # Keep only the last 5000 ids.
        _write_json("pushed.json", ordered[-5000:])
|
||
|
|
|
||
|
|
|
||
|
|
def is_pushed(news_id: str) -> bool:
    """Tell whether ``news_id`` is listed in ``pushed.json``."""
    pushed_ids = set(_read_json("pushed.json", []))
    if news_id in pushed_ids:
        return True
    return False
|
||
|
|
|
||
|
|
|
||
|
|
def get_pushed_ids() -> set:
    """Return every id stored in ``pushed.json`` as a set."""
    stored_ids = _read_json("pushed.json", [])
    return set(stored_ids)
|
||
|
|
|
||
|
|
|
||
|
|
# ========== 用户设置 ==========
|
||
|
|
|
||
|
|
# Baseline user settings.
DEFAULT_SETTINGS = dict(
    push_enabled=True,
    batch_interval=30,  # digest push interval, in minutes
    min_score=6,  # minimum score for a push
    instant_score=8,  # score threshold for an instant push
    sources=dict.fromkeys(("jin10", "wallstreet", "kr36", "sina"), True),
    keywords=[
        "AI", "英伟达", "NVIDIA", "AMD", "存储", "光模块", "算力",
        "华为", "腾讯", "阿里", "字节", "小米",
        "收购", "突破", "泄露", "内幕", "传闻", "重磅",
        "A股", "港股", "美股",
    ],
)
|
||
|
|
|
||
|
|
|
||
|
|
def get_settings() -> dict:
    """Return the stored user settings with missing keys filled from defaults.

    If ``settings.json`` yields ``None``, a copy of ``DEFAULT_SETTINGS`` is
    written back and used. Any top-level key absent from the stored settings
    is filled in from ``DEFAULT_SETTINGS`` (in memory only).

    Returns:
        A settings dict that shares no mutable state with
        ``DEFAULT_SETTINGS``.
    """
    import copy  # local import: only this function needs it

    settings = _read_json("settings.json", None)
    if settings is None:
        # A shallow .copy() would still share the nested "sources" dict and
        # "keywords" list with the module-level defaults.
        settings = copy.deepcopy(DEFAULT_SETTINGS)
        _write_json("settings.json", settings)
    # Fill in missing fields.
    for key, value in DEFAULT_SETTINGS.items():
        if key not in settings:
            # Hand out a copy: callers modify the returned settings in place,
            # which must not leak into DEFAULT_SETTINGS.
            settings[key] = copy.deepcopy(value)
    return settings
|
||
|
|
|
||
|
|
|
||
|
|
def update_settings(updates: dict):
    """Merge ``updates`` into the user settings and persist the result.

    The merge is a plain ``dict.update``: a value in ``updates`` replaces the
    stored value for the same top-level key.

    Returns:
        The settings dict after the merge.
    """
    with _get_lock("settings"):
        merged = get_settings()
        merged.update(updates)
        _write_json("settings.json", merged)
    return merged
|
||
|
|
|
||
|
|
|
||
|
|
def toggle_source(source_name: str) -> bool:
    """Flip the enabled flag of a news source and persist the settings.

    Args:
        source_name: Key inside the ``"sources"`` setting. A name that is not
            present yet is treated as currently enabled.

    Returns:
        The new state of the source.
    """
    with _get_lock("settings"):
        settings = get_settings()
        # Work on a copy: the "sources" dict returned here may be the very
        # object held by DEFAULT_SETTINGS, and toggling it in place would
        # silently change the module-level defaults.
        sources = dict(settings.get("sources") or {})
        new_state = not sources.get(source_name, True)
        sources[source_name] = new_state
        settings["sources"] = sources
        _write_json("settings.json", settings)
    return new_state
|
||
|
|
|
||
|
|
|
||
|
|
def add_keyword(keyword: str) -> bool:
    """Add a keyword to the ``"keywords"`` setting and persist it.

    Args:
        keyword: Keyword to add.

    Returns:
        ``False`` if the keyword was already present, ``True`` otherwise.
    """
    with _get_lock("settings"):
        settings = get_settings()
        # Copy before appending: the list returned here may be the one held
        # by DEFAULT_SETTINGS, which must not be modified.
        keywords = list(settings.get("keywords") or [])
        if keyword in keywords:
            return False
        keywords.append(keyword)
        settings["keywords"] = keywords
        _write_json("settings.json", settings)
    return True
|
||
|
|
|
||
|
|
|
||
|
|
def remove_keyword(keyword: str) -> bool:
    """Remove a keyword from the ``"keywords"`` setting and persist it.

    Args:
        keyword: Keyword to remove.

    Returns:
        ``False`` if the keyword was not present, ``True`` otherwise.
    """
    with _get_lock("settings"):
        settings = get_settings()
        # Copy before removing: the list returned here may be the one held
        # by DEFAULT_SETTINGS, which must not be modified.
        keywords = list(settings.get("keywords") or [])
        if keyword not in keywords:
            return False
        keywords.remove(keyword)
        settings["keywords"] = keywords
        _write_json("settings.json", settings)
    return True
|