""" 数据存储模块 - 使用 JSON 文件持久化 """ import json import os import time import logging from pathlib import Path from threading import Lock logger = logging.getLogger(__name__) DATA_DIR = os.environ.get("DATA_DIR", "/opt/news-bot/data") # 文件锁,防止并发写入 _locks = {} def _get_lock(name: str) -> Lock: if name not in _locks: _locks[name] = Lock() return _locks[name] def _ensure_dir(): """确保数据目录存在""" Path(DATA_DIR).mkdir(parents=True, exist_ok=True) def _read_json(filename: str, default=None): """读取 JSON 文件""" filepath = os.path.join(DATA_DIR, filename) try: if os.path.exists(filepath): with open(filepath, "r", encoding="utf-8") as f: return json.load(f) except Exception as e: logger.error(f"读取 {filename} 失败: {e}") return default if default is not None else {} def _write_json(filename: str, data): """写入 JSON 文件""" _ensure_dir() filepath = os.path.join(DATA_DIR, filename) try: with open(filepath, "w", encoding="utf-8") as f: json.dump(data, f, ensure_ascii=False, indent=2) except Exception as e: logger.error(f"写入 {filename} 失败: {e}") # ========== 新闻存储 ========== def save_news(news_list: list): """保存新闻列表(追加,自动去重)""" with _get_lock("news"): existing = _read_json("news.json", []) existing_ids = {n.get("id") for n in existing} added = 0 for item in news_list: if item.get("id") not in existing_ids: existing.append(item) existing_ids.add(item["id"]) added += 1 # 只保留最近 2000 条 if len(existing) > 2000: existing = existing[-2000:] _write_json("news.json", existing) return added def get_news(limit=50, since_ts=0) -> list: """获取新闻,可按时间过滤""" news = _read_json("news.json", []) if since_ts: news = [n for n in news if n.get("timestamp", 0) >= since_ts] return news[-limit:] def get_news_by_score(min_score=6, since_ts=0) -> list: """按评分获取新闻""" news = get_news(limit=500, since_ts=since_ts) return [n for n in news if n.get("score", 0) >= min_score] # ========== 已推送记录 ========== def mark_pushed(news_ids: list): """标记新闻为已推送""" with _get_lock("pushed"): pushed = set(_read_json("pushed.json", [])) pushed.update(news_ids) # 只保留最近 5000 条 pushed_list = list(pushed)[-5000:] _write_json("pushed.json", pushed_list) def is_pushed(news_id: str) -> bool: """检查新闻是否已推送""" pushed = set(_read_json("pushed.json", [])) return news_id in pushed def get_pushed_ids() -> set: """获取所有已推送 ID""" return set(_read_json("pushed.json", [])) # ========== 用户设置 ========== DEFAULT_SETTINGS = { "push_enabled": True, "batch_interval": 30, # 汇总推送间隔(分钟) "min_score": 6, # 最低推送评分 "instant_score": 8, # 即时推送评分阈值 "sources": { "jin10": True, "wallstreet": True, "kr36": True, "sina": True, }, "keywords": [ "AI", "英伟达", "NVIDIA", "AMD", "存储", "光模块", "算力", "华为", "腾讯", "阿里", "字节", "小米", "收购", "突破", "泄露", "内幕", "传闻", "重磅", "A股", "港股", "美股", ], } def get_settings() -> dict: """获取用户设置""" settings = _read_json("settings.json", None) if settings is None: settings = DEFAULT_SETTINGS.copy() _write_json("settings.json", settings) # 补全缺失字段 for k, v in DEFAULT_SETTINGS.items(): if k not in settings: settings[k] = v return settings def update_settings(updates: dict): """更新用户设置""" with _get_lock("settings"): settings = get_settings() settings.update(updates) _write_json("settings.json", settings) return settings def toggle_source(source_name: str) -> bool: """切换信息源开关,返回新状态""" with _get_lock("settings"): settings = get_settings() current = settings.get("sources", {}).get(source_name, True) settings.setdefault("sources", {})[source_name] = not current _write_json("settings.json", settings) return not current def add_keyword(keyword: str) -> bool: """添加关键词,返回是否成功""" with _get_lock("settings"): settings = get_settings() kw_list = settings.get("keywords", []) if keyword in kw_list: return False kw_list.append(keyword) settings["keywords"] = kw_list _write_json("settings.json", settings) return True def remove_keyword(keyword: str) -> bool: """删除关键词,返回是否成功""" with _get_lock("settings"): settings = get_settings() kw_list = settings.get("keywords", []) if keyword not in kw_list: return False kw_list.remove(keyword) settings["keywords"] = kw_list _write_json("settings.json", settings) return True