#!/usr/bin/env python3 -u """ Batch update Emby episode overviews from TMDB Chinese metadata. Run with: python3 -u scripts/tmdb_emby_sync.py """ import json import re import sys import time import urllib.request import urllib.parse import urllib.error from html import unescape EMBY_URL = "http://145.239.143.92:8096" API_KEY = "e3e52b1dcb8b47c39d46b5256bf87081" ADMIN_UID = "0f026d40c1e04bb7a099aab75a501614" SERIES = [ ("小猪佩奇", "13", 12225), ("安全警长啦咘啦哆", "18", 219799), ("动物神探队", "11", 195407), ("啦咘啦哆警长大战羚羚羊", "12", 253041), ("布鲁伊", "1548", 82728), ("汪汪队立大功", "14", 57532), ("小恐龙大冒险", "1547", 82027), ("海底小纵队", "16", 37472), ("海底小纵队:中国之旅", "17", 132983), ("小马宝莉:友谊大魔法", "15", 20085), ] def api_get(path, params=None): if params is None: params = {} params["api_key"] = API_KEY url = f"{EMBY_URL}{path}?{urllib.parse.urlencode(params)}" with urllib.request.urlopen(url, timeout=30) as resp: return json.loads(resp.read()) def api_post(path, data, params=None): if params is None: params = {} params["api_key"] = API_KEY url = f"{EMBY_URL}{path}?{urllib.parse.urlencode(params)}" body = json.dumps(data).encode("utf-8") req = urllib.request.Request(url, data=body, method="POST") req.add_header("Content-Type", "application/json") with urllib.request.urlopen(req, timeout=30) as resp: return resp.status def fetch_all_episodes(): """Fetch all episodes once, return dict keyed by SeriesId.""" from collections import defaultdict by_series = defaultdict(list) start = 0 while True: data = api_get("/Items", { "Recursive": "true", "IncludeItemTypes": "Episode", "Fields": "Overview,ParentIndexNumber,IndexNumber,SeriesId,SeriesName", "StartIndex": str(start), "Limit": "200", }) for item in data["Items"]: by_series[str(item.get("SeriesId", ""))].append(item) start += len(data["Items"]) if start >= data["TotalRecordCount"]: break return by_series _all_eps = None def fetch_emby_episodes(series_id): global _all_eps if _all_eps is None: _all_eps = fetch_all_episodes() return _all_eps.get(str(series_id), []) def fetch_and_parse_tmdb_season(tmdb_id, season_num): """Fetch TMDB season page and parse episode overviews.""" url = f"https://www.themoviedb.org/tv/{tmdb_id}/season/{season_num}?language=zh-CN" req = urllib.request.Request(url, headers={ "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36", "Accept-Language": "zh-CN,zh;q=0.9", }) try: with urllib.request.urlopen(req, timeout=30) as resp: html = resp.read().decode("utf-8") except urllib.error.HTTPError as e: if e.code == 404: return {} raise episodes = {} # Phrases that indicate no real content on TMDB PLACEHOLDER_PHRASES = [ "暂无英文版的简介", "We don't have an overview", "No overview", "请添加内容帮助我们完善数据库", ] cards = re.split(r'
(.*?)
', card, re.DOTALL ) if ov_match: overview = re.sub(r'<[^>]+>', '', ov_match.group(1)).strip() overview = unescape(overview).strip() # Skip placeholder/empty overviews if len(overview) < 5: continue if any(ph in overview for ph in PLACEHOLDER_PHRASES): continue episodes[ep_num] = overview return episodes def needs_update(ep): ov = ep.get("Overview", "") return not ov or len(ov.strip()) < 5 def process_series(series_name, series_id, tmdb_id): print(f"\n{'='*50}", flush=True) print(f"{series_name} (Emby:{series_id} TMDB:{tmdb_id})", flush=True) emby_eps = fetch_emby_episodes(series_id) missing = [e for e in emby_eps if needs_update(e)] print(f"Total: {len(emby_eps)}, Missing: {len(missing)}", flush=True) if not missing: print("Nothing to update.", flush=True) return 0 seasons = sorted(set(e.get("ParentIndexNumber", 0) for e in missing)) print(f"Seasons: {seasons}", flush=True) updated = 0 no_tmdb = 0 for sn in seasons: print(f" S{sn:02d}: fetching TMDB...", end=" ", flush=True) try: tmdb_eps = fetch_and_parse_tmdb_season(tmdb_id, sn) print(f"{len(tmdb_eps)} eps found", flush=True) except Exception as e: print(f"ERROR: {e}", flush=True) continue time.sleep(1.5) season_missing = [ e for e in missing if e.get("ParentIndexNumber") == sn ] for ep in season_missing: ep_num = ep.get("IndexNumber") if ep_num is None: continue overview = tmdb_eps.get(ep_num) if not overview: no_tmdb += 1 continue try: item = api_get(f"/Users/{ADMIN_UID}/Items/{ep['Id']}") item["Overview"] = overview api_post(f"/Items/{ep['Id']}", item) updated += 1 print(f" ✓ E{ep_num:02d}: {overview[:50]}", flush=True) except Exception as e: print(f" ✗ E{ep_num:02d}: {e}", flush=True) time.sleep(0.2) print(f" Done: {updated} updated, {no_tmdb} no TMDB data", flush=True) return updated def main(): target = sys.argv[1] if len(sys.argv) > 1 else None total = 0 for name, sid, tid in SERIES: if target and target not in name: continue total += process_series(name, sid, tid) print(f"\n{'='*50}", flush=True) print(f"TOTAL UPDATED: {total}", flush=True) if __name__ == "__main__": main()