204 lines
6.2 KiB
Python
204 lines
6.2 KiB
Python
#!/usr/bin/env -S python3 -u
|
|
"""
|
|
Batch update Emby episode overviews from TMDB Chinese (zh-CN) metadata.

Run with: python3 -u scripts/tmdb_emby_sync.py [NAME_FILTER]
If NAME_FILTER is given, only series whose name contains it are processed.
|
|
"""
|
|
|
|
import json
import os
import re
import sys
import time
import urllib.error
import urllib.parse
import urllib.request
from html import unescape
|
|
|
|
# Emby connection settings.
#
# Each value can be overridden through an environment variable so the script
# can target another server (or use another key) without editing the source.
# The literals below are the previous hard-coded values, kept as defaults for
# backward compatibility.
# NOTE(review): a live-looking API key and user id are committed here —
# consider rotating them and dropping the defaults.

# Base URL of the Emby server; a trailing "/" is removed so that
# f"{EMBY_URL}{path}" never contains a double slash.
EMBY_URL = os.environ.get("EMBY_URL", "http://145.239.143.92:8096").rstrip("/")

# API key sent as the "api_key" query parameter on every Emby request.
API_KEY = os.environ.get("EMBY_API_KEY", "e3e52b1dcb8b47c39d46b5256bf87081")

# Emby user id used in the /Users/{id}/Items/{item} lookup.
ADMIN_UID = os.environ.get("EMBY_ADMIN_UID", "0f026d40c1e04bb7a099aab75a501614")
|
|
|
|
# Series handled by this script, one tuple per series:
#   (display name, Emby series id as a string, TMDB TV id as an int)
# The display name is what the optional command-line filter is matched
# against; the Emby id selects the episodes; the TMDB id selects the pages
# that overviews are scraped from.
SERIES = [
    ("小猪佩奇", "13", 12225),
    ("安全警长啦咘啦哆", "18", 219799),
    ("动物神探队", "11", 195407),
    ("啦咘啦哆警长大战羚羚羊", "12", 253041),
    ("布鲁伊", "1548", 82728),
    ("汪汪队立大功", "14", 57532),
    ("小恐龙大冒险", "1547", 82027),
    ("海底小纵队", "16", 37472),
    ("海底小纵队:中国之旅", "17", 132983),
    ("小马宝莉:友谊大魔法", "15", 20085),
]
|
|
|
|
|
|
def api_get(path, params=None):
    """Send a GET request to the Emby server and return the decoded JSON body.

    Args:
        path: Request path appended to ``EMBY_URL`` (for example ``"/Items"``).
        params: Optional mapping of query parameters. The mapping is copied,
            so the caller's object is never modified.

    Returns:
        The response body parsed with ``json.loads``.

    Raises:
        urllib.error.URLError: If the request fails (``HTTPError`` for HTTP
            error statuses).
        json.JSONDecodeError: If the response body is not valid JSON.
    """
    # Work on a copy: injecting the API key into the caller's dict would leak
    # the key into whatever the caller does with that dict afterwards.
    query = dict(params) if params else {}
    query["api_key"] = API_KEY
    url = f"{EMBY_URL}{path}?{urllib.parse.urlencode(query)}"
    with urllib.request.urlopen(url, timeout=30) as resp:
        return json.loads(resp.read())
|
|
|
|
|
|
def api_post(path, data, params=None):
    """POST a JSON payload to the Emby server and return the HTTP status.

    Args:
        path: Request path appended to ``EMBY_URL``.
        data: JSON-serialisable object sent as the request body (UTF-8).
        params: Optional mapping of query parameters. The mapping is copied,
            so the caller's object is never modified.

    Returns:
        The ``status`` attribute of the HTTP response.

    Raises:
        urllib.error.URLError: If the request fails (``HTTPError`` for HTTP
            error statuses).
        TypeError: If ``data`` cannot be serialised by ``json.dumps``.
    """
    # Work on a copy so the API key is not written into the caller's dict.
    query = dict(params) if params else {}
    query["api_key"] = API_KEY
    url = f"{EMBY_URL}{path}?{urllib.parse.urlencode(query)}"
    body = json.dumps(data).encode("utf-8")
    req = urllib.request.Request(url, data=body, method="POST")
    req.add_header("Content-Type", "application/json")
    with urllib.request.urlopen(req, timeout=30) as resp:
        return resp.status
|
|
|
|
|
|
def fetch_all_episodes():
    """Fetch every episode from Emby once and group the items by series.

    The ``/Items`` endpoint is queried in pages of 200 items until the number
    of items received reaches the ``TotalRecordCount`` reported by the server.

    Returns:
        A ``defaultdict(list)`` mapping ``str(SeriesId)`` to the list of
        episode items of that series. Items without a ``SeriesId`` are stored
        under the empty-string key.

    Raises:
        KeyError: If a response lacks ``Items`` or ``TotalRecordCount``.
        urllib.error.URLError: Propagated from ``api_get``.
    """
    from collections import defaultdict

    by_series = defaultdict(list)
    start = 0
    while True:
        data = api_get("/Items", {
            "Recursive": "true",
            "IncludeItemTypes": "Episode",
            "Fields": "Overview,ParentIndexNumber,IndexNumber,SeriesId,SeriesName",
            "StartIndex": str(start),
            "Limit": "200",
        })
        items = data["Items"]
        for item in items:
            by_series[str(item.get("SeriesId", ""))].append(item)
        start += len(items)
        # An empty page means the offset can no longer advance. Stop even if
        # the reported total was not reached; otherwise the same request
        # would be repeated forever.
        if not items or start >= data["TotalRecordCount"]:
            break
    return by_series
|
|
|
|
# Lazily filled cache of all episodes grouped by series id (see
# fetch_emby_episodes); None until the first lookup.
_all_eps = None


def fetch_emby_episodes(series_id):
    """Return the cached list of Emby episodes for ``series_id``.

    The first call loads all episodes through ``fetch_all_episodes`` and keeps
    the result in the module-level ``_all_eps`` cache; later calls reuse it.
    ``series_id`` is converted with ``str`` before the lookup, and an empty
    list is returned when the series has no episodes in the cache.
    """
    global _all_eps
    cache = _all_eps
    if cache is None:
        cache = _all_eps = fetch_all_episodes()
    return cache.get(str(series_id), [])
|
|
|
|
|
|
def fetch_and_parse_tmdb_season(tmdb_id, season_num):
    """Download a TMDB season page (zh-CN) and extract episode overviews.

    Args:
        tmdb_id: TMDB TV show id used in the page URL.
        season_num: Season number used in the page URL.

    Returns:
        A dict mapping episode number (int) to overview text. Episodes whose
        overview is missing, shorter than five characters, or contains one of
        the known placeholder phrases are left out. An empty dict is returned
        when TMDB answers with HTTP 404.

    Raises:
        urllib.error.HTTPError: For any HTTP error status other than 404.
    """
    # Text TMDB shows when an episode has no real overview.
    placeholder_markers = (
        "暂无英文版的简介",
        "We don't have an overview",
        "No overview",
        "请添加内容帮助我们完善数据库",
    )

    request = urllib.request.Request(
        f"https://www.themoviedb.org/tv/{tmdb_id}/season/{season_num}?language=zh-CN",
        headers={
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
            "Accept-Language": "zh-CN,zh;q=0.9",
        },
    )
    try:
        with urllib.request.urlopen(request, timeout=30) as response:
            page = response.read().decode("utf-8")
    except urllib.error.HTTPError as err:
        if err.code != 404:
            raise
        return {}

    number_re = re.compile(r'data-episode-number="(\d+)"')
    overview_re = re.compile(r'<div class="overview">\s*<p>(.*?)</p>', re.DOTALL)
    tag_re = re.compile(r'<[^>]+>')

    overviews = {}
    # The first chunk is everything before the first episode card, so skip it.
    for chunk in re.split(r'<div class="card"', page)[1:]:
        number = number_re.search(chunk)
        if number is None:
            continue
        paragraph = overview_re.search(chunk)
        if paragraph is None:
            continue
        # Drop inline markup first, then decode HTML entities.
        text = unescape(tag_re.sub('', paragraph.group(1)).strip()).strip()
        if len(text) < 5:
            continue
        if any(marker in text for marker in placeholder_markers):
            continue
        overviews[int(number.group(1))] = text
    return overviews
|
|
|
|
|
|
def needs_update(ep):
    """Return True when the episode item has no usable overview.

    The overview counts as missing when the ``"Overview"`` value is absent or
    falsy, or when it has fewer than five characters once surrounding
    whitespace is stripped.
    """
    text = ep.get("Overview", "")
    if not text:
        return True
    return len(text.strip()) < 5
|
|
|
|
|
|
def process_series(series_name, series_id, tmdb_id):
    """Fill in missing Emby episode overviews for one series from TMDB.

    Episodes for which ``needs_update`` is true are grouped by season; each
    season page is fetched from TMDB once and matching overviews are written
    back to Emby. Progress is printed to stdout.

    Args:
        series_name: Display name, used only in the log output.
        series_id: Emby series id used to look up the episodes.
        tmdb_id: TMDB TV id used to fetch the season pages.

    Returns:
        The number of episodes whose overview was updated.
    """
    print(f"\n{'='*50}", flush=True)
    print(f"{series_name} (Emby:{series_id} TMDB:{tmdb_id})", flush=True)

    emby_eps = fetch_emby_episodes(series_id)
    missing = [e for e in emby_eps if needs_update(e)]
    print(f"Total: {len(emby_eps)}, Missing: {len(missing)}", flush=True)

    if not missing:
        print("Nothing to update.", flush=True)
        return 0

    # Group once by season using a single lookup rule. Episodes without a
    # season number cannot be matched to a TMDB season page, so they are
    # skipped instead of causing a pointless "season 0" fetch or a
    # None-vs-int comparison error when sorting.
    missing_by_season = {}
    unnumbered = 0
    for ep in missing:
        season = ep.get("ParentIndexNumber")
        if season is None:
            unnumbered += 1
            continue
        missing_by_season.setdefault(season, []).append(ep)
    if unnumbered:
        print(f"Skipping {unnumbered} episode(s) without a season number", flush=True)

    seasons = sorted(missing_by_season)
    print(f"Seasons: {seasons}", flush=True)

    updated = 0
    no_tmdb = 0

    for sn in seasons:
        print(f" S{sn:02d}: fetching TMDB...", end=" ", flush=True)
        try:
            tmdb_eps = fetch_and_parse_tmdb_season(tmdb_id, sn)
        except Exception as e:
            # Best effort: report the failure and move on to the next season.
            print(f"ERROR: {e}", flush=True)
            # Keep the same pause after a failed request so consecutive
            # failures do not hit TMDB back-to-back.
            time.sleep(1.5)
            continue
        print(f"{len(tmdb_eps)} eps found", flush=True)

        # Pause between TMDB page requests.
        time.sleep(1.5)

        for ep in missing_by_season[sn]:
            ep_num = ep.get("IndexNumber")
            if ep_num is None:
                continue

            overview = tmdb_eps.get(ep_num)
            if not overview:
                no_tmdb += 1
                continue

            try:
                # Fetch the full item, change only the overview, and post the
                # whole item back.
                item = api_get(f"/Users/{ADMIN_UID}/Items/{ep['Id']}")
                item["Overview"] = overview
                api_post(f"/Items/{ep['Id']}", item)
                updated += 1
                print(f" ✓ E{ep_num:02d}: {overview[:50]}", flush=True)
            except Exception as e:
                # One failed episode must not abort the rest of the batch.
                print(f" ✗ E{ep_num:02d}: {e}", flush=True)

            # Short pause between Emby updates.
            time.sleep(0.2)

    print(f" Done: {updated} updated, {no_tmdb} no TMDB data", flush=True)
    return updated
|
|
|
|
|
|
def main():
    """Process every configured series and print the overall update count.

    If a first command-line argument is given, only series whose name
    contains that text are processed.
    """
    name_filter = sys.argv[1] if len(sys.argv) > 1 else None
    selected = [
        entry for entry in SERIES
        if not name_filter or name_filter in entry[0]
    ]

    total = 0
    for name, sid, tid in selected:
        total += process_series(name, sid, tid)

    print(f"\n{'='*50}", flush=True)
    print(f"TOTAL UPDATED: {total}", flush=True)


# Run the sync only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|