Rename to hkt.sh

2026-03-21 01:10:53 +08:00
parent 76a263d0f9
commit 8f1171fe99
6676 changed files with 1724268 additions and 0 deletions
--- a/scripts/tmdb_emby_sync.py
+++ b/scripts/tmdb_emby_sync.py
@@ -0,0 +1,203 @@
+#!/usr/bin/env python3 -u
+"""
+Batch update Emby episode overviews from TMDB Chinese metadata.
+Run with: python3 -u scripts/tmdb_emby_sync.py
+"""
+
+import json
+import re
+import sys
+import time
+import urllib.request
+import urllib.parse
+import urllib.error
+from html import unescape
+
+# Base URL of the Emby server; every API path is appended to it.
+EMBY_URL = "http://145.239.143.92:8096"
+# Sent as the `api_key` query parameter on every Emby request.
+# NOTE(review): this credential is committed in source -- consider loading it
+# from the environment and rotating the key.
+API_KEY = "e3e52b1dcb8b47c39d46b5256bf87081"
+# Emby user id used in the /Users/{uid}/Items/{item} lookup before an update.
+ADMIN_UID = "0f026d40c1e04bb7a099aab75a501614"
+
+# Series to process, as (display name, Emby series id, TMDB TV id) tuples.
+# The Emby id is kept as a string because episodes are grouped by str(SeriesId).
+SERIES = [
+    ("小猪佩奇", "13", 12225),
+    ("安全警长啦咘啦哆", "18", 219799),
+    ("动物神探队", "11", 195407),
+    ("啦咘啦哆警长大战羚羚羊", "12", 253041),
+    ("布鲁伊", "1548", 82728),
+    ("汪汪队立大功", "14", 57532),
+    ("小恐龙大冒险", "1547", 82027),
+    ("海底小纵队", "16", 37472),
+    ("海底小纵队：中国之旅", "17", 132983),
+    ("小马宝莉：友谊大魔法", "15", 20085),
+]
+
+
+def api_get(path, params=None):
+    if params is None:
+        params = {}
+    params["api_key"] = API_KEY
+    url = f"{EMBY_URL}{path}?{urllib.parse.urlencode(params)}"
+    with urllib.request.urlopen(url, timeout=30) as resp:
+        return json.loads(resp.read())
+
+
+def api_post(path, data, params=None):
+    if params is None:
+        params = {}
+    params["api_key"] = API_KEY
+    url = f"{EMBY_URL}{path}?{urllib.parse.urlencode(params)}"
+    body = json.dumps(data).encode("utf-8")
+    req = urllib.request.Request(url, data=body, method="POST")
+    req.add_header("Content-Type", "application/json")
+    with urllib.request.urlopen(req, timeout=30) as resp:
+        return resp.status
+
+
+def fetch_all_episodes():
+    """Fetch all episodes once, return dict keyed by SeriesId."""
+    from collections import defaultdict
+    by_series = defaultdict(list)
+    start = 0
+    while True:
+        data = api_get("/Items", {
+            "Recursive": "true",
+            "IncludeItemTypes": "Episode",
+            "Fields": "Overview,ParentIndexNumber,IndexNumber,SeriesId,SeriesName",
+            "StartIndex": str(start),
+            "Limit": "200",
+        })
+        for item in data["Items"]:
+            by_series[str(item.get("SeriesId", ""))].append(item)
+        start += len(data["Items"])
+        if start >= data["TotalRecordCount"]:
+            break
+    return by_series
+
+_all_eps = None
+def fetch_emby_episodes(series_id):
+    global _all_eps
+    if _all_eps is None:
+        _all_eps = fetch_all_episodes()
+    return _all_eps.get(str(series_id), [])
+
+
+def fetch_and_parse_tmdb_season(tmdb_id, season_num):
+    """Fetch TMDB season page and parse episode overviews."""
+    url = f"https://www.themoviedb.org/tv/{tmdb_id}/season/{season_num}?language=zh-CN"
+    req = urllib.request.Request(url, headers={
+        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
+        "Accept-Language": "zh-CN,zh;q=0.9",
+    })
+    try:
+        with urllib.request.urlopen(req, timeout=30) as resp:
+            html = resp.read().decode("utf-8")
+    except urllib.error.HTTPError as e:
+        if e.code == 404:
+            return {}
+        raise
+
+    episodes = {}
+    # Phrases that indicate no real content on TMDB
+    PLACEHOLDER_PHRASES = [
+        "暂无英文版的简介",
+        "We don't have an overview",
+        "No overview",
+        "请添加内容帮助我们完善数据库",
+    ]
+
+    cards = re.split(r'<div class="card"', html)
+    for card in cards[1:]:
+        ep_match = re.search(r'data-episode-number="(\d+)"', card)
+        if not ep_match:
+            continue
+        ep_num = int(ep_match.group(1))
+        ov_match = re.search(
+            r'<div class="overview">\s*<p>(.*?)</p>', card, re.DOTALL
+        )
+        if ov_match:
+            overview = re.sub(r'<[^>]+>', '', ov_match.group(1)).strip()
+            overview = unescape(overview).strip()
+            # Skip placeholder/empty overviews
+            if len(overview) < 5:
+                continue
+            if any(ph in overview for ph in PLACEHOLDER_PHRASES):
+                continue
+            episodes[ep_num] = overview
+    return episodes
+
+
+def needs_update(ep):
+    ov = ep.get("Overview", "")
+    return not ov or len(ov.strip()) < 5
+
+
+def process_series(series_name, series_id, tmdb_id):
+    print(f"\n{'='*50}", flush=True)
+    print(f"{series_name} (Emby:{series_id} TMDB:{tmdb_id})", flush=True)
+
+    emby_eps = fetch_emby_episodes(series_id)
+    missing = [e for e in emby_eps if needs_update(e)]
+    print(f"Total: {len(emby_eps)}, Missing: {len(missing)}", flush=True)
+
+    if not missing:
+        print("Nothing to update.", flush=True)
+        return 0
+
+    seasons = sorted(set(e.get("ParentIndexNumber", 0) for e in missing))
+    print(f"Seasons: {seasons}", flush=True)
+
+    updated = 0
+    no_tmdb = 0
+
+    for sn in seasons:
+        print(f"  S{sn:02d}: fetching TMDB...", end=" ", flush=True)
+        try:
+            tmdb_eps = fetch_and_parse_tmdb_season(tmdb_id, sn)
+            print(f"{len(tmdb_eps)} eps found", flush=True)
+        except Exception as e:
+            print(f"ERROR: {e}", flush=True)
+            continue
+
+        time.sleep(1.5)
+
+        season_missing = [
+            e for e in missing if e.get("ParentIndexNumber") == sn
+        ]
+
+        for ep in season_missing:
+            ep_num = ep.get("IndexNumber")
+            if ep_num is None:
+                continue
+
+            overview = tmdb_eps.get(ep_num)
+            if not overview:
+                no_tmdb += 1
+                continue
+
+            try:
+                item = api_get(f"/Users/{ADMIN_UID}/Items/{ep['Id']}")
+                item["Overview"] = overview
+                api_post(f"/Items/{ep['Id']}", item)
+                updated += 1
+                print(f"    ✓ E{ep_num:02d}: {overview[:50]}", flush=True)
+            except Exception as e:
+                print(f"    ✗ E{ep_num:02d}: {e}", flush=True)
+
+            time.sleep(0.2)
+
+    print(f"  Done: {updated} updated, {no_tmdb} no TMDB data", flush=True)
+    return updated
+
+
+def main():
+    target = sys.argv[1] if len(sys.argv) > 1 else None
+    total = 0
+    for name, sid, tid in SERIES:
+        if target and target not in name:
+            continue
+        total += process_series(name, sid, tid)
+    print(f"\n{'='*50}", flush=True)
+    print(f"TOTAL UPDATED: {total}", flush=True)
+
+
+if __name__ == "__main__":
+    main()