Rename to hkt.sh

This commit is contained in:
mango
2026-03-21 01:10:53 +08:00
parent 76a263d0f9
commit 8f1171fe99
6676 changed files with 1724268 additions and 0 deletions

203
scripts/tmdb_emby_sync.py Normal file
View File

@@ -0,0 +1,203 @@
#!/usr/bin/env python3 -u
# NOTE(review): some platforms pass "python3 -u" to env as a single argument,
# which would make direct execution fail -- confirm, or invoke via python3.
"""
Batch update Emby episode overviews from TMDB Chinese metadata.

For every series listed in SERIES, episodes whose Emby overview is empty or
shorter than five characters are filled with the zh-CN overview scraped from
the matching TMDB season page.

Run with: python3 -u scripts/tmdb_emby_sync.py [NAME_SUBSTRING]

When NAME_SUBSTRING is given, only series whose name contains it are processed.
"""
import json
import os
import re
import sys
import time
import urllib.error
import urllib.parse
import urllib.request
from html import unescape
# Connection settings for the Emby server. Each value can be overridden with
# an environment variable (EMBY_URL, EMBY_API_KEY, EMBY_ADMIN_UID) so the
# server address and credentials can change without editing this file.
# NOTE(review): the fallback API key and admin user id are committed in plain
# text; rotate them and drop the defaults once the environment is configured.
# The trailing slash is stripped because request paths already start with "/".
EMBY_URL = os.environ.get("EMBY_URL", "http://145.239.143.92:8096").rstrip("/")
API_KEY = os.environ.get("EMBY_API_KEY", "e3e52b1dcb8b47c39d46b5256bf87081")
ADMIN_UID = os.environ.get("EMBY_ADMIN_UID", "0f026d40c1e04bb7a099aab75a501614")

# One entry per series to sync: (display name, Emby series id, TMDB TV id).
SERIES = [
    ("小猪佩奇", "13", 12225),
    ("安全警长啦咘啦哆", "18", 219799),
    ("动物神探队", "11", 195407),
    ("啦咘啦哆警长大战羚羚羊", "12", 253041),
    ("布鲁伊", "1548", 82728),
    ("汪汪队立大功", "14", 57532),
    ("小恐龙大冒险", "1547", 82027),
    ("海底小纵队", "16", 37472),
    ("海底小纵队:中国之旅", "17", 132983),
    ("小马宝莉:友谊大魔法", "15", 20085),
]
def api_get(path, params=None):
    """Send a GET request to the Emby server and return the decoded JSON body.

    Args:
        path: Request path appended to ``EMBY_URL`` (for example ``"/Items"``).
        params: Optional mapping of query parameters. The mapping is left
            untouched; ``api_key`` is added to a copy.

    Returns:
        The response body parsed with ``json.loads``.

    Raises:
        urllib.error.URLError: If the request fails (``HTTPError`` for HTTP
            error statuses).
        ValueError: If the response body is not valid JSON.
    """
    # Build a fresh dict so the caller's mapping never picks up the API key.
    query = {**(params or {}), "api_key": API_KEY}
    url = f"{EMBY_URL}{path}?{urllib.parse.urlencode(query)}"
    with urllib.request.urlopen(url, timeout=30) as resp:
        return json.loads(resp.read())
def api_post(path, data, params=None):
    """Send ``data`` as a JSON POST request to the Emby server.

    Args:
        path: Request path appended to ``EMBY_URL``.
        data: JSON-serialisable object used as the request body.
        params: Optional mapping of query parameters. The mapping is left
            untouched; ``api_key`` is added to a copy.

    Returns:
        The HTTP status code of the response.

    Raises:
        urllib.error.URLError: If the request fails (``HTTPError`` for HTTP
            error statuses).
    """
    # Build a fresh dict so the caller's mapping never picks up the API key.
    query = {**(params or {}), "api_key": API_KEY}
    url = f"{EMBY_URL}{path}?{urllib.parse.urlencode(query)}"
    body = json.dumps(data).encode("utf-8")
    req = urllib.request.Request(url, data=body, method="POST")
    req.add_header("Content-Type", "application/json")
    with urllib.request.urlopen(req, timeout=30) as resp:
        return resp.status
def fetch_all_episodes():
    """Fetch all episodes once, return dict keyed by SeriesId.

    Episodes are requested from ``/Items`` in pages until the running offset
    reaches the ``TotalRecordCount`` reported by the server.

    Returns:
        A ``defaultdict(list)`` mapping ``str(SeriesId)`` to the episode items
        of that series. Items without a ``SeriesId`` are collected under
        ``""``.
    """
    from collections import defaultdict

    page_size = 200
    by_series = defaultdict(list)
    start = 0
    while True:
        data = api_get("/Items", {
            "Recursive": "true",
            "IncludeItemTypes": "Episode",
            "Fields": "Overview,ParentIndexNumber,IndexNumber,SeriesId,SeriesName",
            "StartIndex": str(start),
            "Limit": str(page_size),
        })
        items = data["Items"]
        for item in items:
            by_series[str(item.get("SeriesId", ""))].append(item)
        start += len(items)
        # An empty page cannot advance the offset; stop instead of requesting
        # the same page forever if the reported total is never reached.
        if not items or start >= data["TotalRecordCount"]:
            break
    return by_series
# Filled on first use with the result of fetch_all_episodes(); None until then.
_all_eps = None


def fetch_emby_episodes(series_id):
    """Return the Emby episodes belonging to ``series_id``.

    The full episode index is loaded through ``fetch_all_episodes`` on the
    first call and kept in the module-level ``_all_eps`` for later calls.

    Args:
        series_id: Emby series id; it is converted to ``str`` for the lookup.

    Returns:
        The list of episode items for the series, or an empty list when the
        series has no entry in the index.
    """
    global _all_eps
    index = _all_eps
    if index is None:
        index = _all_eps = fetch_all_episodes()
    lookup_key = str(series_id)
    return index.get(lookup_key, [])
# Phrases that indicate no real content on TMDB
_TMDB_PLACEHOLDER_PHRASES = (
    "暂无英文版的简介",
    "We don't have an overview",
    "No overview",
    "请添加内容帮助我们完善数据库",
)

# Patterns used to pick episode cards apart; compiled once at import time.
_TMDB_CARD_RE = re.compile(r'<div class="card"')
_TMDB_EPISODE_NUMBER_RE = re.compile(r'data-episode-number="(\d+)"')
_TMDB_OVERVIEW_RE = re.compile(
    r'<div class="overview">\s*<p>(.*?)</p>', re.DOTALL
)
_TMDB_TAG_RE = re.compile(r'<[^>]+>')


def _parse_tmdb_season_html(html):
    """Extract ``{episode_number: overview}`` from TMDB season page HTML."""
    episodes = {}
    # Everything before the first card marker is page chrome, hence [1:].
    for card in _TMDB_CARD_RE.split(html)[1:]:
        ep_match = _TMDB_EPISODE_NUMBER_RE.search(card)
        if not ep_match:
            continue
        ov_match = _TMDB_OVERVIEW_RE.search(card)
        if not ov_match:
            continue
        # Drop inline markup first, then decode HTML entities.
        overview = _TMDB_TAG_RE.sub('', ov_match.group(1)).strip()
        overview = unescape(overview).strip()
        # Skip placeholder/empty overviews
        if len(overview) < 5:
            continue
        if any(ph in overview for ph in _TMDB_PLACEHOLDER_PHRASES):
            continue
        episodes[int(ep_match.group(1))] = overview
    return episodes


def fetch_and_parse_tmdb_season(tmdb_id, season_num):
    """Fetch TMDB season page and parse episode overviews.

    Args:
        tmdb_id: TMDB TV show id used in the page URL.
        season_num: Season number used in the page URL.

    Returns:
        A dict mapping episode number (``int``) to its overview text. Episodes
        whose overview is missing, shorter than five characters, or contains a
        known placeholder phrase are left out. An empty dict is returned when
        the page responds with HTTP 404.

    Raises:
        urllib.error.HTTPError: For HTTP error statuses other than 404.
        urllib.error.URLError: If the request fails for another reason.
    """
    url = f"https://www.themoviedb.org/tv/{tmdb_id}/season/{season_num}?language=zh-CN"
    req = urllib.request.Request(url, headers={
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
        "Accept-Language": "zh-CN,zh;q=0.9",
    })
    try:
        with urllib.request.urlopen(req, timeout=30) as resp:
            html = resp.read().decode("utf-8")
    except urllib.error.HTTPError as e:
        if e.code == 404:
            return {}
        raise
    return _parse_tmdb_season_html(html)
def needs_update(ep):
    """Tell whether an episode item still lacks a usable overview.

    Args:
        ep: Episode item dict; only its ``"Overview"`` entry is inspected.

    Returns:
        ``True`` when the overview is absent, empty, or shorter than five
        characters after stripping surrounding whitespace; ``False`` otherwise.
    """
    text = ep.get("Overview", "")
    if not text:
        return True
    return len(text.strip()) < 5
def process_series(series_name, series_id, tmdb_id):
    """Fill in missing Emby episode overviews for one series from TMDB.

    Episodes flagged by ``needs_update`` are grouped by season. For each
    season the TMDB season page is fetched once, and every episode with a
    matching TMDB overview is written back to Emby. Progress is printed as
    the work proceeds. Failures on a season fetch or on a single episode
    update are reported and skipped so the rest of the run continues.

    Args:
        series_name: Display name, used only in the printed output.
        series_id: Emby series id whose episodes are examined.
        tmdb_id: TMDB TV show id used to fetch season pages.

    Returns:
        The number of episodes whose overview was updated.
    """
    tmdb_delay = 1.5  # pause after every TMDB page request
    emby_delay = 0.2  # pause after every Emby update attempt

    print(f"\n{'='*50}", flush=True)
    print(f"{series_name} (Emby:{series_id} TMDB:{tmdb_id})", flush=True)
    emby_eps = fetch_emby_episodes(series_id)
    missing = [e for e in emby_eps if needs_update(e)]
    print(f"Total: {len(emby_eps)}, Missing: {len(missing)}", flush=True)
    if not missing:
        print("Nothing to update.", flush=True)
        return 0

    # Group once by season. Episodes without a season number cannot be matched
    # to a TMDB season page, so they are skipped rather than triggering a
    # request for a season they would never be matched against.
    by_season = {}
    for ep in missing:
        season = ep.get("ParentIndexNumber")
        if season is None:
            continue
        by_season.setdefault(season, []).append(ep)
    seasons = sorted(by_season)
    print(f"Seasons: {seasons}", flush=True)

    updated = 0
    no_tmdb = 0
    for sn in seasons:
        print(f"  S{sn:02d}: fetching TMDB...", end=" ", flush=True)
        try:
            tmdb_eps = fetch_and_parse_tmdb_season(tmdb_id, sn)
        except Exception as e:
            # Best effort: report the failure and move on to the next season.
            print(f"ERROR: {e}", flush=True)
            continue
        else:
            print(f"{len(tmdb_eps)} eps found", flush=True)
        finally:
            # Pause after every TMDB request, including failed ones.
            time.sleep(tmdb_delay)

        for ep in by_season[sn]:
            ep_num = ep.get("IndexNumber")
            if ep_num is None:
                continue
            overview = tmdb_eps.get(ep_num)
            if not overview:
                no_tmdb += 1
                continue
            try:
                # Re-read the full item and post it back with only the
                # overview changed.
                item = api_get(f"/Users/{ADMIN_UID}/Items/{ep['Id']}")
                item["Overview"] = overview
                api_post(f"/Items/{ep['Id']}", item)
                updated += 1
                print(f"    ✓ E{ep_num:02d}: {overview[:50]}", flush=True)
            except Exception as e:
                # Best effort: one failed episode must not abort the series.
                print(f"    ✗ E{ep_num:02d}: {e}", flush=True)
            time.sleep(emby_delay)
    print(f"  Done: {updated} updated, {no_tmdb} no TMDB data", flush=True)
    return updated
def main():
    """Run the sync for every configured series and print the grand total.

    If a command-line argument is supplied, only series whose name contains
    that text are processed.
    """
    cli_args = sys.argv[1:]
    name_filter = cli_args[0] if cli_args else None

    selected = [
        entry for entry in SERIES
        if not name_filter or name_filter in entry[0]
    ]
    grand_total = sum(process_series(*entry) for entry in selected)

    print(f"\n{'='*50}", flush=True)
    print(f"TOTAL UPDATED: {grand_total}", flush=True)


if __name__ == "__main__":
    main()