Jelajahi Sumber

[videos] Switch to TMDB for scraping videos

Colin Powell 4 bulan lalu
induk
melakukan
24ac545f55

+ 48 - 1
poetry.lock

@@ -1065,6 +1065,21 @@ ssh = ["bcrypt (>=3.1.5)"]
 test = ["certifi", "cryptography-vectors (==43.0.3)", "pretend", "pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-xdist"]
 test-randomorder = ["pytest-randomly"]
 
+[[package]]
+name = "dacite"
+version = "1.9.2"
+description = "Simple creation of data classes from dictionaries."
+optional = false
+python-versions = ">=3.7"
+groups = ["main"]
+files = [
+    {file = "dacite-1.9.2-py3-none-any.whl", hash = "sha256:053f7c3f5128ca2e9aceb66892b1a3c8936d02c686e707bee96e19deef4bc4a0"},
+    {file = "dacite-1.9.2.tar.gz", hash = "sha256:6ccc3b299727c7aa17582f0021f6ae14d5de47c7227932c47fec4cdfefd26f09"},
+]
+
+[package.extras]
+dev = ["black", "coveralls", "mypy", "pre-commit", "pylint", "pytest (>=5)", "pytest-benchmark", "pytest-cov"]
+
 [[package]]
 name = "dataclass-wizard"
 version = "0.22.0"
@@ -4610,6 +4625,23 @@ files = [
 [package.dependencies]
 rapidfuzz = ">=3.0.0,<4.0.0"
 
+[[package]]
+name = "themoviedb"
+version = "1.0.2"
+description = "A modern and easy to use API wrapper for The Movie Database (TMDb) API v3 written in Python"
+optional = false
+python-versions = ">=3.8"
+groups = ["main"]
+files = [
+    {file = "themoviedb-1.0.2-py3-none-any.whl", hash = "sha256:badf85e91010c7085509f40270bf2a40ea30ee5ef3ed6fb3ec332c5e50adb576"},
+    {file = "themoviedb-1.0.2.tar.gz", hash = "sha256:7835615142a44e7ca25e48645a3a3c5e06b382e8c518c38c3537effa9a2596ce"},
+]
+
+[package.dependencies]
+aiohttp = ">=3.8.4"
+dacite = ">=1.8.0"
+requests = ">=2.31.0"
+
 [[package]]
 name = "time-machine"
 version = "2.16.0"
@@ -4710,6 +4742,21 @@ files = [
     {file = "tld-0.13.tar.gz", hash = "sha256:93dde5e1c04bdf1844976eae440706379d21f4ab235b73c05d7483e074fb5629"},
 ]
 
+[[package]]
+name = "tmdbv3api"
+version = "1.9.0"
+description = "A lightweight Python library for The Movie Database (TMDb) API."
+optional = false
+python-versions = "*"
+groups = ["main"]
+files = [
+    {file = "tmdbv3api-1.9.0-py3-none-any.whl", hash = "sha256:2bcd8c6e8902397860715a71045f200ecc3ee06804ecf786cb4c1e09b2deeba8"},
+    {file = "tmdbv3api-1.9.0.tar.gz", hash = "sha256:504c5da6b99c4516ff160a01576112d097f209c0534f943c15c4b56cbd92c33b"},
+]
+
+[package.dependencies]
+requests = "*"
+
 [[package]]
 name = "toml"
 version = "0.10.2"
@@ -5452,4 +5499,4 @@ cffi = ["cffi (>=1.11)"]
 [metadata]
 lock-version = "2.1"
 python-versions = ">=3.9,<3.12"
-content-hash = "cdd7f577fe3a4c5c8cc960e0070d93b7ddbb2a7968fab63d72bb039afaa05bbe"
+content-hash = "3a483aefea0a3afebf187b17b7df72a158788024ca8121b512b39567fb5ec8ca"

+ 2 - 0
pyproject.toml

@@ -54,6 +54,8 @@ meta-yt = "^0.1.9"
 berserk = "^0.13.2"
 poetry-bumpversion = "^0.3.3"
 orgparse = "^0.4.20250520"
+tmdbv3api = "^1.9.0"
+themoviedb = "^1.0.2"
 
 [tool.poetry.group.test]
 optional = true

+ 4 - 2
vrobbler/apps/videos/metadata.py

@@ -25,6 +25,7 @@ class VideoMetadata:
         60  # Silly default, but things break if this is 0 or null
     )
     imdb_id: Optional[str]
+    tmdb_id: Optional[str]
     youtube_id: Optional[str]
 
     # IMDB specific
@@ -35,6 +36,7 @@ class VideoMetadata:
     tv_series_id: Optional[int]
     plot: Optional[str]
     imdb_rating: Optional[str]
+    tmdb_rating: Optional[str]
     cover_url: Optional[str]
     overview: Optional[str]
 
@@ -59,6 +61,6 @@ class VideoMetadata:
         video_dict = vars(self)
         cover = None
         if "cover_url" in video_dict.keys():
-            cover = video_dict.pop("cover_url")
-        genres = video_dict.pop("genres")
+            cover = video_dict.pop("cover_url", "")
+        genres = video_dict.pop("genres", [])
         return video_dict, cover, genres

+ 18 - 0
vrobbler/apps/videos/migrations/0023_video_tmdb_rating.py

@@ -0,0 +1,18 @@
+# Generated by Django 4.2.19 on 2025-06-13 15:04
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    """Add the optional ``tmdb_rating`` float field to the ``video`` model."""
+
+    # Applied on top of the previous migration of the ``videos`` app.
+    dependencies = [("videos", "0022_alter_video_run_time_seconds")]
+
+    operations = [
+        migrations.AddField(
+            model_name="video",
+            name="tmdb_rating",
+            # Nullable and blankable, so existing rows need no default value.
+            field=models.FloatField(null=True, blank=True),
+        ),
+    ]

+ 5 - 1
vrobbler/apps/videos/models.py

@@ -20,6 +20,7 @@ from scrobbles.mixins import (
 from taggit.managers import TaggableManager
 from videos.metadata import VideoMetadata
 from videos.sources.imdb import lookup_video_from_imdb
+from videos.sources.tmdb import lookup_video_from_tmdb
 from videos.sources.youtube import lookup_video_from_youtube
 
 YOUTUBE_VIDEO_URL = "https://www.youtube.com/watch?v="
@@ -206,6 +207,7 @@ class Video(ScrobblableMixin):
     next_imdb_id = models.CharField(max_length=20, **BNULL)
     imdb_id = models.CharField(max_length=20, **BNULL)
     imdb_rating = models.FloatField(**BNULL)
+    tmdb_rating = models.FloatField(**BNULL)
     cover_image = models.ImageField(upload_to="videos/video/", **BNULL)
     cover_image_small = ImageSpecField(
         source="cover_image",
@@ -307,11 +309,13 @@ class Video(ScrobblableMixin):
     def get_from_imdb_id(
         cls, imdb_id: str, overwrite: bool = False
     ) -> "Video":
+        if "tt" in imdb_id:
+            imdb_id = imdb_id[2:]
         video, created = cls.objects.get_or_create(imdb_id=imdb_id)
         if not created and not overwrite:
             return video
 
-        vdict, cover, genres = lookup_video_from_imdb(
+        vdict, cover, genres = lookup_video_from_tmdb(
             imdb_id
         ).as_dict_with_cover_and_genres()
         if created or overwrite:

+ 77 - 0
vrobbler/apps/videos/sources/tmdb.py

@@ -0,0 +1,77 @@
+import logging
+
+from django.conf import settings
+from themoviedb import TMDb
+from tmdbv3api import TV, TMDb as TMDb_direct
+from videos.metadata import VideoMetadata, VideoType
+
+logger = logging.getLogger(__name__)
+
+# Base URL for full-size TMDB artwork; an image path from a TMDB result is
+# appended to it to build a cover URL.
+TMDB_STILL_URL = "https://image.tmdb.org/t/p/original"
+
+# The API key is read from Django settings only. It was previously hardcoded
+# here (and silently overrode the settings value); credentials must not live
+# in the repository, and the key that was committed should be rotated.
+key = getattr(settings, "TMDB_API_KEY", "")
+if not key:
+    logger.warning(
+        "TMDB_API_KEY is not configured; TMDB lookups are likely to fail"
+    )
+
+# Configure the tmdbv3api client; lookup_video_from_tmdb() calls ``TV()``
+# from that same package. The key is only assigned when one is configured so
+# an empty value is never pushed into the client.
+tmdb_direct = TMDb_direct()
+if key:
+    tmdb_direct.api_key = key
+
+# themoviedb client used for the find-by-IMDB-id lookup.
+tmdb = TMDb(key=key, language="en-US", region="US")
+
+
+def lookup_video_from_tmdb(
+    name_or_id: str, kind: str = "movie"
+) -> VideoMetadata:
+    """Look up a video on TMDB by its IMDB id and return its metadata.
+
+    Args:
+        name_or_id: IMDB id of the video, with or without the ``tt`` prefix.
+        kind: Currently unused; kept so existing callers keep working.
+
+    Returns:
+        A ``VideoMetadata`` carrying the stripped IMDB id. When TMDB has no
+        matching movie or TV episode, only the IMDB id is populated; a
+        ``VideoMetadata`` is always returned so callers can chain
+        ``as_dict_with_cover_and_genres()`` without a ``None`` check.
+
+    Side effects:
+        For TV episodes the parent ``Series`` row is fetched or created, and
+        a newly created series is named from the TMDB show details.
+    """
+    # Imported lazily: videos.models imports this module at load time, so a
+    # top-level import would be circular.
+    from videos.models import Series
+
+    imdb_id = name_or_id[2:] if name_or_id.startswith("tt") else name_or_id
+    video_metadata = VideoMetadata(imdb_id=imdb_id)
+
+    tmdb_result = tmdb.find().by_imdb("tt" + imdb_id)
+    if not tmdb_result:
+        logger.info(
+            "[lookup_video_from_tmdb] no video found on tmdb",
+            extra={"name_or_id": name_or_id},
+        )
+        return video_metadata
+
+    episodes = tmdb_result.tv_episode_results or []
+    movies = tmdb_result.movie_results or []
+
+    media = None
+    if episodes:
+        # An episode match takes precedence over a movie match, as before.
+        media = episodes[0]
+        video_metadata.video_type = VideoType.TV_EPISODE.value
+
+        external_ids = TV().external_ids(media.show_id)
+        series_imdb_id = getattr(external_ids, "imdb_id", None)
+        # Some shows have no IMDB id on TMDB; skip series linking then
+        # instead of failing on the slice below.
+        if series_imdb_id:
+            series, created = Series.objects.get_or_create(
+                imdb_id=series_imdb_id[2:]
+            )
+            if created:
+                show_data = TV().details(tv_id=media.show_id)
+                series.name = show_data.name
+                series.save()
+            video_metadata.tv_series_id = series.id
+    elif movies:
+        media = movies[0]
+        video_metadata.video_type = VideoType.MOVIE.value
+
+    if media is None:
+        logger.warning("Video not found on TMDB", extra={"imdb_id": imdb_id})
+        return video_metadata
+
+    video_metadata.tmdb_id = media.id
+
+    # Episodes expose ``still_path``; movies expose ``poster_path``.
+    image_path = getattr(media, "still_path", None) or getattr(
+        media, "poster_path", None
+    )
+    if image_path:
+        video_metadata.cover_url = TMDB_STILL_URL + image_path
+
+    # Only override the metadata default when TMDB reports a runtime.
+    runtime_minutes = getattr(media, "runtime", None)
+    if runtime_minutes:
+        video_metadata.run_time_seconds = runtime_minutes * 60
+
+    # Episodes are titled via ``name``, movies via ``title``.
+    video_metadata.title = getattr(media, "name", None) or getattr(
+        media, "title", None
+    )
+    video_metadata.episode_number = getattr(media, "episode_number", None)
+    video_metadata.season_number = getattr(media, "season_number", None)
+
+    # Episodes carry ``air_date``; movies carry ``release_date``.
+    released = getattr(media, "air_date", None) or getattr(
+        media, "release_date", None
+    )
+    if released:
+        video_metadata.year = released.year
+
+    video_metadata.plot = media.overview
+    # The TMDB vote average belongs in tmdb_rating, not imdb_rating.
+    video_metadata.tmdb_rating = media.vote_average
+
+    return video_metadata