فهرست منبع

[podcasts] Fixes enrichment of podcasts with podcastindex

Colin Powell 1 ماه پیش
والد
کامیت
4767cc7e52

+ 40 - 0
vrobbler/apps/podcasts/migrations/0015_remove_podcast_google_podcasts_url_podcast_dead_date_and_more.py

@@ -0,0 +1,40 @@
+# Generated by Django 4.2.19 on 2025-04-07 17:16
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    """Podcast schema change: drop ``google_podcasts_url`` and add
+    ``dead_date``, ``itunes_id``, a field named ``null`` and ``site_link``.
+    """
+
+    dependencies = [
+        ("podcasts", "0014_alter_podcastepisode_run_time_seconds"),
+    ]
+
+    operations = [
+        migrations.RemoveField(
+            model_name="podcast",
+            name="google_podcasts_url",
+        ),
+        migrations.AddField(
+            model_name="podcast",
+            name="dead_date",
+            field=models.DateField(blank=True, null=True),
+        ),
+        # NOTE(review): Django documents max_length on a TextField as a form
+        # widget hint only, not a database constraint — confirm a TextField is
+        # really wanted here rather than a CharField.
+        migrations.AddField(
+            model_name="podcast",
+            name="itunes_id",
+            field=models.TextField(blank=True, max_length=15, null=True),
+        ),
+        # NOTE(review): a field literally named "null" with verbose_name
+        # "blank" looks unintended — it is what Django records when a
+        # {"blank": ..., "null": ...} dict is unpacked positionally (*) rather
+        # than as keyword arguments (**) in the model field declaration.
+        # Confirm against the model and regenerate this operation if so.
+        migrations.AddField(
+            model_name="podcast",
+            name="null",
+            field=models.CharField(
+                default="", max_length=150, verbose_name="blank"
+            ),
+            preserve_default=False,
+        ),
+        migrations.AddField(
+            model_name="podcast",
+            name="site_link",
+            field=models.URLField(blank=True, null=True),
+        ),
+    ]

+ 28 - 0
vrobbler/apps/podcasts/migrations/0016_podcast_genre.py

@@ -0,0 +1,28 @@
+# Generated by Django 4.2.19 on 2025-04-07 17:18
+
+from django.db import migrations
+import taggit.managers
+
+
+class Migration(migrations.Migration):
+    """Add the ``genre`` tag manager to Podcast, tagging through
+    ``scrobbles.ObjectWithGenres`` to ``scrobbles.Genre``.
+    """
+
+    # The scrobbles dependency is needed because the new field references
+    # models that live in the scrobbles app.
+    dependencies = [
+        ("scrobbles", "0068_scrobble_paper_alter_scrobble_media_type"),
+        (
+            "podcasts",
+            "0015_remove_podcast_google_podcasts_url_podcast_dead_date_and_more",
+        ),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name="podcast",
+            name="genre",
+            field=taggit.managers.TaggableManager(
+                help_text="A comma-separated list of tags.",
+                through="scrobbles.ObjectWithGenres",
+                to="scrobbles.Genre",
+                verbose_name="Tags",
+            ),
+        ),
+    ]

+ 18 - 0
vrobbler/apps/podcasts/migrations/0017_podcast_podcastindex_id.py

@@ -0,0 +1,18 @@
+# Generated by Django 4.2.19 on 2025-04-07 17:29
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    """Add the optional ``podcastindex_id`` column (up to 100 characters,
+    blank and NULL allowed) to Podcast.
+    """
+
+    dependencies = [
+        ("podcasts", "0016_podcast_genre"),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name="podcast",
+            name="podcastindex_id",
+            field=models.CharField(blank=True, max_length=100, null=True),
+        ),
+    ]

+ 89 - 55
vrobbler/apps/podcasts/models.py

@@ -1,5 +1,4 @@
 import logging
-from typing import Dict, Optional
 from uuid import uuid4
 
 import requests
@@ -10,8 +9,16 @@ from django.db import models
 from django.urls import reverse
 from django.utils.translation import gettext_lazy as _
 from django_extensions.db.models import TimeStampedModel
-from podcasts.scrapers import scrape_data_from_google_podcasts
-from scrobbles.mixins import ScrobblableConstants, ScrobblableMixin
+from scrobbles.mixins import (
+    ObjectWithGenres,
+    ScrobblableConstants,
+    ScrobblableMixin,
+)
+from taggit.managers import TaggableManager
+
+from podcasts.sources.podcastindex import (
+    lookup_podcast_from_podcastindex,
+)
 
 logger = logging.getLogger(__name__)
 BNULL = {"blank": True, "null": True}
@@ -24,6 +31,13 @@ class Producer(TimeStampedModel):
     def __str__(self):
         return f"{self.name}"
 
+    @classmethod
+    def find_or_create(cls, name):
+        """Return the producer whose name matches ``name`` case-insensitively,
+        creating a new one with exactly that name when none exists.
+        """
+        existing = cls.objects.filter(name__iexact=name).first()
+        if existing:
+            return existing
+        return cls.objects.create(name=name)
+
 
 class Podcast(TimeStampedModel):
     name = models.CharField(max_length=255)
@@ -31,11 +45,17 @@ class Podcast(TimeStampedModel):
     producer = models.ForeignKey(
         Producer, on_delete=models.DO_NOTHING, **BNULL
     )
+    podcastindex_id = models.CharField(max_length=100, **BNULL)
+    # BNULL must be unpacked as keyword arguments (**). Positional unpacking
+    # (*BNULL) passes the dict keys "blank"/"null" as verbose_name/name, which
+    # yields a required field named "null" instead of a nullable "owner".
+    # Any migration generated from the old declaration has to be regenerated.
+    owner = models.CharField(max_length=150, **BNULL)
     description = models.TextField(**BNULL)
     active = models.BooleanField(default=True)
     feed_url = models.URLField(**BNULL)
-    google_podcasts_url = models.URLField(**BNULL)
+    site_link = models.URLField(**BNULL)
     cover_image = models.ImageField(upload_to="podcasts/covers/", **BNULL)
+    itunes_id = models.TextField(max_length=15, **BNULL)
+    dead_date = models.DateField(**BNULL)
+    genre = TaggableManager(through=ObjectWithGenres)
 
     def __str__(self):
         return f"{self.name}"
@@ -49,32 +69,43 @@ class Podcast(TimeStampedModel):
             user=user, podcast_episode__podcast=self
         ).order_by("-timestamp")
 
-    def scrape_google_podcasts(self, force=False):
-        podcast_dict = {}
-        if not self.cover_image or force:
-            podcast_dict = scrape_data_from_google_podcasts(self.name)
-            if podcast_dict:
-                if not self.producer:
-                    self.producer, created = Producer.objects.get_or_create(
-                        name=podcast_dict["producer"]
-                    )
-                self.description = podcast_dict.get("description")
-                self.google_podcasts_url = podcast_dict.get("google_url")
-                self.save(
-                    update_fields=[
-                        "description",
-                        "producer",
-                        "google_podcasts_url",
-                    ]
-                )
-
-        cover_url = podcast_dict.get("image_url")
+    @property
+    def itunes_link(self) -> str:
+        """Apple Podcasts URL built from ``itunes_id``, or "" when no ID is set."""
+        if self.itunes_id:
+            return f"https://podcasts.apple.com/us/podcast/id{self.itunes_id}"
+        return ""
+
+    def fix_metadata(self, force=False):
+        """Enrich this podcast with metadata looked up on PodcastIndex by name.
+
+        Does nothing (beyond logging) when ``podcastindex_id`` is already set,
+        unless ``force`` is true. On a successful lookup the returned genres
+        are added as tags, the cover artwork is downloaded when no cover is
+        stored yet (or ``force`` is true), and every remaining key of the
+        lookup result is assigned as an attribute before saving.
+        """
+        if self.podcastindex_id and not force:
+            logger.warning(
+                "Podcast already has PodcastIndex ID, use force=True to overwrite"
+            )
+            return
+
+        podcast_dict = lookup_podcast_from_podcastindex(self.name)
+
+        if not podcast_dict:
+            logger.info(
+                "No podcast data found from PodcastIndex. Are credentials setup?"
+            )
+            return
+
+        # These two keys are handled separately instead of being assigned in
+        # the loop below; pop with a default so a lookup result that lacks
+        # them does not raise KeyError.
+        genres = podcast_dict.pop("genres", None)
+        if genres:
+            self.genre.add(*genres)
+
+        cover_url = podcast_dict.pop("image_url", None)
+
         if (not self.cover_image or force) and cover_url:
-            r = requests.get(cover_url)
-            if r.status_code == 200:
+            # The artwork is optional: bound the request with a timeout and
+            # keep going on network errors so the metadata is still saved.
+            try:
+                r = requests.get(cover_url, timeout=10)
+            except requests.RequestException as exc:
+                logger.warning("Could not download cover %s: %s", cover_url, exc)
+                r = None
+            if r is not None and r.status_code == 200:
                 fname = f"{self.name}_{self.uuid}.jpg"
                 self.cover_image.save(fname, ContentFile(r.content), save=True)
 
+        for attr, value in podcast_dict.items():
+            setattr(self, attr, value)
+        self.save()
+
 
 class PodcastEpisode(ScrobblableMixin):
     COMPLETION_PERCENT = getattr(settings, "PODCAST_COMPLETION_PERCENT", 90)
@@ -108,42 +139,45 @@ class PodcastEpisode(ScrobblableMixin):
 
     @classmethod
     def find_or_create(
-        cls, podcast_dict: Dict, producer_dict: Dict, episode_dict: Dict
-    ) -> Optional["Episode"]:
+        cls,
+        title: str,
+        podcast_name: str,
+        pub_date: str,
+        number: int = 0,
+        mopidy_uri: str = "",
+        producer_name: str = "",
+        run_time_seconds: int = 1800,
+        enrich: bool = True,
+    ) -> "PodcastEpisode":
-        """Given a data dict from Mopidy, finds or creates a podcast and
-        producer before saving the epsiode so it can be scrobbled.
-
-        """
+        """Find or create the episode ``title`` of podcast ``podcast_name``.
+
+        Producer (when ``producer_name`` is given), podcast and episode are
+        each matched case-insensitively by name/title and created when no
+        match exists. A newly created podcast is enriched through
+        ``fix_metadata()`` unless ``enrich`` is false. The remaining
+        arguments are only used when a new episode row is created.
+
+        NOTE(review): an empty ``podcast_name`` is no longer rejected, so it
+        finds or creates a podcast with an empty name — confirm intended.
+        """
-        if not podcast_dict.get("name"):
-            logger.warning(f"No name from source for podcast, not scrobbling")
-            return
-
         producer = None
-        if producer_dict.get("name"):
-            producer, producer_created = Producer.objects.get_or_create(
-                **producer_dict
+        if producer_name:
+            producer = Producer.find_or_create(producer_name)
+
+        podcast = Podcast.objects.filter(
+            name__iexact=podcast_name,
+        ).first()
+        if not podcast:
+            podcast = Podcast.objects.create(
+                name=podcast_name, producer=producer
+            )
+            if enrich:
+                podcast.fix_metadata()
+
+        episode = cls.objects.filter(
+            title__iexact=title, podcast=podcast
+        ).first()
+        if not episode:
+            episode = cls.objects.create(
+                title=title,
+                podcast=podcast,
+                run_time_seconds=run_time_seconds,
+                number=number,
+                pub_date=pub_date,
+                mopidy_uri=mopidy_uri,
             )
-            if producer_created:
-                logger.debug(f"Created new producer {producer}")
-            else:
-                logger.debug(f"Found producer {producer}")
-
-        if producer:
-            podcast_dict["producer_id"] = producer.id
-        podcast, podcast_created = Podcast.objects.get_or_create(
-            **podcast_dict
-        )
-        if podcast_created:
-            logger.debug(f"Created new podcast {podcast}")
-        else:
-            logger.debug(f"Found podcast {podcast}")
-
-        episode_dict["podcast_id"] = podcast.id
-
-        episode, created = cls.objects.get_or_create(**episode_dict)
-        if created:
-            logger.debug(f"Created new episode: {episode}")
-        else:
-            logger.debug(f"Found episode {episode}")
 
         return episode

+ 75 - 0
vrobbler/apps/podcasts/sources/podcastindex.py

@@ -0,0 +1,75 @@
+import hashlib
+import time
+
+import pytz
+import requests
+from django.conf import settings
+from django.utils import timezone
+from scrobbles.utils import timestamp_user_tz_to_utc
+
+# Fall back to empty credentials so importing this module does not raise
+# AttributeError when the settings are not defined.
+# NOTE(review): a lookup made with empty credentials is expected to be
+# rejected by the API rather than to succeed — confirm.
+PODCASTINDEX_API_KEY = getattr(settings, "PODCASTINDEX_API_KEY", "")
+PODCASTINDEX_API_SECRET = getattr(settings, "PODCASTINDEX_API_SECRET", "")
+
+
+def get_auth_headers():
+    """Build the request headers that authenticate against PodcastIndex.
+
+    The ``Authorization`` value is the SHA-1 hex digest of API key + API
+    secret + the current Unix time; the same time is sent as ``X-Auth-Date``.
+    """
+    auth_date = str(int(time.time()))
+    payload = PODCASTINDEX_API_KEY + PODCASTINDEX_API_SECRET + auth_date
+    digest = hashlib.sha1(payload.encode("utf-8")).hexdigest()
+
+    headers = {
+        "User-Agent": "MyPodcastApp/1.0",
+        "X-Auth-Date": auth_date,
+        "X-Auth-Key": PODCASTINDEX_API_KEY,
+        "Authorization": digest,
+        "Content-Type": "application/json",
+    }
+    return headers
+
+
+def lookup_podcast_from_podcastindex(
+    podcast_name: str, dump_raw_response: bool = False
+) -> dict:
+    """Search PodcastIndex for ``podcast_name`` and describe the top hit.
+
+    Returns a dict with the keys ``podcastindex_id``, ``title``,
+    ``site_link``, ``description``, ``owner``, ``image_url``, ``feed_url``,
+    ``itunes_id``, ``genres`` and ``dead_date``, built from the first feed
+    of the search result. ``dead_date`` is the newest episode's publication
+    time (as converted by ``timestamp_user_tz_to_utc``) when that is more
+    than 180 days in the past, otherwise ``None``.
+
+    An empty dict is returned when the request fails, the API answers with
+    a non-200 status, or no feed matches. With ``dump_raw_response`` the
+    raw ``feeds`` value of the response is returned instead.
+    """
+    url = "https://api.podcastindex.org/api/1.0/search/byterm"
+    headers = get_auth_headers()
+    params = {"q": podcast_name}
+
+    try:
+        # Without a timeout a stalled connection would block the caller
+        # indefinitely.
+        response = requests.get(
+            url, headers=headers, params=params, timeout=10
+        )
+    except requests.RequestException as exc:
+        print("Failed to fetch data:", exc)
+        return {}
+
+    if response.status_code != 200:
+        print("Failed to fetch data:", response.status_code, response.text)
+        return {}
+
+    data = response.json()
+    if dump_raw_response:
+        return data.get("feeds")
+
+    feeds = data.get("feeds")
+    if not feeds:
+        print("No podcasts found.")
+        return {}
+
+    top_feed_dict = feeds[0]
+
+    # Only compute the "dead" marker when the feed reports a newest-episode
+    # timestamp; a missing value is not passed to the converter.
+    dead_date = None
+    newest_pubdate = top_feed_dict.get("newestItemPubdate")
+    if newest_pubdate is not None:
+        newest_episode_date = timestamp_user_tz_to_utc(
+            newest_pubdate, pytz.UTC
+        )
+        if (timezone.now() - newest_episode_date).days > 180:
+            dead_date = newest_episode_date
+
+    # "categories" may be absent or null for a feed; treat that as no genres
+    # instead of failing on None.values().
+    categories = top_feed_dict.get("categories") or {}
+
+    return {
+        "podcastindex_id": top_feed_dict.get("id"),
+        "title": top_feed_dict.get("title"),
+        "site_link": top_feed_dict.get("link"),
+        "description": top_feed_dict.get("description"),
+        "owner": top_feed_dict.get("ownerName"),
+        "image_url": top_feed_dict.get("artwork"),
+        "feed_url": top_feed_dict.get("url"),
+        "itunes_id": top_feed_dict.get("itunesId"),
+        "genres": list(categories.values()),
+        "dead_date": dead_date,
+    }

+ 15 - 2
vrobbler/apps/scrobbles/scrobblers.py

@@ -15,7 +15,8 @@ from locations.models import GeoLocation
 from music.constants import JELLYFIN_POST_KEYS, MOPIDY_POST_KEYS
 from music.models import Track
 from music.utils import get_or_create_track
-from podcasts.utils import get_or_create_podcast
+from podcasts.models import PodcastEpisode
+from podcasts.utils import parse_mopidy_uri
 from scrobbles.constants import (
     JELLYFIN_AUDIO_ITEM_TYPES,
     MANUAL_SCROBBLE_FNS,
@@ -54,7 +55,19 @@ def mopidy_scrobble_media(post_data: dict, user_id: int) -> Scrobble:
     )
 
     if media_type == Scrobble.MediaType.PODCAST_EPISODE:
-        media_obj = get_or_create_podcast(post_data)
+        parsed_data = parse_mopidy_uri(post_data.get("mopidy_uri", ""))
+        podcast_name = post_data.get(
+            "album", parsed_data.get("podcast_name", "")
+        )
+
+        media_obj = PodcastEpisode.find_or_create(
+            title=parsed_data.get("episode_filename", ""),
+            podcast_name=podcast_name,
+            producer_name=post_data.get("artist", ""),
+            number=parsed_data.get("episode_num", ""),
+            pub_date=parsed_data.get("pub_date", ""),
+            mopidy_uri=post_data.get("mopidy_uri", ""),
+        )
     else:
         media_obj = Track.find_or_create(
             title=post_data.get("name", ""),

+ 2 - 0
vrobbler/settings.py

@@ -60,6 +60,8 @@ DUMP_REQUEST_DATA = (
 
 THESPORTSDB_API_KEY = os.getenv("VROBBLER_THESPORTSDB_API_KEY", "2")
 THEAUDIODB_API_KEY = os.getenv("VROBBLER_THEAUDIODB_API_KEY", "2")
+# PodcastIndex API credentials, read from the environment; both default to
+# an empty string when the variables are unset.
+PODCASTINDEX_API_KEY = os.getenv("VROBBLER_PODCASTINDEX_API_KEY", "")
+PODCASTINDEX_API_SECRET = os.getenv("VROBBLER_PODCASTINDEX_API_SECRET", "")
 TMDB_API_KEY = os.getenv("VROBBLER_TMDB_API_KEY", "")
 LASTFM_API_KEY = os.getenv("VROBBLER_LASTFM_API_KEY")
 LASTFM_SECRET_KEY = os.getenv("VROBBLER_LASTFM_SECRET_KEY")