Bläddra i källkod

Add url to scraped data for podcasts

Colin Powell 2 år sedan
förälder
incheckning
98f9c4bc04
2 ändrade filer med 22 tillägg och 3 borttagningar
  1. 10 3
      vrobbler/apps/podcasts/models.py
  2. 12 0
      vrobbler/apps/podcasts/scrapers.py

+ 10 - 3
vrobbler/apps/podcasts/models.py

@@ -3,15 +3,15 @@ from typing import Dict, Optional
 from uuid import uuid4
 
 import requests
+from django.apps import apps
 from django.conf import settings
 from django.core.files.base import ContentFile
 from django.db import models
 from django.utils.translation import gettext_lazy as _
 from django_extensions.db.models import TimeStampedModel
+from podcasts.scrapers import scrape_data_from_google_podcasts
 from scrobbles.mixins import ScrobblableMixin
 
-from vrobbler.apps.podcasts.scrapers import scrape_data_from_google_podcasts
-
 logger = logging.getLogger(__name__)
 BNULL = {"blank": True, "null": True}
 
@@ -38,6 +38,12 @@ class Podcast(TimeStampedModel):
     def __str__(self):
         return f"{self.name}"
 
+    def scrobbles(self):
+        Scrobble = apps.get_model("scrobbles", "Scrobble")
+        return Scrobble.objects.filter(podcast_episode__podcast=self).order_by(
+            "-timestamp"
+        )
+
     def scrape_google_podcasts(self, force=False):
         podcast_dict = {}
         if not self.cover_image or force:
@@ -48,7 +54,8 @@ class Podcast(TimeStampedModel):
                         name=podcast_dict["producer"]
                     )
                 self.description = podcast_dict.get("description")
-                self.save(update_fields=["description", "producer"])
+                self.url = podcast_dict.get("url")
+                self.save(update_fields=["description", "producer", "url"])
 
         cover_url = podcast_dict.get("image_url")
         if (not self.cover_image or force) and cover_url:

+ 12 - 0
vrobbler/apps/podcasts/scrapers.py

@@ -24,6 +24,17 @@ def get_title_from_soup(soup) -> Optional[int]:
     return title
 
 
+def get_url_from_soup(soup) -> Optional[str]:
+    url = None
+    try:
+        url_tag = soup.find("div", class_="AZqljb JSLBqe")
+        if url_tag:
+            url = url_tag["data-feed"]
+    except ValueError:
+        pass
+    return url
+
+
 def get_publisher_from_soup(soup) -> str:
     pub = ""
     try:
@@ -69,5 +80,6 @@ def scrape_data_from_google_podcasts(title) -> dict:
         data_dict["title"] = get_title_from_soup(soup)
         data_dict["description"] = get_description_from_soup(soup)
         data_dict["publisher"] = get_publisher_from_soup(soup)
+        data_dict["url"] = get_url_from_soup(soup)
         data_dict["image_url"] = get_img_url_from_soup(soup)
     return data_dict