Browse Source

Add url to scraped data for podcasts

Colin Powell 2 years ago
parent
commit
98f9c4bc04
2 changed files with 22 additions and 3 deletions
  1. 10 3
      vrobbler/apps/podcasts/models.py
  2. 12 0
      vrobbler/apps/podcasts/scrapers.py

+ 10 - 3
vrobbler/apps/podcasts/models.py

@@ -3,15 +3,15 @@ from typing import Dict, Optional
 from uuid import uuid4
 from uuid import uuid4
 
 
 import requests
 import requests
+from django.apps import apps
 from django.conf import settings
 from django.conf import settings
 from django.core.files.base import ContentFile
 from django.core.files.base import ContentFile
 from django.db import models
 from django.db import models
 from django.utils.translation import gettext_lazy as _
 from django.utils.translation import gettext_lazy as _
 from django_extensions.db.models import TimeStampedModel
 from django_extensions.db.models import TimeStampedModel
+from podcasts.scrapers import scrape_data_from_google_podcasts
 from scrobbles.mixins import ScrobblableMixin
 from scrobbles.mixins import ScrobblableMixin
 
 
-from vrobbler.apps.podcasts.scrapers import scrape_data_from_google_podcasts
-
 logger = logging.getLogger(__name__)
 logger = logging.getLogger(__name__)
 BNULL = {"blank": True, "null": True}
 BNULL = {"blank": True, "null": True}
 
 
@@ -38,6 +38,12 @@ class Podcast(TimeStampedModel):
     def __str__(self):
     def __str__(self):
         return f"{self.name}"
         return f"{self.name}"
 
 
def scrobbles(self):
    """Return all Scrobbles for this podcast, most recent first.

    The Scrobble model is resolved lazily via the app registry to avoid a
    circular import between the podcasts and scrobbles apps.
    """
    scrobble_model = apps.get_model("scrobbles", "Scrobble")
    queryset = scrobble_model.objects.filter(podcast_episode__podcast=self)
    return queryset.order_by("-timestamp")
     def scrape_google_podcasts(self, force=False):
     def scrape_google_podcasts(self, force=False):
         podcast_dict = {}
         podcast_dict = {}
         if not self.cover_image or force:
         if not self.cover_image or force:
@@ -48,7 +54,8 @@ class Podcast(TimeStampedModel):
                         name=podcast_dict["producer"]
                         name=podcast_dict["producer"]
                     )
                     )
                 self.description = podcast_dict.get("description")
                 self.description = podcast_dict.get("description")
-                self.save(update_fields=["description", "producer"])
+                self.url = podcast_dict.get("url")
+                self.save(update_fields=["description", "producer", "url"])
 
 
         cover_url = podcast_dict.get("image_url")
         cover_url = podcast_dict.get("image_url")
         if (not self.cover_image or force) and cover_url:
         if (not self.cover_image or force) and cover_url:

+ 12 - 0
vrobbler/apps/podcasts/scrapers.py

@@ -24,6 +24,17 @@ def get_title_from_soup(soup) -> Optional[int]:
     return title
     return title
 
 
 
 
def get_url_from_soup(soup) -> Optional[str]:
    """Extract the podcast feed URL from a scraped Google Podcasts page.

    Looks up the div carrying the feed metadata and reads its ``data-feed``
    attribute. Returns None when the tag or the attribute is absent.

    Note: the original annotated the return as ``Optional[int]`` and caught
    ``ValueError``; the value is a URL string, and BeautifulSoup tag item
    access raises ``KeyError`` for a missing attribute, so both are fixed.
    """
    url = None
    try:
        # Class name is tied to Google Podcasts' generated markup and may
        # break when the page layout changes — hence the best-effort catch.
        url_tag = soup.find("div", class_="AZqljb JSLBqe")
        if url_tag:
            url = url_tag["data-feed"]
    except KeyError:
        # Tag present but without a data-feed attribute: treat as "no URL".
        pass
    return url
+
+
 def get_publisher_from_soup(soup) -> str:
 def get_publisher_from_soup(soup) -> str:
     pub = ""
     pub = ""
     try:
     try:
@@ -69,5 +80,6 @@ def scrape_data_from_google_podcasts(title) -> dict:
         data_dict["title"] = get_title_from_soup(soup)
         data_dict["title"] = get_title_from_soup(soup)
         data_dict["description"] = get_description_from_soup(soup)
         data_dict["description"] = get_description_from_soup(soup)
         data_dict["publisher"] = get_publisher_from_soup(soup)
         data_dict["publisher"] = get_publisher_from_soup(soup)
+        data_dict["url"] = get_url_from_soup(soup)
         data_dict["image_url"] = get_img_url_from_soup(soup)
         data_dict["image_url"] = get_img_url_from_soup(soup)
     return data_dict
     return data_dict