
Add url to scraped data for podcasts

Colin Powell, 2 years ago
commit 98f9c4bc04
2 files changed, 22 insertions(+), 3 deletions(-)
  1. vrobbler/apps/podcasts/models.py  (+10 -3)
  2. vrobbler/apps/podcasts/scrapers.py  (+12 -0)

vrobbler/apps/podcasts/models.py  (+10 -3)

@@ -3,15 +3,15 @@ from typing import Dict, Optional
 from uuid import uuid4
 
 import requests
+from django.apps import apps
 from django.conf import settings
 from django.core.files.base import ContentFile
 from django.db import models
 from django.utils.translation import gettext_lazy as _
 from django_extensions.db.models import TimeStampedModel
+from podcasts.scrapers import scrape_data_from_google_podcasts
 from scrobbles.mixins import ScrobblableMixin
 
-from vrobbler.apps.podcasts.scrapers import scrape_data_from_google_podcasts
-
 logger = logging.getLogger(__name__)
 BNULL = {"blank": True, "null": True}
 
@@ -38,6 +38,12 @@ class Podcast(TimeStampedModel):
     def __str__(self):
         return f"{self.name}"
 
+    def scrobbles(self):
+        Scrobble = apps.get_model("scrobbles", "Scrobble")
+        return Scrobble.objects.filter(podcast_episode__podcast=self).order_by(
+            "-timestamp"
+        )
+
     def scrape_google_podcasts(self, force=False):
         podcast_dict = {}
         if not self.cover_image or force:
@@ -48,7 +54,8 @@
                         name=podcast_dict["producer"]
                     )
                 self.description = podcast_dict.get("description")
-                self.save(update_fields=["description", "producer"])
+                self.url = podcast_dict.get("url")
+                self.save(update_fields=["description", "producer", "url"])
 
         cover_url = podcast_dict.get("image_url")
         if (not self.cover_image or force) and cover_url:
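The new scrobbles() helper resolves the Scrobble model lazily with apps.get_model, presumably to sidestep a circular import between the podcasts and scrobbles apps, and scrape_google_podcasts now saves the scraped feed URL alongside the description and producer. Below is a minimal usage sketch of the changed model API; the method and field names come from the diff, but the surrounding setup (an existing Podcast row named "Some Show", and a url field with its migration) is assumed.

# Minimal usage sketch (assumptions noted inline), run from a Django shell
# with the vrobbler settings loaded.
from podcasts.models import Podcast

# Hypothetical existing row; any Podcast whose name Google Podcasts knows will do.
podcast = Podcast.objects.get(name="Some Show")

# Scrape description, producer and (new in this commit) the feed URL.
# Assumes the Podcast model already has a url field and matching migration,
# since the diff writes self.url and includes "url" in update_fields.
podcast.scrape_google_podcasts(force=True)
print(podcast.url)

# New reverse helper added in this commit: scrobbles for this podcast,
# newest first, with the Scrobble model looked up lazily via apps.get_model.
for scrobble in podcast.scrobbles():
    print(scrobble.timestamp)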

vrobbler/apps/podcasts/scrapers.py  (+12 -0)

@@ -24,6 +24,17 @@ def get_title_from_soup(soup) -> Optional[int]:
     return title
 
 
+def get_url_from_soup(soup) -> Optional[int]:
+    url = None
+    try:
+        url_tag = soup.find("div", class_="AZqljb JSLBqe")
+        if url_tag:
+            url = url_tag["data-feed"]
+    except ValueError:
+        pass
+    return url
+
+
 def get_publisher_from_soup(soup) -> str:
     pub = ""
     try:
@@ -69,5 +80,6 @@ def scrape_data_from_google_podcasts(title) -> dict:
         data_dict["title"] = get_title_from_soup(soup)
         data_dict["description"] = get_description_from_soup(soup)
         data_dict["publisher"] = get_publisher_from_soup(soup)
+        data_dict["url"] = get_url_from_soup(soup)
         data_dict["image_url"] = get_img_url_from_soup(soup)
     return data_dict
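get_url_from_soup reads the feed address out of the data-feed attribute on the div with the obfuscated class AZqljb JSLBqe on a Google Podcasts show page. The sketch below reproduces only that parsing step against invented sample markup; the class name is whatever Google happened to serve when the scraper was written and can change at any time, and the sample HTML is not real page output.

# Parsing sketch: the same lookup get_url_from_soup performs, applied to
# invented sample markup that mimics the Google Podcasts show page.
from typing import Optional

from bs4 import BeautifulSoup

SAMPLE_HTML = """
<div class="AZqljb JSLBqe" data-feed="https://example.com/feed.xml">Example Podcast</div>
"""


def extract_feed_url(html: str) -> Optional[str]:
    # Find the div by its (obfuscated) class and read its data-feed attribute,
    # mirroring the selector used in the scraper.
    soup = BeautifulSoup(html, "html.parser")
    url_tag = soup.find("div", class_="AZqljb JSLBqe")
    if url_tag and url_tag.has_attr("data-feed"):
        return url_tag["data-feed"]
    return None


print(extract_feed_url(SAMPLE_HTML))  # https://example.com/feed.xml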