فهرست منبع

Add podcast field for google url

Colin Powell 2 سال پیش
والد
کامیت
70118e2e62

+ 23 - 0
vrobbler/apps/podcasts/migrations/0011_rename_url_podcast_feed_url_and_more.py

@@ -0,0 +1,23 @@
+# Generated by Django 4.1.5 on 2023-03-23 04:05
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ("podcasts", "0010_remove_podcast_cover_podcast_cover_image_and_more"),
+    ]
+
+    operations = [
+        migrations.RenameField(
+            model_name="podcast",
+            old_name="url",
+            new_name="feed_url",
+        ),
+        migrations.AddField(
+            model_name="podcast",
+            name="google_podcasts_url",
+            field=models.URLField(blank=True, null=True),
+        ),
+    ]

+ 10 - 3
vrobbler/apps/podcasts/models.py

@@ -32,7 +32,8 @@ class Podcast(TimeStampedModel):
     )
     description = models.TextField(**BNULL)
     active = models.BooleanField(default=True)
-    url = models.URLField(**BNULL)
+    feed_url = models.URLField(**BNULL)
+    google_podcasts_url = models.URLField(**BNULL)
     cover_image = models.ImageField(upload_to="podcasts/covers/", **BNULL)
 
     def __str__(self):
@@ -54,8 +55,14 @@ class Podcast(TimeStampedModel):
                         name=podcast_dict["producer"]
                     )
                 self.description = podcast_dict.get("description")
-                self.url = podcast_dict.get("url")
-                self.save(update_fields=["description", "producer", "url"])
+                self.google_podcasts_url = podcast_dict.get("google_url")
+                self.save(
+                    update_fields=[
+                        "description",
+                        "producer",
+                        "google_podcasts_url",
+                    ]
+                )
 
         cover_url = podcast_dict.get("image_url")
         if (not self.cover_image or force) and cover_url:

+ 22 - 16
vrobbler/apps/podcasts/scrapers.py

@@ -9,55 +9,61 @@ logger = logging.getLogger(__name__)
 PODCAST_SEARCH_URL = "https://podcasts.google.com/search/{query}"
 
 
-def strip_and_clean(text):
+def _strip_and_clean(text):
     return text.replace("\n", " ").rstrip().lstrip()
 
 
-def get_title_from_soup(soup) -> Optional[int]:
+def _build_google_url(url):
+    """Turn a scraped relative ("./...") href into an absolute Google URL."""
+    # str.replace swaps every "./" in the string, not only a leading one.
+    return url.replace("./", "https://podcasts.google.com/")
+
+
+def _get_title_from_soup(soup) -> Optional[str]:
     title = None
     try:
         potential_title = soup.find("div", class_="FyxyKd")
         if potential_title:
-            title = strip_and_clean(potential_title.get_text())
+            title = _strip_and_clean(potential_title.get_text())
     except ValueError:
         pass
     return title
 
 
-def get_url_from_soup(soup) -> Optional[int]:
+def _get_url_from_soup(soup) -> Optional[str]:
     url = None
     try:
-        url_tag = soup.find("div", class_="AZqljb JSLBqe")
+        url_tag = soup.find("div", class_="yXo2Qc")
         if url_tag:
-            url = url_tag.get("data-feed")
+            url = _build_google_url(url_tag.get("href") or "") or None
     except ValueError:
         pass
     return url
 
 
-def get_producer_from_soup(soup) -> str:
+def _get_producer_from_soup(soup) -> str:
     pub = ""
     try:
         potential_pub = soup.find("div", class_="J3Ov7d")
         if potential_pub:
-            pub = strip_and_clean(potential_pub.get_text())
+            pub = _strip_and_clean(potential_pub.get_text())
     except ValueError:
         pass
     return pub
 
 
-def get_description_from_soup(soup) -> str:
+def _get_description_from_soup(soup) -> str:
     desc = ""
     try:
         potential_desc = soup.find("div", class_="yuTZxb")
         if potential_desc:
-            desc = strip_and_clean(potential_desc.get_text())
+            desc = _strip_and_clean(potential_desc.get_text())
     except ValueError:
         pass
     return desc
 
 
-def get_img_url_from_soup(soup) -> str:
+def _get_img_url_from_soup(soup) -> str:
     url = ""
     try:
         img_tag = soup.find("img", class_="BhVIWc")
@@ -77,9 +83,9 @@ def scrape_data_from_google_podcasts(title) -> dict:
     r = requests.get(url, headers=headers)
     if r.status_code == 200:
         soup = BeautifulSoup(r.text, "html")
-        data_dict["title"] = get_title_from_soup(soup)
-        data_dict["description"] = get_description_from_soup(soup)
-        data_dict["producer"] = get_producer_from_soup(soup)
-        data_dict["url"] = get_url_from_soup(soup)
-        data_dict["image_url"] = get_img_url_from_soup(soup)
+        data_dict["title"] = _get_title_from_soup(soup)
+        data_dict["description"] = _get_description_from_soup(soup)
+        data_dict["producer"] = _get_producer_from_soup(soup)
+        data_dict["google_url"] = _get_url_from_soup(soup)
+        data_dict["image_url"] = _get_img_url_from_soup(soup)
     return data_dict