Browse Source

[videos] Add skatevideo lookup properly

Colin Powell 8 months ago
parent
commit
0a8acdf33f
3 changed files with 135 additions and 52 deletions
  1. 1 0
      vrobbler/apps/videos/models.py
  2. 128 0
      vrobbler/apps/videos/skatevideosite.py
  3. 6 52
      vrobbler/apps/videos/utils.py

+ 1 - 0
vrobbler/apps/videos/models.py

@@ -123,6 +123,7 @@ class Video(ScrobblableMixin):
         UNKNOWN = "U", _("Unknown")
         TV_EPISODE = "E", _("TV Episode")
         MOVIE = "M", _("Movie")
+        SKATE_VIDEO = "S", _("Skate Video")
 
     video_type = models.CharField(
         max_length=1,

+ 128 - 0
vrobbler/apps/videos/skatevideosite.py

@@ -0,0 +1,128 @@
+from enum import Enum
+from typing import Optional
+from bs4 import BeautifulSoup
+import requests
+import logging
+
+
+logger = logging.getLogger(__name__)
+
+USER_AGENT = (
+    "Mozilla/5.0 (Android 4.4; Mobile; rv:41.0) Gecko/41.0 Firefox/41.0"
+)
+SKATEVIDEOSITE_URL = "https://www.skatevideosite.com"
+SKATEVIDEOSITE_SEARCH_URL = SKATEVIDEOSITE_URL + "/search/?q={title}"
+
+
+class AmazonAttribute(Enum):
+    """Integer identifiers for book-style attributes (series, pages, ISBN, ...).
+
+    NOTE(review): nothing in the visible part of this module references this
+    enum, and its members describe book metadata rather than videos --
+    presumably carried over from another scraper; confirm whether it is
+    still needed here.
+    """
+
+    SERIES = 0
+    PAGES = 1
+    LANGUAGE = 2
+    PUBLISHER = 3
+    PUB_DATE = 4
+    DIMENSIONS = 5
+    ISBN_10 = 6
+    ISBN_13 = 7
+
+
+def strip_and_clean(text: str) -> str:
+    """Return ``text`` with all leading and trailing whitespace removed.
+
+    A single ``strip()`` already removes newlines along with every other
+    whitespace character, so no separate newline pass is required.
+    """
+    return text.strip()
+
+
+def get_rating_from_soup(soup) -> Optional[int]:
+    """Return the integer rating found in ``soup``, or ``None``.
+
+    Reads the text of the first ``div`` whose class is ``allmusic-rating``.
+    A missing element, or text that is not an integer, yields ``None``.
+    """
+    try:
+        rating_div = soup.find("div", class_="allmusic-rating")
+        if not rating_div:
+            return None
+        return int(strip_and_clean(rating_div.get_text()))
+    except ValueError:
+        # Text was present but not parseable as an integer.
+        return None
+
+
+def get_review_from_soup(soup) -> str:
+    """Return the cleaned review text found in ``soup``, or ``""``.
+
+    Reads the text of the first ``div`` whose class is ``text``; when no
+    such element exists an empty string is returned.
+    """
+    try:
+        text_div = soup.find("div", class_="text")
+        if not text_div:
+            return ""
+        return strip_and_clean(text_div.get_text())
+    except ValueError:
+        return ""
+
+
+def scrape_data_from_amazon(url) -> dict:
+    """Fetch ``url`` and scrape a rating and a review from the returned HTML.
+
+    Returns a dict with ``rating`` and ``review`` keys when the response
+    status is 200; otherwise returns an empty dict.
+    """
+    data_dict = {}
+    headers = {"User-Agent": USER_AGENT}
+    r = requests.get(url, headers=headers)
+    if r.status_code == 200:
+        soup = BeautifulSoup(r.text, "html.parser")
+        # The interactive debugger breakpoint that used to sit here was
+        # removed: it blocked every successful request waiting on stdin.
+        data_dict["rating"] = get_rating_from_soup(soup)
+        data_dict["review"] = get_review_from_soup(soup)
+    return data_dict
+
+
+def lookup_video_from_skatevideosite(title: str) -> Optional[dict]:
+    """Look up a skate video on skatevideosite.com by title.
+
+    Runs a site search for ``title``, follows a link from the results page
+    to a detail page, and scrapes the release year and run time from it.
+
+    Returns a dict with ``title``, ``video_type``, ``year``,
+    ``run_time_seconds`` and ``cover_url`` keys, or ``None`` when the title
+    is empty, a request fails or returns a non-200 status, no search result
+    is found, or the pages cannot be parsed.
+    """
+    from urllib.parse import quote_plus
+
+    if not title:
+        # An empty query cannot identify a video.
+        return None
+
+    # Percent-encode the title so spaces, "&", "#" and similar characters
+    # cannot truncate or corrupt the query string.
+    search_url = SKATEVIDEOSITE_SEARCH_URL.format(title=quote_plus(str(title)))
+    headers = {
+        "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36",
+        "accept-language": "en-GB,en;q=0.9",
+    }
+    # Seconds; without a timeout an unresponsive server blocks forever.
+    timeout = 10
+
+    try:
+        response = requests.get(search_url, headers=headers, timeout=timeout)
+    except requests.RequestException:
+        logger.warning(
+            "Search request to SkateVideoSite failed",
+            exc_info=True,
+            extra={"title": title},
+        )
+        return None
+
+    if response.status_code != 200:
+        logger.info("Bad http response from SkateVideoSite %s", response)
+        return None
+
+    soup = BeautifulSoup(response.text, "html.parser")
+
+    # Check for a search result before spending a request on a detail page.
+    card_body = soup.find("div", class_="card-body")
+    result = card_body.find("a") if card_body is not None else None
+    if result is None:
+        logger.info(
+            "No search results found on skatevideosite",
+            extra={"title": title},
+        )
+        return None
+
+    # NOTE(review): the detail link is picked by position (the 13th anchor on
+    # the search page) -- presumably the first result after the navigation
+    # links; confirm against the live markup, this is fragile.
+    detail_anchor_index = 12
+    try:
+        detail_path = soup.find_all("a")[detail_anchor_index]["href"]
+    except (IndexError, KeyError):
+        logger.info(
+            "No detail link found on skatevideosite",
+            extra={"title": title},
+        )
+        return None
+
+    try:
+        detail_response = requests.get(
+            SKATEVIDEOSITE_URL + detail_path, headers=headers, timeout=timeout
+        )
+    except requests.RequestException:
+        logger.warning(
+            "Detail request to SkateVideoSite failed",
+            exc_info=True,
+            extra={"title": title},
+        )
+        return None
+
+    if detail_response.status_code != 200:
+        logger.info(
+            "Bad http response from SkateVideoSite %s", detail_response
+        )
+        return None
+
+    detail_soup = BeautifulSoup(detail_response.text, "html.parser")
+
+    try:
+        # The year is the element's first child with its parentheses removed.
+        year = (
+            detail_soup.find("span", class_="whitespace-normal")
+            .contents[0]
+            .replace("(", "")
+            .replace(")", "")
+        )
+        # The run time is read as whole minutes and converted to seconds.
+        run_time_seconds = (
+            int(
+                detail_soup.find("div", class_="p-1")
+                .contents[-1]
+                .contents[0]
+                .strip("(")
+                .strip("min )")
+            )
+            * 60
+        )
+        cover = result.find("img")
+        return {
+            "title": str(cover.get("alt").replace(" cover", "")),
+            "video_type": "S",
+            "year": year,
+            "run_time_seconds": run_time_seconds,
+            "cover_url": str(cover.get("src")),
+        }
+    except (AttributeError, IndexError, TypeError, ValueError):
+        # The markup did not have the expected shape (missing element,
+        # non-text child, or non-numeric run time).
+        logger.warning(
+            "Could not parse skatevideosite pages",
+            exc_info=True,
+            extra={"title": title},
+        )
+        return None

+ 6 - 52
vrobbler/apps/videos/utils.py

@@ -1,8 +1,9 @@
 import logging
 
+from scrobbles.utils import convert_to_seconds
 from videos.imdb import lookup_video_from_imdb
 from videos.models import Series, Video
-from scrobbles.utils import convert_to_seconds
+from videos.skatevideosite import lookup_video_from_skatevideosite
 
 logger = logging.getLogger(__name__)
 
@@ -12,10 +13,10 @@ def get_or_create_video(data_dict: dict, post_keys: dict, force_update=False):
         post_keys.get("VIDEO_TITLE"), ""
     )
     imdb_metadata = lookup_video_from_imdb(name_or_id)
-    # skatevideosite_metadata = lookup_video_from_skatevideosite(name_or_id)
+    skatevideosite_metadata = lookup_video_from_skatevideosite(name_or_id)
     # youtube_metadata = lookup_video_from_youtube(name_or_id)
 
-    video_dict = imdb_metadata
+    video_dict = skatevideosite_metadata or imdb_metadata
     # video_metadata = imdb_metadata or skatevideosite_metadata or youtube_metadata
     if not video_dict:
         logger.info(
@@ -42,8 +43,8 @@ def get_or_create_video(data_dict: dict, post_keys: dict, force_update=False):
                 post_keys.get("TMDB_ID"), None
             )
 
-        series = None
-        if video_dict.get("series_name"):
+        series_name = video_dict.pop("series_name", None)
+        if series_name:
 
             series_name = video_dict.pop("series_name")
             series, series_created = Series.objects.get_or_create(
@@ -66,50 +67,3 @@ def get_or_create_video(data_dict: dict, post_keys: dict, force_update=False):
 
 def get_or_create_video_from_skatevideosite(title: str, force_update=True):
     ...
-
-
-def get_or_create_video_from_jellyfin(jellyfin_data: dict, force_update=True):
-    """Given a Jellyfin webhook payload as a dictionary, lookup the video or
-    create a new one.
-
-    """
-    video, video_created = Video.objects.get_or_create(
-        imdb_id=jellyfin_data.get("Provider_imdb", "").replace("tt", ""),
-        title=jellyfin_data.get("Name"),
-    )
-
-    if video_created:
-        video_type = Video.VideoType.MOVIE
-        series = None
-        if jellyfin_data.get("ItemType", "") == "Episode":
-            series_name = jellyfin_data.get("SeriesName", "")
-            series, series_created = Series.objects.get_or_create(
-                name=series_name
-            )
-            if series_created:
-                series.fix_metadata()
-            video_type = Video.VideoType.TV_EPISODE
-
-        video_dict = {
-            "video_type": video_type,
-            "year": jellyfin_data.get("Year", ""),
-            "overview": jellyfin_data.get("Overview", None),
-            "tagline": jellyfin_data.get("Tagline", None),
-            "run_time_seconds": convert_to_seconds(
-                jellyfin_data.get("RunTime", 0)
-            ),
-            "tvdb_id": jellyfin_data.get("Provider_tvdb", None),
-            "tvrage_id": jellyfin_data.get("Provider_tvrage", None),
-            "episode_number": jellyfin_data.get("EpisodeNumber", None),
-            "season_number": jellyfin_data.get("SeasonNumber", None),
-        }
-
-        if series:
-            video_dict["tv_series_id"] = series.id
-
-        Video.objects.filter(pk=video.id).update(**video_dict)
-        video.refresh_from_db()
-
-        video.fix_metadata()
-
-    return video