浏览代码

Order locations by scrobble timestamp

Colin Powell 1 年之前
父节点
当前提交
7564292f5b
共有 3 个文件被更改,包括 147 次插入3 次删除
  1. 144 0
      vrobbler/apps/books/amazon.py
  2. 1 1
      vrobbler/apps/locations/views.py
  3. 2 2
      vrobbler/templates/locations/geolocation_list.html

+ 144 - 0
vrobbler/apps/books/amazon.py

@@ -0,0 +1,144 @@
+from enum import Enum
+from typing import Optional
+from bs4 import BeautifulSoup
+import requests
+import logging
+
+logger = logging.getLogger(__name__)
+
# Spoofed browser User-Agent: Amazon serves a degraded/blocked page to
# clients it does not recognize as a browser.
USER_AGENT = (
    "Mozilla/5.0 (Android 4.4; Mobile; rv:41.0) Gecko/41.0 Firefox/41.0"
)
# Search endpoint; {amazon_id} is interpolated with an ASIN/ISBN query.
AMAZON_SEARCH_URL = "https://www.amazon.com/s?k={amazon_id}"
+
+
class AmazonAttribute(Enum):
    """Positional indices into the "rpi-attribute-value" metadata divs
    on an Amazon book product page, in the order Amazon renders them.

    NOTE(review): this ordering is scraped from Amazon's markup and may
    drift if Amazon changes the page layout — confirm periodically.
    """

    SERIES = 0
    PAGES = 1
    LANGUAGE = 2
    PUBLISHER = 3
    PUB_DATE = 4
    DIMENSIONS = 5
    ISBN_10 = 6
    ISBN_13 = 7
+
+
def strip_and_clean(text):
    """Return *text* with leading/trailing whitespace (incl. newlines) removed."""
    # strip("\n") followed by rstrip()+lstrip() collapses to a plain strip().
    return text.strip()
+
+
def get_rating_from_soup(soup) -> Optional[int]:
    """Extract an integer rating from a parsed page.

    Returns None when the rating element is absent or its text does not
    parse as an integer.
    """
    node = soup.find("div", class_="allmusic-rating")
    if not node:
        return None
    try:
        # Inlined strip_and_clean(): trims surrounding whitespace/newlines.
        return int(node.get_text().strip())
    except ValueError:
        return None
+
+
def get_review_from_soup(soup) -> str:
    """Extract review text from a parsed page.

    Returns "" when the review element is absent.
    """
    node = soup.find("div", class_="text")
    if not node:
        return ""
    try:
        # Inlined strip_and_clean(): trims surrounding whitespace/newlines.
        return node.get_text().strip()
    except ValueError:
        return ""
+
+
def scrape_data_from_amazon(url) -> dict:
    """Fetch *url* and scrape a rating and a review from the page.

    Returns a dict with "rating" (Optional[int]) and "review" (str) keys
    on a 200 response, or an empty dict on any other status.

    Fix: removed a leftover ``import pdb; pdb.set_trace()`` that would
    halt the process on every successful fetch.
    """
    data_dict: dict = {}
    headers = {"User-Agent": USER_AGENT}
    response = requests.get(url, headers=headers)
    if response.status_code == 200:
        soup = BeautifulSoup(response.text, "html.parser")
        data_dict["rating"] = get_rating_from_soup(soup)
        data_dict["review"] = get_review_from_soup(soup)
    return data_dict
+
+
def get_amazon_product_dict(amazon_id: str) -> dict:
    """Look up a book on Amazon by searching for *amazon_id* (ASIN/ISBN).

    Performs two HTTP requests: a search to locate the product page, then
    a fetch of that page to scrape title, cover URL, summary, ISBN-10 and
    page count.

    Returns an empty dict on an HTTP failure or when the search yields no
    result; scraping errors are logged and yield whatever was collected
    before the failure.

    Fixes: removed a dead ``data_dict = {}`` reassignment; merged two
    byte-identical except handlers into one clause; ``findAll`` replaced
    with the non-deprecated ``find_all`` alias.
    """
    data_dict: dict = {}

    search_url = AMAZON_SEARCH_URL.format(amazon_id=amazon_id)
    # Desktop-browser headers: Amazon serves a different (harder to
    # scrape) layout to unrecognized clients.
    headers = {
        "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36",
        "accept-language": "en-GB,en;q=0.9",
    }

    response = requests.get(search_url, headers=headers)
    if response.status_code != 200:
        logger.info(f"Bad http response from Amazon {response}")
        return data_dict

    soup = BeautifulSoup(response.text, "html.parser")
    # First search hit; assumes the top result is the wanted product.
    result_link = soup.find("a", class_="a-link-normal")
    if not result_link:
        logger.info(f"No search results for {amazon_id}")
        return data_dict

    product_url = "https://www.amazon.com" + str(result_link.get("href", ""))

    response = requests.get(product_url, headers=headers)
    if response.status_code != 200:
        logger.info(f"Bad http response from Amazon {response}")
        return data_dict

    soup = BeautifulSoup(response.text, "html.parser")
    try:
        data_dict["title"] = (
            soup.find_all("span", class_="celwidget")[1].text.strip()
        )
        data_dict["cover_url"] = (
            soup.find("img", class_="frontImage").get("src")
        )
        data_dict["summary"] = (
            soup.find_all("div", class_="a-expander-content")[1].text
        )
        meta = soup.find_all("div", class_="rpi-attribute-value")
        data_dict["isbn"] = meta[AmazonAttribute.ISBN_10.value].text.strip()
        pages_text = meta[AmazonAttribute.PAGES.value].text
        # Attribute reads like "320 pages"; keep just the number part.
        if "pages" in pages_text:
            data_dict["pages"] = pages_text.split("pages")[0].strip()
    except (IndexError, AttributeError) as e:
        # Amazon's markup changes frequently; log and return the partial data.
        logger.error(
            f"Amazon lookup is failing for this product {amazon_id}: {e}"
        )

    return data_dict
+
+
def lookup_book_from_amazon(amazon_id: str) -> dict:
    """Look up a book on Amazon and return it in the shared book-dict shape.

    Fix: the original body referenced six undefined names (``isbn``,
    ``ol_id``, ``get_first``, ``first_sentence``, ``COVER_URL``,
    ``ol_author_id``) — an apparent copy-paste from an OpenLibrary
    lookup — and raised NameError on every call. It now delegates to
    :func:`get_amazon_product_dict` and maps the scraped fields into the
    same key schema, with None/[] for fields Amazon cannot supply.
    """
    scraped = get_amazon_product_dict(amazon_id)

    return {
        "title": scraped.get("title"),
        "isbn": scraped.get("isbn"),
        # Fields below come from the OpenLibrary lookup path and are not
        # available from an Amazon product page.
        "openlibrary_id": None,
        "goodreads_id": None,
        "first_publish_year": None,
        "first_sentence": None,
        "pages": scraped.get("pages"),
        "cover_url": scraped.get("cover_url"),
        "ol_author_id": None,
        "subject_key_list": [],
    }

+ 1 - 1
vrobbler/apps/locations/views.py

@@ -9,7 +9,7 @@ class GeoLocationListView(generic.ListView):
     paginate_by = 75
 
    def get_queryset(self):
        # Only this user's locations, newest scrobble first — ordering by
        # the related scrobble's timestamp rather than row creation time.
        return super().get_queryset().filter(scrobble__user_id=self.request.user.id).order_by("-scrobble__timestamp")
 
     def get_context_data(self, **kwargs):
         context_data = super().get_context_data(**kwargs)

+ 2 - 2
vrobbler/templates/locations/geolocation_list.html

@@ -20,9 +20,9 @@
      crossorigin=""></script>
 
  <script>
-   var map = L.map('map').setView([{{latest.lat}}, {{latest.lon}}], 13);
+   var map = L.map('map').setView([{{latest.lat}}, {{latest.lon}}], 17);
     L.tileLayer('https://tile.openstreetmap.org/{z}/{x}/{y}.png', {
-        maxZoom: 19,
+        maxZoom: 20,
         attribution: '&copy; <a href="http://www.openstreetmap.org/copyright">OpenStreetMap</a>'
     }).addTo(map);
    var marker = L.marker([{{latest.lat}}, {{latest.lon}}]).addTo(map);