Ver Fonte

Add book lookup via locg

Colin Powell há 1 ano atrás
pai
commit
f1b1989424

+ 124 - 0
vrobbler/apps/books/locg.py

@@ -0,0 +1,124 @@
+#!/usr/bin/env python3
+
+from enum import Enum
+from typing import Optional
+from bs4 import BeautifulSoup
+import requests
+import logging
+
+logger = logging.getLogger(__name__)
+
+# Browser-like request headers sent with every LOCG request made in this module.
+HEADERS = {
+    "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36",
+    "accept-language": "en-GB,en;q=0.9",
+}
+# NOTE(review): the name looks like a misspelling of "WRITER"; the value is
+# empty and nothing in this module reads it -- confirm whether it can go.
+LOCG_WRTIER_URL = ""
+# Writer profile page; ``slug`` is the path segment that follows ``/people/``.
+LOCG_WRITER_DETAIL_URL = "https://leagueofcomicgeeks.com/people/{slug}"
+# Issue search endpoint; ``query`` is the text to search for.
+LOCG_SEARCH_URL = (
+    "https://leagueofcomicgeeks.com/search/ajax_issues?query={query}"
+)
+# Comic detail page; ``locg_slug`` is the path segment that follows ``/comic/``.
+LOCG_DETAIL_URL = "https://leagueofcomicgeeks.com/comic/{locg_slug}"
+
+
+def strip_and_clean(text):
+    """Return ``text`` without surrounding newlines or other whitespace."""
+    # Trim the newline padding first, then whatever whitespace is left over.
+    without_newlines = text.strip("\n")
+    return without_newlines.strip()
+
+
+def get_rating_from_soup(soup) -> Optional[int]:
+    """Read an integer rating out of a parsed page.
+
+    Looks up the first ``div`` with class ``allmusic-rating`` and converts
+    its cleaned text to ``int``.
+
+    Returns:
+        The rating, or ``None`` when the element is absent or its text is
+        not an integer.
+    """
+    # NOTE(review): the "allmusic-rating" class name looks like it belongs to
+    # a different site's markup -- confirm it is what LOCG pages use.
+    try:
+        rating_div = soup.find("div", class_="allmusic-rating")
+        if rating_div:
+            return int(strip_and_clean(rating_div.get_text()))
+    except ValueError:
+        # Text that is not a number simply means "no rating".
+        pass
+    return None
+
+
+def lookup_comic_writer_by_locg_slug(slug: str) -> dict:
+    """Fetch a writer's profile page from League of Comic Geeks.
+
+    Args:
+        slug: The path segment that follows ``/people/`` in the writer URL.
+
+    Returns:
+        A dict with ``locg_slug``, ``name`` and ``photo_url`` keys
+        (``photo_url`` is ``None`` when the page has no avatar image), or an
+        empty dict when the request does not return 200 or the page has no
+        ``h1`` to take the name from.
+    """
+    writer_url = LOCG_WRITER_DETAIL_URL.format(slug=slug)
+
+    response = requests.get(writer_url, headers=HEADERS)
+
+    if response.status_code != 200:
+        logger.info(f"Bad http response from LOCG {response}")
+        return {}
+
+    soup = BeautifulSoup(response.text, "html.parser")
+
+    # find() returns None for a missing element; check before dereferencing
+    # so an unexpected page layout does not surface as AttributeError.
+    heading = soup.find("h1")
+    if heading is None:
+        logger.warning(f"No writer name found on LOCG page for {slug}")
+        return {}
+
+    avatar = soup.find("div", class_="avatar")
+    image = avatar.img if avatar is not None else None
+
+    return {
+        "locg_slug": slug,
+        "name": heading.text.strip(),
+        "photo_url": image.get("src") if image is not None else None,
+    }
+
+
+def lookup_comic_by_locg_slug(slug: str) -> dict:
+    """Scrape a comic's detail page on League of Comic Geeks.
+
+    Args:
+        slug: The path segment that follows ``/comic/`` in the detail URL.
+
+    Returns:
+        A dict that may contain ``title``, ``summary``, ``cover_url``,
+        ``pages``, ``isbn`` and ``locg_writer_slug``. Keys whose element is
+        missing from the page are omitted. The dict is empty when the
+        request does not return 200.
+    """
+    data_dict = {}
+    product_url = LOCG_DETAIL_URL.format(locg_slug=slug)
+
+    response = requests.get(product_url, headers=HEADERS)
+
+    if response.status_code != 200:
+        logger.info(f"Bad http response from LOCG {response}")
+        return data_dict
+
+    soup = BeautifulSoup(response.text, "html.parser")
+    try:
+        # Title, summary and cover share the outer handler: if any of them
+        # is missing, parsing stops and whatever was collected is returned.
+        data_dict["title"] = soup.find("h1").text.strip()
+        data_dict["summary"] = soup.find("p").text.strip()
+        data_dict["cover_url"] = (
+            soup.find("div", class_="cover-art").find("img").get("src")
+        )
+
+        # The optional fields below are read by position. Each has its own
+        # handler so one missing field does not prevent reading the others.
+        attrs = soup.findAll("div", class_="details-addtl-block")
+        try:
+            data_dict["pages"] = (
+                attrs[1]
+                .find("div", class_="value")
+                .text.split("pages")[0]
+                .strip()
+            )
+        except (IndexError, AttributeError):
+            logger.warning("No pages field")
+        try:
+            data_dict["isbn"] = (
+                attrs[3].find("div", class_="value").text.strip()
+            )
+        except (IndexError, AttributeError):
+            logger.warning("No ISBN field")
+
+        try:
+            writer_slug = (
+                soup.findAll("div", class_="name")[5]
+                .a.get("href")
+                .split("people/")[1]
+            )
+        except (IndexError, AttributeError):
+            writer_slug = None
+            logger.warning("No writer found")
+        if writer_slug:
+            data_dict["locg_writer_slug"] = writer_slug
+
+    except AttributeError:
+        logger.warning("Trouble parsing HTML, elements missing")
+
+    return data_dict
+
+
+def lookup_comic_from_locg(title: str) -> dict:
+    """Search League of Comic Geeks by title and scrape the matching comic.
+
+    The slug is taken from the second anchor in the search response and
+    handed to ``lookup_comic_by_locg_slug``.
+
+    Args:
+        title: Free-text comic title to search for.
+
+    Returns:
+        The dict produced by ``lookup_comic_by_locg_slug``, or an empty dict
+        when the search request does not return 200 or no comic link is
+        found in the response.
+    """
+    # Imported here so the function carries its own dependency.
+    from urllib.parse import quote
+
+    # Percent-encode the title so characters such as "&", "#" or "?" stay
+    # inside the ``query`` parameter instead of altering the URL.
+    search_url = LOCG_SEARCH_URL.format(query=quote(title, safe=""))
+    response = requests.get(search_url, headers=HEADERS)
+
+    if response.status_code != 200:
+        logger.warning(f"Bad http response from LOCG {response}")
+        return {}
+
+    soup = BeautifulSoup(response.text, "html.parser")
+
+    try:
+        slug = soup.findAll("a")[1].get("href").split("comic/")[1]
+    except (IndexError, AttributeError):
+        # IndexError: too few anchors, or the href has no "comic/" part.
+        # AttributeError: the anchor has no href, so get() returned None.
+        logger.warning(f"No comic found on LOCG for {title}")
+        return {}
+
+    return lookup_comic_by_locg_slug(slug)

+ 23 - 0
vrobbler/apps/books/migrations/0016_book_locg_slug_book_summary.py

@@ -0,0 +1,23 @@
+# Generated by Django 4.1.7 on 2023-08-26 04:48
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    """Add the optional ``locg_slug`` and ``summary`` fields to ``book``."""
+
+    dependencies = [
+        ("books", "0015_alter_book_first_sentence_alter_book_genre"),
+    ]
+
+    operations = [
+        # Both fields are nullable and may be left blank in forms.
+        migrations.AddField(
+            model_name="book",
+            name="locg_slug",
+            field=models.CharField(max_length=255, null=True, blank=True),
+        ),
+        migrations.AddField(
+            model_name="book",
+            name="summary",
+            field=models.TextField(null=True, blank=True),
+        ),
+    ]

+ 20 - 0
vrobbler/apps/books/migrations/0017_alter_book_authors.py

@@ -0,0 +1,20 @@
+# Generated by Django 4.1.7 on 2023-08-26 16:39
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    """Allow ``book.authors`` to be left empty."""
+
+    dependencies = [
+        ("books", "0016_book_locg_slug_book_summary"),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name="book",
+            name="authors",
+            # ``null`` has no effect on a ManyToManyField, and the model
+            # declares only ``blank=True``; keeping the two in step avoids
+            # the autodetector reporting a pending change.
+            field=models.ManyToManyField(blank=True, to="books.author"),
+        ),
+    ]

+ 18 - 0
vrobbler/apps/books/migrations/0018_author_locg_slug.py

@@ -0,0 +1,18 @@
+# Generated by Django 4.1.7 on 2023-08-31 03:57
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    """Add the optional ``locg_slug`` field to ``author``."""
+
+    dependencies = [
+        ("books", "0017_alter_book_authors"),
+    ]
+
+    operations = [
+        # Nullable and may be left blank in forms.
+        migrations.AddField(
+            model_name="author",
+            name="locg_slug",
+            field=models.CharField(max_length=255, null=True, blank=True),
+        ),
+    ]

+ 57 - 14
vrobbler/apps/books/models.py

@@ -21,6 +21,12 @@ from scrobbles.mixins import (
 from scrobbles.utils import get_scrobbles_for_media
 from taggit.managers import TaggableManager
 
+from vrobbler.apps.books.locg import (
+    lookup_comic_by_locg_slug,
+    lookup_comic_from_locg,
+    lookup_comic_writer_by_locg_slug,
+)
+
 logger = logging.getLogger(__name__)
 User = get_user_model()
 BNULL = {"blank": True, "null": True}
@@ -34,6 +40,7 @@ class Author(TimeStampedModel):
     bio = models.TextField(**BNULL)
     wikipedia_url = models.CharField(max_length=255, **BNULL)
     isni = models.CharField(max_length=255, **BNULL)
+    locg_slug = models.CharField(max_length=255, **BNULL)
     wikidata_id = models.CharField(max_length=255, **BNULL)
     goodreads_id = models.CharField(max_length=255, **BNULL)
     librarything_id = models.CharField(max_length=255, **BNULL)
@@ -68,7 +75,7 @@ class Book(LongPlayScrobblableMixin):
     )
 
     title = models.CharField(max_length=255)
-    authors = models.ManyToManyField(Author)
+    authors = models.ManyToManyField(Author, blank=True)
     goodreads_id = models.CharField(max_length=255, **BNULL)
     koreader_id = models.IntegerField(**BNULL)
     koreader_authors = models.CharField(max_length=255, **BNULL)
@@ -79,12 +86,14 @@ class Book(LongPlayScrobblableMixin):
     first_publish_year = models.IntegerField(**BNULL)
     first_sentence = models.TextField(**BNULL)
     openlibrary_id = models.CharField(max_length=255, **BNULL)
+    locg_slug = models.CharField(max_length=255, **BNULL)
     cover = models.ImageField(upload_to="books/covers/", **BNULL)
+    summary = models.TextField(**BNULL)
 
     genre = TaggableManager(through=ObjectWithGenres)
 
     def __str__(self):
-        return f"{self.title} by {self.author}"
+        return f"{self.title}"
 
     @property
     def subtitle(self):
@@ -104,28 +113,39 @@ class Book(LongPlayScrobblableMixin):
         return reverse("books:book_detail", kwargs={"slug": self.uuid})
 
     def fix_metadata(self, data: dict = {}, force_update=False):
-        if not self.openlibrary_id or force_update:
+        if (not self.openlibrary_id or not self.locg_slug) or force_update:
             author_name = ""
             if self.author:
                 author_name = self.author.name
 
             if not data:
-                if self.openlibrary_id:
-                    data = lookup_book_from_openlibrary(
-                        str(self.openlibrary_id)
-                    )
+                if self.locg_slug:
+                    data = lookup_comic_by_locg_slug(str(self.locg_slug))
                 else:
-                    data = lookup_book_from_openlibrary(
-                        str(self.title), author_name
-                    )
+                    data = lookup_comic_from_locg(str(self.title))
 
-            if not data:
-                logger.warn(f"Book not found in OL {self.title}")
-                return
+                if not data:
+                    logger.warn(
+                        f"Book not found on LOCG, checking OL {self.title}"
+                    )
+                    if self.openlibrary_id and force_update:
+                        data = lookup_book_from_openlibrary(
+                            str(self.openlibrary_id)
+                        )
+                    else:
+                        data = lookup_book_from_openlibrary(
+                            str(self.title), author_name
+                        )
+                if not data:
+                    logger.warn(f"Book not found in OL {self.title}")
+                    return
 
             # We can discard the author name from OL for now, we'll lookup details below
             data.pop("ol_author_name", "")
-            self.fix_authors_metadata(data.pop("ol_author_id", ""))
+            if data.get("ol_author_id"):
+                self.fix_authors_metadata(data.pop("ol_author_id", ""))
+            if data.get("locg_writer_slug"):
+                self.get_author_from_locg(data.pop("locg_writer_slug", ""))
 
             ol_title = data.get("title", "")
 
@@ -183,6 +203,19 @@ class Book(LongPlayScrobblableMixin):
                     )
         self.authors.add(author)
 
+    def get_author_from_locg(self, locg_slug):
+        """Attach the LOCG writer identified by ``locg_slug`` to this book.
+
+        Looks the writer up on LOCG, gets or creates the matching ``Author``
+        by name and slug, downloads a headshot when the author is new or has
+        none, and adds the author to ``self.authors``.
+
+        Does nothing (beyond logging) when the lookup yields no writer name.
+        """
+        writer = lookup_comic_writer_by_locg_slug(locg_slug)
+
+        # The lookup returns an empty dict on a non-200 response; indexing it
+        # directly would raise KeyError, so bail out when there is no name.
+        if not writer.get("name"):
+            logger.warning(f"No writer found on LOCG for {locg_slug}")
+            return
+
+        author, created = Author.objects.get_or_create(
+            name=writer["name"], locg_slug=writer.get("locg_slug", locg_slug)
+        )
+        photo_url = writer.get("photo_url")
+        if (created or not author.headshot) and photo_url:
+            r = requests.get(photo_url)
+            if r.status_code == 200:
+                fname = f"{author.name}_{author.uuid}.jpg"
+                author.headshot.save(fname, ContentFile(r.content), save=True)
+        self.authors.add(author)
+
     @property
     def author(self):
         return self.authors.first()
@@ -218,6 +251,16 @@ class Book(LongPlayScrobblableMixin):
 
         return book
 
+    def save(self, *args, **kwargs):
+        """Save the book, refreshing its metadata first when it looks bare.
+
+        A forced ``fix_metadata`` runs only for a book that already has an
+        id, has neither an ISBN nor a cover, and carries a LOCG slug or an
+        OpenLibrary id to look it up with.
+        """
+        # NOTE(review): if fix_metadata() itself ends by calling save(), this
+        # re-enters here -- confirm against the rest of fix_metadata.
+        lacks_details = not (self.isbn or self.cover)
+        if lacks_details and (self.locg_slug or self.openlibrary_id) and self.id:
+            self.fix_metadata(force_update=True)
+
+        return super().save(*args, **kwargs)
+
 
 class Page(TimeStampedModel):
     user = models.ForeignKey(User, on_delete=models.CASCADE)