há 1 ano atrás · f1b1989424
--- a/vrobbler/apps/books/locg.py
+++ b/vrobbler/apps/books/locg.py
@@ -0,0 +1,124 @@
 
				+#!/usr/bin/env python3
			
 
				+
			
 
				+from enum import Enum
			
 
				+from typing import Optional
			
 
				+from bs4 import BeautifulSoup
			
 
				+import requests
			
 
				+import logging
			
 
				+
			
 
				+logger = logging.getLogger(__name__)
			
 
				+
			
 
				+# Request headers passed to requests.get() by the LOCG lookup functions in this module.
				+HEADERS = {
			
 
				+    "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36",
			
 
				+    "accept-language": "en-GB,en;q=0.9",
			
 
				+}
			
 
				+# NOTE(review): empty and not referenced in the visible code; the name looks like a misspelling of "WRITER" -- confirm before renaming or removing.
				+LOCG_WRTIER_URL = ""
			
 
				+# URL templates; the placeholders are filled with str.format() by the lookup functions below.
				+LOCG_WRITER_DETAIL_URL = "https://leagueofcomicgeeks.com/people/{slug}"
			
 
				+LOCG_SEARCH_URL = (
			
 
				+    "https://leagueofcomicgeeks.com/search/ajax_issues?query={query}"
			
 
				+)
			
 
				+LOCG_DETAIL_URL = "https://leagueofcomicgeeks.com/comic/{locg_slug}"
			
 
				+
			
 
				+
			
 
				+def strip_and_clean(text):
				+    """Return ``text`` with leading and trailing whitespace removed."""
				+    # str.strip() with no argument already removes newlines together
				+    # with every other kind of surrounding whitespace.
				+    return text.strip()
			
 
				+
			
 
				+
			
 
				+def get_rating_from_soup(soup) -> Optional[int]:
				+    """Extract an integer rating from a parsed page.
				+
				+    Returns the integer text of the first ``div`` with class
				+    ``allmusic-rating``, or ``None`` when that element is absent or its
				+    text is not a valid integer.
				+    """
				+    # NOTE(review): the "allmusic-rating" class name looks carried over
				+    # from a different scraper -- confirm it exists on LOCG pages.
				+    try:
				+        node = soup.find("div", class_="allmusic-rating")
				+        return int(strip_and_clean(node.get_text())) if node else None
				+    except ValueError:
				+        # Non-numeric rating text is treated the same as no rating.
				+        return None
			
 
				+
			
 
				+
			
 
				+def lookup_comic_writer_by_locg_slug(slug: str) -> dict:
				+    """Scrape a League of Comic Geeks "people" page for a writer.
				+
				+    Args:
				+        slug: Path fragment after ``/people/`` identifying the writer.
				+
				+    Returns:
				+        A dict with ``locg_slug``, ``name`` and ``photo_url`` (``None``
				+        when the page has no avatar image). An empty dict is returned
				+        when the response is not HTTP 200 or the page has no ``<h1>``.
				+    """
				+    data_dict = {}
				+    writer_url = LOCG_WRITER_DETAIL_URL.format(slug=slug)
				+
				+    # A timeout keeps a stalled connection from blocking the caller forever.
				+    response = requests.get(writer_url, headers=HEADERS, timeout=10)
				+
				+    if response.status_code != 200:
				+        logger.info(f"Bad http response from LOCG {response}")
				+        return data_dict
				+
				+    soup = BeautifulSoup(response.text, "html.parser")
				+
				+    # find() returns None for a missing element; check before dereferencing
				+    # instead of letting an AttributeError escape to the caller.
				+    heading = soup.find("h1")
				+    if heading is None:
				+        logger.warning(f"No writer name found on LOCG for {slug}")
				+        return data_dict
				+
				+    avatar = soup.find("div", class_="avatar")
				+    image = avatar.find("img") if avatar is not None else None
				+
				+    data_dict["locg_slug"] = slug
				+    data_dict["name"] = heading.text.strip()
				+    # Always set the key so callers can index it; None means "no photo".
				+    data_dict["photo_url"] = image.get("src") if image is not None else None
				+
				+    return data_dict
			
 
				+
			
 
				+
			
 
				+def lookup_comic_by_locg_slug(slug: str) -> dict:
				+    """Scrape a League of Comic Geeks comic page for basic metadata.
				+
				+    Args:
				+        slug: Path fragment after ``/comic/`` identifying the comic.
				+
				+    Returns:
				+        A dict that may contain ``title``, ``summary``, ``cover_url``,
				+        ``pages``, ``isbn`` and ``locg_writer_slug``. It is empty when the
				+        response is not HTTP 200 and only partially filled when expected
				+        elements are missing from the page.
				+    """
				+    data_dict = {}
				+    product_url = LOCG_DETAIL_URL.format(locg_slug=slug)
				+
				+    # A timeout keeps a stalled connection from blocking the caller forever.
				+    response = requests.get(product_url, headers=HEADERS, timeout=10)
				+
				+    if response.status_code != 200:
				+        logger.info(f"Bad http response from LOCG {response}")
				+        return data_dict
				+
				+    soup = BeautifulSoup(response.text, "html.parser")
				+
				+    # Core fields: if any of these elements is missing, stop and return
				+    # whatever was collected so far.
				+    try:
				+        data_dict["title"] = soup.find("h1").text.strip()
				+        data_dict["summary"] = soup.find("p").text.strip()
				+        data_dict["cover_url"] = (
				+            soup.find("div", class_="cover-art").find("img").get("src")
				+        )
				+    except AttributeError:
				+        logger.warning("Trouble parsing HTML, elements missing")
				+        return data_dict
				+
				+    # Optional fields are read by position in the page, so each one is
				+    # guarded separately: a missing field must not discard the others.
				+    attrs = soup.find_all("div", class_="details-addtl-block")
				+    try:
				+        data_dict["pages"] = (
				+            attrs[1]
				+            .find("div", class_="value")
				+            .text.split("pages")[0]
				+            .strip()
				+        )
				+    except (IndexError, AttributeError):
				+        logger.warning("No pages field")
				+    try:
				+        data_dict["isbn"] = (
				+            attrs[3].find("div", class_="value").text.strip()
				+        )
				+    except (IndexError, AttributeError):
				+        logger.warning("No ISBN field")
				+
				+    try:
				+        writer_slug = (
				+            soup.find_all("div", class_="name")[5]
				+            .a.get("href")
				+            .split("people/")[1]
				+        )
				+    except (IndexError, AttributeError):
				+        logger.warning("No writer found")
				+    else:
				+        if writer_slug:
				+            data_dict["locg_writer_slug"] = writer_slug
				+
				+    return data_dict
			
 
				+
			
 
				+
			
 
				+def lookup_comic_from_locg(title: str) -> dict:
				+    """Search League of Comic Geeks by title and look up the matched comic.
				+
				+    Args:
				+        title: Free-text comic title to search for.
				+
				+    Returns:
				+        The dict produced by ``lookup_comic_by_locg_slug`` for the slug taken
				+        from the second link in the search response, or an empty dict when
				+        the response is not HTTP 200 or no usable link is found.
				+    """
				+    from urllib.parse import quote_plus
				+
				+    # Encode the title so spaces, "&", "#" and similar characters cannot
				+    # truncate or corrupt the query string.
				+    search_url = LOCG_SEARCH_URL.format(query=quote_plus(title))
				+    # A timeout keeps a stalled connection from blocking the caller forever.
				+    response = requests.get(search_url, headers=HEADERS, timeout=10)
				+
				+    if response.status_code != 200:
				+        logger.warning(f"Bad http response from LOCG {response}")
				+        return {}
				+
				+    soup = BeautifulSoup(response.text, "html.parser")
				+
				+    try:
				+        slug = soup.find_all("a")[1].get("href").split("comic/")[1]
				+    except (IndexError, AttributeError):
				+        # IndexError: fewer than two links, or no "comic/" in the href.
				+        # AttributeError: the link has no href attribute at all.
				+        logger.warning(f"No comic found on LOCG for {title}")
				+        return {}
				+
				+    return lookup_comic_by_locg_slug(slug)
			
--- a/vrobbler/apps/books/migrations/0016_book_locg_slug_book_summary.py
+++ b/vrobbler/apps/books/migrations/0016_book_locg_slug_book_summary.py
@@ -0,0 +1,23 @@
 
				+# Generated by Django 4.1.7 on 2023-08-26 04:48
			
 
				+
			
 
				+from django.db import migrations, models
			
 
				+
			
 
				+
			
 
				+# Schema migration for the "books" app: adds two optional fields to the Book model.
				+class Migration(migrations.Migration):
			
 
				+
			
 
				+    # Depends on migration 0015 of the same app.
				+    dependencies = [
			
 
				+        ("books", "0015_alter_book_first_sentence_alter_book_genre"),
			
 
				+    ]
			
 
				+
			
 
				+    operations = [
			
 
				+        # Nullable, blankable CharField "locg_slug" on Book.
				+        migrations.AddField(
			
 
				+            model_name="book",
			
 
				+            name="locg_slug",
			
 
				+            field=models.CharField(blank=True, max_length=255, null=True),
			
 
				+        ),
			
 
				+        # Nullable, blankable TextField "summary" on Book.
				+        migrations.AddField(
			
 
				+            model_name="book",
			
 
				+            name="summary",
			
 
				+            field=models.TextField(blank=True, null=True),
			
 
				+        ),
			
 
				+    ]
			
--- a/vrobbler/apps/books/migrations/0017_alter_book_authors.py
+++ b/vrobbler/apps/books/migrations/0017_alter_book_authors.py
@@ -0,0 +1,20 @@
 
				+# Generated by Django 4.1.7 on 2023-08-26 16:39
			
 
				+
			
 
				+from django.db import migrations, models
			
 
				+
			
 
				+
			
 
				+# Schema migration for the "books" app: alters Book.authors so the relation may be left blank.
				+class Migration(migrations.Migration):
			
 
				+
			
 
				+    # Depends on migration 0016 of the same app.
				+    dependencies = [
			
 
				+        ("books", "0016_book_locg_slug_book_summary"),
			
 
				+    ]
			
 
				+
			
 
				+    operations = [
			
 
				+        migrations.AlterField(
			
 
				+            model_name="book",
			
 
				+            name="authors",
			
 
				+            # NOTE(review): Django documents null as having no effect on
				+            # ManyToManyField, and the Book model in this change declares the
				+            # field with blank=True only -- confirm null=True is intended here.
				+            field=models.ManyToManyField(
			
 
				+                blank=True, null=True, to="books.author"
			
 
				+            ),
			
 
				+        ),
			
 
				+    ]
			
--- a/vrobbler/apps/books/migrations/0018_author_locg_slug.py
+++ b/vrobbler/apps/books/migrations/0018_author_locg_slug.py
@@ -0,0 +1,18 @@
 
				+# Generated by Django 4.1.7 on 2023-08-31 03:57
			
 
				+
			
 
				+from django.db import migrations, models
			
 
				+
			
 
				+
			
 
				+# Schema migration for the "books" app: adds an optional "locg_slug" field to the Author model.
				+class Migration(migrations.Migration):
			
 
				+
			
 
				+    # Depends on migration 0017 of the same app.
				+    dependencies = [
			
 
				+        ("books", "0017_alter_book_authors"),
			
 
				+    ]
			
 
				+
			
 
				+    operations = [
			
 
				+        # Nullable, blankable CharField "locg_slug" on Author.
				+        migrations.AddField(
			
 
				+            model_name="author",
			
 
				+            name="locg_slug",
			
 
				+            field=models.CharField(blank=True, max_length=255, null=True),
			
 
				+        ),
			
 
				+    ]
			
--- a/vrobbler/apps/books/models.py
+++ b/vrobbler/apps/books/models.py
@@ -21,6 +21,12 @@ from scrobbles.mixins import (
 
				 from scrobbles.utils import get_scrobbles_for_media
			
 
				 from taggit.managers import TaggableManager
			
 
				 
			
 
				+from vrobbler.apps.books.locg import (
			
 
				+    lookup_comic_by_locg_slug,
			
 
				+    lookup_comic_from_locg,
			
 
				+    lookup_comic_writer_by_locg_slug,
			
 
				+)
			
 
				+
			
 
				 logger = logging.getLogger(__name__)
			
 
				 User = get_user_model()
			
 
				 BNULL = {"blank": True, "null": True}
			
@@ -34,6 +40,7 @@ class Author(TimeStampedModel):
 
				     bio = models.TextField(**BNULL)
			
 
				     wikipedia_url = models.CharField(max_length=255, **BNULL)
			
 
				     isni = models.CharField(max_length=255, **BNULL)
			
 
				+    locg_slug = models.CharField(max_length=255, **BNULL)
			
 
				     wikidata_id = models.CharField(max_length=255, **BNULL)
			
 
				     goodreads_id = models.CharField(max_length=255, **BNULL)
			
 
				     librarything_id = models.CharField(max_length=255, **BNULL)
			
@@ -68,7 +75,7 @@ class Book(LongPlayScrobblableMixin):
 
				     )
			
 
				 
			
 
				     title = models.CharField(max_length=255)
			
 
				-    authors = models.ManyToManyField(Author)
			
 
				+    authors = models.ManyToManyField(Author, blank=True)
			
 
				     goodreads_id = models.CharField(max_length=255, **BNULL)
			
 
				     koreader_id = models.IntegerField(**BNULL)
			
 
				     koreader_authors = models.CharField(max_length=255, **BNULL)
			
@@ -79,12 +86,14 @@ class Book(LongPlayScrobblableMixin):
 
				     first_publish_year = models.IntegerField(**BNULL)
			
 
				     first_sentence = models.TextField(**BNULL)
			
 
				     openlibrary_id = models.CharField(max_length=255, **BNULL)
			
 
				+    locg_slug = models.CharField(max_length=255, **BNULL)
			
 
				     cover = models.ImageField(upload_to="books/covers/", **BNULL)
			
 
				+    summary = models.TextField(**BNULL)
			
 
				 
			
 
				     genre = TaggableManager(through=ObjectWithGenres)
			
 
				 
			
 
				     def __str__(self):
			
 
				-        return f"{self.title} by {self.author}"
			
 
				+        return f"{self.title}"
			
 
				 
			
 
				     @property
			
 
				     def subtitle(self):
			
@@ -104,28 +113,39 @@ class Book(LongPlayScrobblableMixin):
 
				         return reverse("books:book_detail", kwargs={"slug": self.uuid})
			
 
				 
			
 
				     def fix_metadata(self, data: dict = {}, force_update=False):
			
 
				-        if not self.openlibrary_id or force_update:
			
 
				+        if (not self.openlibrary_id or not self.locg_slug) or force_update:
			
 
				             author_name = ""
			
 
				             if self.author:
			
 
				                 author_name = self.author.name
			
 
				 
			
 
				             if not data:
			
 
				-                if self.openlibrary_id:
			
 
				-                    data = lookup_book_from_openlibrary(
			
 
				-                        str(self.openlibrary_id)
			
 
				-                    )
			
 
				+                if self.locg_slug:
			
 
				+                    data = lookup_comic_by_locg_slug(str(self.locg_slug))
			
 
				                 else:
			
 
				-                    data = lookup_book_from_openlibrary(
			
 
				-                        str(self.title), author_name
			
 
				-                    )
			
 
				+                    data = lookup_comic_from_locg(str(self.title))
			
 
				 
			
 
				-            if not data:
			
 
				-                logger.warn(f"Book not found in OL {self.title}")
			
 
				-                return
			
 
				+                if not data:
			
 
				+                    logger.warn(
			
 
				+                        f"Book not found on LOCG, checking OL {self.title}"
			
 
				+                    )
			
 
				+                    if self.openlibrary_id and force_update:
			
 
				+                        data = lookup_book_from_openlibrary(
			
 
				+                            str(self.openlibrary_id)
			
 
				+                        )
			
 
				+                    else:
			
 
				+                        data = lookup_book_from_openlibrary(
			
 
				+                            str(self.title), author_name
			
 
				+                        )
			
 
				+                if not data:
			
 
				+                    logger.warn(f"Book not found in OL {self.title}")
			
 
				+                    return
			
 
				 
			
 
				             # We can discard the author name from OL for now, we'll lookup details below
			
 
				             data.pop("ol_author_name", "")
			
 
				-            self.fix_authors_metadata(data.pop("ol_author_id", ""))
			
 
				+            if data.get("ol_author_id"):
			
 
				+                self.fix_authors_metadata(data.pop("ol_author_id", ""))
			
 
				+            if data.get("locg_writer_slug"):
			
 
				+                self.get_author_from_locg(data.pop("locg_writer_slug", ""))
			
 
				 
			
 
				             ol_title = data.get("title", "")
			
 
				 
			
@@ -183,6 +203,19 @@ class Book(LongPlayScrobblableMixin):
 
				                     )
			
 
				         self.authors.add(author)
			
 
				 
			
 
				+    def get_author_from_locg(self, locg_slug):
				+        """Attach the LOCG writer identified by ``locg_slug`` to this book.
				+
				+        Looks the writer up on League of Comic Geeks, gets or creates the
				+        matching ``Author`` row, downloads a headshot when the author is new
				+        or has none, and adds the author to ``self.authors``. Does nothing
				+        when the lookup returns no writer name.
				+        """
				+        writer = lookup_comic_writer_by_locg_slug(locg_slug)
				+
				+        # The lookup returns an empty dict on a bad HTTP response; indexing
				+        # it directly would raise KeyError.
				+        if not writer.get("name"):
				+            logger.warning(f"No writer found on LOCG for {locg_slug}")
				+            return
				+
				+        author, created = Author.objects.get_or_create(
				+            name=writer["name"],
				+            locg_slug=writer.get("locg_slug", locg_slug),
				+        )
				+        photo_url = writer.get("photo_url")
				+        if (created or not author.headshot) and photo_url:
				+            # A timeout keeps a stalled download from blocking forever.
				+            r = requests.get(photo_url, timeout=10)
				+            if r.status_code == 200:
				+                fname = f"{author.name}_{author.uuid}.jpg"
				+                author.headshot.save(fname, ContentFile(r.content), save=True)
				+        self.authors.add(author)
			
 
				+
			
 
				     @property
			
 
				     def author(self):
			
 
				         return self.authors.first()
			
@@ -218,6 +251,16 @@ class Book(LongPlayScrobblableMixin):
 
				 
			
 
				         return book
			
 
				 
			
 
				+    def save(self, *args, **kwargs):
				+        """Save the book, refreshing its metadata first when details are missing.
				+
				+        ``fix_metadata(force_update=True)`` is called when the instance has
				+        an id, has neither an ISBN nor a cover, and has a LOCG slug or an
				+        OpenLibrary id. All arguments are passed through to the parent
				+        ``save``.
				+        """
				+        # NOTE(review): this condition can hold on every save of such a
				+        # book, triggering a lookup each time -- confirm that is intended.
				+        lacks_details = not (self.isbn or self.cover)
				+        has_lookup_key = self.locg_slug or self.openlibrary_id
				+        if lacks_details and has_lookup_key and self.id:
				+            self.fix_metadata(force_update=True)
				+
				+        return super().save(*args, **kwargs)
			
 
				+
			
 
				 
			
 
				 class Page(TimeStampedModel):
			
 
				     user = models.ForeignKey(User, on_delete=models.CASCADE)