Ver Fonte

[books] Add google as a source and clean up data model

Colin Powell há 6 meses atrás
pai
commit
f90a3b84a8

+ 1 - 0
vrobbler.conf.example

@@ -23,6 +23,7 @@ VROBBLER_IGDB_CLIENT_SECRET="<key>"
 VROBBLER_COMICVINE_API_KEY="<key>"
 VROBBLER_TODOIST_CLIENT_ID="<id>"
 VROBBLER_TODOIST_CLIENT_SECRET="<key>"
+VROBBLER_GOOGLE_API_KEY="<key>"
 
 # Storages
 # VROBBLER_DATABASE_URL="postgres://USER:PASSWORD@HOST:PORT/NAME"

+ 2 - 2
vrobbler/apps/books/admin.py

@@ -30,10 +30,10 @@ class BookAdmin(admin.ModelAdmin):
     date_hierarchy = "created"
     list_display = (
         "title",
-        "isbn",
+        "subtitle",
+        "isbn_13",
         "first_publish_year",
         "pages",
-        "openlibrary_id",
     )
     search_fields = ("name",)
     ordering = ("-created",)

+ 42 - 0
vrobbler/apps/books/metadata.py

@@ -0,0 +1,42 @@
+from enum import Enum
+from typing import Optional
+
+import pendulum
+from meta_yt import YouTube
+
+
+# NOTE(review): neither URL constant is referenced by any code in this
+# module; they look carried over from video metadata code -- confirm
+# nothing imports them before removing.
+YOUTUBE_VIDEO_URL = "https://www.youtube.com/watch?v="
+IMDB_VIDEO_URL = "https://www.imdb.com/title/tt"
+
+
+# Placeholder: no members are defined yet and nothing in this module uses it.
+class BookType:
+    ...
+
+
+class BookMetadata:
+    """Source-agnostic container for book metadata produced by a lookup.
+
+    Lookup code assigns whichever attributes it managed to find. Fields
+    that were never assigned fall back to the class-level ``None``
+    defaults declared here, so reading them is always safe, while
+    ``vars(instance)`` still contains only the values a source really set.
+    """
+
+    title: str
+    run_time_seconds: Optional[int] = None
+    # NOTE(review): the Google source stores the API's "authors" value
+    # here, which is a list of names -- confirm other sources agree.
+    authors: Optional[list[str]] = None
+    goodreads_id: Optional[str] = None
+    koreader_data_by_hash: Optional[dict] = None
+    isbn: Optional[str] = None
+    isbn_13: Optional[str] = None
+    isbn_10: Optional[str] = None
+    pages: Optional[int] = None
+    language: Optional[str] = None
+    first_publish_year: Optional[int] = None
+    summary: Optional[str] = None
+
+    # General
+    cover_url: Optional[str] = None
+    genres: list[str]
+
+    def __init__(self, title: Optional[str] = ""):
+        self.title = title
+        # Created per instance: a class-level [] would be shared by every
+        # BookMetadata object.
+        self.genres = []
+
+    def as_dict_with_authors_cover_and_genres(self) -> tuple:
+        """Split the metadata into plain fields and relation-like values.
+
+        Returns:
+            A 4-tuple ``(book_dict, authors, cover_url, genres)``.
+            ``book_dict`` holds every attribute set on this instance
+            except the other three. ``authors`` and ``cover_url`` are
+            ``None`` when no source provided them; ``genres`` is always a
+            list (empty when nothing was provided).
+        """
+        # Work on a copy: vars(self) is the live attribute dict, so
+        # popping from it directly would strip the attributes off the
+        # instance.
+        book_dict = dict(vars(self))
+        authors = book_dict.pop("authors", None)
+        cover = book_dict.pop("cover_url", None)
+        genres = book_dict.pop("genres", None) or []
+        return book_dict, authors, cover, genres

+ 40 - 0
vrobbler/apps/books/migrations/0023_rename_isbn_book_isbn_13_remove_book_comicvine_data_and_more.py

@@ -0,0 +1,40 @@
+# Generated by Django 4.2.18 on 2025-01-27 04:20
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    # Reshapes the identifier and publication fields on the "book" model:
+    # one rename, three removals and two additions, in that order.
+
+    dependencies = [
+        ("books", "0022_alter_book_run_time_seconds"),
+    ]
+
+    operations = [
+        # The existing "isbn" field becomes "isbn_13".
+        migrations.RenameField(
+            model_name="book",
+            old_name="isbn",
+            new_name="isbn_13",
+        ),
+        # The next three operations drop fields from the model; values
+        # stored in them are discarded when the migration is applied.
+        migrations.RemoveField(
+            model_name="book",
+            name="comicvine_data",
+        ),
+        migrations.RemoveField(
+            model_name="book",
+            name="goodreads_id",
+        ),
+        migrations.RemoveField(
+            model_name="book",
+            name="locg_slug",
+        ),
+        # New optional (blank/null allowed) fields.
+        migrations.AddField(
+            model_name="book",
+            name="isbn_10",
+            field=models.CharField(blank=True, max_length=255, null=True),
+        ),
+        migrations.AddField(
+            model_name="book",
+            name="publish_date",
+            field=models.DateField(blank=True, null=True),
+        ),
+    ]

+ 21 - 0
vrobbler/apps/books/migrations/0024_book_publisher.py

@@ -0,0 +1,21 @@
+# Generated by Django 4.2.18 on 2025-01-27 04:31
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    # Adds an optional "publisher" text field to the "book" model.
+
+    dependencies = [
+        (
+            "books",
+            "0023_rename_isbn_book_isbn_13_remove_book_comicvine_data_and_more",
+        ),
+    ]
+
+    operations = [
+        # blank/null are allowed, so existing rows need no default value.
+        migrations.AddField(
+            model_name="book",
+            name="publisher",
+            field=models.CharField(blank=True, max_length=255, null=True),
+        ),
+    ]

+ 34 - 5
vrobbler/apps/books/models.py

@@ -35,6 +35,7 @@ from vrobbler.apps.books.locg import (
     lookup_comic_from_locg,
     lookup_comic_writer_by_locg_slug,
 )
+from vrobbler.apps.books.sources.google import lookup_book_from_google
 from vrobbler.apps.scrobbles.dataclasses import BookLogData
 
 COMICVINE_API_KEY = getattr(settings, "COMICVINE_API_KEY", "")
@@ -101,16 +102,16 @@ class Book(LongPlayScrobblableMixin):
 
     title = models.CharField(max_length=255)
     authors = models.ManyToManyField(Author, blank=True)
-    goodreads_id = models.CharField(max_length=255, **BNULL)
     koreader_data_by_hash = models.JSONField(**BNULL)
-    isbn = models.CharField(max_length=255, **BNULL)
+    isbn_13 = models.CharField(max_length=255, **BNULL)
+    isbn_10 = models.CharField(max_length=255, **BNULL)
     pages = models.IntegerField(**BNULL)
     language = models.CharField(max_length=4, **BNULL)
     first_publish_year = models.IntegerField(**BNULL)
+    publish_date = models.DateField(**BNULL)
+    publisher = models.CharField(max_length=255, **BNULL)
     first_sentence = models.TextField(**BNULL)
     openlibrary_id = models.CharField(max_length=255, **BNULL)
-    locg_slug = models.CharField(max_length=255, **BNULL)
-    comicvine_data = models.JSONField(**BNULL)
     cover = models.ImageField(upload_to="books/covers/", **BNULL)
     cover_small = ImageSpecField(
         source="cover",
@@ -153,6 +154,32 @@ class Book(LongPlayScrobblableMixin):
     def get_absolute_url(self):
         return reverse("books:book_detail", kwargs={"slug": self.uuid})
 
+    @classmethod
+    def get_from_google(cls, title: str, overwrite: bool = False):
+        """Return the book with this exact title, enriched from Google Books.
+
+        The book is fetched or created by ``title``. An already existing
+        book is returned untouched unless ``overwrite`` is true; otherwise
+        the metadata found by ``lookup_book_from_google`` is copied onto
+        it, it is saved once, and its cover and genres are attached when
+        the lookup provided them.
+
+        Args:
+            title: Title used both for the database lookup and the search.
+            overwrite: Refresh the metadata of an existing book as well.
+
+        Returns:
+            The (possibly updated) book instance.
+        """
+        book, created = cls.objects.get_or_create(title=title)
+        if not created and not overwrite:
+            return book
+
+        # TODO: the returned authors are not attached to the book yet.
+        bdict, _authors, cover, genres = lookup_book_from_google(
+            title
+        ).as_dict_with_authors_cover_and_genres()
+
+        # NOTE(review): bdict carries the title reported by the lookup,
+        # which may differ from the ``title`` argument used for
+        # get_or_create above -- confirm later calls still find this row.
+        for k, v in bdict.items():
+            setattr(book, k, v)
+        # Persist once, after every field is assigned, rather than once
+        # per field.
+        book.save()
+
+        # Either value may be missing when the lookup found nothing.
+        if cover:
+            book.save_image_from_url(cover)
+        if genres:
+            book.genre.add(*genres)
+        return book
+
+    def save_image_from_url(self, url: str, force_update: bool = False):
+        """Download ``url`` and store the response body as this book's cover.
+
+        Nothing happens when ``url`` is empty, or when the book already
+        has a cover and ``force_update`` is false. The file is written
+        only when the server answers with HTTP 200.
+
+        Args:
+            url: Location of the cover image.
+            force_update: Replace an existing cover.
+        """
+        if not url:
+            # Without a URL there is nothing to fetch, even for a book
+            # that has no cover yet.
+            return
+        if self.cover and not force_update:
+            return
+
+        # Bounded wait: without a timeout a stalled server would block
+        # the caller indefinitely.
+        r = requests.get(url, timeout=10)
+        if r.status_code == 200:
+            fname = f"{self.title}_{self.uuid}.jpg"
+            self.cover.save(fname, ContentFile(r.content), save=True)
+
     def fix_metadata(self, data: dict = {}, force_update=False):
         if (not self.openlibrary_id or not self.locg_slug) or force_update:
             author_name = ""
@@ -340,7 +367,9 @@ class Book(LongPlayScrobblableMixin):
                 )
                 return book
 
-            book, book_created = cls.objects.get_or_create(isbn=data["isbn"])
+            book, book_created = cls.objects.get_or_create(
+                isbn_13=data["isbn"]
+            )
             if book_created:
                 book.fix_metadata(data=data)
 

+ 0 - 0
vrobbler/apps/books/sources/__init__.py


+ 68 - 0
vrobbler/apps/books/sources/google.py

@@ -0,0 +1,68 @@
+import json
+import logging
+
+import pendulum
+import requests
+from books.metadata import BookMetadata
+from django.conf import settings
+
+# Read once, when this module is imported.
+API_KEY = settings.GOOGLE_API_KEY
+# Volumes search endpoint; the {title} and {key} placeholders are filled
+# in with str.format by the lookup function.
+GOOGLE_BOOKS_URL = (
+    "https://www.googleapis.com/books/v1/volumes?q={title}&key={key}"
+)
+
+logger = logging.getLogger(__name__)
+
+
+def lookup_book_from_google(title: str) -> BookMetadata:
+    """Search Google Books for ``title`` and map the first hit to metadata.
+
+    Args:
+        title: Free-text title used as the ``q`` search term.
+
+    Returns:
+        A ``BookMetadata``. When the response is not HTTP 200 or contains
+        no items, it carries only the requested title plus empty authors,
+        cover and genres. Otherwise it is filled from the first item's
+        ``volumeInfo``; values the API did not provide are ``None``.
+
+    Raises:
+        requests.RequestException: If the HTTP request fails or times out.
+    """
+    from urllib.parse import quote_plus
+
+    book_metadata = BookMetadata(title=title)
+    # Give every return path the attributes that callers unpack.
+    book_metadata.authors = None
+    book_metadata.cover_url = None
+    book_metadata.genres = []
+
+    # Titles often contain spaces, "&" or "#", which would otherwise
+    # corrupt the query string.
+    url = GOOGLE_BOOKS_URL.format(title=quote_plus(title), key=API_KEY)
+    headers = {"User-Agent": "Vrobbler 0.11.12"}
+    response = requests.get(url, headers=headers, timeout=10)
+
+    if response.status_code != 200:
+        logger.warning(
+            "Bad response from Google", extra={"response": response}
+        )
+        return book_metadata
+
+    items = json.loads(response.content).get("items") or []
+    if not items:
+        # A search without matches has no "items" entry to read from.
+        return book_metadata
+    google_result = items[0].get("volumeInfo") or {}
+
+    # The published date may be absent or not parseable; treat both
+    # cases as "unknown" instead of failing the whole lookup.
+    publish_date = None
+    published = google_result.get("publishedDate")
+    if published:
+        try:
+            publish_date = pendulum.parse(published)
+        except ValueError:
+            publish_date = None
+
+    isbn_13 = ""
+    isbn_10 = ""
+    for ident in google_result.get("industryIdentifiers") or []:
+        if ident.get("type") == "ISBN_13":
+            isbn_13 = ident.get("identifier")
+        if ident.get("type") == "ISBN_10":
+            isbn_10 = ident.get("identifier")
+
+    # Keep the requested title when the result carries none.
+    result_title = google_result.get("title")
+    subtitle = google_result.get("subtitle")
+    if result_title:
+        book_metadata.title = (
+            ": ".join([result_title, subtitle]) if subtitle else result_title
+        )
+
+    pages = google_result.get("pageCount")
+
+    book_metadata.authors = google_result.get("authors")
+    book_metadata.publisher = google_result.get("publisher")
+    book_metadata.first_publish_year = (
+        publish_date.year if publish_date else None
+    )
+    book_metadata.pages = pages
+    book_metadata.isbn_13 = isbn_13
+    book_metadata.isbn_10 = isbn_10
+    # Store a real date rather than the raw API string, which may be
+    # partial and is not a valid value for a date field.
+    book_metadata.publish_date = (
+        publish_date.date() if publish_date else None
+    )
+    book_metadata.language = google_result.get("language")
+    book_metadata.summary = google_result.get("description")
+    book_metadata.genres = google_result.get("categories") or []
+
+    thumbnail = (google_result.get("imageLinks") or {}).get("thumbnail")
+    if thumbnail:
+        book_metadata.cover_url = thumbnail.replace(
+            "zoom=1", "zoom=15"
+        ).replace("&edge=curl", "")
+
+    # The reading-time estimate needs a page count.
+    book_metadata.run_time_seconds = (
+        pages * getattr(settings, "AVERAGE_PAGE_READING_SECONDS", 60)
+        if pages is not None
+        else None
+    )
+
+    return book_metadata

+ 2 - 0
vrobbler/settings.py

@@ -74,6 +74,8 @@ POINTS_FOR_MOVEMENT_HISTORY = os.getenv(
 TODOIST_CLIENT_ID = os.getenv("VROBBLER_TODOIST_CLIENT_ID", "")
 TODOIST_CLIENT_SECRET = os.getenv("VROBBLER_TODOIST_CLIENT_SECRET", "")
 
+GOOGLE_API_KEY = os.getenv("VROBBLER_GOOGLE_API_KEY", "")
+
 DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField"
 
 TIME_ZONE = os.getenv("VROBBLER_TIME_ZONE", "US/Eastern")