2 年之前 · d19838a26f
--- a/vrobbler/apps/books/koreader.py
+++ b/vrobbler/apps/books/koreader.py
@@ -12,6 +12,7 @@ from books.models import Author, Book, Page
 
				 from pylast import httpx, tempfile
			
 
				 from scrobbles.models import Scrobble
			
 
				 from stream_sqlite import stream_sqlite
			
 
				+from vrobbler.apps.books.openlibrary import get_author_openlibrary_id
			
 
				 
			
 
				 logger = logging.getLogger(__name__)
			
 
				 
			
@@ -53,39 +54,55 @@ def get_book_map_from_sqlite(rows: Iterable) -> dict:
 
				     book_id_map = {}
			
 
				 
			
 
				     for book_row in rows:
			
 
				-        authors = book_row[KoReaderBookColumn.AUTHORS.value].split("\n")
			
 
				-        author_list = []
			
 
				-        for author_str in authors:
			
 
				-            logger.debug(f"Looking up author {author_str}")
			
 
				-            if author_str == "N/A":
			
 
				-                continue
			
 
				-
			
 
				-            author, created = Author.objects.get_or_create(name=author_str)
			
 
				-            if created:
			
 
				-                author.fix_metadata()
			
 
				-            author_list.append(author)
			
 
				-            logger.debug(f"Found author {author}, created: {created}")
			
 
				+        book = Book.objects.filter(
			
 
				+            koreader_md5=book_row[KoReaderBookColumn.MD5.value]
			
 
				+        ).first()
			
 
				 
			
 
				-        book, created = Book.objects.get_or_create(
			
 
				-            title=book_row[KoReaderBookColumn.TITLE.value]
			
 
				-        )
			
 
				+        if not book:
			
 
				+            book, created = Book.objects.get_or_create(
			
 
				+                title=book_row[KoReaderBookColumn.TITLE.value]
			
 
				+            )
			
 
				 
			
 
				-        if created:
			
 
				-            total_pages = book_row[KoReaderBookColumn.PAGES.value]
			
 
				-            run_time = total_pages * book.AVG_PAGE_READING_SECONDS
			
 
				-            book_dict = {
			
 
				-                "title": book_row[KoReaderBookColumn.TITLE.value],
			
 
				-                "pages": total_pages,
			
 
				-                "koreader_md5": book_row[KoReaderBookColumn.MD5.value],
			
 
				-                "koreader_id": int(book_row[KoReaderBookColumn.ID.value]),
			
 
				-                "koreader_authors": book_row[KoReaderBookColumn.AUTHORS.value],
			
 
				-                "run_time_seconds": run_time,
			
 
				-            }
			
 
				-            Book.objects.filter(pk=book.id).update(**book_dict)
			
 
				-            book.fix_metadata()
			
 
				-
			
 
				-            if author_list:
			
 
				-                book.authors.add(*[a.id for a in author_list])
			
 
				+            if created:
			
 
				+                total_pages = book_row[KoReaderBookColumn.PAGES.value]
			
 
				+                run_time = total_pages * book.AVG_PAGE_READING_SECONDS
			
 
				+                ko_authors = book_row[
			
 
				+                    KoReaderBookColumn.AUTHORS.value
			
 
				+                ].replace("\n", ", ")
			
 
				+                book_dict = {
			
 
				+                    "title": book_row[KoReaderBookColumn.TITLE.value],
			
 
				+                    "pages": total_pages,
			
 
				+                    "koreader_md5": book_row[KoReaderBookColumn.MD5.value],
			
 
				+                    "koreader_id": int(book_row[KoReaderBookColumn.ID.value]),
			
 
				+                    "koreader_authors": ko_authors,
			
 
				+                    "run_time_seconds": run_time,
			
 
				+                }
			
 
				+                Book.objects.filter(pk=book.id).update(**book_dict)
			
 
				+
			
 
				+                # Add authors
			
 
				+                authors = book_row[KoReaderBookColumn.AUTHORS.value].split(
			
 
				+                    "\n"
			
 
				+                )
			
 
				+                author_list = []
			
 
				+                for author_str in authors:
			
 
				+                    logger.debug(f"Looking up author {author_str}")
			
 
				+                    if author_str == "N/A":
			
 
				+                        continue
			
 
				+
			
 
				+                    author, created = Author.objects.get_or_create(
			
 
				+                        name=author_str
			
 
				+                    )
			
 
				+                    if created:
			
 
				+                        author.openlibrary_id = get_author_openlibrary_id(
			
 
				+                            author_str
			
 
				+                        )
			
 
				+                        author.save(update_fields=["openlibrary_id"])
			
 
				+                        author.fix_metadata()
			
 
				+                        logger.debug(f"Created author {author}")
			
 
				+                    book.authors.add(author)
			
 
				+
			
 
				+                # This will try to fix metadata by looking it up on OL
			
 
				+                book.fix_metadata()
			
 
				 
			
 
				         playback_position_seconds = 0
			
 
				         if book_row[KoReaderBookColumn.TOTAL_READ_TIME.value]:
			
@@ -101,6 +118,7 @@ def get_book_map_from_sqlite(rows: Iterable) -> dict:
 
				         timestamp = datetime.utcfromtimestamp(
			
 
				             book_row[KoReaderBookColumn.LAST_OPEN.value]
			
 
				         ).replace(tzinfo=pytz.utc)
			
 
				+        book.refresh_from_db()
			
 
				         book_id_map[book.koreader_id] = book.id
			
 
				 
			
 
				     return book_id_map
			
@@ -130,6 +148,7 @@ def build_scrobbles_from_pages(
 
				             ]
			
 
				             page.save(update_fields=["start_time", "duration_seconds"])
			
 
				             page.refresh_from_db()
			
 
				+
			
 
				         if page.is_scrobblable:
			
 
				             # Page number is a placeholder, we'll re-preocess this after creation
			
 
				             logger.debug(
			
@@ -155,11 +174,12 @@ def enrich_koreader_scrobbles(scrobbles: list) -> None:
 
				 
			
 
				     for scrobble in scrobbles:
			
 
				         if scrobble.next:
			
 
				+            # Set pages read to the starting page of the next scrobble minus one, if it exists
			
 
				             scrobble.book_pages_read = scrobble.next.book_pages_read - 1
			
 
				             scrobble.save(update_fields=["book_pages_read"])
			
 
				         else:
			
 
				+            # Set pages read to the last page we have
			
 
				             scrobble.book_pages_read = scrobble.book.page_set.last().number
			
 
				-            scrobble.long_play_complete =
			
 
				 
			
 
				         scrobble.save(update_fields=["book_pages_read", "long_play_complete"])
			
 
				 
			
--- a/vrobbler/apps/books/models.py
+++ b/vrobbler/apps/books/models.py
@@ -98,7 +98,10 @@ class Book(LongPlayScrobblableMixin):
 
				 
			
 
				     def fix_metadata(self, force_update=False):
			
 
				         if not self.openlibrary_id or force_update:
			
 
				-            book_dict = lookup_book_from_openlibrary(self.title, self.author)
			
 
				+            author_name = ""
			
 
				+            if self.author:
			
 
				+                author_name = self.author.name
			
 
				+            book_dict = lookup_book_from_openlibrary(self.title, author_name)
			
 
				             if not book_dict:
			
 
				                 logger.warn(f"Book not found in OL {self.title}")
			
 
				                 return
			
@@ -115,7 +118,6 @@ class Book(LongPlayScrobblableMixin):
 
				                 logger.warn(
			
 
				                     f"OL and KoReader disagree on this book title {self.title} != {ol_title}"
			
 
				                 )
			
 
				-                return
			
 
				 
			
 
				             Book.objects.filter(pk=self.id).update(**book_dict)
			
 
				             self.refresh_from_db()
			
--- a/vrobbler/apps/books/openlibrary.py
+++ b/vrobbler/apps/books/openlibrary.py
@@ -1,14 +1,15 @@
 
				 import json
			
 
				 import logging
			
 
				+import re
			
 
				 import urllib
			
 
				 
			
 
				 import requests
			
 
				 
			
 
				 logger = logging.getLogger(__name__)
			
 
				 
			
 
				-SEARCH_URL = "https://openlibrary.org/search.json?title={title}"
			
 
				 ISBN_URL = "https://openlibrary.org/isbn/{isbn}.json"
			
 
				-SEARCH_URL = "https://openlibrary.org/search.json?title={title}"
			
 
				+SEARCH_URL = "https://openlibrary.org/search.json?q={query}&sort=editions&mode=everything"
			
 
				+AUTHOR_SEARCH_URL = "https://openlibrary.org/search/authors.json?q={query}"
			
 
				 COVER_URL = "https://covers.openlibrary.org/b/olid/{id}-L.jpg"
			
 
				 AUTHOR_URL = "https://openlibrary.org/authors/{id}.json"
			
 
				 AUTHOR_IMAGE_URL = "https://covers.openlibrary.org/a/olid/{id}-L.jpg"
			
@@ -21,6 +22,24 @@ def get_first(key: str, result: dict) -> str:
 
				     return obj
			
 
				 
			
 
				 
			
 
				+def get_author_openlibrary_id(name: str) -> str:
			
 
				+    search_url = AUTHOR_SEARCH_URL.format(query=name)
			
 
				+    response = requests.get(search_url)
			
 
				+
			
 
				+    if response.status_code != 200:
			
 
				+        logger.warn(f"Bad response from OL: {response.status_code}")
			
 
				+        return ""
			
 
				+
			
 
				+    results = json.loads(response.content)
			
 
				+
			
 
				+    if not results:
			
 
				+        logger.warn(f"No author results found from search for {name}")
			
 
				+        return ""
			
 
				+
			
 
				+    result = results.get("docs", [])
			
 
				+    return result[0].get("key")
			
 
				+
			
 
				+
			
 
				 def lookup_author_from_openlibrary(olid: str) -> dict:
			
 
				     author_url = AUTHOR_URL.format(id=olid)
			
 
				     response = requests.get(author_url)
			
@@ -58,7 +77,14 @@ def lookup_author_from_openlibrary(olid: str) -> dict:
 
				 
			
 
				 def lookup_book_from_openlibrary(title: str, author: str = None) -> dict:
			
 
				     title_quoted = urllib.parse.quote(title)
			
 
				-    search_url = SEARCH_URL.format(title=title_quoted)
			
 
				+    author_quoted = ""
			
 
				+    if author:
			
 
				+        # Strip middle initials, OpenLibrary often fails with these
			
 
				+        author = re.sub(" [A-Z]. ", " ", author)
			
 
				+        author_quoted = urllib.parse.quote(author)
			
 
				+    query = f"{title_quoted} {author_quoted}"
			
 
				+
			
 
				+    search_url = SEARCH_URL.format(query=query)
			
 
				     response = requests.get(search_url)
			
 
				 
			
 
				     if response.status_code != 200:
			
@@ -71,7 +97,17 @@ def lookup_book_from_openlibrary(title: str, author: str = None) -> dict:
 
				         logger.warn(f"No results found from OL for {title}")
			
 
				         return {}
			
 
				 
			
 
				-    top = results.get("docs")[0]
			
 
				+    top = None
			
 
				+    for result in results.get("docs"):
			
 
				+        # These Summary things suck and ruin our one-shot search
			
 
				+        if "Summary of" not in result.get("title"):
			
 
				+            top = result
			
 
				+            break
			
 
				+
			
 
				+    if not top:
			
 
				+        logger.warn(f"No book found for query {query}")
			
 
				+        return {}
			
 
				+
			
 
				     ol_id = top.get("cover_edition_key")
			
 
				     ol_author_id = get_first("author_key", top)
			
 
				     first_sentence = ""