@@ -5,16 +5,16 @@ import re
 import sqlite3
 from datetime import datetime, timedelta
 from enum import Enum
-from typing import Iterable, List
+from typing import Iterable, List, Optional
 
 import pytz
 import requests
 from books.models import Author, Book, Page
 from books.openlibrary import get_author_openlibrary_id
+from django.apps import apps
 from django.contrib.auth import get_user_model
 from django.db.models import Sum
 from pylast import httpx, tempfile
-from scrobbles.models import Scrobble
 from scrobbles.utils import timestamp_user_tz_to_utc
 from stream_sqlite import stream_sqlite
@@ -51,307 +51,277 @@ def _sqlite_bytes(sqlite_url):
 
 
 # Grace period between page reads for it to be a new scrobble
-SESSION_GAP_SECONDS = 3600  # one hour
-
-
-class KoReaderImporter:
-    # Maps a KoReader book ID to the Book.id and total read time of the book in Django
-    # Example:
-    # {"KOREADER_DB_ID": {
-    #     "book_id": <int>,
-    #     "total_seconds": <int>,
-    #     "pages": {
-    #         <int>: {
-    #             "start_ts": <TIMESTAMP>,
-    #             "end_ts": <TIMESTAMP>,
-    #             "duration": <int>
-    #         }
-    #     }
-    # }
-    BOOK_MAP = dict()
-    SQLITE_FILE_URL = str
-    USER_ID = int
-
-    def __init__(self, sqlite_file_url: str, user_id: int):
-        # Map KoReader book IDs to
-        self.SQLITE_FILE_URL = sqlite_file_url
-        self.USER_ID = user_id
-        self.importing_user = User.objects.filter(id=user_id).first()
-
-    def _get_author_str_from_row(self, row):
-        """Given a the raw author string from KoReader, convert it to a single line and
-        strip the middle initials, as OpenLibrary lookup usually fails with those.
-        """
-        ko_authors = row[KoReaderBookColumn.AUTHORS.value].replace("\n", ", ")
-        # Strip middle initials, OpenLibrary often fails with these
-        return re.sub(" [A-Z]. ", " ", ko_authors)
-
-    def _lookup_or_create_authors_from_author_str(
-        self, ko_author_str: str
-    ) -> list:
-        author_str_list = ko_author_str.split(", ")
-        author_list = []
-        for author_str in author_str_list:
-            logger.debug(f"Looking up author {author_str}")
-            # KoReader gave us nothing, bail
-            if author_str == "N/A":
-                logger.warn(
-                    f"KoReader author string is N/A, no authors to find"
-                )
-                continue
+SESSION_GAP_SECONDS = 1800  # a half hour
+
+
+def get_author_str_from_row(row):
+    """Given the raw author string from KoReader, convert it to a single line
+    and strip middle initials, as OpenLibrary lookup usually fails with those.
+    """
+    ko_authors = row[KoReaderBookColumn.AUTHORS.value].replace("\n", ", ")
+    # Strip middle initials, OpenLibrary often fails with these
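+    # e.g. "Ursula K. Le Guin" -> "Ursula Le Guin"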
+    return re.sub(r" [A-Z]\. ", " ", ko_authors)
+
+
+def lookup_or_create_authors_from_author_str(ko_author_str: str) -> list:
+    """Take a string of authors from KoReader and return a list
+    of Authors from our database.
+    """
+    author_str_list = ko_author_str.split(", ")
+    author_list = []
+    for author_str in author_str_list:
+        logger.debug(f"Looking up author {author_str}")
+        # KoReader gave us nothing, bail
+        if author_str == "N/A":
+            logger.warning("KoReader author string is N/A, no authors to find")
+            continue
-            author = Author.objects.filter(name=author_str).first()
-            if not author:
-                author = Author.objects.create(
-                    name=author_str,
-                    openlibrary_id=get_author_openlibrary_id(author_str),
-                )
-                author.fix_metadata()
-                logger.debug(f"Created author {author}")
-            author_list.append(author)
-        return author_list
-
-    def get_or_create_books(self, rows):
-        """Given an interable of sqlite rows from the books table, lookup existing
-        books, create ones that don't exist, and return a mapping of koreader IDs to
-        primary key IDs for page creation.
-
-        """
-        book_id_map = {}
-
-        for book_row in rows:
-            book = Book.objects.filter(
-                koreader_md5=book_row[KoReaderBookColumn.MD5.value]
-            ).first()
-
-            if not book:
-                # No KoReader book yet, create it
-                author_str = self._get_author_str_from_row(book_row)
-                total_pages = book_row[KoReaderBookColumn.PAGES.value]
-                run_time = total_pages * Book.AVG_PAGE_READING_SECONDS
-
-                book = Book.objects.create(
-                    koreader_md5=book_row[KoReaderBookColumn.MD5.value],
-                    title=book_row[KoReaderBookColumn.TITLE.value],
-                    koreader_id=book_row[KoReaderBookColumn.ID.value],
-                    koreader_authors=author_str,
-                    pages=total_pages,
-                    run_time_seconds=run_time,
-                )
-                book.fix_metadata()
+        author = Author.objects.filter(name=author_str).first()
+        if not author:
+            author = Author.objects.create(
+                name=author_str,
+                openlibrary_id=get_author_openlibrary_id(author_str),
+            )
+            author.fix_metadata()
+            logger.debug(f"Created author {author}")
+        author_list.append(author)
+    return author_list
+
+
+def create_book_from_row(row: list):
+    # No KoReader book yet, create it
+    author_str = get_author_str_from_row(row)
+    total_pages = row[KoReaderBookColumn.PAGES.value]
+    run_time = total_pages * Book.AVG_PAGE_READING_SECONDS
+
+    book = Book.objects.create(
+        koreader_md5=row[KoReaderBookColumn.MD5.value],
+        title=row[KoReaderBookColumn.TITLE.value],
+        koreader_id=row[KoReaderBookColumn.ID.value],
+        koreader_authors=author_str,
+        pages=total_pages,
+        run_time_seconds=run_time,
+    )
+    book.fix_metadata()
+
+    # Add authors
+    author_list = lookup_or_create_authors_from_author_str(author_str)
+    if author_list:
+        book.authors.add(*author_list)
+
+    return book
+
+
+def build_book_map(rows) -> dict:
+    """Given an iterable of sqlite rows from the books table, look up existing
+    books, create ones that don't exist, and return a mapping of KoReader IDs
+    to primary key IDs for page creation.
+    """
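+    # The returned map is keyed by KoReader book ID, e.g.:
+    #     {<koreader_book_id>: {"book_id": <int>, "total_seconds": <int>}}
+    # build_page_data() later fills in a "pages" dict on each entry.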
+    book_id_map = {}
+
+    for book_row in rows:
+        book = Book.objects.filter(
+            koreader_md5=book_row[KoReaderBookColumn.MD5.value]
+        ).first()
+
+        if not book:
+            book = create_book_from_row(book_row)
+
+        book.refresh_from_db()
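+        # Carry over KoReader's total read time for the book, if it recorded one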
+        total_seconds = 0
+        if book_row[KoReaderBookColumn.TOTAL_READ_TIME.value]:
+            total_seconds = book_row[
+                KoReaderBookColumn.TOTAL_READ_TIME.value
+            ]
+
+        book_id_map[book_row[KoReaderBookColumn.ID.value]] = {
+            "book_id": book.id,
+            "total_seconds": total_seconds,
+        }
+    return book_id_map
+
+
+def build_page_data(
+    page_rows: list, book_map: dict, user_tz: Optional[str] = None
+) -> dict:
+    """Given rows of page data from KoReader, parse each row and load the
+    page stats into the book map for later scrobble creation.
+    """
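+    # Each page entry ends up as:
+    #     {"start_ts": <ts>, "end_ts": <ts>, "duration": <seconds>}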
+    for page_row in page_rows:
+        koreader_book_id = page_row[KoReaderPageStatColumn.ID_BOOK.value]
+        if koreader_book_id not in book_map.keys():
+            logger.warning(
+                f"Found a page without a corresponding book ID ({koreader_book_id}) in KoReader DB",
+                extra={"page_row": page_row},
+            )
+            continue
+
+        if "pages" not in book_map[koreader_book_id].keys():
+            book_map[koreader_book_id]["pages"] = {}
+
+        page_number = page_row[KoReaderPageStatColumn.PAGE.value]
+        duration = page_row[KoReaderPageStatColumn.DURATION.value]
+        start_ts = page_row[KoReaderPageStatColumn.START_TIME.value]
+        if user_tz:
+            start_ts = timestamp_user_tz_to_utc(
+                page_row[KoReaderPageStatColumn.START_TIME.value],
+                pytz.timezone(user_tz),
+            ).timestamp()
+        else:
+            logger.warning("Page data built without user timezone, defaulting to UTC")
+
+        book_map[koreader_book_id]["pages"][page_number] = {
+            "duration": duration,
+            "start_ts": start_ts,
+            "end_ts": start_ts + duration,
+        }
+    return book_map
+
+
+def build_scrobbles_from_book_map(book_map: dict, user: "User") -> list["Scrobble"]:
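+    """Walk each book's page stats in order, accumulating pages until the
+    reader paused longer than SESSION_GAP_SECONDS (or the last page is hit),
+    then create or update a Scrobble for that reading session.
+    """
+    # Look up the model lazily to avoid a circular import with scrobbles.models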
+    Scrobble = apps.get_model("scrobbles", "Scrobble")
+
+    scrobbles_to_create = []
+
+    for koreader_book_id, book_dict in book_map.items():
+        book_id = book_dict["book_id"]
+        if "pages" not in book_dict.keys():
+            logger.warning(f"No page data found in book map for {book_id}")
+            continue
+
+        should_create_scrobble = False
+        scrobble_page_data = {}
+        playback_position_seconds = 0
+        prev_page_stats = {}
+
+        pages_processed = 0
+        total_pages = len(book_map[koreader_book_id]["pages"])
+
+        for page_number, stats in book_map[koreader_book_id][
+            "pages"
+        ].items():
+            pages_processed += 1
+            # Accumulate our page data for this scrobble
+            scrobble_page_data[page_number] = stats
+
+            seconds_from_last_page = 0
+            if prev_page_stats:
+                seconds_from_last_page = stats.get(
+                    "start_ts"
+                ) - prev_page_stats.get("end_ts")
+            playback_position_seconds = (
+                playback_position_seconds + stats.get("duration")
+            )
-            # Add authors
-            author_list = self._lookup_or_create_authors_from_author_str(
-                author_str
-            )
-            if author_list:
-                book.authors.add(*author_list)
-
-            # self._lookup_authors
-
-            book.refresh_from_db()
-            total_seconds = 0
-            if book_row[KoReaderBookColumn.TOTAL_READ_TIME.value]:
-                total_seconds = book_row[
-                    KoReaderBookColumn.TOTAL_READ_TIME.value
-                ]
-
-            book_id_map[book_row[KoReaderBookColumn.ID.value]] = {
-                "book_id": book.id,
-                "total_seconds": total_seconds,
-            }
-        self.BOOK_MAP = book_id_map
-
-    def load_page_data_to_map(self, rows: Iterable) -> List[Scrobble]:
-        """Given rows of page data from KoReader, parse each row and build
-        scrobbles for our user, loading the page data into the page_data
-        field on the scrobble instance.
-        """
-        for page_row in rows:
-            koreader_book_id = page_row[KoReaderPageStatColumn.ID_BOOK.value]
-            if "pages" not in self.BOOK_MAP[koreader_book_id].keys():
-                self.BOOK_MAP[koreader_book_id]["pages"] = {}
-
-            if koreader_book_id not in self.BOOK_MAP.keys():
-                logger.warn(
-                    f"Found a page without a corresponding book ID ({koreader_book_id}) in KoReader DB",
-                    {"page_row": page_row},
-                )
-                continue
-
-            page_number = page_row[KoReaderPageStatColumn.PAGE.value]
-            duration = page_row[KoReaderPageStatColumn.DURATION.value]
-            start_ts = page_row[KoReaderPageStatColumn.START_TIME.value]
-            if self.importing_user:
-                start_ts = timestamp_user_tz_to_utc(
-                    page_row[KoReaderPageStatColumn.START_TIME.value],
-                    self.importing_user.timezone,
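+            # Flush the accumulated pages into a scrobble once the reader
+            # paused longer than the session gap, or at the final page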
+            end_of_reading = pages_processed == total_pages
+            if (
+                seconds_from_last_page > SESSION_GAP_SECONDS
+                or end_of_reading
+            ):
+                should_create_scrobble = True
+
+            logger.info(f"Book {koreader_book_id} - page {page_number} - {seconds_from_last_page} seconds since last page")
+            if should_create_scrobble:
+                first_page_in_scrobble = list(scrobble_page_data.keys())[0]
+                timestamp = timestamp_user_tz_to_utc(
+                    int(scrobble_page_data.get(first_page_in_scrobble).get("start_ts")),
+                    pytz.timezone(user.profile.timezone),
                 )
-            self.BOOK_MAP[koreader_book_id]["pages"][page_number] = {
-                "duration": duration,
-                "start_ts": start_ts,
-                "end_ts": start_ts + duration,
-            }
-
-    def build_scrobbles_from_pages(self) -> List[Scrobble]:
-        scrobbles_to_create = []
-
-        for koreader_book_id, book_dict in self.BOOK_MAP.items():
-            book_id = book_dict["book_id"]
-            if "pages" not in book_dict.keys():
-                logger.warn(f"No page data found in book map for {book_id}")
-                continue
-
-            should_create_scrobble = False
-            scrobble_page_data = {}
-            playback_position_seconds = 0
-            prev_page_stats = {}
-
-            pages_processed = 0
-            total_pages = len(self.BOOK_MAP[koreader_book_id]["pages"])
-
-            for page_number, stats in self.BOOK_MAP[koreader_book_id][
-                "pages"
-            ].items():
-                pages_processed += 1
-                # Accumulate our page data for this scrobble
-                scrobble_page_data[page_number] = stats
-
-                seconds_from_last_page = 0
-                if prev_page_stats:
-                    seconds_from_last_page = stats.get(
-                        "end_ts"
-                    ) - prev_page_stats.get("start_ts")
-                playback_position_seconds = (
-                    playback_position_seconds + stats.get("duration")
-                )
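+                # If a previous import already created this scrobble, update it in place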
+                scrobble = Scrobble.objects.filter(
+                    timestamp=timestamp,
+                    book_id=book_id,
+                    user_id=user.id,
+                ).first()
+                if scrobble:
+                    logger.info(f"Found existing scrobble {scrobble}, updating")
+                    scrobble.book_page_data = scrobble_page_data
+                    scrobble.playback_position_seconds = scrobble.calc_reading_duration()
+                    scrobble.save(update_fields=["book_page_data", "playback_position_seconds"])
+                if not scrobble:
+                    logger.info(
+                        f"Queueing scrobble for {book_id}, page {page_number}"
+                    )
+                    scrobbles_to_create.append(
+                        Scrobble(
+                            book_id=book_id,
+                            user_id=user.id,
+                            source="KOReader",
+                            media_type=Scrobble.MediaType.BOOK,
+                            timestamp=timestamp,
+                            played_to_completion=True,
+                            playback_position_seconds=playback_position_seconds,
+                            in_progress=False,
+                            book_page_data=scrobble_page_data,
+                            book_pages_read=page_number,
+                            long_play_complete=False,
+                        )
+                    )
+                # Then start over
+                should_create_scrobble = False
+                playback_position_seconds = 0
+                scrobble_page_data = {}
+
+            prev_page_stats = stats
+    return scrobbles_to_create
+
+
+def fix_long_play_stats_for_scrobbles(scrobbles: list) -> None:
+    """Given a list of scrobbles, update pages read and long play seconds, and
+    check for media completion."""
+
+    for scrobble in scrobbles:
+        # If there's a previous scrobble, roll its running totals into this one
+        if scrobble.previous:
+            scrobble.long_play_seconds = (
+                scrobble.playback_position_seconds
+                + scrobble.previous.long_play_seconds
+            )
+            scrobble.book_pages_read = (
+                scrobble.book_pages_read + scrobble.previous.book_pages_read
+            )
+        else:
+            scrobble.long_play_seconds = scrobble.playback_position_seconds
+            scrobble.book_pages_read = scrobble.calc_pages_read()
-                if (
-                    seconds_from_last_page > SESSION_GAP_SECONDS
-                    or pages_processed == total_pages
-                ):
-                    should_create_scrobble = True
+        scrobble.save(
+            update_fields=["book_pages_read", "long_play_seconds"]
+        )
-                print(
-                    f"Seconds: {seconds_from_last_page} - {should_create_scrobble}"
-                )
-                if should_create_scrobble:
-                    first_page_in_scrobble = list(scrobble_page_data.keys())[0]
-                    timestamp = datetime.utcfromtimestamp(
-                        int(
-                            scrobble_page_data.get(first_page_in_scrobble).get(
-                                "start_ts"
-                            )
-                        )
-                    ).replace(tzinfo=pytz.utc)
-
-                    scrobble = Scrobble.objects.filter(
-                        timestamp=timestamp,
-                        book_id=book_id,
-                        # user_id=self.importing_user.id,
-                    ).first()
-                    if not scrobble:
-                        logger.info(
-                            f"Queueing scrobble for {book_id}, page {page_number}"
-                        )
-                        scrobbles_to_create.append(
-                            Scrobble(
-                                book_id=book_id,
-                                # user_id=self.importing_user.id,
-                                source="KOReader",
-                                media_type=Scrobble.MediaType.BOOK,
-                                timestamp=timestamp,
-                                played_to_completion=True,
-                                playback_position_seconds=playback_position_seconds,
-                                in_progress=False,
-                                book_page_data=scrobble_page_data,
-                                book_pages_read=page_number,
-                                long_play_complete=False,
-                            )
-                        )
-                    # Then start over
-                    should_create_scrobble = False
-                    playback_position_seconds = 0
-                    scrobble_page_data = {}
-
-                prev_page_stats = stats
-        return scrobbles_to_create
-
-    def _enrich_koreader_scrobbles(self, scrobbles: list) -> None:
-        """Given a list of scrobbles, update pages read, long play seconds and check
-        for media completion"""
-
-        for scrobble in scrobbles:
-            # But if there's a next scrobble, set pages read to their starting page
-            #
-            if scrobble.next:
-                scrobble.book_pages_read = scrobble.next.book_pages_read - 1
-                scrobble.long_play_seconds = scrobble.book.page_set.filter(
-                    number__lte=scrobble.book_pages_read
-                ).aggregate(Sum("duration_seconds"))["duration_seconds__sum"]
-
-            scrobble.save(
-                update_fields=["book_pages_read", "long_play_seconds"]
-            )
+def process_koreader_sqlite_file(file_path, user_id) -> list:
+    """Given a sqlite file from KoReader, open the book table, iterate
+    over rows creating scrobbles from each book found"""
+    Scrobble = apps.get_model("scrobbles", "Scrobble")
+
-    def process_file(self):
-        new_scrobbles = []
+    new_scrobbles = []
+    user = User.objects.filter(id=user_id).first()
+    tz = "UTC"  # build_page_data expects a timezone name, not a tzinfo object
+    if user:
+        tz = user.profile.timezone
+
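+    # Local files can be opened with sqlite3 directly; remote URLs are streamed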
+    is_os_file = "https://" not in file_path
+    if is_os_file:
+        con = sqlite3.connect(file_path)
+        cur = con.cursor()
+        book_map = build_book_map(cur.execute("SELECT * FROM book"))
+        book_map = build_page_data(
+            cur.execute("SELECT * FROM page_stat_data"), book_map, tz
+        )
+        new_scrobbles = build_scrobbles_from_book_map(book_map, user)
+    else:
         for table_name, pragma_table_info, rows in stream_sqlite(
-            _sqlite_bytes(self.FILE_URL), max_buffer_size=1_048_576
+            _sqlite_bytes(file_path), max_buffer_size=1_048_576
         ):
             logger.debug(f"Found table {table_name} - processing")
             if table_name == "book":
-                self.get_or_create_books(rows)
+                book_map = build_book_map(rows)
 
             if table_name == "page_stat_data":
-                self.build_scrobbles_from_page_data(rows)
-
-        # new_scrobbles = build_scrobbles_from_pages(
-        #     rows, book_id_map, user_id
-        # )
-        # logger.debug(f"Creating {len(new_scrobbles)} new scrobbles")
-
-        created = []
-        if new_scrobbles:
-            created = Scrobble.objects.bulk_create(new_scrobbles)
-            self._enrich_koreader_scrobbles(created)
-            logger.info(
-                f"Created {len(created)} scrobbles",
-                extra={"created_scrobbles": created},
-            )
-        return created
-
-    def process_koreader_sqlite_file(self, file_path, user_id) -> list:
-        """Given a sqlite file from KoReader, open the book table, iterate
-        over rows creating scrobbles from each book found"""
-        # Create a SQL connection to our SQLite database
-        con = sqlite3.connect(file_path)
-        cur = con.cursor()
-
-        book_id_map = self.get_or_create_books(
-            cur.execute("SELECT * FROM book")
+                book_map = build_page_data(rows, book_map, tz)
+                new_scrobbles = build_scrobbles_from_book_map(book_map, user)
+
+    logger.info(f"Creating {len(new_scrobbles)} new scrobbles")
+    created = []
+    if new_scrobbles:
+        created = Scrobble.objects.bulk_create(new_scrobbles)
+        fix_long_play_stats_for_scrobbles(created)
+        logger.info(
+            f"Created {len(created)} scrobbles",
+            extra={"created_scrobbles": created},
         )
-        new_scrobbles = self.build_scrobbles_from_pages(
-            cur.execute("SELECT * from page_stat_data"), book_id_map, user_id
-        )
-
-        created = []
-        if new_scrobbles:
-            created = Scrobble.objects.bulk_create(new_scrobbles)
-            self._enrich_koreader_scrobbles(created)
-            logger.info(
-                f"Created {len(created)} scrobbles",
-                extra={"created_scrobbles": created},
-            )
-        return created
-
-def process_koreader_sqlite(file_path: str, user_id: int) -> list:
-    is_os_file = "https://" not in file_path
-
-    if is_os_file:
-        created = process_koreader_sqlite_file(file_path, user_id)
-    else:
-        created = process_koreader_sqlite_url(file_path, user_id)
-    return created
+    return created