Parcourir la source

Refactor new KoReader importer a bit

Colin Powell il y a 1 an
Parent
commit
4ae13b3a1a

+ 261 - 291
vrobbler/apps/books/koreader.py

@@ -5,16 +5,16 @@ import re
 import sqlite3
 from datetime import datetime, timedelta
 from enum import Enum
-from typing import Iterable, List
+from typing import Iterable, List, Optional
 
 import pytz
 import requests
 from books.models import Author, Book, Page
 from books.openlibrary import get_author_openlibrary_id
+from django.apps import apps
 from django.contrib.auth import get_user_model
 from django.db.models import Sum
 from pylast import httpx, tempfile
-from scrobbles.models import Scrobble
 from scrobbles.utils import timestamp_user_tz_to_utc
 from stream_sqlite import stream_sqlite
 
@@ -51,307 +51,277 @@ def _sqlite_bytes(sqlite_url):
 
 
 # Grace period between page reads for it to be a new scrobble
-SESSION_GAP_SECONDS = 3600  # one hour
-
-
-class KoReaderImporter:
-    # Maps a KoReader book ID to the Book.id and total read time of the book in Django
-    # Example:
-    # {"KOREADER_DB_ID": {
-    #     "book_id": <int>,
-    #     "total_seconds": <int>,
-    #     "pages": {
-    #         <int>: {
-    #             "start_ts": <TIMESTAMP>,
-    #             "end_ts": <TIMESTAMP>,
-    #             "duration": <int>
-    #         }
-    #     }
-    # }
-    BOOK_MAP = dict()
-    SQLITE_FILE_URL = str
-    USER_ID = int
-
-    def __init__(self, sqlite_file_url: str, user_id: int):
-        # Map KoReader book IDs to
-        self.SQLITE_FILE_URL = sqlite_file_url
-        self.USER_ID = user_id
-        self.importing_user = User.objects.filter(id=user_id).first()
-
-    def _get_author_str_from_row(self, row):
-        """Given a the raw author string from KoReader, convert it to a single line and
-        strip the middle initials, as OpenLibrary lookup usually fails with those.
-        """
-        ko_authors = row[KoReaderBookColumn.AUTHORS.value].replace("\n", ", ")
-        # Strip middle initials, OpenLibrary often fails with these
-        return re.sub(" [A-Z]. ", " ", ko_authors)
-
-    def _lookup_or_create_authors_from_author_str(
-        self, ko_author_str: str
-    ) -> list:
-        author_str_list = ko_author_str.split(", ")
-        author_list = []
-        for author_str in author_str_list:
-            logger.debug(f"Looking up author {author_str}")
-            # KoReader gave us nothing, bail
-            if author_str == "N/A":
-                logger.warn(
-                    f"KoReader author string is N/A, no authors to find"
-                )
-                continue
+SESSION_GAP_SECONDS = 1800  # half an hour
+
+
def get_author_str_from_row(row):
    """Return the author(s) from a KoReader book row as one comma-joined line.

    KoReader stores multiple authors newline-separated. Middle initials are
    stripped because OpenLibrary lookups usually fail with them.
    """
    ko_authors = row[KoReaderBookColumn.AUTHORS.value].replace("\n", ", ")
    # Strip middle initials ("John F. Smith" -> "John Smith"). The dot must
    # be escaped; unescaped it matched *any* character after the capital,
    # mangling names like "John Ab Smith".
    return re.sub(r" [A-Z]\. ", " ", ko_authors)
+
def lookup_or_create_authors_from_author_str(ko_author_str: str) -> list:
    """Take a comma-separated string of authors from KoReader and return a
    list of Author instances from our database, creating any that are missing.

    Authors equal to "N/A" (KoReader's placeholder for unknown) are skipped.
    """
    author_str_list = ko_author_str.split(", ")
    author_list = []
    for author_str in author_str_list:
        logger.debug(f"Looking up author {author_str}")
        # KoReader gave us nothing, bail
        if author_str == "N/A":
            # logger.warn is a deprecated alias; use warning(). No
            # placeholders here, so a plain string (not an f-string).
            logger.warning("KoReader author string is N/A, no authors to find")
            continue

        author = Author.objects.filter(name=author_str).first()
        if not author:
            author = Author.objects.create(
                name=author_str,
                openlibrary_id=get_author_openlibrary_id(author_str),
            )
            author.fix_metadata()
            logger.debug(f"Created author {author}")
        author_list.append(author)
    return author_list
+
+
def create_book_from_row(row: list):
    """Create a Book (and any missing Authors) from a KoReader book row.

    Estimates total run time from the page count, kicks off metadata lookup
    via fix_metadata(), attaches authors, and returns the new Book.
    """
    author_str = get_author_str_from_row(row)
    total_pages = row[KoReaderBookColumn.PAGES.value]
    run_time = total_pages * Book.AVG_PAGE_READING_SECONDS

    book = Book.objects.create(
        koreader_md5=row[KoReaderBookColumn.MD5.value],
        title=row[KoReaderBookColumn.TITLE.value],
        koreader_id=row[KoReaderBookColumn.ID.value],
        koreader_authors=author_str,
        pages=total_pages,
        run_time_seconds=run_time,
    )
    book.fix_metadata()

    # Add authors
    author_list = lookup_or_create_authors_from_author_str(author_str)
    if author_list:
        book.authors.add(*author_list)

    return book
+
def build_book_map(rows) -> dict:
    """Map KoReader book IDs to local Book data.

    Given an iterable of sqlite rows from KoReader's ``book`` table, look up
    existing books by MD5, create any that don't exist yet, and return a
    mapping of KoReader IDs to our primary key IDs plus the total read time,
    for later page-data loading and scrobble creation.
    """
    book_id_map = {}

    for book_row in rows:
        md5 = book_row[KoReaderBookColumn.MD5.value]
        book = Book.objects.filter(koreader_md5=md5).first()
        if book is None:
            book = create_book_from_row(book_row)

        book.refresh_from_db()

        # KoReader may store NULL for unread books; normalize to 0
        total_seconds = book_row[KoReaderBookColumn.TOTAL_READ_TIME.value] or 0

        koreader_id = book_row[KoReaderBookColumn.ID.value]
        book_id_map[koreader_id] = {
            "book_id": book.id,
            "total_seconds": total_seconds,
        }

    return book_id_map
+
def build_page_data(page_rows: list, book_map: dict, user_tz=None) -> dict:
    """Load per-page reading stats from KoReader into ``book_map``.

    For each row of KoReader's ``page_stat_data`` table, attach a ``pages``
    dict to the matching book entry, keyed by page number, with start/end
    timestamps and duration for later scrobble building.

    ``user_tz`` is a timezone *name* (e.g. "America/New_York") passed to
    pytz.timezone() to convert KoReader's local timestamps to UTC; without
    it, timestamps are assumed to already be UTC.
    """
    for page_row in page_rows:
        koreader_book_id = page_row[KoReaderPageStatColumn.ID_BOOK.value]

        # Check membership BEFORE indexing into book_map; previously the
        # "pages" setup indexed first, so an orphaned page row raised
        # KeyError and this warning branch was unreachable.
        if koreader_book_id not in book_map:
            logger.warning(
                f"Found a page without a corresponding book ID ({koreader_book_id}) in KoReader DB",
                {"page_row": page_row},
            )
            continue

        if "pages" not in book_map[koreader_book_id]:
            book_map[koreader_book_id]["pages"] = {}

        page_number = page_row[KoReaderPageStatColumn.PAGE.value]
        duration = page_row[KoReaderPageStatColumn.DURATION.value]
        start_ts = page_row[KoReaderPageStatColumn.START_TIME.value]
        if user_tz:
            start_ts = timestamp_user_tz_to_utc(
                start_ts,
                pytz.timezone(user_tz),
            ).timestamp()
        else:
            logger.warning(
                "Page data built without user timezone, defaulting to UTC"
            )

        book_map[koreader_book_id]["pages"][page_number] = {
            "duration": duration,
            "start_ts": start_ts,
            "end_ts": start_ts + duration,
        }
    return book_map
+
def build_scrobbles_from_book_map(book_map: dict, user: "User") -> list["Scrobble"]:
    """Walk the page data in ``book_map`` and build reading-session Scrobbles.

    Pages are accumulated into a session until the gap since the previous
    page exceeds SESSION_GAP_SECONDS (or we run out of pages), at which point
    either an existing Scrobble at that timestamp is updated in place or a
    new, *unsaved* Scrobble is queued. Returns the list of unsaved Scrobbles
    for the caller to bulk_create.
    """
    # Imported via the app registry to avoid a circular import with
    # scrobbles.models.
    Scrobble = apps.get_model("scrobbles", "Scrobble")

    scrobbles_to_create = []

    for koreader_book_id, book_dict in book_map.items():
        book_id = book_dict["book_id"]
        if "pages" not in book_dict:
            logger.warning(f"No page data found in book map for {book_id}")
            continue

        should_create_scrobble = False
        scrobble_page_data = {}
        playback_position_seconds = 0
        prev_page_stats = {}

        pages_processed = 0
        total_pages = len(book_dict["pages"])

        for page_number, stats in book_dict["pages"].items():
            pages_processed += 1
            # Accumulate our page data for this scrobble
            scrobble_page_data[page_number] = stats

            seconds_from_last_page = 0
            if prev_page_stats:
                # NOTE(review): this is current end_ts minus previous
                # *start_ts*, so both page durations are counted inside the
                # "gap"; confirm whether start-minus-previous-end was meant.
                seconds_from_last_page = stats.get(
                    "end_ts"
                ) - prev_page_stats.get("start_ts")
            playback_position_seconds += stats.get("duration")

            end_of_reading = pages_processed == total_pages
            if (
                seconds_from_last_page > SESSION_GAP_SECONDS
                or end_of_reading
            ):
                should_create_scrobble = True

            logger.info(
                f"Book {koreader_book_id} - {page_number} {seconds_from_last_page} read seconds"
            )
            if should_create_scrobble:
                first_page_in_scrobble = list(scrobble_page_data.keys())[0]
                # NOTE(review): start_ts may already have been converted to
                # UTC by build_page_data; converting again here could
                # double-shift — verify against build_page_data's output.
                timestamp = timestamp_user_tz_to_utc(
                    int(
                        scrobble_page_data.get(first_page_in_scrobble).get(
                            "start_ts"
                        )
                    ),
                    pytz.timezone(user.profile.timezone),
                )

                scrobble = Scrobble.objects.filter(
                    timestamp=timestamp,
                    book_id=book_id,
                    user_id=user.id,
                ).first()
                if scrobble:
                    logger.info(
                        f"Found existing scrobble {scrobble}, updating"
                    )
                    scrobble.book_page_data = scrobble_page_data
                    scrobble.playback_position_seconds = (
                        scrobble.calc_reading_duration()
                    )
                    scrobble.save(
                        update_fields=[
                            "book_page_data",
                            "playback_position_seconds",
                        ]
                    )
                else:
                    logger.info(
                        f"Queueing scrobble for {book_id}, page {page_number}"
                    )
                    scrobbles_to_create.append(
                        Scrobble(
                            book_id=book_id,
                            user_id=user.id,
                            source="KOReader",
                            media_type=Scrobble.MediaType.BOOK,
                            timestamp=timestamp,
                            played_to_completion=True,
                            playback_position_seconds=playback_position_seconds,
                            in_progress=False,
                            book_page_data=scrobble_page_data,
                            book_pages_read=page_number,
                            long_play_complete=False,
                        )
                    )
                # Reset the session accumulators whether we created a new
                # scrobble or updated an existing one; previously the reset
                # only happened on the create path, so after matching an
                # existing scrobble every subsequent page re-triggered the
                # update with ever-growing page data.
                should_create_scrobble = False
                playback_position_seconds = 0
                scrobble_page_data = {}

            prev_page_stats = stats
    return scrobbles_to_create
+
+
def fix_long_play_stats_for_scrobbles(scrobbles: list) -> None:
    """Given a list of scrobbles, update pages read and long play seconds as
    running totals accumulated from each scrobble's predecessor."""

    for scrobble in scrobbles:
        if scrobble.previous:
            # Continue the running totals from the previous session.
            scrobble.long_play_seconds = (
                scrobble.playback_position_seconds
                + scrobble.previous.long_play_seconds
            )
            scrobble.book_pages_read = (
                scrobble.book_pages_read + scrobble.previous.book_pages_read
            )
        else:
            # First session for this book: seed the totals from this
            # scrobble's own position and page data.
            scrobble.long_play_seconds = scrobble.playback_position_seconds
            scrobble.book_pages_read = scrobble.calc_pages_read()

        scrobble.save(
            update_fields=["book_pages_read", "long_play_seconds"]
        )
 
-                print(
-                    f"Seconds: {seconds_from_last_page} - {should_create_scrobble}"
-                )
-                if should_create_scrobble:
-                    first_page_in_scrobble = list(scrobble_page_data.keys())[0]
-                    timestamp = datetime.utcfromtimestamp(
-                        int(
-                            scrobble_page_data.get(first_page_in_scrobble).get(
-                                "start_ts"
-                            )
-                        )
-                    ).replace(tzinfo=pytz.utc)
-
-                    scrobble = Scrobble.objects.filter(
-                        timestamp=timestamp,
-                        book_id=book_id,
-                        # user_id=self.importing_user.id,
-                    ).first()
-                    if not scrobble:
-                        logger.info(
-                            f"Queueing scrobble for {book_id}, page {page_number}"
-                        )
-                        scrobbles_to_create.append(
-                            Scrobble(
-                                book_id=book_id,
-                                # user_id=self.importing_user.id,
-                                source="KOReader",
-                                media_type=Scrobble.MediaType.BOOK,
-                                timestamp=timestamp,
-                                played_to_completion=True,
-                                playback_position_seconds=playback_position_seconds,
-                                in_progress=False,
-                                book_page_data=scrobble_page_data,
-                                book_pages_read=page_number,
-                                long_play_complete=False,
-                            )
-                        )
-                        # Then start over
-                        should_create_scrobble = False
-                        playback_position_seconds = 0
-                        scrobble_page_data = {}
-
-                prev_page_stats = stats
-        return scrobbles_to_create
-
-    def _enrich_koreader_scrobbles(self, scrobbles: list) -> None:
-        """Given a list of scrobbles, update pages read, long play seconds and check
-        for media completion"""
-
-        for scrobble in scrobbles:
-            # But if there's a next scrobble, set pages read to their starting page
-            #
-            if scrobble.next:
-                scrobble.book_pages_read = scrobble.next.book_pages_read - 1
-            scrobble.long_play_seconds = scrobble.book.page_set.filter(
-                number__lte=scrobble.book_pages_read
-            ).aggregate(Sum("duration_seconds"))["duration_seconds__sum"]
-
-            scrobble.save(
-                update_fields=["book_pages_read", "long_play_seconds"]
-            )
def process_koreader_sqlite_file(file_path, user_id) -> list:
    """Given a sqlite file (local path or https URL) from KoReader, read the
    book and page_stat_data tables and create scrobbles for each reading
    session found.

    Returns the list of created Scrobble instances.
    """
    # Imported via the app registry to avoid a circular import with
    # scrobbles.models.
    Scrobble = apps.get_model("scrobbles", "Scrobble")

    new_scrobbles = []
    user = User.objects.filter(id=user_id).first()
    # build_page_data() calls pytz.timezone() on this value, so it must be a
    # timezone *name* string — previously the fallback was the pytz.utc
    # tzinfo object, which pytz.timezone() rejects.
    tz = "UTC"
    if user:
        # assumes user.profile.timezone is a tz name string — TODO confirm
        tz = user.profile.timezone

    is_os_file = "https://" not in file_path
    if is_os_file:
        con = sqlite3.connect(file_path)
        try:
            cur = con.cursor()
            book_map = build_book_map(cur.execute("SELECT * FROM book"))
            book_map = build_page_data(
                cur.execute("SELECT * from page_stat_data"), book_map, tz
            )
            new_scrobbles = build_scrobbles_from_book_map(book_map, user)
        finally:
            # Always release the sqlite connection, even on error.
            con.close()
    else:
        # Guard against page_stat_data arriving before (or without) the
        # book table in the stream.
        book_map = {}
        for table_name, pragma_table_info, rows in stream_sqlite(
            _sqlite_bytes(file_path), max_buffer_size=1_048_576
        ):
            logger.debug(f"Found table {table_name} - processing")
            if table_name == "book":
                book_map = build_book_map(rows)

            if table_name == "page_stat_data":
                book_map = build_page_data(rows, book_map, tz)
                new_scrobbles = build_scrobbles_from_book_map(book_map, user)

    logger.info(f"Creating {len(new_scrobbles)} new scrobbles")
    created = []
    if new_scrobbles:
        created = Scrobble.objects.bulk_create(new_scrobbles)
        fix_long_play_stats_for_scrobbles(created)
        logger.info(
            f"Created {len(created)} scrobbles",
            extra={"created_scrobbles": created},
        )
    return created

+ 1 - 20
vrobbler/apps/books/tests/conftest.py

@@ -2,11 +2,7 @@ import hashlib
 import pytest
 import random
 
-from vrobbler.apps.books.koreader import (
-    KoReaderBookColumn,
-    KoReaderImporter,
-    KoReaderPageStatColumn,
-)
+from vrobbler.apps.books.koreader import KoReaderBookColumn
 
 ordinal = lambda n: "%d%s" % (
     n,
@@ -113,21 +109,6 @@ class KoReaderBookRows:
         self._generate_random_page_stats_rows()
 
 
-@pytest.fixture
-def koreader_book_row():
-    return KoReaderBookRows(book_count=1).BOOK_ROWS[0]
-
-
-@pytest.fixture
-def koreader_book_rows():
-    return KoReaderBookRows(book_count=4).BOOK_ROWS
-
-
 @pytest.fixture
 def koreader_rows():
     return KoReaderBookRows(book_count=1)
-
-
-@pytest.fixture
-def koreader_rows_for_pages():
-    return KoReaderBookRows(book_count=1)

+ 14 - 15
vrobbler/apps/books/tests/test_koreader.py

@@ -1,28 +1,26 @@
 import pytest
 from unittest import mock
 
-from books.koreader import KoReaderImporter, KoReaderBookColumn
+from books.koreader import KoReaderBookColumn, build_book_map, build_page_data, build_scrobbles_from_book_map
 
 
 @pytest.mark.django_db
 @mock.patch("requests.get")
-def test_get_or_create_books(get_mock, koreader_book_rows, valid_response):
+def test_build_book_map(get_mock, koreader_rows, valid_response):
     get_mock.return_value = valid_response
-    importer = KoReaderImporter("test.sqlite3", user_id=1)
-    importer.get_or_create_books(koreader_book_rows)
-    assert len(importer.BOOK_MAP) == 4
+    book_map = build_book_map(koreader_rows.BOOK_ROWS)
+    assert len(book_map) == 1
 
 
 @pytest.mark.django_db
 @mock.patch("requests.get")
 def test_load_page_data_to_map(get_mock, koreader_rows, valid_response):
     get_mock.return_value = valid_response
-    importer = KoReaderImporter("test.sqlite3", user_id=1)
-    importer.get_or_create_books(koreader_rows.BOOK_ROWS)
+    book_map = build_book_map(koreader_rows.BOOK_ROWS)
 
-    importer.load_page_data_to_map(koreader_rows.PAGE_STATS_ROWS)
+    book_map = build_page_data(koreader_rows.PAGE_STATS_ROWS, book_map)
     assert (
-        len(importer.BOOK_MAP[1]["pages"])
+        len(book_map[1]["pages"])
         == koreader_rows.BOOK_ROWS[0][
             KoReaderBookColumn.TOTAL_READ_PAGES.value
         ]
@@ -32,15 +30,16 @@ def test_load_page_data_to_map(get_mock, koreader_rows, valid_response):
 @pytest.mark.django_db
 @mock.patch("requests.get")
 def test_build_scrobbles_from_pages(
-    get_mock, koreader_rows_for_pages, valid_response
+    get_mock, koreader_rows, valid_response
 ):
     get_mock.return_value = valid_response
-    importer = KoReaderImporter("test.sqlite3", user_id=1)
-    importer.get_or_create_books(koreader_rows.BOOK_ROWS)
-    importer.load_page_data_to_map(koreader_rows.PAGE_STATS_ROWS)
-    scrobbles = importer.build_scrobbles_from_pages()
+    book_map = build_book_map(koreader_rows.BOOK_ROWS)
+    book_map = build_page_data(koreader_rows.PAGE_STATS_ROWS, book_map)
+
+    scrobbles = build_scrobbles_from_book_map(book_map)
     # Corresponds to number of sessions per book ( 20 pages per session, 120 +/- 15 pages read )
-    assert len(scrobbles) == 6
+    expected_scrobbles = 6 * len(book_map.keys())
+    assert len(scrobbles) == expected_scrobbles
     assert len(scrobbles[0].book_page_data.keys()) == 22
     assert len(scrobbles[1].book_page_data.keys()) == 20
     assert len(scrobbles[2].book_page_data.keys()) == 20

+ 17 - 2
vrobbler/apps/scrobbles/models.py

@@ -5,6 +5,7 @@ from typing import Optional
 from uuid import uuid4
 
 from boardgames.models import BoardGame
+from books.koreader import process_koreader_sqlite_file
 from books.models import Book
 from django.conf import settings
 from django.contrib.auth import get_user_model
@@ -172,7 +173,6 @@ class KoReaderImport(BaseFileImportMixin):
     sqlite_file = models.FileField(upload_to=get_path, **BNULL)
 
     def process(self, force=False):
-        from books.koreader import process_koreader_sqlite
 
         if self.processed_finished and not force:
             logger.info(
@@ -181,7 +181,7 @@ class KoReaderImport(BaseFileImportMixin):
             return
 
         self.mark_started()
-        scrobbles = process_koreader_sqlite(
+        scrobbles = process_koreader_sqlite_file(
             self.upload_file_path, self.user.id
         )
         self.record_log(scrobbles)
@@ -741,6 +741,21 @@ class Scrobble(TimeStampedModel):
         timestamp = self.timestamp.strftime("%Y-%m-%d")
         return f"Scrobble of {self.media_obj} ({timestamp})"
 
+    def calc_reading_duration(self) -> int:
+        duration = 0
+        if self.book_page_data:
+            for k, v in self.book_page_data.items():
+                duration += v.get("duration")
+        return duration
+
+    def calc_pages_read(self) -> int:
+        pages_read = 0
+        if self.book_page_data:
+            pages = [int(k) for k in self.book_page_data.keys()]
+            pages.sort()
+            pages_read = pages[-1] - pages[0]
+        return pages_read
+
     @classmethod
     def create_or_update(
         cls, media, user_id: int, scrobble_data: dict, **kwargs

+ 6 - 11
vrobbler/apps/scrobbles/urls.py

@@ -9,26 +9,21 @@ urlpatterns = [
         views.ManualScrobbleView.as_view(),
         name="lookup-manual-scrobble",
     ),
-    path(
-        "manual/audioscrobbler/",
-        views.AudioScrobblerImportCreateView.as_view(),
-        name="audioscrobbler-file-upload",
-    ),
-    path(
-        "manual/koreader/",
-        views.KoReaderImportCreateView.as_view(),
-        name="koreader-file-upload",
-    ),
     path(
         "long-play-finish/<slug:uuid>/",
         views.scrobble_longplay_finish,
         name="longplay-finish",
     ),
     path(
-        "upload/",
+        "upload/audioscrobbler/",
         views.AudioScrobblerImportCreateView.as_view(),
         name="audioscrobbler-file-upload",
     ),
+    path(
+        "upload/koreader/",
+        views.KoReaderImportCreateView.as_view(),
+        name="koreader-file-upload",
+    ),
     path(
         "lastfm-import/",
         views.lastfm_import,