Parcourir la source

Refactor new KoReader importer a bit

Colin Powell il y a 1 an
Parent
commit
4ae13b3a1a

+ 261 - 291
vrobbler/apps/books/koreader.py

@@ -5,16 +5,16 @@ import re
 import sqlite3
 from datetime import datetime, timedelta
 from enum import Enum
-from typing import Iterable, List
+from typing import Iterable, List, Optional
 
 import pytz
 import requests
 from books.models import Author, Book, Page
 from books.openlibrary import get_author_openlibrary_id
+from django.apps import apps
 from django.contrib.auth import get_user_model
 from django.db.models import Sum
 from pylast import httpx, tempfile
-from scrobbles.models import Scrobble
 from scrobbles.utils import timestamp_user_tz_to_utc
 from stream_sqlite import stream_sqlite
 
@@ -51,307 +51,277 @@ def _sqlite_bytes(sqlite_url):
 
 
 # Grace period between page reads for it to be a new scrobble
-SESSION_GAP_SECONDS = 3600  # one hour
-
-
-class KoReaderImporter:
-    # Maps a KoReader book ID to the Book.id and total read time of the book in Django
-    # Example:
-    # {"KOREADER_DB_ID": {
-    #     "book_id": <int>,
-    #     "total_seconds": <int>,
-    #     "pages": {
-    #         <int>: {
-    #             "start_ts": <TIMESTAMP>,
-    #             "end_ts": <TIMESTAMP>,
-    #             "duration": <int>
-    #         }
-    #     }
-    # }
-    BOOK_MAP = dict()
-    SQLITE_FILE_URL = str
-    USER_ID = int
-
-    def __init__(self, sqlite_file_url: str, user_id: int):
-        # Map KoReader book IDs to
-        self.SQLITE_FILE_URL = sqlite_file_url
-        self.USER_ID = user_id
-        self.importing_user = User.objects.filter(id=user_id).first()
-
-    def _get_author_str_from_row(self, row):
-        """Given a the raw author string from KoReader, convert it to a single line and
-        strip the middle initials, as OpenLibrary lookup usually fails with those.
-        """
-        ko_authors = row[KoReaderBookColumn.AUTHORS.value].replace("\n", ", ")
-        # Strip middle initials, OpenLibrary often fails with these
-        return re.sub(" [A-Z]. ", " ", ko_authors)
-
-    def _lookup_or_create_authors_from_author_str(
-        self, ko_author_str: str
-    ) -> list:
-        author_str_list = ko_author_str.split(", ")
-        author_list = []
-        for author_str in author_str_list:
-            logger.debug(f"Looking up author {author_str}")
-            # KoReader gave us nothing, bail
-            if author_str == "N/A":
-                logger.warn(
-                    f"KoReader author string is N/A, no authors to find"
-                )
-                continue
+SESSION_GAP_SECONDS = 1800  # half an hour
+
+
def get_author_str_from_row(row):
    """Return the author(s) from a KoReader book row as one comma-joined line.

    KoReader stores multiple authors newline-separated. Middle initials are
    stripped because OpenLibrary lookups usually fail with them.
    """
    ko_authors = row[KoReaderBookColumn.AUTHORS.value].replace("\n", ", ")
    # Strip middle initials ("John F. Smith" -> "John Smith"). The dot must
    # be escaped; unescaped it matched *any* character after the capital,
    # mangling names like "John Ab Smith".
    return re.sub(r" [A-Z]\. ", " ", ko_authors)
+
def lookup_or_create_authors_from_author_str(ko_author_str: str) -> list:
    """Take a comma-separated string of authors from KoReader and return a
    list of Author instances from our database, creating any that are missing.

    Authors equal to "N/A" (KoReader's placeholder for unknown) are skipped.
    """
    author_str_list = ko_author_str.split(", ")
    author_list = []
    for author_str in author_str_list:
        logger.debug(f"Looking up author {author_str}")
        # KoReader gave us nothing, bail
        if author_str == "N/A":
            # logger.warn is a deprecated alias; use warning(). No
            # placeholders here, so a plain string (not an f-string).
            logger.warning("KoReader author string is N/A, no authors to find")
            continue

        author = Author.objects.filter(name=author_str).first()
        if not author:
            author = Author.objects.create(
                name=author_str,
                openlibrary_id=get_author_openlibrary_id(author_str),
            )
            author.fix_metadata()
            logger.debug(f"Created author {author}")
        author_list.append(author)
    return author_list
+
+
def create_book_from_row(row: list):
    """Create a Book (and any missing Authors) from a KoReader book row.

    Estimates total run time from the page count, kicks off metadata lookup
    via fix_metadata(), attaches authors, and returns the new Book.
    """
    author_str = get_author_str_from_row(row)
    total_pages = row[KoReaderBookColumn.PAGES.value]
    run_time = total_pages * Book.AVG_PAGE_READING_SECONDS

    book = Book.objects.create(
        koreader_md5=row[KoReaderBookColumn.MD5.value],
        title=row[KoReaderBookColumn.TITLE.value],
        koreader_id=row[KoReaderBookColumn.ID.value],
        koreader_authors=author_str,
        pages=total_pages,
        run_time_seconds=run_time,
    )
    book.fix_metadata()

    # Add authors
    author_list = lookup_or_create_authors_from_author_str(author_str)
    if author_list:
        book.authors.add(*author_list)

    return book
+
def build_book_map(rows) -> dict:
    """Map KoReader book IDs to local Book data.

    Given an iterable of sqlite rows from KoReader's ``book`` table, look up
    existing books by MD5, create any that don't exist yet, and return a
    mapping of KoReader IDs to our primary key IDs plus the total read time,
    for later page-data loading and scrobble creation.
    """
    book_id_map = {}

    for book_row in rows:
        md5 = book_row[KoReaderBookColumn.MD5.value]
        book = Book.objects.filter(koreader_md5=md5).first()
        if book is None:
            book = create_book_from_row(book_row)

        book.refresh_from_db()

        # KoReader may store NULL for unread books; normalize to 0
        total_seconds = book_row[KoReaderBookColumn.TOTAL_READ_TIME.value] or 0

        koreader_id = book_row[KoReaderBookColumn.ID.value]
        book_id_map[koreader_id] = {
            "book_id": book.id,
            "total_seconds": total_seconds,
        }

    return book_id_map
+
def build_page_data(page_rows: list, book_map: dict, user_tz=None) -> dict:
    """Load per-page reading stats from KoReader into ``book_map``.

    For each row of KoReader's ``page_stat_data`` table, attach a ``pages``
    dict to the matching book entry, keyed by page number, with start/end
    timestamps and duration for later scrobble building.

    ``user_tz`` is a timezone *name* (e.g. "America/New_York") passed to
    pytz.timezone() to convert KoReader's local timestamps to UTC; without
    it, timestamps are assumed to already be UTC.
    """
    for page_row in page_rows:
        koreader_book_id = page_row[KoReaderPageStatColumn.ID_BOOK.value]

        # Check membership BEFORE indexing into book_map; previously the
        # "pages" setup indexed first, so an orphaned page row raised
        # KeyError and this warning branch was unreachable.
        if koreader_book_id not in book_map:
            logger.warning(
                f"Found a page without a corresponding book ID ({koreader_book_id}) in KoReader DB",
                {"page_row": page_row},
            )
            continue

        if "pages" not in book_map[koreader_book_id]:
            book_map[koreader_book_id]["pages"] = {}

        page_number = page_row[KoReaderPageStatColumn.PAGE.value]
        duration = page_row[KoReaderPageStatColumn.DURATION.value]
        start_ts = page_row[KoReaderPageStatColumn.START_TIME.value]
        if user_tz:
            start_ts = timestamp_user_tz_to_utc(
                start_ts,
                pytz.timezone(user_tz),
            ).timestamp()
        else:
            logger.warning(
                "Page data built without user timezone, defaulting to UTC"
            )

        book_map[koreader_book_id]["pages"][page_number] = {
            "duration": duration,
            "start_ts": start_ts,
            "end_ts": start_ts + duration,
        }
    return book_map
+
def build_scrobbles_from_book_map(book_map: dict, user: "User") -> list["Scrobble"]:
    """Walk the page data in ``book_map`` and build reading-session Scrobbles.

    Pages are accumulated into a session until the gap since the previous
    page exceeds SESSION_GAP_SECONDS (or we run out of pages), at which point
    either an existing Scrobble at that timestamp is updated in place or a
    new, *unsaved* Scrobble is queued. Returns the list of unsaved Scrobbles
    for the caller to bulk_create.
    """
    # Imported via the app registry to avoid a circular import with
    # scrobbles.models.
    Scrobble = apps.get_model("scrobbles", "Scrobble")

    scrobbles_to_create = []

    for koreader_book_id, book_dict in book_map.items():
        book_id = book_dict["book_id"]
        if "pages" not in book_dict:
            logger.warning(f"No page data found in book map for {book_id}")
            continue

        should_create_scrobble = False
        scrobble_page_data = {}
        playback_position_seconds = 0
        prev_page_stats = {}

        pages_processed = 0
        total_pages = len(book_dict["pages"])

        for page_number, stats in book_dict["pages"].items():
            pages_processed += 1
            # Accumulate our page data for this scrobble
            scrobble_page_data[page_number] = stats

            seconds_from_last_page = 0
            if prev_page_stats:
                # NOTE(review): this is current end_ts minus previous
                # *start_ts*, so both page durations are counted inside the
                # "gap"; confirm whether start-minus-previous-end was meant.
                seconds_from_last_page = stats.get(
                    "end_ts"
                ) - prev_page_stats.get("start_ts")
            playback_position_seconds += stats.get("duration")

            end_of_reading = pages_processed == total_pages
            if (
                seconds_from_last_page > SESSION_GAP_SECONDS
                or end_of_reading
            ):
                should_create_scrobble = True

            logger.info(
                f"Book {koreader_book_id} - {page_number} {seconds_from_last_page} read seconds"
            )
            if should_create_scrobble:
                first_page_in_scrobble = list(scrobble_page_data.keys())[0]
                # NOTE(review): start_ts may already have been converted to
                # UTC by build_page_data; converting again here could
                # double-shift — verify against build_page_data's output.
                timestamp = timestamp_user_tz_to_utc(
                    int(
                        scrobble_page_data.get(first_page_in_scrobble).get(
                            "start_ts"
                        )
                    ),
                    pytz.timezone(user.profile.timezone),
                )

                scrobble = Scrobble.objects.filter(
                    timestamp=timestamp,
                    book_id=book_id,
                    user_id=user.id,
                ).first()
                if scrobble:
                    logger.info(
                        f"Found existing scrobble {scrobble}, updating"
                    )
                    scrobble.book_page_data = scrobble_page_data
                    scrobble.playback_position_seconds = (
                        scrobble.calc_reading_duration()
                    )
                    scrobble.save(
                        update_fields=[
                            "book_page_data",
                            "playback_position_seconds",
                        ]
                    )
                else:
                    logger.info(
                        f"Queueing scrobble for {book_id}, page {page_number}"
                    )
                    scrobbles_to_create.append(
                        Scrobble(
                            book_id=book_id,
                            user_id=user.id,
                            source="KOReader",
                            media_type=Scrobble.MediaType.BOOK,
                            timestamp=timestamp,
                            played_to_completion=True,
                            playback_position_seconds=playback_position_seconds,
                            in_progress=False,
                            book_page_data=scrobble_page_data,
                            book_pages_read=page_number,
                            long_play_complete=False,
                        )
                    )
                # Reset the session accumulators whether we created a new
                # scrobble or updated an existing one; previously the reset
                # only happened on the create path, so after matching an
                # existing scrobble every subsequent page re-triggered the
                # update with ever-growing page data.
                should_create_scrobble = False
                playback_position_seconds = 0
                scrobble_page_data = {}

            prev_page_stats = stats
    return scrobbles_to_create
+
+
def fix_long_play_stats_for_scrobbles(scrobbles: list) -> None:
    """Given a list of scrobbles, update pages read and long play seconds as
    running totals accumulated from each scrobble's predecessor."""

    for scrobble in scrobbles:
        if scrobble.previous:
            # Continue the running totals from the previous session.
            scrobble.long_play_seconds = (
                scrobble.playback_position_seconds
                + scrobble.previous.long_play_seconds
            )
            scrobble.book_pages_read = (
                scrobble.book_pages_read + scrobble.previous.book_pages_read
            )
        else:
            # First session for this book: seed the totals from this
            # scrobble's own position and page data.
            scrobble.long_play_seconds = scrobble.playback_position_seconds
            scrobble.book_pages_read = scrobble.calc_pages_read()

        scrobble.save(
            update_fields=["book_pages_read", "long_play_seconds"]
        )
 
-                print(
-                    f"Seconds: {seconds_from_last_page} - {should_create_scrobble}"
-                )
-                if should_create_scrobble:
-                    first_page_in_scrobble = list(scrobble_page_data.keys())[0]
-                    timestamp = datetime.utcfromtimestamp(
-                        int(
-                            scrobble_page_data.get(first_page_in_scrobble).get(
-                                "start_ts"
-                            )
-                        )
-                    ).replace(tzinfo=pytz.utc)
-
-                    scrobble = Scrobble.objects.filter(
-                        timestamp=timestamp,
-                        book_id=book_id,
-                        # user_id=self.importing_user.id,
-                    ).first()
-                    if not scrobble:
-                        logger.info(
-                            f"Queueing scrobble for {book_id}, page {page_number}"
-                        )
-                        scrobbles_to_create.append(
-                            Scrobble(
-                                book_id=book_id,
-                                # user_id=self.importing_user.id,
-                                source="KOReader",
-                                media_type=Scrobble.MediaType.BOOK,
-                                timestamp=timestamp,
-                                played_to_completion=True,
-                                playback_position_seconds=playback_position_seconds,
-                                in_progress=False,
-                                book_page_data=scrobble_page_data,
-                                book_pages_read=page_number,
-                                long_play_complete=False,
-                            )
-                        )
-                        # Then start over
-                        should_create_scrobble = False
-                        playback_position_seconds = 0
-                        scrobble_page_data = {}
-
-                prev_page_stats = stats
-        return scrobbles_to_create
-
-    def _enrich_koreader_scrobbles(self, scrobbles: list) -> None:
-        """Given a list of scrobbles, update pages read, long play seconds and check
-        for media completion"""
-
-        for scrobble in scrobbles:
-            # But if there's a next scrobble, set pages read to their starting page
-            #
-            if scrobble.next:
-                scrobble.book_pages_read = scrobble.next.book_pages_read - 1
-            scrobble.long_play_seconds = scrobble.book.page_set.filter(
-                number__lte=scrobble.book_pages_read
-            ).aggregate(Sum("duration_seconds"))["duration_seconds__sum"]
-
-            scrobble.save(
-                update_fields=["book_pages_read", "long_play_seconds"]
-            )
def process_koreader_sqlite_file(file_path, user_id) -> list:
    """Given a sqlite file (local path or https URL) from KoReader, read the
    book and page_stat_data tables and create scrobbles for each reading
    session found.

    Returns the list of created Scrobble instances.
    """
    # Imported via the app registry to avoid a circular import with
    # scrobbles.models.
    Scrobble = apps.get_model("scrobbles", "Scrobble")

    new_scrobbles = []
    user = User.objects.filter(id=user_id).first()
    # build_page_data() calls pytz.timezone() on this value, so it must be a
    # timezone *name* string — previously the fallback was the pytz.utc
    # tzinfo object, which pytz.timezone() rejects.
    tz = "UTC"
    if user:
        # assumes user.profile.timezone is a tz name string — TODO confirm
        tz = user.profile.timezone

    is_os_file = "https://" not in file_path
    if is_os_file:
        con = sqlite3.connect(file_path)
        try:
            cur = con.cursor()
            book_map = build_book_map(cur.execute("SELECT * FROM book"))
            book_map = build_page_data(
                cur.execute("SELECT * from page_stat_data"), book_map, tz
            )
            new_scrobbles = build_scrobbles_from_book_map(book_map, user)
        finally:
            # Always release the sqlite connection, even on error.
            con.close()
    else:
        # Guard against page_stat_data arriving before (or without) the
        # book table in the stream.
        book_map = {}
        for table_name, pragma_table_info, rows in stream_sqlite(
            _sqlite_bytes(file_path), max_buffer_size=1_048_576
        ):
            logger.debug(f"Found table {table_name} - processing")
            if table_name == "book":
                book_map = build_book_map(rows)

            if table_name == "page_stat_data":
                book_map = build_page_data(rows, book_map, tz)
                new_scrobbles = build_scrobbles_from_book_map(book_map, user)

    logger.info(f"Creating {len(new_scrobbles)} new scrobbles")
    created = []
    if new_scrobbles:
        created = Scrobble.objects.bulk_create(new_scrobbles)
        fix_long_play_stats_for_scrobbles(created)
        logger.info(
            f"Created {len(created)} scrobbles",
            extra={"created_scrobbles": created},
        )
    return created

+ 1 - 20
vrobbler/apps/books/tests/conftest.py

@@ -2,11 +2,7 @@ import hashlib
 import pytest
 import random
 
-from vrobbler.apps.books.koreader import (
-    KoReaderBookColumn,
-    KoReaderImporter,
-    KoReaderPageStatColumn,
-)
+from vrobbler.apps.books.koreader import KoReaderBookColumn
 
 ordinal = lambda n: "%d%s" % (
     n,
@@ -113,21 +109,6 @@ class KoReaderBookRows:
         self._generate_random_page_stats_rows()
 
 
-@pytest.fixture
-def koreader_book_row():
-    return KoReaderBookRows(book_count=1).BOOK_ROWS[0]
-
-
-@pytest.fixture
-def koreader_book_rows():
-    return KoReaderBookRows(book_count=4).BOOK_ROWS
-
-
 @pytest.fixture
 def koreader_rows():
     return KoReaderBookRows(book_count=1)
-
-
-@pytest.fixture
-def koreader_rows_for_pages():
-    return KoReaderBookRows(book_count=1)

+ 14 - 15
vrobbler/apps/books/tests/test_koreader.py

@@ -1,28 +1,26 @@
 import pytest
 from unittest import mock
 
-from books.koreader import KoReaderImporter, KoReaderBookColumn
+from books.koreader import KoReaderBookColumn, build_book_map, build_page_data, build_scrobbles_from_book_map
 
 
 @pytest.mark.django_db
 @mock.patch("requests.get")
-def test_get_or_create_books(get_mock, koreader_book_rows, valid_response):
+def test_build_book_map(get_mock, koreader_rows, valid_response):
     get_mock.return_value = valid_response
-    importer = KoReaderImporter("test.sqlite3", user_id=1)
-    importer.get_or_create_books(koreader_book_rows)
-    assert len(importer.BOOK_MAP) == 4
+    book_map = build_book_map(koreader_rows.BOOK_ROWS)
+    assert len(book_map) == 1
 
 
 @pytest.mark.django_db
 @mock.patch("requests.get")
 def test_load_page_data_to_map(get_mock, koreader_rows, valid_response):
     get_mock.return_value = valid_response
-    importer = KoReaderImporter("test.sqlite3", user_id=1)
-    importer.get_or_create_books(koreader_rows.BOOK_ROWS)
+    book_map = build_book_map(koreader_rows.BOOK_ROWS)
 
-    importer.load_page_data_to_map(koreader_rows.PAGE_STATS_ROWS)
+    book_map = build_page_data(koreader_rows.PAGE_STATS_ROWS, book_map)
     assert (
-        len(importer.BOOK_MAP[1]["pages"])
+        len(book_map[1]["pages"])
         == koreader_rows.BOOK_ROWS[0][
             KoReaderBookColumn.TOTAL_READ_PAGES.value
         ]
@@ -32,15 +30,16 @@ def test_load_page_data_to_map(get_mock, koreader_rows, valid_response):
 @pytest.mark.django_db
 @mock.patch("requests.get")
 def test_build_scrobbles_from_pages(
-    get_mock, koreader_rows_for_pages, valid_response
+    get_mock, koreader_rows, valid_response
 ):
     get_mock.return_value = valid_response
-    importer = KoReaderImporter("test.sqlite3", user_id=1)
-    importer.get_or_create_books(koreader_rows.BOOK_ROWS)
-    importer.load_page_data_to_map(koreader_rows.PAGE_STATS_ROWS)
-    scrobbles = importer.build_scrobbles_from_pages()
+    book_map = build_book_map(koreader_rows.BOOK_ROWS)
+    book_map = build_page_data(koreader_rows.PAGE_STATS_ROWS, book_map)
+
+    scrobbles = build_scrobbles_from_book_map(book_map)
     # Corresponds to number of sessions per book ( 20 pages per session, 120 +/- 15 pages read )
-    assert len(scrobbles) == 6
+    expected_scrobbles = 6 * len(book_map.keys())
+    assert len(scrobbles) == expected_scrobbles
     assert len(scrobbles[0].book_page_data.keys()) == 22
     assert len(scrobbles[1].book_page_data.keys()) == 20
     assert len(scrobbles[2].book_page_data.keys()) == 20

+ 17 - 2
vrobbler/apps/scrobbles/models.py

@@ -5,6 +5,7 @@ from typing import Optional
 from uuid import uuid4
 
 from boardgames.models import BoardGame
+from books.koreader import process_koreader_sqlite_file
 from books.models import Book
 from django.conf import settings
 from django.contrib.auth import get_user_model
@@ -172,7 +173,6 @@ class KoReaderImport(BaseFileImportMixin):
     sqlite_file = models.FileField(upload_to=get_path, **BNULL)
 
     def process(self, force=False):
-        from books.koreader import process_koreader_sqlite
 
         if self.processed_finished and not force:
             logger.info(
@@ -181,7 +181,7 @@ class KoReaderImport(BaseFileImportMixin):
             return
 
         self.mark_started()
-        scrobbles = process_koreader_sqlite(
+        scrobbles = process_koreader_sqlite_file(
             self.upload_file_path, self.user.id
         )
         self.record_log(scrobbles)
@@ -741,6 +741,21 @@ class Scrobble(TimeStampedModel):
         timestamp = self.timestamp.strftime("%Y-%m-%d")
         return f"Scrobble of {self.media_obj} ({timestamp})"
 
+    def calc_reading_duration(self) -> int:
+        duration = 0
+        if self.book_page_data:
+            for k, v in self.book_page_data.items():
+                duration += v.get("duration")
+        return duration
+
+    def calc_pages_read(self) -> int:
+        pages_read = 0
+        if self.book_page_data:
+            pages = [int(k) for k in self.book_page_data.keys()]
+            pages.sort()
+            pages_read = pages[-1] - pages[0]
+        return pages_read
+
     @classmethod
     def create_or_update(
         cls, media, user_id: int, scrobble_data: dict, **kwargs

+ 6 - 11
vrobbler/apps/scrobbles/urls.py

@@ -9,26 +9,21 @@ urlpatterns = [
         views.ManualScrobbleView.as_view(),
         name="lookup-manual-scrobble",
     ),
-    path(
-        "manual/audioscrobbler/",
-        views.AudioScrobblerImportCreateView.as_view(),
-        name="audioscrobbler-file-upload",
-    ),
-    path(
-        "manual/koreader/",
-        views.KoReaderImportCreateView.as_view(),
-        name="koreader-file-upload",
-    ),
     path(
         "long-play-finish/<slug:uuid>/",
         views.scrobble_longplay_finish,
         name="longplay-finish",
     ),
     path(
-        "upload/",
+        "upload/audioscrobbler/",
         views.AudioScrobblerImportCreateView.as_view(),
         name="audioscrobbler-file-upload",
     ),
+    path(
+        "upload/koreader/",
+        views.KoReaderImportCreateView.as_view(),
+        name="koreader-file-upload",
+    ),
     path(
         "lastfm-import/",
         views.lastfm_import,