Browse source

[books] Clean up fragmented KoReader scrobbling

Colin Powell 1 year ago
parent
commit
525c7c4e2b

+ 22 - 28
vrobbler/apps/books/koreader.py

@@ -215,26 +215,34 @@ def build_scrobbles_from_book_map(
         scrobble_page_data = {}
         playback_position_seconds = 0
         prev_page_stats = {}
+        last_page_number = 0
 
         pages_processed = 0
         total_pages = len(book_map[koreader_book_id]["pages"])
 
-        for page_number, stats in book_map[koreader_book_id]["pages"].items():
+        for cur_page_number, stats in book_map[koreader_book_id][
+            "pages"
+        ].items():
             pages_processed += 1
             # Accumulate our page data for this scrobble
-            scrobble_page_data[page_number] = stats
+            scrobble_page_data[cur_page_number] = stats
 
             seconds_from_last_page = 0
             if prev_page_stats:
                 seconds_from_last_page = stats.get(
                     "end_ts"
                 ) - prev_page_stats.get("start_ts")
+
             playback_position_seconds = playback_position_seconds + stats.get(
                 "duration"
             )
 
             end_of_reading = pages_processed == total_pages
-            if seconds_from_last_page > SESSION_GAP_SECONDS or end_of_reading:
+            big_jump_to_this_page = (cur_page_number - last_page_number) > 10
+            if (
+                seconds_from_last_page > SESSION_GAP_SECONDS
+                and not big_jump_to_this_page
+            ):
                 should_create_scrobble = True
 
             if should_create_scrobble:
@@ -271,23 +279,10 @@ def build_scrobbles_from_book_map(
                     book_id=book_id,
                     user_id=user.id,
                 ).first()
-                # if scrobble:
-                #    logger.info(
-                #        f"Found existing scrobble {scrobble}, updating"
-                #    )
-                #    scrobble.book_page_data = scrobble_page_data
-                #    scrobble.playback_position_seconds = (
-                #        scrobble.calc_reading_duration
-                #    )
-                #    scrobble.save(
-                #        update_fields=[
-                #            "book_page_data",
-                #            "playback_position_seconds",
-                #        ]
-                #    )
+
                 if not scrobble:
                     logger.info(
-                        f"Queueing scrobble for {book_id}, page {page_number}"
+                        f"Queueing scrobble for {book_id}, page {cur_page_number}"
                     )
                     scrobbles_to_create.append(
                         Scrobble(
@@ -300,7 +295,7 @@ def build_scrobbles_from_book_map(
                             playback_position_seconds=playback_position_seconds,
                             book_koreader_hash=book_dict.get("hash"),
                             book_page_data=scrobble_page_data,
-                            book_pages_read=page_number,
+                            book_pages_read=cur_page_number,
                             in_progress=False,
                             played_to_completion=True,
                             long_play_complete=False,
@@ -311,6 +306,7 @@ def build_scrobbles_from_book_map(
                     playback_position_seconds = 0
                     scrobble_page_data = {}
 
+            last_page_number = cur_page_number
             prev_page_stats = stats
     return scrobbles_to_create
 
@@ -321,19 +317,13 @@ def fix_long_play_stats_for_scrobbles(scrobbles: list) -> None:
 
     for scrobble in scrobbles:
         # But if there's a next scrobble, set pages read to their starting page
-        #
-        if scrobble.previous:
+        if scrobble.previous and not scrobble.previous.long_play_complete:
             scrobble.long_play_seconds = scrobble.playback_position_seconds + (
                 scrobble.previous.long_play_seconds or 0
             )
-            if not scrobble.book_pages_read:
-                scrobble.book_pages_read = (
-                    scrobble.calc_pages_read()
-                    + scrobble.previous.book_pages_read
-                )
         else:
             scrobble.long_play_seconds = scrobble.playback_position_seconds
-            scrobble.book_pages_read = scrobble.calc_pages_read()
+        scrobble.book_pages_read = scrobble.calc_pages_read()
 
         scrobble.save(update_fields=["book_pages_read", "long_play_seconds"])
 
@@ -355,7 +345,11 @@ def process_koreader_sqlite_file(file_path, user_id) -> list:
         cur = con.cursor()
         book_map = build_book_map(cur.execute("SELECT * FROM book"))
         book_map = build_page_data(
-            cur.execute("SELECT * from page_stat_data"), book_map, tz
+            cur.execute(
+                "SELECT * from page_stat_data ORDER BY id_book, start_time"
+            ),
+            book_map,
+            tz,
         )
         new_scrobbles = build_scrobbles_from_book_map(book_map, user)
     else:

+ 9 - 1
vrobbler/apps/books/management/commands/migrate_koreader_data_to_json.py

@@ -4,6 +4,7 @@ from datetime import datetime
 from books.models import Book
 from django.core.management.base import BaseCommand
 from scrobbles.models import Scrobble
+from vrobbler.apps.books.koreader import fix_long_play_stats_for_scrobbles
 from vrobbler.apps.scrobbles.utils import timestamp_user_tz_to_utc
 
 
@@ -61,9 +62,14 @@ class Command(BaseCommand):
                 )
 
                 end_of_reading = pages_processed == total_pages
+                big_jump_to_this_page = False
+                if prev_page:
+                    big_jump_to_this_page = (
+                        page.number - prev_page.number
+                    ) > 10
                 if (
                     seconds_from_last_page > SESSION_GAP_SECONDS
-                    or end_of_reading
+                    and not big_jump_to_this_page
                 ):
                     should_create_scrobble = True
 
@@ -146,3 +152,5 @@ class Command(BaseCommand):
                 prev_page = page
 
         created = Scrobble.objects.bulk_create(scrobbles_to_create)
+        for scrobble in created:
+            fix_long_play_stats_for_scrobbles(scrobble)

+ 11 - 11
vrobbler/apps/books/models.py

@@ -150,16 +150,15 @@ class Book(LongPlayScrobblableMixin):
                 author_name = self.author.name
 
             if not data:
-                if not data:
-                    logger.warn(f"rChecking openlibrary for {self.title}")
-                    if self.openlibrary_id and force_update:
-                        data = lookup_book_from_openlibrary(
-                            str(self.openlibrary_id)
-                        )
-                    else:
-                        data = lookup_book_from_openlibrary(
-                            str(self.title), author_name
-                        )
+                logger.warn(f"rChecking openlibrary for {self.title}")
+                if self.openlibrary_id and force_update:
+                    data = lookup_book_from_openlibrary(
+                        str(self.openlibrary_id)
+                    )
+                else:
+                    data = lookup_book_from_openlibrary(
+                        str(self.title), author_name
+                    )
 
             if not data:
                 if self.locg_slug:
@@ -183,6 +182,7 @@ class Book(LongPlayScrobblableMixin):
                 self.get_author_from_locg(data.pop("locg_writer_slug", ""))
 
             ol_title = data.get("title", "")
+            data.pop("ol_author_id", "")
 
             # Kick out a little warning if we're about to change KoReader's title
             if (
@@ -288,7 +288,7 @@ class Book(LongPlayScrobblableMixin):
         last_scrobble = get_scrobbles_for_media(self, user).last()
         progress = 0
         if last_scrobble:
-            progress = int((last_scrobble.book_pages_read / self.pages) * 100)
+            progress = int((last_scrobble.last_page_read / self.pages) * 100)
         return progress
 
     @classmethod

+ 15 - 4
vrobbler/apps/scrobbles/models.py

@@ -683,9 +683,6 @@ class Scrobble(TimeStampedModel):
                 ((playback_seconds + long_play_secs) / run_time_secs) * 100
             )
 
-        # if percent > 100:
-        #    percent = 100
-
         return percent
 
     @property
@@ -743,9 +740,23 @@ class Scrobble(TimeStampedModel):
         if self.book_page_data:
             pages = [int(k) for k in self.book_page_data.keys()]
             pages.sort()
-            pages_read = pages[-1] - pages[0]
+            if len(pages) == 1:
+                pages_read = 1
+            elif len(pages) >= 2:
+                pages_read += pages[-1] - pages[0]
+            else:
+                pages_read = pages[-1] - pages[0]
         return pages_read
 
+    @property
+    def last_page_read(self) -> int:
+        last_page = 0
+        if self.book_page_data:
+            pages = [int(k) for k in self.book_page_data.keys()]
+            pages.sort()
+            last_page = pages[-1]
+        return last_page
+
     @classmethod
     def create_or_update(
         cls, media, user_id: int, scrobble_data: dict, **kwargs