Pārlūkot izejas kodu

[books] Fix importing order of page data

Colin Powell 8 mēneši atpakaļ
vecāks
revīzija
c484dab210

+ 8 - 2
vrobbler/apps/books/koreader.py

@@ -93,7 +93,7 @@ def create_book_from_row(row: list):
         title=row[KoReaderBookColumn.TITLE.value],
         pages=total_pages,
         koreader_data_by_hash={
-            row[KoReaderBookColumn.MD5.value]: {
+            str(row[KoReaderBookColumn.MD5.value]): {
                 "title": row[KoReaderBookColumn.TITLE.value],
                 "author_str": author_str,
                 "book_id": row[KoReaderBookColumn.ID.value],
@@ -233,7 +233,7 @@ def build_scrobbles_from_book_map(
                 should_create_scrobble = True
 
             if should_create_scrobble:
-                scrobble_page_data = OrderedDict(
+                scrobble_page_data = dict(
                     sorted(
                         scrobble_page_data.items(),
                         key=lambda x: x[1]["start_ts"],
@@ -359,6 +359,7 @@ def process_koreader_sqlite_file(file_path, user_id) -> list:
 
     is_os_file = "https://" not in file_path
     if is_os_file:
+        # Loading sqlite file from local filesystem
         con = sqlite3.connect(file_path)
         cur = con.cursor()
         try:
@@ -376,6 +377,8 @@ def process_koreader_sqlite_file(file_path, user_id) -> list:
         )
         new_scrobbles = build_scrobbles_from_book_map(book_map, user)
     else:
+        # Streaming the sqlite file off S3
+        book_map = {}
         for table_name, pragma_table_info, rows in stream_sqlite(
             _sqlite_bytes(file_path), max_buffer_size=1_048_576
         ):
@@ -383,6 +386,9 @@ def process_koreader_sqlite_file(file_path, user_id) -> list:
             if table_name == "book":
                 book_map = build_book_map(rows)
 
+        for table_name, pragma_table_info, rows in stream_sqlite(
+            _sqlite_bytes(file_path), max_buffer_size=1_048_576
+        ):
             if table_name == "page_stat_data":
                 book_map = build_page_data(rows, book_map, tz)
                 new_scrobbles = build_scrobbles_from_book_map(book_map, user)

+ 16 - 6
vrobbler/apps/books/models.py

@@ -34,6 +34,7 @@ from vrobbler.apps.books.locg import (
     lookup_comic_from_locg,
     lookup_comic_writer_by_locg_slug,
 )
+from vrobbler.apps.scrobbles.dataclasses import BookLogData
 
 COMICVINE_API_KEY = getattr(settings, "COMICVINE_API_KEY", "")
 
@@ -137,6 +138,10 @@ class Book(LongPlayScrobblableMixin):
     def subtitle(self):
         return f" by {self.author}"
 
+    @property
+    def logdata_cls(self):
+        return BookLogData
+
     @property
     def primary_image_url(self) -> str:
         url = ""
@@ -278,19 +283,24 @@ class Book(LongPlayScrobblableMixin):
                 author.headshot.save(fname, ContentFile(r.content), save=True)
         self.authors.add(author)
 
-
-    def page_data_for_user(self, user_id: int, convert_timestamps: bool=True) -> dict:
+    def page_data_for_user(
+        self, user_id: int, convert_timestamps: bool = True
+    ) -> dict:
         scrobbles = self.scrobble_set.filter(user=user_id)
 
         pages = {}
         for scrobble in scrobbles:
-            if scrobble.book_page_data:
-                for page, data in scrobble.book_page_data.items():
+            if scrobble.logdata.page_data:
+                for page, data in scrobble.logdata.page_data.items():
                     if convert_timestamps:
-                        data["start_ts"] = datetime.fromtimestamp(data["start_ts"])
+                        data["start_ts"] = datetime.fromtimestamp(
+                            data["start_ts"]
+                        )
                         data["end_ts"] = datetime.fromtimestamp(data["end_ts"])
                     pages[page] = data
-        sorted_pages = OrderedDict(sorted(pages.items(), key=lambda x: x[1]["start_ts"]))
+        sorted_pages = OrderedDict(
+            sorted(pages.items(), key=lambda x: x[1]["start_ts"])
+        )
 
         return sorted_pages
 

+ 1 - 1
vrobbler/apps/books/tests/conftest.py

@@ -48,7 +48,7 @@ class KoReaderBookRows:
             300 + wiggle,
             self.DEFAULT_STR,
             self.DEFAULT_STR,
-            hashlib.md5(title.encode()),
+            hashlib.md5(title.encode()).hexdigest(),
             i * wiggle * 20,
             120,
         ]

+ 10 - 9
vrobbler/apps/books/tests/test_koreader.py

@@ -21,9 +21,10 @@ def test_build_book_map(get_mock, koreader_rows, valid_response):
 @mock.patch("requests.get")
 def test_load_page_data_to_map(get_mock, koreader_rows, valid_response):
     get_mock.return_value = valid_response
-    book_map = build_book_map(koreader_rows.BOOK_ROWS)
-
-    book_map = build_page_data(koreader_rows.PAGE_STATS_ROWS, book_map)
+    book_map = build_page_data(
+        koreader_rows.PAGE_STATS_ROWS,
+        build_book_map(koreader_rows.BOOK_ROWS),
+    )
     assert (
         len(book_map[1]["pages"])
         == koreader_rows.BOOK_ROWS[0][
@@ -45,9 +46,9 @@ def test_build_scrobbles_from_pages(
     # Corresponds to number of sessions per book ( 20 pages per session, 120 +/- 15 pages read )
     expected_scrobbles = 6 * len(book_map.keys())
     assert len(scrobbles) == expected_scrobbles
-    assert len(scrobbles[0].book_page_data.keys()) == 22
-    assert len(scrobbles[1].book_page_data.keys()) == 20
-    assert len(scrobbles[2].book_page_data.keys()) == 20
-    assert len(scrobbles[3].book_page_data.keys()) == 20
-    assert len(scrobbles[4].book_page_data.keys()) == 20
-    assert len(scrobbles[5].book_page_data.keys()) == 18
+    assert len(scrobbles[0].logdata.page_data.keys()) == 21
+    assert len(scrobbles[1].logdata.page_data.keys()) == 20
+    assert len(scrobbles[2].logdata.page_data.keys()) == 20
+    assert len(scrobbles[3].logdata.page_data.keys()) == 20
+    assert len(scrobbles[4].logdata.page_data.keys()) == 20
+    assert len(scrobbles[5].logdata.page_data.keys()) == 18

+ 3 - 4
vrobbler/apps/scrobbles/dataclasses.py

@@ -123,13 +123,12 @@ class BookPageLogData(JSONDataclass):
 
 @dataclass
 class BookLogData(LongPlayLogData):
-    serial_scrobble_id: Optional[int]
-    long_play_complete: bool = False
+    long_play_complete: Optional[bool] = None
     koreader_hash: Optional[str] = None
+    page_data: Optional[dict[int, BookPageLogData]] = None
     pages_read: Optional[int] = None
-    page_data: Optional[list[BookPageLogData]] = None
-    page_end: Optional[int] = None
     page_start: Optional[int] = None
+    page_end: Optional[int] = None
     serial_scrobble_id: Optional[int] = None