
[books] Add utility urls to model and scrobbles

Colin Powell 3 weeks ago
parent
commit
050add8543

+ 5 - 1
PROJECT.org

@@ -92,7 +92,7 @@ fetching and simple saving.
 :LOGBOOK:
 CLOCK: [2025-07-09 Wed 09:55]--[2025-07-09 Wed 10:15] =>  0:20
 :END:
-* Backlog [1/25]
+* Backlog [2/26]
 ** TODO [#C] Create small utility to clean up tracks scrobbled with wonky playback times :vrobbler:personal:bug:music:scrobbles:
 ** TODO [#C] Move to using more robust mopidy-webhooks pacakge form pypi :utility:improvement:
 :PROPERTIES:
@@ -483,6 +483,10 @@ https://life.lab.unbl.ink/scrobble/e39779c8-62a5-46a6-bdef-fb7662810dc6/start/
 :PROPERTIES:
 :ID:       f1ef3945-e6e4-66c1-b72e-3cede7a0f84a
 :END:
+** DONE [#B] Move comic resume URL to next page and check if it exists :vrobbler:feature:books:personal:project:
+:PROPERTIES:
+:ID:       9fe09567-11a3-7083-53c7-07458a9591d0
+:END:
 * Version 31.0 [3/3]
 ** DONE [#A] Stop comic book webpage scrobbles from overwriting old scrobbles :vrobbler:personal:bug:books:scrobbling:
 :PROPERTIES:

+ 18 - 0
vrobbler/apps/books/migrations/0030_book_readcomics_url.py

@@ -0,0 +1,18 @@
+# Generated by Django 4.2.19 on 2025-10-22 16:29
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('books', '0029_book_comicvine_id_book_issue_number_and_more'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='book',
+            name='readcomics_url',
+            field=models.CharField(blank=True, max_length=255, null=True),
+        ),
+    ]

+ 18 - 0
vrobbler/apps/books/migrations/0031_book_next_readcomics_url.py

@@ -0,0 +1,18 @@
+# Generated by Django 4.2.19 on 2025-10-22 17:42
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('books', '0030_book_readcomics_url'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='book',
+            name='next_readcomics_url',
+            field=models.CharField(blank=True, max_length=255, null=True),
+        ),
+    ]

+ 29 - 14
vrobbler/apps/books/models.py

@@ -1,15 +1,19 @@
+import logging
 from collections import OrderedDict
 from dataclasses import dataclass
-import logging
 from datetime import datetime
 from typing import Optional
 from uuid import uuid4

 import requests
+from books.constants import READCOMICSONLINE_URL
 from books.openlibrary import (
     lookup_author_from_openlibrary,
     lookup_book_from_openlibrary,
 )
+from books.sources.google import lookup_book_from_google
+from books.sources.semantic import lookup_paper_from_semantic
+from books.utils import get_comic_issue_url
 from django.conf import settings
 from django.contrib.auth import get_user_model
 from django.core.files.base import ContentFile
@@ -18,27 +22,25 @@ from django.urls import reverse
 from django_extensions.db.models import TimeStampedModel
 from imagekit.models import ImageSpecField
 from imagekit.processors import ResizeToFit
+from scrobbles.dataclasses import BaseLogData, LongPlayLogData
 from scrobbles.mixins import (
     LongPlayScrobblableMixin,
     ObjectWithGenres,
     ScrobblableConstants,
 )
-from scrobbles.utils import get_scrobbles_for_media
+from scrobbles.utils import get_scrobbles_for_media, next_url_if_exists
 from taggit.managers import TaggableManager
 from thefuzz import fuzz
-from vrobbler.apps.books.sources.comicvine import (
-    ComicVineClient,
-    lookup_comic_from_comicvine,
-)

 from vrobbler.apps.books.locg import (
     lookup_comic_by_locg_slug,
     lookup_comic_from_locg,
     lookup_comic_writer_by_locg_slug,
 )
-from books.sources.google import lookup_book_from_google
-from books.sources.semantic import lookup_paper_from_semantic
-from scrobbles.dataclasses import BaseLogData, LongPlayLogData
+from vrobbler.apps.books.sources.comicvine import (
+    ComicVineClient,
+    lookup_comic_from_comicvine,
+)

 COMICVINE_API_KEY = getattr(settings, "COMICVINE_API_KEY", "")

@@ -63,7 +65,6 @@ class BookLogData(BaseLogData, LongPlayLogData):
     page_start: Optional[int] = None
     page_end: Optional[int] = None
     resume_url: Optional[str] = None
-    restart_url: Optional[str] = None

     _excluded_fields = {"koreader_hash", "page_data"}

@@ -150,6 +151,8 @@ class Book(LongPlayScrobblableMixin):
     first_sentence = models.TextField(**BNULL)
     # ComicVine
     comicvine_id = models.CharField(max_length=255, **BNULL)
+    readcomics_url = models.CharField(max_length=255, **BNULL)
+    next_readcomics_url = models.CharField(max_length=255, **BNULL)
     issue_number = models.IntegerField(max_length=5, **BNULL)
     volume_number = models.IntegerField(max_length=5, **BNULL)
     # OpenLibrary
@@ -171,7 +174,11 @@ class Book(LongPlayScrobblableMixin):

     genre = TaggableManager(through=ObjectWithGenres)

-    def __str__(self):
+    def __str__(self) -> str:
+        if self.issue_number and "Issue" not in str(self.title):
+            return f"{self.title} - Issue {self.issue_number}"
+        if self.volume_number and "Volume" not in str(self.title):
+            return f"{self.title} - Volume {self.volume_number}"
         return f"{self.title}"

     @property
@@ -234,7 +241,7 @@ class Book(LongPlayScrobblableMixin):

     @classmethod
     def find_or_create(
-        cls, title: str, enrich: bool = False, commit: bool = True
+            cls, title: str, url: str = "", enrich: bool = False, commit: bool = True
     ):
         """Given a title, get a Book instance.

@@ -258,9 +265,17 @@ class Book(LongPlayScrobblableMixin):
             )
             return book

-        book_dict = lookup_book_from_google(title)
-        if not book_dict or not book_dict.get("isbn_10"):
             book_dict = lookup_comic_from_comicvine(title)
+            book_dict["readcomics_url"] = get_comic_issue_url(url)
+            book_dict["next_readcomics_url"] = next_url_if_exists(book_dict["readcomics_url"])
+
+        if not book_dict:
+            book_dict = lookup_book_from_google(title)
+
+        if not book_dict:
+            logger.warning("No book found in any source, using data as is", extra={"title": title})
+            book_dict = {"title": title}  # fall back to the bare title so the code below has a dict to work with

         author_list = []
         authors = book_dict.pop("authors", [])
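
For orientation, a minimal sketch of how the reworked find_or_create and __str__ behave together; the title, URL, and issue number are illustrative values, not data from this commit:

    # Hypothetical call; assume the URL matches the READCOMICSONLINE_URL constant.
    book = Book.find_or_create(
        "Saga",
        url="https://example.com/comic/saga/12/1",
        enrich=True,
    )
    # The ComicVine branch stores readcomics_url / next_readcomics_url on the model,
    # and once issue_number is populated, str(book) renders "Saga - Issue 12"
    # instead of the bare title.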

+ 1 - 1
vrobbler/apps/books/sources/comicvine.py

@@ -264,7 +264,7 @@ def lookup_comic_from_comicvine(title: str) -> dict:
         "comicvine_data": found_result,
         "comicvine_data": found_result,
         "summary": found_result.get("description"),
         "summary": found_result.get("description"),
         "publish_date": found_result.get("cover_date"),
         "publish_date": found_result.get("cover_date"),
-        "first_publish_year": found_result.get("store_date").year,
+        "first_publish_year": found_result.get("cover_date", "")[:4]
     }
     }
 
 
     return data_dict
     return data_dict
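
The year is now sliced from the ISO-style cover_date string instead of read off store_date; a quick sketch, assuming ComicVine dates look like "2024-03-01" (sample values only):

    found_result = {"cover_date": "2024-03-01"}
    (found_result.get("cover_date") or "")[:4]            # -> "2024"
    ({"cover_date": None}.get("cover_date") or "")[:4]    # -> "" rather than raising

Note this yields a four-character string where store_date.year produced an int; downstream code that expects an integer may need to cast.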

+ 38 - 0
vrobbler/apps/books/utils.py

@@ -1,5 +1,9 @@
+import re
+from urllib.parse import urlparse, urlunparse
+
 from titlecase import titlecase

+
 def parse_readcomicsonline_uri(uri: str) -> tuple:
     try:
         path = uri.split("comic/")[1]
@@ -19,3 +23,37 @@ def parse_readcomicsonline_uri(uri: str) -> tuple:
         page = parts[2]

     return title, volume, page
+
+
+def get_comic_issue_url(url: str) -> str:
+    parsed = urlparse(url)
+    parts = [p for p in parsed.path.strip('/').split('/') if p]
+
+    # Find the index of "comic"
+    try:
+        comic_index = parts.index("comic")
+    except ValueError:
+        raise ValueError("URL does not contain '/comic/' segment")
+
+    # Extract title (next part after 'comic')
+    if len(parts) <= comic_index + 1:
+        raise ValueError("No comic title found after '/comic/'")
+    title = parts[comic_index + 1]
+
+    # Look for the first numeric segment after the title
+    number = None
+    for segment in parts[comic_index + 2:]:
+        if segment.isdigit():
+            number = segment
+            break
+
+    # Build normalized path
+    new_parts = ["comic", title]
+    if number:
+        new_parts.append(number)
+
+    normalized_path = "/" + "/".join(new_parts)
+
+    # Rebuild full URL (same scheme and host)
+    simplified_url = urlunparse(parsed._replace(path=normalized_path, query='', fragment=''))
+    return simplified_url
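
A usage sketch for get_comic_issue_url, assuming readcomicsonline-style paths of the form /comic/<title>/<issue>/<page>; the URLs below are made up:

    get_comic_issue_url("https://example.com/comic/saga/12/4")
    # -> "https://example.com/comic/saga/12"  (page segment, query and fragment dropped)
    get_comic_issue_url("https://example.com/news/latest")
    # raises ValueError: URL does not contain '/comic/' segment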

+ 9 - 5
vrobbler/apps/scrobbles/scrobblers.py

@@ -29,7 +29,12 @@ from scrobbles.constants import (
 )
 from scrobbles.models import Scrobble
 from scrobbles.notifications import ScrobbleNtfyNotification
-from scrobbles.utils import convert_to_seconds, extract_domain
+from scrobbles.utils import (
+    convert_to_seconds,
+    extract_domain,
+    remove_last_part,
+    next_url_if_exists,
+)
 from sports.models import SportEvent
 from sports.thesportsdb import lookup_event_from_thesportsdb
 from tasks.models import Task
@@ -260,7 +265,7 @@ def manual_scrobble_book(
     log = {}
     source = "Vrobbler"
     page = None
-    url = None
+    url = ""

     if READCOMICSONLINE_URL in title:
         url = title
@@ -287,7 +292,7 @@

     # TODO: Check for scrobble of this book already and if so, update the page count

-    book = Book.find_or_create(title, enrich=True)
+    book = Book.find_or_create(title, url=url, enrich=True)

     scrobble_dict = {
         "user_id": user_id,
@@ -312,8 +317,7 @@

     if action == "stop":
         if url:
-            scrobble.log["resume_url"] = url
-            scrobble.log["restart_url"] = remove_last_part(url)
+            scrobble.log["resume_url"] = next_url_if_exists(url)
             scrobble.save(update_fields=["log"])
         scrobble.stop(force_finish=True)
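
Net effect on a "stop" action: the log now carries a single forward pointer from next_url_if_exists instead of a resume/restart pair. A rough sketch with made-up values:

    # Reader stopped on https://example.com/comic/saga/12/4 (illustrative URL):
    scrobble.log["resume_url"]
    # -> "https://example.com/comic/saga/12/5/" if that page answers HTTP 200, else ""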
 
 

+ 33 - 0
vrobbler/apps/scrobbles/utils.py

@@ -1,5 +1,6 @@
 import hashlib
 import logging
+import requests
 import re
 from datetime import date, datetime, timedelta
 from typing import TYPE_CHECKING, Optional
@@ -414,3 +415,35 @@ def remove_last_part(url: str) -> str:
     if '/' not in url:
         return url
     return url.rsplit('/', 1)[0]
+
+def next_url_if_exists(url: str) -> str:
+    # Normalize (remove trailing slash)
+    url = url.rstrip('/')
+
+    # Find last number in the URL path
+    match = re.search(r'(\d+)(?:/?$)', url)
+    if not match:
+        logger.info("No numeric segment found in the URL", extra={"url": url})
+        return ""
+
+    number = int(match.group(1))
+    new_number = number + 1
+
+    # Replace only the last occurrence of that number
+    new_url = re.sub(rf'{number}(?:/?$)', f'{new_number}/', url + '/', 1)
+
+    # Check if the new URL exists
+    try:
+        resp = requests.head(new_url, allow_redirects=True, timeout=5)
+        if resp.status_code == 200:
+            return new_url
+        else:
+            # Fallback: some sites may not support HEAD well — try GET
+            resp = requests.get(new_url, timeout=5)
+            if resp.status_code == 200:
+                return new_url
+    except requests.RequestException:
+        pass
+
+    # If it doesn’t exist
+    return ""

+ 10 - 2
vrobbler/templates/books/book_detail.html

@@ -26,11 +26,19 @@
     </div>
 </div>
 <div class="row">
+    {% if object.readcomics_url %}
+    <p><a href="{{object.readcomics_url}}">Read this issue</a></p>
+    {% endif %}
+    {% if object.next_readcomics_url %}
+    <p><a href="{{object.next_readcomics_url}}">Read next issue</a></p>
+    {% endif %}
+
     <p>{{scrobbles.count}} scrobbles</p>
+
     {% for s in scrobbles %}
     {% if forloop.first %}
-    <p><a href="{{s.logdata.restart_url}}">Re-read</a></p>
-    <p><a href="{{s.logdata.resume_url}}">Resume</a></p>
+    <p><a href="{{s.logdata.resume_url}}">Resume reading</a></p>
     {% endif %}
     {% endfor %}
 </div>