Просмотр исходного кода

[podcasts] Add parsing of RSS feed urls

Colin Powell 16 часов назад
Родитель
Commit
e7203cdb9b
4 измененных файлов с 119 добавлено и 18 удалено
  1. 21 1
      PROJECT.org
  2. 27 1
      poetry.lock
  3. 1 0
      pyproject.toml
  4. 70 16
      vrobbler/apps/podcasts/utils.py

+ 21 - 1
PROJECT.org

@@ -92,7 +92,7 @@ fetching and simple saving.
 :LOGBOOK:
 CLOCK: [2025-07-09 Wed 09:55]--[2025-07-09 Wed 10:15] =>  0:20
 :END:
-* Backlog [0/24]
+* Backlog [1/28]
 ** TODO [#C] Create small utility to clean up tracks scrobbled with wonky playback times :vrobbler:personal:bug:music:scrobbles:
 ** TODO [#C] Move to using more robust mopidy-webhooks pacakge form pypi :utility:improvement:
 :PROPERTIES:
@@ -491,6 +491,26 @@ https://life.lab.unbl.ink/scrobble/e39779c8-62a5-46a6-bdef-fb7662810dc6/start/
 - Note taken on [2025-09-30 Tue 09:33]
 
   This may have already been resolved ... need to just confirm it.
+** TODO
+
+** DONE [#A] Add RSS feed lookups to podcasts :vrobbler:personal:feature:podcasts:
+:PROPERTIES:
+:ID:       d60645b0-7578-97c1-0278-05bd9de4269c
+:END:
+
+- Note taken on [2025-10-14 Tue 10:08]
+
+  Turns out the Podcast plugin for mopidy does a pretty good job of showing the
+  latest file without having to scroll to the bottom using only Muse to not parse
+  the podcast title name. BUT, now we're getting urls like this:
+
+  https://nsf.libsyn.com/rss#77e01251-cb20-4609-b577-d48e985d2e7b
+
+  This is great, because there's more context there, but it has to read out of
+  the RSS feed. We should add a check in the podcast util to sniff out the file
+  referenced in the # in that url and populate the info from there. This should
+  actually be much more reliable than the current state of the podcast lookup
+  which depends on the file being named properly.
 
 * Version 26.0 [3/3]
 ** DONE Clean up templates for scrobble details :vrobbler:personal:bug:templates:

+ 27 - 1
poetry.lock

@@ -1602,6 +1602,21 @@ files = [
 [package.extras]
 devel = ["colorama", "json-spec", "jsonschema", "pylint", "pytest", "pytest-benchmark", "pytest-cache", "validictory"]
 
+[[package]]
+name = "feedparser"
+version = "6.0.12"
+description = "Universal feed parser, handles RSS 0.9x, RSS 1.0, RSS 2.0, CDF, Atom 0.3, and Atom 1.0 feeds"
+optional = false
+python-versions = ">=3.6"
+groups = ["main"]
+files = [
+    {file = "feedparser-6.0.12-py3-none-any.whl", hash = "sha256:6bbff10f5a52662c00a2e3f86a38928c37c48f77b3c511aedcd51de933549324"},
+    {file = "feedparser-6.0.12.tar.gz", hash = "sha256:64f76ce90ae3e8ef5d1ede0f8d3b50ce26bcce71dd8ae5e82b1cd2d4a5f94228"},
+]
+
+[package.dependencies]
+sgmllib3k = "*"
+
 [[package]]
 name = "filelock"
 version = "3.18.0"
@@ -4403,6 +4418,17 @@ enabler = ["pytest-enabler (>=2.2)"]
 test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21) ; python_version >= \"3.9\" and sys_platform != \"cygwin\"", "jaraco.envs (>=2.2)", "jaraco.path (>=3.7.2)", "jaraco.test (>=5.5)", "packaging (>=24.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-home (>=0.5)", "pytest-perf ; sys_platform != \"cygwin\"", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel (>=0.44.0)"]
 type = ["importlib_metadata (>=7.0.2) ; python_version < \"3.10\"", "jaraco.develop (>=7.21) ; sys_platform != \"cygwin\"", "mypy (==1.14.*)", "pytest-mypy"]
 
+[[package]]
+name = "sgmllib3k"
+version = "1.0.0"
+description = "Py3k port of sgmllib."
+optional = false
+python-versions = "*"
+groups = ["main"]
+files = [
+    {file = "sgmllib3k-1.0.0.tar.gz", hash = "sha256:7868fb1c8bfa764c1ac563d3cf369c381d1325d36124933a726f29fcdaa812e9"},
+]
+
 [[package]]
 name = "shellingham"
 version = "1.5.4"
@@ -5499,4 +5525,4 @@ cffi = ["cffi (>=1.11)"]
 [metadata]
 lock-version = "2.1"
 python-versions = ">=3.9,<3.12"
-content-hash = "3a483aefea0a3afebf187b17b7df72a158788024ca8121b512b39567fb5ec8ca"
+content-hash = "cd3b566597e09aa444f9af30f95f94f922bf3dca71fbd05c887fb10cbc11d7bf"

+ 1 - 0
pyproject.toml

@@ -56,6 +56,7 @@ poetry-bumpversion = "^0.3.3"
 orgparse = "^0.4.20250520"
 tmdbv3api = "^1.9.0"
 themoviedb = "^1.0.2"
+feedparser = "^6.0.12"
 
 [tool.poetry.group.test]
 optional = true

+ 70 - 16
vrobbler/apps/podcasts/utils.py

@@ -1,7 +1,9 @@
 import logging
 import os
+from typing import Any
 from urllib.parse import unquote
 
+import feedparser
 from dateutil.parser import ParserError, parse
 from podcasts.models import PodcastEpisode
 
@@ -10,26 +12,80 @@ logger = logging.getLogger(__name__)
 # TODO This should be configurable in settings or per deploy
 PODCAST_DATE_FORMAT = "YYYY-MM-DD"
 
def parse_duration(d):
    """Convert an RSS/iTunes duration string to a number of seconds.

    Accepts either a plain number of seconds ("1830") or a colon-separated
    clock value ("30:30", "1:30:30"). Returns None for empty/missing or
    unparseable input instead of raising, since feed data is untrusted
    (e.g. some feeds publish values like "30 min").
    """
    if not d:
        return None
    d = d.strip()
    if d.isdigit():
        return int(d)
    try:
        parts = [int(p) for p in d.split(":")]
    except ValueError:
        # Non-numeric segment (malformed feed value) — treat as unknown.
        return None
    if len(parts) > 3:
        # More than H:M:S segments — malformed, treat as unknown.
        return None
    while len(parts) < 3:
        parts.insert(0, 0)
    h, m, s = parts
    return h * 3600 + m * 60 + s
+
def fetch_metadata_from_rss(uri: str) -> dict[str, Any]:
    """Populate podcast/episode metadata from an RSS feed URL.

    Expects a Mopidy-Podcast style URI of the form
    ``<feed_url>#<episode_guid>`` (e.g. https://nsf.libsyn.com/rss#<guid>).
    Returns a dict with podcast-level fields and, when an entry whose guid
    matches the fragment is found, episode-level fields as well. Returns an
    empty dict when the URI carries no ``#<guid>`` fragment.
    """
    log_context = {"mopidy_uri": uri, "media_type": "Podcast"}

    # Validate the fragment BEFORE fetching the feed — the original fetched
    # first and only then hit an IndexError on uri.split("#")[1].
    feed_url, _, target_guid = uri.partition("#")
    if not target_guid:
        logger.warning(
            "Tried to parse uri as RSS feed, but no target found",
            extra=log_context,
        )
        return {}

    feed = feedparser.parse(feed_url)

    podcast_data: dict[str, Any] = {
        "podcast_name": feed.feed.get("title", "Unknown Podcast"),
        "podcast_description": feed.feed.get("description", ""),
        "podcast_link": feed.feed.get("link", ""),
    }

    for entry in feed.entries:
        # BUG FIX: the original tested `target_guid in target_guid`, which is
        # always true and so returned the first entry regardless of the
        # fragment; match against the entry's own guid instead.
        if target_guid in entry.get("guid", ""):
            logger.info("🎧 Episode found in RSS feed", extra=log_context)
            podcast_data["episode_name"] = entry.title
            # NOTE(review): the guid is stored as episode_num here — confirm
            # downstream consumers expect a guid rather than an integer.
            podcast_data["episode_num"] = entry.guid
            podcast_data["episode_pub_date"] = entry.get("published", None)
            podcast_data["episode_description"] = entry.get("description", None)
            podcast_data["episode_url"] = (
                entry.enclosures[0].href if entry.get("enclosures") else None
            )
            podcast_data["episode_runtime_seconds"] = parse_duration(
                entry.get("itunes_duration", None)
            )
            return podcast_data

    logger.info("Episode not found in RSS feed.", extra=log_context)
    # BUG FIX: the original fell off the end of the for/else and implicitly
    # returned None, breaking callers that expect a dict; return the
    # podcast-level data we did manage to gather.
    return podcast_data
+
+
+def parse_mopidy_uri(uri: str) -> dict[str, Any]:
+    """Extract podcast/episode metadata from a Mopidy track URI.
+
+    HTTPS URIs are treated as RSS feed references and delegated to
+    fetch_metadata_from_rss(); anything else is parsed as a local file
+    path of the form .../<podcast name>/<episode file>.
+    """
+    podcast_data: dict[str, Any] = {}
 
-def parse_mopidy_uri(uri: str) -> dict:
     logger.debug(f"Parsing URI: {uri}")
+    if "https://" in uri:
+        # NOTE(review): fetch_metadata_from_rss can implicitly return None
+        # when the episode is not found in the feed — confirm callers of
+        # parse_mopidy_uri tolerate a None result here.
+        return fetch_metadata_from_rss(uri)
+
+
     parsed_uri = os.path.splitext(unquote(uri))[0].split("/")
 
+    # Seed the result with what the path alone tells us; pub_date,
+    # episode_num and episode_name are refined below.
+    podcast_data = {
+        "episode_filename": parsed_uri[-1],
+        "episode_num": None,
+        "podcast_name": parsed_uri[-2].strip(),
+        "pub_date": None,
+    }
+
+
     episode_str = parsed_uri[-1]
-    podcast_name = parsed_uri[-2].strip()
     episode_num = None
+    # NOTE(review): the local episode_num above is never read after the
+    # refactor to podcast_data["episode_num"] — dead variable.
     episode_num_pad = 0
 
     try:
         # Without episode numbers the date will lead
-        pub_date = parse(episode_str[0:10])
+        podcast_data["pub_date"] = parse(episode_str[0:10])
     except ParserError:
-        episode_num = int(episode_str.split("-")[0])
-        episode_num_pad = len(str(episode_num)) + 1
+        podcast_data["episode_num"] = int(episode_str.split("-")[0])
+        episode_num_pad = len(str(podcast_data["episode_num"])) + 1
 
         try:
             # Because episode numbers lead the filename, skip past them
-            pub_date = parse(
+            podcast_data["pub_date"] = parse(
                 episode_str[
                     episode_num_pad : len(PODCAST_DATE_FORMAT)
                     + episode_num_pad
@@ -39,22 +95,19 @@ def parse_mopidy_uri(uri: str) -> dict:
+            # NOTE(review): dead assignment — after the refactor the date
+            # lives in podcast_data["pub_date"], so this local is never
+            # read; the returned pub_date is now None here instead of the
+            # "" the pre-refactor code returned. Should probably be
+            # podcast_data["pub_date"] = "".
             pub_date = ""
 
     gap_to_strip = 0
-    if pub_date:
+    if podcast_data["pub_date"]:
         gap_to_strip += len(PODCAST_DATE_FORMAT)
-    if episode_num:
+    if podcast_data["episode_num"]:
         gap_to_strip += episode_num_pad
 
-    episode_name = episode_str[gap_to_strip:].replace("-", " ").strip()
+    podcast_data["episode_name"] = episode_str[gap_to_strip:].replace("-", " ").strip()
 
-    return {
-        "episode_filename": episode_name,
-        "episode_num": episode_num,
-        "podcast_name": podcast_name,
-        "pub_date": pub_date,
-    }
+    return podcast_data
 
 
 def get_or_create_podcast(post_data: dict) -> PodcastEpisode:
+    logger.info("Looking up podcast", extra={"post_data": post_data, "media_type": "Podcast"})
+
     mopidy_uri = post_data.get("mopidy_uri", "")
     parsed_data = parse_mopidy_uri(mopidy_uri)
 
@@ -66,9 +119,10 @@ def get_or_create_podcast(post_data: dict) -> PodcastEpisode:
     podcast_dict = {"name": podcast_name}
 
     episode_name = parsed_data.get("episode_filename")
+    run_time_seconds = parsed_data.get("episode_runtime_seconds", post_data.get("run_time", 2700))
     episode_dict = {
         "title": episode_name,
-        "run_time_seconds": post_data.get("run_time"),
+        "run_time_seconds": run_time_seconds,
         "number": parsed_data.get("episode_num"),
         "pub_date": parsed_data.get("pub_date"),
         "mopidy_uri": mopidy_uri,