Selaa lähdekoodia

[music] Attempts to fix bad lookups from LastFM and Jellyfin

The broader issue was that we created album-less tracks that duplicated
existing tracks, because Jellyfin and LastFM sometimes send scrobbles
without album data.
Colin Powell 1 kuukausi sitten
vanhempi
commit
b0e9f13e11

+ 7 - 15
vrobbler/apps/music/lastfm.py

@@ -1,14 +1,10 @@
 import logging
-import time
-from datetime import datetime, timedelta, UTC
+from datetime import datetime, timedelta
 
 import pylast
 import pytz
 from django.conf import settings
-from django.utils import timezone
-from music.utils import (
-    get_or_create_track,
-)
+from music.models import Track
 
 logger = logging.getLogger(__name__)
 
@@ -47,14 +43,10 @@ class LastFM:
         lastfm_scrobbles = self.get_last_scrobbles(time_from=last_processed)
 
         for lfm_scrobble in lastfm_scrobbles:
-            track = get_or_create_track(
-                lfm_scrobble,
-                {
-                    "TRACK_TITLE": "title",
-                    "ARTIST_NAME": "artist",
-                    "ALBUM_NAME": "album",
-                    "RUN_TIME": "run_time_seconds",
-                },
+            track = Track.find_or_create(
+                title=lfm_scrobble.get("title"),
+                artist_name=lfm_scrobble.get("artist"),
+                album_name=lfm_scrobble.get("album"),
             )
 
             timezone = settings.TIME_ZONE
@@ -149,7 +141,7 @@ class LastFM:
                 continue
 
             # TODO figure out if this will actually work
-            #timestamp = datetime.fromtimestamp(int(scrobble.timestamp), UTC)
+            # timestamp = datetime.fromtimestamp(int(scrobble.timestamp), UTC)
             timestamp = datetime.utcfromtimestamp(
                 int(scrobble.timestamp)
             ).replace(tzinfo=pytz.utc)

+ 18 - 0
vrobbler/apps/music/migrations/0025_artist_alt_names.py

@@ -0,0 +1,18 @@
+# Generated by Django 4.2.19 on 2025-04-07 00:23
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    """Add a nullable, blank-able ``alt_names`` text field to ``artist``."""
+
+    dependencies = [
+        ("music", "0024_alter_track_run_time_seconds"),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name="artist",
+            name="alt_names",
+            field=models.TextField(blank=True, null=True),
+        ),
+    ]

+ 18 - 0
vrobbler/apps/music/migrations/0026_album_alt_names.py

@@ -0,0 +1,18 @@
+# Generated by Django 4.2.19 on 2025-04-07 00:34
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    """Add a nullable, blank-able ``alt_names`` text field to ``album``."""
+
+    dependencies = [
+        ("music", "0025_artist_alt_names"),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name="album",
+            name="alt_names",
+            field=models.TextField(blank=True, null=True),
+        ),
+    ]

+ 223 - 34
vrobbler/apps/music/models.py

@@ -1,13 +1,11 @@
 import logging
-from tempfile import NamedTemporaryFile
 from typing import Dict, Optional
-from urllib.request import urlopen
 from uuid import uuid4
 
 import musicbrainzngs
 import requests
 from django.conf import settings
-from django.core.files.base import ContentFile, File
+from django.core.files.base import ContentFile
 from django.db import models
 from django.urls import reverse
 from django.utils.translation import gettext_lazy as _
@@ -16,6 +14,7 @@ from imagekit.models import ImageSpecField
 from imagekit.processors import ResizeToFit
 from music.allmusic import get_allmusic_slug, scrape_data_from_allmusic
 from music.bandcamp import get_bandcamp_slug
+from music.musicbrainz import lookup_album_dict_from_mb, lookup_track_from_mb
 from music.theaudiodb import lookup_album_from_tadb, lookup_artist_from_tadb
 from scrobbles.mixins import ScrobblableConstants, ScrobblableMixin
 
@@ -24,6 +23,16 @@ BNULL = {"blank": True, "null": True}
 
 
 class Artist(TimeStampedModel):
+    """Represents a music artist.
+
+    # Lookup or create by name alone
+    >>> Artist.find_or_create(name="Bon Iver")
+
+    # Lookup or create by name and MB id (name is always required)
+    >>> Artist.find_or_create(
+    ...     name="Bon Iver",
+    ...     musicbrainz_id="0307edfc-437c-4b48-8700-80680e66a228",
+    ... )
+
+    """
+
     uuid = models.UUIDField(default=uuid4, editable=False, **BNULL)
     name = models.CharField(max_length=255)
     biography = models.TextField(**BNULL)
@@ -46,6 +55,7 @@ class Artist(TimeStampedModel):
         format="JPEG",
         options={"quality": 75},
     )
+    alt_names = models.TextField(**BNULL)
 
     class Meta:
         unique_together = [["name", "musicbrainz_id"]]
@@ -62,8 +72,10 @@ class Artist(TimeStampedModel):
         return ""
 
     @property
-    def mb_link(self):
-        return f"https://musicbrainz.org/artist/{self.musicbrainz_id}"
+    def mb_link(self) -> str:
+        """Return the MusicBrainz artist URL, or "" when no MBID is set."""
+        if self.musicbrainz_id:
+            return f"https://musicbrainz.org/artist/{self.musicbrainz_id}"
+        return ""
 
     @property
     def allmusic_link(self):
@@ -104,7 +116,9 @@ class Artist(TimeStampedModel):
         if not self.allmusic_id or force:
             slug = get_allmusic_slug(self.name)
             if not slug:
-                logger.info(f"No allmsuic link for {self}")
+                logger.info(
+                    "No allmusic link found", extra={"track_id": self.id}
+                )
                 return
             self.allmusic_id = slug
             self.save(update_fields=["allmusic_id"])
@@ -113,7 +127,9 @@ class Artist(TimeStampedModel):
         if not self.bandcamp_id or force:
             slug = get_bandcamp_slug(self.name)
             if not slug:
-                logger.info(f"No bandcamp link for {self}")
+                logger.info(
+                    "No bandcamp link found", extra={"track_id": self.id}
+                )
                 return
             self.bandcamp_id = slug
             self.save(update_fields=["bandcamp_id"])
@@ -153,6 +169,61 @@ class Artist(TimeStampedModel):
         artist = self.name.lower()
         return f"https://bandcamp.com/search?q={artist}&item_type=b"
 
+    @classmethod
+    def find_or_create(cls, name: str, musicbrainz_id: str = "") -> "Artist":
+        from music.musicbrainz import lookup_artist_from_mb
+        from music.utils import clean_artist_name
+
+        if not name:
+            raise Exception("Must have name to lookup artist")
+
+        artist = None
+        name = clean_artist_name(name)
+
+        # Check for name/mbid combo, just mbid and then just name
+        if musicbrainz_id:
+            artist = cls.objects.filter(
+                name=name, musicbrainz_id=musicbrainz_id
+            ).first()
+        if not artist:
+            artist = cls.objects.filter(musicbrainz_id=musicbrainz_id).first()
+        if not artist:
+            artist = cls.objects.filter(
+                models.Q(name=name) | models.Q(alt_names__icontains=name)
+            ).first()
+
+        # Does not exist, look it up from Musicbrainz
+        if not artist:
+            alt_name = None
+            try:
+                artist_dict = lookup_artist_from_mb(name)
+                musicbrainz_id = musicbrainz_id or artist_dict.get("id", "")
+                if name != artist_dict.get("name", ""):
+                    alt_name = name
+                    name = artist_dict.get("name", "")
+            except ValueError:
+                pass
+
+            if musicbrainz_id:
+                artist = cls.objects.filter(
+                    musicbrainz_id=musicbrainz_id
+                ).first()
+                if artist and alt_name:
+                    if not artist.alt_names:
+                        artist.alt_names = alt_name
+                    else:
+                        artist.alt_names += f"\\{alt_name}"
+                    artist.save(update_fields=["alt_names"])
+
+        if not artist:
+            artist = cls.objects.create(
+                name=name, musicbrainz_id=musicbrainz_id, alt_names=alt_name
+            )
+            # TODO maybe this should be spun off into an async task?
+            artist.fix_metadata()
+
+        return artist
+
 
 class Album(TimeStampedModel):
     uuid = models.UUIDField(default=uuid4, editable=False, **BNULL)
@@ -196,9 +267,10 @@ class Album(TimeStampedModel):
     wikipedia_slug = models.CharField(max_length=255, **BNULL)
     discogs_id = models.CharField(max_length=255, **BNULL)
     wikidata_id = models.CharField(max_length=255, **BNULL)
+    alt_names = models.TextField(**BNULL)
 
-    def __str__(self):
-        return self.name
+    def __str__(self) -> str:
+        """Return the album name followed by " by " and its album artist."""
+        return "{} by {}".format(self.name, self.album_artist)
 
     def get_absolute_url(self):
         return reverse("music:album_detail", kwargs={"slug": self.uuid})
@@ -402,6 +474,69 @@ class Album(TimeStampedModel):
         album = self.name.lower()
         return f"https://bandcamp.com/search?q={album} {artist}&item_type=a"
 
+    @classmethod
+    def find_or_create(
+        cls, name: str, artist_name: str, musicbrainz_id: str = ""
+    ) -> "Album":
+        """Return the album for a name/artist/MBID, creating it if needed.
+
+        Local lookup order: MusicBrainz ID + name + artist name,
+        MusicBrainz ID alone, then name or ``alt_names`` for the given
+        artist name. When nothing matches, MusicBrainz is queried and its ID
+        is matched against existing rows before a new album is created.
+
+        Args:
+            name: Album name from the source.
+            artist_name: Name of the album artist.
+            musicbrainz_id: Optional MusicBrainz ID for the album.
+
+        Returns:
+            The existing or newly created ``Album``.
+
+        Raises:
+            Exception: If ``name`` or ``artist_name`` is empty.
+        """
+        if not name or not artist_name:
+            raise Exception(
+                "Must have at least name and artist name to lookup album"
+            )
+
+        album = None
+        # The MBID lookups only run when an ID was supplied.
+        if musicbrainz_id:
+            album = cls.objects.filter(
+                musicbrainz_id=musicbrainz_id,
+                name=name,
+                album_artist__name=artist_name,
+            ).first()
+        if not album and musicbrainz_id:
+            album = cls.objects.filter(
+                musicbrainz_id=musicbrainz_id,
+            ).first()
+        if not album:
+            album = cls.objects.filter(
+                models.Q(name=name) | models.Q(alt_names__icontains=name),
+                album_artist__name=artist_name,
+            ).first()
+
+        # Nothing matched locally, so ask MusicBrainz.
+        if not album:
+            alt_name = None
+            try:
+                album_dict = lookup_album_dict_from_mb(
+                    name, artist_name=artist_name
+                )
+                musicbrainz_id = musicbrainz_id or album_dict.get("mb_id", "")
+                found_name = album_dict.get("title", "")
+                # Prefer the MusicBrainz title and keep the source name as
+                # an alternate name.
+                if found_name and name != found_name:
+                    alt_name = name
+                    name = found_name
+            except ValueError:
+                # NOTE(review): presumably raised when MusicBrainz has no
+                # match -- confirm.
+                pass
+
+            if musicbrainz_id:
+                album = cls.objects.filter(
+                    musicbrainz_id=musicbrainz_id
+                ).first()
+                if album and alt_name:
+                    # Additional alt names are appended after a backslash.
+                    if not album.alt_names:
+                        album.alt_names = alt_name
+                    else:
+                        album.alt_names += f"\\{alt_name}"
+                    album.save(update_fields=["alt_names"])
+            if not album:
+                artist = Artist.find_or_create(name=artist_name)
+                album = cls.objects.create(
+                    name=name,
+                    album_artist=artist,
+                    musicbrainz_id=musicbrainz_id,
+                    alt_names=alt_name,
+                )
+                # TODO maybe do this in a separate process?
+                album.fix_metadata()
+
+        return album
+
 
 class Track(ScrobblableMixin):
     COMPLETION_PERCENT = getattr(settings, "MUSIC_COMPLETION_PERCENT", 100)
@@ -425,8 +560,8 @@ class Track(ScrobblableMixin):
         return reverse("music:track_detail", kwargs={"slug": self.uuid})
 
     @property
-    def subtitle(self):
-        return self.artist
+    def subtitle(self) -> str:
+        """Return the track's artist rendered as a string."""
+        return str(self.artist)
 
     @property
     def strings(self) -> ScrobblableConstants:
@@ -451,31 +586,85 @@ class Track(ScrobblableMixin):
 
     @classmethod
     def find_or_create(
-        cls, artist_dict: Dict, album_dict: Dict, track_dict: Dict
-    ) -> Optional["Track"]:
-        """Given a data dict from Jellyfin, does the heavy lifting of looking up
-        the video and, if need, TV Series, creating both if they don't yet
-        exist.
-
-        """
-        if not artist_dict.get("name") or not artist_dict.get(
-            "musicbrainz_id"
-        ):
-            logger.warning(
-                f"No artist or artist musicbrainz ID found in message from source, not scrobbling"
-            )
-            return
-
-        artist, artist_created = Artist.objects.get_or_create(**artist_dict)
-        album, album_created = Album.objects.get_or_create(**album_dict)
+        cls,
+        title: str = "",
+        musicbrainz_id: str = "",
+        album_name: str = "",
+        artist_name: str = "",
+        enrich: bool = True,
+        run_time_seconds: Optional[int] = None,
+    ) -> "Track":
+        """Return the track matching the given details, creating it if needed.
+
+        Existing tracks are searched from the most to the least specific
+        combination of MusicBrainz ID, title, artist name and album name, so
+        a scrobble that arrives without an album can still match a track
+        that was stored with one.
+
+        Args:
+            title: Track title.
+            musicbrainz_id: Optional MusicBrainz ID for the track.
+            album_name: Optional album name.
+            artist_name: Artist name.
+            enrich: When true and no local match exists, look the track up
+                on MusicBrainz and create it if it is still not found.
+            run_time_seconds: Optional run time; filled from the MusicBrainz
+                result when not given.
+
+        Returns:
+            The matching or newly created track. When ``enrich`` is false
+            and nothing matches, ``None`` is returned.
+        """
+        # TODO we can use Q to build queries here based on whether we have mbid and album name
+        track = None
+        # Full look up with MB ID
+        if album_name:
+            track = cls.objects.filter(
+                musicbrainz_id=musicbrainz_id,
+                title=title,
+                artist__name=artist_name,
+                album__name=album_name,
+            ).first()
+        # Full look up without album
+        if not track:
+            track = cls.objects.filter(
+                musicbrainz_id=musicbrainz_id,
+                title=title,
+                artist__name=artist_name,
+            ).first()
 
-        album.fix_metadata()
-        if not album.cover_image:
-            album.fetch_artwork()
+        # Full look up without MB ID
+        if not track:
+            track = cls.objects.filter(
+                title=title,
+                artist__name=artist_name,
+                album__name=album_name,
+            ).first()
+        # Base look up without MB ID or album
+        if not track:
+            track = cls.objects.filter(
+                title=title,
+                artist__name=artist_name,
+            ).first()
 
-        track_dict["album_id"] = getattr(album, "id", None)
-        track_dict["artist_id"] = artist.id
+        if not track and enrich:
+            track_dict = lookup_track_from_mb(title, artist_name, album_name)
+            musicbrainz_id = musicbrainz_id or track_dict.get("id", "")
+            # TODO This only works some of the time
+            # try:
+            #    album_name = album_name or track_dict.get("release-list")[
+            #        0
+            #    ].get("title", "")
+            # except IndexError:
+            #    pass
+            if not run_time_seconds:
+                # The looked-up length is divided by 1000 (presumably
+                # milliseconds -- confirm); the default works out to 900.
+                run_time_seconds = int(
+                    int(track_dict.get("length", 900000)) / 1000
+                )
+            # Prefer the name from the lookup when it is present and differs.
+            if title != track_dict.get("name", "") and track_dict.get(
+                "name", False
+            ):
 
-        track, created = cls.objects.get_or_create(**track_dict)
+                title = track_dict.get("name", "")
+
+            # The lookup may have supplied an ID that matches a stored track.
+            if musicbrainz_id:
+                track = cls.objects.filter(
+                    musicbrainz_id=musicbrainz_id
+                ).first()
+            if not track:
+                artist = Artist.find_or_create(name=artist_name)
+                album = None
+                if album_name:
+                    album = Album.find_or_create(
+                        name=album_name, artist_name=artist_name
+                    )
+                track = cls.objects.create(
+                    title=title,
+                    album=album,
+                    musicbrainz_id=musicbrainz_id,
+                    artist=artist,
+                    run_time_seconds=run_time_seconds,
+                )
+                # TODO maybe do this in a separate process?
+                track.fix_metadata()
 
         return track

+ 38 - 16
vrobbler/apps/music/utils.py

@@ -16,9 +16,8 @@ logger = logging.getLogger(__name__)
 from music.models import Album, Artist, Track
 
 
-def get_or_create_artist(name: str, mbid: str = None) -> Artist:
-    artist = None
-
+def clean_artist_name(name: str) -> str:
+    """Remove featured names from artist string."""
     if "feat." in name.lower():
         name = re.split("feat.", name, flags=re.IGNORECASE)[0].strip()
     if "featuring" in name.lower():
@@ -26,18 +25,44 @@ def get_or_create_artist(name: str, mbid: str = None) -> Artist:
     if "&" in name.lower():
         name = re.split("&", name, flags=re.IGNORECASE)[0].strip()
 
-    artist_dict = lookup_artist_from_mb(name)
-    mbid = mbid or artist_dict.get("id", None)
+    return name
+
+
+# TODO These are deprecated, remove them eventually
+def get_or_create_artist(name: str, mbid: str = "") -> Artist:
+    """Get an Artist object from the database.
+
+    Check if an artist with this name or Musicbrainz ID already exists.
+    Otherwise, go lookup artist data from Musicbrainz and create one.
+
+    """
+    artist = None
+    name = clean_artist_name(name)
 
-    if mbid:
+    # Check for name/mbid combo, just mbid and then just name
+    artist = Artist.objects.filter(name=name, mbid=mbid).first()
+    if not artist:
         artist = Artist.objects.filter(musicbrainz_id=mbid).first()
+    if not artist:
+        artist = Artist.objects.filter(name=name).first()
+
+    # Does not exist, look it up from Musicbrainz
+    if not artist:
+        artist_dict = lookup_artist_from_mb(name)
+        mbid = mbid or artist_dict.get("id", "")
+
+        if mbid:
+            artist = Artist.objects.filter(musicbrainz_id=mbid).first()
+
     if not artist:
         artist = Artist.objects.create(name=name, musicbrainz_id=mbid)
+        # TODO maybe this should be spun off into an async task?
         artist.fix_metadata()
 
     return artist
 
 
+# TODO These are deprecated, remove them eventually
 def get_or_create_album(
     name: str, artist: Artist, mbid: str = None
 ) -> Optional[Album]:
@@ -90,6 +115,7 @@ def get_or_create_album(
     return album
 
 
+# TODO These are deprecated, remove them eventually
 def get_or_create_track(post_data: dict, post_keys: dict) -> Track:
     try:
         track_run_time_seconds = int(
@@ -107,16 +133,12 @@ def get_or_create_track(post_data: dict, post_keys: dict) -> Track:
     track_title = post_data.get(post_keys.get("TRACK_TITLE"), "")
     track_mb_id = post_data.get(post_keys.get("TRACK_MB_ID"), "")
 
-    artist = get_or_create_artist(
-        artist_name,
-        mbid=artist_mb_id,
-    )
+    artist = Artist.find_or_create(artist_name, artist_mb_id)
     album = None
-    if album_mb_id:
-        album = get_or_create_album(
-            album_title,
-            artist=artist,
-            mbid=album_mb_id,
+    # We may get no album ID or title, in which case, skip
+    if album_mb_id or album_title:
+        album = Album.find_or_create(
+            album_title, str(artist.name), album_mb_id
         )
 
     track = None
@@ -154,7 +176,7 @@ def get_or_create_track(post_data: dict, post_keys: dict) -> Track:
     return track
 
 
-def get_or_create_various_artists():
+def get_or_create_various_artists() -> Artist:
     artist = Artist.objects.filter(name="Various Artists").first()
     if not artist:
         artist = Artist.objects.create(**VARIOUS_ARTIST_DICT)

+ 3 - 3
vrobbler/apps/scrobbles/mixins.py

@@ -124,12 +124,12 @@ class ScrobblableMixin(TimeStampedModel):
         logger.warning("fix_metadata() not implemented yet")
 
     @classmethod
-    def find_or_create(cls) -> None:
+    def find_or_create(cls):
+        """Placeholder that only logs a warning that it is not implemented."""
         logger.warning("find_or_create() not implemented yet")
 
-    def __str__(self):
+    def __str__(self) -> str:
+        """Return the title when set, otherwise the UUID as a string."""
         if self.title:
-            return self.title
+            return str(self.title)
         return str(self.uuid)
 
 

+ 11 - 5
vrobbler/apps/scrobbles/scrobblers.py

@@ -1,8 +1,7 @@
-from datetime import datetime
 import logging
 import re
+from datetime import datetime
 from typing import Optional
-from urllib.parse import parse_qs, urlparse
 
 import pendulum
 import pytz
@@ -56,7 +55,11 @@ def mopidy_scrobble_media(post_data: dict, user_id: int) -> Scrobble:
     if media_type == Scrobble.MediaType.PODCAST_EPISODE:
         media_obj = get_or_create_podcast(post_data)
     else:
-        media_obj = get_or_create_track(post_data, MOPIDY_POST_KEYS)
+        media_obj = Track.find_or_create(
+            title=post_data.get("title", ""),
+            album_name=post_data.get("album", ""),
+            run_time_seconds=post_data.get("run_time", 900000),
+        )
 
     log = {}
     try:
@@ -109,8 +112,11 @@ def jellyfin_scrobble_media(
             post_data.get("Provider_imdb", "").replace("tt", "")
         )
     else:
-        media_obj = get_or_create_track(
-            post_data, post_keys=JELLYFIN_POST_KEYS
+        media_obj = Track.find_or_create(
+            title=post_data.get("Name", ""),
+            album_name=post_data.get("Album", ""),
+            run_time_seconds=post_data.get("RunTime", 900000),
+            musicbrainz_id=post_data.get("Provider_musicbrainztrack", ""),
         )
         # A hack because we don't worry about updating music ... we either finish it or we don't
         playback_position_seconds = 0

+ 6 - 27
vrobbler/apps/scrobbles/tsv.py

@@ -1,18 +1,12 @@
 import codecs
 import csv
 import logging
-from datetime import datetime
 
 import pytz
 import requests
-from music.utils import (
-    get_or_create_album,
-    get_or_create_artist,
-    get_or_create_track,
-)
+from music.models import Track
 from scrobbles.constants import AsTsvColumn
 from scrobbles.models import Scrobble
-from music.constants import MOPIDY_POST_KEYS
 
 from scrobbles.utils import timestamp_user_tz_to_utc
 
@@ -50,27 +44,12 @@ def process_audioscrobbler_tsv_file(file_path, user_id, user_tz=None):
             )
             continue
 
-        track = get_or_create_track(
-            {
-                "title": row[AsTsvColumn["TRACK_NAME"].value],
-                "mbid": row[AsTsvColumn["MB_ID"].value],
-                "artist_name": row[AsTsvColumn["ARTIST_NAME"].value],
-                "album_name": row[AsTsvColumn["ALBUM_NAME"].value],
-                "run_time_seconds": int(
-                    row[AsTsvColumn["RUN_TIME_SECONDS"].value]
-                ),
-            },
-            {
-                "TRACK_MB_ID": "mbid",
-                "TRACK_TITLE": "track_title",
-                "ALBUM_NAME": "album_name",
-                "ARTIST_NAME": "artist_name",
-                "RUN_TIME": "run_time_seconds",
-            },
+        track = Track.find_or_create(
+            title=row[AsTsvColumn["TRACK_NAME"].value],
+            musicbrainz_id=row[AsTsvColumn["MB_ID"].value],
+            artist_name=row[AsTsvColumn["ARTIST_NAME"].value]
+            album_name=row[AsTsvColumn["ALBUM_NAME"].value]
         )
-        if not track:
-            logger.info(f"Skipping track {track} because not found")
-            continue
 
         # TODO Set all this up as constants
         if row[AsTsvColumn["COMPLETE"].value] == "S":