Ver código fonte

[music] Fix find_or_create for tracks

Colin Powell 4 dias atrás
pai
commit
b1b67528bf
2 arquivos alterados com 66 adições e 247 exclusões
  1. 64 87
      vrobbler/apps/music/models.py
  2. 2 160
      vrobbler/apps/music/utils.py

+ 64 - 87
vrobbler/apps/music/models.py

@@ -14,14 +14,14 @@ from imagekit.models import ImageSpecField
 from imagekit.processors import ResizeToFit
 from music.allmusic import get_allmusic_slug, scrape_data_from_allmusic
 from music.bandcamp import get_bandcamp_slug
-from music.musicbrainz import lookup_album_dict_from_mb, lookup_track_from_mb
+from music.musicbrainz import lookup_album_dict_from_mb, lookup_album_from_mb, lookup_track_from_mb, lookup_artist_from_mb
 from music.theaudiodb import lookup_album_from_tadb, lookup_artist_from_tadb
+from music.utils import clean_artist_name
 from scrobbles.mixins import ScrobblableConstants, ScrobblableMixin
 
 logger = logging.getLogger(__name__)
 BNULL = {"blank": True, "null": True}
 
-
 class Artist(TimeStampedModel):
     """Represents a music artist.
 
@@ -170,23 +170,20 @@ class Artist(TimeStampedModel):
         return f"https://bandcamp.com/search?q={artist}&item_type=b"
 
     @classmethod
-    def find_or_create(cls, name: str, musicbrainz_id: str = "") -> "Artist":
-        from music.musicbrainz import lookup_artist_from_mb
-        from music.utils import clean_artist_name
+    def find_or_create(cls, name: str = "", musicbrainz_id: str = "") -> "Artist":
+        keys = {}
+        if name:
+            name = clean_artist_name(name)
+            keys["name"] = name
 
-        if not name:
-            raise Exception("Must have name to lookup artist")
+        if musicbrainz_id:
+            keys["musicbrainz_id"] = musicbrainz_id
 
-        artist = None
-        name = clean_artist_name(name)
+        if not keys:
+            raise Exception("Must have name, mb_id or both to lookup artist")
+
+        artist = cls.objects.filter(**keys).first()
 
-        # Check for name/mbid combo, just mbid and then just name
-        if musicbrainz_id:
-            artist = cls.objects.filter(
-                name=name, musicbrainz_id=musicbrainz_id
-            ).first()
-        if not artist:
-            artist = cls.objects.filter(musicbrainz_id=musicbrainz_id).first()
         if not artist:
             artist = cls.objects.filter(
                 models.Q(name=name) | models.Q(alt_names__icontains=name)
@@ -593,87 +590,67 @@ class Track(ScrobblableMixin):
             url = self.album.cover_image_medium.url
         return url
 
+
     @classmethod
     def find_or_create(
-        cls,
-        title: str = "",
-        musicbrainz_id: str = "",
-        album_name: str = "",
-        artist_name: str = "",
-        enrich: bool = True,
-        run_time_seconds: Optional[int] = None,
+            cls,
+            title: str = "",
+            artist_name: str = "",
+            musicbrainz_id: str = "",
+            album_name: str = "",
+            run_time_seconds: int = 900,
+            enrich: bool = False,
+            commit: bool = True
     ) -> "Track":
-        # TODO we can use Q to build queries here based on whether we have mbid and album name
-        track = None
-        # Full look up with MB ID
-        if album_name:
-            track = cls.objects.filter(
-                musicbrainz_id=musicbrainz_id,
-                title=title,
-                artist__name=artist_name,
-                album__name=album_name,
-            ).first()
-        # Full look up without album
-        if not track:
-            track = cls.objects.filter(
-                musicbrainz_id=musicbrainz_id,
-                title=title,
-                artist__name=artist_name,
-            ).first()
+        """Given a name, try to find the track by the artist from Musicbrainz.
 
-        # Full look up without MB ID
-        if not track:
-            track = cls.objects.filter(
-                title=title,
-                artist__name=artist_name,
-                album__name=album_name,
-            ).first()
-        # Base look up without MB ID or album
-        if not track:
-            track = cls.objects.filter(
-                title=title,
-                artist__name=artist_name,
-            ).first()
+        As a basic conceit we trust the source for giving us the track and artist
+        name
+
+        Optionally, we can update any found artists with overwrite."""
+        created = False
+        if musicbrainz_id:
+            track = cls.objects.filter(musicbrainz_id=musicbrainz_id).first()
+            artist = track.artist
+            if not track and not (title and album_name):
+                raise Exception("Cannot find track with musicbrainz_id and no track title or artist name provided.")
+        else:
+            artist = Artist.find_or_create(artist_name)
+            track, created = cls.objects.get_or_create(title=title, artist=artist)
+
+        if not created:
+            logger.info("Found exact match for track by name and artist", extra={"title": title, "artist_name": artist_name, "track_id": track.id})
+
+            if track.album and album_name != track.album.name:
+                # TODO found track, but it's on a different album ... associations?
+                logger.info("Found track by artist, but album is different.")
+                album = Album.find_or_create()
+
+        if enrich:
+            album = None
+            if album_name:
+                album = Album.find_or_create(album_name)
+
+            if artist.musicbrainz_id:
+                track_dict = lookup_track_from_mb(title, artist.musicbrainz_id)
+                musicbrainz_id = musicbrainz_id or track_dict.get("id", "")
+
+                found_title: bool = track_dict.get("name", False)
+                mismatched_title: bool = title != track_dict.get("name", "")
+                if found_title and mismatched_title:
+                    logger.warning("Source track title and found title do not match", extra={"title": title, "track_dict": track_dict})
 
-        if not track and enrich:
-            track_dict = lookup_track_from_mb(title, artist_name, album_name)
-            musicbrainz_id = musicbrainz_id or track_dict.get("id", "")
-            # TODO This only works some of the time
-            # try:
-            #    album_name = album_name or track_dict.get("release-list")[
-            #        0
-            #    ].get("title", "")
-            # except IndexError:
-            #    pass
             if not run_time_seconds:
                 run_time_seconds = int(
                     int(track_dict.get("length", 900000)) / 1000
                 )
-            if title != track_dict.get("name", "") and track_dict.get(
-                "name", False
-            ):
 
-                title = track_dict.get("name", "")
 
-            if musicbrainz_id:
-                track = cls.objects.filter(
-                    musicbrainz_id=musicbrainz_id
-                ).first()
-            if not track:
-                artist = Artist.find_or_create(name=artist_name)
-                album = None
-                if album_name:
-                    album = Album.find_or_create(
-                        name=album_name, artist_name=artist_name
-                    )
-                track = cls.objects.create(
-                    title=title,
-                    album=album,
-                    musicbrainz_id=musicbrainz_id,
-                    artist=artist,
-                    run_time_seconds=run_time_seconds,
-                )
-                # TODO maybe do this in a separate process?
-                track.fix_metadata()
+            track.album = album
+            track.artist = artist
+            track.run_time_seconds = run_time_seconds
+            if commit:
+                track.save()
+                # TODO Also set cover art and tags
 
         return track

+ 2 - 160
vrobbler/apps/music/utils.py

@@ -1,21 +1,10 @@
 import logging
 import re
-from typing import Optional
 
-from music.musicbrainz import (
-    lookup_album_dict_from_mb,
-    lookup_artist_from_mb,
-    lookup_track_from_mb,
-)
 from music.constants import VARIOUS_ARTIST_DICT
-from scrobbles.utils import convert_to_seconds
 
 logger = logging.getLogger(__name__)
 
-
-from music.models import Album, Artist, Track
-
-
 def clean_artist_name(name: str) -> str:
     """Remove featured names from artist string."""
     if "feat." in name.lower():
@@ -27,156 +16,9 @@ def clean_artist_name(name: str) -> str:
 
     return name
 
+def get_or_create_various_artists() -> "Artist":
+    from music.models import Artist
 
-# TODO These are depreacted, remove them eventually
-def get_or_create_artist(name: str, mbid: str = "") -> Artist:
-    """Get an Artist object from the database.
-
-    Check if an artist with this name or Musicbrainz ID already exists.
-    Otherwise, go lookup artist data from Musicbrainz and create one.
-
-    """
-    artist = None
-    name = clean_artist_name(name)
-
-    # Check for name/mbid combo, just mbid and then just name
-    artist = Artist.objects.filter(name=name, mbid=mbid).first()
-    if not artist:
-        artist = Artist.objects.filter(musicbrainz_id=mbid).first()
-    if not artist:
-        artist = Artist.objects.filter(name=name).first()
-
-    # Does not exist, look it up from Musicbrainz
-    if not artist:
-        artist_dict = lookup_artist_from_mb(name)
-        mbid = mbid or artist_dict.get("id", "")
-
-        if mbid:
-            artist = Artist.objects.filter(musicbrainz_id=mbid).first()
-
-    if not artist:
-        artist = Artist.objects.create(name=name, musicbrainz_id=mbid)
-        # TODO maybe this should be spun off into an async task?
-        artist.fix_metadata()
-
-    return artist
-
-
-# TODO These are depreacted, remove them eventually
-def get_or_create_album(
-    name: str, artist: Artist, mbid: str = None
-) -> Optional[Album]:
-    album = None
-    album_dict = lookup_album_dict_from_mb(name, artist_name=artist.name)
-
-    name = name or album_dict.get("title", None)
-    if not name:
-        logger.debug(
-            f"Cannot get or create album by {artist} with no name ({name})"
-        )
-        return
-
-    album = Album.objects.filter(
-        musicbrainz_id=mbid, name=name, artists__in=[artist]
-    ).first()
-
-    if not album:
-        mbid_group = album_dict.get("mb_group_id")
-        album = Album.objects.filter(
-            musicbrainz_releasegroup_id=mbid_group
-        ).first()
-
-    if not album and name:
-        mbid = mbid or album_dict["mb_id"]
-        album, album_created = Album.objects.get_or_create(musicbrainz_id=mbid)
-        if album_created:
-            album.name = name
-            album.year = album_dict["year"]
-            album.musicbrainz_releasegroup_id = album_dict["mb_group_id"]
-            album.musicbrainz_albumartist_id = artist.musicbrainz_id
-            album.save(
-                update_fields=[
-                    "name",
-                    "musicbrainz_id",
-                    "year",
-                    "musicbrainz_releasegroup_id",
-                    "musicbrainz_albumartist_id",
-                ]
-            )
-            album.artists.add(artist)
-            album.fix_album_artist()
-            album.fetch_artwork()
-            album.scrape_allmusic()
-
-    if not album:
-        logger.warn(f"No album found for {name} and {mbid}")
-
-    album.fix_album_artist()
-    return album
-
-
-# TODO These are depreacted, remove them eventually
-def get_or_create_track(post_data: dict, post_keys: dict) -> Track:
-    try:
-        track_run_time_seconds = int(
-            post_data.get(post_keys.get("RUN_TIME"), 0)
-        )
-    except ValueError:  # Sometimes we get run time as a string like "01:35"
-        track_run_time_seconds = convert_to_seconds(
-            post_data.get(post_keys.get("RUN_TIME"), 0)
-        )
-
-    artist_name = post_data.get(post_keys.get("ARTIST_NAME"), "")
-    artist_mb_id = post_data.get(post_keys.get("ARTIST_MB_ID"), "")
-    album_title = post_data.get(post_keys.get("ALBUM_NAME"), "")
-    album_mb_id = post_data.get(post_keys.get("ALBUM_MB_ID"), "")
-    track_title = post_data.get(post_keys.get("TRACK_TITLE"), "")
-    track_mb_id = post_data.get(post_keys.get("TRACK_MB_ID"), "")
-
-    artist = Artist.find_or_create(artist_name, artist_mb_id)
-    album = None
-    # We may get no album ID or title, in which case, skip
-    if album_mb_id or album_title:
-        album = Album.find_or_create(
-            album_title, str(artist.name), album_mb_id
-        )
-
-    track = None
-    if not track_mb_id and album:
-        try:
-            track_mb_id = lookup_track_from_mb(
-                track_title,
-                artist.musicbrainz_id,
-                album.musicbrainz_id,
-            ).get("id", 0)
-        except TypeError:
-            pass
-
-    if not track_title and not track_mb_id:
-        logger.info(
-            "Cannot find track without either title or MB ID",
-            extra={"post_data": post_data},
-        )
-        return
-
-    if track_mb_id:
-        track = Track.objects.filter(musicbrainz_id=track_mb_id).first()
-
-    if not track and track_title:
-        track = Track.objects.filter(title=track_title, artist=artist).first()
-
-    if not track:
-        track = Track.objects.create(
-            title=track_title,
-            artist=artist,
-            album=album,
-            musicbrainz_id=track_mb_id,
-            run_time_seconds=track_run_time_seconds,
-        )
-    return track
-
-
-def get_or_create_various_artists() -> Artist:
     artist = Artist.objects.filter(name="Various Artists").first()
     if not artist:
         artist = Artist.objects.create(**VARIOUS_ARTIST_DICT)