# secstate / vrobbler

import logging
import urllib
import urllib.parse
from typing import Optional

import requests
from bs4 import BeautifulSoup

logger = logging.getLogger(__name__)

ALLMUSIC_SEARCH_URL = "https://www.allmusic.com/search/{subpath}/{query}"


def strip_and_clean(text):
    """Return *text* with all leading and trailing whitespace removed.

    ``str.strip()`` with no argument already removes newlines together with
    every other whitespace character, so a single call covers what the
    earlier ``strip("\\n").rstrip().lstrip()`` chain did.
    """
    return text.strip()


def get_rating_from_soup(soup) -> Optional[int]:
    """Extract the integer rating from a parsed page.

    Looks up the first ``div`` with class ``allmusic-rating`` and converts
    its cleaned text to ``int``.

    Args:
        soup: Parsed document exposing ``find`` (a ``BeautifulSoup`` object
            in this module).

    Returns:
        The rating as an ``int``, or ``None`` when the element is missing or
        its text is not a valid integer.
    """
    try:
        rating_node = soup.find("div", class_="allmusic-rating")
        if not rating_node:
            return None
        rating_text = strip_and_clean(rating_node.get_text())
        return int(rating_text)
    except ValueError:
        # Text that is not a plain integer is treated as "no rating".
        return None


def get_review_from_soup(soup) -> str:
    """Extract the review text from a parsed page.

    Looks up the first ``div`` with class ``text`` and returns its cleaned
    text content.

    Args:
        soup: Parsed document exposing ``find`` (a ``BeautifulSoup`` object
            in this module).

    Returns:
        The review text, or ``""`` when the element is missing (or a
        ``ValueError`` is raised while extracting it).
    """
    try:
        review_node = soup.find("div", class_="text")
        if not review_node:
            return ""
        raw_text = review_node.get_text()
        return strip_and_clean(raw_text)
    except ValueError:
        return ""


def scrape_data_from_allmusic(url, *, timeout=10) -> dict:
    """Fetch an Allmusic page and pull the rating and review out of it.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before ``requests`` gives
            up. Without a timeout a stalled connection would block the
            caller indefinitely.

    Returns:
        A dict with ``"rating"`` and ``"review"`` keys when the response
        status is 200, otherwise an empty dict.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    data_dict = {}
    headers = {"User-Agent": "Vrobbler 0.11.12"}
    r = requests.get(url, headers=headers, timeout=timeout)
    if r.status_code == 200:
        soup = BeautifulSoup(r.text, "html.parser")
        data_dict["rating"] = get_rating_from_soup(soup)
        data_dict["review"] = get_review_from_soup(soup)
    return data_dict


def get_allmusic_slug(artist_name=None, album_name=None, *, timeout=10) -> str:
    """Search Allmusic and return the slug of the first matching result.

    Searches artists when only ``artist_name`` is given, and albums when
    ``album_name`` is given as well. The slug is the last path segment of
    the first result's link.

    Args:
        artist_name: Artist to search for. When empty or ``None`` no request
            is made.
        album_name: Optional album title; switches the search to albums.
        timeout: Seconds to wait for the server before ``requests`` gives
            up. Without a timeout a stalled connection would block the
            caller indefinitely.

    Returns:
        The slug, or ``""`` when no artist was given, the response status
        was not 200, or no usable result link was found.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    slug = ""
    if not artist_name:
        return slug

    subpath = "artists"
    class_ = "name"
    # NOTE(review): quote() leaves "/" unescaped by default, so a name such
    # as "AC/DC" adds an extra path segment to the search URL -- confirm
    # whether safe="" is wanted here.
    query = urllib.parse.quote(artist_name)
    if album_name:
        subpath = "albums"
        class_ = "title"
        query = "+".join([query, urllib.parse.quote(album_name)])

    url = ALLMUSIC_SEARCH_URL.format(subpath=subpath, query=query)
    headers = {"User-Agent": "Vrobbler 0.11.12"}
    r = requests.get(url, headers=headers, timeout=timeout)

    if r.status_code != 200:
        logger.info("Bad http response from Allmusic %s", r)
        return slug

    soup = BeautifulSoup(r.text, "html.parser")
    results = soup.find("ul", class_="search-results")

    if not results:
        logger.info("No search results for %s", query)
        return slug

    # Only the first matching entry is used. Previously a missing entry was
    # logged but execution continued and indexing the empty list raised
    # IndexError.
    prime_result = results.find("div", class_=class_)
    link = prime_result.find("a") if prime_result is not None else None
    result_url = link.get("href") if link is not None else None

    if not result_url:
        logger.info("Could not find specific result for search %s", query)
        return slug

    # The slug is whatever follows the final "/" of the result link.
    return result_url.rsplit("/", 1)[-1]