# allmusic.py — helpers for scraping ratings/reviews from allmusic.com
  1. import urllib
  2. from typing import Optional
  3. from bs4 import BeautifulSoup
  4. import requests
  5. import logging
  6. logger = logging.getLogger(__name__)
  7. ALLMUSIC_SEARCH_URL = "https://www.allmusic.com/search/{subpath}/{query}"
  8. def strip_and_clean(text):
  9. return text.strip("\n").rstrip().lstrip()
  10. def get_rating_from_soup(soup) -> Optional[int]:
  11. rating = None
  12. try:
  13. potential_rating = soup.find("div", class_="allmusic-rating")
  14. if potential_rating:
  15. rating = int(strip_and_clean(potential_rating.get_text()))
  16. except ValueError:
  17. pass
  18. return rating
  19. def get_review_from_soup(soup) -> str:
  20. review = ""
  21. try:
  22. potential_text = soup.find("div", class_="text")
  23. if potential_text:
  24. review = strip_and_clean(potential_text.get_text())
  25. except ValueError:
  26. pass
  27. return review
  28. def scrape_data_from_allmusic(url) -> dict:
  29. data_dict = {}
  30. headers = {"User-Agent": "Vrobbler 0.11.12"}
  31. r = requests.get(url, headers=headers)
  32. if r.status_code == 200:
  33. soup = BeautifulSoup(r.text, "html.parser")
  34. data_dict["rating"] = get_rating_from_soup(soup)
  35. data_dict["review"] = get_review_from_soup(soup)
  36. return data_dict
  37. def get_allmusic_slug(artist_name=None, album_name=None) -> str:
  38. slug = ""
  39. if not artist_name:
  40. return slug
  41. subpath = "artists"
  42. class_ = "name"
  43. query = urllib.parse.quote(artist_name)
  44. if album_name:
  45. subpath = "albums"
  46. class_ = "title"
  47. query = "+".join([query, urllib.parse.quote(album_name)])
  48. url = ALLMUSIC_SEARCH_URL.format(subpath=subpath, query=query)
  49. headers = {"User-Agent": "Vrobbler 0.11.12"}
  50. r = requests.get(url, headers=headers)
  51. if r.status_code != 200:
  52. logger.info(f"Bad http response from Allmusic {r}")
  53. return slug
  54. soup = BeautifulSoup(r.text, "html.parser")
  55. results = soup.find("ul", class_="search-results")
  56. if not results:
  57. logger.info(f"No search results for {query}")
  58. return slug
  59. prime_result = results.findAll("div", class_=class_)
  60. if not prime_result:
  61. logger.info(f"Could not find specific result for search {query}")
  62. result_url = prime_result[0].find_all("a")[0]["href"]
  63. slug = result_url.split("/")[-1:][0]
  64. return slug