"""openlibrary.py: helpers for looking up books and authors on Open Library."""

import json
import logging
import urllib
import urllib.parse

import requests

  5. logger = logging.getLogger(__name__)
  6. SEARCH_URL = "https://openlibrary.org/search.json?title={title}"
  7. ISBN_URL = "https://openlibrary.org/isbn/{isbn}.json"
  8. SEARCH_URL = "https://openlibrary.org/search.json?title={title}"
  9. COVER_URL = "https://covers.openlibrary.org/b/olid/{id}-L.jpg"
  10. AUTHOR_URL = "https://openlibrary.org/authors/{id}.json"
  11. AUTHOR_IMAGE_URL = "https://covers.openlibrary.org/a/olid/{id}-L.jpg"
  12. def get_first(key: str, result: dict) -> str:
  13. obj = ""
  14. if obj_list := result.get(key):
  15. obj = obj_list[0]
  16. return obj
  17. def lookup_author_from_openlibrary(olid: str) -> dict:
  18. author_url = AUTHOR_URL.format(id=olid)
  19. response = requests.get(author_url)
  20. if response.status_code != 200:
  21. logger.warn(f"Bad response from OL: {response.status_code}")
  22. return {}
  23. results = json.loads(response.content)
  24. if not results:
  25. logger.warn(f"No author results found from OL for {olid}")
  26. return {}
  27. remote_ids = results.get("remote_ids", {})
  28. bio = ""
  29. if results.get("bio"):
  30. try:
  31. bio = results.get("bio").get("value")
  32. except AttributeError:
  33. bio = results.get("bio")
  34. return {
  35. "name": results.get("name"),
  36. "openlibrary_id": olid,
  37. "wikipedia_url": results.get("wikipedia"),
  38. "wikidata_id": remote_ids.get("wikidata"),
  39. "isni": remote_ids.get("isni"),
  40. "goodreads_id": remote_ids.get("goodreads"),
  41. "librarything_id": remote_ids.get("librarything"),
  42. "amazon_id": remote_ids.get("amazon"),
  43. "bio": bio,
  44. "author_headshot_url": AUTHOR_IMAGE_URL.format(id=olid),
  45. }
  46. def lookup_book_from_openlibrary(title: str, author: str = None) -> dict:
  47. title_quoted = urllib.parse.quote(title)
  48. search_url = SEARCH_URL.format(title=title_quoted)
  49. response = requests.get(search_url)
  50. if response.status_code != 200:
  51. logger.warn(f"Bad response from OL: {response.status_code}")
  52. return {}
  53. results = json.loads(response.content)
  54. if len(results.get("docs")) == 0:
  55. logger.warn(f"No results found from OL for {title}")
  56. return {}
  57. top = results.get("docs")[0]
  58. ol_id = top.get("cover_edition_key")
  59. ol_author_id = get_first("author_key", top)
  60. first_sentence = ""
  61. if top.get("first_sentence"):
  62. try:
  63. first_sentence = top.get("first_sentence")[0].get("value")
  64. except AttributeError:
  65. first_sentence = top.get("first_sentence")[0]
  66. isbn = None
  67. if top.get("isbn"):
  68. isbn = top.get("isbn")[0]
  69. return {
  70. "title": top.get("title"),
  71. "isbn": isbn,
  72. "openlibrary_id": ol_id,
  73. "goodreads_id": get_first("id_goodreads", top),
  74. "first_publish_year": top.get("first_publish_year"),
  75. "first_sentence": first_sentence,
  76. "pages": top.get("number_of_pages_median", None),
  77. "cover_url": COVER_URL.format(id=ol_id),
  78. "ol_author_id": ol_author_id,
  79. }