Browse Source

Fix scraping of MAME games

Colin Powell 2 years ago
parent
commit
95da4c063e

+ 0 - 1
vrobbler/apps/podcasts/scrapers.py

@@ -1,4 +1,3 @@
-import urllib
 from typing import Optional
 from bs4 import BeautifulSoup
 import requests

+ 6 - 1
vrobbler/apps/videogames/igdb.py

@@ -55,11 +55,16 @@ def lookup_game_id_from_gdb(name: str) -> str:
     return results[0]["game"]
 
 
-def lookup_game_from_igdb(igdb_id: str) -> Dict:
+def lookup_game_from_igdb(name_or_igdb_id: str) -> Dict:
     """Given credsa and an IGDB game ID, lookup the game metadata and return it
     in a dictionary mapped to our internal game fields
 
     """
+    try:
+        igdb_id = int(name_or_igdb_id)
+    except ValueError:
+        igdb_id = lookup_game_id_from_gdb(name_or_igdb_id)
+
     headers = {
         "Authorization": f"Bearer {get_igdb_token()}",
         "Client-ID": IGDB_CLIENT_ID,

+ 6 - 1
vrobbler/apps/videogames/retroarch.py

@@ -9,6 +9,7 @@ from dateutil.parser import ParserError, parse
 from django.apps import apps
 
 from vrobbler.apps.scrobbles.utils import convert_to_seconds
+from vrobbler.apps.videogames.scrapers import scrape_game_name_from_adb
 from vrobbler.apps.videogames.utils import get_or_create_videogame
 
 logger = logging.getLogger(__name__)
@@ -45,7 +46,7 @@ def load_game_data(directory_path: str, user_tz=None) -> dict:
             )
             continue
 
-        game_name = filename.split(" (")[0]
+        game_name = filename.split(".lrtl")[0].split(" (")[0]
         with open("".join([directory_path, filename])) as f:
             games[game_name] = json.load(f)
             # Convert runtime to seconds
@@ -81,6 +82,10 @@ def import_retroarch_lrtl_files(playlog_path: str, user_id: int) -> List[dict]:
     for game_name, game_data in game_logs.items():
         # Use the retroarch name, because we can't change those but may want to
         # tweak the found game
+        mame_name = scrape_game_name_from_adb(game_name)
+        if mame_name:
+            game_name = mame_name
+
         found_game = VideoGame.objects.filter(retroarch_name=game_name).first()
 
         if not found_game:

+ 37 - 0
vrobbler/apps/videogames/scrapers.py

@@ -0,0 +1,37 @@
+import logging
+from typing import Optional
+
+import requests
+from bs4 import BeautifulSoup
+
+logger = logging.getLogger(__name__)
+# Arcade Database lookup URL; ``{query}`` is replaced with the game name.
+MAME_LOOKUP_URL = "http://adb.arcadeitalia.net/dettaglio_mame.php?game_name={query}&search_id=2"
+
+
+def _strip_and_clean(text):
+    return " ".join(text.split("\n")).strip()  # newlines -> spaces, then trim
+
+
+def _get_title_from_soup(soup) -> Optional[str]:
+    """Return the text of the ``h1#page_title`` element, or None if absent."""
+    # find() yields None (it does not raise) when nothing matches.
+    heading = soup.find("h1", id="page_title")
+    if heading is None:
+        return None
+    return heading.get_text()
+
+
+def scrape_game_name_from_adb(name: str) -> str:
+    """Return the Arcade Database title for ``name``, or "" if not found."""
+    from urllib.parse import quote  # local import; module header unchanged
+
+    # Percent-encode so "&", "#" or "+" in a name cannot alter the query string.
+    url = MAME_LOOKUP_URL.format(query=quote(name, safe=""))
+    r = requests.get(url, headers={"User-Agent": "Vrobbler 0.11.12"})
+    if r.status_code != 200:
+        return ""
+    raw = _get_title_from_soup(BeautifulSoup(r.text, "html.parser")) or ""
+    title = raw.split(" (")[0].split(" / ")[0]
+    # The site's generic heading means no game matched the query.
+    return "" if title == "Arcade Database" else title

+ 29 - 6
vrobbler/apps/videogames/utils.py

@@ -19,23 +19,37 @@ def get_or_create_videogame(
 
     game_dict = lookup_game_from_hltb(name_or_id)
 
+    if not game_dict:
+        game_dict = lookup_game_from_igdb(name_or_id)
+
     if not game_dict:
         return
 
     # Create missing platforms and prep for loading after create
     platform_ids = []
-    for platform in game_dict.get("platforms", []):
-        p, _created = VideoGamePlatform.objects.get_or_create(name=platform)
-        platform_ids.append(p.id)
-    game_dict.pop("platforms")
+    if "platforms" in game_dict.keys():
+        for platform in game_dict.get("platforms", []):
+            p, _created = VideoGamePlatform.objects.get_or_create(
+                name=platform
+            )
+            platform_ids.append(p.id)
+        game_dict.pop("platforms")
+
+    cover_url = game_dict.pop("cover_url")
+
+    screenshot_url = ""
+    if "screenshot_url" in game_dict.keys():
+        screenshot_url = game_dict.pop("screenshot_url")
+
+    genres = []
+    if "genres" in game_dict.keys():
+        genres = game_dict.pop("genres")
 
     game, game_created = VideoGame.objects.get_or_create(
         hltb_id=game_dict.get("hltb_id")
     )
 
     if game_created or force_update:
-        cover_url = game_dict.pop("cover_url")
-
         VideoGame.objects.filter(pk=game.id).update(**game_dict)
         game.refresh_from_db()
 
@@ -43,6 +57,15 @@ def get_or_create_videogame(
         if platform_ids:
             game.platforms.add(*platform_ids)
 
+        if genres:
+            game.genre.add(*genres)
+
+        if not game.screenshot and screenshot_url:
+            r = requests.get(screenshot_url)
+            if r.status_code == 200:
+                fname = f"{game.title}_{game.uuid}.jpg"
+                game.screenshot.save(fname, ContentFile(r.content), save=True)
+
         # Go get cover image if the URL is present
         if cover_url and not game.hltb_cover:
             headers = {"User-Agent": "Vrobbler 0.11.12"}