def get_game_datas(game_id, title):
    """Scrape genre, release year, studio and plot from a MobyGames game page.

    ``game_id`` is the site-relative path of the game page; an absolute
    ``http...`` URL is also accepted and used unchanged.  ``title`` is copied
    verbatim into the result.

    Always returns a dict with the keys ``title``, ``genre``, ``release``,
    ``studio`` and ``plot``.  Scraping is best-effort: a field that cannot be
    found stays an empty string, and if the download or the parsing fails the
    fields collected so far are returned.
    """
    gamedata = {'title': "", 'genre': "", 'release': "", 'studio': "", 'plot': ""}
    try:
        gamedata["title"] = title
        # Do not prefix the host a second time when the id is already a URL.
        if game_id.startswith('http'):
            url = game_id
        else:
            url = 'http://www.mobygames.com' + game_id
        f = urllib.urlopen(url)
        try:
            # Flatten the page to one line so '.' in the patterns can span it.
            page = f.read().replace('\r\n', '').replace('\n', '')
        finally:
            f.close()

        game_genre = re.findall(r'<a href="/genre/(.*?)">(.*?)</a>', page)
        if game_genre:
            gamedata["genre"] = scrapers.format_html_codes(game_genre[0][1])

        game_release = re.findall(r'/release-info">(.*?)</a>', page)
        # The year is taken from the SECOND release-info link, so require two
        # matches; a single match must not abort the rest of the scrape.
        # NOTE(review): presumably the first link is not a date - confirm.
        if len(game_release) > 1:
            gamedata["release"] = game_release[1][-4:]

        game_studio = re.findall(r'Developed by(.*?)<a href="(.*?)">(.*?)</a>', page)
        if game_studio:
            gamedata["studio"] = scrapers.format_html_codes(game_studio[0][2])

        game_plot = re.findall(r'Description</h2>(.*?)<div class', page)
        if game_plot:
            # Strip the remaining markup before decoding HTML entities.
            plain_plot = re.sub(r'<.*?>', '', game_plot[0])
            gamedata["plot"] = scrapers.format_html_codes(plain_plot)

        # Every scraped field was already decoded above, so return the dict
        # itself rather than passing the whole dict to the string formatter.
        return gamedata
    except Exception:
        # Best-effort scraper: any failure yields whatever was gathered.
        return gamedata
def get_game_datas(game_id, title):
    """Fetch genre, release year, studio and plot from the TheGamesDB API.

    Requests ``GetGame.php?id=<game_id>`` and pulls the values out of the
    ``<genre>``, ``<ReleaseDate>``, ``<Developer>`` and ``<Overview>``
    elements of the response with regular expressions.

    Returns a dict with the keys ``title``, ``genre``, ``release``,
    ``studio`` and ``plot`` (missing values are empty strings), or ``None``
    when the request or the parsing fails.
    """
    gamedata = {'title': "", 'genre': "", 'release': "", 'studio': "", 'plot': ""}
    try:
        gamedata["title"] = title
        # str() lets numeric ids be passed as well as strings.
        f = urllib.urlopen("http://thegamesdb.net/api/GetGame.php?id=" + str(game_id))
        try:
            page = f.read().replace('\n', '')
        finally:
            f.close()

        # Several <genre> elements are combined into one "a / b" string.
        game_genre = ' / '.join(re.findall(r'<genre>(.*?)</genre>', page))
        if game_genre:
            gamedata["genre"] = scrapers.format_html_codes(game_genre)

        game_release = ''.join(re.findall(r'<ReleaseDate>(.*?)</ReleaseDate>', page))
        if game_release:
            # Keep only the last four characters of the date (the year).
            gamedata["release"] = scrapers.format_html_codes(game_release[-4:])

        game_studio = ''.join(re.findall(r'<Developer>(.*?)</Developer>', page))
        if game_studio:
            gamedata["studio"] = scrapers.format_html_codes(game_studio)

        game_plot = ''.join(re.findall(r'<Overview>(.*?)</Overview>', page))
        if game_plot:
            gamedata["plot"] = scrapers.format_html_codes(game_plot)

        return gamedata
    except Exception:
        # Callers of this scraper get None (not a partial dict) on failure.
        return None
def get_games_by_name(search):
    """Search MobyGames for ``search`` and list every matching game version.

    Returns a list of dicts with the keys ``title``, ``id`` (absolute URL of
    the game page) and ``system``.  A game listed for several platforms
    yields one entry per platform.  The search is best-effort: if the request
    or the parsing fails, the entries collected so far (possibly none) are
    returned.
    """
    results = []
    try:
        f = urllib.urlopen('http://www.mobygames.com/search/quick?q='
                           + search.replace(' ', '+') + '&sFilter=1&sG=on')
        try:
            lines = f.readlines()
        finally:
            f.close()

        for line in lines:
            if 'searchNumber' not in line:
                continue
            for games in re.findall(r'Game: (.*?)</span></div>', line):
                game_title = re.findall(r'<a href="(.*?)">(.*?)</a>', games)
                if not game_title:
                    # No title link in this entry: skip it instead of
                    # aborting the whole search on an IndexError.
                    continue
                split_versions = re.findall(r'nowrap"><a href="(.*?)">(.*?)</a> ', games)
                if split_versions:
                    # One result per platform-specific link.
                    title = scrapers.format_html_codes(game_title[0][1])
                    for version in split_versions:
                        results.append({
                            "title": title,
                            "id": 'http://www.mobygames.com' + version[0],
                            "system": version[1],
                        })
                else:
                    # Single-platform entry: the platform name is plain text
                    # in front of the opening parenthesis.
                    one_version = re.findall(r'nowrap">(.*?) \(', games)
                    if not one_version:
                        continue
                    # Decode the ampersand and apostrophe entities by hand
                    # before handing the title to the generic formatter.
                    raw_title = (game_title[0][1]
                                 .replace('&amp;', '&')
                                 .replace('&#039;', "'")
                                 .replace('&#39;', "'"))
                    results.append({
                        "title": scrapers.format_html_codes(raw_title),
                        "id": 'http://www.mobygames.com' + game_title[0][0],
                        "system": one_version[0],
                    })
        return results
    except Exception:
        # Best-effort scraper: hand back whatever was found before the error.
        return results
def get_games_with_system(search, system):
    """Search MobyGames for ``search`` restricted to one platform.

    ``system`` is looked up in ``__scrapermap__`` to obtain the site's
    platform id for the query string; that lookup happens before the
    best-effort ``try`` block, so a failure there propagates to the caller.

    Returns a list of dicts with the keys ``title``, ``id`` and ``system``
    (always the ``system`` argument).  If the request or the parsing fails,
    the entries collected so far (possibly none) are returned.
    """
    scraper_sysid = __scrapermap__[system]
    results = []
    try:
        f = urllib.urlopen('http://www.mobygames.com/search/quick?q='
                           + search.replace(' ', '+') + '&p=' + scraper_sysid
                           + '&sFilter=1&sG=on')
        try:
            lines = f.readlines()
        finally:
            f.close()

        for line in lines:
            if 'searchNumber' not in line:
                continue
            for games in re.findall(r'Game: (.*?)</span></div>', line):
                game_title = re.findall(r'<a href="(.*?)">(.*?)</a>', games)
                if not game_title:
                    # No title link in this entry: skip it instead of
                    # aborting the whole search on an IndexError.
                    continue
                title = scrapers.format_html_codes(game_title[0][1])
                split_versions = re.findall(r'nowrap"><a href="(.*?)">(.*?)</a> ', games)
                if split_versions:
                    for version in split_versions:
                        results.append({
                            "title": title,
                            "id": 'http://www.mobygames.com' + version[0],
                            "system": system,
                        })
                else:
                    # NOTE(review): this branch stores the href as found,
                    # while the branch above prefixes the site host - confirm
                    # which form the consumer of "id" expects.
                    results.append({
                        "title": title,
                        "id": game_title[0][0],
                        "system": system,
                    })
        return results
    except Exception:
        # Best-effort scraper: hand back whatever was found before the error.
        return results
def get_game_datas(game_id, title):
    """Scrape genre, release year, studio and plot from a GameFAQs game page.

    ``game_id`` is appended (via ``str``) to ``http://www.gamefaqs.com/``;
    ``title`` is copied verbatim into the result.

    Always returns a dict with the keys ``title``, ``genre``, ``release``,
    ``studio`` and ``plot``.  Scraping is best-effort: a field that cannot be
    found stays an empty string, and if the download or the parsing fails the
    fields collected so far are returned.
    """
    gamedata = {'title': "", 'genre': "", 'release': "", 'studio': "", 'plot': ""}
    try:
        gamedata["title"] = title
        f = urllib.urlopen("http://www.gamefaqs.com/" + str(game_id))
        try:
            page = f.read().replace('\r\n', '')
        finally:
            f.close()

        # NOTE(review): the two patterns below contain a literal "»"
        # character; confirm this is how the page encodes the separator.
        game_genre = re.findall(r'</a> » <a href="(.*?)">(.*?)</a> » <a href="/', page)
        if game_genre:
            gamedata["genre"] = game_genre[0][1]

        game_release = re.findall(r'Release: <a href="(.*?)">(.*?) »</a>', page)
        if game_release:
            # Keep only the last four characters of the date (the year).
            gamedata["release"] = game_release[0][1][-4:]

        game_studio = re.findall(r'<ul><li><a href="/features/company/(.*?)">(.*?)</a></li>', page)
        if game_studio:
            # Drop any markup nested inside the company link text.
            gamedata["studio"] = re.sub(r'<.*?>', '', game_studio[0][1])

        game_plot = re.findall(
            r'Description</h2></div><div class="body"><div class="details">(.*?)</div></div>',
            page)
        if game_plot:
            gamedata["plot"] = scrapers.format_html_codes(game_plot[0])

        return gamedata
    except Exception:
        # Best-effort scraper: any failure yields whatever was gathered.
        return gamedata
def get_game_datas(game_id, title):
    """Scrape genre, release year, studio and plot from an allgame.com page.

    Requests ``game.php?id=<game_id>``; ``title`` is copied verbatim into the
    result.

    Always returns a dict with the keys ``title``, ``genre``, ``release``,
    ``studio`` and ``plot``.  Scraping is best-effort: a field that cannot be
    found stays an empty string, and if the download or the parsing fails the
    fields collected so far are returned.
    """
    gamedata = {'title': "", 'genre': "", 'release': "", 'studio': "", 'plot': ""}
    strip_tags = re.compile(r'<.*?>')
    try:
        gamedata["title"] = title
        # str() lets numeric ids be passed as well as strings.
        f = urllib.urlopen('http://www.allgame.com/game.php?id=' + str(game_id))
        try:
            # The patterns below are matched against the string form of the
            # list of lines (a single string with escaped line breaks), so
            # this conversion is kept exactly as it was.
            page = str(f.readlines())
        finally:
            f.close()

        game_genre = ''.join(re.findall(r'<a href="genre.php[^>]*>(.*?)</a>', page))
        if game_genre:
            gamedata["genre"] = game_genre

        release_date = re.findall(r'<h3>Release Date</h3>[^>]*>(.*?)</p>', page)
        if release_date:
            # Keep only the last four characters of the date (the year).
            gamedata["release"] = release_date[0][-4:]

        game_studio = re.findall(r'<h3>Developer</h3>[^>]*>(.*?)</p>', page)
        if game_studio:
            gamestudio = strip_tags.sub('', game_studio[0])
            if gamestudio:
                gamedata["studio"] = gamestudio.rstrip()

        plot = re.findall(r'<h2[^>]*>(.*?)</p>(.*?)<p>(.*?)</p>', page)
        if plot:
            # Third group is the paragraph used as the description text.
            gamedata["plot"] = scraperutils.format_html_codes(strip_tags.sub('', plot[0][2]))

        return gamedata
    except Exception:
        # Best-effort scraper: any failure yields whatever was gathered.
        return gamedata
def get_games_with_system(search, system):
    """Search GameFAQs for ``search`` restricted to one platform.

    ``system`` is looked up in ``__scrapermap__`` to obtain the site's
    platform id for the query string; that lookup happens before the
    best-effort ``try`` block, so a failure there propagates to the caller.

    Returns a list of dicts with the keys ``id``, ``title`` and ``system``
    (always the ``system`` argument).  If the request or the parsing fails,
    the entries collected so far (possibly none) are returned.
    """
    scraper_sysid = __scrapermap__[system]
    results = []
    try:
        f = urllib.urlopen('http://www.gamefaqs.com/search/index.html?platform={0}&game={1}'
                           .format(scraper_sysid, search.replace(' ', '+')))
        try:
            page = f.read().replace('\r\n', '')
        finally:
            f.close()

        for href, name in re.findall(r'\s+?<a href="(.*?)"\s+?>(.*?)</a></td>', page):
            parts = href.split('/')
            if len(parts) < 3:
                # Link without an id segment: skip it rather than letting an
                # IndexError discard the remaining results.
                continue
            results.append({
                # The id is the part of the third path segment before the
                # first dash.
                "id": parts[2].split('-')[0],
                "title": scrapers.format_html_codes(name),
                "system": system,
            })
        return results
    except Exception:
        # Best-effort scraper: hand back whatever was found before the error.
        return results
def get_games_by_name(search):
    """Search GameFAQs for ``search`` across all platforms.

    Returns a list of dicts with the keys ``id``, ``title`` and ``system``.
    Both ``id`` and ``system`` are taken from the result link's path: the
    second path segment (upper-cased) is the system, and the part of the
    third segment before the first dash is the id.  If the request or the
    parsing fails, the entries collected so far (possibly none) are returned.
    """
    results = []
    try:
        f = urllib.urlopen('http://www.gamefaqs.com/search/index.html?platform=0&game='
                           + search.replace(' ', '+'))
        try:
            page = f.read().replace('\r\n', '')
        finally:
            f.close()

        for href, name in re.findall(r'\s+?<a href="(.*?)"\s+?>(.*?)</a></td>', page):
            parts = href.split('/')
            if len(parts) < 3:
                # Link without system/id segments: skip it rather than
                # letting an IndexError discard the remaining results.
                continue
            results.append({
                "id": parts[2].split('-')[0],
                "title": scrapers.format_html_codes(name),
                "system": parts[1].upper(),
            })
        return results
    except Exception:
        # Best-effort scraper: hand back whatever was found before the error.
        return results
def get_games_with_system(game_name, system):
    """Search allgame.com for ``game_name`` and keep hits for one platform.

    ``system`` is looked up in ``__scrapermap__`` to obtain the platform name
    used by the site; that lookup happens before the best-effort ``try``
    block, so a failure there propagates to the caller.

    The response is scanned line by line: a line containing a ``game.php``
    link starts a new candidate game, and a following line containing a
    ``platform.php`` link whose text equals the mapped platform name
    (case-insensitively) adds that candidate to the results.

    Returns a list of dicts with the keys ``id``, ``title`` and ``system``
    (always the ``system`` argument).  If the request or the parsing fails,
    the entries collected so far (possibly none) are returned.
    """
    scraper_sysid = __scrapermap__[system]
    results = []
    try:
        # Encoding the form data is part of the best-effort request.
        params = urllib.urlencode({'sql': game_name, 'opt1': 81})
        f = urllib.urlopen('http://www.allgame.com/search.php', params)
        try:
            lines = f.readlines()
        finally:
            f.close()

        wanted_platform = scraper_sysid.lower()
        game = None  # most recent game row seen; None until the first one
        for line in lines:
            if '"game.php?id=' in line:
                game = {
                    "id": ''.join(re.findall(r'<a[^>]*id=(.*?)">', line)),
                    "title": scraperutils.format_html_codes(
                        ''.join(re.findall(r'<a[^>]*>(.*?)</a>', line))),
                }
            if '"platform.php?id=' in line and game is not None:
                platform = ''.join(re.findall(r'<a[^>]*>(.*?)</a>', line))
                if platform.lower() == wanted_platform:
                    # Append a copy so later platform rows for the same game
                    # cannot alter an entry that is already in the results.
                    match = dict(game)
                    match["system"] = system
                    results.append(match)
        return results
    except Exception:
        # Best-effort scraper: hand back whatever was found before the error.
        return results