def download_lgw_content():
    """
    Download the HTML page of every game listed on the Libregamewiki (LGW)
    'Category:Games' listing into '<root_path>/code/lgw-import'.

    The category listing is paginated; the 'next page' links are followed until
    there is none left. Pages whose title starts with 'User:', 'Template:' or
    'Bullet' are not game pages and are skipped.

    Requires network access. The destination directory is passed to
    utils.recreate_directory before downloading (presumably this removes
    previously downloaded content; verify against the helper).

    :return: None
    """

    # parameters
    base_url = 'https://libregamewiki.org'
    destination_path = os.path.join(constants.root_path, 'code', 'lgw-import')
    utils.recreate_directory(destination_path)

    # titles starting with one of these prefixes are not game pages
    excluded_prefixes = ('User:', 'Template:', 'Bullet')

    # walk through the paginated category listing and collect (href, title) of all pages
    # NOTE: sub-categories (div with id 'mw-subcategories') are currently not collected
    url = base_url + '/Category:Games'
    games = []
    while True:
        text = requests.get(url).text
        soup = BeautifulSoup(text, 'html.parser')

        # game pages
        pages = soup.find('div', id='mw-pages').find_all('li')
        games.extend((x.a['href'], x.a.string) for x in pages)

        # next page
        next_page = soup.find('a', string='next page')
        if not next_page:
            break
        url = base_url + next_page['href']

    # remove all those that are not game pages (str.startswith accepts a tuple of prefixes)
    games = [(href, title) for href, title in games if not title.startswith(excluded_prefixes)]

    print('current number of games in LGW {}'.format(len(games)))

    # download every game page into its own file
    for href, title in games:
        print(title)
        url = base_url + href
        # href starts with a '/', which is dropped before deriving the file name
        destination_file = os.path.join(destination_path, osg.canonical_name(href[1:]) + '.html')
        text = requests.get(url).text
        utils.write_text(destination_file, text)
def preprocess(list, key, url):
    """
    Adds the fields 'anchor-id', 'letter' and 'href' to every item (in place).

    :param list: iterable of items supporting item[...] read and write access (modified in place)
    :param key: key of the item field holding the name the anchor and letter are derived from
    :param url: list to which the final '<letter>.html#<anchor>' element is appended (by
                concatenation, the given list itself is not modified) to form 'href'
    :return: None
    """
    used_anchors = set()
    for entry in list:
        # unique anchor reference: append 'x' until the anchor has not been used before
        anchor_id = osg.canonical_name(entry[key])
        while anchor_id in used_anchors:
            anchor_id += 'x'
        used_anchors.add(anchor_id)
        entry['anchor-id'] = anchor_id

        # letter for alphabetic sorting, everything outside of the alphabet goes to 'extra'
        first = entry[key][0].upper()
        letter = first if first in alphabet else extra
        entry['letter'] = letter
        entry['href'] = url + ['{}.html#{}'.format(letter, anchor_id)]
# icon name per genre (only for the genres listed here)
genre_icon_map = {
    'Action': 'target',
    'Arcade': 'pacman',
    'Visual novel': 'book',
    'Puzzle': 'puzzle-piece',
    'Cards': 'spades',
    'Music': 'music'
}

# singular -> plural lookup of the known field names
plurals = {}
# regular plurals (append an 's')
for k in ('Assets license', 'Contact', 'Code language', 'Code license', 'Developer', 'Download', 'Inspiration',
          'Game', 'Keyword', 'Home', 'Organization', 'Platform', 'Tag'):
    plurals[k] = k + 's'
# plural identical to the singular
for k in ('Media', 'Play', 'State'):
    plurals[k] = k
# trailing 'y' becomes 'ies'
for k in ('Code repository', 'Code dependency'):
    plurals[k] = k[:-1] + 'ies'

# per known language: games_by_language_path with its last element extended by '#<canonical language name>'
code_language_references = {
    language: games_by_language_path[:-1] + [
        '{}#{}'.format(games_by_language_path[-1], osg.canonical_name(language))
    ]
    for language in c.known_languages
}


def get_plural_or_singular(name, amount):
    """
    Returns the singular (name itself) if amount is 1, otherwise the plural of name.

    :param name: singular, must be a key of plurals
    :param amount: number of things, only compared against 1
    :return: name or its plural
    :raises RuntimeError: if name is not a key of plurals
    """
    if name not in plurals:
        raise RuntimeError('"{}" not a known singular!'.format(name))
    return name if amount == 1 else plurals[name]


# display names (plural) per framework type
framework_names = {
    'tool': 'Tools',
    'framework': 'Frameworks',
    'library': 'Libraries'
}
if p: print('{}\n{}'.format(name, p)) if not is_included: # a new entry, that we have never seen, maybe we should make an entry of our own # TODO we could use the write capabilities to write the entry in our own format, the hardcoded format here might be brittle, on the other hand we can also write slightly wrong stuff here without problems if newly_created_entries >= maximal_newly_created_entries: continue game_type = osgc_entry.get('type', None) osgc_status = [osgc_status_map[osgc_entry.get('status', None)]] # determine file name print('create new entry for {}'.format(osgc_name)) file_name = osg.canonical_name(osgc_name) + '.md' target_file = os.path.join(c.entries_path, file_name) if os.path.isfile(target_file): print('warning: file {} already existing, save under slightly different name'.format(file_name)) target_file = os.path.join(c.entries_path, file_name[:-3] + '-duplicate.md') if os.path.isfile(target_file): continue # just for safety reasons # add Title and File entry = {'Title': osgc_name, 'File': file_name} # add home if 'url' in osgc_entry: home = osgc_entry['url'] if type(home) == str: home = [home]
} plurals = { k: k + 's' for k in ('Assets license', 'Contact', 'Code language', 'Code license', 'Developer', 'Download', 'Inspiration', 'Game', 'Keyword', 'Home', 'Organization', 'Platform') } for k in ('Media', 'Play', 'State'): plurals[k] = k for k in ('Code repository', 'Code dependency'): plurals[k] = k[:-1] + 'ies' code_language_references = { l: games_by_language_path[:-1] + ['{}#{}'.format(games_by_language_path[-1], osg.canonical_name(l))] for l in c.known_languages } def get_plural_or_singular(name, amount): if not name in plurals.keys(): raise RuntimeError('"{}" not a known singular!'.format(name)) if amount == 1: return name return plurals[name] framework_names = { 'tool': 'Tools', 'framework': 'Frameworks',
# TODO developer (need to introduce a field with us first) if p: print('{}\n{}'.format(name, p)) if not is_included: # a new entry, that we have never seen, maybe we should make an entry of our own # TODO we could use the write capabilities to write the entry in our own format, the hardcoded format here might be brittle, on the other hand we can also write slightly wrong stuff here without problems if newly_created_entries >= maximal_newly_created_entries: continue # determine file name print('create new entry for {}'.format(lgw_name)) file_name = osg.canonical_name(lgw_name) + '.md' target_file = os.path.join(constants.entries_path, file_name) if os.path.isfile(target_file): print( 'warning: file {} already existing, save under slightly different name' .format(file_name)) target_file = os.path.join(constants.entries_path, file_name[:-3] + '-duplicate.md') if os.path.isfile(target_file): continue # just for safety reasons # add name entry = '# {}\n\n'.format(lgw_name) # empty home (mandatory on our side) home = lgw_entry.get('home', None)