def read_entry(file):
    """
    Reads a single entry
    :param file: the entry file (without path)
    :return: the entry
    """
    # setup parser and transformer
    grammar_file = os.path.join(c.code_path, 'grammar_entries.lark')
    grammar = utils.read_text(grammar_file)
    parse = osg_parse.create(grammar, osg_parse.EntryTransformer)

    # read entry file
    content = utils.read_text(os.path.join(c.entries_path, file))
    if not content.endswith('\n'):
        content += '\n'

    # parse and transform entry content
    try:
        entry = parse(content)
        entry = [('File', file)] + entry  # add file information to the beginning
        entry = check_and_process_entry(entry)
    except Exception as e:
        print('{} - {}'.format(file, e))
        raise RuntimeError(e)

    return entry
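# Hedged usage sketch ('example_game.md' is a made-up file name): after check_and_process_entry()
# the returned entry supports dict-style access, as the import functions further below rely on
# (entry['Title'], entry.get('Developer', []), ...).
#
#   entry = read_entry('example_game.md')
#   print(entry['Title'])
#   print(entry.get('Code repository', []))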
def clean_backlog(self):
    """
    Removes those backlog entries whose URL already appears in an entry or in the rejected file,
    then deduplicates, sorts and saves the backlog.
    """
    if not self.entries:
        print('entries not yet loaded')
        return

    # get urls from entries
    included_urls = osg.all_urls(self.entries)
    included_urls = list(included_urls.keys())  # only need the URLs here

    # get urls from rejected file
    text = utils.read_text(c.rejected_file)
    regex = re.compile(r"\((http.*?)\)", re.MULTILINE)
    matches = regex.findall(text)
    rejected_urls = []
    for match in matches:
        urls = match.split(',')
        urls = [x.strip() for x in urls]
        rejected_urls.extend(urls)
    included_urls.extend(rejected_urls)

    # for those that only have a web archive version, also add the original version
    more_urls = []
    for url in included_urls:
        if url.startswith('https://web.archive.org/web'):
            # print(url)
            # sometimes the http is missing in archive links (would need proper parsing)
            url = url[url.index('http', 5):]
            more_urls.append(url)
    included_urls.extend(more_urls)

    # now we strip the urls
    stripped_urls = [utils.strip_url(x) for x in included_urls]
    stripped_urls = set(stripped_urls)  # removes duplicates for performance

    # read backlog and get urls from there
    text = utils.read_text(c.backlog_file)
    text = text.split('\n')

    # remove those that are in stripped_urls
    text = [x for x in text if utils.strip_url(x) not in stripped_urls]

    # remove duplicates and sort
    text = sorted(list(set(text)), key=str.casefold)
    print('backlog contains {} items'.format(len(text)))

    # join and save again
    text = '\n'.join(text)
    utils.write_text(c.backlog_file, text)
    print('backlog cleaned')
def read_and_parse(content_file, grammar_file, transformer):
    """
    Reads a content file and a grammar file, parses the content with the grammar and transforms the parse tree.
    :param content_file: path of the content file
    :param grammar_file: path of the Lark grammar file
    :param transformer: a lark.Transformer instance applied to the parse tree
    :return: the transformed result
    """
    content = utils.read_text(content_file)
    grammar = utils.read_text(grammar_file)
    parser = lark.Lark(grammar, debug=False)
    tree = parser.parse(content)
    return transformer.transform(tree)
def read_and_parse(content_file: str, grammar_file: str, Transformer: lark.Transformer):
    """
    Reads a content file and a grammar file, parses the content with the grammar, then transforms the parsed
    output and returns the transformed result.
    :param content_file: path of the content file
    :param grammar_file: path of the Lark grammar file
    :param Transformer: transformer class applied to the parse tree
    :return: the transformed result
    """
    grammar = utils.read_text(grammar_file)
    parse = create(grammar, Transformer)
    content = utils.read_text(content_file)
    return parse(content)
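# A minimal sketch of what the create() helper used above might look like (an assumption; the real
# implementation lives in osg_parse and may differ): it builds the Lark parser once and returns a
# callable that parses a text and runs the transformer on the resulting tree.
def create(grammar, Transformer):
    parser = lark.Lark(grammar, debug=False)
    transformer = Transformer()

    def parse(text):
        tree = parser.parse(text)
        return transformer.transform(tree)

    return parse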
def read_screenshots_overview():
    """
    Reads the screenshots overview file and returns a dictionary {name: {id: [width, height, url]}}.
    """
    # read screenshots readme and parse
    overview = {}
    text = utils.read_text(c.screenshots_file)
    for entry in text.split('\n# ')[1:]:  # skip first paragraph
        lines = entry.split('\n')  # split into lines
        name = lines[0]
        if name not in overview:
            overview[name] = {}
        lines = [line for line in lines[1:] if line]  # include only non-empty lines
        # for every screenshot
        for line in lines:
            values = line.split(' ')  # split into values
            values = [value for value in values if value]
            id = int(values[0])  # id (must be there)
            width = int(values[1])  # width can be 0, will be updated
            height = int(values[2])  # height can be 0, will be updated
            if len(values) > 3:  # optionally an url
                url = values[3]
            else:
                url = None
            overview[name][id] = [width, height, url]
    return overview
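# Illustration of the assumed file layout (the name and values below are made up): a preamble
# paragraph, then one "# Name" section per entry with "id width height [url]" lines, e.g.
#
#   # Some Game
#   01 800 600 https://example.org/shot1.png
#   02 0 0
#
# which read_screenshots_overview() would return as
#   {'Some Game': {1: [800, 600, 'https://example.org/shot1.png'], 2: [0, 0, None]}}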
def sort_text_file(file, name):
    """
    Reads a text file, splits it into lines, removes duplicates, sorts the lines and writes them back.
    """
    text = utils.read_text(file)
    text = text.split('\n')
    text = sorted(list(set(text)), key=str.casefold)
    print('{} contains {} items'.format(name, len(text)))
    text = '\n'.join(text)
    utils.write_text(file, text)
def github_starring_synchronization():
    """
    Which Github repositories haven't I starred yet?
    """
    private_properties = json.loads(utils.read_text(c.private_properties_file))

    files = json.loads(utils.read_text(gh_entries_file))

    # loop over each entry and collect list of repos
    all_repos = []
    for index, file in enumerate(files):
        # read entry
        entry = osg.read_entry(file)

        # get repos
        code_repositories = entry.get('Code repository', [])
        repos = [x for x in code_repositories if x.startswith(prefix)]
        repos[0] += ' @add'  # mark the first repo with '@add' (the filter below only keeps marked repos)
        repos = [x for x in repos if '@add' in x]
        repos = [x.split(' ')[0] for x in repos]
        repos = [x for x in repos if x not in ignored_repos]
        all_repos.extend(repos)
    all_repos = set(all_repos)
    print('found {} Github repos'.format(len(all_repos)))

    # get my Github user
    user = osg_github.get_user(private_properties['github-name'], token=private_properties['github-token'])

    # get starred repos
    starred = user.get_starred()
    starred = [repo.clone_url for repo in starred]
    starred = set(starred)
    print('starred {} Github repos'.format(len(starred)))

    # and now the difference
    unstarred = all_repos - starred
    print('not yet starred {} repos'.format(len(unstarred)))
    print(', '.join(unstarred))
def gitlab_starring_synchronization():
    """
    Which Gitlab repositories haven't I starred yet?
    """
    private_properties = json.loads(utils.read_text(c.private_properties_file))

    files = json.loads(utils.read_text(gl_entries_file))

    # loop over each entry and collect list of repos
    all_repos = []
    for index, file in enumerate(files):
        # read entry
        entry = osg.read_entry(file)

        # get repos
        code_repositories = entry.get('Code repository', [])
        repos = [x for x in code_repositories if x.startswith(prefix)]
        repos[0] += ' @add'  # mark the first repo with '@add' (the filter below only keeps marked repos)
        repos = [x for x in repos if '@add' in x]
        repos = [x.split(' ')[0] for x in repos]
        repos = [x for x in repos if x not in ignored_repos]
        all_repos.extend(repos)
    all_repos = set(all_repos)
    print('found {} Gitlab repos'.format(len(all_repos)))
def clean_backlog(stripped_game_urls):
    # read backlog and split
    file = os.path.join(c.root_path, 'tools', 'backlog.txt')
    text = utils.read_text(file)
    text = text.split('\n')

    # remove those that are in stripped_game_urls
    text = [x for x in text if utils.strip_url(x) not in stripped_game_urls]

    # remove duplicates and sort
    text = sorted(list(set(text)), key=str.casefold)
    print('backlog contains {} items'.format(len(text)))

    # join and save again
    text = '\n'.join(text)
    utils.write_text(file, text)
def check_template_leftovers(self):
    """
    Checks for template leftovers. Should be run only occasionally.
    """
    # load template and get all lines
    text = utils.read_text(os.path.join(c.root_path, 'template.md'))
    text = text.split('\n')
    check_strings = [x for x in text if x and not x.startswith('##')]

    # iterate over all entries
    for _, entry_path, content in osg.entry_iterator():
        for check_string in check_strings:
            if content.find(check_string) >= 0:
                print('{}: found {}'.format(os.path.basename(entry_path), check_string))
    print('checked for template leftovers')
def entry_iterator():
    """
    Iterates over all entry files in the entries path and yields (file name, file path, content) for each.
    """
    # get all files in the entries path
    entries = os.listdir(c.entries_path)

    # iterate over all entries
    for entry in entries:
        entry_path = os.path.join(c.entries_path, entry)

        # ignore directories ("tocs" for example)
        if os.path.isdir(entry_path):
            continue

        # read entry
        content = utils.read_text(entry_path)

        # yield
        yield entry, entry_path, content
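# Hedged usage sketch (the keyword checked for is purely illustrative): iterating over the raw
# markdown content of all entries.
#
#   count = 0
#   for file, entry_path, content in entry_iterator():
#       if 'inactive' in content:
#           print('{} mentions inactive'.format(file))
#       count += 1
#   print('{} entries iterated'.format(count))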
def check_validity_backlog():
    import requests

    # read backlog and split
    file = os.path.join(c.root_path, 'code', 'backlog.txt')
    text = utils.read_text(file)
    urls = text.split('\n')
    urls = [x.split(' ')[0] for x in urls]

    headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64)'}
    for url in urls:
        try:
            r = requests.get(url, headers=headers, timeout=5)
        except Exception as e:
            print('{} gave error: {}'.format(url, e))
        else:
            if r.status_code != requests.codes.ok:
                print('{} returned status code: {}'.format(url, r.status_code))
            if r.is_redirect or r.history:
                print('{} redirected to {}, {}'.format(url, r.url, r.history))
def read_entries():
    """
    Parses all entries and assembles interesting infos about them.
    """
    # setup parser and transformer
    grammar_file = os.path.join(c.code_path, 'grammar_entries.lark')
    grammar = utils.read_text(grammar_file)
    parse = osg_parse.create(grammar, osg_parse.EntryTransformer)

    # a database of all important infos about the entries
    entries = []

    # iterate over all entries
    exception_happened = None
    for file, _, content in entry_iterator():
        if not content.endswith('\n'):
            content += '\n'

        # parse and transform entry content
        try:
            entry = parse(content)
            entry = [('File', file)] + entry  # add file information to the beginning
            entry = check_and_process_entry(entry)
        except Exception as e:
            print('{} - {}'.format(file, e))
            exception_happened = e  # just store the last one
            continue

        # add to list
        entries.append(entry)

    if exception_happened:
        print('error(s) while reading entries')
        raise exception_happened

    return entries
def read_rejected_file():
    """
    Reads the list of rejected games. Uses very simple parsing.
    :return: List of dictionaries with keys: Title, URLs, Description
    """
    text = u.read_text(c.rejected_file)
    rejected = []
    for line in text.split('\n'):
        # print(line)
        matches = matcher.findall(line)[0]  # we know there will be exactly one match on every line
        name = matches[0].strip()
        links = matches[1].split(',')
        links = [link.strip() for link in links]
        description = matches[2].strip()
        rejected.append({'Title': name, 'URLs': links, 'Description': description})
    return rejected
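# A hypothetical pattern consistent with the parsing above; the real 'matcher' is defined elsewhere
# and may differ. It would capture the title, the comma-separated URLs in parentheses and the
# trailing description of a rejected-file line such as "Some Game (http://a, http://b): reason".
#
#   matcher = re.compile(r"^(.*?)\((http.*?)\)[:\s-]*(.*)$")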
def write_screenshots_overview(overview):
    """
    Writes the screenshots overview dictionary back to the screenshots file.
    :param overview: dictionary as returned by read_screenshots_overview
    """
    # get preamble
    text = utils.read_text(c.screenshots_file)
    text = text.split('\n# ')[0] + '\n'

    # write out each entry sorted by name
    for name in sorted(overview.keys()):
        a = overview[name]
        t = '# {}\n\n'.format(name)
        # write out each line sorted by id
        for id in sorted(a.keys()):
            ai = a[id]
            if ai[-1] is None:
                ai = ai[:-1]
            t += ' '.join(['{:02d}'.format(id)] + [str(x) for x in ai]) + '\n'
        t += '\n'
        text += t

    utils.write_text(c.screenshots_file, text)
def parse_lgw_content():
    # paths
    import_path = os.path.join(constants.root_path, 'code', 'lgw-import')
    entries_file = os.path.join(import_path, '_lgw.json')

    # iterate over all imported files
    files = os.listdir(import_path)
    entries = []
    for file in files:
        if file.startswith('_lgw'):
            continue

        text = utils.read_text(os.path.join(import_path, file))

        # parse the html
        soup = BeautifulSoup(text, 'html.parser')
        title = soup.h1.get_text()
        print(title)
        entry = {'name': title}

        # get all external links
        ignored_external_links = ('libregamewiki.org', 'freegamedev.net', 'freegamer.blogspot.com',
                                  'opengameart.org', 'gnu.org', 'creativecommons.org', 'freesound.org',
                                  'freecode.com', 'freenode.net')
        links = [(x['href'], x.get_text()) for x in soup.find_all('a', href=True)]
        links = [x for x in links if x[0].startswith('http') and not any([y in x[0] for y in ignored_external_links])]
        entry['external links'] = links

        # get meta description
        description = soup.find('meta', attrs={"name": "description"})
        entry['description'] = description['content']

        # parse gameinfobox
        infos = soup.find('div', class_='gameinfobox')
        if not infos:
            print(' no gameinfobox')
        else:
            infos = infos.find_all('tr')
            for x in infos:
                if x.th and x.td:
                    # row with header
                    key = x.th.get_text()
                    content = x.td.get_text()
                    content = content.split(',')
                    content = [x.strip() for x in content]
                    entry[key] = content
                if not x.th and x.td:
                    # row without header: contribute section
                    x = x.find_all('li')
                    x = [(x.a.string, x.a['href']) for x in x if x.a]
                    for key, content in x:
                        entry[key] = content

        # parse "Available as package in" table
        tables = soup.find_all('table', class_='wikitable')
        tables = [table for table in tables if table.caption and table.caption.string.startswith('Available as package')]
        if len(tables) > 0:
            if len(tables) > 1:
                raise RuntimeError()
            table = tables[0]
            packages = table.find_all('tr')
            packages = [x.td.a['href'] for x in packages]
            entry['linux-packages'] = packages

        # categories
        categories = soup.find_all('div', id='mw-normal-catlinks')
        if not categories:
            print(' no categories')
            categories = []
        else:
            if len(categories) > 1:
                raise RuntimeError()
            categories = categories[0]
            categories = categories.find_all('li')
            categories = [x.a.string for x in categories]
            if 'Games' not in categories:
                print(' "Games" not in categories')
            else:
                categories.remove('Games')  # should be there
            # strip ' games' at the end
            phrase = ' games'
            categories = [x[:-len(phrase)] if x.endswith(phrase) else x for x in categories]
            ignored_categories = ['Articles lacking reference', 'Stubs']
            categories = [x for x in categories if x not in ignored_categories]
        entry['categories'] = categories

        entries.append(entry)

    # save entries
    text = json.dumps(entries, indent=1)
    utils.write_text(entries_file, text)
def clean_lgw_content():
    # paths
    import_path = os.path.join(constants.root_path, 'code', 'lgw-import')
    entries_file = os.path.join(import_path, '_lgw.json')
    cleaned_entries_file = os.path.join(import_path, '_lgw.cleaned.json')

    # load entries
    text = utils.read_text(entries_file)
    entries = json.loads(text)

    # rename keys
    key_replacements = (('developer', ('Developer', 'Developers')),
                        ('code license', ('Code license', 'Code licenses')),
                        ('engine', ('Engine', 'Engines')),
                        ('genre', ('Genre', 'Genres')),
                        ('library', ('Library', 'Libraries')),
                        ('assets license', ('Media license', 'Media licenses')),
                        ('code language', ('P. language', 'P. languages')),
                        ('home', ('Homepage',)),
                        ('platform', ('Platforms',)),
                        ('tracker', ('Bug/Feature Tracker',)),
                        ('repo', ('Source Code',)),
                        ('forum', ('Forum',)),
                        ('chat', ('Chat',)),
                        ('origin', ('Origin',)),
                        ('dev home', ('Development Project',)),
                        ('last active', ('Release date',)))
    for index, entry in enumerate(entries):
        for new_key, old_keys in key_replacements:
            for key in old_keys:
                if key in entry:
                    entry[new_key] = entry[key]
                    del entry[key]
                    break
        entries[index] = entry

    # ignore keys
    ignored_keys = ('origin', 'Latest\xa0release')
    for index, entry in enumerate(entries):
        for key in ignored_keys:
            if key in entry:
                del entry[key]
        entries[index] = entry

    # check for unique field names
    unique_fields = set()
    for entry in entries:
        unique_fields.update(entry.keys())
    print('unique lgw fields: {}'.format(sorted(list(unique_fields))))

    # which fields are mandatory
    mandatory_fields = unique_fields.copy()
    for entry in entries:
        remove_fields = [field for field in mandatory_fields if field not in entry]
        mandatory_fields -= set(remove_fields)
    print('mandatory lgw fields: {}'.format(sorted(list(mandatory_fields))))

    # statistics before
    print('field contents before')
    fields = sorted(list(unique_fields - set(('description', 'external links', 'dev home', 'forum', 'home',
                                              'linux-packages', 'developer', 'chat', 'tracker', 'Latest release',
                                              'name', 'repo', 'Release date', 'categories'))))
    for field in fields:
        content = [entry[field] for entry in entries if field in entry]
        # flatten
        flat_content = []
        for c in content:
            if isinstance(c, list):
                flat_content.extend(c)
            else:
                flat_content.append(c)
        statistics = utils.unique_elements_and_occurrences(flat_content)
        print('{}: {}'.format(field, ', '.join(statistics)))

    # content replacements
    entries = remove_parenthized_content(entries, ('assets license', 'code language', 'code license', 'engine',
                                                   'genre', 'last active', 'library'))
    entries = remove_prefix_suffix(entries, ('code license', 'assets license'), ('"', 'GNU',),
                                   ('"', '[3]', '[2]', '[1]', 'only'))
    entries = replace_content(entries, ('code license', 'assets license'), 'GPL', ('General Public License',))
    entries = replace_content(entries, ('code license', 'assets license'), 'GPL-2.0',
                              ('GPLv2',))  # for LGW GPLv2 would be the correct writing
    entries = replace_content(entries, ('code license', 'assets license'), 'GPL-2',
                              ('GPLv2', 'GPL v2', 'GPL version 2.0', 'GPL 2.0', 'General Public License v2',
                               'GPL version 2', 'Gplv2', 'GPL 2'))
    entries = replace_content(entries, ('code license', 'assets license'), 'GPL-2',
                              ('GPL v2 or later', 'GPL 2+', 'GPL v2+', 'GPL version 2 or later'))
    entries = replace_content(entries, ('code license', 'assets license'), 'GPL-3.0',
                              ('GPLv3',))  # for LGW GPLv3 would be the correct writing
    entries = replace_content(entries, ('code license', 'assets license'), 'GPL-3',
                              ('GPL v3', 'GNU GPL v3', 'GPL 3'))
    entries = replace_content(entries, ('code license', 'assets license'), 'GPL-3',
                              ('GPL v3+', 'GPL v.3 or later', 'GPL v3 or later'))
    entries = replace_content(entries, ('code license', 'assets license'), 'Public domain',
                              ('public domain', 'Public Domain'))
    entries = replace_content(entries, ('code license', 'assets license'), 'zlib',
                              ('zlib/libpng license', 'Zlib License'))
    entries = replace_content(entries, ('code license', 'assets license'), 'BSD', ('Original BSD License',))
    entries = replace_content(entries, ('code license', 'assets license'), 'CC-BY-SA-3.0',
                              ('Creative Commons Attribution-ShareAlike 3.0 Unported License', 'CC-BY-SA 3.0',
                               'CC BY-SA 3.0'))
    entries = replace_content(entries, ('code license', 'assets license'), 'CC-BY-SA', ('CC BY-SA',))
    entries = replace_content(entries, ('code license', 'assets license'), 'MIT', ('MIT License', 'MIT"'))
    entries = replace_content(entries, 'platform', 'macOS', ('Mac',))
    entries = remove_prefix_suffix(entries, ('code language', 'developer'), (), ('[3]', '[2]', '[1]'))
    entries = ignore_content(entries, 'code language', ('HTML5', 'HTML', 'English', 'XML', 'WML'))
    entries = replace_content(entries, 'code language', 'Lua', ('lua', 'LUA'))
    entries = remove_prefix_suffix(entries, 'genre', (), ('game', 'games'))
    entries = lower_case_content(entries, 'genre')
    entries = replace_content(entries, 'genre', 'platform', ('platformer',))
    entries = replace_content(entries, 'genre', 'role playing', ('rpg',))
    entries = replace_content(entries, 'genre', 'first person, shooter', ('fps',))
    entries = replace_content(entries, 'genre', 'real time, strategy', ('rts',))
    entries = replace_content(entries, 'genre', 'turn based, strategy', ('tbs',))
    entries = ignore_content(entries, 'categories', ('GPL', 'C++', 'C', 'ECMAScript', 'Python', 'Java', 'CC BY-SA',
                                                     'Lua', 'LGPL', 'CC-BY', 'BSD', 'MIT', 'Qt', 'SDL', 'OpenGL',
                                                     'Pygame', 'PD', 'GLUT', 'Haskell', 'Allegro', 'Ruby',
                                                     'Zlib/libpng', 'OpenAL', 'Perl', 'Free Pascal', 'LÖVE', 'HTML5',
                                                     'Id Tech 1'))
    entries = replace_content(entries, 'library', 'pygame', ('Pygame',))
    entries = replace_content(entries, 'library', 'Qt', ('QT',))
    entries = ignore_content(entries, 'library', ('C++', 'Lua', 'Mozilla Firefox'))
    entries = ignore_nonnumbers(entries, 'last active')
    entries = ignore_content(entries, 'last active', ('2019',))
    entries = ignore_content(entries, 'platform', ('DOS',))

    # list for every unique field
    print('\nfield contents after')
    fields = sorted(list(unique_fields - set(('description', 'external links', 'dev home', 'forum', 'home',
                                              'linux-packages', 'developer', 'chat', 'tracker', 'Latest release',
                                              'name', 'repo', 'Release date', 'categories'))))
    for field in fields:
        content = [entry[field] for entry in entries if field in entry]
        # flatten
        flat_content = []
        for c in content:
            if isinstance(c, list):
                flat_content.extend(c)
            else:
                flat_content.append(c)
        statistics = utils.unique_elements_and_occurrences(flat_content)
        print('{}: {}'.format(field, ', '.join(statistics)))

    # save entries
    text = json.dumps(entries, indent=1)
    utils.write_text(cleaned_entries_file, text)
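# Minimal sketch of the replace_content() helper used above (an assumption; the real helper is
# defined elsewhere in this module and may differ, e.g. it might re-split canonical values that
# contain commas): it replaces any of the alternative spellings by the canonical value in the given
# field(s) of every entry, assuming the field values are lists of strings.
def replace_content(entries, fields, canonical, alternatives):
    if isinstance(fields, str):
        fields = (fields,)
    for entry in entries:
        for field in fields:
            if field in entry:
                entry[field] = [canonical if value in alternatives else value for value in entry[field]]
    return entries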
def gitlab_import():
    """
    Imports various information (creation date, stars, forks, languages) from Gitlab repositories.
    """
    private_properties = json.loads(utils.read_text(c.private_properties_file))

    files = json.loads(utils.read_text(gl_entries_file))

    all_developers = osg.read_developers()
    print(' {} developers read'.format(len(all_developers)))

    # all exceptions that happen will be eaten (but will end the execution)
    try:
        # loop over each entry
        for index, file in enumerate(files):
            print(' process {} ({})'.format(file, index))

            # read entry
            entry = osg.read_entry(file)
            code_repositories = entry['Code repository']
            repos = [x for x in code_repositories if x.startswith(prefix)]
            repos[0] += ' @add'  # mark the first repo with '@add' (the filter below only keeps marked repos)
            repos = [x for x in repos if '@add' in x]
            repos = [x.split(' ')[0] for x in repos]
            repos = [x for x in repos if x not in ignored_repos]
            for repo in repos:
                print(' GL repo {}'.format(repo))
                info = osg_gitlab.retrieve_repo_info(repo)

                new_comments = []
                # add created comment
                new_comments.append('@created {}'.format(info['created'].year))
                # add stars
                new_comments.append('@stars {}'.format(info['stars']))
                # add forks
                new_comments.append('@forks {}'.format(info['forks']))

                # search for repository
                for r in code_repositories:
                    if r.startswith(repo):
                        break

                # update comment
                comments = r.comment
                if comments:
                    comments = comments.split(',')
                    comments = [c.strip() for c in comments]
                    comments = [c for c in comments if not c.startswith('@')]  # delete old ones
                    comments += new_comments
                else:
                    comments = new_comments
                r.comment = ', '.join(comments)

                # language in languages
                for language, usage in info['languages'].items():
                    if language in c.known_languages and usage > 5 and language not in entry['Code language']:
                        entry['Code language'].append(language)
                        print(' added to languages: {}'.format(language))

            entry['Code repository'] = code_repositories
            osg.write_entry(entry)
    except:
        raise
    finally:
        # shorten file list
        utils.write_text(gl_entries_file, json.dumps(files[index:], indent=1))

    # osg.write_developers(all_developers)
    print('developers database updated')
        for x in x:
            p += ' {} {} missing\n'.format(k, x)
    return p


if __name__ == "__main__":

    similarity_threshold = 0.8
    maximal_newly_created_entries = 40

    # paths
    import_path = os.path.join(constants.root_path, 'tools', 'lgw-import')
    lgw_entries_file = os.path.join(import_path, '_lgw.cleaned.json')

    # load the lgw import
    text = utils.read_text(lgw_entries_file)
    lgw_entries = json.loads(text)

    # perform name replacements
    lgw_entries = [x for x in lgw_entries if x['name'] not in ignored_names]
    for index, lgw_entry in enumerate(lgw_entries):
        if lgw_entry['name'] in name_replacements:
            lgw_entry['name'] = name_replacements[lgw_entry['name']]
        if 'code language' in lgw_entry:
            languages = lgw_entry['code language']
            h = []
            for l in languages:
                for g in ('/', 'and'):
                    if g in l:
                        l = l.split(g)
                        l = [x.strip() for x in l]
""" # TODO do not add if already 3, but print warning instead import os import requests from io import BytesIO from PIL import Image from utils import utils as u, constants as c, osg as osg if __name__ == "__main__": # paths root_path = os.path.realpath(os.path.join(os.path.dirname(__file__), os.path.pardir, os.path.pardir)) # read content of screenshots_bzt.txt info = u.read_text(os.path.join(root_path, 'code', 'synchronization', 'screenshots_bzt.txt')) info = info.split('\n') # split on line end info = [entry.split('\t') for entry in info] # split on tabs info = [[entry[0].strip(), entry[-1].strip()] for entry in info] # only keep first and last (in case multiple tabs were used) # read our screenshots screenshots = osg.read_screenshots_overview() # iterate over all new info for entry in info: name = entry[0] print('work on {}'.format(name)) url = entry[1] # is contained? our_screenshots = screenshots.get(name, {})
def update_readme_tocs(self):
    """
    Recounts entries in sub categories and writes them to the readme.
    Also updates the _toc files in the categories directories.

    Note: The Readme must have a specific structure at the beginning, starting with "# Open Source Games" and
    ending on "A collection.."

    Needs to be performed regularly.
    """
    # completely delete content of toc path
    for file in os.listdir(c.tocs_path):
        os.remove(os.path.join(c.tocs_path, file))

    # read readme
    readme_file = os.path.join(c.root_path, 'README.md')
    readme_text = utils.read_text(readme_file)

    # compile regex for identifying the building blocks in the readme
    regex = re.compile(r"(.*?)(\[comment\]: # \(start.*?end of autogenerated content\))(.*)", re.DOTALL)

    # apply regex
    matches = regex.findall(readme_text)
    if len(matches) != 1:
        raise RuntimeError('readme file has invalid structure')
    matches = matches[0]
    start = matches[0]
    end = matches[2]

    tocs_text = ''

    # split into games, tools, frameworks, libraries
    games = [x for x in self.entries if not any([y in x['Keyword'] for y in ('tool', 'framework', 'library')])]
    tools = [x for x in self.entries if 'tool' in x['Keyword']]
    frameworks = [x for x in self.entries if 'framework' in x['Keyword']]
    libraries = [x for x in self.entries if 'library' in x['Keyword']]

    # create games, tools, frameworks, libraries tocs
    title = 'Games'
    file = '_games.md'
    tocs_text += '**[{}](entries/tocs/{}#{})** ({}) - '.format(title, file, title, len(games))
    create_toc(title, file, games)

    title = 'Tools'
    file = '_tools.md'
    tocs_text += '**[{}](entries/tocs/{}#{})** ({}) - '.format(title, file, title, len(tools))
    create_toc(title, file, tools)

    title = 'Frameworks'
    file = '_frameworks.md'
    tocs_text += '**[{}](entries/tocs/{}#{})** ({}) - '.format(title, file, title, len(frameworks))
    create_toc(title, file, frameworks)

    title = 'Libraries'
    file = '_libraries.md'
    tocs_text += '**[{}](entries/tocs/{}#{})** ({})\n'.format(title, file, title, len(libraries))
    create_toc(title, file, libraries)

    # create by category
    categories_text = []
    for keyword in c.recommended_keywords:
        filtered = [x for x in self.entries if keyword in x['Keyword']]
        title = keyword.capitalize()
        name = keyword.replace(' ', '-')
        file = '_{}.md'.format(name)
        categories_text.append('**[{}](entries/tocs/{}#{})** ({})'.format(title, file, name, len(filtered)))
        create_toc(title, file, filtered)
    categories_text.sort()
    tocs_text += '\nBy category: {}\n'.format(', '.join(categories_text))

    # create by platform
    platforms_text = []
    for platform in c.valid_platforms:
        filtered = [x for x in self.entries if platform in x.get('Platform', [])]
        title = platform
        name = platform.lower()
        file = '_{}.md'.format(name)
        platforms_text.append('**[{}](entries/tocs/{}#{})** ({})'.format(title, file, name, len(filtered)))
        create_toc(title, file, filtered)
    tocs_text += '\nBy platform: {}\n'.format(', '.join(platforms_text))

    # insert new text in the middle (the \n before the second comment is necessary,
    # otherwise Markdown displays it as part of the bullet list)
    text = start + "[comment]: # (start of autogenerated content, do not edit)\n" + tocs_text \
        + "\n[comment]: # (end of autogenerated content)" + end

    # write to readme
    utils.write_text(readme_file, text)

    print('Readme and TOCs updated')
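# Illustration of the README structure the regex above relies on (only the part between the two
# marker comments is regenerated, everything around it is preserved):
#
#   ... introductory text ...
#   [comment]: # (start of autogenerated content, do not edit)
#   **[Games](entries/tocs/_games.md#Games)** (...) - ...
#   [comment]: # (end of autogenerated content)
#   ... remainder of the README ...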
def github_import():
    """
    Imports various information (archive status, creation date, stars, forks, language, contributors)
    from Github repositories.
    """
    private_properties = json.loads(utils.read_text(c.private_properties_file))

    files = json.loads(utils.read_text(gh_entries_file))

    all_developers = osg.read_developers()
    print(' {} developers read'.format(len(all_developers)))

    # all exceptions that happen will be eaten (but will end the execution)
    try:
        # loop over each entry
        for index, file in enumerate(files):
            print(' process {}'.format(file))

            # read entry
            entry = osg.read_entry(file)
            code_repositories = entry['Code repository']
            repos = [x.value for x in code_repositories if x.startswith(prefix)]
            repos[0] += ' @add'  # mark the first repo with '@add' (the filter below only keeps marked repos)
            repos = [x for x in repos if '@add' in x]
            repos = [x.split(' ')[0] for x in repos]
            repos = [x for x in repos if x not in ignored_repos]
            for repo in repos:
                print(' GH repo {}'.format(repo))
                info = osg_github.retrieve_repo_info(repo, private_properties['github-token'])

                new_comments = []
                # is archived
                if info['archived']:
                    if not osg.is_inactive(entry):
                        print('warning: repo is archived but entry is not inactive')
                    # add archive to repo comment
                    new_comments.append('@archived')

                # add created comment
                new_comments.append('@created {}'.format(info['created'].year))

                # add stars
                new_comments.append('@stars {}'.format(info['stars']))

                # add forks
                new_comments.append('@forks {}'.format(info['forks']))

                # update comment
                for r in code_repositories:
                    if r.value.startswith(repo):
                        break
                comments = r.comment
                if comments:
                    comments = comments.split(',')
                    comments = [c.strip() for c in comments]
                    comments = [c for c in comments if not c.startswith('@')]  # delete old ones
                    comments += new_comments
                else:
                    comments = new_comments
                r.comment = ', '.join(comments)

                # language in languages
                language = info['language']
                language = language_aliases.get(language, language)
                if language and language not in entry['Code language'] and language not in ignored_languages:
                    entry['Code language'].append(osg_parse.ValueWithComment(language))
                    print(' added to languages: {}'.format(language))

                # contributors
                for contributor in info['contributors']:
                    if contributor.type != 'User':
                        continue
                    if contributor.contributions < 4:
                        continue
                    # contributor.login/name/blog
                    name = contributor.name
                    if not name:
                        name = contributor.login
                    name = name_aliases.get(name, name)
                    nickname = '{}@GH'.format(contributor.login)
                    blog = contributor.blog
                    if blog:
                        blog = blog_alias[blog] if blog in blog_alias else blog
                        if not blog.startswith('http'):
                            blog = 'https://' + blog
                        if blog in ignored_blogs:
                            blog = None

                    # look up author in entry developers
                    if name not in entry.get('Developer', []):
                        print(' dev "{}" added to entry {}'.format(name, file))
                        entry['Developer'] = entry.get('Developer', []) + [osg_parse.ValueWithComment(name)]

                    # look up author in developers data base
                    if name in all_developers:
                        dev = all_developers[name]
                        if not nickname in dev.get('Contact', []):
                            print(' existing dev "{}" added nickname ({}) to developer database'.format(name, nickname))
                            # check that name has not already @GH contact
                            if any(x.endswith('@GH') for x in dev.get('Contact', [])):
                                print('warning: already GH contact')
                            dev['Contact'] = dev.get('Contact', []) + [nickname]
                        if blog and blog not in dev.get('Home', []):
                            dev['Home'] = dev.get('Home', []) + [blog]
                        # TODO add to games entries!
                    else:
                        print(' dev "{}" ({}) added to developer database'.format(name, nickname))
                        all_developers[name] = {'Name': name, 'Contact': [nickname], 'Games': [entry['Title']]}
                        if blog:
                            all_developers[name]['Home'] = [blog]

            entry['Code repository'] = code_repositories
            osg.write_entry(entry)
    except:
        raise
    finally:
        # shorten file list
        utils.write_text(gh_entries_file, json.dumps(files[index:], indent=1))

        osg.write_developers(all_developers)
        print('developers database updated')
def sourceforge_import():
    """
    Imports developer information from Sourceforge project member pages.
    """
    files = json.loads(utils.read_text(sf_entries_file))

    all_developers = osg.read_developers()
    print(' {} developers read'.format(len(all_developers)))
    all_developers_changed = False

    # all exceptions that happen will be eaten (but will end the execution)
    try:
        # loop over each entry
        for index, file in enumerate(files):
            print(' process {}'.format(file))

            # read entry
            entry = osg.read_entry(file)
            developers = entry.get('Developer', [])
            urls = [x.value for x in entry['Home'] if x.startswith('https://sourceforge.net/projects/')]

            entry_changed = False
            for url in urls:
                print(' sf project {}'.format(url))

                if not url.endswith('/'):
                    print('error: sf project does not end with slash')
                    url += '/'

                # members
                url_members = 'https://sourceforge.net/p/' + url[len(prefix):] + '_members/'
                response = requests.get(url_members)
                if response.status_code != 200:
                    print('error: url {} not accessible, status {}'.format(url_members, response.status_code))
                    raise RuntimeError()
                soup = BeautifulSoup(response.text, 'html.parser')
                authors = soup.find('div', id='content_base').find('table').find_all('tr')
                authors = [author.find_all('td') for author in authors]
                authors = [author[1].a['href'] for author in authors if len(author) == 3]
                for author in authors:
                    # sometimes author already contains the full url, sometimes not
                    url_author = 'https://sourceforge.net' + author if not author.startswith('http') else author
                    response = requests.get(url_author)
                    if response.status_code != 200 and author not in ('/u/favorito/',):
                        print('error: url {} not accessible, status {}'.format(url_author, response.status_code))
                        raise RuntimeError()
                    url_author = response.url  # could be different now
                    if 'auth/?return_to' in url_author or response.status_code != 200:
                        # for some reason authorisation is forbidden or page was not available (happens for example for /u/kantaros)
                        author_name = author[3:-1]
                        nickname = author_name
                    else:
                        soup = BeautifulSoup(response.text, 'html.parser')
                        author_name = soup.h1.get_text()
                        author_name = SF_alias_list.get(author_name, author_name)  # replace by alias if possible
                        nickname = soup.find('dl', class_='personal-data').find('dd').get_text()
                        nickname = nickname.replace('\n', '').strip()
                    nickname += '@SF'  # our indication of the platform to search for
                    author_name = author_name.strip()  # names can still have white spaces before or after

                    if author_name in SF_ignore_list:
                        continue

                    # look author up in entry developers
                    if author_name not in developers:
                        print(' dev "{}" added to entry {}'.format(author_name, file))
                        entry['Developer'] = entry.get('Developer', []) + [osg_parse.ValueWithComment(author_name)]
                        entry_changed = True
                        developers = entry.get('Developer', [])

                    # look author and SF nickname up in developers data base
                    if author_name in all_developers:
                        dev = all_developers[author_name]
                        if not nickname in dev.get('Contact', []):
                            print(' existing dev "{}" added nickname ({}) to developer database'.format(author_name, nickname))
                            # check that name has not already @SF contact
                            if any(x.endswith('@SF') for x in dev.get('Contact', [])):
                                print('warning: already SF contact')
                            all_developers[author_name]['Contact'] = dev.get('Contact', []) + [nickname]
                            all_developers_changed = True
                    else:
                        print(' dev "{}" ({}) added to developer database'.format(author_name, nickname))
                        all_developers[author_name] = {'Name': author_name, 'Contact': [nickname],
                                                       'Games': [entry['Title']]}
                        all_developers_changed = True

            if entry_changed:
                # save entry
                osg.write_entry(entry)
                print(' entry updated')
    except:
        raise
    finally:
        # shorten file list
        utils.write_text(sf_entries_file, json.dumps(files[index:], indent=1))

        # save entry
        osg.write_entry(entry)
        print(' entry updated')

        # maybe save all developers
        if all_developers_changed:
            # save all developers
            osg.write_developers(all_developers)
            print('developers database updated')
if __name__ == "__main__":

    # check_validity_backlog()

    # backlog
    game_urls = osg.extract_links()
    text = utils.read_text(os.path.join(c.root_path, 'tools', 'rejected.txt'))
    regex = re.compile(r"\((http.*?)\)", re.MULTILINE)
    matches = regex.findall(text)
    rejected_urls = []
    for match in matches:
        urls = match.split(',')
        urls = [x.strip() for x in urls]
        rejected_urls.extend(urls)
    game_urls.extend(rejected_urls)

    # for web archive links, also add the original url
    more_urls = []
    for url in game_urls:
        if url.startswith('https://web.archive.org/web'):
            url = url[url.index('http', 5):]
            more_urls.append(url)
    game_urls.extend(more_urls)

    stripped_game_urls = [utils.strip_url(x) for x in game_urls]