def update_statistics(self):
    """
    Generates the statistics page.

    Aggregates state, languages, licenses, keywords, repositories, code
    dependencies, build systems and platforms over all loaded entries and
    writes the result to c.statistics_file. Should be done every time the
    entries change. Requires self.entries to be loaded.
    """
    if not self.entries:
        print('entries not yet loaded')
        return

    # start the page
    statistics = '[comment]: # (autogenerated content, do not edit)\n# Statistics\n\n'

    # total number
    number_entries = len(self.entries)
    rel = lambda x: x / number_entries * 100  # conversion to percent

    statistics += 'analyzed {} entries on {}\n\n'.format(
        number_entries,
        datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

    # State (beta, mature, inactive)
    statistics += '## State\n\n'
    number_state_beta = sum(1 for x in self.entries if 'beta' in x['State'])
    number_state_mature = sum(1 for x in self.entries
                              if 'mature' in x['State'])
    number_inactive = sum(1 for x in self.entries if osg.is_inactive(x))
    statistics += '- mature: {} ({:.1f}%)\n- beta: {} ({:.1f}%)\n- inactive: {} ({:.1f}%)\n\n'.format(
        number_state_mature, rel(number_state_mature), number_state_beta,
        rel(number_state_beta), number_inactive, rel(number_inactive))

    if number_inactive > 0:
        entries_inactive = [(x['Title'], osg.extract_inactive_year(x))
                            for x in self.entries if osg.is_inactive(x)]
        entries_inactive.sort(
            key=lambda x: str.casefold(x[0]))  # first sort by name
        entries_inactive.sort(
            key=lambda x: x[1],
            reverse=True)  # then sort by inactive year (more recently first)
        entries_inactive = ['{} ({})'.format(*x) for x in entries_inactive]
        statistics += '##### Inactive State\n\n' + ', '.join(
            entries_inactive) + '\n\n'

    # Language
    statistics += '## Code Languages\n\n'
    field = 'Code language'
    # get all languages together
    languages = []
    for entry in self.entries:
        languages.extend(entry[field])
    languages = [x.value for x in languages]
    unique_languages = set(languages)
    unique_languages = [(l, languages.count(l) / len(languages))
                        for l in unique_languages]
    unique_languages.sort(
        key=lambda x: str.casefold(x[0]))  # first sort by name
    # print languages to console
    print('\nLanguages\n')
    print('\n'.join('{} ({:.1f}%)'.format(x[0], x[1] * 100)
                    for x in unique_languages))
    unique_languages.sort(
        key=lambda x: x[1],
        reverse=True)  # then sort by occurrence (highest occurrence first)
    unique_languages = [
        '- {} ({:.1f}%)\n'.format(x[0], x[1] * 100) for x in unique_languages
    ]
    statistics += '##### Language frequency\n\n' + ''.join(
        unique_languages) + '\n'

    # Licenses
    statistics += '## Code licenses\n\n'
    field = 'Code license'
    # get all licenses together
    licenses = []
    for entry in self.entries:
        licenses.extend(entry[field])
    licenses = [x.value for x in licenses]
    unique_licenses = set(licenses)
    unique_licenses = [(l, licenses.count(l) / len(licenses))
                       for l in unique_licenses]
    unique_licenses.sort(
        key=lambda x: str.casefold(x[0]))  # first sort by name
    # print licenses to console
    print('\nLicenses\n')
    print('\n'.join('{} ({:.1f}%)'.format(x[0], x[1] * 100)
                    for x in unique_licenses))
    unique_licenses.sort(key=lambda x: -x[
        1])  # then sort by occurrence (highest occurrence first)
    unique_licenses = [
        '- {} ({:.1f}%)\n'.format(x[0], x[1] * 100) for x in unique_licenses
    ]
    statistics += '##### Licenses frequency\n\n' + ''.join(
        unique_licenses) + '\n'

    # Keywords
    statistics += '## Keywords\n\n'
    field = 'Keyword'
    # get all keywords together
    keywords = []
    for entry in self.entries:
        keywords.extend(entry[field])
    keywords = [x.value for x in keywords]
    # reduce those starting with "multiplayer" to a single bucket
    keywords = [
        x if not x.startswith('multiplayer') else 'multiplayer'
        for x in keywords
    ]
    unique_keywords = set(keywords)
    unique_keywords = [(l, keywords.count(l) / len(keywords))
                       for l in unique_keywords]
    unique_keywords.sort(
        key=lambda x: str.casefold(x[0]))  # first sort by name
    # print keywords to console
    print('\nKeywords\n')
    print('\n'.join('{} ({:.1f}%)'.format(x[0], x[1] * 100)
                    for x in unique_keywords))
    unique_keywords.sort(key=lambda x: -x[
        1])  # then sort by occurrence (highest occurrence first)
    unique_keywords = [
        '- {} ({:.1f}%)'.format(x[0], x[1] * 100) for x in unique_keywords
    ]
    statistics += '##### Keywords frequency\n\n' + '\n'.join(
        unique_keywords) + '\n\n'

    # no download or play field
    statistics += '## Entries without download or play fields\n\n'
    entries = []
    for entry in self.entries:
        if 'Download' not in entry and 'Play' not in entry:
            entries.append(entry['Title'])
    entries.sort(key=str.casefold)
    statistics += '{}: '.format(len(entries)) + ', '.join(entries) + '\n\n'

    # code hosted not on github, gitlab, bitbucket, launchpad, sourceforge
    popular_code_repositories = ('github.com', 'gitlab.com', 'bitbucket.org',
                                 'code.sf.net', 'code.launchpad.net')
    statistics += '## Entries with a code repository not on a popular site\n\n'
    entries = []
    field = 'Code repository'
    for entry in self.entries:
        popular = False
        for repo in entry[field]:
            for popular_repo in popular_code_repositories:
                if popular_repo in repo.value:
                    popular = True
                    break
        # if there were repositories, but none popular, add them to the list
        if not popular:
            entries.append(entry['Title'])
    entries.sort(key=str.casefold)
    statistics += '{}: '.format(len(entries)) + ', '.join(entries) + '\n\n'

    # Code dependencies
    statistics += '## Code dependencies\n\n'
    field = 'Code dependency'
    # get all code dependencies together
    code_dependencies = []
    entries_with_code_dependency = 0
    for entry in self.entries:
        if field in entry:
            code_dependencies.extend(entry[field])
            entries_with_code_dependency += 1
    code_dependencies = [x.value for x in code_dependencies]
    statistics += 'With code dependency field {} ({:.1f}%)\n\n'.format(
        entries_with_code_dependency, rel(entries_with_code_dependency))
    unique_code_dependencies = set(code_dependencies)
    unique_code_dependencies = [
        (l, code_dependencies.count(l) / len(code_dependencies))
        for l in unique_code_dependencies
    ]
    unique_code_dependencies.sort(
        key=lambda x: str.casefold(x[0]))  # first sort by name
    # print code dependencies to console
    print('\nCode dependencies\n')
    print('\n'.join('{} ({:.1f}%)'.format(x[0], x[1] * 100)
                    for x in unique_code_dependencies))
    unique_code_dependencies.sort(key=lambda x: -x[
        1])  # then sort by occurrence (highest occurrence first)
    unique_code_dependencies = [
        '- {} ({:.1f}%)'.format(x[0], x[1] * 100)
        for x in unique_code_dependencies
    ]
    statistics += '##### Code dependencies frequency\n\n' + '\n'.join(
        unique_code_dependencies) + '\n\n'

    # Build systems:
    statistics += '## Build systems\n\n'
    field = 'Build system'
    # get all build systems together
    build_systems = []
    for entry in self.entries:
        if field in entry['Building']:
            build_systems.extend(entry['Building'][field])
    build_systems = [x.value for x in build_systems]
    # NOTE(review): this percentage is relative to the number of build-system
    # values, not the number of entries with the field — confirm intent
    statistics += 'Build systems information available for {:.1f}% of all projects.\n\n'.format(
        rel(len(build_systems)))
    unique_build_systems = set(build_systems)
    unique_build_systems = [(l, build_systems.count(l) / len(build_systems))
                            for l in unique_build_systems]
    unique_build_systems.sort(
        key=lambda x: str.casefold(x[0]))  # first sort by name
    # print build systems to console
    print('\nBuild systems\n')
    print('\n'.join('{} ({:.1f}%)'.format(x[0], x[1] * 100)
                    for x in unique_build_systems))
    unique_build_systems.sort(key=lambda x: -x[
        1])  # then sort by occurrence (highest occurrence first)
    unique_build_systems = [
        '- {} ({:.1f}%)'.format(x[0], x[1] * 100)
        for x in unique_build_systems
    ]
    statistics += '##### Build systems frequency ({})\n\n'.format(
        len(build_systems)) + '\n'.join(unique_build_systems) + '\n\n'

    # C, C++ projects without build system information
    c_cpp_project_without_build_system = []
    for entry in self.entries:
        # fix: the build-system field lives under entry['Building'], not the
        # entry itself — the old check "field not in entry" was always true
        # and listed every C/C++ project
        if field not in entry['Building'] and (
                'C' in entry['Code language']
                or 'C++' in entry['Code language']):
            c_cpp_project_without_build_system.append(entry['Title'])
    c_cpp_project_without_build_system.sort(key=str.casefold)
    statistics += '##### C and C++ projects without build system information ({})\n\n'.format(
        len(c_cpp_project_without_build_system)) + ', '.join(
            c_cpp_project_without_build_system) + '\n\n'

    # C, C++ projects with build system information but without CMake as build system
    c_cpp_project_not_cmake = []
    # fix: iterate self.entries (the old code iterated "entries", a leftover
    # list of title strings from the repository section) and select entries
    # WITHOUT CMake, as the heading and the variable name require
    for entry in self.entries:
        if field in entry['Building'] and 'CMake' not in entry['Building'][
                field] and ('C' in entry['Code language']
                            or 'C++' in entry['Code language']):
            c_cpp_project_not_cmake.append(entry['Title'])
    c_cpp_project_not_cmake.sort(key=str.casefold)
    statistics += '##### C and C++ projects with a build system different from CMake ({})\n\n'.format(
        len(c_cpp_project_not_cmake)) + ', '.join(
            c_cpp_project_not_cmake) + '\n\n'

    # Platform
    statistics += '## Platform\n\n'
    field = 'Platform'
    # get all platforms together
    platforms = []
    for entry in self.entries:
        if field in entry:
            platforms.extend(entry[field])
    platforms = [x.value for x in platforms]
    # NOTE(review): same caveat as build systems — percentage is relative to
    # the number of platform values, not entries
    statistics += 'Platform information available for {:.1f}% of all projects.\n\n'.format(
        rel(len(platforms)))
    unique_platforms = set(platforms)
    unique_platforms = [(l, platforms.count(l) / len(platforms))
                        for l in unique_platforms]
    unique_platforms.sort(
        key=lambda x: str.casefold(x[0]))  # first sort by name
    unique_platforms.sort(key=lambda x: -x[
        1])  # then sort by occurrence (highest occurrence first)
    unique_platforms = [
        '- {} ({:.1f}%)'.format(x[0], x[1] * 100) for x in unique_platforms
    ]
    statistics += '##### Platforms frequency\n\n' + '\n'.join(
        unique_platforms) + '\n\n'

    # write to statistics file
    utils.write_text(c.statistics_file, statistics)
    print('statistics updated')
def update_repos(self):
    """ export to json for local repository update of primary repos """
    if not self.entries:
        print('entries not yet loaded')
        return

    primary_repos = {'git': [], 'svn': [], 'hg': []}
    unconsumed_entries = []
    # detectors are tried in this order; the first match decides the bucket
    detectors = (('git', osg.git_repo), ('svn', osg.svn_repo),
                 ('hg', osg.hg_repo))

    # for every entry collect the primary repository plus all marked with @add
    for entry in self.entries:
        candidates = entry.get('Code repository', [])
        if not candidates:
            continue
        # keep the first and all others containing @add
        candidates = [candidates[0]] + [x for x in candidates[1:]
                                        if "@add" in x]
        for candidate in candidates:
            # drop everything after the first space
            candidate = candidate.split(' ')[0].strip()
            for vcs, detect in detectors:
                url = detect(candidate)
                if url:
                    primary_repos[vcs].append(url)
                    break
            else:
                # no detector recognized this repository
                unconsumed_entries.append([entry['Title'], candidate])
                print('Entry "{}" unconsumed repo: {}'.format(
                    entry['File'], candidate))

    # sort them alphabetically (and remove duplicates)
    for vcs in primary_repos:
        primary_repos[vcs] = sorted(set(primary_repos[vcs]))

    # statistics of gits
    git_repos = primary_repos['git']
    print('{} Git repositories'.format(len(git_repos)))
    for domain in ('repo.or.cz', 'anongit.kde.org', 'bitbucket.org',
                   'git.code.sf.net', 'git.savannah', 'git.tuxfamily',
                   'github.com', 'gitlab.com', 'gitlab.com/osgames',
                   'gitlab.gnome.org'):
        print('{} on {}'.format(
            sum(1 if domain in x else 0 for x in git_repos), domain))

    # write them to code/git
    json_path = os.path.join(c.root_path, 'code', 'archives.json')
    text = json.dumps(primary_repos, indent=1)
    utils.write_text(json_path, text)
    print('Repositories updated')
def update_readme_tocs(self):
    """
    Recounts entries in sub categories and writes them to the readme.
    Also updates the _toc files in the categories directories.

    Note: The Readme must have a specific structure at the beginning,
    starting with "# Open Source Games" and ending on "A collection..".
    Needs to be performed regularly.
    """
    # completely delete content of toc path
    for file in os.listdir(c.tocs_path):
        os.remove(os.path.join(c.tocs_path, file))

    # read readme
    readme_file = os.path.join(c.root_path, 'README.md')
    readme_text = utils.read_text(readme_file)

    # compile regex for identifying the building blocks in the readme:
    # group 1 = text before the autogenerated section, group 2 = the section
    # itself, group 3 = text after it
    regex = re.compile(
        r"(.*?)(\[comment\]: # \(start.*?end of autogenerated content\))(.*)",
        re.DOTALL)

    # apply regex; exactly one match is required, otherwise the readme
    # structure is broken and we must not rewrite it
    matches = regex.findall(readme_text)
    if len(matches) != 1:
        raise RuntimeError('readme file has invalid structure')
    matches = matches[0]
    start = matches[0]
    end = matches[2]

    tocs_text = ''

    # split into games, tools, frameworks, libraries (an entry is a "game"
    # when it carries none of the other three keywords)
    games = [
        x for x in self.entries if not any(
            [y in x['Keyword'] for y in ('tool', 'framework', 'library')])
    ]
    tools = [x for x in self.entries if 'tool' in x['Keyword']]
    frameworks = [x for x in self.entries if 'framework' in x['Keyword']]
    libraries = [x for x in self.entries if 'library' in x['Keyword']]

    # create games, tools, frameworks, libraries tocs (the first three end
    # with " - " as separators, the last one with a newline)
    title = 'Games'
    file = '_games.md'
    tocs_text += '**[{}](entries/tocs/{}#{})** ({}) - '.format(
        title, file, title, len(games))
    create_toc(title, file, games)

    title = 'Tools'
    file = '_tools.md'
    tocs_text += '**[{}](entries/tocs/{}#{})** ({}) - '.format(
        title, file, title, len(tools))
    create_toc(title, file, tools)

    title = 'Frameworks'
    file = '_frameworks.md'
    tocs_text += '**[{}](entries/tocs/{}#{})** ({}) - '.format(
        title, file, title, len(frameworks))
    create_toc(title, file, frameworks)

    title = 'Libraries'
    file = '_libraries.md'
    tocs_text += '**[{}](entries/tocs/{}#{})** ({})\n'.format(
        title, file, title, len(libraries))
    create_toc(title, file, libraries)

    # create by category (one toc file per recommended keyword)
    categories_text = []
    for keyword in c.recommended_keywords:
        filtered = [x for x in self.entries if keyword in x['Keyword']]
        title = keyword.capitalize()
        name = keyword.replace(' ', '-')
        file = '_{}.md'.format(name)
        categories_text.append('**[{}](entries/tocs/{}#{})** ({})'.format(
            title, file, name, len(filtered)))
        create_toc(title, file, filtered)
    categories_text.sort()
    tocs_text += '\nBy category: {}\n'.format(', '.join(categories_text))

    # create by platform (one toc file per valid platform; not sorted,
    # keeps the order of c.valid_platforms)
    platforms_text = []
    for platform in c.valid_platforms:
        filtered = [
            x for x in self.entries if platform in x.get('Platform', [])
        ]
        title = platform
        name = platform.lower()
        file = '_{}.md'.format(name)
        platforms_text.append('**[{}](entries/tocs/{}#{})** ({})'.format(
            title, file, name, len(filtered)))
        create_toc(title, file, filtered)
    tocs_text += '\nBy platform: {}\n'.format(', '.join(platforms_text))

    # insert new text in the middle (the \n before the second comment is
    # necessary, otherwise Markdown displays it as part of the bullet list)
    text = start + "[comment]: # (start of autogenerated content, do not edit)\n" + tocs_text + "\n[comment]: # (end of autogenerated content)" + end

    # write to readme
    utils.write_text(readme_file, text)
    print('Readme and TOCs updated')
'https://git') and not repo.endswith('.git'): # we have them with .git on github/gitlab repo += '.git' entry += '- Code repository: {}\n'.format(repo) # code language (mandatory on our side) entry += '- Code language: {}\n'.format(', '.join(lang)) # code license entry += '- Code license: {}\n'.format(', '.join( osgc_entry['license'])) # code dependencies (if existing) if 'framework' in osgc_entry: frameworks = osgc_entry['framework'] if type(frameworks) == str: frameworks = [frameworks] entry += '- Code dependencies: {}\n'.format( ', '.join(frameworks)) # write info (if existing) if 'info' in osgc_entry: entry += '\n{}\n'.format(osgc_entry['info']) # write ## Building entry += '\n## Building\n' # finally write to file utils.write_text(target_file, entry) newly_created_entries += 1
def sourceforge_import():
    """
    Scrapes developer information from SourceForge project member pages for
    every entry listed in sf_entries_file, adds newly found developers to the
    entries and to the developers database, and shrinks sf_entries_file to
    the not-yet-processed remainder so the run can be resumed after a crash.
    """
    files = json.loads(utils.read_text(sf_entries_file))

    all_developers = osg.read_developers()
    print(' {} developers read'.format(len(all_developers)))
    all_developers_changed = False

    # all exceptions that happen will be eaten (but will end the execution)
    try:
        # loop over each entry
        # NOTE(review): if files is empty, "index" and "entry" are never
        # bound and the finally block below raises NameError — confirm that
        # sf_entries_file is always non-empty when this runs
        for index, file in enumerate(files):
            print(' process {}'.format(file))

            # read entry
            entry = osg.read_entry(file)
            developers = entry.get('Developer', [])
            # only SourceForge project home pages are of interest here
            urls = [
                x.value for x in entry['Home']
                if x.startswith('https://sourceforge.net/projects/')
            ]

            entry_changed = False

            for url in urls:
                print(' sf project {}'.format(url))

                if not url.endswith('/'):
                    print('error: sf project does not end with slash')
                    url += '/'

                # members page of the project
                url_members = 'https://sourceforge.net/p/' + url[
                    len(prefix):] + '_members/'
                response = requests.get(url_members)
                if response.status_code != 200:
                    print('error: url {} not accessible, status {}'.format(
                        url_members, response.status_code))
                    raise RuntimeError()
                # parse the members table: rows with exactly 3 cells carry a
                # link to the member profile in the second cell
                soup = BeautifulSoup(response.text, 'html.parser')
                authors = soup.find(
                    'div', id='content_base').find('table').find_all('tr')
                authors = [author.find_all('td') for author in authors]
                authors = [
                    author[1].a['href'] for author in authors
                    if len(author) == 3
                ]
                for author in authors:
                    # sometimes author already contains the full url, sometimes not
                    url_author = 'https://sourceforge.net' + author if not author.startswith(
                        'http') else author
                    response = requests.get(url_author)
                    # known-dead profile pages are tolerated
                    if response.status_code != 200 and author not in (
                            '/u/favorito/', ):
                        print('error: url {} not accessible, status {}'.format(
                            url_author, response.status_code))
                        raise RuntimeError()
                    url_author = response.url  # could be different now
                    if 'auth/?return_to' in url_author or response.status_code != 200:
                        # for some reason authorisation is forbidden or page
                        # was not available (happens for example for /u/kantaros);
                        # fall back to the slug between "/u/" and the final "/"
                        author_name = author[3:-1]
                        nickname = author_name
                    else:
                        # profile page: h1 is the display name, the first dd of
                        # the personal-data list is the nickname
                        soup = BeautifulSoup(response.text, 'html.parser')
                        author_name = soup.h1.get_text()
                        author_name = SF_alias_list.get(
                            author_name,
                            author_name)  # replace by alias if possible
                        nickname = soup.find(
                            'dl', class_='personal-data').find('dd').get_text()
                        nickname = nickname.replace('\n', '').strip()
                    nickname += '@SF'  # our indication of the platform to search for
                    author_name = author_name.strip(
                    )  # names can still have white spaces before or after

                    if author_name in SF_ignore_list:
                        continue

                    # look author up in entry developers
                    if author_name not in developers:
                        print(' dev "{}" added to entry {}'.format(
                            author_name, file))
                        entry['Developer'] = entry.get('Developer', []) + [
                            osg_parse.ValueWithComment(author_name)
                        ]
                        entry_changed = True
                        developers = entry.get('Developer', [])

                    # look author and SF nickname up in developers data base
                    if author_name in all_developers:
                        dev = all_developers[author_name]
                        if not nickname in dev.get('Contact', []):
                            print(
                                ' existing dev "{}" added nickname ({}) to developer database'
                                .format(author_name, nickname))
                            # check that name has not already @SF contact
                            if any(
                                    x.endswith('@SF')
                                    for x in dev.get('Contact', [])):
                                print('warning: already SF contact')
                            all_developers[author_name]['Contact'] = dev.get(
                                'Contact', []) + [nickname]
                            all_developers_changed = True
                    else:
                        print(' dev "{}" ({}) added to developer database'.
                              format(author_name, nickname))
                        all_developers[author_name] = {
                            'Name': author_name,
                            'Contact': [nickname],
                            'Games': [entry['Title']]
                        }
                        all_developers_changed = True

            if entry_changed:
                # save entry
                osg.write_entry(entry)
                print(' entry updated')
    except:
        # NOTE(review): bare "except: raise" is a no-op kept only to make the
        # resume semantics of the finally block explicit
        raise
    finally:
        # shorten file list so a restart resumes at the entry that failed
        utils.write_text(sf_entries_file, json.dumps(files[index:], indent=1))

        # save entry
        # NOTE(review): this re-writes the current entry unconditionally, even
        # when it was already saved above or not changed at all — confirm this
        # duplicate write is intended
        osg.write_entry(entry)
        print(' entry updated')

    # maybe save all developers
    if all_developers_changed:
        # save all developers
        osg.write_developers(all_developers)
        print('developers database updated')
def update_html(self):
    """ Parses all entries, collects interesting info and stores it in a json file suitable for displaying with a dynamic table in a browser. """
    if not self.entries:
        print('entries not yet loaded')
        return

    def raw_values(items):
        # unwrap value-with-comment items to their raw values
        return [item.value for item in items]

    def make_row(info):
        # build one table row: game, description, download, state, keywords, source
        row = [
            '{} (<a href="{}">home</a>, <a href="{}">entry</a>)'.format(
                info['Title'], info['Home'][0],
                r'https://github.com/Trilarion/opensourcegames/blob/master/entries/'
                + info['File']),
            textwrap.shorten(info.get('Note', ''), width=60, placeholder='..')
        ]
        # download (first link if any)
        downloads = info.get('Download')
        row.append('<a href="{}">Link</a>'.format(downloads[0]) if downloads else '')
        # state (field state is essential)
        activity = 'inactive since {}'.format(
            osg.extract_inactive_year(info)) if osg.is_inactive(info) else 'active'
        row.append('{} / {}'.format(info['State'][0], activity))
        # keywords
        row.append(', '.join(raw_values(info['Keyword'])))
        # source: repository link (if any), languages, licenses
        source = []
        repositories = info.get('Code repository')
        if repositories:
            source.append('<a href="{}">Source</a>'.format(repositories[0].value))
        source.append(', '.join(raw_values(info['Code language'])))
        source.append(', '.join(raw_values(info['Code license'])))
        row.append(' - '.join(source))
        return row

    # make database out of it
    db = {'headings': ['Game', 'Description', 'Download', 'State', 'Keyword', 'Source']}
    rows = [make_row(info) for info in self.entries]

    # sort rows by game name
    rows.sort(key=lambda row: str.casefold(row[0]))
    db['data'] = rows

    # output
    utils.write_text(c.json_db_file, json.dumps(db, indent=1))
    print('HTML updated')
def test():
    """
    Experimental collector: for the first entries, scrapes developer names
    from SourceForge member pages (and, when enabled, GitHub contributors),
    compares them against the developers database and the entry itself, and
    dumps the findings to collected_developer_info.txt.
    """
    # accumulated report text, written out in the finally block
    developers = ''

    try:
        i = 0
        # active = False
        for entry in entries:
            # if entry['Name'] == 'Aleph One':
            #     active = True
            # if not active:
            #     continue

            # for testing purposes: only look at the first 40 entries
            i += 1
            if i > 40:
                break

            # print
            entry_name = '{} - {}'.format(entry['file'], entry['Name'])
            print(entry_name)

            content = ''
            entry_developer = entry.get('developer', [])

            # parse home
            home = entry['home']

            # sourceforge project site (only when exactly one SF home link)
            prefix = 'https://sourceforge.net/projects/'
            url = [x for x in home if x.startswith(prefix)]
            if len(url) == 1:
                url = url[0]
                print(' sourceforge project site: {}'.format(url))
                url = 'https://sourceforge.net/p/' + url[len(prefix
                                                           ):] + '_members/'
                response = requests.get(url)
                # member rows with exactly 3 cells hold a profile link in the
                # second cell
                soup = BeautifulSoup(response.text, 'html.parser')
                authors = soup.find(
                    'div', id='content_base').find('table').find_all('tr')
                authors = [author.find_all('td') for author in authors]
                authors = [
                    author[1].a['href'] for author in authors
                    if len(author) == 3
                ]
                for author in authors:
                    # sometimes author already contains the full url, sometimes not
                    url = 'https://sourceforge.net' + author if not author.startswith(
                        'http') else author
                    response = requests.get(url)
                    url = response.url  # could be different now
                    if 'auth/?return_to' in url:
                        # for some reason authorisation is forbidden
                        author_name = author
                        nickname = author
                    else:
                        soup = BeautifulSoup(response.text, 'html.parser')
                        author_name = soup.h1.get_text()
                        author_name = SF_alias_list.get(
                            author_name,
                            author_name)  # replace by alias if possible
                        nickname = soup.find(
                            'dl', class_='personal-data').find('dd').get_text()
                        nickname = nickname.replace('\n', '').strip()
                    dev = developer_info_lookup(author_name)
                    in_devs = dev and 'contact' in dev and nickname + '@SF' in dev[
                        'contact']
                    in_entry = author_name in entry_developer
                    if in_devs and in_entry:
                        continue  # already existing in entry and devs
                    content += ' {} : {}@SF'.format(author_name, nickname)
                    if not in_devs:
                        content += ' (not in devs)'
                    if not in_entry:
                        content += ' (not in entry)'
                    content += '\n'

            # parse source repository
            repos = entry.get('code repository', [])

            # Github
            urls = [x for x in repos if x.startswith('https://github.com/')]
            # NOTE(review): the next line empties urls, making the whole
            # GitHub loop below dead code — looks like a deliberate disable
            # or a debug leftover; confirm before removing either line
            urls = []
            for url in urls:
                print(' github repo: {}'.format(url))
                github_info = osg_github.retrieve_repo_info(url)
                for contributor in github_info['contributors']:
                    name = contributor.name
                    dev = developer_info_lookup(name)
                    in_devs = dev and 'contact' in dev and contributor.login + '@GH' in dev[
                        'contact']
                    in_entry = name in entry_developer
                    if in_devs and in_entry:
                        continue  # already existing in entry and devs
                    content += ' {}: {}@GH'.format(name, contributor.login)
                    if contributor.blog:
                        content += ' url: {}'.format(contributor.blog)
                    if not in_devs:
                        content += ' (not in devs)'
                    if not in_entry:
                        content += ' (not in entry)'
                    content += '\n'

            if content:
                developers += '{}\n\n{}\n'.format(entry_name, content)

    except RuntimeError as e:
        raise e
        # pass
    finally:
        # store developer info collected so far, even on error
        utils.write_text(
            os.path.join(c.root_path, 'collected_developer_info.txt'),
            developers)
def export_json(infos):
    """ Parses all entries, collects interesting info and stores it in a json file suitable for displaying with a dynamic table in a browser. """
    print('export to json for web display')

    def make_row(info):
        # build one table row: game, description, download, state, keywords, source
        row = [
            '{} (<a href="{}">home</a>, <a href="{}">entry</a>)'.format(
                info['name'], info['home'][0],
                r'https://github.com/Trilarion/opensourcegames/blob/master/entries/'
                + info['file']),
            textwrap.shorten(info['description'], width=60, placeholder='..')
        ]
        # download (first link if any)
        row.append('<a href="{}">Link</a>'.format(info['download'][0])
                   if info.get('download') else '')
        # state (field state is essential)
        activity = 'inactive since {}'.format(
            info['inactive']) if 'inactive' in info else 'active'
        row.append('{} / {}'.format(info['state'][0], activity))
        # keywords
        row.append(', '.join(info['keywords']) if info.get('keywords') else '')
        # source: repository link, languages, first license
        source = []
        if info.get('code repository'):
            source.append('<a href="{}">Source</a>'.format(
                info['code repository'][0]))
        if info.get('code language'):
            source.append(', '.join(info['code language']))
        if info.get('code license'):
            source.append(info['code license'][0])
        row.append(' - '.join(source))
        return row

    # make database out of it
    db = {'headings': ['Game', 'Description', 'Download', 'State', 'Keywords', 'Source']}
    rows = [make_row(info) for info in infos]

    # sort entries by game name
    rows.sort(key=lambda row: str.casefold(row[0]))
    db['data'] = rows

    # output
    json_path = os.path.join(c.entries_path, os.path.pardir, 'docs', 'data.json')
    utils.write_text(json_path, json.dumps(db, indent=1))
def export_primary_code_repositories_json():
    """
    Collects the primary code repositories (git/svn/hg/bzr) of all entries
    and writes them to tools/archives.json for local repository mirroring.

    NOTE(review): relies on a module-level "infos" list and the helpers
    git_repo/svn_repo/hg_repo/bzr_repo defined elsewhere in this module —
    confirm "infos" is populated before this is called.
    """
    print('export to json for local repository update')

    primary_repos = {'git': [], 'svn': [], 'hg': [], 'bzr': []}
    unconsumed_entries = []

    # for every entry filter those that are known git repositories (add additional repositories)
    field = 'code repository-raw'
    for info in infos:
        # if field 'Code repository' is available
        if field in info:
            consumed = False
            repos = info[field]
            if repos:
                # split at comma
                repos = repos.split(',')
                # keep the first and all others containing "(+)"
                additional_repos = [x for x in repos[1:] if "(+)" in x]
                repos = repos[0:1]
                repos.extend(additional_repos)
                for repo in repos:
                    # remove parenthesis and strip of white spaces
                    repo = re.sub(r'\([^)]*\)', '', repo)
                    repo = repo.strip()
                    # try each VCS detector in turn; first match wins
                    url = git_repo(repo)
                    if url:
                        primary_repos['git'].append(url)
                        consumed = True
                        continue
                    url = svn_repo(repo)
                    if url:
                        primary_repos['svn'].append(url)
                        consumed = True
                        continue
                    url = hg_repo(repo)
                    if url:
                        primary_repos['hg'].append(url)
                        consumed = True
                        continue
                    url = bzr_repo(repo)
                    if url:
                        primary_repos['bzr'].append(url)
                        consumed = True
                        continue

            if not consumed:
                # collected for debugging only; not written anywhere
                unconsumed_entries.append([info['name'], info[field]])

    # sort them alphabetically (and remove duplicates)
    for k, v in primary_repos.items():
        primary_repos[k] = sorted(set(v))

    # write them to tools/git
    json_path = os.path.join(c.root_path, 'tools', 'archives.json')
    text = json.dumps(primary_repos, indent=1)
    utils.write_text(json_path, text)
def fix_entries():
    """
    Fixes the keywords, code dependencies, build systems, .. entries,
    mostly by automatically sorting them.
    """
    # TODO also sort other fields, only read once and then do all, move to separate file
    # TODO handle keyword synonyms (e.g. RTS -> real time, strategy) in one place
    print('fix entries')

    # keywords
    regex = re.compile(r"(.*)- Keywords:([^\n]*)(.*)", re.DOTALL)

    # iterate over all entries
    for entry, entry_path, content in osg.entry_iterator():

        # match with regex
        matches = regex.findall(content)
        if len(matches) != 1:
            raise RuntimeError('Could not find keywords in entry "{}"'.format(entry))

        match = matches[0]

        # get elements out, split, strip, delete duplicates
        elements = match[1].split(',')
        elements = [x.strip() for x in elements]
        elements = list(set(elements))

        # get category out
        # fix: "category" is now reset per entry; previously a missing
        # recommended keyword either crashed with NameError (first entry) or
        # silently reused the category of the previous entry
        category = None
        for keyword in osg.recommended_keywords:
            if keyword in elements:
                elements.remove(keyword)
                category = keyword
                break
        if category is None:
            raise RuntimeError('No recommended keyword in entry "{}"'.format(entry))

        # special treatments here (abbreviations and synonyms)
        elements = [x if x != 'TBS' and x != 'TB' else 'turn based' for x in elements]
        elements = [x if x != 'RTS' else 'real time' for x in elements]
        elements = [x if x != 'MMO' else 'massive multiplayer online' for x in elements]
        # fix: removed a dead second 'MMO' -> 'multiplayer online' replacement
        # that could never match after the line above
        elements = [x if x != 'SP' else 'singleplayer' for x in elements]
        elements = [x if x != 'MP' else 'multiplayer' for x in elements]
        elements = [x if x != 'engine' else 'game engine' for x in elements]
        elements = [x if x != 'rpg' else 'role playing' for x in elements]
        elements = [x if x != 'turn based' else 'turn-based' for x in elements]
        for keyword in ('browser', 'misc', 'tools'):
            if keyword in elements:
                elements.remove(keyword)

        # sort, then put the category first
        elements.sort(key=str.casefold)
        elements.insert(0, category)

        keywords = '- Keywords: {}'.format(', '.join(elements))

        new_content = match[0] + keywords + match[2]

        if new_content != content:
            # write again
            utils.write_text(entry_path, new_content)

    # code dependencies
    regex = re.compile(r"(.*)- Code dependencies:([^\n]*)(.*)", re.DOTALL)

    # iterate over all entries
    for entry, entry_path, content in osg.entry_iterator():
        # match with regex
        matches = regex.findall(content)

        if not matches:
            # no code dependencies given
            continue

        match = matches[0]

        # get code dependencies out, split, strip, delete duplicates
        elements = match[1].split(',')
        elements = [x.strip() for x in elements]
        elements = list(set(elements))

        # special treatments here (canonical spellings)
        elements = [x if x != 'Blender' else 'Blender game engine' for x in elements]
        elements = [x if x.lower() != 'libgdx' else 'libGDX' for x in elements]
        elements = [x if x != 'SDL 2' else 'SDL2' for x in elements]
        elements = [x if x.lower() != "ren'py" else "Ren'Py" for x in elements]

        # sort
        elements.sort(key=str.casefold)

        code_dependencies = '- Code dependencies: {}'.format(', '.join(elements))

        new_content = match[0] + code_dependencies + match[2]

        if new_content != content:
            # write again
            utils.write_text(entry_path, new_content)

    # build systems
    regex = re.compile(r"(.*)- Build system:([^\n]*)(.*)", re.DOTALL)

    # iterate over all entries
    for entry, entry_path, content in osg.entry_iterator():
        # match with regex
        matches = regex.findall(content)

        if not matches:
            # no build system given
            continue

        match = matches[0]

        # get build systems out, split, strip, delete duplicates
        elements = match[1].split(',')
        elements = [x.strip() for x in elements]
        elements = list(set(elements))

        # sort
        elements.sort(key=str.casefold)

        build_system = '- Build system: {}'.format(', '.join(elements))

        new_content = match[0] + build_system + match[2]

        if new_content != content:
            # write again
            utils.write_text(entry_path, new_content)
def _collect_field(infos, field):
    # helper: concatenates the list-valued 'field' over all entries that have it
    values = []
    for info in infos:
        if field in info:
            values.extend(info[field])
    return values


def _sorted_frequencies(values):
    # helper: unique values with their relative frequency, most frequent first,
    # ties broken case-insensitively by name; empty input yields an empty list
    if not values:
        return []
    unique = [(v, values.count(v) / len(values)) for v in set(values)]
    unique.sort(key=lambda x: str.casefold(x[0]))  # first sort by name
    unique.sort(key=lambda x: x[1], reverse=True)  # then by occurrence (stable, so name order kept on ties)
    return unique


def update_statistics(infos):
    """
    Generates the statistics page (statistics.md in the repository root).

    Should be done every time the entries change.

    :param infos: list of entry dictionaries with fields like 'name', 'state',
        'code language', 'code license', 'keywords', ... (list-valued fields)
    """
    print('update statistics')

    # start the page
    statistics_file = os.path.join(c.root_path, 'statistics.md')
    statistics = '[comment]: # (autogenerated content, do not edit)\n# Statistics\n\n'

    # total number
    number_entries = len(infos)
    rel = lambda x: x / number_entries * 100  # conversion to percent
    statistics += 'analyzed {} entries on {}\n\n'.format(number_entries, datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

    # State (beta, mature, inactive)
    statistics += '## State\n\n'
    number_state_beta = sum(1 for x in infos if 'beta' in x['state'])
    number_state_mature = sum(1 for x in infos if 'mature' in x['state'])
    number_inactive = sum(1 for x in infos if 'inactive' in x)
    statistics += '- mature: {} ({:.1f}%)\n- beta: {} ({:.1f}%)\n- inactive: {} ({:.1f}%)\n\n'.format(number_state_mature, rel(number_state_mature), number_state_beta, rel(number_state_beta), number_inactive, rel(number_inactive))

    if number_inactive > 0:
        entries_inactive = [(x['name'], x['inactive']) for x in infos if 'inactive' in x]
        entries_inactive.sort(key=lambda x: str.casefold(x[0]))  # first sort by name
        entries_inactive.sort(key=lambda x: x[1], reverse=True)  # then sort by inactive year (more recently first)
        entries_inactive = ['{} ({})'.format(*x) for x in entries_inactive]
        statistics += '##### Inactive State\n\n' + ', '.join(entries_inactive) + '\n\n'

    # Language
    statistics += '## Code Languages\n\n'
    # TODO the language tag is now an essential field, entries without it cannot happen anymore
    unique_languages = _sorted_frequencies(_collect_field(infos, 'code language'))
    unique_languages = ['- {} ({:.1f}%)\n'.format(x[0], x[1] * 100) for x in unique_languages]
    statistics += '##### Language frequency\n\n' + ''.join(unique_languages) + '\n'

    # Licenses
    statistics += '## Code licenses\n\n'
    field = 'code license'
    # those without license
    number_no_license = sum(1 for x in infos if field not in x)
    if number_no_license > 0:
        statistics += 'Without license tag: {} ({:.1f}%)\n\n'.format(number_no_license, rel(number_no_license))
        entries_no_license = sorted(x['name'] for x in infos if field not in x)
        statistics += ', '.join(entries_no_license) + '\n\n'
    unique_licenses = _sorted_frequencies(_collect_field(infos, field))
    unique_licenses = ['- {} ({:.1f}%)\n'.format(x[0], x[1] * 100) for x in unique_licenses]
    statistics += '##### Licenses frequency\n\n' + ''.join(unique_licenses) + '\n'

    # Keywords
    statistics += '## Keywords\n\n'
    keywords = _collect_field(infos, 'keywords')
    # ignore those starting with "inspired by"
    keywords = [x for x in keywords if not x.startswith('inspired by ')]
    unique_keywords = ['- {} ({:.1f}%)'.format(x[0], x[1] * 100) for x in _sorted_frequencies(keywords)]
    statistics += '##### Keywords frequency\n\n' + '\n'.join(unique_keywords) + '\n\n'

    # no download or play field
    statistics += '## Entries without download or play fields\n\n'
    entries = [info['name'] for info in infos if 'download' not in info and 'play' not in info]
    entries.sort(key=str.casefold)
    statistics += '{}: '.format(len(entries)) + ', '.join(entries) + '\n\n'

    # code hosted not on github, gitlab, bitbucket, launchpad, sourceforge
    popular_code_repositories = ('github.com', 'gitlab.com', 'bitbucket.org', 'code.sf.net', 'code.launchpad.net')
    statistics += '## Entries with a code repository not on a popular site\n\n'
    entries = []
    field = 'code repository'
    for info in infos:
        if field in info:
            # if none of the repositories is on a popular site, add the entry to the list
            if not any(popular_repo in repo for repo in info[field] for popular_repo in popular_code_repositories):
                entries.append(info['name'])
    entries.sort(key=str.casefold)
    statistics += '{}: '.format(len(entries)) + ', '.join(entries) + '\n\n'

    # Code dependencies
    statistics += '## Code dependencies\n\n'
    field = 'code dependencies'
    code_dependencies = _collect_field(infos, field)
    entries_with_code_dependency = sum(1 for info in infos if field in info)
    statistics += 'With code dependency field {} ({:.1f}%)\n\n'.format(entries_with_code_dependency, rel(entries_with_code_dependency))
    unique_code_dependencies = ['- {} ({:.1f}%)'.format(x[0], x[1] * 100) for x in _sorted_frequencies(code_dependencies)]
    statistics += '##### Code dependencies frequency\n\n' + '\n'.join(unique_code_dependencies) + '\n\n'

    # Build systems
    statistics += '## Build systems\n\n'
    field = 'build system'
    build_systems = _collect_field(infos, field)
    # NOTE(review): this counts individual build-system mentions, not entries,
    # so the percentage can exceed 100% for multi-build-system projects — confirm intent
    statistics += 'Build systems information available for {:.1f}% of all projects.\n\n'.format(rel(len(build_systems)))
    unique_build_systems = ['- {} ({:.1f}%)'.format(x[0], x[1] * 100) for x in _sorted_frequencies(build_systems)]
    statistics += '##### Build systems frequency ({})\n\n'.format(len(build_systems)) + '\n'.join(unique_build_systems) + '\n\n'

    # C, C++ projects without build system information
    c_cpp_project_without_build_system = []
    for info in infos:
        if field not in info and ('C' in info['code language'] or 'C++' in info['code language']):
            c_cpp_project_without_build_system.append(info['name'])
    c_cpp_project_without_build_system.sort(key=str.casefold)
    statistics += '##### C and C++ projects without build system information ({})\n\n'.format(len(c_cpp_project_without_build_system)) + ', '.join(c_cpp_project_without_build_system) + '\n\n'

    # C, C++ projects with build system information but without CMake as build system
    c_cpp_project_not_cmake = []
    for info in infos:
        # bug fix: the condition previously read "'CMake' in info[field]" and thereby
        # selected exactly the CMake projects, contradicting the variable name and
        # the heading below
        if field in info and 'CMake' not in info[field] and ('C' in info['code language'] or 'C++' in info['code language']):
            c_cpp_project_not_cmake.append(info['name'])
    c_cpp_project_not_cmake.sort(key=str.casefold)
    statistics += '##### C and C++ projects with a build system different from CMake ({})\n\n'.format(len(c_cpp_project_not_cmake)) + ', '.join(c_cpp_project_not_cmake) + '\n\n'

    # Platform
    statistics += '## Platform\n\n'
    field = 'platform'
    platforms = _collect_field(infos, field)
    statistics += 'Platform information available for {:.1f}% of all projects.\n\n'.format(rel(len(platforms)))
    unique_platforms = ['- {} ({:.1f}%)'.format(x[0], x[1] * 100) for x in _sorted_frequencies(platforms)]
    statistics += '##### Platforms frequency\n\n' + '\n'.join(unique_platforms) + '\n\n'

    # write to statistics file
    utils.write_text(statistics_file, statistics)
def parse_lgw_content():
    """
    Parses all downloaded LibreGameWiki HTML pages in tools/lgw-import and
    stores the extracted entries as JSON in _lgw.json.

    For every page it extracts the title, external links, the meta
    description, the game info box, the "Available as package" table and
    the wiki categories.
    """
    # paths
    import_path = os.path.join(constants.root_path, 'tools', 'lgw-import')
    entries_file = os.path.join(import_path, '_lgw.json')

    # iterate over all imported files
    files = os.listdir(import_path)
    entries = []
    for file in files:
        # bug fix: removed leftover debug line "file = files[56]" which made
        # every iteration parse the same single file
        if file.startswith('_lgw'):
            # skip our own output/state files
            continue
        text = utils.read_text(os.path.join(import_path, file))

        # parse the html
        soup = BeautifulSoup(text, 'html.parser')
        title = soup.h1.get_text()
        print(title)
        entry = {'name': title}

        # get all external links (ignoring links to common meta sites)
        ignored_external_links = ('libregamewiki.org', 'freegamedev.net', 'freegamer.blogspot.com', 'opengameart.org', 'gnu.org', 'creativecommons.org', 'freesound.org', 'freecode.com', 'freenode.net')
        links = [(x['href'], x.get_text()) for x in soup.find_all('a', href=True)]
        links = [x for x in links if x[0].startswith('http') and not any(y in x[0] for y in ignored_external_links)]
        entry['external links'] = links

        # get meta description
        # NOTE(review): assumes every page has a meta description tag — a page
        # without one raises TypeError here; confirm against the downloaded set
        description = soup.find('meta', attrs={"name": "description"})
        entry['description'] = description['content']

        # parse gameinfobox
        infos = soup.find('div', class_='gameinfobox')
        if not infos:
            print(' no gameinfobox')
        else:
            for row in infos.find_all('tr'):
                if row.th and row.td:
                    # row with header: comma separated list of values
                    key = row.th.get_text()
                    values = [v.strip() for v in row.td.get_text().split(',')]
                    entry[key] = values
                if not row.th and row.td:
                    # row without header: contribute section (list of links)
                    for item in row.find_all('li'):
                        if item.a:
                            entry[item.a.string] = item.a['href']

        # parse "available as package in" table
        tables = soup.find_all('table', class_='wikitable')
        tables = [table for table in tables if table.caption and table.caption.string.startswith('Available as package')]
        if len(tables) > 0:
            if len(tables) > 1:
                raise RuntimeError('More than one package table in "{}"'.format(file))
            table = tables[0]
            packages = [row.td.a['href'] for row in table.find_all('tr')]
            entry['linux-packages'] = packages

        # categories
        categories = soup.find_all('div', id='mw-normal-catlinks')
        if not categories:
            print(' no categories')
            categories = []
        else:
            if len(categories) > 1:
                raise RuntimeError('More than one category block in "{}"'.format(file))
            categories = [x.a.string for x in categories[0].find_all('li')]
            if 'Games' not in categories:
                print(' "Games" not in categories')
            else:
                categories.remove('Games')  # should be there
            # strip " games" suffix
            phrase = ' games'
            categories = [x[:-len(phrase)] if x.endswith(phrase) else x for x in categories]
            ignored_categories = ['Articles lacking reference', 'Stubs']
            categories = [x for x in categories if x not in ignored_categories]
        entry['categories'] = categories

        entries.append(entry)

    # save entries
    text = json.dumps(entries, indent=1)
    utils.write_text(entries_file, text)
def clean_lgw_content():
    """
    Cleans the raw LibreGameWiki entries in _lgw.json (produced by
    parse_lgw_content) and writes the result to _lgw.cleaned.json.

    Cleaning consists of renaming wiki field names to our canonical keys,
    dropping ignored fields, and running a fixed sequence of content
    normalizations (licenses, languages, genres, libraries, platforms).
    NOTE: the order of the replace/remove calls below is significant —
    later calls operate on the output of earlier ones.
    """
    # paths
    import_path = os.path.join(constants.root_path, 'tools', 'lgw-import')
    entries_file = os.path.join(import_path, '_lgw.json')
    cleaned_entries_file = os.path.join(import_path, '_lgw.cleaned.json')

    # load entries
    text = utils.read_text(entries_file)
    entries = json.loads(text)

    # rename keys: (canonical key, (wiki spellings...)); only the first
    # matching old key per entry wins (break below)
    key_replacements = (('developer', ('Developer', 'Developers')), ('code license', ('Code license', 'Code licenses')), ('engine', ('Engine', 'Engines')), ('genre', ('Genre', 'Genres')), ('library', ('Library', 'Libraries')), ('assets license', ('Media license', 'Media licenses')), ('code language', ('P. language', 'P. languages')), ('home', ('Homepage',)), ('platform', ('Platforms', )), ('tracker', ('Bug/Feature Tracker', )), ('repo', ('Source Code', )), ('forum', ('Forum', )), ('chat', ('Chat', )), ('origin', ('Origin', )), ('dev home', ('Development Project', )), ('last active', ('Release date', )))
    for index, entry in enumerate(entries):
        for new_key, old_keys in key_replacements:
            for key in old_keys:
                if key in entry:
                    entry[new_key] = entry[key]
                    del entry[key]
                    break
        entries[index] = entry

    # ignore keys ('\xa0' is a non-breaking space coming from the wiki markup)
    ignored_keys = ('origin', 'Latest\xa0release')
    for index, entry in enumerate(entries):
        for key in ignored_keys:
            if key in entry:
                del entry[key]
        entries[index] = entry

    # check for unique field names
    unique_fields = set()
    for entry in entries:
        unique_fields.update(entry.keys())
    print('unique lgw fields: {}'.format(sorted(list(unique_fields))))

    # which fields are mandatory (present in every entry)
    mandatory_fields = unique_fields.copy()
    for entry in entries:
        remove_fields = [field for field in mandatory_fields if field not in entry]
        mandatory_fields -= set(remove_fields)
    print('mandatory lgw fields: {}'.format(sorted(list(mandatory_fields))))

    # content replacements — normalize license spellings, languages, genres, ...
    entries = remove_parenthized_content(entries, ('assets license', 'code language', 'code license', 'engine', 'genre', 'last active', 'library'))
    entries = remove_prefix_suffix(entries, ('code license', 'assets license'), ('"', 'GNU', ), ('"', '[3]', '[2]', '[1]', 'only'))
    entries = replace_content(entries, ('code license', 'assets license'), 'GPL', ('General Public License', ))
    entries = replace_content(entries, ('code license', 'assets license'), 'GPLv2', ('GPL v2', 'GPL version 2.0', 'GPL 2.0', 'General Public License v2', 'GPL version 2', 'Gplv2', 'GPL 2'))
    entries = replace_content(entries, ('code license', 'assets license'), 'GPLv2+', ('GPL v2 or later', 'GPL 2+', 'GPL v2+', 'GPL version 2 or later'))
    entries = replace_content(entries, ('code license', 'assets license'), 'GPLv3', ('GPL v3', 'GNU GPL v3', 'GPL 3'))
    entries = replace_content(entries, ('code license', 'assets license'), 'GPLv3+', ('GPL v3+', 'GPL v.3 or later', 'GPL v3 or later'))
    entries = replace_content(entries, ('code license', 'assets license'), 'Public domain', ('public domain', 'Public Domain'))
    entries = replace_content(entries, ('code license', 'assets license'), 'zlib', ('zlib/libpng license', 'Zlib License'))
    entries = replace_content(entries, ('code license', 'assets license'), 'BSD', ('Original BSD License', ))
    entries = replace_content(entries, ('code license', 'assets license'), 'CC-BY-SA-3.0', ('Creative Commons Attribution-ShareAlike 3.0 Unported License', 'CC-BY-SA 3.0', 'CC BY-SA 3.0'))
    entries = replace_content(entries, ('code license', 'assets license'), 'CC-BY-SA', ('CC BY-SA',))
    entries = replace_content(entries, ('code license', 'assets license'), 'MIT', ('MIT License', 'MIT"'))
    entries = replace_content(entries, 'platform', 'macOS', ('Mac', ))
    entries = remove_prefix_suffix(entries, ('code language', 'developer'), (), ('[3]', '[2]', '[1]'))
    entries = ignore_content(entries, 'code language', ('HTML5', 'HTML', 'English', 'XML', 'WML'))
    entries = replace_content(entries, 'code language', 'Lua', ('lua', 'LUA'))
    entries = remove_prefix_suffix(entries, 'genre', (), ('game', 'games'))
    entries = lower_case_content(entries, 'genre')
    entries = replace_content(entries, 'genre', 'platform', ('platformer', ))
    entries = replace_content(entries, 'genre', 'role playing', ('rpg', ))
    entries = replace_content(entries, 'genre', 'first person, shooter', ('fps', ))
    entries = replace_content(entries, 'genre', 'real time, strategy', ('rts',))
    entries = replace_content(entries, 'genre', 'turn based, strategy', ('tbs',))
    entries = ignore_content(entries, 'categories', ('GPL', 'C++', 'C', 'ECMAScript', 'Python', 'Java', 'CC BY-SA', 'Lua', 'LGPL', 'CC-BY', 'BSD', 'MIT', 'Qt', 'SDL', 'OpenGL', 'Pygame', 'PD', 'GLUT', 'Haskell', 'Allegro', 'Ruby', 'Zlib/libpng', 'OpenAL', 'Perl', 'Free Pascal', 'LÖVE', 'HTML5', 'Id Tech 1'))
    entries = replace_content(entries, 'library', 'pygame', ('Pygame', ))
    entries = replace_content(entries, 'library', 'Qt', ('QT', ))
    entries = ignore_content(entries, 'library', ('C++', 'Lua', 'Mozilla Firefox'))
    entries = ignore_nonnumbers(entries, 'last active')
    entries = ignore_content(entries, 'last active', ('2019', ))
    entries = ignore_content(entries, 'platform', ('DOS', ))

    # print value statistics for every remaining unique field (free-text
    # fields like description/links/name are excluded)
    # fields = sorted(list(unique_fields))
    fields = sorted(list(unique_fields - set(('description', 'external links', 'dev home', 'forum', 'home', 'linux-packages', 'developer', 'chat', 'tracker', 'Latest release', 'name', 'repo', 'Release date', 'categories'))))
    for field in fields:
        content = [entry[field] for entry in entries if field in entry]
        # flatten: field values may be scalars or lists
        flat_content = []
        for c in content:
            if isinstance(c, list):
                flat_content.extend(c)
            else:
                flat_content.append(c)
        statistics = utils.unique_elements_and_occurrences(flat_content)
        print('\n{}: {}'.format(field, ', '.join(statistics)))

    # save entries
    text = json.dumps(entries, indent=1)
    utils.write_text(cleaned_entries_file, text)
def github_import():
    """
    Enriches entries that have a GitHub code repository with information
    retrieved from the GitHub API (archived state, creation year, stars,
    forks, primary language, significant contributors) and updates the
    developers database accordingly.

    The processed prefix of the file list is removed from gh_entries_file
    in the finally block, so an aborted run can be resumed.
    """
    private_properties = json.loads(utils.read_text(c.private_properties_file))

    files = json.loads(utils.read_text(gh_entries_file))

    all_developers = osg.read_developers()
    print(' {} developers read'.format(len(all_developers)))

    # all exceptions that happen will be eaten (but will end the execution)
    try:
        # loop over each entry
        for index, file in enumerate(files):
            print(' process {}'.format(file))

            # read entry
            entry = osg.read_entry(file)
            code_repositories = entry['Code repository']
            repos = [
                x.value for x in code_repositories if x.startswith(prefix)
            ]
            # NOTE(review): this force-marks the first repo with '@add' so it is always
            # processed below, and raises IndexError if repos is empty — looks like
            # leftover debugging; confirm before relying on this behavior
            repos[0] += ' @add'
            # only repos explicitly marked with '@add' are imported
            repos = [x for x in repos if '@add' in x]
            repos = [x.split(' ')[0] for x in repos]
            repos = [x for x in repos if x not in ignored_repos]
            for repo in repos:
                print(' GH repo {}'.format(repo))

                info = osg_github.retrieve_repo_info(
                    repo, private_properties['github-token'])

                new_comments = []
                # is archived
                if info['archived']:
                    if not osg.is_inactive(entry):
                        print(
                            'warning: repo is archived but not inactive state??'
                        )
                    # add archive to repo comment
                    new_comments.append('@archived')

                # add created comment
                new_comments.append('@created {}'.format(info['created'].year))

                # add stars
                new_comments.append('@stars {}'.format(info['stars']))

                # add forks
                new_comments.append('@forks {}'.format(info['forks']))

                # update comment of the matching repository object
                # NOTE(review): if no repository value starts with 'repo', this loop
                # falls through and 'r' is simply the last item — assumes a match exists
                for r in code_repositories:
                    if r.value.startswith(repo):
                        break
                comments = r.comment
                if comments:
                    comments = comments.split(',')
                    comments = [c.strip() for c in comments]
                    # delete old '@...' comments, keep manual ones
                    comments = [c for c in comments if not c.startswith('@')]  # delete old ones
                    comments += new_comments
                else:
                    comments = new_comments
                r.comment = ', '.join(comments)

                # language in languages
                language = info['language']
                language = language_aliases.get(language, language)
                if language and language not in entry[
                        'Code language'] and language not in ignored_languages:
                    entry['Code language'].append(
                        osg_parse.ValueWithComment(language))
                    print(' added to languages: {}'.format(language))

                # contributors: only human users with at least 4 contributions
                for contributor in info['contributors']:
                    if contributor.type != 'User':
                        continue
                    if contributor.contributions < 4:
                        continue
                    # contributor.login/name/blog
                    name = contributor.name
                    if not name:
                        name = contributor.login
                    name = name_aliases.get(name, name)
                    nickname = '{}@GH'.format(contributor.login)
                    blog = contributor.blog
                    if blog:
                        blog = blog_alias[blog] if blog in blog_alias else blog
                        if not blog.startswith('http'):
                            # assume https for scheme-less blog URLs
                            blog = 'https://' + blog
                        if blog in ignored_blogs:
                            blog = None

                    # look up author in entry developers
                    if name not in entry.get('Developer', []):
                        print(' dev "{}" added to entry {}'.format(
                            name, file))
                        entry['Developer'] = entry.get('Developer', []) + [
                            osg_parse.ValueWithComment(name)
                        ]

                    # look up author in developers data base
                    if name in all_developers:
                        dev = all_developers[name]
                        if not nickname in dev.get('Contact', []):
                            print(
                                ' existing dev "{}" added nickname ({}) to developer database'
                                .format(name, nickname))
                            # check that name has not already @GH contact
                            if any(
                                    x.endswith('@GH') for x in
                                    dev.get('Contact', [])):
                                print('warning: already GH contact')
                            dev['Contact'] = dev.get('Contact', []) + [nickname]
                        if blog and blog not in dev.get('Home', []):
                            dev['Home'] = dev.get('Home', []) + [blog]
                        # TODO add to games entries!
                    else:
                        # new developer record
                        print(' dev "{}" ({}) added to developer database'.
                              format(name, nickname))
                        all_developers[name] = {
                            'Name': name,
                            'Contact': [nickname],
                            'Games': [entry['Title']]
                        }
                        if blog:
                            all_developers[name]['Home'] = [blog]

            entry['Code repository'] = code_repositories
            osg.write_entry(entry)
    except:
        # re-raised so the failure is visible; finally still persists progress
        raise
    finally:
        # shorten file list: keep everything from the entry being processed on,
        # so a resumed run retries the failed entry
        utils.write_text(gh_entries_file, json.dumps(files[index:], indent=1))
        osg.write_developers(all_developers)
        print('developers database updated')
def export_primary_code_repositories_json(infos): """ """ print('export to json for local repository update') primary_repos = {'git': [], 'svn': [], 'hg': []} unconsumed_entries = [] # for every entry filter those that are known git repositories (add additional repositories) field = 'code repository-raw' for info in infos: # if field 'Code repository' is available if field in info: consumed = False repos = info[field] if repos: # split at comma repos = repos.split(',') # keep the first and all others containing "(+)" additional_repos = [x for x in repos[1:] if "(+)" in x] repos = repos[0:1] repos.extend(additional_repos) for repo in repos: # remove parenthesis and strip of white spaces repo = re.sub(r'\([^)]*\)', '', repo) repo = repo.strip() url = git_repo(repo) if url: primary_repos['git'].append(url) consumed = True continue url = svn_repo(repo) if url: primary_repos['svn'].append(url) consumed = True continue url = hg_repo(repo) if url: primary_repos['hg'].append(url) consumed=True continue if not consumed: unconsumed_entries.append([info['name'], info[field]]) # print output if 'code repository' in info: print('Entry "{}" unconsumed repo: {}'.format(info['name'], info[field])) # sort them alphabetically (and remove duplicates) for k, v in primary_repos.items(): primary_repos[k] = sorted(set(v)) # statistics of gits git_repos = primary_repos['git'] print('{} Git repositories'.format(len(git_repos))) for domain in ('repo.or.cz', 'anongit.kde.org', 'bitbucket.org', 'git.code.sf.net', 'git.savannah', 'git.tuxfamily', 'github.com', 'gitlab.com', 'gitlab.com/osgames', 'gitlab.gnome.org'): print('{} on {}'.format(sum(1 if domain in x else 0 for x in git_repos), domain)) # write them to code/git json_path = os.path.join(c.root_path, 'code', 'archives.json') text = json.dumps(primary_repos, indent=1) utils.write_text(json_path, text)
for url in urls: print(' github repo: {}'.format(url)) github_info = osg_github.retrieve_repo_info(url) for contributor in github_info['contributors']: name = contributor.name dev = developer_info_lookup(name) in_devs = dev and 'contact' in dev and contributor.login + '@GH' in dev[ 'contact'] in_entry = name in entry_developer if in_devs and in_entry: continue # already existing in entry and devs content += ' {}: {}@GH'.format(name, contributor.login) if contributor.blog: content += ' url: {}'.format(contributor.blog) if not in_devs: content += ' (not in devs)' if not in_entry: content += ' (not in entry)' content += '\n' if content: developers += '{}\n\n{}\n'.format(entry_name, content) except RuntimeError as e: raise (e) # pass finally: # store developer info utils.write_text( os.path.join(c.root_path, 'collected_developer_info.txt'), developers)