def check_for_wikipedia_links(self):
    """
    Search Wikipedia for inspirations that do not have a Wikipedia link yet.

    Inspirations that contain an 'Included' key, or whose 'Media' field
    already holds an English Wikipedia URL, are skipped. For every other
    inspiration the name is searched via osg_wikipedia, the hits are
    filtered, and the URL of the page whose title is most similar to the
    name is appended to the inspiration's 'Media' list and printed.

    Prints a notice and returns early if no inspirations are loaded.
    """
    if not self.inspirations:
        print('inspirations not yet loaded')
        return

    wikipedia_prefix = 'https://en.wikipedia.org/wiki/'
    for inspiration in self.inspirations.values():
        if 'Included' in inspiration:
            continue
        # already has a Wikipedia link
        if any(wikipedia_prefix in x for x in inspiration.get('Media', [])):
            continue

        name = inspiration['Name']
        folded_name = str.casefold(name)

        # search in wikipedia
        results = osg_wikipedia.search(name)

        # throw out all disambiguation and film pages
        results = [
            r for r in results
            if not any(x in r for x in ('disambiguation', 'film'))
        ]

        # throw out those too dissimilar
        results = [
            r for r in results
            if osg.name_similarity(folded_name, str.casefold(r)) > 0.6
        ]

        # get pages for the remaining
        pages = osg_wikipedia.pages(results)

        # throw out those that are no video games
        pages = [
            page for page in pages
            if any('video games' in category for category in page.categories)
        ]
        if not pages:
            continue

        # Keep the page whose title is MOST similar to the name. The previous
        # ascending sort followed by taking the first element kept the least
        # similar page instead.
        best_page = max(
            pages,
            key=lambda page: osg.name_similarity(
                folded_name, str.casefold(page.title)))

        url = best_page.url
        inspiration['Media'] = inspiration.get('Media', []) + [url]
        print('{} : {}'.format(name, url))

    print('finished checking for Wikipedia links')
def check_for_duplicates(self):
    """
    Report developer names that look like duplicates of each other.

    Every unordered pair of keys in self.developers is compared with
    osg.name_similarity; pairs scoring above 0.8 are printed. Prints a
    notice and returns early if no developers are loaded.
    """
    if not self.developers:
        print('developers not yet loaded')
        return

    template = ' {} - {} is similar'
    names = list(self.developers.keys())
    # compare each name only with the names that follow it
    for position, current in enumerate(names, start=1):
        for candidate in names[position:]:
            score = osg.name_similarity(current, candidate)
            if score > 0.8:
                print(template.format(current, candidate))

    print('duplicates checked')
def check_for_duplicates(self):
    """
    Report developer names that look like duplicates, ignoring case.

    Every unordered pair of keys in self.developers is compared with
    osg.name_similarity on the case-folded names; pairs scoring above 0.85
    are printed with their original spelling. The CPU time spent is printed
    at the end. Prints a notice and returns early if no developers are loaded.
    """
    if not self.developers:
        print('developers not yet loaded')
        return

    start_time = time.process_time()
    developer_names = list(self.developers.keys())
    # Case-fold every name once up front instead of twice per compared pair.
    folded = [(name, str.casefold(name)) for name in developer_names]
    for index, (name, folded_name) in enumerate(folded, start=1):
        for other_name, folded_other in folded[index:]:
            if osg.name_similarity(folded_name, folded_other) > 0.85:
                print(' {} - {} is similar'.format(name, other_name))
    print('duplicates checked (took {:.1f}s)'.format(
        time.process_time() - start_time))
def check_for_duplicates(self):
    """
    Report inspiration names that look like duplicates of each other.

    Every unordered pair of keys in self.inspirations is examined. A pair is
    skipped when both names start with the same entry of valid_duplicates;
    otherwise it is printed if osg.name_similarity scores it above 0.8.
    Prints a notice and returns early if no inspirations are loaded.
    """
    if not self.inspirations:
        print('inspirations not yet loaded')
        return

    template = ' {} - {} is similar'
    names = list(self.inspirations.keys())
    for position, current in enumerate(names, start=1):
        for candidate in names[position:]:
            # pairs sharing a whitelisted prefix are accepted duplicates
            shares_valid_prefix = False
            for prefix in valid_duplicates:
                if current.startswith(prefix) and candidate.startswith(prefix):
                    shares_valid_prefix = True
                    break
            if shares_valid_prefix:
                continue
            if osg.name_similarity(current, candidate) > 0.8:
                print(template.format(current, candidate))

    print('duplicates checked')
def check_for_duplicates(self):
    """
    Report inspiration names that look like duplicates, ignoring case.

    Every unordered pair of keys in self.inspirations is examined. A pair is
    skipped when both (original-case) names start with the same entry of
    valid_duplicates; otherwise it is printed if osg.name_similarity scores
    the case-folded names above 0.9. The CPU time spent is printed at the
    end. Prints a notice and returns early if no inspirations are loaded.
    """
    if not self.inspirations:
        print('inspirations not yet loaded')
        return

    start_time = time.process_time()
    inspiration_names = list(self.inspirations.keys())
    # Case-fold every name once up front instead of twice per compared pair.
    folded = [(name, str.casefold(name)) for name in inspiration_names]
    for index, (name, folded_name) in enumerate(folded, start=1):
        for other_name, folded_other in folded[index:]:
            # pairs sharing a whitelisted prefix are accepted duplicates
            if any(name.startswith(x) and other_name.startswith(x)
                   for x in valid_duplicates):
                continue
            if osg.name_similarity(folded_name, folded_other) > 0.9:
                print(' {} - {} is similar'.format(name, other_name))
    print('duplicates checked took {:.1f}s'.format(
        time.process_time() - start_time))
def check_inconsistencies(self):
    """
    Print a report of likely inconsistencies among the loaded entries.

    The following checks run in order, each printing its findings:

    - keywords that are similar to each other (with their usage counts)
    - referenced code dependencies that are not known as an entry or alias
    - entries with a "Play" field but without "Web" as platform
    - entries written in a web language but without "Web" as platform
    - entries with "space" in the title but not as keyword
    - entries whose title starts with "j" plus a capital but lack Java
    - entries that list the same keyword more than once

    Prints a notice and returns early if no entries are loaded.
    """
    # local import keeps this block self-contained
    from collections import Counter

    if not self.entries:
        print('entries not yet loaded')
        return

    # get all keywords and print similar keywords
    keywords = []
    for entry in self.entries:
        keywords.extend(entry['Keyword'])
        # flag keywords written with U+2010 (HYPHEN) instead of ASCII '-'
        if 'first\u2010person' in entry['Keyword']:
            print(entry['File'])
    keywords = [x.value for x in keywords]

    # reduce those starting with "multiplayer"
    keywords = [
        'multiplayer' if x.startswith('multiplayer') else x for x in keywords
    ]

    # Count each keyword once; iterating the counter yields keywords in
    # first-seen order, so the report is stable between runs.
    keyword_counts = Counter(keywords)
    unique_keywords = list(keyword_counts)
    for index, name in enumerate(unique_keywords, start=1):
        for other_name in unique_keywords[index:]:
            if osg.name_similarity(name, other_name) > 0.8:
                print(' Keywords {} ({}) - {} ({}) are similar'.format(
                    name, keyword_counts[name], other_name,
                    keyword_counts[other_name]))

    # get all names of frameworks and libraries, also using the aliases
    valid_dependencies = list(
        c.general_code_dependencies_without_entry.keys())
    for entry in self.entries:
        if any(x in ('framework', 'library', 'game engine')
               for x in entry['Keyword']):
            name = entry['Title']
            if name in c.code_dependencies_aliases:
                valid_dependencies.extend(c.code_dependencies_aliases[name])
            else:
                valid_dependencies.append(name)

    # count all referenced code dependencies
    dependency_counts = Counter(
        dependency.value
        for entry in self.entries
        for dependency in entry.get('Code dependency', []))

    # delete those that are valid dependencies
    referenced_dependencies = [
        (k, v) for k, v in dependency_counts.items()
        if k not in valid_dependencies
    ]

    # sort by number, most referenced first
    referenced_dependencies.sort(key=lambda x: x[1], reverse=True)

    # print out
    print('Code dependencies not included as entry')
    for dep in referenced_dependencies:
        print('{} ({})'.format(*dep))

    # if there is the "Play" field, it should have "Web" as Platform
    for entry in self.entries:
        name = entry['File']
        if 'Play' in entry:
            if 'Platform' not in entry:
                print(
                    'Entry "{}" has "Play" field but not "Platform" field, add it with "Web"'
                    .format(name))
            elif 'Web' not in entry['Platform']:
                print(
                    'Entry "{}" has "Play" field but not "Web" in "Platform" field'
                    .format(name))

    # javascript/typescript/php as language but not web as platform?
    ignored = ('0_ad.md', 'aussenposten.md', 'between.md', 'caesaria.md',
               'cavepacker.md', 'citybound.md', 'gorillas.md', 'ika.md',
               'inexor.md', 'maniadrive.md', 'oolite.md', 'freevikings.md',
               'rolisteam.md', 'rpgboss.md', 'ruby-warrior.md', 'snelps.md',
               'tenes_empanadas_graciela.md', 'thrive.md')
    web_languages = ('JavaScript', 'TypeScript', 'PHP', 'CoffeeScript')
    for entry in self.entries:
        name = entry['File']
        if name in ignored:
            continue
        if any(language in entry['Code language']
               for language in web_languages) and (
                   'Platform' not in entry or 'Web' not in entry['Platform']):
            print(
                'Entry "{}" has language JavaScript/PHP but not Web as platform.'
                .format(name))

    # space in name but not space as keyword
    ignored = ('burgerspace.md', 'crystal_space_3d_sdk.md',
               'our_personal_space.md', 'space_harrier_clone.md')
    for entry in self.entries:
        name = entry['File']
        if name in ignored:
            continue
        title = entry['Title']
        if 'space' in title.lower() and 'space' not in entry['Keyword']:
            print(
                'Entry "{}" has space in name but not as keyword.'.format(
                    name))

    # starts with j + capital letter but not java as language
    for entry in self.entries:
        name = entry['File']
        title = entry['Title']
        # The length guard avoids an IndexError on titles shorter than two
        # characters. NOTE(review): the comparison below is also true for
        # digits and punctuation, not only capital letters - confirm intent.
        if (len(title) >= 2 and title[0] == 'j'
                and title[1] == title[1].upper()
                and 'Java' not in entry['Code language']):
            print(
                'Entry "{}" title starts with j? but Java is not a code language.'
                .format(name))

    # search for duplicate keywords
    for entry in self.entries:
        keywords = entry['Keyword']
        duplicates = [
            keyword for keyword in keywords if keywords.count(keyword) > 1
        ]
        if duplicates:
            print('"{}" has duplicate keywords: {}'.format(
                entry['File'], duplicates))
print('osgc-framework: {}'.format(unique_field_contents(osgc_entries, 'framework'))) print('osgc-content: {}'.format(unique_field_contents(osgc_entries, 'content'))) # just the names osgc_names = set([x['name'] for x in osgc_entries]) our_names = set([x['Title'] for x in our_entries]) common_names = osgc_names & our_names osgc_names -= common_names our_names -= common_names print('{} both, {} only osgameclones, {} only us'.format(len(common_names), len(osgc_names), len(our_names))) # find similar names among the rest if check_similar_names: print('look for similar names (theirs - ours)') for osgc_name in osgc_names: for our_name in our_names: if osg.name_similarity(osgc_name, our_name) > similarity_threshold: print(' {} - {}'.format(osgc_name, our_name)) newly_created_entries = 0 # iterate over their entries for osgc_entry in osgc_entries: osgc_name = osgc_entry['name'] is_included = False for our_entry in our_entries: our_name = our_entry['Title'] # find those that entries in osgameclones that are also in our database and compare them if osgc_name == our_name: is_included = True # a match, check the fields