def select_search_url(self, entity): search = entity.get_search_terms(entity.names[0]) results = wpapi.full_text_search(search) extra = "" if entity.type == 'politician': md = entity.politician_metadata.all()[0] extra += md.state + ", " + md.party print print '----------------------', search, "(%s)" % entity.type, extra print for i,result in enumerate(results): so_wide = [] for j in range(0, len(result['content']), 80): so_wide.append(result['content'][j:j + 80]) content = "\n ".join(so_wide) print i, result['title'] print " ", wpapi.article_url(result['title']) print " ", content print while True: selection = raw_input(search + " ?> ") if selection == 'n': return None try: i = int(selection) return wpapi.article_url(results[i]['title']) except ValueError: pass if selection.startswith("http"): return selection
def select_search_url(self, entity): search = entity.get_search_terms(entity.names[0]) results = wpapi.full_text_search(search) extra = "" if entity.type == 'politician': md = entity.politician_metadata.all()[0] extra += md.state + ", " + md.party print print '----------------------', search, "(%s)" % entity.type, extra print for i, result in enumerate(results): so_wide = [] for j in range(0, len(result['content']), 80): so_wide.append(result['content'][j:j + 80]) content = "\n ".join(so_wide) print i, result['title'] print " ", wpapi.article_url(result['title']) print " ", content print while True: selection = raw_input(search + " ?> ") if selection == 'n': return None try: i = int(selection) return wpapi.article_url(results[i]['title']) except ValueError: pass if selection.startswith("http"): return selection
def _article_matches(entity, ename, article, redirects):
    """
    Return True when ``article`` is an acceptable Wikipedia match for the
    entity name ``ename``, False when the search hit should be skipped.
    """
    # Special namespaced articles (e.g. User:, Template:), "List of ..."
    # pages and disambiguation pages are never acceptable.
    if article.namespace:
        return False
    if article.title.startswith("List of"):
        return False
    if article.is_disambiguation_page():
        return False

    # When the hit's title is a key in the redirect lookup, compare names
    # against the article stored under that key rather than the hit itself
    # (presumably the redirect page that matched the title search --
    # verify against wpapi.title_search_redirects).
    if article.title in redirects:
        compare_name = wpapi.WikipediaArticle(
            redirects.get(article.title)).name
    else:
        compare_name = article.name

    if entity.type == 'politician':
        if ename != PersonName(compare_name):
            return False
        if not article.is_politician():
            return False
        if not article.is_american():
            return False
        # The subject, when the article has one, must name the same person.
        subject = article.get_subject()
        if subject and ename != PersonName(article.get_subject()):
            return False
    elif entity.type == 'organization':
        if not ename.is_politician():
            # Ordinary organization: names must agree, the article must not
            # be about a person, and a company must map to a company.
            if ename != OrganizationName(compare_name):
                return False
            if article.is_person():
                return False
            if ename.is_company() and not article.is_company():
                return False
        else:
            # Organization named after a politician: match on the person.
            if ename.pname != PersonName(compare_name):
                return False
            if not article.is_politician():
                return False
            if not article.is_american():
                return False

    return True


def find_wikipedia_url(entity):
    """
    Find the Wikipedia article for a given entity.

    Every name of the entity is tried in turn; the first search hit that
    passes the type-specific checks wins and is returned as a tuple of
    (url, article excerpt, image url).

    Entities of type 'individual' or 'industry' are never looked up. For
    those, and when no article matches, a list of three empty strings is
    returned.
    """
    nothing_found = ['', '', '']

    if entity.type in ('individual', 'industry'):
        return nothing_found

    for ename in entity.names:
        # Exact title search including redirects, used by the name
        # comparison: a redirect title may match exactly even though the
        # destination page does not. The full text search below only
        # yields destination pages, never the redirections.
        redirects = wpapi.title_search_redirects(ename.search_string())

        hits = wpapi.full_text_search(entity.get_search_terms(ename))
        for hit in hits:
            article = wpapi.WikipediaArticle(hit['title'])
            if not _article_matches(entity, ename, article, redirects):
                continue

            url = wpapi.article_url(article.title)
            excerpt, image_url = wpapi.get_article_excerpt_and_image_url(
                article.title)
            return (url, excerpt, image_url)

    return nothing_found