def setup():
    """Set up global wiki and imdb objects for Wikipedia and IMDb lookups."""
    global wiki, imdb
    wiki = WikiApi()
    imdb = Imdb(anonymize=True)
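# A minimal usage sketch for the setup() helper above, assuming the module has
# already imported WikiApi (from wikiapi) and Imdb (from the older imdbpie API
# that accepted anonymize=True). The query string is illustrative only.
setup()
titles = wiki.find('Bill Clinton')  # wiki.find returns a list of page titles
if titles:
    print(wiki.get_article(titles[0]).heading)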
def runSearchInput(self):
    searchFor = self.getPluginParamValue("SearchFor")
    locale = self.getPluginParamValue("Locale")
    limitResultsTo = self.getPluginParamValueAsInt("LimitResultsTo")
    includeContent = self.getPluginParamValueAsTrueOrFalse("IncludeContent")
    includeHeading = self.getPluginParamValueAsTrueOrFalse("IncludeHeading")
    includeSummary = self.getPluginParamValueAsTrueOrFalse("IncludeSummary")
    includeURL = self.getPluginParamValueAsTrueOrFalse("IncludeURL")
    wiki = WikiApi({"locale": locale})
    content = ""
    cnt = 0
    for result in wiki.find(searchFor):
        article = wiki.get_article(result)
        if includeHeading:
            content = "{0}\n{1}".format(content, article.heading)
        if includeURL:
            content = "{0}\n{1}".format(content, article.url)
        if includeSummary:
            content = "{0}\n{1}".format(content, article.summary)
        if includeContent:
            content = "{0}\n{1}".format(content, article.content)
        content = "{0}\n\n".format(content)
        cnt += 1
        if cnt >= limitResultsTo:
            break
    content = content.strip()
    self.setInputContent(content)
    return content
def test_cache_not_populated_when_disabled(self):
    wiki = WikiApi({'cache': False})
    assert self._get_cache_size(wiki) == 0
    wiki.find('Bob Marley')
    assert self._get_cache_size(wiki) == 0
    shutil.rmtree(wiki.cache_dir, ignore_errors=True)
def set_up(self):
    # using an Italian-Emilian locale that is full of unicode symbols
    wiki = WikiApi({'locale': 'eml'})
    result = wiki.find('Bulaggna')[0]
    return {
        'wiki': wiki,
        'result': result,
    }
def set_up(self):
    wiki = WikiApi()
    results = wiki.find('Bill Clinton')
    article = wiki.get_article(results[0])
    return {
        'wiki': wiki,
        'results': results,
        'article': article,
    }
def get_url(query, log_file):
    wiki = WikiApi()
    results = wiki.find(query)
    if len(results) == 0:
        sys.stderr.write("No wikipedia article found for '" + query + "'\n")
    else:
        article = wiki.get_article(results[0])
        print(article.url)
        with open(log_file, 'a') as f:
            f.write(article.url + "\n")
def get_wiki_phrases(word):
    wiki = WikiApi({'locale': 'en'})
    results = wiki.find(word)
    print(results)
    phrase = ""
    for i in range(min(4, len(results))):
        article = wiki.get_article(results[i])
        phrase = phrase + " " + article.content
    # RAKE object that extracts keywords where:
    #   each word has at least 4 characters,
    #   each phrase has at most 3 words,
    #   each keyword appears in the text at least 10 times
    rake_object = rake.Rake("SmartStoplist.txt", 4, 3, 10)
    keywords = rake_object.run(phrase)
    return keywords[0:20]
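# Hypothetical call showing the shape of the return value above. Assuming the
# RAKE-tutorial style rake module used here, rake_object.run() yields
# (keyword, score) tuples sorted by score, so this prints up to five pairs.
for keyword, score in get_wiki_phrases('reggae')[:5]:
    print(keyword, score)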
def __init__(self, add_gloss_list, del_gloss_list, category, label):
    """Initialize the class."""
    self.add_phrases = get_phrases(add_gloss_list)
    self.del_phrases = get_phrases(del_gloss_list)
    self.category = category
    self.corpus_dir = CORPUS_DIR + '/' + label + '/wikipedia/' + category
    self.raw_dir = RAW_DATA_DIR + '/' + label + '/wikipedia/' + category
    self.wiki = WikiApi({})
    self.visited_results = self.get_results(self.del_phrases)
    self.count = 0
def get_wikipedia_details(keyword):
    wiki = WikiApi()
    results = wiki.find(keyword)
    if len(results) > 0:
        article = wiki.get_article(results[0])
        if 'Disambig' not in article.image:
            return {
                'heading': article.heading,
                'image': article.image,
                'summary': article.summary,
                'url': article.url,
            }
    return None
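# Illustrative use of get_wikipedia_details() above; the keyword is arbitrary.
# The helper returns None when nothing is found or when the first hit's lead
# image is a disambiguation icon.
details = get_wikipedia_details('Bob Marley')
if details is not None:
    print(details['heading'], details['url'])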
def test_cache_populated(self):
    wiki = WikiApi({'cache': True, 'cache_dir': '/tmp/wikiapi-test'})
    assert self._get_cache_size(wiki) == 0
    # Make multiple calls to ensure no duplicate cache items are created
    assert wiki.find('Bob Marley') == wiki.find('Bob Marley')
    assert self._get_cache_size(wiki) == 1
    # Check cache keys are unique
    assert wiki.find('Tom Hanks') != wiki.find('Bob Marley')
    assert self._get_cache_size(wiki) == 2
    shutil.rmtree(wiki.cache_dir, ignore_errors=True)
def wiki_search(query):
    wiki = WikiApi()
    wikiurls = []
    lst = query.split(",")
    # integer division so the count comparison below can actually match
    num = 10 // len(lst)
    for i in lst:
        results = wiki.find(i)
        cnt = 0
        for j in results:
            cnt = cnt + 1
            article = wiki.get_article(j)
            wikiurls.append(article.url)
            if cnt == num:
                break
    return wikiurls
def get_full_name_from_wiki(name):
    wiki = WikiApi()
    results = wiki.find(name)
    if len(results) == 0:
        return None
    article = wiki.get_article(results[0])
    # keep the summary text up to the first parenthesis
    new_name = article.summary
    new_name = new_name[:new_name.find('(') - 1]
    if new_name.find(' refer ') != -1:
        # first hit looks like a disambiguation page; try the second result
        if len(results) > 1:
            article = wiki.get_article(results[1])
            new_name = article.summary
            new_name = new_name[:new_name.find('(') - 1]
        else:
            return None
    # strip punctuation and line breaks
    table = str.maketrans({key: None for key in string.punctuation + '\r\n'})
    new_name = new_name.translate(table)
    if 4 < len(new_name) < 50:
        return new_name
    return None
def setUp(self):
    """Set up all of the requirements for testing."""
    self.pos_lex = naivebayes.generate('sentiment/pos.txt', naivebayes.lexicon())
    self.neg_lex = naivebayes.generate('sentiment/neg.txt', naivebayes.lexicon())
    self.wiki = WikiApi()
    self.r = praw.Reddit(client_id='l-Gz5blkt7GCUg',
                         client_secret='_xLEgNing89k6__sWItU1_j9aR8',
                         user_agent='testscript by /u/pbexe')
    self.test_sentence = 'The cat sat on the mat. The dog however, did not!'
    self.test_sentence_tokenized = [
        [('The', 'DT'), ('cat', 'NN'), ('sat', 'VBD'), ('on', 'IN'),
         ('the', 'DT'), ('mat', 'NN'), ('.', '.')],
        [('The', 'DT'), ('dog', 'NN'), ('however', 'RB'), (',', ','),
         ('did', 'VBD'), ('not', 'RB'), ('!', '.')],
    ]
    self.test_sentence_with_entities = 'Dr Foster went to Glouster'
    self.test_sentence_with_entities_nodes = ['Dr Foster', 'Glouster']
    self.story = Story(source='http://example.com/', content='This is a title')
    self.story.save()
    self.node1 = Node(name='Key word 1', date=timezone.now(),
                      collectedFrom=self.story)
    self.node1.save()
    self.node2 = Node(name='Key word 2', date=timezone.now(),
                      collectedFrom=self.story)
    self.node2.save()
    self.node3 = Node(name='Key word 3', date=timezone.now(),
                      collectedFrom=self.story)
    self.node3.save()
def getWikiArticle(word, locale):
    wiki = WikiApi({'locale': locale})
    results = wiki.find(word)
    result = next(iter(results or []), None)
    return wiki.get_article(result) if result else None
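# Illustrative call for getWikiArticle() above; 'en' stands in for any valid
# Wikipedia locale code, and the helper returns None when the search is empty.
article = getWikiArticle('Bill Clinton', 'en')
if article is not None:
    print(article.heading)
    print(article.url)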
def jarvis(data):
    first = data.split(" ")
    if first[0] == "locate" or first[0] == "location":
        import location
        return location.loco(first[1])
    if (first[0] == "play" or first[0] == "search") and first[1] == "youtube":
        del first[0]
        del first[0]
        a = "+".join(first)
        b = " ".join(first)
        import urllib.request
        import urllib.parse
        import re
        query_string = urllib.parse.urlencode({"search_query": a})
        html_content = urllib.request.urlopen(
            "http://www.youtube.com/results?" + query_string)
        search_results = re.findall(r'href=\"\/watch\?v=(.{11})',
                                    html_content.read().decode())
        print("playing:" + a)
        return webbrowser.open("http://www.youtube.com/watch?v=" +
                               search_results[0])
    if first[0] == "google" or first[0] == "search":
        del first[0]
        a = "+".join(first)
        return webbrowser.open('https://www.google.co.in/search?q=' + a)
    if first[0] == "connect":
        del first[0]
        a = "".join(first)
        return webbrowser.open(a + ".com")
    if first[0] == "who":
        del first[0]
        # join with spaces so the Wikipedia search receives whole words
        a = " ".join(first)
        from wikiapi import WikiApi
        wiki = WikiApi({'locale': 'en'})
        results = wiki.find(a)
        article = wiki.get_article(results[0])
        print(article.summary)
        return webbrowser.open(article.image)
    while 1:
        if data in wikipedia:
            wiki()
            break
        if data in status:
            cpustatus()
            break
        if data in welcome:
            speak("hi there")
            break
        if data in play:
            speak("ok sir")
            playsong()
            break
        if data in newfile:
            writefile()
            break
        if data in readfile:
            readfile()
            break
        if data in searchweb:
            speak("ok sir")
            search()
            break
        if data in time:
            speak(ctime())
            break
        if "close notepad" in data:
            clsnotepad()
            break
        if "close video" in data:
            clsvlc()
            break
        if "close browser" in data:
            clsbrowser()
            break
        if data in display:
            log.display()
            break
        if data in end:
            com = "close"
            return com
        if data in shutdownpc:
            shutdown()
            break
        if data in folders:
            directory()
            break
        if data in closeprogram:
            close()
            break
        else:
            print("I don't understand the command!! Try again")
            break
except Exception as e:
    nltk.download('stopwords')
    nltk.download('punkt')
    nltk.download('words')
    nltk.download('wordnet')
    nltk.download('averaged_perceptron_tagger')
    from nltk.corpus import stopwords
    from nltk.corpus import wordnet

import sys
import os
from wikiapi import WikiApi
import urllib2
import html2text

wiki_ = WikiApi()
common_ = set(nltk.corpus.words.words())
isNoun = lambda x: x[:2] == 'NN'


def url_exists(url):
    ret = urllib2.urlopen(url)
    if ret.code == 200:
        return True
    return False


def wiki_link(query):
    wikiLink = 'https://en.wikipedia.org/wiki/%s' % query
    if url_exists(wikiLink):
def setUp(self):
    # using an Italian-Emilian locale that is full of unicode symbols
    self.wiki = WikiApi({'locale': 'eml'})
    self.res = self.wiki.find('Bulaggna')[0]
    self.article = None
# -*- coding: utf-8 -*-
from wikiapi import WikiApi
import unittest

wiki = WikiApi({})
results = wiki.find('Bill Clinton')
article = wiki.get_article(results[0])  # taking first search result


class TestWiki(unittest.TestCase):

    def test_heading(self):
        self.assertIsNotNone(article.heading)

    def test_image(self):
        self.assertTrue(isinstance(article.image, str))

    def test_summary(self):
        self.assertGreater(len(article.summary), 100)

    def test_content(self):
        self.assertGreater(len(article.content), 200)

    def test_references(self):
        self.assertTrue(isinstance(article.references, list))

    def test_url(self):
        self.assertEqual(article.url,
                         u"http://en.wikipedia.org/wiki/Bill_Clinton")

    def test_get_relevant_article(self):
        keywords = ['president', 'hilary']
from bs4 import BeautifulSoup
import urllib2
from wikiapi import WikiApi

wiki = WikiApi({'locale': 'en'})

# b = wiki.get_article('High Crusade')
# print(b.url)
# results = wiki.find('Barack Obama').content
# print(results)


def get_title_from_search(string):
    return wiki.find(string)[0]


def get_url_from_search(string):
    try:
        article_title = wiki.find(string)[0]
    except IndexError:
        return False
    article_contents = wiki.get_article(article_title)
    return article_contents.url

# print(get_url_from_search('Stranger in a Strange Land'))
import lxml.etree
import urllib
from wikiapi import WikiApi

title = "2016 Summer Olympics"
params = {
    "format": "xml",
    "action": "query",
    "prop": "revisions",
    "rvprop": "timestamp|comment|content",
}
params["titles"] = "API|%s" % urllib.quote(title.encode("utf8"))
qs = "&".join("%s=%s" % (k, v) for k, v in params.items())
url = "http://en.wikipedia.org/w/api.php?%s" % qs
tree = lxml.etree.parse(urllib.urlopen(url))
print(tree)
revs = tree.xpath('//rev')
all_result_xml = revs[0].text

wikiapi = WikiApi({'locale': 'en'})
index_i = wikiapi.getIndex_substring("{{Infobox", all_result_xml)
index_f = wikiapi.getIndex_substring("{{" + title, all_result_xml)
print(index_i, " ", index_f)
infobox_result = all_result_xml[index_i:index_f]
a = infobox_result.split("| ")
for b in a:
    print(b)
'''
Translate the names in the VGGFace2 dataset into English.
You will need to manually install the following libraries:
    unidecode
    googletrans
    wikiapi
author: Feng Wang (UESTC)
'''
import os
import csv
import string
import unidecode
from googletrans import Translator
translator = Translator()
from wikiapi import WikiApi
wiki = WikiApi()


def is_number(uchar):
    return uchar >= u'0' and uchar <= u'9'


def is_alphabet(uchar):
    return (uchar >= u'a' and uchar <= u'z') or (uchar >= u'A' and uchar <= u'Z')


def check_english(name):
    flag = True
    for uchar in name:
        if (not is_alphabet(uchar)) and (not is_number(uchar)) and \
                (uchar != u'\u0020') and (uchar != u'-') and (uchar != u'.'):
            flag = False
    return flag


def non_english_character_count(name):
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os, json, re, codecs, sys, argparse, collections
from pprint import pprint
from wikiapi import WikiApi
# from nltk.corpus import stopwords
from math import sqrt

json_data = {}
mxspath = os.environ.get('MXS_PATH')
n = 0
list_path = []
wiki = WikiApi({'locale': 'fr'})


def cut_word(content):
    text = re.sub("[^a-zA-Z]", " ", content)
    words = text.lower().split()
    # stops = set(stopwords.words('french'))
    tags = [w for w in words]
    return tags


def merge_tag(tag1=None, tag2=None):
    v1 = []
    v2 = []
    tag_dict1 = collections.Counter(tag1)
    tag_dict2 = collections.Counter(tag2)
    merged_tag = set()
#!/usr/bin/python3
# -*- coding: utf-8 -*-
from wikiapi import WikiApi
import requests, pprint

# This is suitable for extracting content that is organized by pages under a title
# This code requires the wiki-api python library created by Richard O'Dwyer of UK
# https://github.com/richardasaurus/wiki-api
wiki = WikiApi({'locale': 'ta'})  # to specify your locale; 'en' is the default


# Get the page text of the article with the given title
def getArticleParagraphs(title):
    print(title)
    articleFull = wiki.get_article(title)
    fullText = articleFull.content
    article = ""
    paragraphs = fullText.split('\n\n')
    # We want only whole paragraphs that end in ".", "!", "?" or '"', not fragments
    for paragraph in paragraphs:
        if len(paragraph) > 30:
            end = paragraph[-1]
            if end == '.' or end == '!' or end == '?' or end == '"':
                article = article + "\n\n" + paragraph
    return article
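# Illustrative call for getArticleParagraphs() above; the title is hypothetical
# and should name a page that exists in the configured 'ta' (Tamil) locale.
text = getArticleParagraphs('பராக் ஒபாமா')
print(text[:300])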
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from articleData import my_articles
import requests
from wikiapi import WikiApi

wiki = WikiApi({'locale': 'es'})


def getURL(searchQuery):
    results = wiki.find(searchQuery)
    try:
        article = wiki.get_article(results[0])
    except IndexError:
        article = "no article exists for: " + searchQuery
    try:
        url = article.url
    except AttributeError:
        url = "no url exists for: " + searchQuery
    # try:
    #     summary = article.summary
    # except AttributeError:
    #     summary
    print(url)
    # print(summary)
def __init__(self):
    self.classifier = classifier.Classifier()
    self.wiki = WikiApi()
    self.bad_urls = set(
        [p['url'] for p in self.classifier.non_accepted_pages])
def set_up(self):
    self.wiki = WikiApi()
    self.results = self.wiki.find('Bill Clinton')
    self.article = self.wiki.get_article(self.results[0])
def wiki_search(self, text):
    wiki = WikiApi()
    results = wiki.find(text)
    # raises IndexError when the search returns no results
    article = wiki.get_article(results[0])
    return article