예제 #1
0
 def runSearchInput(self):
     """Search Wikipedia per the plugin parameters and load the result text.

     Reads SearchFor/Locale/LimitResultsTo plus the Include* flags,
     concatenates the selected fields of each matching article, stores
     the text via setInputContent and returns it.
     """
     search_term = self.getPluginParamValue("SearchFor")
     locale = self.getPluginParamValue("Locale")
     max_results = self.getPluginParamValueAsInt("LimitResultsTo")
     want_content = self.getPluginParamValueAsTrueOrFalse(
         "IncludeContent")
     want_heading = self.getPluginParamValueAsTrueOrFalse(
         "IncludeHeading")
     want_summary = self.getPluginParamValueAsTrueOrFalse(
         "IncludeSummary")
     want_url = self.getPluginParamValueAsTrueOrFalse("IncludeURL")
     wiki = WikiApi({"locale": locale})
     text = ""
     seen = 0
     for hit in wiki.find(search_term):
         article = wiki.get_article(hit)
         if want_heading:
             text += "\n" + article.heading
         if want_url:
             text += "\n" + article.url
         if want_summary:
             text += "\n" + article.summary
         if want_content:
             text += "\n" + article.content
         # Blank line between articles; the outer strip() trims the edges.
         text += "\n\n"
         seen += 1
         if seen >= max_results:
             break
     text = text.strip()
     self.setInputContent(text)
     return text
예제 #2
0
 def set_up(self):
     """Build the shared fixture: a client, its search hits, and the top article."""
     api = WikiApi()
     hits = api.find('Bill Clinton')
     top_article = api.get_article(hits[0])
     fixture = {
         'wiki': api,
         'results': hits,
         'article': top_article,
     }
     return fixture
예제 #3
0
 def set_up(self):
     """Prepare a wiki client, the 'Bill Clinton' results, and the first article."""
     client = WikiApi()
     found = client.find('Bill Clinton')
     return {
         'wiki': client,
         'results': found,
         'article': client.get_article(found[0]),
     }
예제 #4
0
def get_url(query, log_file):
    """Print and log the URL of the top Wikipedia hit for *query*.

    Writes a message to stderr when nothing is found; otherwise prints
    the article URL and appends it to *log_file*.
    """
    wiki = WikiApi()
    results = wiki.find(query)
    if not results:
        sys.stderr.write("No wikipedia article found for '" + query + "'\n")
    else:
        article = wiki.get_article(results[0])
        # print() form: the original `print article.url` statement is a
        # SyntaxError on Python 3.
        print(article.url)
        with open(log_file, 'a') as f:
            f.write(article.url + "\n")
예제 #5
0
def get_url(query, log_file):
    """Print and append to *log_file* the URL of the top hit for *query*.

    Emits a stderr message when the search yields no results.
    """
    wiki = WikiApi()
    results = wiki.find(query)
    if not results:
        sys.stderr.write("No wikipedia article found for '" + query + "'\n")
    else:
        article = wiki.get_article(results[0])
        # Function-call print: the Python-2-only statement form breaks on
        # Python 3.
        print(article.url)
        with open(log_file, 'a') as f:
            f.write(article.url + "\n")
예제 #6
0
    def wikiqueryresults(searchQuery):
        """Return the summary of the top Wikipedia hit for *searchQuery*."""
        # 'en' is the default locale, passed explicitly for clarity.  The
        # original created a throwaway WikiApi({}) first -- dead code removed.
        wiki = WikiApi({'locale': 'en'})

        wikiSearch = wiki.find(searchQuery)

        wikiArticle = wiki.get_article(wikiSearch[0])

        return wikiArticle.summary
예제 #7
0
def wiki_api(options):
    """Search Wikipedia for options['q'] and persist each hit as a Link.

    Each result becomes a Link row tagged with options['topic'].
    """
    # 'en' is the default locale; the redundant first WikiApi() is removed.
    wiki = WikiApi({'locale': 'en'})
    results = wiki.find(options['q'])
    for result in results:
        # Bug fix: fetch the article for *this* result -- the original
        # passed the whole `results` list to get_article.
        article = wiki.get_article(result)
        title = article.heading
        url = article.url

        print(url)
        link = Link(topic=options['topic'], title=title, url=url)
        link.save()
예제 #8
0
def get_wikipedia_details(keyword):
    """Return heading/image/summary/url for the top Wikipedia hit of *keyword*.

    Returns None when there is no result, or when the top hit looks like a
    disambiguation page ('Disambig' appears in its image name).
    """
    wiki = WikiApi()
    results = wiki.find(keyword)
    if results:
        article = wiki.get_article(results[0])
        if 'Disambig' not in article.image:
            return {
                'heading': article.heading,
                'image': article.image,
                'summary': article.summary,
                'url': article.url
            }
    return None
예제 #9
0
def get_full_name_from_wiki(name):
    """Resolve *name* to a cleaned full name from its Wikipedia summary.

    Falls back to the second search result when the first summary is a
    "may refer to" disambiguation page.  Returns None when no usable name
    (4 < length < 50 after stripping punctuation) can be extracted.
    """
    wiki = WikiApi()
    results = wiki.find(name)
    if not results:
        return None
    new_name = _name_before_paren(wiki.get_article(results[0]).summary)
    if ' refer ' in new_name:
        # Disambiguation page: try the next result, if there is one.
        if len(results) <= 1:
            return None
        new_name = _name_before_paren(wiki.get_article(results[1]).summary)
    # Strip all punctuation and line breaks in a single translate() pass.
    table = str.maketrans({key: None for key in string.punctuation + '\r\n'})
    new_name = new_name.translate(table)
    return new_name if 4 < len(new_name) < 50 else None


def _name_before_paren(summary):
    """Return *summary* up to (excluding the space before) the first '('.

    Bug fix: the original `summary[:summary.find('(') - 1]` silently chopped
    the last two characters whenever no '(' was present, because str.find
    returns -1.
    """
    idx = summary.find('(')
    return summary if idx == -1 else summary[:idx - 1]
예제 #10
0
class TestUnicode:
    """Exercise WikiApi against a locale whose titles contain non-ASCII."""

    @pytest.fixture(autouse=True)
    def set_up(self):
        # The Emilian ('eml') wiki is full of unicode symbols.
        self.wiki = WikiApi({'locale': 'eml'})
        self.result = self.wiki.find('Bulaggna')[0]

    def test_search(self):
        # Result titles arrive URL-encoded.
        assert 'Bul%C3%A5ggna' == self.result

    def test_article(self):
        # Any unicode mishandling would surface during the fetch.
        fetched = self.wiki.get_article(self.result)
        assert fetched is not None
예제 #11
0
파일: tests.py 프로젝트: tedpark/wiki-api
class TestUnicode(unittest.TestCase):
    """Unicode-handling checks against the Emilian-locale Wikipedia."""

    def setUp(self):
        # The 'eml' (Emilian) wiki uses plenty of unicode symbols.
        self.wiki = WikiApi({'locale': 'eml'})
        self.res = self.wiki.find('Bulagna')[0]
        self.article = None

    def test_search(self):
        # The returned title is URL-encoded.
        self.assertEqual(self.res, u'Bul%C3%A5ggna')

    def test_article(self):
        # Unicode errors would blow up during the fetch itself.
        fetched = self.wiki.get_article(self.res)
        self.assertIsNotNone(fetched)
예제 #12
0
class TestUnicode(unittest.TestCase):
    """Verify WikiApi copes with a heavily-unicode locale ('eml')."""

    def setUp(self):
        # Italian-Emilian Wikipedia: titles routinely contain non-ASCII.
        self.wiki = WikiApi({'locale': 'eml'})
        self.res = self.wiki.find('Bulaggna')[0]
        self.article = None

    def test_search(self):
        # Search results are URL-encoded strings.
        expected = u'Bul%C3%A5ggna'
        self.assertEqual(self.res, expected)

    def test_article(self):
        # A unicode decoding failure would raise inside get_article.
        self.assertIsNotNone(self.wiki.get_article(self.res))
예제 #13
0
def wiki_search(query):
    """Return Wikipedia URLs for a comma-separated *query* string.

    An overall budget of 10 URLs is split evenly across the terms.
    """
    wiki = WikiApi()
    wikiurls = []
    terms = query.split(",")
    # Integer division: under Python 3 the original `10 / len(...)` yields a
    # float, so the `cnt == num` cap below could never fire.  `>=` also keeps
    # the cap working when more than 10 terms drive the budget to 0.
    num = 10 // len(terms)
    for term in terms:
        results = wiki.find(term)
        cnt = 0
        for res in results:
            cnt = cnt + 1
            article = wiki.get_article(res)
            wikiurls.append(article.url)
            if cnt >= num:
                break
    return wikiurls
예제 #14
0
def wiki_search(query):
    """Collect Wikipedia URLs for each comma-separated term in *query*.

    Caps the total at roughly 10 URLs, shared evenly across the terms.
    """
    wiki = WikiApi()
    wikiurls = []
    lst = query.split(",")
    # Floor division keeps `num` an int on Python 3 (plain `/` produces a
    # float there, and the equality-based cap would never trigger).
    num = 10 // len(lst)
    for i in lst:
        results = wiki.find(i)
        cnt = 0
        for j in results:
            cnt = cnt + 1
            article = wiki.get_article(j)
            wikiurls.append(article.url)
            if cnt >= num:
                break
    return wikiurls
예제 #15
0
def get_wiki_phrases(word):
    """Extract up to 20 RAKE keywords from Wikipedia content about *word*.

    Concatenates the content of at most the first four matching articles
    and runs RAKE keyword extraction over the combined text.
    """
    # 'en' is the default locale; the redundant first WikiApi() is removed.
    wiki = WikiApi({'locale': 'en'})
    results = wiki.find(word)
    # print() form: the original py2 `print results` breaks on Python 3.
    print(results)
    phrase = ""
    for i in range(min(4, len(results))):
        article = wiki.get_article(results[i])
        phrase = phrase + " " + article.content
    # RAKE settings: words have >= 4 chars, phrases have <= 3 words, and a
    # keyword must appear in the text at least 4 times.
    rake_object = rake.Rake("SmartStoplist.txt", 4, 3, 10)
    keywords = rake_object.run(phrase)
    return keywords[0:20]
예제 #16
0
def getWikiArticle(word, locale):
    """Return the article for the first Wikipedia hit on *word*, or None."""
    wiki = WikiApi({'locale': locale})
    hits = wiki.find(word)
    # First truthy-or-None element; guards against both an empty and a
    # None result list.
    first = next(iter(hits or []), None)
    if first:
        return wiki.get_article(first)
    return None
예제 #17
0
파일: tests.py 프로젝트: tedpark/wiki-api
# -*- coding: utf-8 -*-
from wikiapi import WikiApi
import unittest

# Module-level fixture shared by every test below: one client, one search,
# and the first matching article for 'Bill Clinton'.  Runs at import time,
# so collecting these tests performs live network requests.
wiki = WikiApi({})
results = wiki.find('Bill Clinton')
article = wiki.get_article(results[0])  # taking first search result


class TestWiki(unittest.TestCase):
    def test_heading(self):
        self.assertIsNotNone(article.heading)

    def test_image(self):
        self.assertTrue(isinstance(article.image, str))

    def test_summary(self):
        self.assertGreater(len(article.summary), 100)

    def test_content(self):
        self.assertGreater(len(article.content), 200)

    def test_references(self):
        self.assertTrue(isinstance(article.references, list))

    def test_get_relevant_article(self):
        keywords = ['president', 'hilary']
        _article = wiki.get_relevant_article(results, keywords)
        self.assertTrue('Bill Clinton' in _article.heading)

    def test_get_relevant_article_no_result(self):
예제 #18
0
# -*- coding: utf-8 -*-
from wikiapi import WikiApi
import unittest

# Shared module-level fixture: client, search results and the first article
# for 'Bill Clinton'.  Note this executes (with network I/O) at import time.
wiki = WikiApi({})
results = wiki.find('Bill Clinton')
article = wiki.get_article(results[0])  # taking first search result


class TestWiki(unittest.TestCase):
    def test_heading(self):
        self.assertIsNotNone(article.heading)

    def test_image(self):
        self.assertTrue(isinstance(article.image, str))

    def test_summary(self):
        self.assertGreater(len(article.summary), 100)

    def test_content(self):
        self.assertGreater(len(article.content), 200)

    def test_references(self):
        self.assertTrue(isinstance(article.references, list))

    def test_url(self):
        self.assertTrue(article.url,
                        u"http://en.wikipedia.org/wiki/Bill_Clinton")

    def test_get_relevant_article(self):
        keywords = ['president', 'hilary']
예제 #19
0
from wikiapi import WikiApi

# Fetch and dump one article's content.
sub = "sachin tendulkar"
# 'en' is the default locale; the original's first parameterless WikiApi()
# instance was immediately overwritten -- dead code removed.
wiki = WikiApi({'locale': 'en'})
# NOTE(review): passes the raw phrase straight to get_article() rather than
# a result of wiki.find() -- confirm WikiApi resolves bare titles.
page = wiki.get_article(sub)
print(page.content)
예제 #20
0
from wikiapi import WikiApi
# 'en' is the default locale; the redundant first WikiApi() init is removed.
wiki = WikiApi({ 'locale' : 'en'})

# One name per line in the input file.  `with` closes the handle itself;
# the original's extra f.close() calls after each with-block were redundant.
keywords = []
with open("Important_Names.txt", "r") as f:
    for line in f:
        keywords.append(line)

count = 0
for word in keywords:
    count = count + 1
    results = wiki.find(word.strip('\n'))
    if len(results) != 0:
        article = wiki.get_article(results[0])
        text = article.content.encode('utf-8')
        # 'wb': `text` is bytes after .encode(); writing bytes to a
        # text-mode handle is a TypeError on Python 3.
        with open("Web" + str(count) + ".txt", "wb") as f:
            f.write(text)
        # print() form -- the py2 print statement breaks on Python 3.
        print(article.url)
예제 #21
0
class Wikipedia_Scanner(object):
    """Scan Wikipedia articles for a category and write them to corpora.

    Results matching the "delete" glossary are pre-marked as visited so
    they are skipped while scanning the "add" glossary.
    """

    def __init__(self, add_gloss_list, del_gloss_list, category, label):
        """Set up phrase lists, output directories, the client and state."""
        self.add_phrases = get_phrases(add_gloss_list)
        self.del_phrases = get_phrases(del_gloss_list)
        self.category = category
        self.corpus_dir = CORPUS_DIR + '/' + label + '/wikipedia/' + category
        self.raw_dir = RAW_DATA_DIR + '/' + label + '/wikipedia/' + category
        self.wiki = WikiApi({})
        self.visited_results = self.get_results(self.del_phrases)
        self.count = 0

    def get_results(self, phrases):
        """Return a dict marking every wiki result for *phrases* as seen."""
        visited_results = {}
        for phrase in phrases:
            # dict.has_key() was removed in Python 3; `in` works everywhere.
            for result in self.wiki.find(phrase):
                if result not in visited_results:
                    visited_results[result] = True
        return visited_results

    def get_articles(self):
        """Fetch unseen articles for the "add" phrases; write XML and raw text."""
        for phrase in self.add_phrases:
            try:
                results = self.wiki.find(phrase)
                for result in results:
                    if result in self.visited_results:
                        continue
                    self.visited_results[result] = True

                    article = self.wiki.get_article(result)
                    entry_src = 'wikipedia_' + self.category
                    entry_type = 'article'
                    entry_id = 'wikipedia_' + result.replace(' ', '_').replace('/', '_')
                    entry_title = article.heading
                    entry_date = datetime.now().strftime('%Y-%m-%d_%H-%M-%S-%f')
                    entry_desc = clean(article.summary)

                    # Skip articles whose cleaned summary is empty/whitespace.
                    if ''.join(entry_desc.split()) != '':
                        xml_string = bundle_xml(entry_src, entry_type, entry_id,
                                                entry_title, entry_date, entry_desc)

                        write_string(self.corpus_dir + '/' + entry_id.lower() + '.xml',
                                     xml_string, False)
                        write_string(self.raw_dir + '/' + entry_id.lower() + '.txt',
                                     entry_desc, False)

                        self.count = self.count + 1
                        if self.count % 100 == 0:
                            # Progress report every 100 articles (print()
                            # form replaces the py2-only statement).
                            print('Scanned ' + str(self.count) + ' wiki articles.')

            except Exception as e:
                # Best-effort: report and continue with the next phrase.
                print('Wiki Api Error! [' + str(e) + ']')
예제 #22
0
class WikiGrabber(object):
    """Grab wiki articles for glossary phrases and store them as XML."""

    def __init__(self, filenames):
        """Load the glossary from *filenames* and create the wiki client."""
        self.glossary = Glossary(filenames)
        self.wiki = WikiApi({})

    def get_articles(self, dir_name):
        """Fetch articles for every glossary phrase into *dir_name* as XML.

        Uses the module-level `visited_results` list to skip duplicates and
        prints a progress line every ~1000 new results.
        """
        global visited_results
        step = 1000 + len(visited_results)
        # Pre-bind so the except handler cannot hit a NameError when the
        # failure happens before the first loop assignment.
        phrase = None
        try:
            for phrase, flag in self.glossary.phrases:
                print(phrase)
                results = self.wiki.find(phrase)
                for result in results:
                    if result not in visited_results:
                        article = self.wiki.get_article(result)
                        self.article_to_xml(article, flag, dir_name)
                        visited_results.append(result)
                        if len(visited_results) > step:
                            print(phrase, len(visited_results))
                            step = step + 1000
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt and
            # SystemExit still propagate; original best-effort behavior
            # (log and stop) is otherwise preserved.
            print(phrase, len(visited_results))

    def article_to_xml(self, article, flag, dir_name):
        """Serialize *article* into an XML <Document> file under *dir_name*.

        Skips short articles and, when *flag* is set, articles whose
        description does not mention 'security'.
        """
        try:
            docId = 'Wiki_' + datetime.now().strftime('%Y-%m-%d_%H-%M-%S-%f')
            docType = 'Wiki'
            docSource = 'wikipedia'
            docDate = ''
            docTitle = article.heading
            docDesc = clean(article.summary)

            if len(docDesc.split()) < WORD_LEN_THRESHOLD:
                return

            if flag and ('security' not in docDesc.lower()):
                return

            document = lb.E.Document(
                lb.E.Title(docTitle),
                lb.E.Date(docDate),
                lb.E.Description(docDesc),
                id=docId, type=docType, src=docSource)
            doc = etree.tostring(document, pretty_print=True)

            xml_filename = dir_name + docId + '.xml'
            writeString(xml_filename, XML_HEAD + doc)
        except Exception as e:
            # Best-effort: report and skip this article.
            print(e)
예제 #23
0
def main():
    """Run the pygame dashboard: live weather for three cities plus a
    random-Wikipedia-article panel, with music-toggle and city buttons.

    Relies on module-level objects defined elsewhere in the original
    script (main_display, weather_image, clock, colours wood/white/black,
    and the helpers data_display, drawbutton, remove_tags,
    endlinefunction) -- TODO confirm against the full file.
    """
    status = True
    pygame.mixer.music.play(-1)
    music_status = 1
#   Create a wikiapi instance
    wiki_status = 1
    wiki_instance = WikiApi()
    wiki_instance = WikiApi({'locale': 'en'})
    namespace = None

    index1 = 0
    data_list = []

#   Load weather data into lists and dictionaries
    weather_location = 0
    connector = yweather.Client()
    weather_id_ny = connector.fetch_woeid('New York')
    weather_data_ny = connector.fetch_weather(str(weather_id_ny), metric=True)
    data_dict_ny = {}
    data_dict_ny.update({'Current Temperature': weather_data_ny["condition"]["temp"], \
                    'Sunrise': weather_data_ny['astronomy']['sunrise'],\
                         'Sunset': weather_data_ny['astronomy']['sunset'],
                    'Max Temperature': (str(weather_data_ny['forecast'][0]['high']) + " Degrees C"), \
                    'Min Temperature': (str(weather_data_ny['forecast'][0]['low'] + " Degrees C")),
                    'Wind': (str(weather_data_ny['wind']['speed'] + " km/h")), \
                    'Condition': weather_data_ny['condition']['text']})
    # NOTE(review): keys_list_* are indexed by position further below, which
    # requires Python 2 (dict.keys() returns a list there) and depends on
    # arbitrary dict ordering -- fragile; verify the labels actually match.
    keys_list_ny = data_dict_ny.keys()

    weather_id_buffalo = connector.fetch_woeid('Buffalo')
    weather_data_buffalo = connector.fetch_weather(str(weather_id_buffalo), metric=True)
    data_dict_buffalo = {}
    data_dict_buffalo.update({'Current Temperature': weather_data_buffalo["condition"]["temp"], \
                    'Sunrise': weather_data_buffalo['astronomy']['sunrise'],\
                              'Sunset': weather_data_buffalo['astronomy']['sunset'],
                    'Max Temperature': (str(weather_data_buffalo['forecast'][0]['high']) + " Degrees C"), \
                    'Min Temperature': (str(weather_data_buffalo['forecast'][0]['low'] + " Degrees C")),
                    'Wind': (str(weather_data_buffalo['wind']['speed'] + " km/h")), \
                    'Condition': weather_data_buffalo['condition']['text']})
    keys_list_buffalo = data_dict_buffalo.keys()

    weather_id_hyd = connector.fetch_woeid('Hyderabad')
    weather_data_hyd = connector.fetch_weather(str(weather_id_hyd), metric=True)
    data_dict_hyd = {}
    data_dict_hyd.update({'Current Temperature': weather_data_hyd["condition"]["temp"], \
                    'Sunrise': weather_data_hyd['astronomy']['sunrise'], \
                          'Sunset': weather_data_hyd['astronomy']['sunset'],
                    'Max Temperature': (str(weather_data_hyd['forecast'][0]['high']) + " Degrees C"), \
                    'Min Temperature': (str(weather_data_hyd['forecast'][0]['low'] + " Degrees C")),
                    'Wind': (str(weather_data_hyd['wind']['speed'] + " km/h")), \
                    'Condition': weather_data_hyd['condition']['text']})
    keys_list_hyd = data_dict_hyd.keys()

    # Main event/draw loop: poll input, render weather panel and the
    # current wiki article, flip the display at up to 100 FPS.
    while status:
            main_display.fill(black)
            pointer_location = pygame.mouse.get_pos()
            pointer_click = pygame.mouse.get_pressed()
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    pygame.quit()

#   "Next Article" button (325-405 x 20-50): re-trigger the wiki fetch
                if 325 < pointer_location[0] < 405 and 20 < pointer_location[1] < 50:
                    if pointer_click[0] == 1:
                        wiki_status = 1
#   "Toggle Music" button (700-780 x 20-50)
                if 700 < pointer_location[0] < 780 and 20 < pointer_location[1] < 50:
                    if pointer_click[0] == 1:
                        music_status = not music_status
                        if music_status == 0:
                            pygame.mixer.music.pause()
                        else:
                            pygame.mixer.music.unpause()
#   New York  Button Check
                if 20 < pointer_location[0] < 80 and 20 < pointer_location[1] < 50:
                    if pointer_click[0] == 1:
                        weather_location = 2

#   Buffalo  Button Check
                if 100 < pointer_location[0] < 160 and 20 < pointer_location[1] < 50:
                    if pointer_click[0] == 1:
                        weather_location = 1

#   Hyderabad  Button Check
                if 180 < pointer_location[0] < 240 and 20 < pointer_location[1] < 50:
                    if pointer_click[0] == 1:
                        weather_location = 0
            try:
                main_display.blit(weather_image, (0,0))
            except:
                pass

#   Data Display (0 = Hyderabad, 1 = Buffalo, 2 = New York)
            if weather_location == 0:
                data_display(110, data_dict_hyd['Current Temperature'], white, 80, 160)  # Temperature number
                data_display(20, "Deg C", white, 180, 130)                           # Degree
                data_display(15, keys_list_hyd[5] + " : " + data_dict_hyd['Condition'], white, 95, 260)  # Condition
                data_display(15, keys_list_hyd[1] + " : " + data_dict_hyd['Min Temperature'], white, 130, 320)
                data_display(15, keys_list_hyd[6] + " : " + data_dict_hyd['Max Temperature'], white, 130, 360)
                data_display(15, keys_list_hyd[4] + " : " + data_dict_hyd['Sunrise'], white, 95, 400)   # Sunrise
                data_display(15, keys_list_hyd[0] + " : " + data_dict_hyd['Sunset'], white, 95, 440)    # Sunset
                data_display(15, keys_list_hyd[3] + " : " + data_dict_hyd['Wind'], white, 95, 480)  # Wind Speed

            elif weather_location == 1:
                data_display(110, data_dict_buffalo['Current Temperature'], white, 80, 160)  # Temperature number
                data_display(20, "Deg C", white, 180, 130)                           # Degree
                data_display(15, keys_list_buffalo[5] + " : " + data_dict_buffalo['Condition'], white, 95, 260)
                data_display(15, keys_list_buffalo[1] + " : " + data_dict_buffalo['Min Temperature'], white, 130, 320)
                data_display(15, keys_list_buffalo[6] + " : " + data_dict_buffalo['Max Temperature'], white, 130, 360)
                data_display(15, keys_list_buffalo[4] + " : " + data_dict_buffalo['Sunrise'], white, 95, 400)
                data_display(15, keys_list_buffalo[0] + " : " + data_dict_buffalo['Sunset'], white, 95, 440)
                data_display(15, keys_list_buffalo[3] + " : " + data_dict_buffalo['Wind'], white, 95, 480)


            elif weather_location == 2:
                data_display(110, data_dict_ny['Current Temperature'], white, 80, 160)  # Temperature number
                data_display(20, "Deg C", white, 180, 130)                           # Degree
                data_display(15, keys_list_ny[5] + " : " + data_dict_ny['Condition'], white, 95, 260)  # Condition
                data_display(15, keys_list_ny[1] + " : " + data_dict_ny['Min Temperature'], white, 130, 320)
                data_display(15, keys_list_ny[6] + " : " + data_dict_ny['Max Temperature'], white, 130, 360)
                data_display(15, keys_list_ny[4] + " : " + data_dict_ny['Sunrise'], white, 95, 400)   # Sunrise
                data_display(15, keys_list_ny[0] + " : " + data_dict_ny['Sunset'], white, 95, 440)    # Sunset
                data_display(15, keys_list_ny[3] + " : " + data_dict_ny['Wind'], white, 95, 480)  # Wind Speed

#   Display Wiki Article: fetch a random article title, then look it up
#   through WikiApi and wrap its summary into data_list for rendering.
            if wiki_status == 1:
                del data_list[:]
                wiki_status = 0
                blahblah = True
                try:
                    url = 'http://en.wikipedia.org/wiki/Special:Random'
                    if namespace != None:
                        url += '/' + namespace
                    req = urllib2.Request(url, None, { 'User-Agent' : 'x'})
                    page = urllib2.urlopen(req).readlines()
                    wiki_draft1 = remove_tags(page[4])
                    wiki_title = wiki_draft1[:wiki_draft1.index('Wikipedia') - 2]
                    wiki_data_list = wiki_instance.find(wiki_title)
                    wiki_data = wiki_instance.get_article(wiki_data_list[0])
                    temp = endlinefunction(wiki_data.summary, data_list, 90)
                except (urllib2.HTTPError, urllib2.URLError):
                    print "Failed to get article"
                    raise

#   Buttons and Division Display
            pygame.draw.rect(main_display, white, (300, 0, 5, 600))
            pygame.draw.rect(main_display, white, (300, 70, 500, 5))
            drawbutton(wood, 700, 20, 80, 30, 10, "Toggle Music", black)
            drawbutton(white, 20, 20, 60, 30, 10, "New York", black)
            drawbutton(white, 100, 20, 60, 30, 10, "Buffalo", black)
            drawbutton(white, 180, 20, 60, 30, 10, "Hyderabad", black)
            drawbutton(wood, 325, 20, 80, 30, 10, "Next Article", black)

#   Cursor Display
            data_display(15, wiki_data.heading, wood, 540, 130)
            y_cood = 150
            j = 25
            for i in range(0, len(data_list)):
                y_cood = y_cood + j
                data_display(10, data_list[i], black, 540, y_cood)
            clock.tick(100)
            pygame.display.flip()
예제 #24
0
class Wikipedia_Scanner(object):
    """Scan Wikipedia articles for a category and write them out as corpora.

    Results of the "delete" glossary phrases are pre-marked as visited so
    the scan over the "add" glossary skips them.
    """

    def __init__(self, add_gloss_list, del_gloss_list, category, label):
        """Resolve phrase lists, output directories, client and counters."""
        self.add_phrases = get_phrases(add_gloss_list)
        self.del_phrases = get_phrases(del_gloss_list)
        self.category = category
        self.corpus_dir = CORPUS_DIR + '/' + label + '/wikipedia/' + category
        self.raw_dir = RAW_DATA_DIR + '/' + label + '/wikipedia/' + category
        self.wiki = WikiApi({})
        self.visited_results = self.get_results(self.del_phrases)
        self.count = 0

    def get_results(self, phrases):
        """Return a dict marking every wiki result for *phrases* as visited."""
        visited_results = {}
        for phrase in phrases:
            results = self.wiki.find(phrase)
            for result in results:
                # `in` replaces dict.has_key(), which no longer exists on
                # Python 3 (this block was otherwise py3-formatted).
                if result not in visited_results:
                    visited_results[result] = True
        return visited_results

    def get_articles(self):
        """Fetch unseen articles for the "add" phrases; emit XML + raw text."""
        for phrase in self.add_phrases:
            try:
                results = self.wiki.find(phrase)
                for result in results:
                    if result in self.visited_results:
                        continue
                    self.visited_results[result] = True

                    article = self.wiki.get_article(result)
                    entry_src = 'wikipedia_' + self.category
                    entry_type = 'article'
                    entry_id = 'wikipedia_' + result.replace(
                        ' ', '_').replace('/', '_')
                    entry_title = article.heading
                    entry_date = datetime.now().strftime(
                        '%Y-%m-%d_%H-%M-%S-%f')
                    entry_desc = clean(article.summary)

                    # Only keep articles with a non-empty cleaned summary.
                    if ''.join(entry_desc.split()) != '':
                        xml_string = bundle_xml(entry_src, entry_type,
                                                entry_id, entry_title,
                                                entry_date, entry_desc)

                        write_string(
                            self.corpus_dir + '/' + entry_id.lower() +
                            '.xml', xml_string, False)
                        write_string(
                            self.raw_dir + '/' + entry_id.lower() + '.txt',
                            entry_desc, False)

                        self.count = self.count + 1
                        if self.count % 100 == 0:
                            # Progress line every 100 articles; print() form
                            # replaces the py2-only print statement.
                            print('Scanned ' + str(self.count) +
                                  ' wiki articles.')

            except Exception as e:
                # Best-effort: report and continue with the next phrase.
                print('Wiki Api Error! [' + str(e) + ']')
예제 #25
0
def jarvis(data):
    """Dispatch a voice/text command string *data* to the matching action.

    The first word selects a web action (locate, play/search youtube,
    google/search, connect, who); otherwise *data* is matched against
    module-level command collections (wikipedia, status, welcome, play,
    newfile, readfile, searchweb, time, display, end, shutdownpc,
    folders, closeprogram) -- presumably defined elsewhere in the
    original script; TODO confirm.
    """
    first = data.split(" ")
    if first[0] == "locate" or first[0] == "location":
        import location
        return location.loco(first[1])
    # "play youtube X" / "search youtube X": open the first matching video.
    if (first[0] == "play" or first[0] == "search") and first[1] == "youtube":
        del (first[0])
        del (first[0])
        a = "+".join(first)
        b = " ".join(first)
        import urllib.request
        import urllib.parse
        import re

        query_string = urllib.parse.urlencode({"search_query": a})
        html_content = urllib.request.urlopen(
            "http://www.youtube.com/results?" + query_string)
        # Scrape 11-character video ids out of the results page HTML.
        search_results = re.findall(r'href=\"\/watch\?v=(.{11})',
                                    html_content.read().decode())
        print("playing:" + a)
        return webbrowser.open("http://www.youtube.com/watch?v=" +
                               search_results[0])
    if first[0] == "google" or first[0] == "search":
        del (first[0])
        a = "+".join(first)
        return webbrowser.open('https://www.google.co.in/search?q=' + a)
    if first[0] == "connect":
        del (first[0])
        a = "".join(first)
        return webbrowser.open(a + ".com")
    # "who X": show the Wikipedia summary and open the article image.
    if first[0] == "who":
        del (first[0])
        a = "".join(first)
        from wikiapi import WikiApi
        wiki = WikiApi()
        wiki = WikiApi({'locale': 'en'})
        results = wiki.find(a)
        article = wiki.get_article(results[0])
        print(article.summary)
        return webbrowser.open(article.image)

    # Fallback dispatch: every branch breaks, so this loop runs at most once
    # (it is effectively an if/elif chain).
    while (1):
        if data in wikipedia:
            wiki()
            break
        if data in status:
            cpustatus()
            break
        if data in welcome:
            speak("hi there")
            break
        if data in play:
            speak("ok sir")
            playsong()
            break
        if data in newfile:
            writefile()
            break
        if data in readfile:
            readfile()
            break
        if data in searchweb:
            speak("ok sir")
            search()
            break
        if data in time:
            speak(ctime())
            break
        if "close notepad" in data:
            clsnotepad()
            break
        if "close video" in data:
            clsvlc()
            break
        if "close browser" in data:
            clsbrowser()
            break
        if data in display:
            log.display()
            break
        if data in end:
            com = "close"
            return com
            break  # NOTE(review): unreachable -- follows a return
        if data in shutdownpc:
            shutdown()
            break
        if data in folders:
            directory()
            break
        if data in closeprogram:
            close()
            break
        else:
            print("I don't understand the command!! Try again")
            break
예제 #26
0
# "calculate" genders from Wikipedia articles
gender = []
seen = {}  # memoization: author -> gender
wiki = WikiApi()
for author in grouped.column("AUTHOR"):
    if author.lower() in seen:
        print(author, "already found previously")
        gender.append(seen[author.lower()])
        continue

    try:
        try:
            print("trying to find " + author + " in wikipedia")
            results = wiki.find(author)
            wikipedia_page = wiki.get_article(results[0]).url

        except Exception:
            # errors when article is not found; use google search
            # instead we try to limit number of google search queries
            # because google limits them for free accounts or something
            print("trying to find " + author + " in google")
            wikipedia_page = google_search(author + ' site: en.wikipedia.org',
                                           num=1)[0]['link']

        g = find_gender(wikipedia_page)

    except Exception:
        # TODO: Possibly search on google for the book title and author if still
        # not found, and find some other site that has pronouns on it, if there
        # are enough not-found cases. Also have to consider that wikipedia
예제 #27
0
 def wiki_search(self, text):
     """Return the Wikipedia article for the first search hit on *text*."""
     client = WikiApi()
     hits = client.find(text)
     first_article = client.get_article(hits[0])
     return first_article
예제 #28
0
Program was writing basing on paper https://arxiv.org/pdf/1704.00051.pdf
For more detail [email protected]\n''')

c.inf_threshold = 0.7
# Interactive QA loop: pick a topic, fetch its Wikipedia article, then
# answer each prepared question against the article content.
# NOTE(review): `wiki`, `questions`, `model`, `c` and `tools` are defined
# earlier in the original script (outside this excerpt) -- confirm.
while True:
    while True:
        print('What or who do you want to ask about? Example: Barak Obama')
        thing = input()
        results = wiki.find(thing)
        if len(results) > 0:
            print('Ok. I found few wiki pages about {}.'.format(thing))
            break
        else:
            print(
                'Can\'t find any wiki pages about {}. Try another one.'.format(
                    thing))

    # Use the first hit's full content as the QA context.
    article = wiki.get_article(results[0])
    context = article.content
    for question in questions:
        os.system('clear')
        print('Q: {}'.format(question))
        print('Search answers ...')
        answers, probs = tools.get_answer(context, question, model, c)
        print('Found {} answer(s):'.format(len(answers)))
        for i, (a, p) in enumerate(zip(answers, probs)):
            print(str(i + 1), a, ', probability = {0:.2f}'.format(p))
        print()
        print()
        time.sleep(5)
예제 #29
0
class TestWiki:
    """Integration tests for WikiApi against the live English Wikipedia."""

    @pytest.fixture(autouse=True)
    def set_up(self):
        # Shared fixture: every test gets a fresh client plus the search
        # results and top article for 'Bill Clinton'. Hits the network.
        self.wiki = WikiApi()
        self.results = self.wiki.find('Bill Clinton')
        self.article = self.wiki.get_article(self.results[0])

    def test_heading(self):
        """The top 'Bill Clinton' search hit resolves to the expected title."""
        expected_title = 'Bill Clinton'
        assert self.article.heading == expected_title

    def test_image(self):
        """The article exposes a resolvable image URL."""
        image_url = self.article.image
        assert_url_valid(url=image_url)

    def test_summary(self):
        """The top 'Tom Hanks' hit's summary mentions his full name parts."""
        hits = self.wiki.find('Tom Hanks')
        summary = self.wiki.get_article(hits[0]).summary

        for fragment in ('Thomas', 'Jeffrey', 'Hanks'):
            assert fragment in summary

    def test_content(self):
        """The article body is substantial (over 200 characters)."""
        content_length = len(self.article.content)
        assert content_length > 200

    def test_references(self):
        """References are exposed as a list.

        Fix: dropped the redundant `... is True` — `isinstance` already
        returns a bool, so comparing its result to True by identity adds
        nothing and obscures the assertion.
        """
        assert isinstance(self.article.references, list)

    def test_url(self):
        """The article URL is both valid and the expected canonical link."""
        article_url = self.article.url
        assert_url_valid(url=article_url)
        assert article_url == 'https://en.wikipedia.org/wiki/Bill_Clinton'

    def test_get_relevant_article(self):
        """Keyword filtering still surfaces the Bill Clinton article."""
        keywords = ['president', 'hilary']
        _article = self.wiki.get_relevant_article(self.results, keywords)

        assert 'Bill Clinton' in _article.heading
        assert 'President Bill Clinton' in _article.content
        assert len(_article.content) > 5000

    def test_get_relevant_article_no_result(self):
        """Unrelated keywords match none of the Bill Clinton results."""
        irrelevant = ['hockey player']
        assert self.wiki.get_relevant_article(self.results, irrelevant) is None

    def test__remove_ads_from_content(self):
        """_remove_ads_from_content strips Wikipedia boilerplate wrappers.

        The fixture wraps real article text in the 'From Wikipedia, the free
        encyclopedia' prefix and the 'Description above from the Wikipedia
        article ... licensed under CC-BY-SA' suffix; only the inner text must
        survive. The \xa0 escapes are non-breaking spaces from the scraped
        markup and must be preserved exactly.
        """
        content = (
            'From Wikipedia, the free encyclopedia. \n\nLee Strasberg '
            '(November 17, 1901 2013 February 17, 1982) was an American '
            'actor, director and acting teacher.\n'
            'Today, Ellen Burstyn, Al Pacino, and Harvey Keitel lead this '
            'nonprofit studio dedicated to the development of actors, '
            'playwrights, and directors.\n\nDescription above from the '
            'Wikipedia article\xa0Lee Strasberg,\xa0licensed under CC-BY-SA, '
            'full list of contributors on Wikipedia.')

        result_content = self.wiki._remove_ads_from_content(content)

        expected_content = (
            ' \n\nLee Strasberg '
            '(November 17, 1901 2013 February 17, 1982) was an American '
            'actor, director and acting teacher.\n'
            'Today, Ellen Burstyn, Al Pacino, and Harvey Keitel lead this '
            'nonprofit studio dedicated to the development of actors, '
            'playwrights, and directors.')
        assert expected_content == result_content

    @pytest.mark.parametrize(
        'url, expected_tables',
        [
            (
                'https://en.wikipedia.org/wiki/World_population',
                [
                    'Population by continent',
                    '10 most populous countries',
                    '10 most densely populated countries',
                    'Countries ranking highly in both total population and '
                    'population density',
                ],
            ),
            (
                'https://en.wikipedia.org/wiki/List_of_countries_and_'
                'dependencies_by_population',
                ['Sovereign states and dependencies by population'],
            ),
            (
                # A page with no extractable tables must yield no keys.
                'https://en.wikipedia.org/wiki/Influenza',
                [],
            ),
            ('https://en.wikipedia.org/wiki/Germany', ['Constituent states']),
            (
                'https://en.wikipedia.org/wiki/Chess_Classic',
                [
                    'Chess Classic Championship',
                    # Commented-out entries are tables present on the page
                    # that get_tables does not (or no longer does) return.
                    # 'Rapid Chess Open',
                    # 'Chess960 Rapid chess World Championship',
                    'FiNet Open Chess960',
                    # 'Chess960 Computer World Championship',
                ],
            ),
            (
                'https://en.wikipedia.org/wiki/List_of_missions_to_the_Moon',
                ['Missions by date'],
            ),
            (
                'https://en.wikipedia.org/wiki/'
                'List_of_people_who_have_walked_on_the_Moon',
                ['Moonwalkers'],
            )
        ],
    )
    def test_get_tables_returns_expected_keys(self, url, expected_tables):
        """get_tables keys match each live page's table captions, in order.

        NOTE(review): depends on the live content of these Wikipedia pages,
        so it can break when the articles are edited.
        """
        tables = self.wiki.get_tables(url=url)

        assert list(tables.keys()) == expected_tables

    def test_get_tables(self, mocker):
        """get_tables parses the COVID-19 page into caption-keyed tables.

        Checks the returned mapping's keys and the full row-by-row content of
        two of the tables. `mocker.ANY` stands in for the 'Ref.' column whose
        citation markup varies between page revisions.

        NOTE(review): pinned to the page as of September 2020; the `.T.to_dict()`
        calls suggest the values are pandas DataFrames — TODO confirm.
        """
        url = ('https://en.wikipedia.org/wiki/'
               'COVID-19_pandemic_by_country_and_territory')

        tables = self.wiki.get_tables(url=url)

        assert tables
        assert isinstance(tables, dict)
        # Table captions become dict keys, preserving page order.
        assert tuple(tables.keys()) == (
            'COVID-19 pandemic by location 20 September 2020',
            'COVID-19 cases and deaths by region, '
            'in absolute figures and per million '
            'inhabitants as of 5 September 2020',
            'First COVID-19 cases by country or territory',
            'States with no confirmed COVID-19 cases',
            'Partially recognized states with no confirmed cases',
            'Dependencies with no confirmed cases',
        )
        # Full expected content of the 'Dependencies' table, one dict per row.
        assert tables['Dependencies with no confirmed cases'].T.to_dict() == {
            0: {
                'Rank': 1,
                'Territory': 'American Samoa',
                'Population': 56700,
                'Status': 'Unincorporated territory',
                'Country': 'United States',
                'Continent': 'Oceania',
                'Ref.': mocker.ANY,
            },
            1: {
                'Rank': 2,
                'Territory': 'Cook Islands',
                'Population': 15200,
                'Status': 'Associated state',
                'Country': 'New Zealand',
                'Continent': 'Oceania',
                'Ref.': mocker.ANY,
            },
            2: {
                'Rank': 3,
                'Territory': 'Wallis and Futuna',
                'Population': 11700,
                'Status': 'Overseas collectivity',
                'Country': 'France',
                'Continent': 'Oceania',
                'Ref.': mocker.ANY,
            },
            3: {
                'Rank': 4,
                'Territory': 'Saint Helena, Ascension and Tristan da Cunha',
                'Population': 5633,
                'Status': 'Overseas territory',
                'Country': 'United Kingdom',
                'Continent': 'Africa',
                'Ref.': mocker.ANY,
            },
            4: {
                'Rank': 5,
                'Territory': 'Svalbard',
                'Population': 2667,
                'Status': 'Unincorporated area',
                'Country': 'Norway',
                'Continent': 'Europe',
                'Ref.': mocker.ANY,
            },
            5: {
                'Rank': 6,
                'Territory': 'Christmas Island',
                'Population': 1955,
                'Status': 'External territory',
                'Country': 'Australia',
                'Continent': 'Asia',
                'Ref.': mocker.ANY,
            },
            6: {
                'Rank': 7,
                'Territory': 'Norfolk Island',
                'Population': 1735,
                'Status': 'External territory',
                'Country': 'Australia',
                'Continent': 'Oceania',
                'Ref.': mocker.ANY,
            },
            7: {
                'Rank': 8,
                'Territory': 'Niue',
                'Population': 1520,
                'Status': 'Associated state',
                'Country': 'New Zealand',
                'Continent': 'Oceania',
                'Ref.': mocker.ANY,
            },
            8: {
                'Rank': 9,
                'Territory': 'Tokelau',
                'Population': 1400,
                'Status': 'Dependent territory',
                'Country': 'New Zealand',
                'Continent': 'Oceania',
                'Ref.': mocker.ANY,
            },
            9: {
                'Rank': 10,
                'Territory': 'Cocos (Keeling) Islands',
                'Population': 555,
                'Status': 'External territory',
                'Country': 'Australia',
                'Continent': 'Asia',
                'Ref.': mocker.ANY,
            },
            10: {
                'Rank': 11,
                'Territory': 'Pitcairn Islands',
                'Population': 50,
                'Status': 'Overseas territory',
                'Country': 'United Kingdom',
                'Continent': 'Oceania',
                'Ref.': mocker.ANY,
            },
        }
        # Full expected content of the 'States with no confirmed cases' table.
        assert tables['States with no confirmed COVID-19 cases'].T.to_dict(
        ) == {
            0: {
                'Rank': 1,
                'Country': 'North Korea[a]',
                'Population': 25778816,
                'Continent': 'Asia',
                'Ref.': mocker.ANY,
            },
            1: {
                'Rank': 2,
                'Country': 'Turkmenistan[a]',
                'Population': 6031200,
                'Continent': 'Asia',
                'Ref.': mocker.ANY,
            },
            2: {
                'Rank': 3,
                'Country': 'Solomon Islands',
                'Population': 686884,
                'Continent': 'Oceania',
                'Ref.': mocker.ANY,
            },
            3: {
                'Rank': 4,
                'Country': 'Vanuatu',
                'Population': 307145,
                'Continent': 'Oceania',
                'Ref.': mocker.ANY,
            },
            4: {
                'Rank': 5,
                'Country': 'Samoa',
                'Population': 198413,
                'Continent': 'Oceania',
                'Ref.': mocker.ANY,
            },
            5: {
                'Rank': 6,
                'Country': 'Kiribati',
                'Population': 119451,
                'Continent': 'Oceania',
                'Ref.': mocker.ANY,
            },
            6: {
                'Rank': 7,
                'Country': 'Micronesia',
                'Population': 115030,
                'Continent': 'Oceania',
                'Ref.': mocker.ANY,
            },
            7: {
                'Rank': 8,
                'Country': 'Tonga',
                'Population': 105695,
                'Continent': 'Oceania',
                'Ref.': mocker.ANY,
            },
            8: {
                'Rank': 9,
                'Country': 'Marshall Islands',
                'Population': 59190,
                'Continent': 'Oceania',
                'Ref.': mocker.ANY,
            },
            9: {
                'Rank': 10,
                'Country': 'Palau',
                'Population': 18094,
                'Continent': 'Oceania',
                'Ref.': mocker.ANY,
            },
            10: {
                'Rank': 11,
                'Country': 'Tuvalu',
                'Population': 11793,
                'Continent': 'Oceania',
                'Ref.': mocker.ANY,
            },
            11: {
                'Rank': 12,
                'Country': 'Nauru',
                'Population': 10823,
                'Continent': 'Oceania',
                'Ref.': mocker.ANY,
            },
        }