Example #1
def setup():
    """
    Sets up global wiki object for Wikipedia lookups.
    """
    global wiki, imdb
    wiki = WikiApi()
    imdb = Imdb(anonymize=True)
Example #2
 def runSearchInput(self):
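     # Read the plugin parameters, then build one text block from the
     # Wikipedia results, honoring the Include* flags and the result limit.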
     searchFor = self.getPluginParamValue("SearchFor")
     locale = self.getPluginParamValue("Locale")
     limitResultsTo = self.getPluginParamValueAsInt("LimitResultsTo")
     includeContent = self.getPluginParamValueAsTrueOrFalse(
         "IncludeContent")
     includeHeading = self.getPluginParamValueAsTrueOrFalse(
         "IncludeHeading")
     includeSummary = self.getPluginParamValueAsTrueOrFalse(
         "IncludeSummary")
     includeURL = self.getPluginParamValueAsTrueOrFalse("IncludeURL")
     wiki = WikiApi({"locale": locale})
     content = ""
     cnt = 0
     for result in wiki.find(searchFor):
         article = wiki.get_article(result)
         if includeHeading:
             content = "{0}\n{1}".format(content, article.heading)
         if includeURL:
             content = "{0}\n{1}".format(content, article.url)
         if includeSummary:
             content = "{0}\n{1}".format(content, article.summary)
         if includeContent:
             content = "{0}\n{1}".format(content, article.content)
         content = "{0}\n\n".format(content)
         cnt += 1
         if cnt >= limitResultsTo:
             break
     content = content.strip()
     self.setInputContent(content)
     return content
Example #3
    def test_cache_not_populated_when_disabled(self):
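        # With caching disabled, find() must leave the cache directory empty.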
        wiki = WikiApi({'cache': False})

        assert self._get_cache_size(wiki) == 0
        wiki.find('Bob Marley')
        assert self._get_cache_size(wiki) == 0
        shutil.rmtree(wiki.cache_dir, ignore_errors=True)
Example #4
 def set_up(self):
     # using an Italian-Emilian locale that is full of unicode symbols
     wiki = WikiApi({'locale': 'eml'})
     result = wiki.find('Bulaggna')[0]
     return {
         'wiki': wiki,
         'result': result,
     }
Example #5
 def set_up(self):
     wiki = WikiApi()
     results = wiki.find('Bill Clinton')
     article = wiki.get_article(results[0])
     return {
         'wiki': wiki,
         'results': results,
         'article': article,
     }
Example #6
import sys

from wikiapi import WikiApi


def get_url(query, log_file):
    wiki = WikiApi()
    results = wiki.find(query)
    if len(results) == 0:
        sys.stderr.write("No Wikipedia article found for '" + query + "'\n")
    else:
        article = wiki.get_article(results[0])
        print(article.url)
        with open(log_file, 'a') as f:
            f.write(article.url + "\n")
Example #7
import rake  # RAKE keyword-extraction library

from wikiapi import WikiApi


def get_wiki_phrases(word):
    wiki = WikiApi({'locale': 'en'})
    results = wiki.find(word)
    print(results)
    phrase = ""
    for i in range(min(4, len(results))):
        article = wiki.get_article(results[i])
        phrase = phrase + " " + article.content
    rake_object = rake.Rake("SmartStoplist.txt", 4, 3, 10)

    # Now we have a RAKE object that extracts keywords where:
    #   each word has at least 4 characters,
    #   each phrase has at most 3 words, and
    #   each keyword appears in the text at least 10 times.
    keywords = rake_object.run(phrase)
    return keywords[0:20]
Example #8
    def __init__(self, add_gloss_list, del_gloss_list, category, label):
        """
		Initialize the class.
		"""
        self.add_phrases = get_phrases(add_gloss_list)
        self.del_phrases = get_phrases(del_gloss_list)
        self.category = category
        self.corpus_dir = CORPUS_DIR + '/' + label + '/wikipedia/' + category
        self.raw_dir = RAW_DATA_DIR + '/' + label + '/wikipedia/' + category
        self.wiki = WikiApi({})
        self.visited_results = self.get_results(self.del_phrases)
        self.count = 0
Example #9
def get_wikipedia_details(keyword):
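    # Look up a keyword and return basic article fields, skipping hits whose
    # lead image is a disambiguation icon.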
    wiki = WikiApi()
    results = wiki.find(keyword)
    if len(results) > 0:
        article = wiki.get_article(results[0])
        if 'Disambig' not in article.image:
            return {
                'heading': article.heading,
                'image': article.image,
                'summary': article.summary,
                'url': article.url
            }
    return None
Example #10
    def test_cache_populated(self):
        wiki = WikiApi({'cache': True, 'cache_dir': '/tmp/wikiapi-test'})

        assert self._get_cache_size(wiki) == 0
        # Make multiple calls to ensure no duplicate cache items created
        assert wiki.find('Bob Marley') == wiki.find('Bob Marley')
        assert self._get_cache_size(wiki) == 1

        # Check cache keys are unique
        assert wiki.find('Tom Hanks') != wiki.find('Bob Marley')

        assert self._get_cache_size(wiki) == 2
        shutil.rmtree(wiki.cache_dir, ignore_errors=True)
Example #11
def wiki_search(query):
    wiki = WikiApi()
    wikiurls = []
    lst = query.split(",")
    # Integer division, so the per-term limit stays an int under Python 3
    num = 10 // len(lst)
    for i in lst:
        results = wiki.find(i)
        cnt = 0
        for j in results:
            cnt = cnt + 1
            article = wiki.get_article(j)
            wikiurls.append(article.url)
            if cnt >= num:
                break
    return wikiurls
Example #12
import string

from wikiapi import WikiApi


def get_full_name_from_wiki(name):
    wiki = WikiApi()
    results = wiki.find(name)
    if len(results) > 0:
        article = wiki.get_article(results[0])
        new_name = article.summary
        new_name = new_name[:new_name.find('(')-1]
        if new_name.find(' refer ') != -1:
            if len(results) > 1:
                article = wiki.get_article(results[1])
                new_name = article.summary
                new_name = new_name[:new_name.find('(') - 1]
            else:
                return None
        table = str.maketrans({key: None for key in string.punctuation + '\r\n'})
        new_name = new_name.translate(table)
        if len(new_name) > 4 and len(new_name) < 50:
            return new_name
        else:
            return None
    else:
        return None
Example #13
    def setUp(self):
        """Set up all of the requirements for testing
        """

        self.pos_lex = naivebayes.generate('sentiment/pos.txt',
                                           naivebayes.lexicon())
        self.neg_lex = naivebayes.generate('sentiment/neg.txt',
                                           naivebayes.lexicon())
        self.wiki = WikiApi()
        self.r = praw.Reddit(client_id='l-Gz5blkt7GCUg',
                             client_secret='_xLEgNing89k6__sWItU1_j9aR8',
                             user_agent='testscript by /u/pbexe')
        self.test_sentence = 'The cat sat on the mat. The dog however, did not!'
        self.test_sentence_tokenized = [[('The', 'DT'), ('cat', 'NN'),
                                         ('sat', 'VBD'), ('on', 'IN'),
                                         ('the', 'DT'), ('mat', 'NN'),
                                         ('.', '.')],
                                        [('The', 'DT'), ('dog', 'NN'),
                                         ('however', 'RB'), (',', ','),
                                         ('did', 'VBD'), ('not', 'RB'),
                                         ('!', '.')]]
        self.test_sentence_with_entities = 'Dr Foster went to Glouster'
        self.test_sentence_with_entities_nodes = ['Dr Foster', 'Glouster']
        self.story = Story(source='http://example.com/',
                           content='This is a title')
        self.story.save()
        self.node1 = Node(name='Key word 1',
                          date=timezone.now(),
                          collectedFrom=self.story)
        self.node1.save()
        self.node2 = Node(name='Key word 2',
                          date=timezone.now(),
                          collectedFrom=self.story)
        self.node2.save()
        self.node3 = Node(name='Key word 3',
                          date=timezone.now(),
                          collectedFrom=self.story)
        self.node3.save()
Example #14
def getWikiArticle(word, locale):
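    # Return the article for the first search hit, or None when the search
    # finds nothing.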
    wiki = WikiApi({'locale': locale})
    results = wiki.find(word)
    result = next(iter(results or []), None)
    return wiki.get_article(result) if result else None
Example #15
def jarvis(data):
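    # Dispatch on the first word of the command string.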
    first = data.split(" ")
    if first[0] == "locate" or first[0] == "location":
        import location
        return location.loco(first[1])
    if (first[0] == "play" or first[0] == "search") and first[1] == "youtube":
        del (first[0])
        del (first[0])
        a = "+".join(first)
        b = " ".join(first)
        import urllib.request
        import urllib.parse
        import re

        query_string = urllib.parse.urlencode({"search_query": a})
        html_content = urllib.request.urlopen(
            "http://www.youtube.com/results?" + query_string)
        search_results = re.findall(r'href=\"\/watch\?v=(.{11})',
                                    html_content.read().decode())
        print("playing:" + a)
        return webbrowser.open("http://www.youtube.com/watch?v=" +
                               search_results[0])
    if first[0] == "google" or first[0] == "search":
        del (first[0])
        a = "+".join(first)
        return webbrowser.open('https://www.google.co.in/search?q=' + a)
    if first[0] == "connect":
        del (first[0])
        a = "".join(first)
        return webbrowser.open(a + ".com")
    if first[0] == "who":
        del (first[0])
        a = "".join(first)
        from wikiapi import WikiApi
        wiki = WikiApi()
        wiki = WikiApi({'locale': 'en'})
        results = wiki.find(a)
        article = wiki.get_article(results[0])
        print(article.summary)
        return webbrowser.open(article.image)

    while True:
        if data in wikipedia:
            wiki()
            break
        if data in status:
            cpustatus()
            break
        if data in welcome:
            speak("hi there")
            break
        if data in play:
            speak("ok sir")
            playsong()
            break
        if data in newfile:
            writefile()
            break
        if data in readfile:
            readfile()
            break
        if data in searchweb:
            speak("ok sir")
            search()
            break
        if data in time:
            speak(ctime())
            break
        if "close notepad" in data:
            clsnotepad()
            break
        if "close video" in data:
            clsvlc()
            break
        if "close browser" in data:
            clsbrowser()
            break
        if data in display:
            log.display()
            break
        if data in end:
            com = "close"
            return com
        if data in shutdownpc:
            shutdown()
            break
        if data in folders:
            directory()
            break
        if data in closeprogram:
            close()
            break
        else:
            print("I don't understand the command!! Try again")
            break
Example #16
File: wikify.py Project: dilawar/OldHippo
except Exception as e:
    nltk.download('stopwords')
    nltk.download('punkt')
    nltk.download('words')
    nltk.download('wordnet')
    nltk.download('averaged_perceptron_tagger')

from nltk.corpus import stopwords
from nltk.corpus import wordnet
import sys
import os
from wikiapi import WikiApi
import urllib2
import html2text

wiki_ = WikiApi()
common_ = set(nltk.corpus.words.words())

isNoun = lambda x: x[:2] == 'NN'


def url_exists(url):
    try:
        ret = urllib2.urlopen(url)
    except urllib2.URLError:
        # urllib2 raises (rather than returning) on 404s and network errors
        return False
    return ret.code == 200


def wiki_link(query):
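    # Build the English Wikipedia URL for the query and check that the page
    # actually exists.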
    wikiLink = 'https://en.wikipedia.org/wiki/%s' % query
    if url_exists(wikiLink):
Example #17
 def setUp(self):
     # using an Italian-Emilian locale that is full of unicode symbols
     self.wiki = WikiApi({'locale': 'eml'})
     self.res = self.wiki.find('Bulaggna')[0]
     self.article = None
Example #18
# -*- coding: utf-8 -*-
from wikiapi import WikiApi
import unittest

wiki = WikiApi({})
results = wiki.find('Bill Clinton')
article = wiki.get_article(results[0])  # taking first search result


class TestWiki(unittest.TestCase):
    def test_heading(self):
        self.assertIsNotNone(article.heading)

    def test_image(self):
        self.assertTrue(isinstance(article.image, str))

    def test_summary(self):
        self.assertGreater(len(article.summary), 100)

    def test_content(self):
        self.assertGreater(len(article.content), 200)

    def test_references(self):
        self.assertTrue(isinstance(article.references, list))

    def test_url(self):
        self.assertEqual(article.url,
                         u"http://en.wikipedia.org/wiki/Bill_Clinton")

    def test_get_relevant_article(self):
        keywords = ['president', 'hilary']
Example #19
from bs4 import BeautifulSoup
import urllib2

from wikiapi import WikiApi
wiki = WikiApi({'locale': 'en'})

# b = wiki.get_article('High Crusade')

# print(b.url)

#results = wiki.find('Barack Obama').content

#print(results)


def get_title_from_search(string):
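    # Return the title of the first search result; raises IndexError when
    # there are none.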
    return wiki.find(string)[0]


def get_url_from_search(string):
    try:
        article_title = wiki.find(string)[0]
    except IndexError:
        return False
    article_contents = wiki.get_article(article_title)
    return article_contents.url


# print(get_url_from_search('Stranger in a Strange Land'))
Example #20
import lxml.etree
import urllib

from wikiapi import WikiApi

title = "2016 Summer Olympics"

params = { "format":"xml", "action":"query", "prop":"revisions", "rvprop":"timestamp|comment|content" }
params["titles"] = "API|%s" % urllib.quote(title.encode("utf8"))
qs = "&".join("%s=%s" % (k, v)  for k, v in params.items())
url = "http://en.wikipedia.org/w/api.php?%s" % qs
tree = lxml.etree.parse(urllib.urlopen(url))
print (tree)
revs = tree.xpath('//rev')

all_result_xml = revs[0].text

wikiapi = WikiApi({'locale': 'en'})
index_i = wikiapi.getIndex_substring("{{Infobox", all_result_xml)
index_f = wikiapi.getIndex_substring("{{" + title, all_result_xml)
print(index_i, " ", index_f)
infobox_result = all_result_xml[index_i:index_f]

a = infobox_result.split("| ")
for b in a:
    print(b)
Example #21
'''
Translate the names in VGGFace2 dataset into English.
You will need to manually install the following libraries:
unidecode
googletrans
wikiapi
author: Feng Wang (UESTC)
'''
import os
import csv
import string
import unidecode
from googletrans import Translator
translator = Translator()
from wikiapi import WikiApi
wiki = WikiApi()

def is_number(uchar):
    return uchar >= u'0' and uchar <= u'9'

def is_alphabet(uchar):
    return (uchar >= u'a' and uchar <= u'z') or (uchar >= u'A' and uchar <= u'Z')

def check_english(name):
    flag = True
    for uchar in name:
        if (not is_alphabet(uchar)) and (not is_number(uchar)) and (uchar != u'\u0020') and (uchar != u'-') and (uchar != u'.'):
            flag = False
    return flag

def non_english_character_count(name):
Example #22
#!/usr/bin/env python
#_*_coding:utf8_*_
import os, json, re, codecs, sys, argparse, collections
from pprint import pprint
from wikiapi import WikiApi
# from nltk.corpus import stopwords
from math import sqrt

json_data = {}
mxspath = os.environ.get('MXS_PATH')
n = 0
list_path = []

wiki = WikiApi({'locale': 'fr'})


def cut_word(content):
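    # Strip non-letters, lower-case the text, and return the word list.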
    text = re.sub("[^a-zA-Z]", " ", content)
    words = text.lower().split()
    # stops = set(stopwords.words('french'))
    tags = [w for w in words]
    return (tags)


def merge_tag(tag1=None, tag2=None):
    v1 = []
    v2 = []
    tag_dict1 = collections.Counter(tag1)
    tag_dict2 = collections.Counter(tag2)
    merged_tag = set()
Example #23
#!/usr/bin/python3
# -*- coding: utf-8 -*-

from wikiapi import WikiApi
import requests, pprint

# This is suitable for extracting content that is organized by pages under a title
# This code requires the wiki-api python library created by Richard O'Dwyer of UK
# https://github.com/richardasaurus/wiki-api

wiki = WikiApi({'locale': 'ta'})  # specify your locale; 'en' is the default


# Get the page text of the article with the given title
def getArticleParagraphs(title):
    print(title)
    articleFull = wiki.get_article(title)
    fullText = articleFull.content

    article = ""
    paragraphs = fullText.split('\n\n')
    # print(paragraphs)
    # We want only whole paragraphs that end in ".", "!", "?" or '"', not fragments
    for paragraph in paragraphs:
        if len(paragraph) > 30:
            end = paragraph[-1]
            if end in ('.', '!', '?', '"'):
                article = article + "\n\n" + paragraph
    return article
Example #24
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from articleData import my_articles
import requests
from wikiapi import WikiApi

wiki = WikiApi({'locale': 'es'})


def getURL(searchQuery):
    results = wiki.find(searchQuery)

    try:
        article = wiki.get_article(results[0])
    except IndexError:
        article = "no article exists for: " + searchQuery

    try:
        url = article.url
    except AttributeError:
        url = "no url exists for: " + searchQuery

    print(url)

Example #25
 def __init__(self):
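     # Remember URLs of pages the classifier already rejected so they can be skipped.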
     self.classifier = classifier.Classifier()
     self.wiki = WikiApi()
     self.bad_urls = set(
         [p['url'] for p in self.classifier.non_accepted_pages])
Example #26
 def set_up(self):
     self.wiki = WikiApi()
     self.results = self.wiki.find('Bill Clinton')
     self.article = self.wiki.get_article(self.results[0])
Example #27
 def wiki_search(self, text):
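     # Return the article for the top search hit for the given text.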
     wiki = WikiApi()
     results = wiki.find(text)
     article = wiki.get_article(results[0])
     return article