Example #1
from concurrent.futures import ThreadPoolExecutor
from typing import Dict

from mediawiki import MediaWiki


def fetch_wikidata(title: str) -> Dict:
    api_data = {}
    wikipedia = MediaWiki()

    try:
        page_data = wikipedia.page(title)
    except Exception:
        return {'message': '[wikipedia] error getting wikidata'}

    # Fetch the lazily loaded page attributes concurrently; the pool is
    # shut down cleanly by the context manager.
    with ThreadPoolExecutor(max_workers=6) as pool:
        poster = pool.submit(lambda: page_data.logos)
        content = pool.submit(lambda: page_data.sections)
        categories = pool.submit(lambda: page_data.categories)
        images = pool.submit(lambda: page_data.images)
        summary = pool.submit(lambda: page_data.summary)
        title_ = pool.submit(lambda: page_data.title)

        api_data['poster'] = poster.result()
        api_data['contents'] = content.result()
        api_data['categories'] = categories.result()
        api_data['images'] = images.result()
        api_data['summary'] = summary.result()
        api_data['title'] = title_.result()

    return api_data
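
A minimal usage sketch for the function above (the article title is arbitrary, and the pymediawiki package is assumed to be installed):

# Hypothetical call; any article title works here.
data = fetch_wikidata('Python (programming language)')
print(data.get('title'), len(data.get('images', [])))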
Example #2
    def __init__(self, url='https://{lang}.wikipedia.org/w/api.php', lang='en',
                 timeout=None, rate_limit=False,
                 rate_limit_wait=timedelta(milliseconds=50)):
        ''' overloaded init '''
        MediaWiki.__init__(self, url=url, lang=lang, timeout=timeout,
                           rate_limit=rate_limit,
                           rate_limit_wait=rate_limit_wait)
Example #3
    def __init__(self, coord, route):
        """ Instantiate a WikimediaApi object """

        self.lat = str(coord['lat']) if coord else ""
        self.lng = str(coord['lng']) if coord else ""
        self.route = route
        self.wikipedia = MediaWiki(lang=u'fr')
Example #4
def keywordExtraction():                                    # Extract the keyword from the user's input

    while True:
        keyword_sentence = input()

        if " is " in keyword_sentence:                       # for questions like "Who is - ?"
            keyword_sentences = keyword_sentence.split("is ")
        elif " are " in keyword_sentence:                    # for questions like "Who are the - ?"
            keyword_sentences = keyword_sentence.split("are ")
        elif " about " in keyword_sentence:                  # for questions like "What do you know about - ?"
            keyword_sentences = keyword_sentence.split("about ")
        elif " in " in keyword_sentence:                     # for sentences like "I'm interested in - "
            keyword_sentences = keyword_sentence.split("in ")
        elif " when " in keyword_sentence:                   # for sentences like "I want to know when - "
            keyword_sentences = keyword_sentence.split("when ")
        elif " where " in keyword_sentence:                  # for sentences like "I want to know where - "
            keyword_sentences = keyword_sentence.split("where ")
        else:                                               # if it is not one of the patterned questions
            wikipedia_mediawiki = MediaWiki()
            try:                                            # check whether the input itself has a Wikipedia page (e.g. the user typed only the keyword)
                wikiPage = wikipedia_mediawiki.page(keyword_sentence, auto_suggest=False)       # check without auto-suggest
                return [keyword_sentence, False]            # False = auto-suggest OFF
            except Exception:
                try:
                    wikiPage = wikipedia_mediawiki.page(keyword_sentence)                       # check with auto-suggest
                    return [keyword_sentence, True]         # True = auto-suggest ON
                except Exception:
                    print("I'm sorry, the information you want is not available on Wikipedia! Try something else!")
                    log("keywordEX,null")
                    continue

        keyword_sentences = keyword_sentences[1].split("?")
        [page, auto_suggest] = checkWiki(keyword_sentences[0])
        if page:           # if a Wikipedia page about the keyword exists
            return [keyword_sentences[0], auto_suggest]
Example #5
class WikiProvider(LookupProvider):
    '''Concrete provider which provides web results from Wikipedia.
    '''
    def __init__(self):
        '''Initialize WikiProvider with a MediaWiki instance.
        '''
        self._wiki = MediaWiki(
            user_agent="word_tools (https://github.com/ncdulo/word_tools)")
        LookupProvider.__init__(self)

    def lookup(self, word, limit=0):
        '''Yield str results for `word` up to `limit`. When `limit <= 0`,
        default to `limit = 3`.
        '''
        # Default to a limit of three results. Once the re-write of CLI
        # is complete, this should be updated, and likely removed
        if limit <= 0:
            limit = 3

        try:
            for result in self._wiki.opensearch(word, results=limit):
                title, _, url = result
                summary = self._wiki.page(title).summarize(chars=200)
                output = title + ' (' + url + ')\n' + summary
                yield output
        except exceptions.DisambiguationError as e:
            print('''Search term is ambiguous. There are some issues in the way
results are returned. Wikipedia suggests the following page
names. These may not be correct. This is a known issue.
                ''')
            print(e)
Example #6
class WikiScratcher:
    def __init__(self, category):
        self.wikipedia = MediaWiki(url='https://en.wikipedia.org/w/api.php',
                                   user_agent='wiki-data-loader',
                                   lang='en')
        self.category = category

    # returns {pagename: {sectionname: section}, ....}
    def get_sections(self, num_pages):
        res = {}
        page_titles = self.wikipedia.categorymembers(self.category,
                                                     results=num_pages,
                                                     subcategories=False)
        if len(page_titles) < num_pages:
            print('Only ' + str(len(page_titles)) + ' pages found !!!')
        for p_title in page_titles:
            res[p_title] = {}
            p = self.wikipedia.page(p_title)
            # add the summary
            res[p_title]['summary'] = p.summary
            # add all other sections
            section_titles = p.sections
            for s_title in section_titles:
                # ignore sections like 'references' or 'see also'
                if self._ignore_section(s_title):
                    continue
                section_text = p.section(s_title)
                # ignore empty sections which are in fact most likely subheaders
                if len(section_text) > 0:
                    res[p_title][s_title] = section_text
        return res
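
A possible usage sketch for WikiScratcher (the category name is arbitrary; _ignore_section is assumed to be defined elsewhere in the class):

# Hypothetical category; returns {pagename: {sectionname: section}, ...}
scratcher = WikiScratcher('Machine learning')
sections = scratcher.get_sections(num_pages=5)
for page_title, secs in sections.items():
    print(page_title, list(secs.keys()))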
Example #7
    def response(self, txObject):

        super(WikiLayer, self).response(txObject)

        if self.check_cmd(COMMANDS["WIKI"]["name"], txObject):

            key_value = parse_cmd_value(txObject[PROCESSED_INPUT])

            response_value = None
            try:
                wikipedia = MediaWiki()

                try:
                    response_value = wikipedia.page(key_value).summary
                except DisambiguationError as e:
                    response_value = str(e)

                txObject[PROCESSED_INPUT] = response_value
                STOPLAYER.send()

            except ConnectionError as e:
                txObject[PROCESSED_INPUT] = str(e)

        return txObject
Example #8
async def moegirl_search(q):
    moegirlwiki = MediaWiki(url='http://zh.moegirl.org/api.php')
    t = moegirlwiki.search(q)
    if len(t) == 0:
        return False
    p = moegirlwiki.page(t[0])
    return p.summary
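
Since moegirl_search is a coroutine, it has to be driven by an event loop; a minimal sketch, assuming Python 3.7+ and an arbitrary query:

import asyncio

# Returns False when the search yields nothing.
summary = asyncio.run(moegirl_search('初音ミク'))
print(summary or 'no result')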
Example #9
    def __init__(self):
        self.wikipedia = MediaWiki(lang=u'fr')
        self._latitude = None
        self._longitude = None
        self._response = None
        self._url = None
        self._summary = None
Example #10
def wikipedia_summary(topic, lang='en'):
    wikipedia = MediaWiki(lang=lang)
    search = wikipedia.search(topic)
    page = wikipedia.page(search[0])
    summary = wikipedia.summary(search[0])
    text = '**{}**\n\n{}\n**Read more at:** [{}]({})'.format(
        page.title, summary, page.title, page.url)
    return text
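
A quick sketch of calling it (the topic is arbitrary; the output uses Markdown-style markup):

print(wikipedia_summary('Alan Turing'))
print(wikipedia_summary('Tour Eiffel', lang='fr'))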
Example #11
class StoryTeller:
    """Pick nearby Wikipedia pages for a position and expose a summary."""
    def __init__(self):
        self.wikipedia = MediaWiki(lang=u'fr')
        self._latitude = None
        self._longitude = None
        self._response = None
        self._url = None
        self._summary = None

    def set_position(self, latitude, longitude):
        """Store the position and geosearch Wikipedia around it."""
        self._latitude = latitude
        self._longitude = longitude
        if self._latitude is None and self._longitude is None:
            self._response = []
        else:
            self._response = self.wikipedia.geosearch(
                latitude=self._latitude, longitude=self._longitude)

    def choice_title(self):
        """Return a random title from the geosearch results."""
        return random.choice(self._response)

    def get_informations(self):
        """Return the summary and URL of a randomly chosen nearby page."""
        if self._response == []:
            return [self._summary, self._url]
        else:
            page = self.wikipedia.page(self.choice_title())
            self._summary = page.summary
            self._url = page.url
            return [self._summary, self._url]
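
A usage sketch (the coordinates are arbitrary and point roughly at central Paris):

teller = StoryTeller()
teller.set_position(48.8566, 2.3522)
summary, url = teller.get_informations()
print(url)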
Example #12
def get_prediction():
    wikipedia = MediaWiki()
    word = request.args.get('word')
    # Set stop words language
    stop_words = get_stop_words('en')

    # split the query and drop stop words
    filtered_sentence = word.split()

    response = []

    for each in filtered_sentence:
        if each not in stop_words:
            response.append(each)

    string_query = ' '.join(response)

    serviceurl = 'https://maps.googleapis.com/maps/api/geocode/json?'

    address = string_query

    if len(address) < 1:
        return

    try:
        url = serviceurl + "key=" + app.config['KEY_API'] +\
              "&" + urllib.parse.urlencode({'address': address})

        uh = urllib.request.urlopen(url)
        data = uh.read().decode()
        js = json.loads(data)
    except Exception:
        print('==== Failure URL ====')
        js = None

    if not js or 'status' not in js or js['status'] != 'OK':
        print('==== Failure To Retrieve ====')
        print(js)
        return

    lat = js["results"][0]["geometry"]["location"]["lat"]
    lng = js["results"][0]["geometry"]["location"]["lng"]

    # send the coordinates to MediaWiki
    query = wikipedia.geosearch(str(lat), str(lng))

    # save the first answer
    history = query[0]

    # send the answer to MediaWiki
    summary = wikipedia.summary(history)

    # return the summary to the HTML view
    return jsonify({'html': summary})
Example #13
def apiWikipedia(search, language):
    print(language, search)
    if language == 'pt':
        language = 'pt-br'
    wikipedia = MediaWiki(lang=language)
    if len(wikipedia.search(search)) < 1:
        raise Exception('apiWikipedia: Content not found')
    page = wikipedia.page(search)
    return page.summary, page.url
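
A sketch of a call (the arguments are arbitrary):

summary, url = apiWikipedia('Alan Turing', 'en')
print(url)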
Example #14
def open_webpage(page_name):
    """
    Return the full content of a page from Wikipedia.
    """
    wiki = MediaWiki()
    page = wiki.page(page_name)
    content = page.content

    return content
Example #15
def webpage_content(page_name):
    """
    Return the summary of a page from Wikipedia.
    """
    wiki = MediaWiki()
    page = wiki.page(page_name)
    summary = page.summary

    return summary
Example #16
def getTopMatchesUsingCorrelation(keyword, links, numMatches):
    # Calculate correlation:
    # download each link and, for each link, find out how many times the current keyword occurs.

    # how many times this keyword occurs in each of its links
    keywordOccurenenceMap = {}
    remainingLinkSet = set(links)
    wikipedia = MediaWiki()

    #First get all links from db/cache
    articlesInCache = WikiArticles.objects.filter(title__in=links)
    for articleInCache in articlesInCache:
        #How many times is this keyword in link's associated wikipedia page
        title = articleInCache.title
        html = articleInCache.text
        text = covertHtml2Text(html)
        #Note that we are using link here and title as first argument
        addToKeywordOccurenceMap(title, text, keyword, keywordOccurenenceMap)
        #Remove from set, so that at the end we know what keyword we should fetch from wikipedia
        remainingLinkSet.remove(articleInCache.title)

    newWikiArticles = []
    for link in remainingLinkSet:

        try:
            l.warning("analyzing " + link)
        except Exception:
            l.warning("could not log link name")

        linkPage = None
        try:
            linkPage = wikipedia.page(link)
        except Exception as e:
            #TODO: Log-error
            continue

        if linkPage is None or linkPage == "":
            raise Exception(
                "Wikipedia page not found/or is empty for keyword " + link)
        title = linkPage.title
        html = linkPage.html
        text = covertHtml2Text(html)
        #Note that we are using link here and title as first argument
        addToKeywordOccurenceMap(link, text, keyword, keywordOccurenenceMap)
        #bulk update
        #newWikiArticle = WikiArticles(title=title,text=text)
        #newWikiArticles.append(newWikiArticle)
        try:
            WikiArticles.objects.create(title=title, text=text)
        except Exception as e:
            l.warning("Failed to save " + title)
            l.warning(str(e))
            #continue silently

    #WikiArticles.objects.bulk_create(newWikiArticles,batch_size=DJANGO_BULK_CREATE_BATCH_SIZE)
    return keywordOccurenenceMap
Example #17
    def make_geosearch(self, lat, lon):
        wikipedia = MediaWiki()
        wikipedia_result = wikipedia.geosearch(lat, lon)
        try:
            opensearch_result = self.make_opensearch(wikipedia_result[0])
            return opensearch_result[0][1], wikipedia_result[0]
        except IndexError:
            return "this is a very nice place but I do not have any story about this place."
Example #18
    def lookup(self, word):
        wikipedia = MediaWiki()
        # wikipedia.set_api_url('https://en.wikpedia.org/w/api.php')
        summary = ''
        search_results = wikipedia.opensearch(word)
        if len(search_results) > 0:
            page_title = search_results[0][0]
            page = wikipedia.page(page_title)
            parts = page.summary.split('. ')
            summary = parts[0]
        return summary
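
The method never touches self, so the same calls work standalone; a sketch with an arbitrary word (opensearch returns (title, snippet, url) tuples):

from mediawiki import MediaWiki

wikipedia = MediaWiki()
results = wikipedia.opensearch('Ada Lovelace')
if results:
    title, _, url = results[0]
    print(title, url)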
Example #19
def import_wiki(article_title):
    """
    This function fetch text from Wikipedia page based on the article title.
    This function returns the wikipedia article.

    article_title: The title of Wikipedia article (in string)
    """

    wikipedia = MediaWiki()
    article = wikipedia.page(article_title)
    # print(article.title)
    return article.content
Example #20
def find_short_meaning(search):
    try:
        wikipedia = MediaWiki()
        meaning = wikipedia.page(search.title())
    except DisambiguationError:
        return find_alter_meaning(search)
    else:
        if search.lower() != meaning.title.lower():
            return find_alter_meaning(search)
        def_meaning = meaning.summarize()
        return str(def_meaning + " Link for further reading: " +
                   wikipedia.opensearch(f'{meaning.title}', results=1)[0][2])
Example #21
def checkWiki(keyword):                     # Check whether a Wikipedia page corresponding to the keyword exists
    wikipedia_mediawiki = MediaWiki()
    try:
        wikiPage = wikipedia_mediawiki.page(keyword, auto_suggest=False)    # check without auto-suggest
        return [True, False]                # True = the page exists, False = auto-suggest OFF
    except Exception:
        try:
            wikiPage = wikipedia_mediawiki.page(keyword)    # check with auto-suggest
            return [True, True]             # True = the page exists, True = auto-suggest ON
        except Exception:
            print("I'm sorry, the information you want is not available on Wikipedia! Try something else!")
            return [False, False]           # False = the page doesn't exist
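
A sketch of how Example #4 consumes this helper (the keyword is arbitrary):

exists, auto_suggest = checkWiki('Albert Einstein')
if exists:
    print('page found, auto-suggest:', auto_suggest)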
Example #22
    def return_answer(self):
        """Return a dictionary of the form
        {'result': 2, 'commentary': "sentence from bot",
         'latitude': number, 'longitude': number,
         'adress': "info", 'summary': "text", 'link_wiki': "url"}.
        2 = result found, wiki found;
        1 = result found, no wiki; 0 = nothing found.
        When the result is 0 there are no latitude, longitude,
        or summary entries."""

        # if result from parse is null
        if self.sentence == "Error":
            self.result['result'] = 0
            self.result['commentary'] = random.choice(GENERIC_NO_ANSWER)

        # if there is a result
        else:
            # creating googlemaps client
            gmaps = googlemaps.Client(key=os.environ.get("BACKEND_KEY", ""))
            returned_list = gmaps.geocode(self.sentence)

            # if result is empty, we're returning a message
            # and a number that will let ajax know
            if not returned_list:
                self.result['result'] = 0
                self.result['commentary'] = random.choice(GENERIC_NO_ANSWER)
            else:
                # local variable holding the first googlemaps answer
                best_result = returned_list[0]

                compile_dic(best_result, self.result)

                wikipedia = MediaWiki(lang='fr')
                t = wikipedia.geosearch(latitude=self.result["latitude"],
                                        longitude=self.result["longitude"])
                # if wiki does not have stories regarding that place
                if not t:
                    self.result['result'] = 1
                    self.result['commentary'] = random.choice(
                        GENERIC_LOC_FOUND)

                # if wiki has full info
                else:
                    self.result['result'] = 2
                    self.result['commentary'] = random.choice(
                        GENERIC_LOC_FOUND)

                    p = wikipedia.page(t[0])
                    self.result["summary"] = p.summary[:250] + "..."
                    self.result["link_wiki"] = p.url
        return self.result
Example #23
def get_wikipedia_article(s_word):
    try:
        wikipedia = MediaWiki(url=wikiurl)
        wp_words = wikipedia.search(s_word, results=1)
        wp_article = wikipedia.page(wp_words[0])
        return wp_article
    except DisambiguationError as e:
        wp_article = wikipedia.page(random.choice(e.options))
        return wp_article
    except Exception as e:
        app.logger.info('Exception')
        app.logger.info(e)
        return False
Example #24
class WikiMedia:
    """Wikipedia class."""

    def __init__(self):
        self.wikipedia = MediaWiki()
        self.wikipedia.language = "fr"

    def get_infos(self, query):
        """Retrieve information from wikipedia.fr."""
        try:
            titles = self.wikipedia.search(query)
            if len(titles) > 0:
                infos = self.wikipedia.page(titles[0])
                summary = self.wikipedia.summary(titles[0], sentences=3)

                # Regex to remove == heading == markers from the summary:
                summary = re.sub(r"={2}\s.+={2}", r"", summary)
                status = True
                url = infos.url

            # Return empty results if no titles are returned from the API
            else:
                summary = ""
                url = ""
                status = False

        # Use one except block in case of disambiguation errors.
        # Allows searching for the next title if the first one leads
        # to a disambiguation error.

        except mediawiki.exceptions.DisambiguationError:
            if len(titles) > 1:
                try:
                    infos = self.wikipedia.page(titles[1])
                    summary = self.wikipedia.summary(titles[1], sentences=3)
                    summary = re.sub(r"={2}\s.+={2}", r"", summary)
                    url = infos.url
                    status = True

                except mediawiki.exceptions.DisambiguationError:
                    summary = ""
                    url = ""
                    status = False
                    logging.exception("Exception occurred")
            else:
                summary = ""
                url = ""
                status = False
                logging.exception("Exception occurred")

        return {"summary": summary, "url": url, "status": status}
Example #25
class WikimediaApi:
    """ Class that interacts with the Wikimedia API """

    def __init__(self, coord, route):
        """ Instantiate a WikimediaApi object """

        self.lat = str(coord['lat']) if coord else ""
        self.lng = str(coord['lng']) if coord else ""
        self.route = route
        self.wikipedia = MediaWiki(lang=u'fr')

    def geosearch(self):
        """ Return a list of Wikipedia pages around the stored
        coordinates """
        try:
            geores = self.wikipedia.geosearch(self.lat, self.lng, results=5)
        except Exception:
            geores = []
        return geores

    def get_pagetitle(self):
        """ Return the title of a page that matches the route """

        geores = self.geosearch()
        pagetitle = ""
        try:
            regex_route = r"" + self.route
            for title in geores:
                if re.match(regex_route, title):
                    pagetitle = title
        except Exception:
            pass
        if not pagetitle:
            pagetitle = geores[0] if geores else ""
        return pagetitle

    def get_about(self):
        """ Return a summary and the URL of a Wikipedia page """

        pagetitle = self.get_pagetitle()
        page = self.wikipedia.page(pagetitle) if pagetitle else ""
        about_url = page.url if page else ""
        try:
            regex = r'== Situation et accès ==\n.*'
            section = re.search(regex, page.content).group(0)
            regex_sub = r'== Situation et accès =='
            about_text = (re.sub(regex_sub, "", section)).strip()
        except Exception:
            about_text = page.summary if page else ""
        return {"about_text": about_text, 'about_url': about_url}
        return {"about_text": about_text, 'about_url': about_url}
Example #26
def wiki_search(query: str, lang='ru', unquote_percent_encoded=False) -> str:
    # Uses Wikipedia by default
    from mediawiki import MediaWiki
    wikipedia = MediaWiki(lang=lang)
    result = wikipedia.opensearch(query, results=1)
    if not result:
        return ''

    _, text, url = result[0]

    if unquote_percent_encoded:
        from urllib.parse import unquote
        url = unquote(url)

    return '{} ({})'.format(text, url)
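
A sketch of a call (the query is arbitrary; note the function defaults to Russian Wikipedia):

print(wiki_search('Гвидо ван Россум', unquote_percent_encoded=True))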
Example #27
    def __init__(self,
                 logger=DEFAULT_LOGGER,
                 separate: bool = True,
                 n: int = 3,
                 **kwargs):

        self.profiler = kwargs.get('profiler', DEFAULT_MEASURER)
        self.logger = logger

        self.tagger = SequenceTagger.load('ner-fast')
        self.wikipedia = MediaWiki()
        self.separate = separate
        self.n = n

        self.logger.info("Candidate selector is loaded and ready to use.")
Example #28
    def __init__(self):
        super().__init__()
        from os import path
        from json import loads
        self.plugin_name = path.basename(__file__).rsplit('.')[0]
        self.metadata = PluginUtilityService.process_metadata(f'plugins/extensions/{self.plugin_name}')
        self.plugin_cmds = loads(self.metadata.get(C_PLUGIN_INFO, P_PLUGIN_CMDS))
        self.osrs_wiki_url = self.metadata[C_PLUGIN_SET][P_WIKI_URL]
        self.osrs_user_agent = self.metadata[C_PLUGIN_SET][P_USER_AGENT]
        rprint(
            f"{self.metadata[C_PLUGIN_INFO][P_PLUGIN_NAME]} v{self.metadata[C_PLUGIN_INFO][P_PLUGIN_VERS]} Plugin Initialized.")
        try:
            self.osrs_wiki = MediaWiki(url=self.osrs_wiki_url, user_agent=self.osrs_user_agent)
        except Exception:
            rprint(f"{self.plugin_name} Plugin could not be initialized.")
Example #29
    def __init__(self, title):
        self.title = title
        self.mediawiki = get_page(title)
        self.text = MediaWiki().page(title).content
        self.sections = [
            Section.create(mediawiki, text) for mediawiki, text in zip(
                split_by_section(self.mediawiki), split_by_section(self.text))
        ]
        self.sections[0].head = self.title
Example #30
def getAnchorTags(list_of_names):
    wikipedia = MediaWiki()
    output = []
    for x in list_of_names:
        per = []
        try:
            curr = wikipedia.page(x)
            soup = BeautifulSoup(request.urlopen(curr.url).read(), "html.parser")
            soup = soup.find('p')
            temp = [tag['href'] for tag in soup.select('a[href]')]
            for g in temp:
                if 'wiki' in g and 'ogg' not in g:
                    k = g[6:]  # strip the leading '/wiki/' prefix
                    per.append(k)
        except DisambiguationError:
            per = []
        output += per
    return output
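
A sketch of a call (the name is arbitrary; BeautifulSoup, urllib.request and the mediawiki exceptions are assumed to be imported as in the snippet):

tags = getAnchorTags(['Alan Turing'])
print(tags[:5])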