示例#1
0
 def __init__(self,
              page = 1,
              count = 100,
              lang = mainLangs,
              sortBy = "cosSim",
              sortByAsc = False,
              returnInfo = ReturnInfo(articleInfo = ArticleInfoFlags(bodyLen = -1))):
     """
     Request one page of the articles that belong to the event.

     @param page: page of the articles to return (1, 2, ...)
     @param count: number of articles to return per page (at most 100)
     @param lang: a single language or a list of languages in which to return the articles
     @param sortBy: order in which event articles are sorted. Options: id (internal id), date (published date), cosSim (closeness to event centroid), sourceImportanceRank (importance of the news source, custom set), sourceAlexaGlobalRank (global rank of the news source), sourceAlexaCountryRank (country rank of the news source), socialScore (total shares in social media)
     @param sortByAsc: sort in ascending order (True) or descending order (False) of the sortBy value
     @param returnInfo: what details should be included in the returned information
     """
     assert page >= 1, "page has to be >= 1"
     assert count <= 100, "at most 100 articles can be returned per call"

     self.resultType = "articles"
     # paging settings
     self.articlesPage, self.articlesCount = page, count
     self.articlesLang = lang
     # ordering settings
     self.articlesSortBy, self.articlesSortByAsc = sortBy, sortByAsc

     # the return-info flags are merged in as additional instance attributes
     articleDetailParams = returnInfo.getParams("articles")
     self.__dict__.update(articleDetailParams)
示例#2
0
 def suggestLocations(self, prefix,
                      sources=["place", "country"],
                      lang="eng",
                      count=20,
                      countryUri=None,
                      sortByDistanceTo=None,
                      returnInfo=ReturnInfo()):
     """
     Suggest geo locations (cities or countries) whose name contains the prefix.

     @param prefix: input text that should be contained in the location name
     @param sources: what types of locations are we interested in. Possible options are "place" and "country"
     @param lang: language in which the prefix is specified
     @param count: number of returned suggestions
     @param countryUri: if provided, then return only those locations that are inside the specified country
     @param sortByDistanceTo: if provided, a (lat, long) tuple or list; the locations are returned sorted by the distance to it
     @param returnInfo: what details about locations should be included in the returned information
     @return: whatever self.jsonRequest returns for the "/json/suggestLocations" call
     """
     # a missing country filter is sent as an empty string
     countryFilter = countryUri or ""
     params = {"prefix": prefix, "count": count, "source": sources, "lang": lang, "countryUri": countryFilter}
     params.update(returnInfo.getParams())

     if sortByDistanceTo:
         assert isinstance(sortByDistanceTo, (tuple, list)), "sortByDistanceTo has to contain a tuple with latitude and longitude of the location"
         assert len(sortByDistanceTo) == 2, "The sortByDistanceTo should contain two float numbers"
         # first element is the latitude, second the longitude
         params["closeToLat"] = sortByDistanceTo[0]
         params["closeToLon"] = sortByDistanceTo[1]

     return self.jsonRequest("/json/suggestLocations", params)
示例#3
0
    def __init__(self,
                 maxArticleCount=60,
                 updatesAfterTm=None,
                 lang=None,
                 mandatorySourceLocation=False,
                 returnInfo=ReturnInfo()):
        """
        get the list of articles that were recently added to the Event Registry and match the selected criteria
        @param maxArticleCount: max articles to return (at most 100, enforced by the assertion below)
        @param updatesAfterTm: the time after which the articles were added (returned by previous call to the same method).
            The attribute is only set when a value is provided
        @param lang: return only articles in the specified languages (None if no limits). accepts string or a list of strings
        @param mandatorySourceLocation: return only articles for which we know the source's geographic location
        @param returnInfo: what details should be included in the returned information
        """
        assert maxArticleCount <= 100, "at most 100 articles can be returned per call"
        self.resultType = "recentActivity"
        self.recentActivityArticlesMaxArticleCount = maxArticleCount
        # optional filters are only stored when explicitly given, so that unset
        # filters do not appear among the instance attributes at all
        if updatesAfterTm is not None:
            self.recentActivityArticlesUpdatesAfterTm = updatesAfterTm
        if lang is not None:
            self.recentActivityArticlesLang = lang

        self.recentActivityArticlesMandatorySourceLocation = mandatorySourceLocation
        # the return-info flags are merged in as additional instance attributes
        self.__dict__.update(returnInfo.getParams("recentActivityArticles"))
示例#4
0
 def __init__(self,
              count=20,
              source="concept",
              lang=["eng"],
              maxDayDiff=sys.maxsize,
              returnInfo=ReturnInfo()):
     """
     Request a list of stories (clusters) that are similar to the event.

     @param count: number of similar stories to return (at most 200)
     @param source: how is the similarity with other stories computed. Options: concept, cca
     @param lang: in what language(s) should be the returned stories
     @param maxDayDiff: maximum difference in days between the returned stories and the tested event.
         When left at the default (sys.maxsize) no limit attribute is set
     @param returnInfo: what details should be included in the returned information
     """
     assert count <= 200

     # settings that are always stored, in the order they are assigned
     alwaysSet = (
         ("resultType", "similarStories"),
         ("similarStoriesCount", count),
         ("similarStoriesSource", source),
         ("similarStoriesLang", lang),
     )
     for attrName, attrValue in alwaysSet:
         setattr(self, attrName, attrValue)

     # the day limit is stored only when the caller moved it away from the default
     if maxDayDiff != sys.maxsize:
         self.similarStoriesMaxDayDiff = maxDayDiff

     storyDetailParams = returnInfo.getParams("similarStories")
     self.__dict__.update(storyDetailParams)
 def __init__(self, returnInfo=ReturnInfo()):
     """
     Set up a request with result type "info".

     @param returnInfo: what details should be included in the returned information
     """
     self.resultType = "info"
     # the return-info flags are merged in as additional instance attributes
     infoParams = returnInfo.getParams("info")
     self.__dict__.update(infoParams)
 def __init__(self, returnInfo=ReturnInfo()):
     """
     Request the details about an event.

     @param returnInfo: what details should be included in the returned information
     """
     self.resultType = "info"
     # the return-info flags are merged in as additional instance attributes
     detailParams = returnInfo.getParams("info")
     self.__dict__.update(detailParams)
    def getTopConceptCorrelations(self,
            candidateConceptsQuery = None,
            candidatesPerType = 1000,
            conceptType = None,
            exactCount = 10,
            approxCount = 0,
            returnInfo = ReturnInfo()):
        """
        compute concepts that correlate the most with the input data. If candidateConceptsQuery is provided we first identify the
        concepts that are potentially returned as top correlations. Candidates are obtained by making the query and analyzing the
        concepts that appear in the resulting articles. The top concepts are used as candidates among which we return the top correlations.
        If conceptType is provided then only concepts of the specified type can be provided as the result.

        @param candidateConceptsQuery: optional. An instance of QueryArticles that can be used to limit the space of concept candidates
        @param candidatesPerType: If candidateConceptsQuery is provided, then this number of concepts for each valid type will be return as candidates
        @param conceptType: optional. A string or an array containing the concept types that are valid candidates on which to compute top correlations
            valid values are "person", "org", "loc" and/or "wiki"
        @param exactCount: the number of returned concepts for which the exact value of the correlation is computed
        @param approxCount: the number of returned concepts for which only an approximate value of the correlation is computed
        @param returnInfo: specifies the details about the concepts that should be returned in the output result.
            Pass None to skip the extra concept-detail lookups
        @return: the response of the correlation request. When returnInfo is given and the response contains
            correlation items, each item additionally gets a "conceptInfo" entry. A response without the
            "news-concept" section (for example an error response) is returned unmodified
        """

        self._clearVal("contextConceptIds")

        # generate all necessary parameters (but don't update the params of the self)
        params = QueryParamsBase.copy(self)

        # compute the candidates
        if candidateConceptsQuery is not None:
            assert isinstance(candidateConceptsQuery, QueryArticles), "'candidateConceptsQuery' is expected to be of type QueryArticles"
            candidateConceptsQuery.setRequestedResult(RequestArticlesConceptAggr())
            candidateConceptsQuery._setVal("conceptAggrConceptCountPerType", candidatesPerType)
            candidateConceptsQuery._setVal("conceptAggrConceptIdOnly", True)
            candidates = self._er.execQuery(candidateConceptsQuery)
            if candidates and "conceptAggr" in candidates:
                params._setVal("contextConceptIds", ",".join(str(x) for x in candidates["conceptAggr"]))
            else:
                print("Warning: Failed to compute a candidate set of concepts")

        if conceptType:
            params._setVal("conceptType", conceptType)
        params._setVal("exactCount", exactCount)
        params._setVal("approxCount", approxCount)
        params._setVal("sourceType", "news-concept")

        #
        # compute the correlations
        ret = self._er.jsonRequest(self._getPath(), params.queryParams)

        #
        # extend the return information with the details about the concepts (label, ...)
        if returnInfo is not None and isinstance(ret, dict):
            # tolerate responses without the expected sections instead of raising KeyError
            correlations = ret.get("news-concept") or {}
            # the collected items are the same dict objects that are stored in ret,
            # so annotating them below updates the returned response in place
            items = []
            for section in ("exactCorrelations", "approximateCorrelations"):
                items.extend(correlations.get(section) or [])

            # concept details are fetched in batches of at most 500 ids per query
            conceptIds = [item["id"] for item in items]
            conceptInfos = {}
            for start in range(0, len(conceptIds), 500):
                q = GetConceptInfo(returnInfo = returnInfo)
                q.queryById(conceptIds[start:start+500])
                info = self._er.execQuery(q)
                # a failed lookup (None / empty result) must not abort the whole call
                if info:
                    conceptInfos.update(info)

            # the lookup results are keyed by the string form of the concept id
            for item in items:
                item["conceptInfo"] = conceptInfos.get(str(item["id"]), {})

        # return result
        return ret
 def suggestConceptClasses(self, prefix, lang = "eng", conceptLang = "eng", source = ["dbpedia", "custom"], page = 1, count = 20, returnInfo = ReturnInfo(), **kwargs):
     """
     Suggest concept classes that match the given prefix.

     @param prefix: input text that should be contained in the category name
     @param lang: language in which the prefix is specified
     @param conceptLang: languages in which the label(s) for the concepts are to be returned
     @param source: what types of concepts classes should be returned. valid values are 'dbpedia' or 'custom'
     @param page: page of the results (1, 2, ...)
     @param count: number of returned suggestions
     @param returnInfo: what details about categories should be included in the returned information
     @param kwargs: additional request parameters; applied last, so they override same-named entries
     @return: whatever self.jsonRequest returns for the "/api/v1/suggestConceptClasses" call
     """
     assert page > 0, "page parameter should be above 0"
     params = {
         "prefix": prefix,
         "lang": lang,
         "conceptLang": conceptLang,
         "source": source,
         "page": page,
         "count": count,
     }
     # first the return-info flags, then the caller supplied extras
     for extraParams in (returnInfo.getParams(), kwargs):
         params.update(extraParams)
     return self.jsonRequest("/api/v1/suggestConceptClasses", params)
 def suggestLocationsAtCoordinate(self, latitude, longitude, radiusKm, limitToCities = False, lang = "eng", count = 20, ignoreNonWiki = True, returnInfo = ReturnInfo(), **kwargs):
     """
     Suggest geo locations (cities or places) that are close to the provided (lat, long) values.

     @param latitude: latitude part of the coordinate
     @param longitude: longitude part of the coordinate
     @param radiusKm: radius in kilometres around the coordinates inside which the locations should be returned
     @param limitToCities: limit the set of results only to cities (True) or also to general places (False)
     @param lang: language in which the location label should be returned
     @param count: number of returned suggestions
     @param ignoreNonWiki: ignore locations that don't have a wiki page and can not be used for concept search.
         NOTE(review): this flag is not added to the request parameters built in this method - confirm whether that is intended
     @param returnInfo: what details about locations should be included in the returned information
     @param kwargs: additional request parameters; applied last, so they override same-named entries
     @return: whatever self.jsonRequest returns for the "/api/v1/suggestLocationsFast" call
     """
     assert isinstance(latitude, (int, float)), "The 'latitude' should be a number"
     assert isinstance(longitude, (int, float)), "The 'longitude' should be a number"
     params = {
         "action": "getLocationsAtCoordinate",
         "lat": latitude,
         "lon": longitude,
         "radius": radiusKm,
         "limitToCities": limitToCities,
         "count": count,
         "lang": lang,
     }
     # first the return-info flags, then the caller supplied extras
     for extraParams in (returnInfo.getParams(), kwargs):
         params.update(extraParams)
     return self.jsonRequest("/api/v1/suggestLocationsFast", params)
 def suggestConcepts(self, prefix, sources = ["concepts"], lang = "eng", conceptLang = "eng", page = 1, count = 20, returnInfo = ReturnInfo(), **kwargs):
     """
     Suggest concepts that contain the given prefix. Returned matching concepts are sorted based on their frequency of occurrence in news (from most to least frequent).

     @param prefix: input text that should be contained in the concept
     @param sources: what types of concepts should be returned. valid values are person, loc, org, wiki, entities (== person + loc + org), concepts (== entities + wiki), conceptClass, conceptFolder
     @param lang: language in which the prefix is specified
     @param conceptLang: languages in which the label(s) for the concepts are to be returned
     @param page: page of the results (1, 2, ...)
     @param count: number of returned suggestions per page
     @param returnInfo: what details about concepts should be included in the returned information
     @param kwargs: additional request parameters; applied last, so they override same-named entries
     @return: whatever self.jsonRequest returns for the "/api/v1/suggestConceptsFast" call
     """
     assert page > 0, "page parameter should be above 0"
     params = {
         "prefix": prefix,
         "source": sources,
         "lang": lang,
         "conceptLang": conceptLang,
         "page": page,
         "count": count,
     }
     # first the return-info flags, then the caller supplied extras
     for extraParams in (returnInfo.getParams(), kwargs):
         params.update(extraParams)
     return self.jsonRequest("/api/v1/suggestConceptsFast", params)
示例#11
0
    def __init__(
            self,
            page=1,
            count=100,
            lang=None,
            keywords=None,
            conceptUri=None,
            categoryUri=None,
            sourceUri=None,
            sourceLocationUri=None,
            sourceGroupUri=None,
            authorUri=None,
            locationUri=None,
            dateStart=None,
            dateEnd=None,
            dateMentionStart=None,
            dateMentionEnd=None,
            keywordsLoc="body",
            startSourceRankPercentile=0,
            endSourceRankPercentile=100,
            sortBy="cosSim",
            sortByAsc=False,
            returnInfo=ReturnInfo(articleInfo=ArticleInfoFlags(bodyLen=-1)),
            **kwds):
        """
        return articles about the event
        @param page: page of the articles to return (1, 2, ...)
        @param count: number of articles to return per page (at most 100)

        @param keywords: limit the event articles to those that mention the specified keywords.
            A single keyword/phrase can be provided as a string, multiple keywords/phrases can be provided as a list of strings.
            Use QueryItems.AND() if *all* provided keywords/phrases should be mentioned, or QueryItems.OR() if *any* of the keywords/phrases should be mentioned.
        @param conceptUri: limit the event articles to those where the concept with concept uri is mentioned.
            A single concept uri can be provided as a string, multiple concept uris can be provided as a list of strings.
            Use QueryItems.AND() if *all* provided concepts should be mentioned, or QueryItems.OR() if *any* of the concepts should be mentioned.
            To obtain a concept uri using a concept label use EventRegistry.getConceptUri().
        @param categoryUri: limit the event articles to those that are assigned into a particular category.
            A single category can be provided as a string, while multiple categories can be provided as a list in QueryItems.AND() or QueryItems.OR().
            A category uri can be obtained from a category name using EventRegistry.getCategoryUri().
        @param sourceUri: limit the event articles to those that were written by a news source sourceUri.
            If multiple sources should be considered use QueryItems.OR() to provide the list of sources.
            Source uri for a given news source name can be obtained using EventRegistry.getNewsSourceUri().
        @param sourceLocationUri: limit the event articles to those that were written by news sources located in the given geographic location.
            If multiple source locations are provided, then put them into a list inside QueryItems.OR()
            Location uri can either be a city or a country. Location uri for a given name can be obtained using EventRegistry.getLocationUri().
        @param sourceGroupUri: limit the event articles to those that were written by news sources that are assigned to the specified source group.
            If multiple source groups are provided, then put them into a list inside QueryItems.OR()
            Source group uri for a given name can be obtained using EventRegistry.getSourceGroupUri().
        @param authorUri: find articles that were written by a specific author.
            If multiple authors should be considered use QueryItems.OR() to provide the list of authors.
            Author uri for a given author name can be obtained using EventRegistry.getAuthorUri().
        @param locationUri: find articles that describe something that occurred at a particular location.
            The value can be a string or a list of strings provided in QueryItems.OR().
            Location uri can either be a city or a country. Location uri for a given name can be obtained using EventRegistry.getLocationUri().
        @param lang: find articles that are written in the specified language.
            If more than one language is specified, resulting articles have to be written in *any* of the languages.
        @param dateStart: find articles that were written on or after dateStart. Date should be provided in YYYY-MM-DD format, datetime.time or datetime.datetime.
        @param dateEnd: find articles that occurred before or on dateEnd. Date should be provided in YYYY-MM-DD format, datetime.time or datetime.datetime.

        @param dateMentionStart: limit the event articles to those that explicitly mention a date that is equal or greater than dateMentionStart.
        @param dateMentionEnd: limit the event articles to those that explicitly mention a date that is lower or equal to dateMentionEnd.
        @param keywordsLoc: where should we look when searching using the keywords provided by "keywords" parameter. "body" (default), "title", or "body,title"

        @param startSourceRankPercentile: starting percentile of the sources to consider in the results (default: 0). Value should be in range 0-100 and divisible by 10.
        @param endSourceRankPercentile: ending percentile of the sources to consider in the results (default: 100). Value should be in range 0-100 and divisible by 10.
            Has to be strictly greater than startSourceRankPercentile.

        @param sortBy: order in which event articles are sorted. Options: id (internal id), date (published date), cosSim (closeness to event centroid), sourceImportanceRank (importance of the news source, custom set), sourceAlexaGlobalRank (global rank of the news source), sourceAlexaCountryRank (country rank of the news source), socialScore (total shares in social media)
        @param sortByAsc: should the articles be sorted in ascending order (True) or descending (False) based on sortBy value
        @param returnInfo: what details should be included in the returned information
        @param kwds: additional keyword arguments are accepted but not used by this constructor
        """
        RequestEvent.__init__(self)
        QueryParamsBase.__init__(self)
        assert page >= 1, "page has to be >= 1"
        assert count <= 100, "at most 100 articles can be returned per call"
        self.resultType = "articles"
        self.articlesPage = page
        self.articlesCount = count

        self._setQueryArrVal(keywords, "keyword", "keywordOper", "and")
        self._setQueryArrVal(conceptUri, "conceptUri", "conceptOper", "and")
        self._setQueryArrVal(categoryUri, "categoryUri", "categoryOper", "or")
        self._setQueryArrVal(sourceUri, "sourceUri", "sourceOper", "or")
        self._setQueryArrVal(sourceLocationUri, "sourceLocationUri", None, "or")
        self._setQueryArrVal(sourceGroupUri, "sourceGroupUri", "sourceGroupOper", "or")
        self._setQueryArrVal(authorUri, "authorUri", "authorOper", "or")
        # location such as "http://en.wikipedia.org/wiki/Ljubljana"
        self._setQueryArrVal(locationUri, "locationUri", None, "or")
        # a single lang or list (possible: eng, deu, spa, zho, slv)
        self._setQueryArrVal(lang, "lang", None, "or")

        # date filters (e.g. 2014-05-02) are only set when explicitly provided:
        # dateStart/dateEnd limit the publishing date of the articles,
        # dateMentionStart/dateMentionEnd limit the dates mentioned in the articles
        dateFilters = (
            ("dateStart", dateStart),
            ("dateEnd", dateEnd),
            ("dateMentionStart", dateMentionStart),
            ("dateMentionEnd", dateMentionEnd),
        )
        for dateParam, dateValue in dateFilters:
            if dateValue is not None:
                self._setDateVal(dateParam, dateValue)

        self._setValIfNotDefault("keywordLoc", keywordsLoc, "body")

        assert 0 <= startSourceRankPercentile <= 100 and startSourceRankPercentile % 10 == 0, \
            "startSourceRankPercentile has to be in range 0-100 and divisible by 10"
        assert 0 <= endSourceRankPercentile <= 100 and endSourceRankPercentile % 10 == 0, \
            "endSourceRankPercentile has to be in range 0-100 and divisible by 10"
        assert startSourceRankPercentile < endSourceRankPercentile, \
            "startSourceRankPercentile has to be smaller than endSourceRankPercentile"
        # the percentile limits are only sent when they differ from the defaults
        if startSourceRankPercentile != 0:
            self._setVal("startSourceRankPercentile", startSourceRankPercentile)
        if endSourceRankPercentile != 100:
            self._setVal("endSourceRankPercentile", endSourceRankPercentile)

        self.articlesSortBy = sortBy
        self.articlesSortByAsc = sortByAsc
        # the filtering params are stored in queryParams. update the params on the self and delete the queryParams object
        self.__dict__.update(self.queryParams)
        self.__dict__.update(returnInfo.getParams("articles"))
        del self.queryParams
 def suggestCustomConcepts(self, prefix, lang = "eng", conceptLang = "eng", page = 1, count = 20, returnInfo = ReturnInfo()):
     """
     Suggest custom concepts that contain the given prefix. Custom concepts are the things (indicators, stock prices, ...) for which we import daily trending values that can be obtained using GetCounts class.

     @param prefix: input text that should be contained in the concept name
     @param lang: language in which the prefix is specified
     @param conceptLang: languages in which the label(s) for the concepts are to be returned
     @param page: page of the results (1, 2, ...)
     @param count: number of returned suggestions
     @param returnInfo: what details about categories should be included in the returned information
     @return: whatever self.jsonRequest returns for the "/json/suggestCustomConcepts" call
     """
     assert page > 0, "page parameter should be above 0"
     params = {
         "prefix": prefix,
         "lang": lang,
         "conceptLang": conceptLang,
         "page": page,
         "count": count,
     }
     # merge in the flags describing which details to return
     detailParams = returnInfo.getParams()
     params.update(detailParams)
     return self.jsonRequest("/json/suggestCustomConcepts", params)