示例#1
0
def main():
    """Reset the database, then summarise the latest news for each country.

    The translation setting is taken from ``sys.argv[1]``.  All existing data
    is removed through ``Mongo.DeleteAllDB``; afterwards, for every entry
    produced by ``OneListOfCountry('Countries.txt')``, the three most recent
    Event Registry news articles whose source is located in that country are
    wrapped in ``NewsArticle`` and passed to ``summary``.

    Raises:
        IndexError: if no command-line argument was supplied.  This is raised
            before the database is touched.
    """
    # Read the CLI argument first so a missing argument aborts the run
    # before the destructive DeleteAllDB call below.
    translate = sys.argv[1]

    mongoDelete = Mongo()
    try:
        mongoDelete.DeleteAllDB()
    finally:
        # Release the connection even when the wipe fails.
        mongoDelete.CloseConnection()

    er = EventRegistry(apiKey=ERKey, allowUseOfArchive=False)
    countries = OneListOfCountry('Countries.txt')

    for country in countries:
        currentCountry = str(country)
        mdb = Mongo()
        try:
            countryNews = er.getLocationUri(currentCountry)
            query = QueryArticlesIter(sourceLocationUri=[countryNews],
                                      isDuplicateFilter=False,
                                      dataType='news')
            # Newest first, capped at three articles per country.
            for headline in query.execQuery(er,
                                            sortBy="date",
                                            sortByAsc=False,
                                            maxItems=3):
                headlineObj = NewsArticle(headline, currentCountry)
                summary(headlineObj, mdb, headlineObj.GetLanguage(), translate)
        finally:
            # One connection per country; always close it, even if the
            # lookup, the query or the summary step raised.
            mdb.CloseConnection()
示例#2
0
 def __init__(self, api_key=''):
     """Set up the Event Registry client, language table and token flag.

     Args:
         api_key: value forwarded to ``EventRegistry`` as ``apiKey``.
             Defaults to the empty string.
     """
     client = EventRegistry(apiKey=api_key, repeatFailedRequestCount=1)
     self.news_api = client

     # Two-letter language codes (keys) paired with the three-letter codes
     # (values) stored for later lookups.
     code_pairs = (
         ('en', 'eng'),
         ('de', 'deu'),
         ('fr', 'fra'),
         ('it', 'ita'),
         ('es', 'spa'),
         ('pl', 'pol'),
         ('ro', 'ron'),
         ('nl', 'nld'),
         ('hu', 'hun'),
         ('pt', 'por'),
     )
     self.lang_code = dict(code_pairs)

     # Starts cleared; nothing in this method sets it.
     self._no_tokens = False
示例#3
0
def noticias(theme, dates, api_key="c4b6b663-d180-4f4c-8163-4c45fbc7cbd7"):
    """Print every Event Registry article for a concept within a date range.

    Args:
        theme: concept URI passed to the query as ``conceptUri``.
        dates: indexable whose items ``0`` and ``1`` are used as the start
            and end date of the query.
        api_key: Event Registry API key.  The default preserves the key that
            was previously hard-coded in the function body.

    Returns:
        None.  Each article is written to standard output with ``print``.
    """
    # NOTE(review): the default key is embedded in source only for backward
    # compatibility; callers should pass their own key from configuration.
    er = EventRegistry(api_key)
    date_start, date_end = dates[0], dates[1]
    q = QueryArticlesIter(conceptUri=theme,
                          dateStart=date_start,
                          dateEnd=date_end)
    for art in q.execQuery(er, sortBy="date"):
        print(art)
示例#4
0
 def __init__(self, api_key=None):
     """Set up the Event Registry client, language table and token flag.

     Args:
         api_key: Event Registry API key.  When ``None`` (the default) the
             key is read from the first line of ``{ROOT_PATH}/ER_API_KEY``,
             with trailing whitespace stripped.

     Raises:
         OSError: if ``api_key`` is ``None`` and the key file cannot be
             opened.
     """
     # The key file used to be read in the default-argument expression,
     # which runs once at definition time, never closes the file handle and
     # makes the import fail when the file is missing.  Read it lazily.
     if api_key is None:
         with open(f'{ROOT_PATH}/ER_API_KEY') as key_file:
             api_key = key_file.readline().rstrip()

     self.news_api = EventRegistry(apiKey=api_key,
                                   repeatFailedRequestCount=1)
     # Two-letter language codes mapped to three-letter codes.
     self.lang_code = {
         'en': 'eng',
         'de': 'deu',
         'fr': 'fra',
         'it': 'ita',
         'es': 'spa',
         'pl': 'pol',
         'ro': 'ron',
         'nl': 'nld',
         'hu': 'hun',
         'pt': 'por'
     }
     self._no_tokens = False
示例#5
0
class NewsArticlesApi():
    """Keyword-based news lookup on top of the Event Registry client.

    After any failed query the instance stops issuing requests and returns
    empty results until ``reset`` is called.
    """

    def __init__(self, api_key=None):
        """Create the Event Registry client and the language-code table.

        Args:
            api_key: Event Registry API key.  When ``None`` (the default) the
                key is read from the first line of
                ``{ROOT_PATH}/ER_API_KEY`` with trailing whitespace stripped.

        Raises:
            OSError: if ``api_key`` is ``None`` and the key file cannot be
                opened.
        """
        # Reading the file in the default-argument expression ran at class
        # definition time, leaked the file handle and broke the import when
        # the file was missing; read it on demand instead.
        if api_key is None:
            with open(f'{ROOT_PATH}/ER_API_KEY') as key_file:
                api_key = key_file.readline().rstrip()

        self.news_api = EventRegistry(apiKey=api_key,
                                      repeatFailedRequestCount=1)
        # Two-letter language codes accepted by get_news_articles, mapped to
        # the three-letter codes passed to the query.
        self.lang_code = {
            'en': 'eng',
            'de': 'deu',
            'fr': 'fra',
            'it': 'ita',
            'es': 'spa',
            'pl': 'pol',
            'ro': 'ron',
            'nl': 'nld',
            'hu': 'hun',
            'pt': 'por'
        }
        self._no_tokens = False

    def get_news_articles(self,
                          keyword,
                          lang='en',
                          sort_by='date',
                          max_items=10):
        """Return news articles whose body mentions ``keyword``.

        ``keyword`` is also resolved to a location URI, so it is used both
        as the body keyword and as the location filter.

        Args:
            keyword: text to search for in article bodies.
            lang: key of ``self.lang_code`` (default ``'en'``).
            sort_by: value passed to ``execQuery`` as ``sortBy``.
            max_items: value passed to ``execQuery`` as ``maxItems``.

        Returns:
            A list of dicts with the keys ``title``, ``date``, ``source``,
            ``url`` and ``body``.  The list is empty when the instance is in
            the no-tokens state or when the query raised.

        Raises:
            KeyError: if ``lang`` is not a key of ``self.lang_code``.
        """
        # If no tokens are available, return no results.  This check comes
        # first so that no request (the location lookup below) is made.
        if self._no_tokens:
            return []

        keyword_query = QueryArticlesIter(
            keywords=keyword,
            keywordsLoc='body',
            locationUri=self.news_api.getLocationUri(keyword),
            lang=self.lang_code[lang],
            dataType='news')

        # Any error while running the query yields no articles and switches
        # the instance to the no-tokens state.  ``Exception`` (not a bare
        # ``except``) lets KeyboardInterrupt and SystemExit propagate.
        try:
            return [
                {
                    'title': article['title'],
                    'date': article['date'],
                    'source': article['source']['uri'],
                    'url': article['url'],
                    'body': article['body']
                }
                for article in keyword_query.execQuery(self.news_api,
                                                       sortBy=sort_by,
                                                       maxItems=max_items)
            ]
        except Exception:
            self._no_tokens = True
            return []

    def reset(self):
        """Clear the no-tokens state so queries are attempted again."""
        self._no_tokens = False
示例#6
0
    def handle(self, *args, **options):
        """Fetch up to 100 'Andrew Yang' articles and store them with authors.

        For every article returned by the query a ``NewsArticle`` row is
        fetched or created.  For every author of the article an ``Author``
        and an ``AuthorEmail`` row are fetched or created, and the author is
        linked to the article only when the article was created by this run.
        Integrity errors are printed and the loop moves on to the next
        article.
        """
        er = EventRegistry(apiKey=apikey)
        q = QueryArticlesIter(
            keywords='Andrew Yang',
            keywordsLoc='body, title',
        )

        for article in q.execQuery(er, sortBy='date', maxItems=100):
            try:
                # get_or_create returns (object, created): the second item is
                # a boolean, True only when the row was inserted by this call.
                news_article, article_created \
                    = NewsArticle.objects.get_or_create(
                        title=article['title'],
                        url=article['url'],
                        text=article['body'],
                        website=safeget(article, 'source', 'uri'),
                        publish_date=article.get('dateTime'),
                    )
                for author in article['authors']:
                    author_obj, _ = Author.objects.get_or_create(
                        author_name=author.get('name'))
                    # The author's 'uri' field is what gets stored as the
                    # e-mail address.
                    AuthorEmail.objects.get_or_create(
                        author_email=author.get('uri'),
                        author=author_obj
                    )
                    if article_created:
                        news_article.authors.add(author_obj)
                        news_article.save()
                        print(author_obj, ' added to ', news_article)
                    else:
                        # Previously tested the model instance itself, which
                        # is always truthy, so 'exists' was printed even for
                        # brand-new articles.
                        print('exists')
            except IntegrityError as e:
                print('Integrity Error:', e)
示例#7
0
"""
Download articles from eventregistry.org using an API
"""

import os

from eventregistry import QueryArticlesIter, EventRegistry

LANGUAGES = ["deu", "fra", "eng"]
NUM_ARTICLES = 20
APIKEY = None
er = EventRegistry(apiKey=APIKEY)

# For each language: fetch the newest NUM_ARTICLES articles, write each one to
# files/<lang>/article_<i>.txt (title, newline, body) and append its title to
# files/metadata_<lang>.tsv.
for lang in LANGUAGES:
    print("Downloading articles for {}".format(lang))
    query = QueryArticlesIter(lang=lang)
    articles = query.execQuery(er, sortBy="date", maxItems=NUM_ARTICLES)

    # open() does not create missing directories; make files/<lang>/ (and
    # therefore files/) before writing into them.
    os.makedirs("files/{}".format(lang), exist_ok=True)

    # Write UTF-8 explicitly: the article text is not limited to ASCII and
    # the platform default encoding may be unable to represent it.
    with open("files/metadata_{}.tsv".format(lang), 'w',
              encoding="utf-8") as file:
        for i, article in enumerate(articles):
            with open("files/{}/article_{}.txt".format(lang, i), "w",
                      encoding="utf-8") as f:
                f.write(article["title"] + "\n" + article["body"])
            file.write("{}\n".format(article["title"]))
示例#8
0
import json
import datetime

from eventregistry import EventRegistry, QueryArticles, QueryItems, ComplexArticleQuery

from countries import Countries, CountryAliases, get_country_alias, get_country_language

# Init
# Load the API key inside a context manager so the key file is closed as soon
# as it has been parsed instead of being left to the garbage collector.
with open("res/keys.json", 'r') as _keys_file:
    _api = EventRegistry(apiKey=json.load(_keys_file)["news"])
# Location URI for every member of Countries, looked up once at import time by
# the member's NAME alias.
_location_uris = {c: _api.getLocationUri(get_country_alias(c, CountryAliases.NAME)) for c in Countries}


def _get_month_ago():
    """Return the date 31 days before today as a ``YYYY-MM-DD`` string.

    A "month" is approximated as a fixed 31-day span counted back from
    ``datetime.date.today()``.
    """
    one_month = datetime.timedelta(days=31)
    month_ago = datetime.date.today() - one_month
    # For a date object, isoformat() yields the same text as str().
    return month_ago.isoformat()


def get_relevant_headlines(home_country: Countries, target_country: Countries):
    """Pulls article headlines and creation dates from the EventRegistry API

    :returns [
        (<Article title>, <Article date create>),
        (<Article title>, <Article date create>),
        (<Article title>, <Article date create>),
        ...
        ]