def main():
    """Reset the Mongo store, then summarise recent news for every country.

    Steps:
      1. Read the translate option from ``sys.argv[1]``.
      2. Call ``DeleteAllDB()`` on a fresh ``Mongo`` connection.
      3. For each entry returned by ``OneListOfCountry('Countries.txt')``,
         resolve its Event Registry location URI, fetch at most three
         articles sorted by date (descending) and hand each one to
         ``summary``.

    Raises:
        IndexError: if no command-line argument was supplied.
    """
    # Read the CLI argument before the destructive delete so that a missing
    # argument aborts the run before any stored data is removed.
    translate = sys.argv[1]

    mongo_delete = Mongo()
    try:
        mongo_delete.DeleteAllDB()
    finally:
        # Release the connection even if the delete fails.
        mongo_delete.CloseConnection()

    er = EventRegistry(apiKey=ERKey, allowUseOfArchive=False)
    countries = OneListOfCountry('Countries.txt')

    for country in countries:
        current_country = str(country)
        mdb = Mongo()
        try:
            country_uri = er.getLocationUri(current_country)
            # NOTE(review): isDuplicateFilter is passed as False here; the
            # Event Registry docs describe string values - confirm intent.
            query = QueryArticlesIter(
                sourceLocationUri=[country_uri],
                isDuplicateFilter=False,
                dataType='news',
            )
            for headline in query.execQuery(
                    er, sortBy="date", sortByAsc=False, maxItems=3):
                headline_obj = NewsArticle(headline, current_country)
                summary(headline_obj, mdb, headline_obj.GetLanguage(), translate)
        finally:
            # One connection per country; always close it, even when the
            # query or the summary step raises.
            mdb.CloseConnection()
def __init__(self, api_key=''):
    """Set up the Event Registry client and the language-code table.

    Args:
        api_key: key forwarded to ``EventRegistry`` as ``apiKey``
            (empty string by default).
    """
    # Failed requests are repeated once (repeatFailedRequestCount=1).
    self.news_api = EventRegistry(repeatFailedRequestCount=1, apiKey=api_key)

    # Two-letter language code -> three-letter code passed to queries.
    self.lang_code = dict(
        en='eng',
        de='deu',
        fr='fra',
        it='ita',
        es='spa',
        pl='pol',
        ro='ron',
        nl='nld',
        hu='hun',
        pt='por',
    )

    # Starts cleared.
    self._no_tokens = False
def noticias(theme, dates):
    """Print every Event Registry article matching a concept and date range.

    Args:
        theme: value forwarded to the query as ``conceptUri``.
        dates: indexable whose items 0 and 1 are forwarded as ``dateStart``
            and ``dateEnd`` respectively.
    """
    # NOTE(review): the API key is hard-coded in source - consider moving it
    # to configuration.
    er = EventRegistry("c4b6b663-d180-4f4c-8163-4c45fbc7cbd7")

    date_start = dates[0]
    date_end = dates[1]
    query = QueryArticlesIter(
        conceptUri=theme,
        dateStart=date_start,
        dateEnd=date_end,
    )

    results = query.execQuery(er, sortBy="date")
    for art in results:
        print(art)
def __init__(self, api_key=None):
    """Set up the Event Registry client and the language-code table.

    Args:
        api_key: Event Registry API key. When ``None`` (the default) the
            key is read from the first line of ``{ROOT_PATH}/ER_API_KEY``.

    Raises:
        OSError: if no key is given and the key file cannot be opened.
    """
    if api_key is None:
        # Read the key lazily and close the file afterwards. Doing this in
        # the default argument would run at definition time, leak the file
        # handle and break the import when the file is missing.
        with open(f'{ROOT_PATH}/ER_API_KEY') as key_file:
            api_key = key_file.readline().rstrip()

    # Failed requests are repeated once (repeatFailedRequestCount=1).
    self.news_api = EventRegistry(apiKey=api_key, repeatFailedRequestCount=1)

    # Two-letter language code -> three-letter code passed to queries.
    self.lang_code = {
        'en': 'eng',
        'de': 'deu',
        'fr': 'fra',
        'it': 'ita',
        'es': 'spa',
        'pl': 'pol',
        'ro': 'ron',
        'nl': 'nld',
        'hu': 'hun',
        'pt': 'por',
    }

    # Starts cleared.
    self._no_tokens = False
class NewsArticlesApi:
    """Thin wrapper around Event Registry for keyword-based article search.

    After a failed query the instance stops issuing requests and returns
    empty results until ``reset()`` is called.
    """

    def __init__(self, api_key=None):
        """Create the Event Registry client.

        Args:
            api_key: Event Registry API key. When ``None`` (the default) the
                key is read from the first line of
                ``{ROOT_PATH}/ER_API_KEY``.

        Raises:
            OSError: if no key is given and the key file cannot be opened.
        """
        if api_key is None:
            # Read the key lazily and close the file afterwards. Doing this
            # in the default argument would run at class-definition time,
            # leak the file handle and break the import when the file is
            # missing.
            with open(f'{ROOT_PATH}/ER_API_KEY') as key_file:
                api_key = key_file.readline().rstrip()

        # Failed requests are repeated once (repeatFailedRequestCount=1).
        self.news_api = EventRegistry(apiKey=api_key, repeatFailedRequestCount=1)

        # Two-letter language code -> three-letter code passed to queries.
        self.lang_code = {
            'en': 'eng',
            'de': 'deu',
            'fr': 'fra',
            'it': 'ita',
            'es': 'spa',
            'pl': 'pol',
            'ro': 'ron',
            'nl': 'nld',
            'hu': 'hun',
            'pt': 'por',
        }

        # Set to True after a failed query; cleared again by reset().
        self._no_tokens = False

    def get_news_articles(self, keyword, lang='en', sort_by='date', max_items=10):
        """Return news articles whose body mentions *keyword*.

        The keyword is used twice: as the body keyword and, resolved through
        ``getLocationUri``, as the location filter of the query.

        Args:
            keyword: search term.
            lang: two-letter language code; must be a key of
                ``self.lang_code``.
            sort_by: forwarded to ``execQuery`` as ``sortBy``.
            max_items: forwarded to ``execQuery`` as ``maxItems``.

        Returns:
            A list of dicts with the keys ``title``, ``date``, ``source``,
            ``url`` and ``body``. The list is empty when the instance is
            flagged as out of tokens or when the query fails.

        Raises:
            KeyError: if *lang* is not a supported language code.
        """
        # Check the flag first so that no request (including the location
        # lookup) is issued once the instance is known to be out of tokens.
        if self._no_tokens:
            return []

        # Resolved outside the try block so that an unsupported language is
        # reported to the caller instead of being mistaken for a failed query.
        language = self.lang_code[lang]

        try:
            keyword_query = QueryArticlesIter(
                keywords=keyword,
                keywordsLoc='body',
                locationUri=self.news_api.getLocationUri(keyword),
                lang=language,
                dataType='news')
            return [
                {
                    'title': article['title'],
                    'date': article['date'],
                    'source': article['source']['uri'],
                    'url': article['url'],
                    'body': article['body'],
                }
                for article in keyword_query.execQuery(
                    self.news_api, sortBy=sort_by, maxItems=max_items)
            ]
        except Exception:
            # Any failure is treated as "no tokens left": remember it so that
            # later calls short-circuit until reset() is called.
            self._no_tokens = True
            return []

    def reset(self):
        """Clear the out-of-tokens flag so that queries are attempted again."""
        self._no_tokens = False
def handle(self, *args, **options):
    """Fetch articles mentioning 'Andrew Yang' and store them with authors.

    Queries Event Registry for up to 100 articles sorted by date. Each
    article is stored through ``NewsArticle.objects.get_or_create``; each of
    its authors (and the author's ``uri``, stored as ``AuthorEmail``) is
    stored the same way and linked to the article when the article was
    newly created. An ``IntegrityError`` on one article is printed and the
    loop continues with the next article.
    """
    er = EventRegistry(apiKey=apikey)
    # NOTE(review): 'body, title' contains a space; the Event Registry docs
    # spell this option "body,title" - confirm the API accepts it as written.
    query = QueryArticlesIter(
        keywords='Andrew Yang',
        keywordsLoc='body, title',
    )

    for article in query.execQuery(er, sortBy='date', maxItems=100):
        try:
            # get_or_create returns (instance, created); `created` is True
            # only when a new row was inserted.
            stored_article, article_created = NewsArticle.objects.get_or_create(
                title=article['title'],
                url=article['url'],
                text=article['body'],
                website=safeget(article, 'source', 'uri'),
                publish_date=article.get('dateTime'),
            )

            # A missing or empty author list is treated as "no authors"
            # instead of aborting the whole command with a KeyError.
            for author in article.get('authors') or ():
                stored_author, _ = Author.objects.get_or_create(
                    author_name=author.get('name'))
                AuthorEmail.objects.get_or_create(
                    author_email=author.get('uri'),
                    author=stored_author,
                )
                # Authors are only linked to articles created in this run.
                if article_created:
                    stored_article.authors.add(stored_author)
                    stored_article.save()
                    print(stored_author, ' added to ', stored_article)

            # Test the `created` flag, not the instance: a model instance is
            # always truthy, so testing it would report every article as
            # already existing.
            if not article_created:
                print('exists')
        except IntegrityError as e:
            print('Integrity Error:', e)
"""
Download articles from eventregistry.org using an API

For every language in LANGUAGES the script fetches up to NUM_ARTICLES
articles sorted by date, writes each article (title, newline, body) to
files/<lang>/article_<i>.txt and appends its title to
files/metadata_<lang>.tsv.
"""
import os

from eventregistry import QueryArticlesIter, EventRegistry

LANGUAGES = ["deu", "fra", "eng"]
NUM_ARTICLES = 20
APIKEY = None

er = EventRegistry(apiKey=APIKEY)

for lang in LANGUAGES:
    print("Downloading articles for {}".format(lang))
    query = QueryArticlesIter(lang=lang)
    articles = query.execQuery(er, sortBy="date", maxItems=NUM_ARTICLES)

    # Make sure files/<lang>/ (and therefore files/) exists before writing.
    os.makedirs("files/{}".format(lang), exist_ok=True)

    # Explicit UTF-8: titles and bodies contain non-ASCII text, and the
    # platform default encoding is not guaranteed to be able to encode it.
    with open("files/metadata_{}.tsv".format(lang), "w",
              encoding="utf-8") as metadata_file:
        for i, article in enumerate(articles):
            article_path = "files/{}/article_{}.txt".format(lang, i)
            with open(article_path, "w", encoding="utf-8") as article_file:
                article_file.write(article["title"] + "\n" + article["body"])
            metadata_file.write("{}\n".format(article["title"]))
import json import datetime from eventregistry import EventRegistry, QueryArticles, QueryItems, ComplexArticleQuery from countries import Countries, CountryAliases, get_country_alias, get_country_language # Init _api = EventRegistry(apiKey=json.load(open("res/keys.json", 'r'))["news"]) _location_uris = {c: _api.getLocationUri(get_country_alias(c, CountryAliases.NAME)) for c in Countries} def _get_month_ago(): """Gets the date a month ago in the format accepted by EvnetRegistry returns in form YYYY-MM-DD """ d = datetime.date.today() ddelta = datetime.timedelta(days=31) return str(d - ddelta) def get_relevant_headlines(home_country: Countries, target_country: Countries): """Pulls article headlines and creation dates from the EventRegistry API :returns [ (<Article title>, <Article date create>), (<Article title>, <Article date create>), (<Article title>, <Article date create>), ... ]