Example #1
0
 def get_sources(self,
                 fields=None,
                 limit=20,
                 **kargs):
     """Fetch news sources from NewsAPI and project them onto *fields*.

     Args:
         fields: field names to keep per source; defaults to
             ["image", "title", "image", "link"].  NOTE(review): the
             original default listed "image" twice — looks like a typo
             for some other field; confirm before deduplicating.
         limit: maximum number of sources to return.
         **kargs: forwarded to NewsApiClient.get_sources().

     Returns:
         Whatever self.__transformDate produces for the source list.
     """
     # Resolve the default here rather than in the signature so the
     # (mutable) list is not shared across calls.
     if fields is None:
         fields = ["image", "title", "image", "link"]
     newsapi = NewsApiClient(api_key=self.__key)
     sources = newsapi.get_sources(**kargs)
     return self.__transformDate(sources.get("sources"), fields, limit)
Example #2
0
def getresource(searchfor):
    """Return the NewsAPI sources for *searchfor* (English, US) as JSON.

    Args:
        searchfor: NewsAPI category name, e.g. "business" or "sports".

    Returns:
        A Flask JSON response wrapping the raw get_sources() payload.
    """
    # SECURITY NOTE(review): the API key is hard-coded; it should come
    # from configuration or an environment variable.
    newsapi = NewsApiClient(api_key="d11761b89fdb4599b1497bf951690000")
    sources = newsapi.get_sources(category=searchfor,
                                  language="en",
                                  country="us")
    # (Dropped the unused local `dicc = {}` from the original.)
    return jsonify(sources)
Example #3
0
File: news.py  Project: louixp/CoroNow
class NewsAPI:
    """Thin NewsAPI wrapper with a JSON file cache of the source list.

    Relies on two module-level names defined elsewhere in this file:
    `apikey` (the NewsAPI key) and `database` (path of the JSON cache).
    """

    def __init__(self):
        self.api = NewsApiClient(api_key=apikey)

    def headlines(self, keyword=None, sources=None, country=None, category=None, lang="en"):
        """Return top-headline articles.

        Args:
            keyword: optional search phrase.
            sources: optional list of NewsAPI source id strings.
            country, category, lang: forwarded to get_top_headlines().

        Returns:
            The list under the response's 'articles' key.
        """
        source_str = None
        if sources is not None:
            # NewsAPI expects one comma-separated id string.  join() also
            # handles an empty list (the original manual loop raised
            # IndexError on []).
            source_str = ",".join(sources)
        top_headlines = self.api.get_top_headlines(q=keyword,
                                                   sources=source_str,
                                                   category=category,
                                                   language=lang,
                                                   country=country)
        return top_headlines['articles']

    def update_sources(self):
        """Refresh the on-disk cache with the current NewsAPI source list."""
        sources = self.api.get_sources()['sources']
        # Context manager guarantees the file is closed even on error.
        with open(database, 'w') as db:
            print("Database opened for writing")
            json.dump(sources, db)
        print("Database closed for writing")

    def get_sources(self):
        """Load and return the cached source list from disk."""
        print("Preparing news...")
        with open(database, "r") as db:
            print("Database opened for reading")
            sources = json.load(db)
        print("Database closed for reading")
        return sources
Example #4
0
def newsapi(stock):
    """Score recent news sentiment for *stock* via NewsAPI + TextBlob.

    Args:
        stock: search term (ticker or company name).

    Returns:
        (output, title, desc):
        output — score on a 0-100 scale where 50 is neutral,
        title  — list of "title : description" strings,
        desc   — always an empty list (kept so the 3-tuple shape that
                 callers unpack is unchanged).
    """
    # SECURITY NOTE(review): hard-coded API key; move to config/env.
    # (Local renamed from `newsapi`, which shadowed the function name.)
    client = NewsApiClient(api_key='861ff0ffbaaa4eaa9571ce516cc5e088')

    all_articles = client.get_everything(q=stock,
                                         language='en',
                                         sort_by='publishedAt',
                                         page_size=100)
    # (Dropped the original's unused `get_sources()` call — it burned an
    # API request and its result was never read.)

    title = []
    desc = []  # never populated; retained for caller compatibility
    pos, neg, neu = 0, 0, 0

    for article in all_articles['articles']:
        content = str(article['content'])
        title.append(
            str(article['title']) + ' : \n' + str(article['description']))
        polarity = TextBlob(content).sentiment.polarity
        if polarity > 0:
            pos += 1
        elif polarity == 0:
            neu += 1
        else:
            neg += 1

    total = pos + neg + neu
    if total == 0:
        # No articles returned: report a neutral 50 instead of dividing
        # by zero as the original did.
        return 50.0, title, desc

    pos_news, neg_news = pos / total, neg / total
    # Map the net polarity share from [-1, 1] onto 0-100.  (The original
    # if/else computed the identical expression in both branches, so it
    # collapses to one line.)
    output = ((pos_news - neg_news + 1) * 100) / 2
    return output, title, desc
class NewsApiSourcesCollector:
    """Fetches the NewsAPI source catalogue and exports it to a JSON file."""

    def __init__(self):
        # API key lives in config.json under NEWS_API_KEY.
        with open('config.json') as cfg:
            settings = json.load(cfg)
        self.API_KEY = settings['NEWS_API_KEY']
        try:
            self.newsapi = NewsApiClient(api_key=self.API_KEY)
            self.sources = self.newsapi.get_sources()
        except NewsAPIException as err:
            # On a bad key, `self.sources` is simply never set.
            print("Invalid API key:", err)

    def export_sources(self):
        """Write the fetched sources to newsapi_sources.json, if any."""
        if not hasattr(self, 'sources'):
            # __init__ failed to fetch; nothing to write.
            print("Exporting was unsuccessful")
            return
        with open('newsapi_sources.json', 'w', encoding='utf8') as out:
            json.dump(self.sources, out, indent=4, ensure_ascii=False)
        print("Exporting was successful")
Example #6
0
def get_news(topic):
    """Return up to three recent soccer articles about *topic* as JSON.

    Args:
        topic: free-text subject, combined with " soccer" for the query.

    Returns:
        A JSON string of at most three article dicts from NewsAPI.
    """
    # SECURITY NOTE(review): hard-coded API key; move to config/env.
    newsClient = NewsApiClient(api_key="bad068d6ce6c4ccfb30eb5785c360efe")
    keyWords = topic + " soccer"
    sportsSources = newsClient.get_sources(category="sports")
    # NewsAPI wants a comma-separated id string.  (The original loop had
    # a stuttered `sourceIds = sourceIds = ...` double assignment on the
    # last element; join() is equivalent and handles the empty list.)
    sourceIds = ",".join(s['id'] for s in sportsSources['sources'])
    threeDaysAgo = datetime.date(datetime.now()) - timedelta(3)  # date 3 days ago
    topHeadlines = newsClient.get_everything(q=keyWords,
                                             sources=sourceIds,
                                             language='en',
                                             sort_by='relevancy',
                                             from_param=threeDaysAgo)
    articles = topHeadlines['articles'][:3]
    return json.dumps(articles)
Example #7
0
class NewsURL:
    """Crawl NewsAPI article URLs for tech/general/google-news sources.

    Writes per-run CSVs under ./source/<YYYY-MM-DD>/ and per-magazine
    backups under ./backup/<YYYY-MM-DD>/ (directories created in
    __init__).  NOTE(review): the API keys below are hard-coded in the
    repository; they should live in config or environment variables.
    """

    def __init__(self, start_date, end_date):
        """Build API clients, magazine tables and output directories.

        Args:
            start_date: inclusive crawl start date, "YYYY-MM-DD".
            end_date: inclusive crawl end date, "YYYY-MM-DD".
        """
        # One key per crawler to spread request quota across accounts
        # (owner names translated from Korean).
        self.API_KEY1 = '9382dd6539f448e59de4ab7c8c214f6f'  # Kim Min-su
        self.API_KEY2 = '08fe48df23494ab0bb4faa1162fee7fa'  # Lee Myung-hun
        self.API_KEY3 = '0bc1cc3aff43418ba35488984b6742a4'  # Choi Beom-seok
        self.API_KEY4 = 'f996355abde44786b91bdef6bc92ee62'  # Lee Myung-hun (2nd key)
        self.API_KEY5 = '2533fbe4f09e4d9dbc51905dcd13d4a3'  # Choi Beom-seok (2nd key)
        # Get the source catalogue once; reused by make_magazine().
        self.tech_newsapi = NewsApiClient(api_key=self.API_KEY1)
        self.sources = self.tech_newsapi.get_sources()
        self.general_newsapi_1 = NewsApiClient(api_key=self.API_KEY2)
        self.general_newsapi_2 = NewsApiClient(api_key=self.API_KEY3)
        self.general_newsapi_3 = NewsApiClient(api_key=self.API_KEY4)
        self.google_newsapi = NewsApiClient(api_key=self.API_KEY5)
        # Make the magazine lists (display names as NewsAPI reports them).
        self.general_magazine1 = [
            "ABC News", "Associated Press", "Business Insider", "CBS News",
            "CNN"
        ]
        self.general_magazine2 = [
            "Mashable", "NBC News", "The New York Times", "Reuters",
            "The Economist"
        ]
        self.general_magazine3 = [
            "The Washington Post", "The Washington Times", "Time", "USA Today"
        ]
        # NOTE(review): "TechRader" is probably a typo for "TechRadar";
        # as written it will never match a NewsAPI source name — confirm.
        self.tech_magazine = [
            "Ars Technica", "Engadget", "Hacker News", "TechCrunch",
            "TechRader", "The Next Web", "The Verge", "Wired"
        ]
        self.today = datetime.date.today()
        self.start_date = datetime.datetime.strptime(start_date, "%Y-%m-%d")
        self.end_date = datetime.datetime.strptime(end_date, "%Y-%m-%d")
        # Inclusive day count of the crawl window (an int, despite the
        # attribute being named `timedelta`).
        self.timedelta = int((self.end_date - self.start_date).days) + 1
        # company_list: company names used as search queries later.
        self.cor_list = pd.read_csv(
            './company_data/Company.csv')['Name'].tolist()
        # Create today's output directories if they do not exist yet.
        if os.path.exists('./source/') == False:
            os.mkdir('./source')
        if os.path.exists('./source/{}'.format(
                self.today.strftime("%Y-%m-%d"))) == False:
            os.mkdir('./source/{}'.format(self.today.strftime("%Y-%m-%d")))
        if os.path.exists('./backup/') == False:
            os.mkdir('./backup')
        if os.path.exists('./backup/{}'.format(
                self.today.strftime("%Y-%m-%d"))) == False:
            os.mkdir('./backup/{}'.format(self.today.strftime("%Y-%m-%d")))
        print("news_crawler start! From: {}, to: {}, {}days".format(
            self.start_date.strftime("%Y-%m-%d"),
            self.end_date.strftime("%Y-%m-%d"), self.timedelta))

    # Get the magazine information
    def make_magazine(self, mode="tech"):
        """Resolve the configured magazine names to NewsAPI source ids.

        Args:
            mode: "tech" (default) or "general".

        Returns:
            mode == "tech": a flat list of source id strings.
            mode == "general": dict mapping "general_magazine1".."3" to
            lists of source ids.

        NOTE(review): any other mode leaves `id_list` unbound and raises
        UnboundLocalError at the return; and in "general" mode a group
        with no catalogue matches makes `general_magazine_dict[gm]`
        raise KeyError — confirm callers only use the two modes with the
        configured magazines.
        """
        if mode == "tech":
            magazine = []
            id_list = []
            # Keep only catalogue entries whose name is a tech magazine.
            for s in self.sources['sources']:
                if s['name'] in self.tech_magazine:
                    magazine.append(s)
            for m in magazine:
                id_list.append(m['id'])
        elif mode == "general":
            magazine_1 = list()
            magazine_2 = list()
            magazine_3 = list()
            general_magazine_dict = dict()
            # Bucket catalogue entries into the three general groups.
            for s in self.sources['sources']:
                if s['name'] in self.general_magazine1:
                    magazine_1.append(s)
                    general_magazine_dict['general_magazine1'] = magazine_1
                elif s['name'] in self.general_magazine2:
                    magazine_2.append(s)
                    general_magazine_dict['general_magazine2'] = magazine_2
                elif s['name'] in self.general_magazine3:
                    magazine_3.append(s)
                    general_magazine_dict['general_magazine3'] = magazine_3
            id_1 = list()
            id_2 = list()
            id_3 = list()
            id_list = dict()
            # Collect the ids per group, preserving group order.
            for gm in [
                    'general_magazine1', 'general_magazine2',
                    'general_magazine3'
            ]:
                print(gm)
                for m in general_magazine_dict[gm]:
                    if gm == 'general_magazine1':
                        id_1.append(m['id'])
                        id_list[gm] = id_1
                    elif gm == 'general_magazine2':
                        id_2.append(m['id'])
                        id_list[gm] = id_2
                    elif gm == 'general_magazine3':
                        id_3.append(m['id'])
                        id_list[gm] = id_3
        # Get the magazine id
        return id_list

    def make_tech_url_list(self):
        """Crawl every tech magazine day-by-day and write URL CSVs.

        Writes one backup CSV per magazine plus one aggregated
        *_techurl.csv, then returns a "success time:..." string.
        NOTE(review): DataFrame.append was removed in pandas 2.0, so
        this code requires pandas < 2 — confirm the pinned version.
        """
        # newsapi.get_everything() parameters
        # q: Keywords or phrases to search for
        # sources: A comma-seperated string of identifiers (maximum 20) for the news
        # from: A date and optional time for the oldest article allowed. default: the oldest according to your plan
        # to: A date and optional time for the newest article allowed. default: the newest according to your plan
        # sort_by: The order to sort the articles in. Possible options: relevancy, popularity, publishedAt
        # page_size: The number of results to return per page. 20 is the default, 100 is the maxium
        # page: Use this to page through the results
        start_time = time.time()
        # Make the empty final data frame
        id_list = self.make_magazine(mode="tech")
        total_df = pd.DataFrame(
            columns=["Magazine", "Date", "Author", "Title", "Url"])
        for id in id_list:  # NOTE: loop variable shadows builtin `id`
            print(id)
            # Make the empty backup data frame
            backup_df = pd.DataFrame(
                columns=["Magazine", "Date", "Author", "Title", "Url"])
            for i in range(0, self.timedelta):
                # One request per source per day in the crawl window.
                date = self.start_date + datetime.timedelta(i)
                date = date.strftime("%Y-%m-%d")
                print(date)
                articles = self.tech_newsapi.get_everything(sources=id,
                                                            from_param=date,
                                                            to=date,
                                                            language="en",
                                                            page_size=100,
                                                            page=1)
                for a in articles['articles']:
                    total_df = total_df.append(
                        {
                            "Magazine": id,
                            "Date": a['publishedAt'],
                            "Author": a['author'],
                            "Title": a['title'],
                            "Url": a['url']
                        },
                        ignore_index=True)
                    backup_df = backup_df.append(
                        {
                            "Magazine": id,
                            "Date": a['publishedAt'],
                            "Author": a['author'],
                            "Title": a['title'],
                            "Url": a['url']
                        },
                        ignore_index=True)
            backup_df.to_csv("./backup/{0}/{0}_{1}.csv".format(
                self.today.strftime("%Y-%m-%d"), id),
                             index=False)
        total_df.to_csv("./source/{}/{}_techurl.csv".format(
            self.today.strftime("%Y-%m-%d"), self.today.strftime("%Y%m%d")),
                        index=False,
                        encoding='utf-8')
        end_time = time.time()
        return "success time:{}".format(end_time - start_time)

    def make_general_url_list(self):
        """Crawl the general magazines per company query and write CSVs.

        Uses a different API client per magazine group to spread quota.
        Writes one backup CSV per magazine plus one aggregated
        *_genurl.csv, then returns a "success time:..." string.
        """
        start_time = time.time()
        # newsapi.get_everything() parameters
        # q: Keywords or phrases to search for
        # sources: A comma-seperated string of identifiers (maximum 20) for the news
        # from_param: A date and optional time for the oldest article allowed. default: the oldest according to your plan
        # to: A date and optional time for the newest article allowed. default: the newest according to your plan
        # sort_by: The order to sort the articles in. Possible options: relevancy, popularity, publishedAt
        # page_size: The number of results to return per page. 20 is the default, 100 is the maxium
        # page: Use this to page through the results

        # Make the empty final data frame
        start_date = self.start_date.strftime("%Y-%m-%d")
        end_date = self.end_date.strftime("%Y-%m-%d")
        print("{}~{}".format(start_date, end_date))
        id_dict = self.make_magazine(mode="general")
        total_df = pd.DataFrame(
            columns=["Magazine", "Date", "Author", "Title", "Url", "Company"])
        for gm in [
                'general_magazine1', 'general_magazine2', 'general_magazine3'
        ]:
            id_list = id_dict[gm]
            # Pick the API client assigned to this magazine group.
            if gm == 'general_magazine1':
                newsapi = self.general_newsapi_1
            elif gm == 'general_magazine2':
                newsapi = self.general_newsapi_2
            elif gm == 'general_magazine3':
                newsapi = self.general_newsapi_3
            for id in id_list:  # NOTE: loop variable shadows builtin `id`
                print("Magazine : ", id)
                # Make the empty backup data frame
                backup_df = pd.DataFrame(columns=[
                    "Magazine", "Date", "Author", "Title", "Url", "Company"
                ])
                # One request per (magazine, company) over the full window.
                for query in self.cor_list:
                    print(query)
                    articles = newsapi.get_everything(sources=id,
                                                      q=query,
                                                      from_param=start_date,
                                                      to=end_date,
                                                      language="en",
                                                      page_size=100,
                                                      page=1)
                    for a in articles['articles']:
                        total_df = total_df.append(
                            {
                                "Magazine": id,
                                "Date": a['publishedAt'],
                                "Author": a['author'],
                                "Title": a['title'],
                                "Url": a['url'],
                                "Company": query
                            },
                            ignore_index=True)
                        backup_df = backup_df.append(
                            {
                                "Magazine": id,
                                "Date": a['publishedAt'],
                                "Author": a['author'],
                                "Title": a['title'],
                                "Url": a['url'],
                                "Company": query
                            },
                            ignore_index=True)
                backup_df.to_csv("./backup/{0}/{0}_{1}.csv".format(
                    self.today.strftime("%Y-%m-%d"), id),
                                 index=False)
        total_df.to_csv("./source/{}/{}_genurl.csv".format(
            self.today.strftime("%Y-%m-%d"), self.today.strftime("%Y%m%d")),
                        index=False,
                        encoding='utf-8')
        end_time = time.time()
        return "success time:{}".format(end_time - start_time)

    # Crawl google-news article URLs
    def make_google_url_list(self):
        """Crawl google-news per company query and write one CSV.

        Returns a "success time:..." string.
        """
        start_time = time.time()
        # newsapi.get_everything() parameters
        # q: Keywords or phrases to search for
        # sources: A comma-seperated string of identifiers (maximum 20) for the news
        # from: A date and optional time for the oldest article allowed. default: the oldest according to your plan
        # to: A date and optional time for the newest article allowed. default: the newest according to your plan
        # sort_by: The order to sort the articles in. Possible options: relevancy, popularity, publishedAt
        # page_size: The number of results to return per page. 20 is the default, 100 is the maxium
        # page: Use this to page through the results

        # Make the empty final data frame
        start_date = self.start_date.strftime("%Y-%m-%d")
        end_date = self.end_date.strftime("%Y-%m-%d")
        print("{}~{}".format(start_date, end_date))
        total_df = pd.DataFrame(
            columns=["Magazine", "Date", "Author", "Title", "Url"])
        for query in self.cor_list:
            print(query)
            articles = self.google_newsapi.get_everything(
                sources='google-news',
                q=query,
                from_param=start_date,
                to=end_date,
                language="en",
                page_size=100,
                page=1)
            print(len(articles['articles']))
            for a in articles['articles']:
                total_df = total_df.append(
                    {
                        "Magazine": "google_news",
                        "Date": a['publishedAt'],
                        "Author": a['author'],
                        "Title": a['title'],
                        "Url": a['url']
                    },
                    ignore_index=True)
        # NOTE(review): the directory component here uses %Y%m%d, but
        # __init__ created ./source/%Y-%m-%d/ — this to_csv likely fails
        # with FileNotFoundError; confirm the intended path format.
        total_df.to_csv("./source/{0}/{0}_googleurl.csv".format(
            self.today.strftime("%Y%m%d")),
                        index=False,
                        encoding='utf-8')
        end_time = time.time()
        return "success time:{}".format(end_time - start_time)
Example #8
0
def category8():
    """Return all English/US NewsAPI sources as a JSON response."""
    # SECURITY NOTE(review): hard-coded API key; move to config/env.
    newsapi = NewsApiClient(api_key="d11761b89fdb4599b1497bf951690000")
    sources = newsapi.get_sources(language="en", country="us")
    # (Dropped the unused local `diccc = {}` from the original.)
    return jsonify(sources)
class NewsApiClientTest(unittest.TestCase):
    """Argument-validation tests for NewsApiClient, table-driven.

    Each endpoint is exercised with a list of (expected exception,
    kwargs) cases mirroring the original hand-written assertions, in
    the same order.
    """

    def setUp(self):
        key = os.environ.get("news_api_secret")
        self.api = NewsApiClient(key)

    def _assert_all_raise(self, endpoint, cases):
        # Call `endpoint` once per case, expecting the given exception.
        for expected_exc, kwargs in cases:
            with self.assertRaises(expected_exc):
                endpoint(**kwargs)

    def test_api_top_headline(self):
        self._assert_all_raise(self.api.get_top_headlines, [
            # q must be a str
            (TypeError, {"q": 0}),
            # sources may not be combined with country/category
            (ValueError, {"sources": "techcrunch", "country": "us",
                          "category": "business"}),
            # sources must be a str
            (TypeError, {"sources": 0}),
            # language must be a valid str
            (TypeError, {"language": 0}),
            (ValueError, {"language": "xx"}),
            # country must be a valid str
            (TypeError, {"country": 0}),
            (ValueError, {"country": "xx"}),
            # category must be a valid str
            (TypeError, {"category": 0}),
            (ValueError, {"category": "x0x"}),
            # page_size must be an int within [0, 100]
            (TypeError, {"page_size": "1"}),
            (ValueError, {"page_size": -1}),
            (ValueError, {"page_size": 1000}),
            # page must be a non-negative int
            (TypeError, {"page": "1"}),
            (ValueError, {"page": -1}),
        ])

    def test_api_get_everything(self):
        self._assert_all_raise(self.api.get_everything, [
            # q must be a str
            (TypeError, {"q": 0}),
            # sources / domains / exclude_domains must be str
            (TypeError, {"sources": 0}),
            (TypeError, {"domains": 0}),
            (TypeError, {"exclude_domains": 0}),
            # language must be a valid str
            (TypeError, {"language": 0}),
            (ValueError, {"language": "xx"}),
            # sort_by must be one of the documented options
            (TypeError, {"sort_by": 1}),
            (ValueError, {"sort_by": "sort"}),
            # page_size must be an int within [0, 100]
            (TypeError, {"page_size": "1"}),
            (ValueError, {"page_size": -1}),
            (ValueError, {"page_size": 1000}),
            # page must be a non-negative int
            (TypeError, {"page": "1"}),
            (ValueError, {"page": -1}),
        ])

    def test_api_get_sources(self):
        self._assert_all_raise(self.api.get_sources, [
            # language must be a valid str
            (TypeError, {"language": 0}),
            (ValueError, {"language": "xx"}),
            # country must be a valid str
            (TypeError, {"country": 0}),
            (ValueError, {"country": "xx"}),
            # category must be a valid str
            (TypeError, {"category": 0}),
            (ValueError, {"category": "x0x"}),
        ])
Example #10
0
# Getting newspaper articles from News API

from newsapi.newsapi_client import NewsApiClient
from nltk.sentiment.vader import SentimentIntensityAnalyzer as SIA
from nltk.tokenize import RegexpTokenizer
from nltk.corpus import stopwords

import pandas as pd 
import nltk as nlp
import requests
import json
import numpy as np

# Set up the NewsAPI client and pull the master source list.
newsapi = NewsApiClient(api_key='f9589c5fc62d4d7699b9eaa845e24683')
sources = newsapi.get_sources()

# Getting information of the first 60 newspaper articles

no_pages = 2
# One get_everything() response dict per page, pages numbered from 1.
all_articles = [
    newsapi.get_everything(q='arsenal', from_param='2019-08-20',
                           to='2019-09-05', language='en',
                           sort_by='relevancy', page=page_no + 1)
    for page_no in range(no_pages)
]


# In[10]:


# Sentiment Analysis of the content of the newspaper articles 
Example #11
0
class Headlines(object):
    '''Access daily articles via api and create tabular files to store in S3.

       Exceptions:
       NewsAPIException triggered when subscriber is not a paid customer; this limits number of results
       S3Exception triggered when old bucket deleted but not cleaned up by AWS; needs unique bucket name
    '''
    def __init__(self, api_key):
        """Build the NewsAPI and S3 clients and empty accumulators.

        Args:
            api_key: NewsAPI key string.
        """
        super(Headlines, self).__init__()
        self.news_cli = NewsApiClient(api_key=api_key)
        self.region = 'us-west-1'
        self.upload_cli = boto3.resource('s3', self.region)
        # NOTE(review): `log_date` is a module-level name defined
        # elsewhere in this file — presumably a date string; confirm.
        self.file_name = '_'.join([log_date, 'top_headlines.csv'])
        self.bucket_dict = defaultdict(list)  # source id -> headline rows
        self.en_sources = []  # sorted English source ids
        self.headlines = []   # aggregated article dicts
        self.paths = []       # temp CSV paths produced by create_csvs()

    def get_english_sources(self) -> list:
        """Fetch, sort and cache the ids of all English news sources."""
        # this will return a complete dataset of 'en' news sources, not paginated
        self.en_sources = sorted([
            item['id']
            for item in self.news_cli.get_sources(language='en')['sources']
        ])
        logging.debug('SOURCES ({}): {}'.format(len(self.en_sources),
                                                self.en_sources))
        return self.en_sources

    def get_keyword_headlines(self, keywords) -> list:
        """Return top headlines filtered by *keywords* (an iterable of str)."""
        # this will also return a complete dataset of news sources by keywords, not paginated
        search = ','.join(keywords)
        logging.debug('KEYWORDS ({}): {}'.format(len(keywords), search))
        return self.get_top_headlines(search)

    def get_top_headlines(self, keywords=None) -> list:
        """Aggregate top headlines for the cached English sources.

        The first request is used only for its 'totalResults' count;
        subsequent page requests accumulate into self.headlines.
        """
        # for all above sources and/or keywords
        page_num = 1
        sources = ','.join(self.en_sources)
        get_top = self.news_cli.get_top_headlines
        if keywords:
            headlines = get_top(sources=sources, q=keywords)
        else:
            headlines = get_top(sources=sources)
        # we need the total results to help us paginate (NOTE: unless paid subscriber, only the first 100 will be retrievable)
        num_results = headlines['totalResults']
        while len(self.headlines) < num_results:
            try:
                # NOTE(review): this paginated request does not re-send
                # `q`, so keyword searches aggregate UNFILTERED headlines
                # while looping toward the keyword result count — looks
                # like a bug; confirm intent.
                results = get_top(sources=sources,
                                  page_size=100,
                                  page=page_num)
                self.headlines.extend(results['articles'])
                logging.debug(
                    ('TOTAL_ARTICLES: {}, PAGE: {}, NUM_PAGE_RESULTS: {}, '
                     'NUM_AGGD_ARTICLES: {}'.format(num_results, page_num,
                                                    len(results['articles']),
                                                    len(self.headlines))))
                page_num += 1
            except NewsAPIException as ex:
                # this occurs when we have not paid for subscription and we need to short circuit to complete proc
                logging.warning('Caught NewsAPI Exception: {}'.format(ex))
                break

        return self.headlines

    def create_csvs(self) -> list:
        """Write one temp CSV per source id and return the file paths."""
        # dynamically gen fields by flattening nested vars
        # (a nested dict value k -> {a, b} becomes columns k_a, k_b)
        fields = list(
            itertools.chain(*[[k] if not isinstance(v, dict) else map(
                lambda x: '_'.join([k, x]), v.keys())
                              for k, v in self.headlines[0].items()]))
        logging.debug('FIELDS ({}): {}'.format(len(fields), fields))
        # munge fields to account for flattened nested vars
        # NOTE(review): `fields` still contains source_id/source_name
        # while the rows below carry content_id/content_name; DictWriter
        # (extrasaction='raise' by default) should raise ValueError on
        # the mismatched keys — confirm this path actually works.
        for headline in self.headlines:
            headline['content_id'] = headline['source']['id']
            headline['content_name'] = headline['source']['name']
            del headline['source']
            self.bucket_dict[headline['content_id']].append(headline)
        logging.debug('BUCKETS ({}): {}, {}'.format(
            len(self.bucket_dict.keys()), self.bucket_dict.keys(),
            self.bucket_dict))
        for bucket, headlines in self.bucket_dict.items():
            # tmp file store for ease of bucket naming
            path = '@'.join([self.file_name, bucket])
            with open(path, 'w') as out:
                writer = csv.DictWriter(out, fieldnames=fields)
                writer.writeheader()
                for row in headlines:
                    writer.writerow(row)
            self.paths.append(path)
            logging.debug('PATHS ({}): {}'.format(len(self.paths), self.paths))
        return self.paths

    def upload_s3(self):
        """Upload each temp CSV to an S3 bucket named after its source id."""
        for path in self.paths:
            # path is "<file_name>@<bucket>", built in create_csvs()
            out_file, bucket = path.split('@')
            logging.debug('UPLOADING: {}, TO: {}/{}'.format(
                path, bucket, self.file_name))
            # first check if bucket already exists
            if not self.upload_cli.Bucket(
                    bucket) in self.upload_cli.buckets.all():
                try:
                    self.upload_cli.create_bucket(Bucket=bucket,
                                                  ACL='public-read-write',
                                                  CreateBucketConfiguration={
                                                      'LocationConstraint':
                                                      self.region
                                                  })
                except Exception as ex:
                    # S3 needs time to delete old buckets so sometimes we have to force this to be a unique name (at least for testing)
                    # NOTE(review): the fallback creates "<bucket>.<log_date>"
                    # but the upload below still targets the original
                    # `bucket` name — confirm this is intended.
                    self.upload_cli.create_bucket(Bucket='.'.join(
                        [bucket, log_date]),
                                                  ACL='public-read-write',
                                                  CreateBucketConfiguration={
                                                      'LocationConstraint':
                                                      self.region
                                                  })
            try:
                self.upload_cli.meta.client.upload_file(
                    path, bucket, self.file_name)
            except Exception as ex:
                logging.error('Caught upload error: {}'.format(ex))
class NewsApiClientTest(unittest.TestCase):
    """Parameter-validation tests for the NewsApiClient endpoints."""

    def setUp(self):
        # The API key is read from the environment so no secret lives in code.
        self.api = NewsApiClient(os.environ.get('news_api_secret'))

    def _assert_rejects(self, call, exc_type, **params):
        """Assert that ``call(**params)`` raises ``exc_type``."""
        with self.assertRaises(exc_type):
            call(**params)

    def test_api_top_headline(self):
        top = self.api.get_top_headlines
        # Keyword/phrase must be a str.
        self._assert_rejects(top, TypeError, q=0)
        # sources cannot be combined with country/category.
        self._assert_rejects(top, ValueError,
                             sources='techcrunch', country='us',
                             category='business')
        # sources must be a str.
        self._assert_rejects(top, TypeError, sources=0)
        # language must be a str holding a known code.
        self._assert_rejects(top, TypeError, language=0)
        self._assert_rejects(top, ValueError, language='xx')
        # country must be a str holding a known code.
        self._assert_rejects(top, TypeError, country=0)
        self._assert_rejects(top, ValueError, country='xx')
        # category must be a str holding a known category.
        self._assert_rejects(top, TypeError, category=0)
        self._assert_rejects(top, ValueError, category='x0x')
        # page_size must be an int between 0 and 100.
        self._assert_rejects(top, TypeError, page_size='1')
        self._assert_rejects(top, ValueError, page_size=-1)
        self._assert_rejects(top, ValueError, page_size=1000)
        # page must be a non-negative int.
        self._assert_rejects(top, TypeError, page='1')
        self._assert_rejects(top, ValueError, page=-1)

    def test_api_get_everything(self):
        everything = self.api.get_everything
        # Keyword/phrase must be a str.
        self._assert_rejects(everything, TypeError, q=0)
        # sources / domains / exclude_domains must be str.
        self._assert_rejects(everything, TypeError, sources=0)
        self._assert_rejects(everything, TypeError, domains=0)
        self._assert_rejects(everything, TypeError, exclude_domains=0)
        # Date bounds must be 'YYYY-MM-DD' strings.
        self._assert_rejects(everything, TypeError, from_param=0)
        self._assert_rejects(everything, ValueError, from_param='2016-6-4')
        self._assert_rejects(everything, TypeError, to=1)
        self._assert_rejects(everything, ValueError, to='2016-6-24')
        # language must be a str holding a known code.
        self._assert_rejects(everything, TypeError, language=0)
        self._assert_rejects(everything, ValueError, language='xx')
        # sort_by must be a str holding a known sort key.
        self._assert_rejects(everything, TypeError, sort_by=1)
        self._assert_rejects(everything, ValueError, sort_by='sort')
        # page_size must be an int between 0 and 100.
        self._assert_rejects(everything, TypeError, page_size='1')
        self._assert_rejects(everything, ValueError, page_size=-1)
        self._assert_rejects(everything, ValueError, page_size=1000)
        # page must be a non-negative int.
        self._assert_rejects(everything, TypeError, page='1')
        self._assert_rejects(everything, ValueError, page=-1)

    def test_api_get_sources(self):
        get_sources = self.api.get_sources
        # language must be a str holding a known code.
        self._assert_rejects(get_sources, TypeError, language=0)
        self._assert_rejects(get_sources, ValueError, language='xx')
        # country must be a str holding a known code.
        self._assert_rejects(get_sources, TypeError, country=0)
        self._assert_rejects(get_sources, ValueError, country='xx')
        # category must be a str holding a known category.
        self._assert_rejects(get_sources, TypeError, category=0)
        self._assert_rejects(get_sources, ValueError, category='x0x')
Example #13
0
File: main.py  Project: ChrisUSC/CSCI_571
def Technology():
    """Flask view: return US English technology news sources as JSON."""
    # NOTE(review): API key is hard-coded here; consider moving it to
    # configuration or an environment variable.
    client = NewsApiClient(api_key='3061013219ce4282b5d26bdcf8b9f966')
    tech_sources = client.get_sources(category='technology',
                                      language='en',
                                      country='us')
    return jsonify({"category": tech_sources})
Example #14
0
    # LOOPS THROUGH ALL THE GUILD / SERVERS THAT THE BOT IS ASSOCIATED WITH.
    for guild in bot.guilds:
        # PRINT THE SERVER'S ID AND NAME.
        print(f"- {guild.id} (name: {guild.name})")

        # INCREMENTS THE GUILD COUNTER.
        guild_count = guild_count + 1

    # PRINTS HOW MANY GUILDS / SERVERS THE BOT IS IN.
    print("SampleDiscordBot is in " + str(guild_count) + " guilds.")


# Initialize the NewsAPI client used by the bot's handlers below.
newsapi = NewsApiClient(api_key=NEWSAPI_TOKEN)
# Fetch the English-language US news sources once at module import time.
ussources = newsapi.get_sources(language="en", country="us")


# /v2/top-headlines
@bot.event
async def on_message(message, *args):
    # CHECKS IF THE MESSAGE THAT WAS SENT IS EQUAL TO "HELLO".
    if message.content == "!hello":
        # SENDS BACK A MESSAGE TO THE CHANNEL.
        await message.channel.send("Message Recieved. Hey There!"
                                   )  #bot response to hello
    if message.content == "!help":
        # SENDS WHAT TO DO TO HELP THE USER
        await message.channel.send(
            "This bot currently can return breaking news headlines for a country and category with the !headlines."
        )