Exemplo n.º 1
0
 def search(self):
     """Search the NYT for "Covid" news articles and return them newest first.

     The search window runs from ``self.startDate`` at 00:00:00 to
     ``self.endDate`` at 23:59:59; both attributes must stringify as
     ``YYYY-MM-DD``.

     Returns:
         A list of at most 10 article dicts, ordered by publication day
         (latest first). Each dict's ``'pub_date'`` is overwritten with a
         ``'%d-%b-%Y'`` formatted string.
     """
     def published_day(article):
         # Only the leading YYYY-MM-DD part of 'pub_date' is parsed.
         return datetime.strptime(article['pub_date'][0:10], '%Y-%m-%d')

     # NOTE(review): the API key is hard-coded here; consider moving it to
     # configuration.
     nyt = NYTAPI("wbWOIDwmGPWGQALhXbfC3BDK3EMtFBMA")

     # Widen the two days to cover them completely.
     window_opens = str(self.startDate) + " 00:00:00"
     window_closes = str(self.endDate) + " 23:59:59"
     stamp_format = '%Y-%m-%d %H:%M:%S'

     articles = nyt.article_search(
         query="Covid",
         results=10,
         dates={
             "begin": datetime.strptime(window_opens, stamp_format),
             "end": datetime.strptime(window_closes, stamp_format),
         },
         options={
             "sort": "relevance",
             "sources": [
                 "New York Times",
                 "AP",
                 "Reuters",
                 "International Herald Tribune",
             ],
             "type_of_material": ["News"],
         })

     # The API call sorts by relevance; re-order by publication day.
     newest_first = list(articles)
     newest_first.sort(key=published_day, reverse=True)

     # Replace the raw timestamp with a display string.
     for article in newest_first:
         article['pub_date'] = published_day(article).strftime('%d-%b-%Y')
     return newest_first
Exemplo n.º 2
0
    def search(self):
        """Run the NYT article search for "Covid" news in the instance's date range.

        The range runs from ``self.startDate`` at 00:00:00 to ``self.endDate``
        at 23:59:59; both attributes must stringify as ``YYYY-MM-DD``.

        Returns:
            Whatever ``NYTAPI.article_search`` returns for the query (at most
            10 results requested, sorted by relevance).
        """
        # NOTE(review): the API key is hard-coded here; consider moving it to
        # configuration.
        nyt = NYTAPI("wbWOIDwmGPWGQALhXbfC3BDK3EMtFBMA")

        # Widen the two days to cover them completely.
        first_moment = str(self.startDate) + " 00:00:00"
        last_moment = str(self.endDate) + " 23:59:59"
        stamp_format = '%Y-%m-%d %H:%M:%S'

        date_window = {
            "begin": datetime.datetime.strptime(first_moment, stamp_format),
            "end": datetime.datetime.strptime(last_moment, stamp_format),
        }
        search_options = {
            "sort": "relevance",
            "sources": [
                "New York Times",
                "AP",
                "Reuters",
                "International Herald Tribune",
            ],
            "type_of_material": ["News"],
        }

        return nyt.article_search(query="Covid",
                                  results=10,
                                  dates=date_window,
                                  options=search_options)
Exemplo n.º 3
0
# Call the client's endpoints one after another, pausing five seconds between
# groups of calls.
# NOTE(review): the pauses presumably keep the script under the API rate
# limit — confirm.
nyt.top_stories(section="science")
nyt.most_viewed(days=30)
time.sleep(5)
nyt.most_shared(days=30, method="email")
nyt.book_reviews(author="Michelle Obama")
time.sleep(5)
nyt.best_sellers_lists()
nyt.best_sellers_list(date=datetime.datetime(2019, 1, 1),
                      name="hardcover-fiction")
time.sleep(5)
nyt.movie_reviews(keyword="FBI", options={"order": "by-opening-date"})
nyt.article_metadata(
    url=
    "https://www.nytimes.com/2019/10/20/world/middleeast/erdogan-turkey-nuclear-weapons-trump.html"
)
time.sleep(5)
nyt.tag_query("Pentagon", max_results=20)
nyt.archive_metadata(date=datetime.datetime(2019, 1, 1))
time.sleep(5)
# The date window uses the keys "begin" and "end", matching the other
# article_search calls; "begin_date"/"end_date" are not the expected keys.
nyt.article_search(query="Trump",
                   results=20,
                   dates={
                       "begin": datetime.datetime(2019, 1, 1),
                       "end": datetime.datetime(2019, 2, 1)
                   },
                   options={"sort": "oldest"})

# Print the elapsed time; `begin` is expected to have been set before this
# excerpt starts.
end = datetime.datetime.now()
print(end - begin)
Exemplo n.º 4
0
class TestNewYorkTimes(unittest.TestCase):
    """Tests for the ``NYTAPI`` client.

    Every test uses the client built in ``setUp`` (created with
    ``parse_dates=True``), except ``test_parse_dates_disabled``, which
    builds its own client without that flag.
    """

    def setUp(self):
        """Create a fresh client with date parsing enabled."""
        self.nyt = NYTAPI(API_KEY, parse_dates=True)

    def tearDown(self):
        """Close the client created in ``setUp``."""
        self.nyt.close()

    def test_empty_api_key(self):
        """Constructing a client without a key raises ``ValueError``."""
        with self.assertRaises(ValueError):
            NYTAPI()

    def test_top_stories(self):
        """Top stories come back as a non-empty list of dicts with parsed dates."""
        top_stories = self.nyt.top_stories()
        self.assertIsInstance(top_stories, list)
        self.assertGreater(len(top_stories), 0)

        for top_story in top_stories:
            self.assertIsInstance(top_story, dict)
            self.assertIsInstance(top_story["created_date"], datetime.datetime)
            self.assertIsInstance(top_story["published_date"],
                                  datetime.datetime)

    def test_top_stories_section(self):
        """Top stories for a named section are a non-empty list of dicts."""
        section = "world"
        top_stories_section = self.nyt.top_stories(section=section)
        self.assertIsInstance(top_stories_section, list)
        self.assertGreater(len(top_stories_section), 0)

        for top_story in top_stories_section:
            self.assertIsInstance(top_story, dict)

    def test_top_stories_wrong_section(self):
        """An unknown section raises ``ValueError``; a non-string one ``TypeError``."""
        with self.assertRaises(ValueError):
            self.nyt.top_stories("abcdfsda")

        with self.assertRaises(TypeError):
            self.nyt.top_stories(section=123)

    def test_most_viewed(self):
        """Most viewed articles are a non-empty list of dicts with a media list."""
        most_viewed = self.nyt.most_viewed()
        self.assertIsInstance(most_viewed, list)
        self.assertGreater(len(most_viewed), 0)

        for most in most_viewed:
            self.assertIsInstance(most, dict)
            self.assertIsInstance(most["media"], list)

    def test_most_viewed_invalid_days(self):
        """``days=2`` raises ``ValueError``; a string value raises ``TypeError``."""
        with self.assertRaises(ValueError):
            self.nyt.most_viewed(2)

        with self.assertRaises(TypeError):
            self.nyt.most_viewed(days="1")

    def test_most_shared(self):
        """Most shared articles are dicts with parsed dates and a media list."""
        most_shared = self.nyt.most_shared()
        self.assertIsInstance(most_shared, list)
        self.assertGreater(len(most_shared), 0)

        for most in most_shared:
            self.assertIsInstance(most, dict)
            self.assertIsInstance(most["published_date"], datetime.date)
            self.assertIsInstance(most["updated"], datetime.datetime)
            self.assertIsInstance(most["media"], list)

    def test_most_shared_invalid(self):
        """Invalid ``method``/``days`` values raise ``ValueError`` or ``TypeError``."""
        with self.assertRaises(ValueError):
            self.nyt.most_shared(method="twitter")

        with self.assertRaises(ValueError):
            self.nyt.most_shared(days=2)

        with self.assertRaises(TypeError):
            self.nyt.most_shared(days="2")

    def test_book_reviews(self):
        """Book reviews by author are non-empty and all carry that author."""
        author = "Barack Obama"
        book_reviews = self.nyt.book_reviews(author=author)
        self.assertIsInstance(book_reviews, list)
        self.assertGreater(len(book_reviews), 0)

        for book_review in book_reviews:
            self.assertIsInstance(book_review, dict)
            self.assertEqual(book_review["book_author"], author)

    def test_book_reviews_invalid(self):
        """These three argument combinations each raise ``ValueError``."""
        with self.assertRaises(ValueError):
            self.nyt.book_reviews()

        with self.assertRaises(ValueError):
            self.nyt.book_reviews(isbn=213789, author="author")

        with self.assertRaises(ValueError):
            self.nyt.book_reviews(isbn=213789)

    def test_best_sellers_lists(self):
        """The overview of best seller lists is a non-empty list."""
        best_sellers_lists = self.nyt.best_sellers_lists()
        self.assertIsInstance(best_sellers_lists, list)
        self.assertGreater(len(best_sellers_lists), 0)

    def test_best_seller_list(self):
        """The hardcover fiction list of 2019-01-01 starts with the expected ISBN."""
        best_seller_list = self.nyt.best_sellers_list(
            date=datetime.datetime(2019, 1, 1),
            name="hardcover-fiction",
        )
        self.assertIsInstance(best_seller_list, list)
        self.assertEqual(best_seller_list[0]["primary_isbn13"],
                         "9780385544153")

    def test_best_seller_list_invalid(self):
        """An unknown list name raises ``ValueError``; a string date ``TypeError``."""
        with self.assertRaises(ValueError):
            self.nyt.best_sellers_list(name="not a name")

        with self.assertRaises(TypeError):
            self.nyt.best_sellers_list(date="123")

    def test_movie_reviews(self):
        """Movie reviews are a non-empty list of dicts."""
        movie_reviews = self.nyt.movie_reviews()
        self.assertIsInstance(movie_reviews, list)
        self.assertGreater(len(movie_reviews), 0)

        for movie_review in movie_reviews:
            self.assertIsInstance(movie_review, dict)

    def test_movie_reviews_invalid(self):
        """A non-string keyword raises ``TypeError``."""
        with self.assertRaises(TypeError):
            self.nyt.movie_reviews(keyword=123)

    def test_article_metadata(self):
        """Metadata of a known article has the expected title and creation time."""
        article_metadata = self.nyt.article_metadata(
            "https://www.nytimes.com/live/2021/02/10/us/impeachment-trial/prosecutors-begin-arguments-against-trump-saying-he-became-the-inciter-in-chief-of-a-dangerous-insurrection"
        )
        self.assertIsInstance(article_metadata, list)

        for article in article_metadata:
            self.assertIsInstance(article, dict)

        title = "Prosecutors argue that Trump ‘became the inciter in chief’ and retell riot with explicit video."
        # 2021-02-10 11:04:08 at UTC-05:00 (timedelta of -1 day + 68400 s).
        creation_datetime = datetime.datetime(
            2021,
            2,
            10,
            11,
            4,
            8,
            tzinfo=datetime.timezone(datetime.timedelta(days=-1,
                                                        seconds=68400)),
        )
        self.assertEqual(article_metadata[0]["title"], title)
        self.assertEqual(
            article_metadata[0]["created_date"],
            creation_datetime,
        )

    def test_article_metadata_invalid(self):
        """Missing or non-string URLs raise ``TypeError``; a non-URL string ``ValueError``."""
        with self.assertRaises(TypeError):
            self.nyt.article_metadata()

        with self.assertRaises(TypeError):
            self.nyt.article_metadata(123)

        with self.assertRaises(ValueError):
            self.nyt.article_metadata("text")

    def test_archive_metadata(self):
        """Archive metadata for today's month contains only articles from that month on."""
        archive_metadata = self.nyt.archive_metadata(
            date=datetime.date.today())
        self.assertIsInstance(archive_metadata, list)
        self.assertGreater(len(archive_metadata), 0)

        for metadata in archive_metadata:
            self.assertIsInstance(metadata, dict)
            # Lower bound: midnight UTC on the first day of the current month.
            self.assertGreaterEqual(
                metadata["pub_date"],
                datetime.datetime.now(tz=datetime.timezone.utc).replace(
                    day=1, hour=0, minute=0, second=0, microsecond=0),
            )

    def test_archive_metadata_invalid(self):
        """Non-date arguments raise ``TypeError``."""
        with self.assertRaises(TypeError):
            self.nyt.archive_metadata("string")

        with self.assertRaises(TypeError):
            self.nyt.archive_metadata(123)

    def test_article_search(self):
        """Requesting 80 results yields exactly 80 article dicts."""
        search = self.nyt.article_search("Joe Biden", results=80)
        self.assertIsInstance(search, list)
        self.assertEqual(80, len(search))
        for article in search:
            self.assertIsInstance(article, dict)

    def test_article_search_invalid(self):
        """A non-string query or a date passed as second positional raises ``TypeError``."""
        with self.assertRaises(TypeError):
            self.nyt.article_search(123)

        with self.assertRaises(TypeError):
            self.nyt.article_search("query", datetime.date.today())

    def test_section_list(self):
        """The section list is a non-empty list of dicts."""
        section_list = self.nyt.section_list()
        self.assertIsInstance(section_list, list)
        self.assertGreater(len(section_list), 0)

        for section in section_list:
            self.assertIsInstance(section, dict)

    def test_latest_articles(self):
        """Latest articles are a list of dicts."""
        latest_articles = self.nyt.latest_articles()
        self.assertIsInstance(latest_articles, list)

        for article in latest_articles:
            self.assertIsInstance(article, dict)

    def test_latest_articles_invalid(self):
        """A non-string source raises ``TypeError``."""
        with self.assertRaises(TypeError):
            self.nyt.latest_articles(source=123)

    def test_tag_query(self):
        """``max_results=2`` yields exactly two tags."""
        tags = self.nyt.tag_query("Obama", max_results=2)
        self.assertIsInstance(tags, list)
        # Compare by value; identity of int objects is an implementation detail.
        self.assertEqual(2, len(tags))

    def test_tag_query_invalid(self):
        """A non-string query or non-int ``max_results`` raises ``TypeError``."""
        with self.assertRaises(TypeError):
            self.nyt.tag_query(123)

        with self.assertRaises(TypeError):
            self.nyt.tag_query("Obama", max_results="2")

    def test_parse_dates_disabled(self):
        """Without ``parse_dates`` the creation date stays a raw string."""
        local_nyt = NYTAPI(API_KEY)
        # Close this extra client too, even if an assertion below fails.
        self.addCleanup(local_nyt.close)
        data = local_nyt.article_metadata(
            "https://www.nytimes.com/live/2021/02/10/us/impeachment-trial/prosecutors-begin-arguments-against-trump-saying-he-became-the-inciter-in-chief-of-a-dangerous-insurrection"
        )

        self.assertEqual(data[0]["created_date"], "2021-02-10T11:04:08-05:00")
Exemplo n.º 5
0
from pynytimes import NYTAPI
# Client created with a placeholder key.
nyt = NYTAPI("YOUR_API_KEY")

# NOTE(review): a full request URL is passed as the query text — confirm this
# is intended rather than a plain search term.
articles = nyt.article_search(
    "https://api.nytimes.com/svc/search/v2/articlesearch.json?fq=indigo&api-key=YOUR_API_KEY"
)

# One {'url': ...} record per returned article.
news = [{'url': entry['web_url']} for entry in articles]

# Flatten the values of all records into a single list of URLs.
urls = [link for record in news for link in record.values()]

for url in urls:
    print(url)

# Example output for the "indigo" article search:
"""
https://www.nytimes.com/1889/07/14/archives/no-robbery.html
https://www.nytimes.com/1858/10/29/archives/central-america-crops-and-contracts-in-costa-ricathe-proposed.html
https://www.nytimes.com/1898/08/20/archives/reviews-of-books-dialect-tales-justly-praised.html
https://www.nytimes.com/1890/10/19/archives/anne-bissell.html
https://www.nytimes.com/1859/10/27/archives/european-news-the-jason-at-st-johns-further-by-the-persia-the-great.html
https://www.nytimes.com/1859/06/27/archives/from-the-pacific-coast-nicaragua-rejects-the-american-ultimatum.html
"""
import pandas as pd

# NOTE(review): the API key is hard-coded here; consider moving it to
# configuration.
nyt = NYTAPI("WxQXsVSaIIlTgEfG0VnrlP7JhOVYYL0j")

# Search phrase and the publication window it is restricted to.
search = "US Embassy move to Jerusalem"
start_date, end_date = datetime(2015, 1, 1), datetime(2019, 12, 31)

# Request 50 results from the New York Times only, sorted by relevance.
articles = nyt.article_search(
    query=search,
    results=50,
    dates={"begin": start_date, "end": end_date},
    options={"sort": "relevance", "sources": ["New York Times"]},
)

def clean(text):
    """Strip mentions, URLs and punctuation from *text* and collapse whitespace.

    Each of the following is replaced by a single space:

    * ``@name`` mentions (``@`` followed by letters/digits),
    * URLs of the form ``scheme://...`` up to the next whitespace,
    * any character that is not an ASCII letter, digit, space or tab.

    The result is then split on whitespace and re-joined with single spaces,
    so it has no leading, trailing or repeated spaces.

    Args:
        text: The string to clean.

    Returns:
        The cleaned string; ``""`` when nothing but noise was present.
    """
    # Raw string so the regex escapes reach `re` untouched, and no spaces
    # around `|`: a space there becomes part of the alternative, so
    # punctuation and URLs would only match next to a literal space.
    noise = r"(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+://\S+)"
    return ' '.join(re.sub(noise, " ", text).split())

# Main headline of every article returned by the search.
headlines = [item['headline']['main'] for item in articles]
# Nothing is added to this list in the visible part of the script.
pub_dates = []
Exemplo n.º 7
0
from datetime import date, datetime
from pynytimes import NYTAPI

# Make sure to set parse dates to True so that the dates
# are parsed into datetime.datetime or datetime.date objects
nyt = NYTAPI(
    key="Your API Key",  # Get your API Key at https://developer.nytimes.com
    parse_dates=True,
)

# Search articles about President Biden
biden = nyt.article_search("biden")

# You can optionally define the dates between which you want the articles to be.
# The window is given with the keys "begin" and "end".
biden_january = nyt.article_search(query="biden",
                                   dates={
                                       "begin": date(2021, 1, 1),
                                       "end": date(2021, 1, 31)
                                   })

# Further arguments are optional; this call passes only the query
biden = nyt.article_search("biden", )