def search(self):
    """Search NYT news articles about Covid between self.startDate and self.endDate.

    Returns:
        list[dict]: up to 10 articles, sorted by relevance, from the
        configured wire sources.
    """
    # NOTE(review): API key is hard-coded; move it to configuration or an
    # environment variable before shipping.
    nyt = NYTAPI("wbWOIDwmGPWGQALhXbfC3BDK3EMtFBMA")
    # Expand the date-only bounds so the whole first and last day are covered.
    start = str(self.startDate) + " 00:00:00"
    end = str(self.endDate) + " 23:59:59"
    articles = nyt.article_search(
        query="Covid",
        results=10,
        dates={
            "begin": datetime.datetime.strptime(start, '%Y-%m-%d %H:%M:%S'),
            "end": datetime.datetime.strptime(end, '%Y-%m-%d %H:%M:%S'),
        },
        options={
            "sort": "relevance",
            "sources": [
                "New York Times",
                "AP",
                "Reuters",
                "International Herald Tribune"
            ],
            "type_of_material": ["News"]
        })
    return articles
def search(self):
    """Search NYT news articles about Covid between self.startDate and self.endDate.

    Returns:
        list[dict]: up to 10 relevance-ranked articles, re-sorted newest
        first, with each ``pub_date`` reformatted as ``DD-Mon-YYYY``.
    """
    # NOTE(review): API key is hard-coded; move it to configuration or an
    # environment variable before shipping.
    nyt = NYTAPI("wbWOIDwmGPWGQALhXbfC3BDK3EMtFBMA")
    # Expand the date-only bounds so the whole first and last day are covered.
    startDate = str(self.startDate) + " 00:00:00"
    endDate = str(self.endDate) + " 23:59:59"
    articles = nyt.article_search(
        query="Covid",
        results=10,
        dates={
            "begin": datetime.strptime(startDate, '%Y-%m-%d %H:%M:%S'),
            "end": datetime.strptime(endDate, '%Y-%m-%d %H:%M:%S'),
        },
        options={
            "sort": "relevance",
            "sources": [
                "New York Times",
                "AP",
                "Reuters",
                "International Herald Tribune"
            ],
            "type_of_material": ["News"]
        })
    # Newest first, keyed on the YYYY-MM-DD prefix of pub_date.
    sorted_articles = sorted(
        articles,
        key=lambda a: datetime.strptime(a['pub_date'][0:10], '%Y-%m-%d'),
        reverse=True)
    # Reformat pub_date in place, e.g. "2020-06-24" -> "24-Jun-2020".
    # (Direct iteration replaces the original index-based range(len()) loop.)
    for article in sorted_articles:
        article['pub_date'] = datetime.strptime(
            article['pub_date'][0:10], '%Y-%m-%d').strftime('%d-%b-%Y')
    return sorted_articles
def test_parse_dates_disabled(self):
    """Without parse_dates, created_date stays a raw ISO-8601 string."""
    client = NYTAPI(API_KEY)
    metadata = client.article_metadata(
        "https://www.nytimes.com/live/2021/02/10/us/impeachment-trial/prosecutors-begin-arguments-against-trump-saying-he-became-the-inciter-in-chief-of-a-dangerous-insurrection"
    )
    self.assertEqual("2021-02-10T11:04:08-05:00", metadata[0]["created_date"])
def test_empty_api_key(self):
    """Constructing a client without an API key must raise ValueError."""
    self.assertRaises(ValueError, NYTAPI)
def setUp(self):
    # Fresh client per test; parse_dates=True makes the wrapper convert
    # date fields into datetime.datetime / datetime.date objects.
    self.nyt = NYTAPI(API_KEY, parse_dates=True)
class TestNewYorkTimes(unittest.TestCase):
    """Live integration tests for the pynytimes NYTAPI wrapper.

    These tests call the real NYT developer APIs, so they require network
    access and a valid ``API_KEY``.
    """

    def setUp(self):
        # Fresh client per test; parse_dates=True converts date fields to
        # datetime.datetime / datetime.date objects.
        self.nyt = NYTAPI(API_KEY, parse_dates=True)

    def tearDown(self):
        self.nyt.close()

    def test_empty_api_key(self):
        with self.assertRaises(ValueError):
            NYTAPI()

    def test_top_stories(self):
        top_stories = self.nyt.top_stories()
        self.assertIsInstance(top_stories, list)
        self.assertGreater(len(top_stories), 0)
        for top_story in top_stories:
            self.assertIsInstance(top_story, dict)
            self.assertIsInstance(top_story["created_date"],
                                  datetime.datetime)
            self.assertIsInstance(top_story["published_date"],
                                  datetime.datetime)

    def test_top_stories_section(self):
        section = "world"
        top_stories_section = self.nyt.top_stories(section=section)
        self.assertIsInstance(top_stories_section, list)
        self.assertGreater(len(top_stories_section), 0)
        for top_story in top_stories_section:
            self.assertIsInstance(top_story, dict)

    def test_top_stories_wrong_section(self):
        with self.assertRaises(ValueError):
            self.nyt.top_stories("abcdfsda")
        with self.assertRaises(TypeError):
            self.nyt.top_stories(section=123)

    def test_most_viewed(self):
        most_viewed = self.nyt.most_viewed()
        self.assertIsInstance(most_viewed, list)
        self.assertGreater(len(most_viewed), 0)
        for most in most_viewed:
            self.assertIsInstance(most, dict)
            self.assertIsInstance(most["media"], list)

    def test_most_viewed_invalid_days(self):
        # Only 1, 7 and 30 are valid day counts.
        with self.assertRaises(ValueError):
            self.nyt.most_viewed(2)
        with self.assertRaises(TypeError):
            self.nyt.most_viewed(days="1")

    def test_most_shared(self):
        most_shared = self.nyt.most_shared()
        self.assertIsInstance(most_shared, list)
        self.assertGreater(len(most_shared), 0)
        for most in most_shared:
            self.assertIsInstance(most, dict)
            self.assertIsInstance(most["published_date"], datetime.date)
            self.assertIsInstance(most["updated"], datetime.datetime)
            self.assertIsInstance(most["media"], list)

    def test_most_shared_invalid(self):
        with self.assertRaises(ValueError):
            self.nyt.most_shared(method="twitter")
        with self.assertRaises(ValueError):
            self.nyt.most_shared(days=2)
        with self.assertRaises(TypeError):
            self.nyt.most_shared(days="2")

    def test_book_reviews(self):
        author = "Barack Obama"
        book_reviews = self.nyt.book_reviews(author=author)
        self.assertIsInstance(book_reviews, list)
        self.assertGreater(len(book_reviews), 0)
        for book_review in book_reviews:
            self.assertIsInstance(book_review, dict)
            self.assertEqual(book_review["book_author"], author)

    def test_book_reviews_invalid(self):
        # Exactly one lookup criterion must be given, and the ISBN must exist.
        with self.assertRaises(ValueError):
            self.nyt.book_reviews()
        with self.assertRaises(ValueError):
            self.nyt.book_reviews(isbn=213789, author="author")
        with self.assertRaises(ValueError):
            self.nyt.book_reviews(isbn=213789)

    def test_best_sellers_lists(self):
        best_sellers_lists = self.nyt.best_sellers_lists()
        self.assertIsInstance(best_sellers_lists, list)
        self.assertGreater(len(best_sellers_lists), 0)

    def test_best_seller_list(self):
        best_seller_list = self.nyt.best_sellers_list(
            date=datetime.datetime(2019, 1, 1), name="hardcover-fiction")
        self.assertIsInstance(best_seller_list, list)
        # Historical list contents are fixed, so the top ISBN is stable.
        self.assertEqual(best_seller_list[0]["primary_isbn13"],
                         "9780385544153")

    def test_best_seller_list_invalid(self):
        with self.assertRaises(ValueError):
            self.nyt.best_sellers_list(name="not a name")
        with self.assertRaises(TypeError):
            self.nyt.best_sellers_list(date="123")

    def test_movie_reviews(self):
        movie_reviews = self.nyt.movie_reviews()
        self.assertIsInstance(movie_reviews, list)
        self.assertGreater(len(movie_reviews), 0)
        for movie_review in movie_reviews:
            self.assertIsInstance(movie_review, dict)

    def test_movie_reviews_invalid(self):
        with self.assertRaises(TypeError):
            self.nyt.movie_reviews(keyword=123)

    def test_article_metadata(self):
        article_metadata = self.nyt.article_metadata(
            "https://www.nytimes.com/live/2021/02/10/us/impeachment-trial/prosecutors-begin-arguments-against-trump-saying-he-became-the-inciter-in-chief-of-a-dangerous-insurrection"
        )
        self.assertIsInstance(article_metadata, list)
        for article in article_metadata:
            self.assertIsInstance(article, dict)
        title = "Prosecutors argue that Trump ‘became the inciter in chief’ and retell riot with explicit video."
        # Expected creation time, in the US/Eastern (-05:00) offset.
        creation_datetime = datetime.datetime(
            2021, 2, 10, 11, 4, 8,
            tzinfo=datetime.timezone(
                datetime.timedelta(days=-1, seconds=68400)),
        )
        self.assertEqual(article_metadata[0]["title"], title)
        self.assertEqual(
            article_metadata[0]["created_date"],
            creation_datetime,
        )

    def test_article_metadata_invalid(self):
        with self.assertRaises(TypeError):
            self.nyt.article_metadata()
        with self.assertRaises(TypeError):
            self.nyt.article_metadata(123)
        with self.assertRaises(ValueError):
            self.nyt.article_metadata("text")

    def test_archive_metadata(self):
        archive_metadata = self.nyt.archive_metadata(
            date=datetime.date.today())
        self.assertIsInstance(archive_metadata, list)
        self.assertGreater(len(archive_metadata), 0)
        for metadata in archive_metadata:
            self.assertIsInstance(metadata, dict)
            # Everything in this month's archive was published on or after
            # the first of the current (UTC) month.
            self.assertGreaterEqual(
                metadata["pub_date"],
                datetime.datetime.now(tz=datetime.timezone.utc).replace(
                    day=1, hour=0, minute=0, second=0, microsecond=0),
            )

    def test_archive_metadata_invalid(self):
        with self.assertRaises(TypeError):
            self.nyt.archive_metadata("string")
        with self.assertRaises(TypeError):
            self.nyt.archive_metadata(123)

    def test_article_search(self):
        search = self.nyt.article_search("Joe Biden", results=80)
        self.assertIsInstance(search, list)
        self.assertEqual(80, len(search))
        for article in search:
            self.assertIsInstance(article, dict)

    def test_article_search_invalid(self):
        with self.assertRaises(TypeError):
            self.nyt.article_search(123)
        with self.assertRaises(TypeError):
            self.nyt.article_search("query", datetime.date.today())

    def test_section_list(self):
        section_list = self.nyt.section_list()
        self.assertIsInstance(section_list, list)
        self.assertGreater(len(section_list), 0)
        for section in section_list:
            self.assertIsInstance(section, dict)

    def test_latest_articles(self):
        latest_articles = self.nyt.latest_articles()
        self.assertIsInstance(latest_articles, list)
        for article in latest_articles:
            self.assertIsInstance(article, dict)

    def test_latest_articles_invalid(self):
        with self.assertRaises(TypeError):
            self.nyt.latest_articles(source=123)

    def test_tag_query(self):
        tags = self.nyt.tag_query("Obama", max_results=2)
        self.assertIsInstance(tags, list)
        # BUG fix: was assertIs(2, len(tags)) -- an identity check that only
        # passed thanks to CPython's small-int cache; use equality instead.
        self.assertEqual(2, len(tags))

    def test_tag_query_invalid(self):
        with self.assertRaises(TypeError):
            self.nyt.tag_query(123)
        with self.assertRaises(TypeError):
            self.nyt.tag_query("Obama", max_results="2")

    def test_parse_dates_disabled(self):
        # Without parse_dates, the raw ISO-8601 string comes back unchanged.
        local_nyt = NYTAPI(API_KEY)
        data = local_nyt.article_metadata(
            "https://www.nytimes.com/live/2021/02/10/us/impeachment-trial/prosecutors-begin-arguments-against-trump-saying-he-became-the-inciter-in-chief-of-a-dangerous-insurrection"
        )
        self.assertEqual(data[0]["created_date"], "2021-02-10T11:04:08-05:00")
from pynytimes import NYTAPI

nyt = NYTAPI("Type_Your_Key")

# Fetch NYT movie reviews matching the keyword "Batman".
reviews = nyt.movie_reviews(keyword="Batman")

# Fields printed for each review, tab-separated, one review per line.
# (Renamed from `l`, replaced the index loop + quadratic `str +=` with
# direct iteration and str.join.)
fields = ['display_title', 'mpaa_rating', 'headline', 'summary_short']

for review in reviews:
    print("\t".join(review[field] for field in fields))
from pynytimes import NYTAPI # Make sure to set parse dates to True so that the dates # are parsed into datetime.datetime or datetime.date objects nyt = NYTAPI( key="Your API Key", # Get your API Key at https://developer.nytimes.com parse_dates=True, ) # Get most shared articles of today most_shared = nyt.most_shared() # Optionally you can also define the timeframe # Valid options are 1, 7, 30 most_shared_last_week = nyt.most_shared(days=7) most_shared_last_month = nyt.most_shared(days=30) # You can also define the method of sharing. # Options are: email (default) or facebook. most_shared_email = nyt.most_shared(method="email") most_shared_facebook = nyt.most_shared(method="facebook") # These options can also be mixed and matched # So the most shared articles of last month on facebook are most_shared_last_month_facebook = nyt.most_shared(days=30, method="facebook")
import datetime
import json

import requests
import pandas as pd
from pynytimes import NYTAPI

# NOTE(review): secrets are hard-coded here; load them from the environment
# or a config file instead.
key = 'EE4r1tU8dgaQej94KTnlJxWPglKKaz4e'
secret = 'AbqACp2UngzDCOWu'

# BUG fix: the original URL contained the literal text "{key:AbqACp2UngzDCOWu}"
# (a plain string, not an f-string), so the API key was never interpolated.
url = f'https://api.nytimes.com/svc/archive/v1/2020/06.json?api-key={key}'
r = requests.get(url)
json_data = r.json()

nyt = NYTAPI("EE4r1tU8dgaQej94KTnlJxWPglKKaz4e")

# Collect June headlines for each year.
years = [2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020]
for YEAR in years:
    data = nyt.archive_metadata(date=datetime.datetime(YEAR, 6, 1))
    # NOTE(review): this list is reset every iteration, so only the last
    # year's headlines survive the loop -- confirm that is intended.
    headlines_nyt = []
    # BUG fix: the hard-coded range(1001) raised IndexError for months with
    # fewer than 1001 articles; clamp to the data actually returned.
    for i in range(min(1001, len(data))):
        print(i)
        headlines_nyt.append(
            (data[i]['headline']['main'], data[i]['pub_date'], 'NY_TIMES', 0))
    return story


@st.cache(suppress_st_warning=True)
def summarizeArticle(toSummarize, minLength, maxLength):
    # Summarize *toSummarize* with the module-level HuggingFace `summarizer`
    # pipeline; result is memoized by Streamlit's cache.
    return summarizer(toSummarize, min_length=minLength,
                      max_length=maxLength)[0]["summary_text"]


# NY Times API -- the key must be provided via the environment.
NYTimesAPIkey = environ.get("NYTimesAPIkey")
if NYTimesAPIkey is None:
    raise KeyError("'NYTimesAPIkey' not an environment variable name.")
nyt = NYTAPI(NYTimesAPIkey)

# Time how long loading the summarization model takes.
t0 = perf_counter()
summarizer = initializeSummarizer()
t1 = perf_counter()
Δt01 = t1 - t0

# Now for the Streamlit interface:
st.sidebar.title("About")
# NOTE(review): this chunk is truncated -- the st.sidebar.info(...) call
# continues past the end of this excerpt.
st.sidebar.info(
    "This streamlit app uses the default HuggingFace summarization "
    "pipeline (Facebook's BART model) to summarize text from selected "
    "NY Times articles.\n\n"
    "The actual summarization time takes a few seconds, although"
from PopulateDB import addArticlesDB
from pynytimes import NYTAPI
from datetime import datetime, date, timezone
from pymongo import MongoClient, errors, ASCENDING, DESCENDING
from bson import regex
import sys
import json
import requests
import random
import string

# NOTE(review): hard-coded API key; consider loading it from the environment.
nyt = NYTAPI("qsPCmSV09wV4AbCCaJmXFPxo3nCwGtbU")

LIMIT = 10  # output is limited to 10 documents


def start():
    """Open the MongoDB connection and bind the module-level handles.

    Sets the globals ``myclient`` (client), ``db`` (the "nyt" database) and
    ``article`` (the "article" collection) used by the rest of the module.
    On connection failure the error is printed; note the subsequent
    ``myclient[...]`` access would then still raise.
    """
    global myclient
    global db
    global article
    try:
        myclient = MongoClient("mongodb://localhost:27018/",
                               w=1,
                               readPreference="primaryPreferred")
        print(myclient)
        print("Connection successful!")
    except errors.ConnectionFailure as e:
        print("Connection failed!")
        print(e)
    db = myclient["nyt"]  # connect to database
    article = db["article"]  # connect to collections
from pynytimes import NYTAPI
from datetime import datetime
import re
from textblob import TextBlob
import numpy
import pandas as pd

nyt = NYTAPI("WxQXsVSaIIlTgEfG0VnrlP7JhOVYYL0j")

# Search window and query for the embassy-move coverage.
search = "US Embassy move to Jerusalem"
start_date = datetime(2015, 1, 1)
end_date = datetime(2019, 12, 31)

articles = nyt.article_search(
    query=search,
    results=50,
    dates={
        "begin": start_date,
        "end": end_date
    },
    options={
        "sort": "relevance",
        "sources": [
            "New York Times",
        ],
    }
)


def clean(text):
    """Strip @mentions, URLs and non-alphanumeric characters from *text*,
    collapsing runs of whitespace to single spaces.

    BUG fix: the original pattern had stray spaces around the alternation
    bars ("...) | (..."), which made those branches match literal
    " | " sequences instead of acting as alternatives; it was also a
    non-raw string. Both are corrected here.
    """
    return ' '.join(
        re.sub(r"(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)",
               " ", text).split())
from pynytimes import NYTAPI # Make sure to turn parse dates on so that the dates # are parsed into datetime.datetime or datetime.date objects nyt = NYTAPI("API Key", parse_dates=True) # Get the most viewed articles of today most_viewed = nyt.most_viewed() # Optionally you can also define the time period of the most # viewed articles most_viewed_last_week = nyt.most_viewed(days=7) # Valid options are 1, 7 or 30 most_viewed_last_month = nyt.most_viewed(days=30)
from .basisFuncs import *
from .part3funcs import normalize_headline
import datetime
from pynytimes import NYTAPI
import time as time
import pytz

# NOTE(review): the bare `except:` swallows every possible error, not just a
# missing key -- consider catching a specific exception and logging it.
try:
    nyt = NYTAPI(os.getenv("nytimesPythonApiKey"))
except:
    print("no nyt api key oh well")


def populateNewsRatioColumn():
    """Recompute the newsRatio score for every stored (purePres) tweet.

    Zeroes the column, loads tweets from the DB, skips empty texts, then
    scores each tweet via determineWhetherTweetWasInfluencedByNewsRatio.
    NOTE(review): this excerpt may be truncated -- the collected `tuples`
    (for `updateFormula`) are not written back within the visible code.
    """
    mycursor.execute("update " + mainTable + " set newsRatio = 0")
    mydb.commit()
    updateFormula = "UPDATE " + mainTable + " SET newsRatio = %s WHERE id = %s"
    allTweets = getTweetsFromDB(
        returnParams=["favCount", "cleanedText", "id", "publishTime"],
        purePres=True)
    # Keep only tweets with non-trivial cleaned text (index 1).
    tweets = [t for t in allTweets if len(t[1]) > 1]
    ratios = []
    tuples = []
    favCounts = [t[0] for t in tweets]
    for i, t in enumerate(tweets):
        ratio = determineWhetherTweetWasInfluencedByNewsRatio(t)
        tuples.append((str(ratio), t[2]))  # (ratio, tweet id) for the UPDATE
        ratios.append(ratio)
from pynytimes import NYTAPI

nyt = NYTAPI("YOUR_API_KEY")

# NOTE(review): article_search expects a plain query string; here the full
# REST URL (including fq=indigo and the api-key) is passed as the query --
# confirm whether the intended query was simply "indigo".
articles = nyt.article_search(
    "https://api.nytimes.com/svc/search/v2/articlesearch.json?fq=indigo&api-key=YOUR_API_KEY"
)

# Collect each article's web URL (comprehensions replace the original
# manual append loops).
news = [{'url': article['web_url']} for article in articles]

urls = [url for entry in news for url in entry.values()]

for url in urls:
    print(url)

# article :indigo
"""
https://www.nytimes.com/1889/07/14/archives/no-robbery.html
https://www.nytimes.com/1858/10/29/archives/central-america-crops-and-contracts-in-costa-ricathe-proposed.html
https://www.nytimes.com/1898/08/20/archives/reviews-of-books-dialect-tales-justly-praised.html
https://www.nytimes.com/1890/10/19/archives/anne-bissell.html
https://www.nytimes.com/1859/10/27/archives/european-news-the-jason-at-st-johns-further-by-the-persia-the-great.html
https://www.nytimes.com/1859/06/27/archives/from-the-pacific-coast-nicaragua-rejects-the-american-ultimatum.html
"""
from pynytimes import NYTAPI # Make sure to turn parse dates on so that the dates # are parsed into datetime.datetime or datetime.date objects nyt = NYTAPI("API Key", parse_dates=True) # Get top stories top_stories = nyt.top_stories() # Optionally you can also define a section # Valid options for sections can be found in README top_stories_science = nyt.top_stories(section="science")
@st.cache(suppress_st_warning=True)
def summarizeArticle(toSummarize, minLength, maxLength):
    # Summarize *toSummarize* with the module-level HuggingFace `summarizer`
    # pipeline; result is memoized by Streamlit's cache.
    return summarizer(toSummarize, min_length=minLength,
                      max_length=maxLength)[0]["summary_text"]


# NY Times API
# Previous environment-variable approach, kept for reference:
# NYTimesAPIkey = environ.get("NYTimesAPIkey")
# if NYTimesAPIkey is None:
#     raise KeyError("'NYTimesAPIkey' not an environment variable name.")
# nyt = NYTAPI(NYTimesAPIkey)
nyt = NYTAPI(st.secrets["NYTimesAPIkey"])  # key now comes from Streamlit secrets

# Time how long loading the summarization model takes.
t0 = perf_counter()
summarizer = initializeSummarizer()
t1 = perf_counter()
Δt01 = t1 - t0
print(f"Δt to initialize summarizer: {Δt01:5.2f}s", flush=True)

# Now for the Streamlit interface:
st.sidebar.title("About")
# NOTE(review): this chunk is truncated -- the st.sidebar.info(...) call
# continues past the end of this excerpt.
st.sidebar.info(
    "This streamlit app uses the default HuggingFace summarization "
    "pipeline (Facebook's BART model) to summarize text from selected "
    "NY Times articles.\n\n"
from pynytimes import NYTAPI
import datetime
import random
import time
import os

# Smoke test: exercise each pynytimes endpoint once against the live API.
# The random initial sleep presumably staggers concurrent runs to avoid
# hitting rate limits simultaneously -- confirm with the CI setup.
random_wait = random.randint(0, 60)
time.sleep(random_wait)

begin = datetime.datetime.now()  # overall start time

API_KEY = os.environ["NewYorkTimesAPIKey"]
nyt = NYTAPI(API_KEY)

nyt.top_stories(section="science")
nyt.most_viewed(days=30)
time.sleep(5)  # pause between call groups (rate limiting)
nyt.most_shared(days=30, method="email")
nyt.book_reviews(author="Michelle Obama")
time.sleep(5)
nyt.best_sellers_lists()
nyt.best_sellers_list(date=datetime.datetime(2019, 1, 1),
                      name="hardcover-fiction")
time.sleep(5)
nyt.movie_reviews(keyword="FBI", options={"order": "by-opening-date"})
nyt.article_metadata(
    url=
    "https://www.nytimes.com/2019/10/20/world/middleeast/erdogan-turkey-nuclear-weapons-trump.html"
)
time.sleep(5)
import os
from pynytimes import NYTAPI
import pandas as pd
import datetime
import time
from pprint import pprint

# import api key
key = os.getenv("api-key")

# set up wrapper for API calls
nyt = NYTAPI(key)

# create list of dates for each month from 2015 - 2017
start_date = "2015-01-01"
end_date = "2017-12-01"
date_list = pd.date_range(start_date, end_date, freq="MS")  # "MS" = month starts

# convert to python datetime for API calls
dates = list(date_list.to_pydatetime())

# iterate over list of dates, append to list, convert to a dataframe
# NOTE(review): this excerpt is truncated -- the append to article_list and
# the DataFrame conversion are not visible below.
article_list = []
for date in dates:
    print(f"Processing Date: {date}")
    results = nyt.archive_metadata(date = date)
def get_news():
    """Return (headline, mood) for a randomly chosen NYT top story.

    The mood is currently always the constant "excited".
    """
    # NOTE(review): hard-coded API key; move it to configuration.
    key = "aJDq9vqaMll0JjrRpRDRWwQnwQwPKtzZ"
    nyt = NYTAPI(key)
    top_stories = nyt.top_stories()
    # BUG fix: random.randint(1, 11) could exceed len(top_stories)
    # (IndexError) and always skipped index 0; pick uniformly from the
    # actual list instead.
    story = random.choice(top_stories)
    return story["title"], "excited"
from cli import parse_args
from pynytimes import NYTAPI

# Command-line configuration: scrape mode, topic URL, number of articles,
# and the path to the browser driver.
mode, topic_url, articles_to_scrape, driver_path = parse_args()

# For testing: If True only one article is scraped
DEMO = False
DEMO_TOPIC = 'https://www.nytimes.com/section/world/africa'
DEMO_ARTICLE_SCRAP = 1

# Database details
HOST = 'localhost'
DATABASE = 'nytimes'

# your variables
USER = '******'
PASSWORD = '******'

# Settings for the logger
LOG = 'log_file.log'
NYTname = 'scrapper'

# API key for the NYT API
# NOTE(review): key is hard-coded and https=False disables TLS -- confirm
# both are intentional before deploying.
nyt = NYTAPI("qeOAmrE6yGzowmzGiIpoK0ZBHOnyJ8BG", https=False)
def getAPI():
    """Build an NYTAPI client from the "NYT" environment variable, with
    date parsing enabled."""
    api_key = os.getenv("NYT")
    client = NYTAPI(str(api_key), parse_dates=True)
    return client
from datetime import date, datetime
from pynytimes import NYTAPI

# Make sure to set parse dates to True so that the dates
# are parsed into datetime.datetime or datetime.date objects
nyt = NYTAPI(
    key="Your API Key",  # Get your API Key at https://developer.nytimes.com
    parse_dates=True,
)

# Search articles about President Biden
biden = nyt.article_search("biden")

# You can optionally define the dates between which you want the articles to be.
# BUG fix: pynytimes expects the keys "begin" and "end" in the dates dict;
# the original used "start", which the library does not recognize.
biden_january = nyt.article_search(query="biden",
                                   dates={
                                       "begin": date(2021, 1, 1),
                                       "end": date(2021, 1, 31)
                                   })

# Optionally you can also define options for the search
# (the original example was cut off here; the stray trailing comma in the
# call is removed).
biden = nyt.article_search("biden")