def get_data_for_multiple_stocks(tickers, start_date, end_date):
    """Download price history for several tickers and add return columns.

    Each ticker is requested from the ``'yahoo'`` source through
    ``DataReader``. A ticker whose download or post-processing fails is
    reported on stdout and skipped, so one bad symbol does not abort the
    whole batch.

    Args:
        tickers: Sized sequence of ticker symbols (``len()`` is used for the
            progress output).
        start_date: First date of the range; must provide ``strftime``
            (e.g. ``datetime.datetime``).
        end_date: Last date of the range; same type as ``start_date``.

    Returns:
        dict mapping ticker -> DataFrame. Each frame is the one returned by
        ``DataReader`` with these columns added:

        * ``Ticker`` (inserted as the first column),
        * ``Prev Close``: ``Adj Close`` shifted down by one row,
        * ``daily_return``: ``Adj Close / Prev Close - 1``,
        * ``log_return``: ``log(Adj Close / Prev Close)``.

        Tickers that failed are absent from the dictionary.
    """
    start_date_str = start_date.strftime('%Y-%m-%d')
    end_date_str = end_date.strftime('%Y-%m-%d')

    stocks = {}
    for i, ticker in enumerate(tickers):
        # Lightweight progress indicator: one line every five tickers.
        if i % 5 == 0:
            print(f'{i}/{len(tickers)}')

        try:
            s = DataReader(ticker, 'yahoo', start_date_str, end_date_str)
            s.insert(0, "Ticker", ticker)
            s['Prev Close'] = s['Adj Close'].shift(1)
            s['daily_return'] = (s['Adj Close'] / s['Prev Close']) - 1
            s['log_return'] = np.log(s['Adj Close'] / s['Prev Close'])
        except Exception as exc:
            # Best-effort batch: skip this ticker but say why. Catching
            # Exception (not a bare except) lets Ctrl-C / SystemExit
            # still stop the run.
            print(f'something went wrong with {ticker}: {exc!r}')
            continue

        stocks[ticker] = s

    return stocks
def get_data_for_multiple_stocks(tickers, start_date, end_date):
    """Obtain stock information (Date, OHLC, Volume and Adjusted Close).

    Uses ``DataReader`` with the ``'yahoo'`` source to download the data for
    each ticker, then computes the log return and the arithmetic return from
    the adjusted close.

    Args:
        tickers: Iterable of stock tickers.
        start_date: Start date of the stock data (passed to ``DataReader``
            unchanged).
        end_date: End date of the stock data (passed to ``DataReader``
            unchanged).

    Returns:
        A dictionary mapping each ticker to its DataFrame. Every frame is
        indexed by a datetime index named ``"Date"`` and holds, in order:
        ``Ticker``, the columns returned by ``DataReader``,
        ``Prev Adj Close``, ``Log Return`` and ``Return``.

    Raises:
        Whatever ``DataReader`` or the column computations raise; a failure
        for one ticker aborts the whole call.
    """
    stocks = {}
    for ticker in tickers:
        s = DataReader(ticker, 'yahoo', start_date, end_date)

        # Leading Ticker column so rows stay identifiable once frames from
        # several tickers are combined.
        s.insert(0, "Ticker", ticker)

        s['Prev Adj Close'] = s['Adj Close'].shift(1)
        s['Log Return'] = np.log(s['Adj Close'] / s['Prev Adj Close'])
        s['Return'] = s['Adj Close'] / s['Prev Adj Close'] - 1

        # Normalise the index to datetimes named "Date". This replaces the
        # former copy-to-column / reset_index / reorder / set_index round
        # trip, which produced the same index and column order.
        s.index = pd.to_datetime(s.index).rename("Date")

        stocks[ticker] = s
    return stocks
# Download the price history of every ticker in the `company` table and
# append it to the `histPrice` table.

# Alternative ticker selections, kept for reference:
#companies = pdconn.read_table('companies')
#cursor = pdconn.execute('select ticker from company c left join industry i on c.industry = i.industry where sector = "Healthcare"')
cursor = pdconn.execute('select ticker from company')
companies = cursor.fetchall()

start_date = '1985-01-01'
#start_date = '2016-01-01'
end_date = '2016-05-10'

# AdjClose values sometimes get extremely large, causing out of bounds
# errors when stored; anything above this cap is replaced by the cap.
ADJ_CLOSE_CAP = 9.99e5

# companies is a list of tuples
for co in companies:
    co = co[0]

    # `done` holds the tickers already handled; skip repeats.
    if co in done:
        continue
    done.append(co)

    print("Fetching data for " + co)
    # fetch data from yahoo as pandas dataframe
    try:
        ts = DataReader(co, 'yahoo', start=start_date, end=end_date)
    except Exception as exc:
        # Skip tickers that cannot be downloaded, but report the reason.
        # Catching Exception (not a bare except) keeps Ctrl-C working.
        print('Could not read data for ' + co + ': ' + repr(exc))
        continue

    ts.rename(columns={'Adj Close': 'AdjClose'}, inplace=True)

    # clip() caps the values without the builtin max() pre-check, which
    # raised ValueError on an empty frame and could miss large values when
    # a NaN came first.
    ts['AdjClose'] = ts['AdjClose'].clip(upper=ADJ_CLOSE_CAP)

    ts.insert(0, 'ticker', co)  # add ticker to dataframe
    ts = ts.round(decimals=2)
    pdconn.to_sql(ts, 'histPrice', if_exists='append')

    # Random 1-5 second pause between tickers (presumably to avoid
    # hammering the data source -- confirm before removing).
    time.sleep(random.randint(1, 5))