Пример #1
0
def get_data_for_multiple_stocks(tickers, start_date, end_date):
    '''
    Download price data for several tickers and add return columns.

    Each ticker is requested through ``DataReader`` using the ``'yahoo'``
    source. A ticker whose download or post-processing fails is reported
    on stdout and skipped, so one bad symbol does not abort the batch.

    Args:
      tickers: sized sequence of ticker symbols (``len()`` is used for the
        progress output)
      start_date, end_date: objects providing ``strftime`` (e.g.
        ``dt.datetime``); they are formatted as ``YYYY-MM-DD`` strings
        before being passed to ``DataReader``

    Returns:
      A dictionary ``{ticker: pd.DataFrame}`` containing only the tickers
      that were processed successfully. Each frame gains the columns
      ``Ticker`` (inserted first), ``Prev Close``, ``daily_return`` and
      ``log_return``.
    '''
    start_date_str = start_date.strftime('%Y-%m-%d')
    end_date_str = end_date.strftime('%Y-%m-%d')
    stocks = {}

    for i, ticker in enumerate(tickers):
        # Progress indicator: report every fifth ticker.
        if i % 5 == 0:
            print(f'{i}/{len(tickers)}')

        try:
            s = DataReader(ticker, 'yahoo', start_date_str, end_date_str)

            s.insert(0, "Ticker", ticker)

            # Both returns are derived from the previous row's adjusted
            # close; the first row therefore ends up as NaN.
            s['Prev Close'] = s['Adj Close'].shift(1)
            s['daily_return'] = (s['Adj Close'] / s['Prev Close']) - 1
            s['log_return'] = np.log(s['Adj Close'] / s['Prev Close'])
        except Exception as exc:
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt / SystemExit still stop the batch.
            print(f'something went wrong with {ticker}: {exc}')
            continue

        stocks[ticker] = s

    return stocks
def get_data_for_multiple_stocks(tickers, start_date, end_date):
    '''
    Fetch stock data per ticker and enrich it with return columns.

    Every ticker is requested through ``DataReader`` with the ``'yahoo'``
    source. The resulting frame gets a leading ``Ticker`` column plus
    ``Prev Adj Close``, ``Log Return`` and ``Return`` columns, and is
    re-indexed by a datetime ``Date`` index built from the original index.

    Args:
      tickers: List of Stock Tickers
      start_date: Start Date of the stock data
      end_date: End Date of the stock data
    Returns:
      A dictionary mapping each ticker to its dataframe
    '''
    frames_by_ticker = {}

    for symbol in tickers:
        frame = DataReader(symbol, 'yahoo', start_date, end_date)

        # Ticker goes first so rows stay identifiable after frames are combined.
        frame.insert(0, "Ticker", symbol)

        # Keep the original index values as a datetime column.
        frame['Date'] = pd.to_datetime(frame.index)

        # Returns are relative to the previous row's adjusted close.
        frame['Prev Adj Close'] = frame['Adj Close'].shift(1)
        price_ratio = frame['Adj Close'] / frame['Prev Adj Close']
        frame['Log Return'] = np.log(price_ratio)
        frame['Return'] = price_ratio - 1

        frame = frame.reset_index(drop=True)

        # Move "Date" to the front, then promote it to the index.
        remaining = frame.columns.tolist()
        remaining.remove("Date")
        frame = frame[["Date", *remaining]]
        frame["Date"] = pd.to_datetime(frame["Date"])
        frame = frame.set_index("Date")

        frames_by_ticker[symbol] = frame

    return frames_by_ticker
Пример #3
0
# Download historical prices for every ticker in the `company` table and
# append them to the `histPrice` table.
# Alternative company selections kept for reference:
#companies = pdconn.read_table('companies')
#cursor = pdconn.execute('select ticker from company c left join industry i on c.industry = i.industry where sector = "Healthcare"')
cursor = pdconn.execute('select ticker from company')
companies = cursor.fetchall()

start_date = '1985-01-01'
#start_date = '2016-01-01'
end_date = '2016-05-10'

# AdjClose values sometimes get extremely large, causing out of bounds
# errors downstream; anything above this cap is clamped to it.
ADJ_CLOSE_CAP = 9.99e5

# companies is a list of tuples; the ticker is the first (only) field.
for co in companies:
    co = co[0]
    # `done` is defined outside this section; it records tickers that were
    # already attempted so they are not fetched twice.
    if co in done:
        continue
    done.append(co)
    print("Fetching data for " + co)
    # Fetch data from yahoo as a pandas dataframe.
    try:
        ts = DataReader(co, 'yahoo', start=start_date, end=end_date)
    except Exception as exc:
        # `Exception` rather than a bare `except` so Ctrl-C / SystemExit
        # still terminate the run instead of being treated as a bad ticker.
        print('Could not read data for ' + co + ': ' + str(exc))
        continue
    ts.rename(columns={'Adj Close': 'AdjClose'}, inplace=True)
    # The masked assignment is a no-op when nothing exceeds the cap, so no
    # pre-check is needed. The previous `max(...)` pre-check raised on an
    # empty frame and could miss large values when NaNs were present.
    ts.loc[ts['AdjClose'] > ADJ_CLOSE_CAP, 'AdjClose'] = ADJ_CLOSE_CAP
    ts.insert(0, 'ticker', co)  # add ticker to dataframe
    ts = ts.round(decimals=2)
    pdconn.to_sql(ts, 'histPrice', if_exists='append')
    # Random 1-5 second pause between requests.
    time.sleep(random.randint(1, 5))