def DataReader(name, data_source=None, start=None, end=None,
               retry_count=3, pause=0.001):
    """
    Fetch a dataset from one of the supported online providers.

    Supported providers are Yahoo! Finance, Google Finance, the St. Louis
    FED (FRED) and Kenneth French's data library.

    Parameters
    ----------
    name : str or list of strs
        Name of the dataset. Some data sources (yahoo, google, fred) will
        accept a list of names.
    data_source : str
        Which provider to query: "yahoo", "yahoo-actions", "google",
        "fred" or "famafrench".
    start : {datetime, None}
        Left boundary of the date range (defaults to 1/1/2010).
    end : {datetime, None}
        Right boundary of the date range (defaults to today).
    retry_count : int, default 3
        Forwarded to the "yahoo", "yahoo-actions" and "google" readers;
        the other readers do not receive it.
    pause : float, default 0.001
        Forwarded to the "yahoo", "yahoo-actions" and "google" readers;
        the other readers do not receive it.

    Raises
    ------
    NotImplementedError
        If ``data_source`` is not one of the values listed above.

    Examples
    --------
    # Data from Yahoo! Finance
    gs = DataReader("GS", "yahoo")

    # Corporate Actions (Dividend and Split Data) with ex-dates
    # from Yahoo! Finance
    gs = DataReader("GS", "yahoo-actions")

    # Data from Google Finance
    aapl = DataReader("AAPL", "google")

    # Data from FRED
    vix = DataReader("VIXCLS", "fred")

    # Data from Fama/French
    ff = DataReader("F-F_Research_Data_Factors", "famafrench")
    ff = DataReader("F-F_Research_Data_Factors_weekly", "famafrench")
    ff = DataReader("6_Portfolios_2x3", "famafrench")
    ff = DataReader("F-F_ST_Reversal_Factor", "famafrench")
    """
    start, end = _sanitize_dates(start, end)

    # Only the Yahoo!/Google readers take the retry settings.
    retry_opts = {"retry_count": retry_count, "pause": pause}

    if data_source == "yahoo":
        return get_data_yahoo(symbols=name, start=start, end=end,
                              adjust_price=False, chunksize=25,
                              **retry_opts)

    if data_source == "yahoo-actions":
        return get_data_yahoo_actions(symbol=name, start=start, end=end,
                                      **retry_opts)

    if data_source == "google":
        return get_data_google(symbols=name, start=start, end=end,
                               chunksize=25, **retry_opts)

    if data_source == "fred":
        return get_data_fred(name, start, end)

    if data_source == "famafrench":
        # The Fama/French reader is called without a date range.
        return get_data_famafrench(name)

    raise NotImplementedError(
        "data_source=%r is not implemented" % data_source)
def _get_data_one(sym, start, end, interval, retry_count, pause):
    """
    Download the historical data of a single symbol from Google.

    ``start`` and ``end`` are normalised with ``_sanitize_dates`` and sent
    as ``'%b %d, %Y'`` formatted strings. ``interval`` is accepted but is
    not used when building the request.

    Returns the result of ``_retry_read_url`` (a DataFrame).
    """
    start, end = _sanitize_dates(start, end)

    date_format = '%b %d, %Y'
    # Resulting URL looks like:
    # www.google.com/finance/historical?q=GOOG&startdate=Jun+9%2C+2011&enddate=Jun+8%2C+2013&output=csv
    query = urlencode({
        "q": sym,
        "startdate": start.strftime(date_format),
        "enddate": end.strftime(date_format),
        "output": "csv",
    })
    url = "%s%s" % (_URL, query)

    return _retry_read_url(url, retry_count, pause, 'Google')
def _get_data_one(sym, start, end, interval, retry_count, pause):
    """
    Download the historical data of a single symbol from Yahoo!.

    ``start`` and ``end`` are normalised with ``_sanitize_dates``. The
    query string is assembled by hand and appended to ``_URL``; months
    are sent as ``month - 1``.

    Returns the result of ``_retry_read_url`` (a DataFrame).
    """
    start, end = _sanitize_dates(start, end)

    # One "key=value" fragment per query field, joined with '&' below.
    fields = [
        's=%s' % sym,
        'a=%s' % (start.month - 1),
        'b=%s' % start.day,
        'c=%s' % start.year,
        'd=%s' % (end.month - 1),
        'e=%s' % end.day,
        'f=%s' % end.year,
        'g=%s' % interval,
        'ignore=.csv',
    ]
    url = _URL + '&'.join(fields)

    return _retry_read_url(url, retry_count, pause, 'Yahoo!')
def _get_data_one(sym, start, end, interval, retry_count, pause):
    """
    Download the historical data of a single symbol from Yahoo!.

    ``start`` and ``end`` are normalised with ``_sanitize_dates``; the
    query parameters are handed to ``_encode_url`` together with ``_URL``.
    Months are sent as ``month - 1``.

    Returns the result of ``_retry_read_url`` (a DataFrame).
    """
    start, end = _sanitize_dates(start, end)

    first_month = start.month - 1
    last_month = end.month - 1
    query = {
        's': sym,
        'a': first_month, 'b': start.day, 'c': start.year,
        'd': last_month, 'e': end.day, 'f': end.year,
        'g': interval,
        'ignore': '.csv',
    }

    return _retry_read_url(_encode_url(_URL, query),
                           retry_count, pause, 'Yahoo!')
def _get_data(name, start=None, end=None):
    """
    Get data for the given name from the St. Louis FED (FRED).

    Parameters
    ----------
    name : str or list-like of str
        FRED series name(s). A single name is treated as a one-element
        list.
    start : {datetime, None}
        Left boundary of the date range. ``None`` is resolved by
        ``_sanitize_dates`` (documented default: 1/1/2010).
    end : {datetime, None}
        Right boundary of the date range. ``None`` is resolved by
        ``_sanitize_dates`` at call time (documented default: today).

    Returns
    -------
    DataFrame
        One column per requested series. If multiple names are passed,
        the index of the DataFrame is the outer join of the indices of
        each series.

    Raises
    ------
    IOError
        If the download looks like a FRED error page, i.e. the requested
        name is not a valid series.
    """
    # The defaults are None rather than concrete datetimes: a default of
    # ``dt.datetime.today()`` would be evaluated once, when the function is
    # defined, and go stale in a long-running process.
    start, end = _sanitize_dates(start, end)

    names = name if is_list_like(name) else [name]
    urls = [_URL + '%s/downloaddata/%s.csv' % (n, n) for n in names]

    def fetch_data(url, series_name):
        # Download one series and cut it down to [start, end].
        with urlopen(url) as resp:
            data = read_csv(resp, index_col=0, parse_dates=True,
                            header=None, skiprows=1,
                            names=["DATE", series_name], na_values='.')
        try:
            return data.truncate(start, end)
        except KeyError:
            # Probe the fourth row's index label for the word 'Error' to
            # recognise an error page. Positional ``.iloc`` replaces the
            # removed ``.ix`` indexer; the length and ``str`` guards keep
            # the probe itself from masking the original KeyError.
            if (len(data) > 3 and
                    str(data.iloc[3].name)[7:12] == 'Error'):
                raise IOError("Failed to get the data. Check that {0!r} is "
                              "a valid FRED series.".format(series_name))
            raise

    df = concat([fetch_data(url, n) for url, n in zip(urls, names)],
                axis=1, join='outer')
    return df
def _get_data(symbol, start=None, end=None, retry_count=3, pause=0.001):
    """
    Return a DataFrame of historical corporate actions (dividends and
    stock splits) for one symbol over the date range ``start`` to ``end``.

    All dates in the resulting DataFrame correspond with dividend and
    stock split ex-dates.

    Parameters
    ----------
    symbol : string
        A single stock symbol (ticker).
    start : string, (defaults to '1/1/2010')
        Starting date, timestamp. Parses many different kind of date
        representations (e.g., 'JAN-01-2010', '1/1/10', 'Jan, 1, 1980')
    end : string, (defaults to today)
        Ending date, timestamp. Same format as starting date.
    retry_count : int, default 3
        Number of times the request is attempted.
    pause : float, default 0.001
        Time, in seconds, slept before every attempt.

    Returns
    -------
    DataFrame
        Columns ``action`` and ``value``, indexed by the action dates.

    Raises
    ------
    IOError
        If every attempt fails with one of ``_network_error_classes``.
    """
    start, end = _sanitize_dates(start, end)

    # Assemble the query string field by field; months go out as month - 1.
    query = '&'.join([
        's=%s' % symbol,
        'a=%s' % (start.month - 1),
        'b=%s' % start.day,
        'c=%s' % start.year,
        'd=%s' % (end.month - 1),
        'e=%s' % end.day,
        'f=%s' % end.year,
        'g=v',
    ])
    url = _URL + query

    wanted_actions = ('DIVIDEND', 'SPLIT')

    for _attempt in range(retry_count):
        time.sleep(pause)

        try:
            with urlopen(url) as resp:
                payload = resp.read()
        except _network_error_classes:
            # Network trouble: move on to the next attempt.
            continue

        dates = []
        records = []
        for row in csv.reader(StringIO(bytes_to_str(payload))):
            # Only three-field DIVIDEND/SPLIT rows are of interest (Yahoo
            # adds a bunch of irrelevant fields).
            if len(row) == 3 and row[0] in wanted_actions:
                kind, date, value = row

                if kind == 'DIVIDEND':
                    dates.append(to_datetime(date))
                    records.append({'action': kind, 'value': float(value)})
                elif kind == 'SPLIT' and ':' in value:
                    # Convert the split ratio to a fraction. For example a
                    # 4:1 split expressed as a fraction is 1/4 = 0.25.
                    before, _sep, after = value.partition(':')
                    ratio = float(after) / float(before)
                    dates.append(to_datetime(date))
                    records.append({'action': kind, 'value': ratio})

        return DataFrame(records, index=dates)

    raise IOError("after %d tries, Yahoo! did not "
                  "return a 200 for url %r" % (retry_count, url))