def test_sanitize_dates_int(self):
    """Integer years are interpreted as January 1st of that year."""
    year_start, year_end = 2018, 2019
    expected = (pd.to_datetime(dt.datetime(year_start, 1, 1)),
                pd.to_datetime(dt.datetime(year_end, 1, 1)))
    assert _sanitize_dates(year_start, year_end) == expected
# Example 2
    def __init__(
        self,
        symbols,
        start=None,
        end=None,
        retry_count=3,
        pause=0.1,
        timeout=30,
        session=None,
        freq=None,
    ):
        """
        Initialize the reader.

        Parameters
        ----------
        symbols : str or list-like
            Ticker(s)/series name(s) to fetch.
        start, end : date-like, optional
            Date range; ``start`` falls back to ``self.default_start_date``
            and both values are normalized by ``_sanitize_dates``.
        retry_count : int, default 3
            Number of retries for a failed request; must be >= 0.
        pause : float, default 0.1
            Seconds to pause between retries.
        timeout : int, default 30
            Request timeout, in seconds.
        session : optional
            Pre-configured HTTP session (presumably requests.Session —
            handled by ``_init_session``); a new one is created when None.
        freq : str, optional
            Data frequency hint, stored for subclasses.

        Raises
        ------
        ValueError
            If ``retry_count`` is not a non-negative integer.
        """
        self.symbols = symbols

        start, end = _sanitize_dates(start or self.default_start_date, end)
        self.start = start
        self.end = end

        # Zero is accepted (meaning "no retries"); only negative values and
        # non-ints are rejected, so the message reflects the actual check
        # (the old text claimed "larger than 0").
        if not isinstance(retry_count, int) or retry_count < 0:
            raise ValueError("'retry_count' must be a non-negative integer")
        self.retry_count = retry_count
        self.pause = pause
        self.timeout = timeout
        self.pause_multiplier = 1
        self.session = _init_session(session)
        self.freq = freq
        self.headers = None
# Example 3
def _get_data(name, start=dt.datetime(2010, 1, 1), end=None):
    """
    Get data for the given name from OECD.
    Date format is datetime

    Returns a DataFrame.

    Raises ValueError if ``name`` is not a string.
    """
    # ``end=None`` lets _sanitize_dates resolve "today" at call time; the
    # old default ``dt.datetime.today()`` was evaluated only once, at
    # import time, going stale in long-running processes.
    start, end = _sanitize_dates(start, end)

    if not isinstance(name, compat.string_types):
        raise ValueError('data name must be string')

    # API: https://data.oecd.org/api/sdmx-json-documentation/
    url = '{0}/{1}/all/all?'.format(_URL, name)

    def fetch_data(url, name):
        # Download, decode and parse the SDMX-JSON payload.
        resp = _urlopen(url)
        resp = resp.read()
        resp = resp.decode('utf-8')
        data = read_jsdmx(resp)
        try:
            idx_name = data.index.name # hack for pandas 0.16.2
            data.index = pd.to_datetime(data.index)
            data = data.sort_index()
            data = data.truncate(start, end)
            data.index.name = idx_name
        except ValueError:
            # Index was not date-like; return the frame untruncated.
            pass
        return data

    df = fetch_data(url, name)
    return df
# Example 4
def _get_data(name, start=dt.datetime(2010, 1, 1), end=None):
    """
    Get data for the given name from OECD.
    Date format is datetime

    Returns a DataFrame.

    Raises ValueError if ``name`` is not a string.
    """
    # ``end=None`` lets _sanitize_dates resolve "today" at call time; the
    # old default ``dt.datetime.today()`` was evaluated only once, at
    # import time, going stale in long-running processes.
    start, end = _sanitize_dates(start, end)

    if not isinstance(name, compat.string_types):
        raise ValueError("data name must be string")

    # API: https://data.oecd.org/api/sdmx-json-documentation/
    url = "{0}/{1}/all/all?".format(_URL, name)

    def fetch_data(url, name):
        # Download, decode and parse the SDMX-JSON payload.
        resp = _urlopen(url)
        resp = resp.read()
        resp = resp.decode("utf-8")
        data = read_jsdmx(resp)
        try:
            idx_name = data.index.name  # hack for pandas 0.16.2
            data.index = pd.to_datetime(data.index)
            data = data.sort_index()
            data = data.truncate(start, end)
            data.index.name = idx_name
        except ValueError:
            # Index was not date-like; return the frame untruncated.
            pass
        return data

    df = fetch_data(url, name)
    return df
# Example 5
def _get_data_one(sym, start, end, interval, retry_count, pause):
    """
    Fetch historical price data for one symbol from Google Finance.

    Dates may be anything ``_sanitize_dates`` accepts; returns a DataFrame
    built from the downloaded CSV.
    """
    start, end = _sanitize_dates(start, end)

    # www.google.com/finance/historical?q=GOOG&startdate=Jun+9%2C+2011&enddate=Jun+8%2C+2013&output=csv
    date_format = "%b %d, %Y"
    query = {
        "q": sym,
        "startdate": start.strftime(date_format),
        "enddate": end.strftime(date_format),
        "output": "csv",
    }
    return _retry_read_url(_encode_url(_URL, query), retry_count, pause,
                           "Google")
# Example 6
    def __init__(self, symbols, start=None, end=None,
                 retry_count=3, pause=0.1, timeout=30, session=None):
        """
        Initialize the reader.

        Parameters
        ----------
        symbols : str or list-like
            Ticker(s)/series name(s) to fetch.
        start, end : date-like, optional
            Date range, normalized by ``_sanitize_dates``.
        retry_count : int, default 3
            Number of retries for a failed request; must be >= 0.
        pause : float, default 0.1
            Seconds to pause between retries.
        timeout : int, default 30
            Request timeout, in seconds.
        session : optional
            Pre-configured HTTP session (presumably requests.Session —
            handled by ``_init_session``); created when None.

        Raises
        ------
        ValueError
            If ``retry_count`` is not a non-negative integer.
        """
        self.symbols = symbols

        start, end = _sanitize_dates(start, end)
        self.start = start
        self.end = end

        # Zero is accepted (meaning "no retries"); the message now matches
        # the actual check (the old text claimed "larger than 0").
        if not isinstance(retry_count, int) or retry_count < 0:
            raise ValueError("'retry_count' must be a non-negative integer")
        self.retry_count = retry_count
        self.pause = pause
        self.timeout = timeout
        self.session = _init_session(session, retry_count)
    def test_sanitize_invalid_dates(self):
        """Reversed ranges and unparseable strings must raise ValueError."""
        invalid_pairs = [
            (2019, 2018),                       # start year after end year
            ("2019-01-01", "2018-01-01"),       # start string after end string
            ("20199", None),                    # not a parseable date at all
        ]
        for bad_start, bad_end in invalid_pairs:
            with pytest.raises(ValueError):
                _sanitize_dates(bad_start, bad_end)
# Example 8
def _get_data_one(sym, start, end, interval, retry_count, pause):
    """
    Fetch historical price data for one symbol from Google Finance.

    Dates may be anything ``_sanitize_dates`` accepts; returns a DataFrame
    built from the downloaded CSV.
    """
    start, end = _sanitize_dates(start, end)

    # www.google.com/finance/historical?q=GOOG&startdate=Jun+9%2C+2011&enddate=Jun+8%2C+2013&output=csv
    params = dict(
        q=sym,
        startdate=start.strftime('%b %d, %Y'),
        enddate=end.strftime('%b %d, %Y'),
        output="csv",
    )
    url = _encode_url(_URL, params)
    return _retry_read_url(url, retry_count, pause, 'Google')
# Example 9
def _get_data_one(sym, start, end, interval, retry_count, pause):
    """
    Fetch historical price data for one symbol from Yahoo! Finance.

    Dates may be anything ``_sanitize_dates`` accepts; returns a DataFrame
    built from the downloaded CSV.
    """
    start, end = _sanitize_dates(start, end)
    # Build the query in the API's expected order; the month fields
    # ('a' and 'd') are zero-based in this API.
    query = {}
    for key, value in (('s', sym),
                       ('a', start.month - 1), ('b', start.day),
                       ('c', start.year),
                       ('d', end.month - 1), ('e', end.day),
                       ('f', end.year),
                       ('g', interval),
                       ('ignore', '.csv')):
        query[key] = value
    return _retry_read_url(_encode_url(_URL, query), retry_count, pause,
                           'Yahoo!')
# Example 10
def _get_data(name, start=dt.datetime(2010, 1, 1), end=None):
    """
    Get data for the given name from the St. Louis FED (FRED).
    Date format is datetime

    Returns a DataFrame.

    If multiple names are passed for "series" then the index of the
    DataFrame is the outer join of the indicies of each series.
    """
    # ``end=None`` lets _sanitize_dates resolve "today" at call time; the
    # old default ``dt.datetime.today()`` was evaluated once at import time.
    start, end = _sanitize_dates(start, end)

    if not is_list_like(name):
        names = [name]
    else:
        names = name

    urls = [_URL + '%s' % n + '/downloaddata/%s' % n + '.csv' for n in names]

    def fetch_data(url, name):
        # Download one series and clip it to the requested date range.
        with urlopen(url) as resp:
            data = read_csv(resp,
                            index_col=0,
                            parse_dates=True,
                            header=None,
                            skiprows=1,
                            names=["DATE", name],
                            na_values='.')
        try:
            return data.truncate(start, end)
        except KeyError:  # pragma: no cover
            # FRED serves an HTML error page for unknown series; sniff it.
            # ``.ix`` was removed in pandas 1.0 — use positional ``.iloc``.
            if data.iloc[3].name[7:12] == 'Error':
                raise IOError("Failed to get the data. Check that {0!r} is "
                              "a valid FRED series.".format(name))
            raise

    df = concat([fetch_data(url, n) for url, n in zip(urls, names)],
                axis=1,
                join='outer')
    return df
# Example 11
def _get_data(name, start=dt.datetime(2010, 1, 1), end=None):
    """
    Get data for the given name from the St. Louis FED (FRED).
    Date format is datetime

    Returns a DataFrame.

    If multiple names are passed for "series" then the index of the
    DataFrame is the outer join of the indicies of each series.
    """
    # ``end=None`` lets _sanitize_dates resolve "today" at call time; the
    # old default ``dt.datetime.today()`` was evaluated once at import time.
    start, end = _sanitize_dates(start, end)

    if not is_list_like(name):
        names = [name]
    else:
        names = name

    urls = [_URL + '%s' % n + '/downloaddata/%s' % n + '.csv' for
            n in names]

    def fetch_data(url, name):
        # Download one series and clip it to the requested date range.
        with urlopen(url) as resp:
            data = read_csv(resp, index_col=0, parse_dates=True,
                            header=None, skiprows=1, names=["DATE", name],
                            na_values='.')
        try:
            return data.truncate(start, end)
        except KeyError: # pragma: no cover
            # FRED serves an HTML error page for unknown series; sniff it.
            # ``.ix`` was removed in pandas 1.0 — use positional ``.iloc``.
            if data.iloc[3].name[7:12] == 'Error':
                raise IOError("Failed to get the data. Check that {0!r} is "
                              "a valid FRED series.".format(name))
            raise

    df = concat([fetch_data(url, n) for url, n in zip(urls, names)],
                axis=1, join='outer')
    return df
 def test_sanitize_dates_defaults(self):
     """With no arguments, the range defaults to (today - 5 years, today)."""
     today = pd.to_datetime(dt.date.today())
     lookback = pd.to_datetime(dt.date.today() - dt.timedelta(days=365 * 5))
     assert _sanitize_dates(None, None) == (lookback, today)
 def test_sanitize_dates(self, input_date):
     """A start date with no end date pairs with an end of today."""
     result = _sanitize_dates(input_date, None)
     assert result == (pd.to_datetime(input_date),
                       pd.to_datetime(dt.date.today()))
# Example 14
def DataReader(name, data_source=None, start=None, end=None,
               retry_count=3, pause=0.001):
    """
    Imports data from a number of online sources.

    Currently supports Yahoo! Finance, Google Finance, St. Louis FED (FRED)
    and Kenneth French's data library.

    Parameters
    ----------
    name : str or list of strs
        the name of the dataset. Some data sources (yahoo, google, fred) will
        accept a list of names.
    data_source: {str, None}
        the data source ("yahoo", "yahoo-actions", "google", "fred", or "ff")
    start : {datetime, None}
        left boundary for range (defaults to 1/1/2010)
    end : {datetime, None}
        right boundary for range (defaults to today)
    retry_count : {int, 3}
        Number of times to retry query request.
    pause : {numeric, 0.001}
        Time, in seconds, to pause between consecutive queries of chunks. If
        single value given for symbol, represents the pause between retries.

    Examples
    ----------

    # Data from Yahoo! Finance
    gs = DataReader("GS", "yahoo")

    # Corporate Actions (Dividend and Split Data) with ex-dates from Yahoo! Finance
    gs = DataReader("GS", "yahoo-actions")

    # Data from Google Finance
    aapl = DataReader("AAPL", "google")

    # Data from FRED
    vix = DataReader("VIXCLS", "fred")

    # Data from Fama/French
    ff = DataReader("F-F_Research_Data_Factors", "famafrench")
    ff = DataReader("F-F_Research_Data_Factors_weekly", "famafrench")
    ff = DataReader("6_Portfolios_2x3", "famafrench")
    ff = DataReader("F-F_ST_Reversal_Factor", "famafrench")
    """
    start, end = _sanitize_dates(start, end)

    # Map each supported source to a thunk so only the chosen reader runs.
    dispatch = {
        "yahoo": lambda: get_data_yahoo(symbols=name, start=start, end=end,
                                        adjust_price=False, chunksize=25,
                                        retry_count=retry_count, pause=pause),
        "yahoo-actions": lambda: get_data_yahoo_actions(
            symbol=name, start=start, end=end,
            retry_count=retry_count, pause=pause),
        "google": lambda: get_data_google(symbols=name, start=start, end=end,
                                          chunksize=25,
                                          retry_count=retry_count,
                                          pause=pause),
        "fred": lambda: get_data_fred(name, start, end),
        "famafrench": lambda: get_data_famafrench(name),
        "oecd": lambda: get_data_oecd(name, start, end),
    }
    if data_source not in dispatch:
        raise NotImplementedError(
                "data_source=%r is not implemented" % data_source)
    return dispatch[data_source]()
# Example 15
def _get_data(symbol, start=None, end=None, retry_count=3, pause=0.001):
    """
    Returns DataFrame of historical corporate actions (dividends and stock
    splits) for ``symbol``, over date range, start to end. All dates in the
    resulting DataFrame correspond with dividend and stock split ex-dates.

    Parameters
    ----------
        symbol : string with a single stock symbol (ticker).
        start : string, (defaults to '1/1/2010')
                Starting date, timestamp. Parses many different kind of date
                representations (e.g., 'JAN-01-2010', '1/1/10', 'Jan, 1, 1980')
        end : string, (defaults to today)
                Ending date, timestamp. Same format as starting date.
        retry_count : int, default 3
                Number of times to retry query request.
        pause : float, default 0.001
                Time, in seconds, of the pause between retries.

    Raises
    ------
        IOError
            If no response is obtained after ``retry_count`` attempts.
    """

    start, end = _sanitize_dates(start, end)
    # Yahoo! query: month fields ('a'/'d') are zero-based; 'g': 'v'
    # requests the corporate-actions (dividends/splits) feed.
    params = {
        's': symbol,
        'a': start.month - 1,
        'b': start.day,
        'c': start.year,
        'd': end.month - 1,
        'e': end.day,
        'f': end.year,
        'g': 'v'
    }
    url = _encode_url(_URL, params)

    # Best-effort retry loop: network errors fall through to the sleep
    # below; a successful download parses and returns immediately.
    for _ in range(retry_count):

        try:
            with urlopen(url) as resp:
                lines = resp.read()
        except _network_error_classes:
            pass
        else:
            actions_index = []
            actions_entries = []

            for line in csv.reader(StringIO(bytes_to_str(lines))):
                # Ignore lines that aren't dividends or splits (Yahoo
                # add a bunch of irrelevant fields.)
                if len(line) != 3 or line[0] not in ('DIVIDEND', 'SPLIT'):
                    continue

                action, date, value = line
                if action == 'DIVIDEND':
                    actions_index.append(to_datetime(date))
                    actions_entries.append({
                        'action': action,
                        'value': float(value)
                    })
                elif action == 'SPLIT' and ':' in value:
                    # Convert the split ratio to a fraction. For example a
                    # 4:1 split expressed as a fraction is 1/4 = 0.25.
                    denominator, numerator = value.split(':', 1)
                    split_fraction = float(numerator) / float(denominator)

                    actions_index.append(to_datetime(date))
                    actions_entries.append({
                        'action': action,
                        'value': split_fraction
                    })

            return DataFrame(actions_entries, index=actions_index)

        time.sleep(pause)

    raise IOError("after %d tries, Yahoo! did not " \
                                "return a 200 for url %r" % (retry_count, url))
# Example 16
def DataReader(name, data_source=None, start=None, end=None, retry_count=3,
               pause=0.001):
    """
    Imports data from a number of online sources.

    Currently supports Yahoo! Finance, Google Finance, St. Louis FED (FRED)
    and Kenneth French's data library.

    Parameters
    ----------
    name : str or list of strs
        the name of the dataset. Some data sources (yahoo, google, fred) will
        accept a list of names.
    data_source: {str, None}
        the data source ("yahoo", "yahoo-actions", "google", "fred", or "ff")
    start : {datetime, None}
        left boundary for range (defaults to 1/1/2010)
    end : {datetime, None}
        right boundary for range (defaults to today)
    retry_count : {int, 3}
        Number of times to retry query request.
    pause : {numeric, 0.001}
        Time, in seconds, to pause between consecutive queries of chunks. If
        single value given for symbol, represents the pause between retries.

    Examples
    ----------

    # Data from Yahoo! Finance
    gs = DataReader("GS", "yahoo")

    # Corporate Actions (Dividend and Split Data) with ex-dates from Yahoo! Finance
    gs = DataReader("GS", "yahoo-actions")

    # Data from Google Finance
    aapl = DataReader("AAPL", "google")

    # Data from FRED
    vix = DataReader("VIXCLS", "fred")

    # Data from Fama/French
    ff = DataReader("F-F_Research_Data_Factors", "famafrench")
    ff = DataReader("F-F_Research_Data_Factors_weekly", "famafrench")
    ff = DataReader("6_Portfolios_2x3", "famafrench")
    ff = DataReader("F-F_ST_Reversal_Factor", "famafrench")
    """
    start, end = _sanitize_dates(start, end)

    # Each branch returns immediately; the checks are mutually exclusive,
    # so their order does not affect behavior.
    if data_source == "fred":
        return get_data_fred(name, start, end)
    if data_source == "famafrench":
        return get_data_famafrench(name)
    if data_source == "oecd":
        return get_data_oecd(name, start, end)
    if data_source == "yahoo":
        return get_data_yahoo(symbols=name, start=start, end=end,
                              adjust_price=False, chunksize=25,
                              retry_count=retry_count, pause=pause)
    if data_source == "yahoo-actions":
        return get_data_yahoo_actions(symbol=name, start=start, end=end,
                                      retry_count=retry_count, pause=pause)
    if data_source == "google":
        return get_data_google(symbols=name, start=start, end=end,
                               chunksize=25, retry_count=retry_count,
                               pause=pause)
    raise NotImplementedError("data_source=%r is not implemented" %
                              data_source)