示例#1
0
def DataReader(name,
               data_source=None,
               start=None,
               end=None,
               retry_count=3,
               pause=0.001,
               session=None):
    """
    Fetch a dataset from one of several online data providers.

    Dispatches on ``data_source`` to the matching reader class and returns
    the result of its ``read()`` call (for "oanda", the result of the
    historical-rates helper).

    Parameters
    ----------
    name : str or list of strs
        Name of the dataset / symbol(s) to fetch. Some data sources (yahoo,
        google, fred) will accept a list of names. For "oanda" it is passed
        as the base currency, quoted against "USD".
    data_source : {str, None}
        One of "yahoo", "yahoo-actions", "yahoo-dividends", "google",
        "fred", "famafrench", "oecd", "eurostat", "edgar-index" or "oanda".
    start : {datetime, None}
        Left boundary for range (documented default: 1/1/2010).
    end : {datetime, None}
        Right boundary for range (documented default: today).
    retry_count : {int, 3}
        Number of times to retry query request. Not forwarded for "oanda".
    pause : {numeric, 0.001}
        Time, in seconds, to pause between consecutive queries of chunks. If
        single value given for symbol, represents the pause between retries.
        Not forwarded for "oanda".
    session : Session, default None
        requests.sessions.Session instance to be used.

    Returns
    -------
    object
        Whatever the selected reader's ``read()`` returns.

    Raises
    ------
    NotImplementedError
        If ``data_source`` is not one of the values listed above.

    Examples
    --------
    # Data from Yahoo! Finance
    gs = DataReader("GS", "yahoo")

    # Corporate Actions (Dividend and Split Data) with ex-dates from
    # Yahoo! Finance
    gs = DataReader("GS", "yahoo-actions")

    # Data from Google Finance
    aapl = DataReader("AAPL", "google")

    # Data from FRED
    vix = DataReader("VIXCLS", "fred")

    # Data from Fama/French
    ff = DataReader("F-F_Research_Data_Factors", "famafrench")
    ff = DataReader("F-F_Research_Data_Factors_weekly", "famafrench")
    ff = DataReader("6_Portfolios_2x3", "famafrench")
    ff = DataReader("F-F_ST_Reversal_Factor", "famafrench")

    # Data from EDGAR index
    ed = DataReader("full", "edgar-index")
    ed2 = DataReader("daily", "edgar-index")
    """
    # Keyword arguments that every reader class below is constructed with.
    common = dict(symbols=name,
                  start=start,
                  end=end,
                  retry_count=retry_count,
                  pause=pause,
                  session=session)

    if data_source == "yahoo":
        reader = YahooDailyReader(adjust_price=False, chunksize=25, **common)
    elif data_source == "yahoo-actions":
        reader = YahooActionReader(**common)
    elif data_source == "yahoo-dividends":
        # Same reader as "yahoo", switched to interval 'v'.
        reader = YahooDailyReader(adjust_price=False,
                                  chunksize=25,
                                  interval='v',
                                  **common)
    elif data_source == "google":
        reader = GoogleDailyReader(chunksize=25, **common)
    elif data_source == "fred":
        reader = FredReader(**common)
    elif data_source == "famafrench":
        reader = FamaFrenchReader(**common)
    elif data_source == "oecd":
        reader = OECDReader(**common)
    elif data_source == "eurostat":
        reader = EurostatReader(**common)
    elif data_source == "edgar-index":
        reader = EdgarIndexReader(**common)
    elif data_source == "oanda":
        # Oanda goes through a plain function rather than a reader object,
        # so there is no read() step and no retry/pause forwarding.
        return get_oanda_currency_historical_rates(start,
                                                   end,
                                                   quote_currency="USD",
                                                   base_currency=name,
                                                   reversed=True,
                                                   session=session)
    else:
        msg = "data_source=%r is not implemented" % data_source
        raise NotImplementedError(msg)

    return reader.read()
示例#2
0
def get_data_famafrench(*args, **kwargs):
    """Construct a FamaFrenchReader from the given arguments and read it.

    All positional and keyword arguments are forwarded unchanged to
    ``FamaFrenchReader``; the return value is that reader's ``read()`` result.
    """
    reader = FamaFrenchReader(*args, **kwargs)
    return reader.read()
示例#3
0
def DataReader(name,
               data_source=None,
               start=None,
               end=None,
               retry_count=3,
               pause=0.001,
               session=None,
               access_key=None):
    """
    Imports data from a number of online sources.

    Dispatches on ``data_source`` to the matching reader and returns the
    result of its ``read()`` call. Sources that have been withdrawn raise
    ``ImmediateDeprecationError`` instead of attempting a request.

    Parameters
    ----------
    name : str or list of strs
        the name of the dataset. Some data sources (google, fred) will
        accept a list of names. For "enigma" it is used as the dataset id;
        for "nasdaq" it must be the string 'symbols'.
    data_source: {str, None}
        the data source: "google", "iex", "iex-tops", "iex-last",
        "iex-book", "bankofcanada", "stooq", "enigma", "fred", "famafrench",
        "oecd", "eurostat", "nasdaq", "quandl", "moex", "morningstar",
        "robinhood" or "tiingo". The values "yahoo", "yahoo-actions",
        "yahoo-dividends" and "edgar-index" are recognised but deprecated.
    start : {datetime, None}
        left boundary for range (documented default: 1/1/2010). Not
        forwarded for "stooq", "enigma" or "nasdaq".
    end : {datetime, None}
        right boundary for range (documented default: today). Not forwarded
        for "stooq", "enigma" or "nasdaq".
    retry_count : {int, 3}
        Number of times to retry query request.
    pause : {numeric, 0.001}
        Time, in seconds, to pause between consecutive queries of chunks. If
        single value given for symbol, represents the pause between retries.
    session : Session, default None
        requests.sessions.Session instance to be used. Not forwarded for
        "enigma" or "nasdaq".
    access_key : (str, None)
        Optional parameter to specify an API key for certain data sources
        (forwarded as ``api_key`` for "enigma" and "tiingo").

    Returns
    -------
    object
        Whatever the selected reader's ``read()`` (or, for "nasdaq",
        ``get_nasdaq_symbols``) returns.

    Raises
    ------
    ImmediateDeprecationError
        If ``data_source`` is one of the deprecated sources.
    ValueError
        If ``data_source`` is "nasdaq" and ``name`` is not 'symbols'.
    NotImplementedError
        If ``data_source`` is not recognised.

    Examples
    --------
    # Data from Google Finance
    aapl = DataReader("AAPL", "google")

    # Price and volume data from IEX
    tops = DataReader(["GS", "AAPL"], "iex-tops")
    # Top of book executions from IEX
    gs = DataReader("GS", "iex-last")
    # Real-time depth of book data from IEX
    gs = DataReader("GS", "iex-book")

    # Data from FRED
    vix = DataReader("VIXCLS", "fred")

    # Data from Fama/French
    ff = DataReader("F-F_Research_Data_Factors", "famafrench")
    ff = DataReader("F-F_Research_Data_Factors_weekly", "famafrench")
    ff = DataReader("6_Portfolios_2x3", "famafrench")
    ff = DataReader("F-F_ST_Reversal_Factor", "famafrench")
    """
    # Deprecated sources fail immediately. The reader calls that used to
    # follow these raises could never execute and have been removed.
    if data_source == "yahoo":
        raise ImmediateDeprecationError(DEP_ERROR_MSG.format('Yahoo Daily'))
    if data_source == "yahoo-actions":
        raise ImmediateDeprecationError(DEP_ERROR_MSG.format('Yahoo Actions'))
    if data_source == "yahoo-dividends":
        comp = 'Yahoo Dividends'
        raise ImmediateDeprecationError(DEP_ERROR_MSG.format(comp))
    if data_source == "edgar-index":
        raise ImmediateDeprecationError(DEP_ERROR_MSG.format('EDGAR'))

    # Keyword arguments shared by most reader classes below.
    common = dict(symbols=name,
                  start=start,
                  end=end,
                  retry_count=retry_count,
                  pause=pause,
                  session=session)

    if data_source == "google":
        return GoogleDailyReader(chunksize=25, **common).read()

    if data_source == "iex":
        return IEXDailyReader(chunksize=25, **common).read()

    if data_source == "iex-tops":
        return IEXTops(**common).read()

    if data_source == "iex-last":
        return IEXLasts(**common).read()

    if data_source == "iex-book":
        return IEXDeep(service="book", **common).read()

    if data_source == "bankofcanada":
        return BankOfCanadaReader(**common).read()

    if data_source == "stooq":
        # The Stooq reader is constructed without start/end.
        return StooqDailyReader(symbols=name,
                                chunksize=25,
                                retry_count=retry_count,
                                pause=pause,
                                session=session).read()

    if data_source == "enigma":
        return EnigmaReader(dataset_id=name, api_key=access_key).read()

    if data_source == "fred":
        return FredReader(**common).read()

    if data_source == "famafrench":
        return FamaFrenchReader(**common).read()

    if data_source == "oecd":
        return OECDReader(**common).read()

    if data_source == "eurostat":
        return EurostatReader(**common).read()

    if data_source == 'nasdaq':
        if name != 'symbols':
            raise ValueError("Only the string 'symbols' is supported for "
                             "Nasdaq, not %r" % (name, ))
        return get_nasdaq_symbols(retry_count=retry_count, pause=pause)

    if data_source == "quandl":
        return QuandlReader(**common).read()

    if data_source == "moex":
        return MoexReader(**common).read()

    if data_source == "morningstar":
        return MorningstarDailyReader(interval="d", **common).read()

    if data_source == 'robinhood':
        return RobinhoodHistoricalReader(**common).read()

    if data_source == 'tiingo':
        return TiingoDailyReader(api_key=access_key, **common).read()

    msg = "data_source=%r is not implemented" % data_source
    raise NotImplementedError(msg)
示例#4
0
def DataReader(
    name,
    data_source=None,
    start=None,
    end=None,
    retry_count=3,
    pause=0.1,
    session=None,
    api_key=None,
):
    """
    Imports data from a number of online sources.

    Validates ``data_source`` against the list of supported sources, then
    dispatches to the matching reader and returns the result of its
    ``read()`` call.

    Parameters
    ----------
    name : str or list of strs
        the name of the dataset. Some data sources (IEX, fred) will
        accept a list of names. For "enigma" it is used as the dataset id;
        for "nasdaq" it must be the string "symbols".
    data_source: {str, None}
        the data source: "yahoo", "yahoo-actions", "yahoo-dividends", "iex",
        "iex-tops", "iex-last", "iex-book", "bankofcanada", "stooq",
        "enigma", "fred", "famafrench", "oecd", "eurostat", "nasdaq",
        "quandl", "moex", "robinhood", "tiingo", "av-forex", "av-daily",
        "av-daily-adjusted", "av-weekly", "av-weekly-adjusted",
        "av-monthly", "av-monthly-adjusted", "av-intraday" or "econdb".
    start : {datetime, None}
        left boundary for range (documented default: 1/1/2010). Not
        forwarded for "stooq", "enigma", "nasdaq" or "av-forex".
    end : {datetime, None}
        right boundary for range (documented default: today). Not forwarded
        for "stooq", "enigma", "nasdaq" or "av-forex".
    retry_count : {int, 3}
        Number of times to retry query request.
    pause : {numeric, 0.1}
        Time, in seconds, to pause between consecutive queries of chunks. If
        single value given for symbol, represents the pause between retries.
    session : Session, default None
        requests.sessions.Session instance to be used. Not forwarded for
        "enigma" or "nasdaq".
    api_key : (str, None)
        Optional parameter to specify an API key for certain data sources
        (forwarded for "iex", "enigma", "quandl", "tiingo" and all "av-*"
        sources).

    Returns
    -------
    object
        Whatever the selected reader's ``read()`` (or, for "nasdaq",
        ``get_nasdaq_symbols``) returns.

    Raises
    ------
    NotImplementedError
        If ``data_source`` is not one of the supported values.
    ValueError
        If ``data_source`` is "nasdaq" and ``name`` is not "symbols".

    Examples
    --------
    # Daily data from IEX
    aapl = DataReader("AAPL", "iex")

    # Price and volume data from IEX
    tops = DataReader(["GS", "AAPL"], "iex-tops")
    # Top of book executions from IEX
    gs = DataReader("GS", "iex-last")
    # Real-time depth of book data from IEX
    gs = DataReader("GS", "iex-book")

    # Data from FRED
    vix = DataReader("VIXCLS", "fred")

    # Data from Fama/French
    ff = DataReader("F-F_Research_Data_Factors", "famafrench")
    ff = DataReader("F-F_Research_Data_Factors_weekly", "famafrench")
    ff = DataReader("6_Portfolios_2x3", "famafrench")
    ff = DataReader("F-F_ST_Reversal_Factor", "famafrench")
    """
    # Alpha Vantage time-series sources share one reader and differ only in
    # the API function they request.
    av_time_series_functions = {
        "av-daily": "TIME_SERIES_DAILY",
        "av-daily-adjusted": "TIME_SERIES_DAILY_ADJUSTED",
        "av-weekly": "TIME_SERIES_WEEKLY",
        "av-weekly-adjusted": "TIME_SERIES_WEEKLY_ADJUSTED",
        "av-monthly": "TIME_SERIES_MONTHLY",
        "av-monthly-adjusted": "TIME_SERIES_MONTHLY_ADJUSTED",
        "av-intraday": "TIME_SERIES_INTRADAY",
    }

    # A tuple (not a set) so that unhashable values are rejected with
    # NotImplementedError rather than TypeError. Each source appears once.
    supported_sources = (
        "yahoo",
        "yahoo-actions",
        "yahoo-dividends",
        "iex",
        "iex-tops",
        "iex-last",
        "iex-book",
        "bankofcanada",
        "stooq",
        "enigma",
        "fred",
        "famafrench",
        "oecd",
        "eurostat",
        "nasdaq",
        "quandl",
        "moex",
        "robinhood",
        "tiingo",
        "av-forex",
        "econdb",
    ) + tuple(av_time_series_functions)

    if data_source not in supported_sources:
        msg = "data_source=%r is not implemented" % data_source
        raise NotImplementedError(msg)

    # Keyword arguments shared by most reader classes below.
    common = dict(
        symbols=name,
        start=start,
        end=end,
        retry_count=retry_count,
        pause=pause,
        session=session,
    )

    if data_source == "yahoo":
        return YahooDailyReader(
            adjust_price=False, chunksize=25, **common
        ).read()

    if data_source == "yahoo-actions":
        return YahooActionReader(**common).read()

    if data_source == "yahoo-dividends":
        return YahooDivReader(
            adjust_price=False, chunksize=25, interval="d", **common
        ).read()

    if data_source == "iex":
        return IEXDailyReader(chunksize=25, api_key=api_key, **common).read()

    if data_source == "iex-tops":
        return IEXTops(**common).read()

    if data_source == "iex-last":
        return IEXLasts(**common).read()

    if data_source == "iex-book":
        return IEXDeep(service="book", **common).read()

    if data_source == "bankofcanada":
        return BankOfCanadaReader(**common).read()

    if data_source == "stooq":
        # The Stooq reader is constructed without start/end.
        return StooqDailyReader(
            symbols=name,
            chunksize=25,
            retry_count=retry_count,
            pause=pause,
            session=session,
        ).read()

    if data_source == "enigma":
        return EnigmaReader(dataset_id=name, api_key=api_key).read()

    if data_source == "fred":
        return FredReader(**common).read()

    if data_source == "famafrench":
        return FamaFrenchReader(**common).read()

    if data_source == "oecd":
        return OECDReader(**common).read()

    if data_source == "eurostat":
        return EurostatReader(**common).read()

    if data_source == "nasdaq":
        if name != "symbols":
            raise ValueError("Only the string 'symbols' is supported for "
                             "Nasdaq, not %r" % (name, ))
        return get_nasdaq_symbols(retry_count=retry_count, pause=pause)

    if data_source == "quandl":
        return QuandlReader(api_key=api_key, **common).read()

    if data_source == "moex":
        return MoexReader(**common).read()

    if data_source == "robinhood":
        return RobinhoodHistoricalReader(**common).read()

    if data_source == "tiingo":
        return TiingoDailyReader(api_key=api_key, **common).read()

    if data_source == "av-forex":
        # The forex reader is constructed without start/end.
        return AVForexReader(
            symbols=name,
            retry_count=retry_count,
            pause=pause,
            session=session,
            api_key=api_key,
        ).read()

    if data_source in av_time_series_functions:
        return AVTimeSeriesReader(
            function=av_time_series_functions[data_source],
            api_key=api_key,
            **common
        ).read()

    if data_source == "econdb":
        return EcondbReader(**common).read()

    # Defensive fallback: only reachable if a name is added to
    # supported_sources without a matching branch above.
    msg = "data_source=%r is not implemented" % data_source
    raise NotImplementedError(msg)
    """Helper to compute least square coefs, supports groupby().apply"""
    X = data[x].to_numpy()
    Y = data[y].to_numpy()
    X = np.hstack([np.ones((X.shape[0], 1)), X])
    x = ['Intercept'] + x
    b = np.dot(np.linalg.inv(np.dot(X.T, X)), np.dot(X.T, Y)).T
    if stdres:
        b = np.hstack([b, np.std(Y - (X @ b.T), axis=0).reshape(-1, 1)])
        x = x + ['stdres']
    return (
        DataFrame(b, columns=x, index=y) if len(b) > 1 else Series(b[0], x)
    )  # return as Series for groupby.apply


## Retrieve market and test asset returns
# Read the 'F-F_Research_Data_Factors' dataset through FamaFrenchReader.
# NOTE(review): start=1900 / end=2099 look like deliberately wide bounds to
# request the full available history -- confirm against the reader's date
# handling.
mkt = FamaFrenchReader('F-F_Research_Data_Factors', start=1900,
                       end=2099).read()
# Keep only element 0 of the result and relabel its 'Mkt-RF' column as 'BETA'.
# NOTE(review): presumably element 0 is the monthly factor table -- confirm.
mkt = mkt[0].rename(columns={'Mkt-RF': 'BETA'})
# Disabled: conversion of the index to integer YYYYMM labels.
#mkt.index = mkt.index.strftime('%Y%m').astype(int)
# Dataset names of the test-asset portfolios to read with the same reader.
asset_names = [
    '25_Portfolios_ME_BETA_5x5', '25_Portfolios_ME_VAR_5x5',
    '25_Portfolios_ME_RESVAR_5x5', '25_Portfolios_5x5'
]
# Map each dataset name to the full result of its read() call, using the same
# date bounds as the market factors above. One request is issued per name.
test_assets = {
    asset: FamaFrenchReader(asset, start=1900, end=2099).read()
    for asset in asset_names
}
# Disabled: the same integer YYYYMM index conversion for elements 0 and 1 of
# each test-asset result.
#for a in test_assets.values():
#    for p in range(2):
#        a[p].index = a[p].index.strftime('%Y%m').astype(int)

## Fama-MacBeth regressions with 5x5 test assets and estimated loadings