示例#1
0
def run_pipeline(pipeline, start_date, end_date=None, bundle=None):
    """
    Compute values for pipeline from start_date to end_date, using the specified
    bundle or the default bundle.

    Parameters
    ----------
    pipeline : Pipeline, required
        The pipeline to run.

    start_date : str (YYYY-MM-DD), required
        First date on which the pipeline should run. If start_date is not a trading
        day, the pipeline will start on the first trading day after start_date.

    end_date : str (YYYY-MM-DD), optional
        Last date on which the pipeline should run. If end_date is not a trading
        day, the pipeline will end on the first trading day after end_date.
        Defaults to today.

    bundle : str, optional
        the bundle code. If omitted, the default bundle will be used (and must be set).

    Returns
    -------
    result : pd.DataFrame
        A frame of computed results. The result columns correspond to the entries
        of pipeline.columns, which should be a dictionary mapping strings to instances
        of zipline.pipeline.term.Term. For each date between start_date and end_date,
        result will contain a row for each asset that passed pipeline.screen. A screen
        of None indicates that a row should be returned for each asset that existed each
        day.

    Examples
    --------
    Get a pipeline of 1-year returns:

    >>> from zipline.pipeline.factors import Returns
    >>> pipeline = Pipeline(                                                                  # doctest: +SKIP
            columns={
                '1Y': Returns(window_length=252),
            })
    >>> factor = run_pipeline(pipeline, '2018-01-01', '2019-02-01', bundle="usstock-1min")    # doctest: +SKIP
    """

    if not bundle:
        bundle = get_default_bundle()
        if not bundle:
            raise ValidationError("you must specify a bundle or set a default bundle")
        bundle = bundle["default_bundle"]

    load_extensions(code=bundle)

    bundle_data = bundles.load(
        bundle,
        os.environ,
        pd.Timestamp.utcnow(),
    )

    calendar_name = bundles.bundles[bundle].calendar_name
    trading_calendar = get_calendar(calendar_name)

    start_date = pd.Timestamp(start_date)

    if start_date.tz:
        start_date = start_date.tz_convert("UTC")
    else:
        start_date = start_date.tz_localize("UTC")

    if end_date:
        end_date = pd.Timestamp(end_date)
    else:
        end_date = pd.Timestamp.now().normalize()

    if end_date.tz:
        end_date = end_date.tz_convert("UTC")
    else:
        end_date = end_date.tz_localize("UTC")

    first_session = max(bundles.bundles[bundle].start_session, trading_calendar.first_session)
    if start_date < first_session:
        raise ValidationError(
            f"start_date cannot be earlier than {first_session.date().isoformat()} for this bundle")

    # Roll-forward start_date to valid session
    for i in range(100):
        if trading_calendar.is_session(start_date):
            break
        start_date += pd.Timedelta(days=1)
    else:
        raise ValidationError(f"start_date is not in {calendar_name} calendar")

    # Roll-forward end_date to valid session
    for i in range(100):
        if trading_calendar.is_session(end_date):
            break
        end_date += pd.Timedelta(days=1)
    else:
        raise ValidationError("end_date is not in calendar")

    if (
        end_date < start_date):
        raise ValidationError("end_date cannot be earlier than start_date")

    default_pipeline_loader = EquityPricingLoader.without_fx(
        bundle_data.equity_daily_bar_reader,
        bundle_data.adjustment_reader,
    )
    asset_finder = asset_finder_cache.get(bundle, bundle_data.asset_finder)
    asset_finder_cache[bundle] = asset_finder

    pipeline_loader = QuantRocketPipelineLoaderRouter(
        asset_db_conn=asset_finder.engine,
        calendar=trading_calendar,
        default_loader=default_pipeline_loader,
        default_loader_columns=EquityPricing.columns
    )

    calendar_domain = domain.get_domain_from_calendar(trading_calendar)

    engine = SimplePipelineEngine(
        pipeline_loader,
        asset_finder,
        calendar_domain)

    return engine.run_pipeline(pipeline, start_date, end_date)
示例#2
0
def get_forward_returns(factor, periods=None, bundle=None):
    """
    Get forward returns for the dates and assets in ``factor``, calculated
    over the given periods.

    Parameters
    ----------
    factor : pd.Series
        The factor whose dates and assets to use. The Series should have a
        MultiIndex of (date, asset), as returned by ``run_pipeline``.

    periods : int or list of int
        The periods over which to calculate the forward returns.
        Example: [1, 5, 10]. Defaults to [1].

    bundle : str, optional
        the bundle code. If omitted, the default bundle will be used (and must be set).

    Returns
    -------
    result : pd.DataFrame
        A dataframe of computed forward returns containing one column per
        requested period. It is indexed first by date, then by asset.

    Examples
    --------
    Run a pipeline, then get forward returns for the factor:

    >>> factor = run_pipeline(pipeline, '2018-01-01', '2019-02-01', bundle="usstock-1min")    # doctest: +SKIP
    >>> forward_returns = get_forward_returns(factor, bundle="usstock-1min")                  # doctest: +SKIP
    """

    if not bundle:
        bundle = get_default_bundle()
        if not bundle:
            raise ValidationError("you must specify a bundle or set a default bundle")
        bundle = bundle["default_bundle"]

    if not periods:
        periods = [1]

    if not isinstance(periods, (list, tuple)):
        periods = [periods]

    columns = {}
    for window_length in periods:
        columns[f"{window_length}D"] = Returns(window_length=window_length+1)

    pipeline = Pipeline(columns=columns)
    returns_data = run_pipeline(
        pipeline,
        factor.index.get_level_values(0).min(),
        factor.index.get_level_values(0).max(),
        bundle=bundle)

    for window_length in periods:
        colname = f"{window_length}D"
        returns_data[colname] = returns_data[colname].unstack().shift(-window_length).stack()

    returns_data = returns_data.reindex(index=factor.index)
    returns_data.index.set_names(["date", "asset"], inplace=True)

    return returns_data
示例#3
0
def continuous_future(root_symbol_str,
                      offset=0,
                      roll="volume",
                      adjustment="mul",
                      bundle=None):
    """
    Return a ContinuousFuture object for the specified root symbol in the specified bundle
    (or default bundle).

    Parameters
    ----------
    root_symbol_str : str
        The root symbol for the future chain.

    offset : int, optional
        The distance from the primary contract. Default is 0.

    roll : str, optional
        How rolls are determined. Possible choices: 'volume',
        (roll when back contract volume exceeds front contract
        volume), or 'calendar' (roll on rollover date). Default
        is 'volume'.

    adjustment : str, optional
        Method for adjusting lookback prices between rolls. Possible choices:
        'mul', 'add', None. Default is 'mul'.

    bundle : str, optional
        the bundle code. If omitted, the default bundle will be used (and must be set).

    Returns
    -------
    asset : zipline.assets.ContinuousFuture

    Examples
    --------
    Get the continuous future object for ES and get the current chain as of
    2020-09-18:

    >>> es = continuous_future("ES", roll="volume", bundle="es-1min")
    >>> data = get_data("2020-09-18 10:00:00", bundle="es-1min")
    >>> print(data.current_chain(es))
    """
    if not bundle:
        bundle = get_default_bundle()
        if not bundle:
            raise ValidationError(
                "you must specify a bundle or set a default bundle")
        bundle = bundle["default_bundle"]

    load_extensions(code=bundle)

    bundle_data = bundles.load(
        bundle,
        os.environ,
        pd.Timestamp.utcnow(),
    )
    asset_finder = asset_finder_cache.get(bundle, bundle_data.asset_finder)
    asset_finder_cache[bundle] = asset_finder

    continuous_future = asset_finder.create_continuous_future(
        root_symbol_str,
        offset,
        roll,
        adjustment,
    )

    return continuous_future
示例#4
0
def sid(sid, bundle=None):
    """
    Return an Asset object for the specified sid in the specified bundle
    (or default bundle).

    Parameters
    ----------
    sid : str, required
        The sid to retrieve.

    bundle : str, optional
        the bundle code. If omitted, the default bundle will be used (and must be set).

    Returns
    -------
    asset : zipline.assets.Asset

    Notes
    -----
    Each asset is specific to the bundle from which it came. An
    Asset object for AAPL from bundle A cannot be used to retrieve
    AAPL data from bundle B, even if AAPL data is present in bundle
    B.

    Examples
    --------
    Get the asset object for AAPL:

    >>> aapl = sid("FIBBG000B9XRY4", bundle="usstock-1min")
    """
    if not bundle:
        bundle = get_default_bundle()
        if not bundle:
            raise ValidationError(
                "you must specify a bundle or set a default bundle")
        bundle = bundle["default_bundle"]

    load_extensions(code=bundle)

    bundle_data = bundles.load(
        bundle,
        os.environ,
        pd.Timestamp.utcnow(),
    )

    asset_finder = asset_finder_cache.get(bundle, bundle_data.asset_finder)
    asset_finder_cache[bundle] = asset_finder

    zipline_sid = asset_finder.engine.execute(
        """
        SELECT
            sid
        FROM
            equities
        WHERE
            real_sid = ?
        UNION
        SELECT
            sid
        FROM
            futures_contracts
        WHERE
            real_sid = ?
        """, (sid, sid)).scalar()

    if not zipline_sid:
        raise ValidationError(f"No such sid {sid} in {bundle} bundle")

    asset = asset_finder.retrieve_asset(zipline_sid)

    return asset
示例#5
0
def get_data(dt, bundle=None, data_frequency=None):
    """
    Return a zipline.protocol.BarData object for the specified bundle (or default bundle)
    as of the specified datetime. This is the same object that is passed
    as the `data` parameter to `handle_data` and other backtest functions.

    Parameters
    ----------
    dt : str (YYYY-MM-DD[ HH:MM:SS]), required
        The datetime (for minute data) or date (for daily data) which the
        data object should be anchored to.

    bundle : str, optional
        the bundle code. If omitted, the default bundle will be used (and
        must be set).

    data_frequency : str, optional
        the data frequency. Possible choices: daily, minute. The default is
        "daily" for daily bundles and "minute" for minute bundles. Minute
        bundles also support "daily".

    Returns
    -------
    data : zipline.protocol.BarData

    Examples
    --------
    Get the data object for July 7, 2020 at 11 AM for the usstock minute
    bundle:

    >>> data = get_data('2020-07-07 11:00:00', bundle="usstock-1min")    # doctest: +SKIP

    Get the data object for July 7, 2020 for a daily bundle:

    >>> data = get_data('2020-07-07', bundle="xjpx-1d-bundle")           # doctest: +SKIP
    """
    if not bundle:
        bundle = get_default_bundle()
        if not bundle:
            raise ValidationError(
                "you must specify a bundle or set a default bundle")
        bundle = bundle["default_bundle"]

    load_extensions(code=bundle)

    bundle_data = bundles.load(
        bundle,
        os.environ,
        pd.Timestamp.utcnow(),
    )
    if not data_frequency:
        config = get_bundle_config(bundle)
        data_frequency = config["data_frequency"]

    calendar_name = bundles.bundles[bundle].calendar_name
    trading_calendar = get_calendar(calendar_name)

    session_minute = pd.Timestamp(dt, tz=trading_calendar.tz)
    session = session_minute.normalize().tz_localize(None).tz_localize("UTC")

    first_session = max(bundles.bundles[bundle].start_session,
                        trading_calendar.first_session)
    if session < first_session:
        raise ValidationError(
            f"date cannot be earlier than {first_session.date().isoformat()} for this bundle"
        )

    if not trading_calendar.is_session(session):
        raise ValidationError(
            f"requested date {session.date().isoformat()} is not in {calendar_name} calendar"
        )

    if data_frequency == "minute" and not trading_calendar.is_open_on_minute(
            session_minute):
        raise ValidationError(
            f"requested time {session_minute.isoformat()} is not in {calendar_name} calendar"
        )

    if data_frequency == "minute":
        equity_minute_reader = future_minute_reader = bundle_data.equity_minute_bar_reader
    else:
        equity_minute_reader = future_minute_reader = None

    asset_finder = asset_finder_cache.get(bundle, bundle_data.asset_finder)
    asset_finder_cache[bundle] = asset_finder

    data_portal = DataPortal(
        asset_finder,
        trading_calendar=trading_calendar,
        first_trading_day=bundle_data.equity_minute_bar_reader.
        first_trading_day,
        equity_minute_reader=equity_minute_reader,
        equity_daily_reader=bundle_data.equity_daily_bar_reader,
        future_minute_reader=future_minute_reader,
        future_daily_reader=bundle_data.equity_daily_bar_reader,
        adjustment_reader=bundle_data.adjustment_reader)

    data = BarData(data_portal=data_portal,
                   simulation_dt_func=lambda: session_minute,
                   data_frequency=data_frequency,
                   trading_calendar=trading_calendar,
                   restrictions=NoRestrictions())

    return data