示例#1
0
def _pipeline_output_index(dates, assets, mask):
    """
    Create a MultiIndex for a pipeline output.

    Parameters
    ----------
    dates : pd.DatetimeIndex
        Row labels for ``mask``.
    assets : pd.Index
        Column labels for ``mask``.
    mask : np.ndarray[bool]
        Mask array indicating date/asset pairs that should be included in
        output index.

    Returns
    -------
    index : pd.MultiIndex
        MultiIndex containing (date, asset) pairs corresponding to ``True``
        values in ``mask``.
    """
    # Integer positions into ``dates`` / ``assets`` for every kept entry.
    date_codes = repeat_last_axis(arange(len(dates)), len(assets))[mask]
    asset_codes = repeat_first_axis(arange(len(assets)), len(dates))[mask]
    return MultiIndex(
        # ``levels`` and the integer codes are passed positionally on
        # purpose: pandas renamed the second argument from ``labels`` to
        # ``codes`` and later dropped the old keyword, so the positional
        # spelling is the only one accepted by both old and new releases.
        [dates, assets],
        [date_codes, asset_codes],
        # TODO: We should probably add names for these.
        names=[None, None],
        verify_integrity=False,
    )
示例#2
0
 def _compute(self, arrays, dates, assets, mask):
     """
     Build a boolean array flagging the column of ``self._asset``.

     ``assets`` is compared against ``self._asset.sid`` and the resulting
     boolean vector is expanded with ``repeat_first_axis`` using
     ``len(mask)`` as the repeat count.

     Raises
     ------
     NonExistentAssetInTimeFrame
         If the sid does not match exactly one entry of ``assets``, or if
         the number of positions that are true in both the expanded array
         and ``mask`` differs from ``len(mask)`` (i.e. the asset is not
         present for the whole period being computed).
     """
     hits = assets == self._asset.sid
     tiled = repeat_first_axis(hits, len(mask))
     # Success path first: exactly one matching column, and that column is
     # unmasked on every row.
     if hits.sum() == 1 and (tiled & mask).sum() == len(mask):
         return tiled
     raise NonExistentAssetInTimeFrame(
         asset=self._asset,
         start_date=dates[0],
         end_date=dates[-1],
     )
示例#3
0
文件: filter.py 项目: vskynet/zipline
 def _compute(self, arrays, dates, assets, mask):
     """
     Return a boolean array marking where ``assets`` equals our asset's sid.

     The per-asset comparison result is expanded with ``repeat_first_axis``
     (repeat count ``len(mask)``) and returned unchanged when validation
     passes.

     Raises
     ------
     NonExistentAssetInTimeFrame
         When ``self._asset.sid`` is not found exactly once in ``assets``,
         or when the expanded array and ``mask`` are not jointly true
         exactly ``len(mask)`` times.
     """
     sid_matches = (assets == self._asset.sid)
     expanded = repeat_first_axis(sid_matches, len(mask))

     # ``and`` keeps the original short-circuit: the mask overlap is only
     # inspected when the sid was found exactly once.
     found_once = sid_matches.sum() == 1
     present_throughout = (
         found_once and (expanded & mask).sum() == len(mask)
     )
     if not present_throughout:
         # The asset does not exist for the entirety of the timeframe over
         # which we are computing.
         raise NonExistentAssetInTimeFrame(
             asset=self._asset, start_date=dates[0], end_date=dates[-1],
         )
     return expanded
示例#4
0
文件: engine.py 项目: uniwin/zipline
    def _to_narrow(self, data, mask, dates, assets):
        """
        Flatten computed pipeline results into a DataFrame for public APIs.

        Parameters
        ----------
        data : dict[str -> ndarray[ndim=2]]
            Computed results keyed by column name.
        mask : ndarray[bool, ndim=2]
            ``True`` wherever an entry should be kept in the output.
        dates : pd.DatetimeIndex
            Row labels for the arrays in ``data`` and for ``mask``. Must
            expose ``.values`` and support slicing.
        assets : ndarray[int64]
            Column labels for the arrays in ``data`` and for ``mask``;
            resolved through ``self._finder.retrieve_all``.

        Returns
        -------
        results : pd.DataFrame
            index : two-level MultiIndex of (date, asset) with one entry per
                ``True`` value in ``mask``. For a non-empty result the date
                level is localized to UTC.
            columns : one column per key of ``data``.

        For every kept pair, ``results.loc[(date, asset), colname]`` holds
        ``data[colname][date, asset]``.
        """
        if mask.any():
            asset_objects = array(self._finder.retrieve_all(assets))
            kept_dates = repeat_last_axis(dates.values, len(assets))[mask]
            kept_assets = repeat_first_axis(asset_objects, len(dates))[mask]

            kept_columns = {}
            for name, arr in iteritems(data):
                kept_columns[name] = arr[mask]

            frame = DataFrame(
                data=kept_columns,
                index=MultiIndex.from_arrays([kept_dates, kept_assets]),
            )
            return frame.tz_localize('UTC', level=0)

        # Nothing survived the mask. Build the empty frame by hand: this
        # works around pandas failing to tz_localize an empty DataFrame that
        # has a MultiIndex, and avoids applying a known-empty mask to every
        # array.
        #
        # ``dates`` is sliced (rather than rebuilt) to preserve its pandas
        # metadata.
        no_dates = dates[:0]
        no_assets = array([], dtype=object)

        empty_columns = {}
        for name, arr in iteritems(data):
            empty_columns[name] = array([], dtype=arr.dtype)

        return DataFrame(
            data=empty_columns,
            index=MultiIndex.from_arrays([no_dates, no_assets]),
        )
    def load_adjusted_array(self, domain, columns, dates, sids, mask):
        """
        Load an AdjustedArray for each requested column.

        Parameters
        ----------
        domain
            Object whose ``all_sessions()`` supplies the sessions used to
            shift the query dates.
        columns
            Requested columns; split by ``self._split_column_types`` into
            OHLCV columns and currency columns.
        dates
            Dates on which the caller will be shown data.
        sids
            Asset identifiers to load.
        mask
            Not used by this method.

        Returns
        -------
        dict
            Maps each requested column to an ``AdjustedArray``.
        """
        # The caller is shown data as of the **start** of each date. The
        # latest data known on day N is assumed to be that of day (N - 1),
        # so every query date is moved back by one trading session.
        all_sessions = domain.all_sessions()
        query_dates = shift_dates(all_sessions, dates[0], dates[-1], shift=1)

        price_columns, currency_columns = self._split_column_types(columns)
        # Drop the combined name so only the split lists get used below.
        del columns
        price_names = [column.name for column in price_columns]

        raw_arrays = self.raw_price_reader.load_raw_arrays(
            price_names, query_dates[0], query_dates[-1], sids,
        )

        # Convert currencies in place where needed. Conversion rates are
        # loaded for the shifted dates so they line up with the dates used
        # to fetch prices.
        self._inplace_currency_convert(
            price_columns, raw_arrays, query_dates, sids,
        )

        price_adjustments = self.adjustments_reader.load_pricing_adjustments(
            price_names, dates, sids,
        )

        result = {
            column: AdjustedArray(
                raw.astype(column.dtype),
                column_adjustments,
                column.missing_value,
            )
            for column, raw, column_adjustments
            in zip(price_columns, raw_arrays, price_adjustments)
        }

        for column in currency_columns:
            per_sid_codes = self.raw_price_reader.currency_codes(sids)
            result[column] = AdjustedArray(
                repeat_first_axis(per_sid_codes, len(dates)),
                adjustments={},
                missing_value=None,
            )

        return result
        def compute(self, today, assets, out, close):
            """
            Fill ``out`` from the window of ``close`` values.

            The x term is ``(window_length - 1) / 2 - self._x`` expanded via
            ``repeat_last_axis``; the y term is ``close`` minus its
            NaN-ignoring mean along axis 0. Their product is summed along
            axis 0, divided by ``self._x_var`` and by ``window_length``,
            negated, passed through ``preprocess`` and written into ``out``.

            NOTE(review): this looks like a negated regression slope of
            ``close`` against ``self._x`` -- confirm against the enclosing
            factor's definition.
            """
            window = self.window_length

            x_offsets = (window - 1) / 2 - self._x
            x_matrix = repeat_last_axis(x_offsets, len(assets))

            close_means = np.nanmean(close, axis=0)
            y_matrix = close - repeat_first_axis(close_means, window)

            xy_sum = (x_matrix * y_matrix).sum(axis=0)
            out[:] = preprocess(-np.divide(xy_sum / self._x_var, window))
示例#7
0
    def _to_narrow(self, data, mask, dates, assets):
        """
        Turn raw computed pipeline results into a narrow DataFrame.

        Parameters
        ----------
        data : dict[str -> ndarray[ndim=2]]
            Mapping from column name to that column's computed results.
        mask : ndarray[bool, ndim=2]
            Selects which entries are kept.
        dates : pd.DatetimeIndex
            Row index for ``mask`` and the arrays in ``data``; ``.values``
            and slicing are used on it.
        assets : ndarray[int64]
            Column index for ``mask`` and the arrays in ``data``; passed to
            ``self._finder.retrieve_all`` for resolution.

        Returns
        -------
        results : pd.DataFrame
            Indexed by a two-level (date, asset) MultiIndex holding one
            entry per ``True`` value of ``mask``, with one column per key of
            ``data``. When any entry is kept, the date level is localized
            to UTC.

        Where ``mask[date, asset]`` is True,
        ``results.loc[(date, asset), colname]`` equals
        ``data[colname][date, asset]``.
        """
        nothing_kept = not mask.any()
        if nothing_kept:
            # Empty output is assembled manually. pandas fails to
            # tz_localize an empty DataFrame with a MultiIndex, and this
            # also skips masking arrays whose result is known to be empty.
            #
            # Slicing keeps the pandas metadata carried by ``dates``.
            blank_index = MultiIndex.from_arrays(
                [dates[:0], array([], dtype=object)]
            ) if False else None
            empty_dates = dates[:0]
            empty_assets = array([], dtype=object)
            blank_columns = dict(
                (name, array([], dtype=arr.dtype))
                for name, arr in iteritems(data)
            )
            blank_index = MultiIndex.from_arrays([empty_dates, empty_assets])
            return DataFrame(data=blank_columns, index=blank_index)

        resolved = array(self._finder.retrieve_all(assets))
        date_values = repeat_last_axis(dates.values, len(assets))[mask]
        asset_values = repeat_first_axis(resolved, len(dates))[mask]

        selected = dict(
            (name, arr[mask]) for name, arr in iteritems(data)
        )
        narrow_index = MultiIndex.from_arrays([date_values, asset_values])
        narrow = DataFrame(data=selected, index=narrow_index)
        return narrow.tz_localize('UTC', level=0)
示例#8
0
    def _to_narrow(self, data, mask, dates, assets):
        """
        Convert raw computed pipeline results into a DataFrame for public APIs.

        Parameters
        ----------
        data : dict[str -> ndarray[ndim=2]]
            Dict mapping column names to computed results.
        mask : ndarray[bool, ndim=2]
            Mask array of values to keep.
        dates : ndarray[datetime64, ndim=1]
            Row index for arrays `data` and `mask`
        assets : ndarray[int64, ndim=2]
            Column index for arrays `data` and `mask`

        Returns
        -------
        results : pd.DataFrame
            The indices of `results` are as follows:

            index : two-tiered MultiIndex of (date, asset).
                Contains an entry for each (date, asset) pair corresponding to
                a `True` value in `mask`.
            columns : Index of str
                One column per entry in `data`.

        If mask[date, asset] is True, then result.loc[(date, asset), colname]
        will contain the value of data[colname][date, asset].
        """
        resolved_assets = array(self._finder.retrieve_all(assets))
        dates_kept = repeat_last_axis(dates.values, len(assets))[mask]
        assets_kept = repeat_first_axis(resolved_assets, len(dates))[mask]
        return DataFrame(
            data={name: arr[mask]
                  for name, arr in iteritems(data)},
            index=MultiIndex.from_arrays([dates_kept, assets_kept]),
        ).tz_localize('UTC', level=0)
示例#9
0
    def _to_narrow(self, data, mask, dates, assets):
        """
        Convert raw computed pipeline results into a DataFrame for public APIs.

        Parameters
        ----------
        data : dict[str -> ndarray[ndim=2]]
            Dict mapping column names to computed results.
        mask : ndarray[bool, ndim=2]
            Mask array of values to keep.
        dates : ndarray[datetime64, ndim=1]
            Row index for arrays `data` and `mask`
        assets : ndarray[int64, ndim=2]
            Column index for arrays `data` and `mask`

        Returns
        -------
        results : pd.DataFrame
            The indices of `results` are as follows:

            index : two-tiered MultiIndex of (date, asset).
                Contains an entry for each (date, asset) pair corresponding to
                a `True` value in `mask`.
            columns : Index of str
                One column per entry in `data`.

        If mask[date, asset] is True, then result.loc[(date, asset), colname]
        will contain the value of data[colname][date, asset].
        """
        resolved_assets = array(self._finder.retrieve_all(assets))
        dates_kept = repeat_last_axis(dates.values, len(assets))[mask]
        assets_kept = repeat_first_axis(resolved_assets, len(dates))[mask]
        return DataFrame(
            data={name: arr[mask] for name, arr in iteritems(data)},
            index=MultiIndex.from_arrays([dates_kept, assets_kept]),
        ).tz_localize('UTC', level=0)
示例#10
0
 def _compute(self, arrays, dates, sids, mask):
     """
     Restrict ``mask`` to the sids listed in ``self.params['sids']``.

     Membership of each entry of ``sids`` is tested with ``isin``, expanded
     with ``repeat_first_axis`` (repeat count ``len(mask)``), and combined
     with ``mask`` using ``&``.
     """
     column_selected = sids.isin(self.params['sids'])
     selected = repeat_first_axis(column_selected, len(mask))
     return selected & mask
示例#11
0
 def _compute(self, arrays, dates, sids, mask):
     """
     Return ``mask`` limited to columns whose sid is in the configured set.

     The configured sids are read from ``self.params['sids']``; the
     per-sid membership vector is expanded by ``repeat_first_axis`` to
     ``len(mask)`` repeats before being and-ed with ``mask``.
     """
     wanted_sids = self.params['sids']
     return repeat_first_axis(sids.isin(wanted_sids), len(mask)) & mask
示例#12
0
    def _to_narrow(self, terms, data, mask, dates, symbols):
        """
        Convert single-date pipeline results into a DataFrame.

        Parameters
        ----------
        terms : dict[str -> Term]
            Dict mapping column names to terms; each term's ``postprocess``
            is applied to its masked results.
        data : dict[str -> ndarray[ndim=2]]
            Dict mapping column names to computed results for those names.
        mask : ndarray[bool, ndim=2]
            Mask array of values to keep.
        dates : ndarray[datetime64, ndim=1]
            Row index for arrays `data` and `mask`. Exactly one date is
            accepted (enforced with an ``assert``).
        symbols : list
            Column index.

        Returns
        -------
        results : pd.DataFrame
            index : flat ``pd.Index`` holding the entries of `symbols`
                selected by `mask`. The date is not part of the index.
            columns : one column per entry in `data`.
        """
        assert len(dates) == 1

        if mask.any():
            kept_symbols = repeat_first_axis(symbols, len(dates))[mask]

            # Every term that produced an output gets to postprocess its
            # filtered result. Per the original note (May 2016), this was
            # only used to convert LabelArrays into categoricals.
            processed = {
                name: terms[name].postprocess(data[name][mask])
                for name in data
            }
            return DataFrame(data=processed, index=pd.Index(kept_symbols))

        # Nothing to keep: build the empty frame directly, which also avoids
        # applying a known-empty mask to each array. Column dtypes are taken
        # from the corresponding arrays in ``data``.
        no_assets = array([], dtype=object)
        blank_columns = {
            name: array([], dtype=arr.dtype)
            for name, arr in iteritems(data)
        }
        return DataFrame(data=blank_columns, index=pd.Index(no_assets))