Пример #1
0
    def test_yahoo_bars_to_panel_source(self):
        """Check the events produced from a Yahoo-loaded panel of bars.

        Loads bars for two tickers through ``factory.load_bars_from_yahoo``,
        wraps the result in a ``DataPanelSource`` and verifies that every
        event carries the expected fields, has an integer volume, and that
        the event sids follow the requested tickers in round-robin order.
        """
        stocks = ['AAPL', 'GE']
        start = pd.datetime(1993, 1, 1, 0, 0, 0, 0, pytz.utc)
        end = pd.datetime(2002, 1, 1, 0, 0, 0, 0, pytz.utc)
        data = factory.load_bars_from_yahoo(stocks=stocks,
                                            indexes={},
                                            start=start,
                                            end=end)

        check_fields = [
            'sid', 'open', 'high', 'low', 'close', 'volume', 'price'
        ]
        source = DataPanelSource(data)
        # Each event's sid is compared against the tickers cycled in order.
        stocks_iter = cycle(stocks)
        for event in source:
            for check_field in check_fields:
                self.assertIn(check_field, event)
            # assertIsInstance reports the offending value and its type on
            # failure, unlike assertTrue(isinstance(...)).
            self.assertIsInstance(event['volume'], int)
            self.assertEqual(next(stocks_iter), event['sid'])
Пример #2
0
 def test_nan_filter_panel(self, env=None):
     """Check how DataPanelSource treats NaN prices in a two-sid panel.

     A random 2x2x2 panel (sids 4 and 5, two business days, 'price' and
     'volume') gets a NaN price for sid 4 on the first day and for sid 5
     on the second day. The source is then expected to emit events for
     sids 5, 4, 5 in that order, the last one with a non-NaN price.
     """
     env.update_asset_finder(identifiers=[4, 5])
     trading_days = pd.date_range('1/1/2000', periods=2, freq='B', tz='UTC')
     panel = pd.Panel(np.random.randn(2, 2, 2),
                      items=[4, 5],
                      major_axis=trading_days,
                      minor_axis=['price', 'volume'])

     # First-day NaN for sid 4: this event should be filtered out.
     panel.loc[4, trading_days[0], 'price'] = np.nan
     # Second-day NaN for sid 5: should be forward-filled, not filtered.
     panel.loc[5, trading_days[1], 'price'] = np.nan

     source = DataPanelSource(panel)
     for expected_sid in (5, 4, 5):
         event = next(source)
         self.assertEqual(expected_sid, event.sid)
     # The last event is sid 5 on the second day; its price must be filled.
     self.assertFalse(np.isnan(event.price))
Пример #3
0
def create_test_panel_source(sim_params=None):
    """Build a one-item test panel and a DataPanelSource wrapping it.

    The date range runs from ``sim_params.first_open`` to
    ``sim_params.last_close`` when ``sim_params`` is truthy, otherwise from
    1990-01-03 to 1990-01-08 (UTC). The single item (key 0) holds three
    columns: 'price' (0, 1, 2, ...), 'volume' (constant 1000) and
    'arbitrary' (constant 1).

    Returns a ``(DataPanelSource, panel)`` tuple.
    """
    if sim_params:
        start = sim_params.first_open
        end = sim_params.last_close
    else:
        start = pd.datetime(1990, 1, 3, 0, 0, 0, 0, pytz.utc)
        end = pd.datetime(1990, 1, 8, 0, 0, 0, 0, pytz.utc)

    index = pd.DatetimeIndex(start=start, end=end, freq=pd.datetools.day)
    n_rows = len(index)

    columns = {
        'price': np.arange(0, n_rows),
        'volume': np.ones(n_rows) * 1000,
        'arbitrary': np.ones(n_rows),
    }
    frame = pd.DataFrame(columns, index=index)
    panel = pd.Panel.from_dict({0: frame})

    return DataPanelSource(panel), panel
Пример #4
0
    def test_close_position_equity(self):
        """Run a one-order algorithm on an equity whose end_date is days[3].

        Builds a ``TestAlgorithm`` for sid 1 with metadata marking it as an
        equity ending on ``self.days[3]``, feeds it ``self.panel`` through a
        ``DataPanelSource`` and checks the resulting pnl and positions
        against the expected series.
        """
        equity_metadata = {
            'symbol': 'TEST',
            'asset_type': 'equity',
            'end_date': self.days[3],
        }
        self.algo = TestAlgorithm(
            sid=1,
            amount=1,
            order_count=1,
            instant_fill=True,
            commission=PerShare(0),
            asset_metadata={1: equity_metadata},
        )
        self.data = DataPanelSource(self.panel)

        # Run the algorithm, then compare pnl first and positions second.
        results = self.run_algo()
        pnl_expected = [0, 1, 2]
        positions_expected = [1, 1, 0]
        self.check_algo_pnl(results, pnl_expected)
        self.check_algo_positions(results, positions_expected)
Пример #5
0
def create_test_panel_source(sim_params=None):
    """Build a one-item test panel indexed by the trading environment's days.

    The date range runs from ``sim_params.first_open`` to
    ``sim_params.last_close`` when ``sim_params`` is truthy, otherwise from
    1990-01-03 to 1990-01-08 (UTC). The index is obtained from
    ``trading.environment.days_in_range``; if no environment is set, a
    default ``trading.TradingEnvironment()`` is created and stored on the
    ``trading`` module first.

    The single item (key 0) holds three columns: 'price' (0, 1, 2, ...),
    'volume' (constant 1000) and 'arbitrary' (constant 1).

    Returns a ``(DataPanelSource, panel)`` tuple.
    """
    if sim_params:
        start = sim_params.first_open
        end = sim_params.last_close
    else:
        start = pd.datetime(1990, 1, 3, 0, 0, 0, 0, pytz.utc)
        end = pd.datetime(1990, 1, 8, 0, 0, 0, 0, pytz.utc)

    # Lazily install a module-level environment so days_in_range is usable.
    if trading.environment is None:
        trading.environment = trading.TradingEnvironment()

    index = trading.environment.days_in_range(start, end)
    n_rows = len(index)

    columns = {
        'price': np.arange(0, n_rows),
        'volume': np.ones(n_rows) * 1000,
        'arbitrary': np.ones(n_rows),
    }
    frame = pd.DataFrame(columns, index=index)
    panel = pd.Panel.from_dict({0: frame})

    return DataPanelSource(panel), panel
Пример #6
0
    def test_yahoo_bars_to_panel_source(self):
        """Check the events produced from a Yahoo-loaded panel keyed by sid.

        Loads bars for two tickers through ``factory.load_bars_from_yahoo``,
        replaces the panel items on a copy with the sids returned by
        ``AssetFinder.map_identifier_index_to_sids``, wraps that copy in a
        ``DataPanelSource`` and verifies that every event carries the
        expected fields, an integer volume, and a sid from the mapped set.
        """
        finder = AssetFinder()
        stocks = ['AAPL', 'GE']
        start = pd.datetime(1993, 1, 1, 0, 0, 0, 0, pytz.utc)
        end = pd.datetime(2002, 1, 1, 0, 0, 0, 0, pytz.utc)
        data = factory.load_bars_from_yahoo(stocks=stocks,
                                            indexes={},
                                            start=start,
                                            end=end)

        check_fields = ['sid', 'open', 'high', 'low', 'close',
                        'volume', 'price']

        # Work on a copy so the loaded panel keeps its original items.
        copy_panel = data.copy()
        sids = finder.map_identifier_index_to_sids(
            data.items, data.major_axis[0]
        )
        copy_panel.items = sids
        source = DataPanelSource(copy_panel)
        for event in source:
            for check_field in check_fields:
                self.assertIn(check_field, event)
            # The dedicated assertions report the offending value on
            # failure, unlike assertTrue(<boolean expression>).
            self.assertIsInstance(event['volume'], integer_types)
            self.assertIn(event['sid'], sids)
Пример #7
0
    def run(self, source, overwrite_sim_params=True,
            benchmark_return_source=None):
        """Run the algorithm.

        :Arguments:
            source : can be either:
                     - pandas.DataFrame
                     - zipline source
                     - list of sources

               If pandas.DataFrame is provided, it must have the
               following structure:
               * column names must consist of ints representing the
                 different sids
               * index must be DatetimeIndex
               * array contents should be price info.

        :Returns:
            daily_stats : pandas.DataFrame
              Daily performance metrics such as returns, alpha etc.

        """
        if isinstance(source, list):
            if overwrite_sim_params:
                warnings.warn("""List of sources passed, will not attempt to extract sids, and start and end
 dates. Make sure to set the correct fields in sim_params passed to
 __init__().""", UserWarning)
                overwrite_sim_params = False
        elif isinstance(source, pd.DataFrame):
            # if DataFrame provided, wrap in DataFrameSource
            source = DataFrameSource(source)
        elif isinstance(source, pd.Panel):
            source = DataPanelSource(source)

        if isinstance(source, list):
            self.set_sources(source)
        else:
            self.set_sources([source])

        # Override sim_params if params are provided by the source.
        if overwrite_sim_params:
            if hasattr(source, 'start'):
                self.sim_params.period_start = source.start
            if hasattr(source, 'end'):
                self.sim_params.period_end = source.end
            all_sids = [sid for s in self.sources for sid in s.sids]
            self.sim_params.sids = set(all_sids)
            # Changing period_start and period_close might require updating
            # of first_open and last_close.
            self.sim_params._update_internal()

        # Create history containers
        if len(self.history_specs) != 0:
            self.history_container = HistoryContainer(
                self.history_specs,
                self.sim_params.sids,
                self.sim_params.first_open)

        # Create transforms by wrapping them into StatefulTransforms
        self.transforms = []
        for namestring, trans_descr in iteritems(self.registered_transforms):
            sf = StatefulTransform(
                trans_descr['class'],
                *trans_descr['args'],
                **trans_descr['kwargs']
            )
            sf.namestring = namestring

            self.transforms.append(sf)

        # force a reset of the performance tracker, in case
        # this is a repeat run of the algorithm.
        self.perf_tracker = None

        # create transforms and zipline
        self.gen = self._create_generator(self.sim_params)

        with ZiplineAPI(self):
            # loop through simulated_trading, each iteration returns a
            # perf dictionary
            perfs = []
            for perf in self.gen:
                perfs.append(perf)

            # convert perf dict to pandas dataframe
            daily_stats = self._create_daily_stats(perfs)

        self.analyze(daily_stats)

        return daily_stats
Пример #8
0
    def run(self,
            source,
            overwrite_sim_params=True,
            benchmark_return_source=None):
        """Run the algorithm.

        :Arguments:
            source : can be either:
                     - pandas.DataFrame
                     - zipline source
                     - list of sources

               If pandas.DataFrame is provided, it must have the
               following structure:
               * column names must be the different asset identifiers
               * index must be DatetimeIndex
               * array contents should be price info.

        :Returns:
            daily_stats : pandas.DataFrame
              Daily performance metrics such as returns, alpha etc.

        """

        # Ensure that source is a DataSource object
        if isinstance(source, list):
            if overwrite_sim_params:
                warnings.warn(
                    """List of sources passed, will not attempt to extract start and end
 dates. Make sure to set the correct fields in sim_params passed to
 __init__().""", UserWarning)
                overwrite_sim_params = False
        elif isinstance(source, pd.DataFrame):
            # if DataFrame provided, map columns to sids and wrap
            # in DataFrameSource
            copy_frame = source.copy()
            copy_frame.columns = \
                self.asset_finder.map_identifier_index_to_sids(
                    source.columns, source.index[0]
                )
            source = DataFrameSource(copy_frame)

        elif isinstance(source, pd.Panel):
            # If Panel provided, map items to sids and wrap
            # in DataPanelSource
            copy_panel = source.copy()
            copy_panel.items = self.asset_finder.map_identifier_index_to_sids(
                source.items, source.major_axis[0])
            source = DataPanelSource(copy_panel)

        if isinstance(source, list):
            self.set_sources(source)
        else:
            self.set_sources([source])

        # Override sim_params if params are provided by the source.
        if overwrite_sim_params:
            if hasattr(source, 'start'):
                self.sim_params.period_start = source.start
            if hasattr(source, 'end'):
                self.sim_params.period_end = source.end
            # Changing period_start and period_close might require updating
            # of first_open and last_close.
            self.sim_params._update_internal()

        # The sids field of the source is the reference for the universe at
        # the start of the run
        self._current_universe = set()
        for source in self.sources:
            for sid in source.sids:
                self._current_universe.add(sid)
        # Check that all sids from the source are accounted for in
        # the AssetFinder. This retrieve call will raise an exception if the
        # sid is not found.
        for sid in self._current_universe:
            self.asset_finder.retrieve_asset(sid)

        # force a reset of the performance tracker, in case
        # this is a repeat run of the algorithm.
        self.perf_tracker = None

        # create zipline
        self.gen = self._create_generator(self.sim_params)

        # Create history containers
        if self.history_specs:
            self.history_container = self.history_container_class(
                self.history_specs,
                self.current_universe(),
                self.sim_params.first_open,
                self.sim_params.data_frequency,
            )

        # loop through simulated_trading, each iteration returns a
        # perf dictionary
        perfs = []
        for perf in self.gen:
            perfs.append(perf)

        # convert perf dict to pandas dataframe
        daily_stats = self._create_daily_stats(perfs)

        self.analyze(daily_stats)

        return daily_stats
Пример #9
0
    def run(self, source, sim_params=None, benchmark_return_source=None):
        """Run the algorithm.

        :Arguments:
            source : can be either:
                     - pandas.DataFrame
                     - pandas.Panel
                     - zipline source
                     - list (or tuple) of zipline sources

               If pandas.DataFrame is provided, it must have the
               following structure:
               * column names must consist of ints representing the
                 different sids
               * index must be DatetimeIndex
               * array contents should be price info.
            sim_params :
               Simulation parameters for this run. When omitted,
               ``self.sim_params`` is used; when that is also None, new
               parameters are created from ``source.start``/``source.end``
               and ``self.capital_base``.
            benchmark_return_source :
               Accepted but not referenced in this method.

        :Returns:
            daily_stats : pandas.DataFrame
              Daily performance metrics such as returns, alpha etc.

        :Raises:
            AssertionError
              If a list/tuple of sources is given and sim_params is
              available neither as an argument nor on the instance.

        """
        if isinstance(source, (list, tuple)):
            # Raised explicitly instead of via ``assert`` so the check is
            # not stripped under ``python -O``; otherwise the failure would
            # surface later as an AttributeError on ``source.start``.
            if self.sim_params is None and sim_params is None:
                raise AssertionError(
                    """When providing a list of sources, \
                sim_params have to be specified as a parameter
                or in the constructor.""")
        elif isinstance(source, pd.DataFrame):
            # if DataFrame provided, wrap in DataFrameSource
            source = DataFrameSource(source)
        elif isinstance(source, pd.Panel):
            source = DataPanelSource(source)

        if not isinstance(source, (list, tuple)):
            self.sources = [source]
        else:
            self.sources = source

        # Check for override of sim_params.
        # If it isn't passed to this function,
        # use the default params set with the algorithm.
        # Else, we create simulation parameters using the start and end of the
        # source provided.
        if sim_params is None:
            if self.sim_params is None:
                start = source.start
                end = source.end
                sim_params = create_simulation_parameters(
                    start=start,
                    end=end,
                    capital_base=self.capital_base,
                )
            else:
                sim_params = self.sim_params

        # update sim params to ensure it's set
        self.sim_params = sim_params
        if self.sim_params.sids is None:
            # Default the sid universe to the union of all sources' sids.
            all_sids = [sid for s in self.sources for sid in s.sids]
            self.sim_params.sids = set(all_sids)

        # Create history containers
        if len(self.history_specs) != 0:
            self.history_container = HistoryContainer(
                self.history_specs, self.sim_params.sids,
                self.sim_params.first_open)

        # Create transforms by wrapping them into StatefulTransforms
        self.transforms = []
        for namestring, trans_descr in iteritems(self.registered_transforms):
            sf = StatefulTransform(trans_descr['class'], *trans_descr['args'],
                                   **trans_descr['kwargs'])
            sf.namestring = namestring

            self.transforms.append(sf)

        # force a reset of the performance tracker, in case
        # this is a repeat run of the algorithm.
        self.perf_tracker = None

        # create transforms and zipline
        self.gen = self._create_generator(sim_params)

        with ZiplineAPI(self):
            # loop through simulated_trading, each iteration returns a
            # perf dictionary
            perfs = []
            for perf in self.gen:
                perfs.append(perf)

            # convert perf dict to pandas dataframe
            daily_stats = self._create_daily_stats(perfs)

        self.analyze(daily_stats)

        return daily_stats
Пример #10
0
    def run(self,
            source,
            overwrite_sim_params=True,
            benchmark_return_source=None):
        """Run the algorithm.

        :Arguments:
            source : can be either:
                     - pandas.DataFrame
                     - zipline source
                     - list of sources

               If pandas.DataFrame is provided, it must have the
               following structure:
               * column names must be the different asset identifiers
               * index must be DatetimeIndex
               * array contents should be price info.

        :Returns:
            daily_stats : pandas.DataFrame
              Daily performance metrics such as returns, alpha etc.

        """

        # Ensure that source is a DataSource object
        if isinstance(source, list):
            if overwrite_sim_params:
                warnings.warn(
                    """List of sources passed, will not attempt to extract start and end
 dates. Make sure to set the correct fields in sim_params passed to
 __init__().""", UserWarning)
                overwrite_sim_params = False
        elif isinstance(source, pd.DataFrame):
            # if DataFrame provided, wrap in DataFrameSource
            source = DataFrameSource(source)
        elif isinstance(source, pd.Panel):
            source = DataPanelSource(source)

        if isinstance(source, list):
            self.set_sources(source)
        else:
            self.set_sources([source])

        # Override sim_params if params are provided by the source.
        if overwrite_sim_params:
            if hasattr(source, 'start'):
                self.sim_params.period_start = source.start
            if hasattr(source, 'end'):
                self.sim_params.period_end = source.end
            # The sids field of the source is the canonical reference for
            # sids in this run
            all_sids = [sid for s in self.sources for sid in s.sids]
            self.sim_params.sids = set(all_sids)
            # Check that all sids from the source are accounted for in
            # the AssetFinder
            for sid in self.sim_params.sids:
                try:
                    self.asset_finder.retrieve_asset(sid)
                except SidNotFound:
                    warnings.warn("No Asset found for sid '%s'. Make sure "
                                  "that the correct identifiers and asset "
                                  "metadata are passed to __init__()." % sid)
            # Changing period_start and period_close might require updating
            # of first_open and last_close.
            self.sim_params._update_internal()

        # force a reset of the performance tracker, in case
        # this is a repeat run of the algorithm.
        self.perf_tracker = None

        # create zipline
        self.gen = self._create_generator(self.sim_params)

        # Create history containers
        if self.history_specs:
            self.history_container = self.history_container_class(
                self.history_specs,
                self.sim_params.sids,
                self.sim_params.first_open,
                self.sim_params.data_frequency,
            )

        with ZiplineAPI(self):
            # loop through simulated_trading, each iteration returns a
            # perf dictionary
            perfs = []
            for perf in self.gen:
                perfs.append(perf)

            # convert perf dict to pandas dataframe
            daily_stats = self._create_daily_stats(perfs)

        self.analyze(daily_stats)

        return daily_stats