def test_yahoo_bars_to_panel_source(self):
    """Check the events a ``DataPanelSource`` emits for Yahoo bar data.

    Bars for two tickers are loaded with ``factory.load_bars_from_yahoo``
    and wrapped in a ``DataPanelSource``.  Every emitted event must expose
    all of the expected fields, carry an ``int`` volume, and the sids must
    come out in the same repeating order as ``stocks``.
    """
    stocks = ['AAPL', 'GE']
    start = pd.datetime(1993, 1, 1, 0, 0, 0, 0, pytz.utc)
    end = pd.datetime(2002, 1, 1, 0, 0, 0, 0, pytz.utc)
    data = factory.load_bars_from_yahoo(stocks=stocks,
                                        indexes={},
                                        start=start,
                                        end=end)

    check_fields = ['sid', 'open', 'high', 'low', 'close',
                    'volume', 'price']
    source = DataPanelSource(data)

    # Events are expected to alternate through the tickers in list order,
    # so an endless cycle over ``stocks`` yields the expected sid each time.
    stocks_iter = cycle(stocks)
    for event in source:
        for check_field in check_fields:
            self.assertIn(check_field, event)
        # assertIsInstance reports the offending value and type on failure,
        # unlike assertTrue(isinstance(...)) which only says "False".
        self.assertIsInstance(event['volume'], int)
        self.assertEqual(next(stocks_iter), event['sid'])
def test_nan_filter_panel(self, env=None):
    """Check how ``DataPanelSource`` treats NaN prices in a panel.

    A 2x2x2 panel of random data is built for sids 4 and 5 over two
    business days.  The price of sid 4 on the first day and the price of
    sid 5 on the second day are set to NaN.  The source is then expected
    to emit sids 5, 4, 5 in that order, with a non-NaN price on the last
    event.
    """
    env.update_asset_finder(identifiers=[4, 5])

    dates = pd.date_range('1/1/2000', periods=2, freq='B', tz='UTC')
    first_day, second_day = dates[0], dates[1]
    panel = pd.Panel(np.random.randn(2, 2, 2),
                     major_axis=dates,
                     items=[4, 5],
                     minor_axis=['price', 'volume'])

    # should be filtered
    panel.loc[4, first_day, 'price'] = np.nan
    # should not be filtered, should have been ffilled
    panel.loc[5, second_day, 'price'] = np.nan

    source = DataPanelSource(panel)
    for expected_sid in (5, 4, 5):
        event = next(source)
        self.assertEqual(expected_sid, event.sid)

    # ``event`` is now the last one pulled: sid 5 on the second day.
    self.assertFalse(np.isnan(event.price))
def create_test_panel_source(sim_params=None):
    """Build a ``DataPanelSource`` over a synthetic single-item panel.

    The date range runs from ``sim_params.first_open`` to
    ``sim_params.last_close`` when ``sim_params`` is truthy, otherwise from
    1990-01-03 to 1990-01-08 (UTC).  The panel holds one item, keyed ``0``,
    with three columns: ``price`` (0, 1, 2, ...), ``volume`` (constant
    1000) and ``arbitrary`` (constant 1).

    Returns a ``(DataPanelSource, panel)`` tuple.
    """
    if sim_params:
        start = sim_params.first_open
        end = sim_params.last_close
    else:
        start = pd.datetime(1990, 1, 3, 0, 0, 0, 0, pytz.utc)
        end = pd.datetime(1990, 1, 8, 0, 0, 0, 0, pytz.utc)

    index = pd.DatetimeIndex(start=start, end=end, freq=pd.datetools.day)
    n_rows = len(index)

    columns = {
        'price': np.arange(0, n_rows),
        'volume': np.ones(n_rows) * 1000,
        'arbitrary': np.ones(n_rows),
    }
    frame = pd.DataFrame(columns, index=index)

    panel = pd.Panel.from_dict({0: frame})
    return DataPanelSource(panel), panel
def test_close_position_equity(self):
    """Run a one-order algorithm on an equity whose end date is days[3].

    The algorithm orders one share of sid 1 with instant fill and zero
    per-share commission.  The run is expected to report positions
    ``[1, 1, 0]`` and pnl ``[0, 1, 2]``.
    """
    equity_info = {
        'symbol': 'TEST',
        'asset_type': 'equity',
        'end_date': self.days[3],
    }
    self.algo = TestAlgorithm(sid=1,
                              amount=1,
                              order_count=1,
                              instant_fill=True,
                              commission=PerShare(0),
                              asset_metadata={1: equity_info})
    self.data = DataPanelSource(self.panel)

    results = self.run_algo()

    # Check results
    expected_pnl = [0, 1, 2]
    expected_positions = [1, 1, 0]
    self.check_algo_pnl(results, expected_pnl)
    self.check_algo_positions(results, expected_positions)
def create_test_panel_source(sim_params=None):
    """Build a ``DataPanelSource`` over a synthetic single-item panel.

    The date range runs from ``sim_params.first_open`` to
    ``sim_params.last_close`` when ``sim_params`` is truthy, otherwise from
    1990-01-03 to 1990-01-08 (UTC).  The index is whatever
    ``trading.environment.days_in_range`` returns for that range; a
    ``TradingEnvironment`` is created and stored on ``trading.environment``
    first if none is set.  The panel holds one item, keyed ``0``, with
    three columns: ``price`` (0, 1, 2, ...), ``volume`` (constant 1000)
    and ``arbitrary`` (constant 1).

    Returns a ``(DataPanelSource, panel)`` tuple.
    """
    if sim_params:
        start = sim_params.first_open
        end = sim_params.last_close
    else:
        start = pd.datetime(1990, 1, 3, 0, 0, 0, 0, pytz.utc)
        end = pd.datetime(1990, 1, 8, 0, 0, 0, 0, pytz.utc)

    if trading.environment is None:
        trading.environment = trading.TradingEnvironment()
    index = trading.environment.days_in_range(start, end)
    n_rows = len(index)

    columns = {
        'price': np.arange(0, n_rows),
        'volume': np.ones(n_rows) * 1000,
        'arbitrary': np.ones(n_rows),
    }
    frame = pd.DataFrame(columns, index=index)

    panel = pd.Panel.from_dict({0: frame})
    return DataPanelSource(panel), panel
def test_yahoo_bars_to_panel_source(self):
    """Check the events a ``DataPanelSource`` emits for Yahoo bar data.

    Bars for two tickers are loaded with ``factory.load_bars_from_yahoo``.
    A copy of the panel has its items replaced by the sids that
    ``AssetFinder.map_identifier_index_to_sids`` returns for the original
    items, and that copy is wrapped in a ``DataPanelSource``.  Every
    emitted event must expose all of the expected fields, carry an integer
    volume, and have a sid that is one of the mapped sids.
    """
    finder = AssetFinder()
    stocks = ['AAPL', 'GE']
    start = pd.datetime(1993, 1, 1, 0, 0, 0, 0, pytz.utc)
    end = pd.datetime(2002, 1, 1, 0, 0, 0, 0, pytz.utc)
    data = factory.load_bars_from_yahoo(stocks=stocks,
                                        indexes={},
                                        start=start,
                                        end=end)

    check_fields = ['sid', 'open', 'high', 'low', 'close',
                    'volume', 'price']

    # Work on a copy so the loaded panel keeps its original items.
    copy_panel = data.copy()
    sids = finder.map_identifier_index_to_sids(
        data.items, data.major_axis[0]
    )
    copy_panel.items = sids

    source = DataPanelSource(copy_panel)
    for event in source:
        for check_field in check_fields:
            self.assertIn(check_field, event)
        # The dedicated assertions report the offending value on failure,
        # unlike assertTrue(...) which only says "False is not true".
        self.assertIsInstance(event['volume'], integer_types)
        self.assertIn(event['sid'], sids)
def run(self, source, overwrite_sim_params=True,
        benchmark_return_source=None):
    """Run the algorithm.

    :Arguments:
        source : can be either:
                 - pandas.DataFrame
                 - pandas.Panel
                 - zipline source
                 - list of sources

           If pandas.DataFrame is provided, it must have the
           following structure:
           * column names must consist of ints representing the
             different sids
           * index must be DatetimeIndex
           * array contents should be price info.

        overwrite_sim_params : bool
           When true (and ``source`` is not a list), the start, end and
           sids of ``self.sim_params`` are taken from the source(s).

        benchmark_return_source :
           Not referenced in the body of this method.

    :Returns:
        daily_stats : pandas.DataFrame
          Daily performance metrics such as returns, alpha etc.

    """
    if isinstance(source, list):
        if overwrite_sim_params:
            warnings.warn(
                "List of sources passed, will not attempt to extract "
                "sids, and start and end dates. Make sure to set the "
                "correct fields in sim_params passed to __init__().",
                UserWarning)
            overwrite_sim_params = False
    elif isinstance(source, pd.DataFrame):
        # A bare DataFrame gets wrapped in a DataFrameSource.
        source = DataFrameSource(source)
    elif isinstance(source, pd.Panel):
        source = DataPanelSource(source)

    self.set_sources(source if isinstance(source, list) else [source])

    # Let the source dictate the simulation parameters where it can.
    if overwrite_sim_params:
        for source_attr, param_attr in (('start', 'period_start'),
                                        ('end', 'period_end')):
            if hasattr(source, source_attr):
                setattr(self.sim_params, param_attr,
                        getattr(source, source_attr))
        self.sim_params.sids = {
            sid for each in self.sources for sid in each.sids
        }
        # period_start / period_end may have moved, so first_open and
        # last_close may need to be recomputed.
        self.sim_params._update_internal()

    # History containers are only needed when history specs exist.
    if self.history_specs:
        self.history_container = HistoryContainer(
            self.history_specs,
            self.sim_params.sids,
            self.sim_params.first_open)

    # Wrap every registered transform in a StatefulTransform.
    self.transforms = []
    for name, descr in iteritems(self.registered_transforms):
        wrapped = StatefulTransform(
            descr['class'],
            *descr['args'],
            **descr['kwargs']
        )
        wrapped.namestring = name
        self.transforms.append(wrapped)

    # Reset the performance tracker in case this is a repeat run.
    self.perf_tracker = None

    # Build the simulation generator.
    self.gen = self._create_generator(self.sim_params)

    with ZiplineAPI(self):
        # Each item yielded by the generator is a perf dictionary.
        perfs = list(self.gen)

        # Convert the perf dictionaries to a pandas DataFrame.
        daily_stats = self._create_daily_stats(perfs)

    self.analyze(daily_stats)

    return daily_stats
def run(self, source, overwrite_sim_params=True,
        benchmark_return_source=None):
    """Run the algorithm.

    :Arguments:
        source : can be either:
                 - pandas.DataFrame
                 - pandas.Panel
                 - zipline source
                 - list of sources

           If pandas.DataFrame is provided, it must have the
           following structure:
           * column names must be the different asset identifiers
           * index must be DatetimeIndex
           * array contents should be price info.

           A DataFrame's columns (or a Panel's items) are mapped to sids
           through ``self.asset_finder`` on a copy; the caller's object
           is not modified.

        overwrite_sim_params : bool
           When true (and ``source`` is not a list), the period start and
           end of ``self.sim_params`` are taken from the source if it
           exposes ``start`` / ``end``.

        benchmark_return_source :
           Not referenced in the body of this method.

    :Returns:
        daily_stats : pandas.DataFrame
          Daily performance metrics such as returns, alpha etc.

    """
    # Ensure that source is a DataSource object (or a list of them)
    if isinstance(source, list):
        if overwrite_sim_params:
            warnings.warn(
                "List of sources passed, will not attempt to extract "
                "start and end dates. Make sure to set the correct "
                "fields in sim_params passed to __init__().",
                UserWarning)
            overwrite_sim_params = False
        sources = source
    else:
        if isinstance(source, pd.DataFrame):
            # if DataFrame provided, map columns to sids and wrap
            # in DataFrameSource
            copy_frame = source.copy()
            copy_frame.columns = \
                self.asset_finder.map_identifier_index_to_sids(
                    source.columns, source.index[0]
                )
            source = DataFrameSource(copy_frame)
        elif isinstance(source, pd.Panel):
            # If Panel provided, map items to sids and wrap
            # in DataPanelSource
            copy_panel = source.copy()
            copy_panel.items = \
                self.asset_finder.map_identifier_index_to_sids(
                    source.items, source.major_axis[0]
                )
            source = DataPanelSource(copy_panel)
        sources = [source]

    self.set_sources(sources)

    # Override sim_params if params are provided by the source.
    if overwrite_sim_params:
        if hasattr(source, 'start'):
            self.sim_params.period_start = source.start
        if hasattr(source, 'end'):
            self.sim_params.period_end = source.end
        # Changing period_start and period_close might require updating
        # of first_open and last_close.
        self.sim_params._update_internal()

    # The sids field of the sources is the reference for the universe at
    # the start of the run.  A distinct loop name is used so the ``source``
    # argument is not rebound.
    self._current_universe = {
        sid for data_source in self.sources for sid in data_source.sids
    }

    # Check that all sids from the source are accounted for in
    # the AssetFinder. This retrieve call will raise an exception if the
    # sid is not found.
    for sid in self._current_universe:
        self.asset_finder.retrieve_asset(sid)

    # force a reset of the performance tracker, in case
    # this is a repeat run of the algorithm.
    self.perf_tracker = None

    # create zipline
    self.gen = self._create_generator(self.sim_params)

    # Create history containers
    if self.history_specs:
        self.history_container = self.history_container_class(
            self.history_specs,
            self.current_universe(),
            self.sim_params.first_open,
            self.sim_params.data_frequency,
        )

    # Exhaust the simulation; each item yielded is a perf dictionary.
    perfs = list(self.gen)

    # convert perf dict to pandas dataframe
    daily_stats = self._create_daily_stats(perfs)

    self.analyze(daily_stats)

    return daily_stats
def run(self, source, sim_params=None, benchmark_return_source=None):
    """Run the algorithm.

    :Arguments:
        source : can be either:
                 - pandas.DataFrame
                 - pandas.Panel
                 - zipline source
                 - list (or tuple) of zipline sources

           If pandas.DataFrame is provided, it must have the
           following structure:
           * column names must consist of ints representing the
             different sids
           * index must be DatetimeIndex
           * array contents should be price info.

        sim_params : simulation parameters, optional
           If omitted, ``self.sim_params`` is used; if that is also
           ``None``, parameters are created from ``source.start``,
           ``source.end`` and ``self.capital_base``.

        benchmark_return_source :
           Not referenced in the body of this method.

    :Returns:
        daily_stats : pandas.DataFrame
          Daily performance metrics such as returns, alpha etc.

    :Raises:
        AssertionError
          If a list or tuple of sources is given and no simulation
          parameters are available from either the argument or the
          constructor.

    """
    if isinstance(source, (list, tuple)):
        if self.sim_params is None and sim_params is None:
            # Raised explicitly rather than through ``assert`` so the
            # check still runs under ``python -O``; the exception type
            # callers see is unchanged.
            raise AssertionError(
                "When providing a list of sources, sim_params have to "
                "be specified as a parameter or in the constructor.")
        self.sources = source
    else:
        if isinstance(source, pd.DataFrame):
            # if DataFrame provided, wrap in DataFrameSource
            source = DataFrameSource(source)
        elif isinstance(source, pd.Panel):
            source = DataPanelSource(source)
        self.sources = [source]

    # Check for override of sim_params.
    # If it isn't passed to this function,
    # use the default params set with the algorithm.
    # Else, we create simulation parameters using the start and end of the
    # source provided.
    if sim_params is None:
        if self.sim_params is None:
            sim_params = create_simulation_parameters(
                start=source.start,
                end=source.end,
                capital_base=self.capital_base,
            )
        else:
            sim_params = self.sim_params

    # update sim params to ensure it's set
    self.sim_params = sim_params
    if self.sim_params.sids is None:
        self.sim_params.sids = {
            sid for each in self.sources for sid in each.sids
        }

    # Create history containers
    if self.history_specs:
        self.history_container = HistoryContainer(
            self.history_specs,
            self.sim_params.sids,
            self.sim_params.first_open)

    # Create transforms by wrapping them into StatefulTransforms
    self.transforms = []
    for namestring, trans_descr in iteritems(self.registered_transforms):
        sf = StatefulTransform(
            trans_descr['class'],
            *trans_descr['args'],
            **trans_descr['kwargs']
        )
        sf.namestring = namestring
        self.transforms.append(sf)

    # force a reset of the performance tracker, in case
    # this is a repeat run of the algorithm.
    self.perf_tracker = None

    # create transforms and zipline
    self.gen = self._create_generator(sim_params)

    with ZiplineAPI(self):
        # Exhaust the simulation; each item yielded is a perf dictionary.
        perfs = list(self.gen)

        # convert perf dict to pandas dataframe
        daily_stats = self._create_daily_stats(perfs)

    self.analyze(daily_stats)

    return daily_stats
def run(self, source, overwrite_sim_params=True, benchmark_return_source=None): """Run the algorithm. :Arguments: source : can be either: - pandas.DataFrame - zipline source - list of sources If pandas.DataFrame is provided, it must have the following structure: * column names must be the different asset identifiers * index must be DatetimeIndex * array contents should be price info. :Returns: daily_stats : pandas.DataFrame Daily performance metrics such as returns, alpha etc. """ # Ensure that source is a DataSource object if isinstance(source, list): if overwrite_sim_params: warnings.warn( """List of sources passed, will not attempt to extract start and end dates. Make sure to set the correct fields in sim_params passed to __init__().""", UserWarning) overwrite_sim_params = False elif isinstance(source, pd.DataFrame): # if DataFrame provided, wrap in DataFrameSource source = DataFrameSource(source) elif isinstance(source, pd.Panel): source = DataPanelSource(source) if isinstance(source, list): self.set_sources(source) else: self.set_sources([source]) # Override sim_params if params are provided by the source. if overwrite_sim_params: if hasattr(source, 'start'): self.sim_params.period_start = source.start if hasattr(source, 'end'): self.sim_params.period_end = source.end # The sids field of the source is the canonical reference for # sids in this run all_sids = [sid for s in self.sources for sid in s.sids] self.sim_params.sids = set(all_sids) # Check that all sids from the source are accounted for in # the AssetFinder for sid in self.sim_params.sids: try: self.asset_finder.retrieve_asset(sid) except SidNotFound: warnings.warn("No Asset found for sid '%s'. Make sure " "that the correct identifiers and asset " "metadata are passed to __init__()." % sid) # Changing period_start and period_close might require updating # of first_open and last_close. 
self.sim_params._update_internal() # force a reset of the performance tracker, in case # this is a repeat run of the algorithm. self.perf_tracker = None # create zipline self.gen = self._create_generator(self.sim_params) # Create history containers if self.history_specs: self.history_container = self.history_container_class( self.history_specs, self.sim_params.sids, self.sim_params.first_open, self.sim_params.data_frequency, ) with ZiplineAPI(self): # loop through simulated_trading, each iteration returns a # perf dictionary perfs = [] for perf in self.gen: perfs.append(perf) # convert perf dict to pandas dataframe daily_stats = self._create_daily_stats(perfs) self.analyze(daily_stats) return daily_stats