def test_single_factor_instance_args(self):
    """
    Test dependency resolution for a single factor with arguments passed to
    the constructor.
    """
    bar, buzz = SomeDataSet.bar, SomeDataSet.buzz

    factor = SomeFactor([bar, buzz], window_length=5)
    graph = self.make_execution_plan(to_dict([factor]))

    resolution_order = list(graph.ordered())

    # SomeFactor, its inputs, and AssetExists()
    self.assertEqual(len(resolution_order), 4)
    self.check_dependency_order(resolution_order)
    self.assertIn(AssetExists(), resolution_order)
    self.assertEqual(graph.extra_rows[AssetExists()], 4)

    # LoadableTerms should be specialized to our domain in the execution
    # order.
    specialized_bar = bar.specialize(self.DOMAIN)
    specialized_buzz = buzz.specialize(self.DOMAIN)
    self.assertIn(specialized_bar, resolution_order)
    self.assertIn(specialized_buzz, resolution_order)

    # ComputableTerms don't yet have a notion of specialization, so they
    # should appear unchanged in the execution order.
    self.assertIn(
        SomeFactor([bar, buzz], window_length=5),
        resolution_order,
    )

    # Check the extra rows of *each* input. The second assertion used to
    # repeat the ``bar`` check, leaving ``buzz`` unverified.
    self.assertEqual(graph.extra_rows[specialized_bar], 4)
    self.assertEqual(graph.extra_rows[specialized_buzz], 4)
def test_single_factor_instance_args(self):
    """
    Check the resolution order produced for a single factor that is built
    with explicit constructor arguments.
    """
    bar, buzz = SomeDataSet.bar, SomeDataSet.buzz
    terms = to_dict([SomeFactor([bar, buzz], window_length=5)])
    graph = TermGraph(terms)

    order = list(graph.ordered())

    # Four terms in total: AssetExists(), the two inputs, and SomeFactor.
    self.assertEqual(len(order), 4)
    root, first_input, second_input, final = order

    self.assertIs(root, AssetExists())
    self.assertEqual(graph.extra_rows[AssetExists()], 4)

    # The two inputs may be resolved in either order.
    self.assertEqual({first_input, second_input}, {bar, buzz})

    self.assertEqual(final, SomeFactor([bar, buzz], window_length=5))

    for column in (bar, buzz):
        self.assertEqual(graph.extra_rows[column], 4)
class BoundColumn(Term):
    """
    A Column of data that's been concretely bound to a particular dataset.

    Identified by its dataset and name in addition to whatever identity the
    ``Term`` base class contributes (see ``static_identity``).
    """
    # Class-level term attributes: columns are masked by AssetExists(), need
    # no extra input rows, and have no inputs of their own.
    mask = AssetExists()
    extra_input_rows = 0
    inputs = ()

    def __new__(cls, dtype, dataset, name):
        """
        Construct a column, taking its domain from ``dataset.domain``.

        All values are forwarded as keywords to the base-class ``__new__``.
        """
        return super(BoundColumn, cls).__new__(
            cls,
            domain=dataset.domain,
            dtype=dtype,
            dataset=dataset,
            name=name,
        )

    def _init(self, dataset, name, *args, **kwargs):
        """
        Store the column-specific state, then delegate the remaining
        arguments to the base-class ``_init``.
        """
        # NOTE(review): presumably invoked by the Term construction machinery
        # with the keywords passed to __new__ -- confirm against Term.
        self._dataset = dataset
        self._name = name
        return super(BoundColumn, self)._init(*args, **kwargs)

    @classmethod
    def static_identity(cls, dataset, name, *args, **kwargs):
        """
        Return a tuple of (base-class identity, dataset, name).
        """
        return (
            super(BoundColumn, cls).static_identity(*args, **kwargs),
            dataset,
            name,
        )

    @property
    def dataset(self):
        """
        The dataset to which this column is bound.
        """
        return self._dataset

    @property
    def name(self):
        """
        The name of this column.
        """
        return self._name

    @property
    def qualname(self):
        """
        Fully qualified name of this column: ``<dataset.__name__>.<name>``.
        """
        return '.'.join([self.dataset.__name__, self.name])

    @property
    def latest(self):
        """
        A ``Latest`` term whose only input is this column.
        """
        # FIXME: Once we support non-float dtypes, this should pass a dtype
        # along. Right now we're just assuming that inputs will safely coerce
        # to float.
        return Latest(inputs=(self, ))

    def __repr__(self):
        # Rendered as "<qualname>::<dtype name>"; this version reads
        # ``dtype.__name__``.
        return "{qualname}::{dtype}".format(
            qualname=self.qualname,
            dtype=self.dtype.__name__,
        )

    def short_repr(self):
        """
        Short representation: just the qualified name.
        """
        return self.qualname
def test_reuse_atomic_terms(self):
    """
    Raw inputs shared by several factors must appear in the dependency graph
    exactly once, and always ahead of the factors that consume them.
    """
    f1 = SomeFactor([SomeDataSet.foo, SomeDataSet.bar])
    f2 = SomeOtherFactor([SomeDataSet.bar, SomeDataSet.buzz])

    graph = TermGraph(to_dict([f1, f2]))
    order = list(graph.ordered())

    # AssetExists, foo, bar, buzz, f1, f2 -- bar is shared, so six terms.
    self.assertEqual(len(order), 6)

    # Position of every term within the resolution order.
    indices = {term: order.index(term) for term in order}

    self.assertEqual(indices[AssetExists()], 0)

    # Every factor's dependencies must be computed before the factor itself.
    consumers = (
        (f1, ('foo', 'bar')),
        (f2, ('bar', 'buzz')),
    )
    for factor, column_names in consumers:
        for column_name in column_names:
            column = getattr(SomeDataSet, column_name)
            self.assertLess(indices[column], indices[factor])
class BoundColumn(Term):
    """
    A Column of data that's been concretely bound to a particular dataset.

    Identified by its dataset and name in addition to whatever identity the
    ``Term`` base class contributes (see ``static_identity``).
    """
    # Class-level term attributes: columns are masked by AssetExists(), need
    # no extra input rows, and have no inputs of their own.
    mask = AssetExists()
    extra_input_rows = 0
    inputs = ()

    def __new__(cls, dtype, dataset, name):
        """
        Construct a column, taking its domain from ``dataset.domain``.

        All values are forwarded as keywords to the base-class ``__new__``.
        """
        return super(BoundColumn, cls).__new__(
            cls,
            domain=dataset.domain,
            dtype=dtype,
            dataset=dataset,
            name=name,
        )

    def _init(self, dataset, name, *args, **kwargs):
        """
        Store the column-specific state, then delegate the remaining
        arguments to the base-class ``_init``.
        """
        # NOTE(review): presumably invoked by the Term construction machinery
        # with the keywords passed to __new__ -- confirm against Term.
        self._dataset = dataset
        self._name = name
        return super(BoundColumn, self)._init(*args, **kwargs)

    @classmethod
    def static_identity(cls, dataset, name, *args, **kwargs):
        """
        Return a tuple of (base-class identity, dataset, name).
        """
        return (
            super(BoundColumn, cls).static_identity(*args, **kwargs),
            dataset,
            name,
        )

    @property
    def dataset(self):
        """
        The dataset to which this column is bound.
        """
        return self._dataset

    @property
    def name(self):
        """
        The name of this column.
        """
        return self._name

    @property
    def qualname(self):
        """
        Fully qualified name of this column: ``<dataset.__name__>.<name>``.
        """
        return '.'.join([self.dataset.__name__, self.name])

    @property
    def latest(self):
        """
        A ``Latest`` factor over this column, carrying this column's dtype.
        """
        # Imported at call time rather than at module level.
        # NOTE(review): presumably to avoid a circular import -- confirm.
        from zipline.pipeline.factors import Latest
        return Latest(inputs=(self, ), dtype=self.dtype)

    def __repr__(self):
        # Rendered as "<qualname>::<dtype name>".
        return "{qualname}::{dtype}".format(
            qualname=self.qualname,
            dtype=self.dtype.name,
        )

    def short_repr(self):
        """
        Short representation: just the qualified name.
        """
        return self.qualname
def run_graph(self, graph, initial_workspace, mask_sid):
    """
    Run ``graph`` through ``self.engine.compute_chunk``.

    ``initial_workspace`` is seeded in place with an ``AssetExists()`` entry
    unless it already has one: ``self.asset_exists_masked`` when ``mask_sid``
    is truthy, ``self.asset_exists`` otherwise.
    """
    exists_term = AssetExists()
    if mask_sid:
        exists_values = self.asset_exists_masked
    else:
        exists_values = self.asset_exists
    initial_workspace.setdefault(exists_term, exists_values)

    compute = self.engine.compute_chunk
    return compute(graph, self.dates, self.assets, initial_workspace)
def check_output(graph):
    """
    Assert that ``graph`` resolves to exactly AssetExists(), SomeDataSet.foo,
    SomeDataSet.bar and SomeFactor(), in a valid dependency order.

    Relies on ``self`` from the enclosing test method's scope.
    """
    order = list(graph.ordered())

    self.assertEqual(len(order), 4)
    self.check_dependency_order(order)

    self.assertIn(AssetExists(), order)
    for column_name in ('foo', 'bar'):
        self.assertIn(getattr(SomeDataSet, column_name), order)
    self.assertIn(SomeFactor(), order)

    # Both raw inputs must carry four extra rows.
    for column_name in ('foo', 'bar'):
        node_data = graph.node[getattr(SomeDataSet, column_name)]
        self.assertEqual(node_data['extra_rows'], 4)
def check_output(graph):
    """
    Assert that ``graph`` resolves to AssetExists() first, then
    SomeDataSet.foo and SomeDataSet.bar in either order, then SomeFactor().

    Relies on ``self`` from the enclosing test method's scope.
    """
    order = list(graph.ordered())

    self.assertEqual(len(order), 4)
    root, first_input, second_input, final = order

    self.assertIs(root, AssetExists())
    self.assertEqual(
        {first_input, second_input},
        {SomeDataSet.foo, SomeDataSet.bar},
    )
    self.assertEqual(final, SomeFactor())

    # Both raw inputs must carry four extra rows.
    for column_name in ('foo', 'bar'):
        node_data = graph.node[getattr(SomeDataSet, column_name)]
        self.assertEqual(node_data['extra_rows'], 4)
def __init__(self,
             list_symbols,
             calendar=None,
             populate_initial_workspace=None):
    """
    Store the engine configuration.

    Parameters
    ----------
    list_symbols
        Stored as-is on ``self._list_symbols``.
    calendar : optional
        Defaults to ``get_calendar('NYSE').all_sessions`` when None.
    populate_initial_workspace : optional
        Falls back to ``default_populate_initial_workspace`` when falsy.
    """
    self._list_symbols = list_symbols

    self._calendar = (
        get_calendar('NYSE').all_sessions if calendar is None else calendar
    )

    # Root terms used as the engine's asset mask and date index.
    self._root_mask_term = AssetExists()
    self._root_mask_dates_term = InputDates()

    if populate_initial_workspace:
        populate = populate_initial_workspace
    else:
        populate = default_populate_initial_workspace
    self._populate_initial_workspace = populate
def run_graph(self, graph, initial_workspace, mask=None):
    """
    Compute ``graph`` on a fresh SimplePipelineEngine, seeding the engine's
    workspace with ``initial_workspace``.

    Parameters
    ----------
    graph : zipline.pipeline.graph.ExecutionPlan
        Graph to run.
    initial_workspace : dict
        Initial workspace forwarded to ``compute_chunk``. Entries for
        ``AssetExists()`` and ``InputDates()`` are added in place when absent.
    mask : DataFrame, optional
        Source of the ``AssetExists()`` mask values, the dates and the sids
        (via ``explode``). Defaults to ``self.default_asset_exists_mask``.

    Returns
    -------
    results : dict
        Whatever ``SimplePipelineEngine.compute_chunk`` returns.
    """
    def get_loader(column):
        # Everything needed must already be in the workspace.
        raise AssertionError("run_graph() should not require any loaders!")

    engine = SimplePipelineEngine(
        get_loader,
        self.asset_finder,
        default_domain=US_EQUITIES,
    )

    frame = self.default_asset_exists_mask if mask is None else mask
    dates, sids, mask_values = explode(frame)

    for term, values in ((AssetExists, mask_values), (InputDates, dates)):
        initial_workspace.setdefault(term(), values)

    refcounts = graph.initial_refcounts(initial_workspace)
    execution_order = graph.execution_order(initial_workspace, refcounts)

    chunk_kwargs = dict(
        graph=graph,
        dates=dates,
        sids=sids,
        workspace=initial_workspace,
        execution_order=execution_order,
        refcounts=refcounts,
        hooks=NoHooks(),
    )
    return engine.compute_chunk(**chunk_kwargs)
def test_single_factor_instance_args(self):
    """
    Check dependency resolution for a single factor that is built with
    explicit constructor arguments.
    """
    bar, buzz = SomeDataSet.bar, SomeDataSet.buzz
    inputs = (bar, buzz)

    plan = self.make_execution_plan(
        to_dict([SomeFactor([bar, buzz], window_length=5)]),
    )
    order = list(plan.ordered())

    # SomeFactor, its two inputs, and AssetExists().
    self.assertEqual(len(order), 4)
    self.check_dependency_order(order)

    self.assertIn(AssetExists(), order)
    self.assertEqual(plan.extra_rows[AssetExists()], 4)

    for column in inputs:
        self.assertIn(column, order)
    self.assertIn(SomeFactor([bar, buzz], window_length=5), order)

    for column in inputs:
        self.assertEqual(plan.extra_rows[column], 4)
def __new__(cls,
            target,
            regression_length,
            allowed_missing_percentage=0.25):
    """
    Build the term from two-row ``Returns`` of all assets and of ``target``.

    Parameters
    ----------
    target
        Asset whose returns slice (``returns[target]``) is the second input.
    regression_length
        Forwarded as ``window_length``.
    allowed_missing_percentage : float, optional
        Scaled by ``regression_length`` and truncated with ``int`` to give
        ``allowed_missing_count``.
    """
    # OR-ing in SingleAsset keeps the target in the mask.
    returns_mask = AssetExists() | SingleAsset(asset=target)
    daily_returns = Returns(window_length=2, mask=returns_mask)

    missing_budget = allowed_missing_percentage * regression_length
    allowed_missing_count = int(missing_budget)

    return super().__new__(
        cls,
        inputs=[daily_returns, daily_returns[target]],
        window_length=regression_length,
        allowed_missing_count=allowed_missing_count,
    )
def __new__(cls,
            target,
            returns_length,
            regression_length,
            mask=NotSpecified):
    """
    Build a regression of every asset's returns against ``target``'s returns.

    ``Returns`` over ``returns_length`` rows is the dependent term and its
    ``target`` slice is the independent term. ``regression_length`` and
    ``mask`` are forwarded unchanged.
    """
    # Use the `SingleAsset` filter here because it protects against
    # inputting a non-existent target asset.
    returns_mask = AssetExists() | SingleAsset(asset=target)
    asset_returns = Returns(window_length=returns_length, mask=returns_mask)

    return super().__new__(
        cls,
        dependent=asset_returns,
        independent=asset_returns[target],
        regression_length=regression_length,
        mask=mask,
    )
def __new__(cls,
            target,
            returns_length,
            correlation_length,
            mask=NotSpecified):
    """
    Build a correlation of every asset's returns with ``target``'s returns.

    ``Returns`` over ``returns_length`` rows is the base factor and its
    ``target`` slice is the correlation target. ``correlation_length`` and
    ``mask`` are forwarded unchanged.
    """
    # Use the `SingleAsset` filter here because it protects against
    # inputting a non-existent target asset.
    returns_mask = AssetExists() | SingleAsset(asset=target)
    asset_returns = Returns(window_length=returns_length, mask=returns_mask)

    return super(RollingSpearmanOfReturns, cls).__new__(
        cls,
        base_factor=asset_returns,
        target=asset_returns[target],
        correlation_length=correlation_length,
        mask=mask,
    )
def __new__(cls,
            target,
            returns_length,
            regression_length,
            mask=NotSpecified,
            **kwargs):
    """
    Build a rolling regression of returns against ``target``.

    The input is ``Returns`` over ``returns_length`` rows and the window is
    ``regression_length``. An unspecified ``mask`` defaults to
    ``AssetExists()``. Extra keywords are forwarded to the base class.
    """
    base_mask = AssetExists() if mask is NotSpecified else mask

    # Make sure we do not filter out the asset of interest.
    mask_with_target = base_mask | SingleAsset(asset=target)

    return super(RollingLinearRegressionOfReturns, cls).__new__(
        cls,
        target=target,
        inputs=[Returns(window_length=returns_length)],
        window_length=regression_length,
        mask=mask_with_target,
        **kwargs
    )
def check_output(graph):
    """
    Assert that ``graph`` resolves to AssetExists(), the domain-specialized
    ``foo`` and ``bar`` columns, and SomeFactor(), in a valid dependency
    order.

    Relies on ``self`` from the enclosing test method's scope.
    """
    resolution_order = list(graph.ordered())

    # Loadable terms should get specialized during graph construction.
    specialized_foo = SomeDataSet.foo.specialize(self.DOMAIN)
    # This used to specialize ``foo`` a second time, so ``bar`` was never
    # actually checked.
    specialized_bar = SomeDataSet.bar.specialize(self.DOMAIN)

    self.assertEqual(len(resolution_order), 4)
    self.check_dependency_order(resolution_order)
    self.assertIn(AssetExists(), resolution_order)
    self.assertIn(specialized_foo, resolution_order)
    self.assertIn(specialized_bar, resolution_order)
    self.assertIn(SomeFactor(), resolution_order)

    # Both specialized inputs must carry four extra rows.
    self.assertEqual(
        graph.graph.node[specialized_foo]['extra_rows'],
        4,
    )
    self.assertEqual(
        graph.graph.node[specialized_bar]['extra_rows'],
        4,
    )
def run_graph(self, graph, initial_workspace, mask=None):
    """
    Compute ``graph`` on a fresh SimplePipelineEngine, seeding the engine's
    workspace with ``initial_workspace``.

    Parameters
    ----------
    graph : zipline.pipeline.graph.TermGraph
        Graph to run.
    initial_workspace : dict
        Initial workspace forwarded to ``compute_chunk``. Entries for
        ``AssetExists()`` and ``InputDates()`` are added in place when absent.
    mask : DataFrame, optional
        Source of the ``AssetExists()`` mask values, the dates and the assets
        (via ``explode``). Defaults to ``self.default_asset_exists_mask``.

    Returns
    -------
    results : dict
        Whatever ``SimplePipelineEngine.compute_chunk`` returns.
    """
    def exploding_loader(column):
        # Every column maps to an ExplodingObject instead of a real loader.
        return ExplodingObject()

    engine = SimplePipelineEngine(
        exploding_loader,
        self.nyse_sessions,
        self.asset_finder,
    )

    frame = self.default_asset_exists_mask if mask is None else mask
    dates, assets, mask_values = explode(frame)

    for term, values in ((AssetExists, mask_values), (InputDates, dates)):
        initial_workspace.setdefault(term(), values)

    return engine.compute_chunk(graph, dates, assets, initial_workspace)
class BoundColumn(LoadableTerm):
    """
    A column of data that's been concretely bound to a particular dataset.

    Instances of this class are dynamically created upon access to attributes
    of DataSets (for example, USEquityPricing.close is an instance of this
    class).

    Attributes
    ----------
    dtype : numpy.dtype
        The dtype of data produced when this column is loaded.
    latest : zipline.pipeline.data.Factor or zipline.pipeline.data.Filter
        A Filter, Factor, or Classifier computing the most recently known value
        of this column on each date.

        Produces a Filter if ``self.dtype`` is in ``Filter.ALLOWED_DTYPES``.
        Produces a Classifier if it is in ``Classifier.ALLOWED_DTYPES``.
        Otherwise produces a Factor.
    dataset : zipline.pipeline.data.DataSet
        The dataset to which this column is bound.
    name : str
        The name of this column.
    metadata : dict
        Extra metadata associated with this column.
    """
    mask = AssetExists()
    window_safe = True

    def __new__(cls, dtype, missing_value, dataset, name, doc, metadata):
        """
        Construct a column, taking ``domain`` and ``ndim`` from ``dataset``.

        All values are forwarded as keywords to the base-class ``__new__``.
        """
        return super(BoundColumn, cls).__new__(
            cls,
            domain=dataset.domain,
            dtype=dtype,
            missing_value=missing_value,
            dataset=dataset,
            name=name,
            ndim=dataset.ndim,
            doc=doc,
            metadata=metadata,
        )

    def _init(self, dataset, name, doc, metadata, *args, **kwargs):
        """
        Store the column-specific state, then delegate the remaining
        arguments to the base-class ``_init``.
        """
        # NOTE(review): presumably invoked by the term construction machinery
        # with the keywords passed to __new__ -- confirm against the base.
        self._dataset = dataset
        self._name = name
        # ``doc`` becomes this instance's own ``__doc__``.
        self.__doc__ = doc
        self._metadata = metadata
        return super(BoundColumn, self)._init(*args, **kwargs)

    @classmethod
    def _static_identity(cls, dataset, name, doc, metadata, *args, **kwargs):
        """
        Return a tuple of the base-class identity, ``dataset``, ``name``,
        ``doc`` and a frozenset of the metadata items.
        """
        return (
            super(BoundColumn, cls)._static_identity(*args, **kwargs),
            dataset,
            name,
            doc,
            # The metadata dict is frozen so that it can be part of the
            # identity tuple.
            # NOTE(review): ``first`` is presumably toolz's first, i.e. sort
            # by key -- confirm.
            frozenset(sorted(metadata.items(), key=first)),
        )

    @property
    def dataset(self):
        """
        The dataset to which this column is bound.
        """
        return self._dataset

    @property
    def name(self):
        """
        The name of this column.
        """
        return self._name

    @property
    def metadata(self):
        """
        A copy of the metadata for this column.
        """
        return self._metadata.copy()

    @property
    def qualname(self):
        """
        The fully-qualified name of this column.

        Generated by doing '.'.join([self.dataset.__name__, self.name]).
        """
        return '.'.join([self.dataset.__name__, self.name])

    @property
    def latest(self):
        """
        A "latest value" term over this column, carrying this column's
        ``dtype``, ``missing_value`` and ``ndim``.

        The term class is chosen from ``self.dtype``: filter dtypes are
        checked first, then classifier dtypes, then factor dtypes.
        """
        dtype = self.dtype
        if dtype in Filter.ALLOWED_DTYPES:
            Latest = LatestFilter
        elif dtype in Classifier.ALLOWED_DTYPES:
            Latest = LatestClassifier
        else:
            assert dtype in Factor.ALLOWED_DTYPES, "Unknown dtype %s." % dtype
            Latest = LatestFactor

        return Latest(
            inputs=(self,),
            dtype=dtype,
            missing_value=self.missing_value,
            ndim=self.ndim,
        )

    def __repr__(self):
        # Rendered as "<qualname>::<dtype name>".
        return "{qualname}::{dtype}".format(
            qualname=self.qualname,
            dtype=self.dtype.name,
        )

    def short_repr(self):
        """Short repr to use when rendering Pipeline graphs."""
        return self.qualname
def _render(g, out, format_, include_asset_exists=False):
    """
    Draw `g` as a graph to `out`, in format `format_`.

    The graph description is written to an in-memory buffer, piped to the
    external ``dot`` program, and ``dot``'s standard output is written to
    `out`.

    Parameters
    ----------
    g : zipline.pipeline.graph.TermGraph
        Graph to render.
    out : file-like object
        Receives the bytes produced by ``dot``.
    format_ : str {'png', 'svg'}
        Output format.
    include_asset_exists : bool
        Whether to include `AssetExists()` nodes, and the edges leaving them,
        in the rendering. They are left out by default.

    Raises
    ------
    RuntimeError
        If ``dot`` cannot be found, or if ``dot`` writes anything to its
        standard error.
    """
    graph_attrs = {"rankdir": "TB", "splines": "ortho"}
    cluster_attrs = {"style": "filled", "color": "lightgoldenrod1"}

    in_nodes = g.loadable_terms
    out_nodes = list(g.outputs.values())

    f = BytesIO()
    with graph(f, "G", **graph_attrs):

        # Write outputs cluster.
        with cluster(f, "Output", labelloc="b", **cluster_attrs):
            for term in filter_nodes(include_asset_exists, out_nodes):
                add_term_node(f, term)

        # Write inputs cluster.
        with cluster(f, "Input", **cluster_attrs):
            for term in filter_nodes(include_asset_exists, in_nodes):
                add_term_node(f, term)

        # Write intermediate results.
        for term in filter_nodes(include_asset_exists, topological_sort(g.graph)):
            # Inputs and outputs were already written in their clusters.
            if term in in_nodes or term in out_nodes:
                continue
            add_term_node(f, term)

        # Write edges
        for source, dest in g.graph.edges():
            # Skip edges leaving AssetExists() when that node is hidden.
            if source is AssetExists() and not include_asset_exists:
                continue
            add_edge(f, id(source), id(dest))

    cmd = ["dot", "-T", format_]
    try:
        proc = Popen(cmd, stdin=PIPE, stdout=PIPE, stderr=PIPE)
    except OSError as e:
        # ENOENT means the executable itself is missing; anything else is
        # re-raised untouched.
        if e.errno == errno.ENOENT:
            raise RuntimeError(
                "Couldn't find `dot` graph layout program. "
                "Make sure Graphviz is installed and `dot` is on your path."
            )
        else:
            raise

    # Rewind so the whole description is fed to dot's stdin.
    f.seek(0)
    proc_stdout, proc_stderr = proc.communicate(f.read())
    # Any stderr output at all is treated as a failure.
    if proc_stderr:
        raise RuntimeError(
            "Error(s) while rendering graph: %s" % proc_stderr.decode("utf-8")
        )

    out.write(proc_stdout)
class BoundColumn(LoadableTerm):
    """
    A column of data that's been concretely bound to a particular dataset.

    Attributes
    ----------
    dtype : numpy.dtype
        The dtype of data produced when this column is loaded.
    latest : zipline.pipeline.LoadableTerm
        A :class:`~zipline.pipeline.Filter`, :class:`~zipline.pipeline.Factor`,
        or :class:`~zipline.pipeline.Classifier` computing the most recently
        known value of this column on each date.
        See :class:`zipline.pipeline.mixins.LatestMixin` for more details.
    dataset : zipline.pipeline.data.DataSet
        The dataset to which this column is bound.
    name : str
        The name of this column.
    metadata : dict
        Extra metadata associated with this column.

    Notes
    -----
    Instances of this class are dynamically created upon access to attributes
    of :class:`~zipline.pipeline.data.DataSet`. For example,
    :attr:`~zipline.pipeline.data.EquityPricing.close` is an instance of this
    class. Pipeline API users should never construct instances of this
    directly.
    """
    mask = AssetExists()
    window_safe = True

    def __new__(cls, dtype, missing_value, dataset, name, doc, metadata):
        """
        Construct a column, taking ``domain`` and ``ndim`` from ``dataset``.

        All values are forwarded as keywords to the base-class ``__new__``.
        """
        return super(BoundColumn, cls).__new__(
            cls,
            domain=dataset.domain,
            dtype=dtype,
            missing_value=missing_value,
            dataset=dataset,
            name=name,
            ndim=dataset.ndim,
            doc=doc,
            metadata=metadata,
        )

    def _init(self, dataset, name, doc, metadata, *args, **kwargs):
        """
        Store the column-specific state, then delegate the remaining
        arguments to the base-class ``_init``.
        """
        # NOTE(review): presumably invoked by the term construction machinery
        # with the keywords passed to __new__ -- confirm against the base.
        self._dataset = dataset
        self._name = name
        # ``doc`` becomes this instance's own ``__doc__``.
        self.__doc__ = doc
        self._metadata = metadata
        return super(BoundColumn, self)._init(*args, **kwargs)

    @classmethod
    def _static_identity(cls, dataset, name, doc, metadata, *args, **kwargs):
        """
        Return a tuple of the base-class identity, ``dataset``, ``name``,
        ``doc`` and a frozenset of the metadata items.
        """
        return (
            super(BoundColumn, cls)._static_identity(*args, **kwargs),
            dataset,
            name,
            doc,
            # The metadata dict is frozen so that it can be part of the
            # identity tuple.
            # NOTE(review): ``first`` is presumably toolz's first, i.e. sort
            # by key -- confirm.
            frozenset(sorted(metadata.items(), key=first)),
        )

    def specialize(self, domain):
        """Specialize ``self`` to a concrete domain.

        Returns ``self`` when ``domain`` equals the current domain. Otherwise
        returns a column of the same type bound to
        ``self._dataset.specialize(domain)``, with every other construction
        argument carried over.
        """
        if domain == self.domain:
            return self

        return type(self)(
            dtype=self.dtype,
            missing_value=self.missing_value,
            dataset=self._dataset.specialize(domain),
            name=self._name,
            doc=self.__doc__,
            metadata=self._metadata,
        )

    def unspecialize(self):
        """
        Unspecialize a column to its generic form.

        This is equivalent to ``column.specialize(GENERIC)``.
        """
        return self.specialize(GENERIC)

    @property
    def dataset(self):
        """
        The dataset to which this column is bound.
        """
        return self._dataset

    @property
    def name(self):
        """
        The name of this column.
        """
        return self._name

    @property
    def metadata(self):
        """
        A copy of the metadata for this column.
        """
        return self._metadata.copy()

    @property
    def qualname(self):
        """
        The fully-qualified name of this column.

        Generated by doing '.'.join([self.dataset.qualname, self.name]).
        """
        return '.'.join([self.dataset.qualname, self.name])

    @property
    def latest(self):
        """
        A "latest value" term over this column, carrying this column's
        ``dtype``, ``missing_value`` and ``ndim``.

        The term class is chosen from ``self.dtype``: filter dtypes are
        checked first, then classifier dtypes, then factor dtypes.
        """
        dtype = self.dtype
        if dtype in Filter.ALLOWED_DTYPES:
            Latest = LatestFilter
        elif dtype in Classifier.ALLOWED_DTYPES:
            Latest = LatestClassifier
        else:
            assert dtype in Factor.ALLOWED_DTYPES, "Unknown dtype %s." % dtype
            Latest = LatestFactor

        return Latest(
            inputs=(self, ),
            dtype=dtype,
            missing_value=self.missing_value,
            ndim=self.ndim,
        )

    def __repr__(self):
        # Rendered as "<qualname>::<dtype name>".
        return "{qualname}::{dtype}".format(
            qualname=self.qualname,
            dtype=self.dtype.name,
        )

    def graph_repr(self):
        """Short repr to use when rendering Pipeline graphs."""
        # Graphviz interprets `\l` as "divide label into lines, left-justified"
        # Note that this label uses ``dataset.__name__``, not the
        # ``dataset.qualname`` that ``qualname`` uses.
        return "BoundColumn:\\l Dataset: {}\\l Column: {}\\l".format(
            self.dataset.__name__,
            self.name)

    def recursive_repr(self):
        """Short repr used to render in recursive contexts."""
        return self.qualname
def verify_trace(self,
                 trace,
                 pipeline_start_date,
                 pipeline_end_date,
                 expected_loads,
                 expected_computes,
                 expected_chunks):
    """
    Assert that a recorded progress ``trace`` has the expected shape.

    Checks, in order: progress never decreases; the first entry is a
    'loading' state for the first chunk whose work is exactly
    ``{AssetExists(), PREPOPULATED_TERM}``; the last entry is a 100%
    'success' state for the last chunk; every entry in between is a
    'loading' or 'computing' state with work of the matching term types; and
    the in-between entries, grouped by chunk bounds, end each chunk at the
    progress given by ``self.expected_chunk_progress`` and cover exactly
    ``expected_chunks``.

    Parameters
    ----------
    trace : sequence
        Recorded trace states; indexed and sliced here.
    pipeline_start_date, pipeline_end_date
        Expected ``execution_bounds`` of the first and last entries.
    expected_loads, expected_computes
        Accepted but not used by this method.
    expected_chunks : list
        Expected ``(chunk_start, chunk_stop)`` pairs, in order.
    """
    # Percent complete should be non-decreasing through the whole execution
    # (consecutive entries may report the same value).
    for before, after in toolz.sliding_window(2, trace):
        self.assertGreaterEqual(
            after.percent_complete,
            before.percent_complete,
        )

    # First publish should contain precomputed terms from first chunk.
    first = trace[0]
    expected_first = TestingProgressPublisher.TraceState(
        state='loading',
        percent_complete=instance_of(float),
        execution_bounds=(pipeline_start_date, pipeline_end_date),
        current_chunk_bounds=expected_chunks[0],
        current_work=instance_of(list))
    self.assertEqual(first, expected_first)
    self.assertGreater(first.percent_complete, 0.0)
    # Compared as sets, so the order of the work items does not matter.
    self.assertEqual(
        set(first.current_work),
        {AssetExists(), PREPOPULATED_TERM},
    )

    # Last publish should have a state of success and be 100% complete.
    last = trace[-1]
    expected_last = TestingProgressPublisher.TraceState(
        state='success',
        percent_complete=100.0,
        execution_bounds=(pipeline_start_date, pipeline_end_date),
        current_chunk_bounds=expected_chunks[-1],
        # We don't know what the last work item will be, but it must be an
        # instance of a single ComputableTerm, because we only run
        # ComputableTerms one at a time, and a LoadableTerm will only be in
        # the graph if some ComputableTerm depends on it.
        current_work=[instance_of(ComputableTerm)],
    )
    self.assertEqual(last, expected_last)

    # Remaining updates should all be loads or computes.
    middle = trace[1:-1]
    for update in middle:
        self.assertIsInstance(update.current_work, list)
        if update.state == 'loading':
            for term in update.current_work:
                self.assertIsInstance(
                    term, (LoadableTerm, AssetExists, PrepopulatedFactor),
                )
        elif update.state == 'computing':
            for term in update.current_work:
                self.assertIsInstance(term, ComputableTerm)
        else:
            raise AssertionError(
                "Unexpected state: {}".format(update.state),
            )

    # Break up the remaining updates by chunk.
    all_chunks = []
    # groupby only merges *consecutive* entries that share chunk bounds.
    grouped = itertools.groupby(middle, attrgetter('current_chunk_bounds'))
    for (chunk_start, chunk_stop), chunk_trace in grouped:
        all_chunks.append((chunk_start, chunk_stop))

        # Materialize the group so its last entry can be indexed.
        chunk_trace = list(chunk_trace)
        expected_end_progress = self.expected_chunk_progress(
            pipeline_start_date,
            pipeline_end_date,
            chunk_stop,
        )
        end_progress = chunk_trace[-1].percent_complete
        assert_almost_equal(end_progress, expected_end_progress)

    self.assertEqual(all_chunks, expected_chunks)
class BoundColumn(LoadableTerm):
    """
    A column of data that's been concretely bound to a particular dataset.

    Attributes
    ----------
    dtype : numpy.dtype
        The dtype of data produced when this column is loaded.
    latest : zipline.pipeline.LoadableTerm
        A :class:`~zipline.pipeline.Filter`, :class:`~zipline.pipeline.Factor`,
        or :class:`~zipline.pipeline.Classifier` computing the most recently
        known value of this column on each date.
        See :class:`zipline.pipeline.mixins.LatestMixin` for more details.
    dataset : zipline.pipeline.data.DataSet
        The dataset to which this column is bound.
    name : str
        The name of this column.
    metadata : dict
        Extra metadata associated with this column.
    currency_aware : bool
        Whether or not this column produces currency-denominated data.

    Notes
    -----
    Instances of this class are dynamically created upon access to attributes
    of :class:`~zipline.pipeline.data.DataSet`. For example,
    :attr:`~zipline.pipeline.data.EquityPricing.close` is an instance of this
    class. Pipeline API users should never construct instances of this
    directly.
    """
    mask = AssetExists()
    window_safe = True

    def __new__(cls,
                dtype,
                missing_value,
                dataset,
                name,
                doc,
                metadata,
                currency_conversion,
                currency_aware):
        """
        Construct a column, taking ``domain`` and ``ndim`` from ``dataset``.

        Raises
        ------
        AssertionError
            If ``currency_aware`` is truthy and ``dtype`` is not
            ``float64_dtype``.
        """
        if currency_aware and dtype != float64_dtype:
            raise AssertionError(
                'The {} column on dataset {} cannot be constructed with '
                'currency_aware={}, dtype={}. Currency aware columns must '
                'have a float64 dtype.'.format(
                    name,
                    dataset,
                    currency_aware,
                    dtype,
                ))

        return super(BoundColumn, cls).__new__(
            cls,
            domain=dataset.domain,
            dtype=dtype,
            missing_value=missing_value,
            dataset=dataset,
            name=name,
            ndim=dataset.ndim,
            doc=doc,
            metadata=metadata,
            currency_conversion=currency_conversion,
            currency_aware=currency_aware,
        )

    def _init(self,
              dataset,
              name,
              doc,
              metadata,
              currency_conversion,
              currency_aware,
              *args, **kwargs):
        """
        Store the column-specific state, then delegate the remaining
        arguments to the base-class ``_init``.
        """
        # NOTE(review): presumably invoked by the term construction machinery
        # with the keywords passed to __new__ -- confirm against the base.
        self._dataset = dataset
        self._name = name
        # ``doc`` becomes this instance's own ``__doc__``.
        self.__doc__ = doc
        self._metadata = metadata
        self._currency_conversion = currency_conversion
        self._currency_aware = currency_aware
        return super(BoundColumn, self)._init(*args, **kwargs)

    @classmethod
    def _static_identity(cls,
                         dataset,
                         name,
                         doc,
                         metadata,
                         currency_conversion,
                         currency_aware,
                         *args, **kwargs):
        """
        Return a tuple of the base-class identity, ``dataset``, ``name``,
        ``doc``, a frozenset of the metadata items, ``currency_conversion``
        and ``currency_aware``.
        """
        return (
            super(BoundColumn, cls)._static_identity(*args, **kwargs),
            dataset,
            name,
            doc,
            # The metadata dict is frozen so that it can be part of the
            # identity tuple.
            # NOTE(review): ``first`` is presumably toolz's first, i.e. sort
            # by key -- confirm.
            frozenset(sorted(metadata.items(), key=first)),
            currency_conversion,
            currency_aware,
        )

    def __lt__(self, other):
        # Ordering comparisons on a raw column always raise, with a hint to
        # use ``.latest`` instead.
        msg = "Can't compare '{}' with '{}'. (Did you mean to use '.latest'?)"
        raise TypeError(msg.format(self.qualname, other.__class__.__name__))

    # All four ordering operators share the same always-raising
    # implementation.
    __gt__ = __le__ = __ge__ = __lt__

    def _replace(self, **kwargs):
        """
        Return a new column of the same type built from this column's
        construction arguments, with ``kwargs`` overriding any of them.
        """
        kw = dict(
            dtype=self.dtype,
            missing_value=self.missing_value,
            dataset=self._dataset,
            name=self._name,
            doc=self.__doc__,
            metadata=self._metadata,
            currency_conversion=self._currency_conversion,
            currency_aware=self._currency_aware,
        )
        kw.update(kwargs)

        return type(self)(**kw)

    def specialize(self, domain):
        """Specialize ``self`` to a concrete domain.

        Returns ``self`` when ``domain`` equals the current domain. Otherwise
        returns a copy bound to ``self._dataset.specialize(domain)``.
        """
        if domain == self.domain:
            return self

        return self._replace(dataset=self._dataset.specialize(domain))

    def unspecialize(self):
        """
        Unspecialize a column to its generic form.

        This is equivalent to ``column.specialize(GENERIC)``.
        """
        return self.specialize(GENERIC)

    @coerce_types(currency=(str, Currency))
    def fx(self, currency):
        """
        Construct a currency-converted version of this column.

        Parameters
        ----------
        currency : str or zipline.currency.Currency
            Currency into which to convert this column's data.

        Returns
        -------
        column : BoundColumn
            Column producing the same data as ``self``, but currency-converted
            into ``currency``. ``self`` is returned unchanged when it already
            converts into ``currency``.

        Raises
        ------
        TypeError
            If this column is not currency aware.
        """
        conversion = self._currency_conversion

        if not self._currency_aware:
            raise TypeError(
                'The .fx() method cannot be called on {} because it does not '
                'produce currency-denominated data.'.format(self.qualname))
        elif conversion is not None and conversion.currency == currency:
            # Already converting into the requested currency.
            return self

        return self._replace(currency_conversion=CurrencyConversion(
            currency=currency,
            field=DEFAULT_FX_RATE,
        ))

    @property
    def currency_conversion(self):
        """Specification for currency conversions applied for this term.
        """
        return self._currency_conversion

    @property
    def currency_aware(self):
        """
        Whether or not this column produces currency-denominated data.
        """
        return self._currency_aware

    @property
    def dataset(self):
        """
        The dataset to which this column is bound.
        """
        return self._dataset

    @property
    def name(self):
        """
        The name of this column.
        """
        return self._name

    @property
    def metadata(self):
        """
        A copy of the metadata for this column.
        """
        return self._metadata.copy()

    @property
    def qualname(self):
        """The fully-qualified name of this column.

        Built from ``self.dataset.qualname`` and ``self.name``. When a
        currency conversion is set, ``.fx(<currency code repr>)`` is appended.
        """
        out = '.'.join([self.dataset.qualname, self.name])
        conversion = self._currency_conversion
        if conversion is not None:
            out += '.fx({!r})'.format(conversion.currency.code)
        return out

    @property
    def latest(self):
        """
        A "latest value" term over this column, carrying this column's
        ``dtype``, ``missing_value`` and ``ndim``.

        The term class is chosen from ``self.dtype``: filter dtypes are
        checked first, then classifier dtypes, then factor dtypes.
        """
        dtype = self.dtype
        if dtype in Filter.ALLOWED_DTYPES:
            Latest = LatestFilter
        elif dtype in Classifier.ALLOWED_DTYPES:
            Latest = LatestClassifier
        else:
            assert dtype in Factor.ALLOWED_DTYPES, "Unknown dtype %s." % dtype
            Latest = LatestFactor

        return Latest(
            inputs=(self, ),
            dtype=dtype,
            missing_value=self.missing_value,
            ndim=self.ndim,
        )

    def __repr__(self):
        # Rendered as "<qualname>::<dtype name>".
        return "{qualname}::{dtype}".format(
            qualname=self.qualname,
            dtype=self.dtype.name,
        )

    def graph_repr(self):
        """Short repr to use when rendering Pipeline graphs."""
        # Graphviz interprets `\l` as "divide label into lines, left-justified"
        # Note that this label uses ``dataset.__name__``, not the
        # ``dataset.qualname`` that ``qualname`` uses.
        return "BoundColumn:\\l Dataset: {}\\l Column: {}\\l".format(
            self.dataset.__name__,
            self.name)

    def recursive_repr(self):
        """Short repr used to render in recursive contexts."""
        return self.qualname
def filter_nodes(include_asset_exists, nodes):
    """
    Optionally drop the ``AssetExists()`` term from ``nodes``.

    When ``include_asset_exists`` is truthy, ``nodes`` is returned untouched
    (the very same object). Otherwise the result is a ``filter`` over
    ``nodes`` that skips any node that *is* ``AssetExists()``.
    """
    if not include_asset_exists:
        # Compared by identity, not equality.
        return filter(lambda node: node is not AssetExists(), nodes)
    return nodes
def _render(g, out, format_, include_asset_exists=False):
    """
    Draw `g` as a graph to `out`, in format `format_`.

    The graph description is written to an in-memory buffer, piped to the
    external ``dot`` program, and ``dot``'s standard output is written to
    `out`.

    Parameters
    ----------
    g : zipline.pipeline.graph.TermGraph
        Graph to render.
    out : file-like object
        Receives the bytes produced by ``dot``.
    format_ : str {'png', 'svg'}
        Output format.
    include_asset_exists : bool
        Whether to include the `AssetExists()` node in the input cluster and
        draw the edges leaving it. Both are left out by default.

    Raises
    ------
    RuntimeError
        If ``dot`` cannot be found, or if ``dot`` writes anything to its
        standard error.
    """
    graph_attrs = {'rankdir': 'TB', 'splines': 'ortho'}
    cluster_attrs = {'style': 'filled', 'color': 'lightgoldenrod1'}

    # Inputs are the graph's atomic nodes.
    in_nodes = list(node for node in g if node.atomic)
    out_nodes = list(g.outputs.values())

    f = BytesIO()
    with graph(f, "G", **graph_attrs):

        # Write outputs cluster.
        with cluster(f, 'Output', labelloc='b', **cluster_attrs):
            for term in out_nodes:
                add_term_node(f, term)

        # Write inputs cluster.
        with cluster(f, 'Input', **cluster_attrs):
            for term in in_nodes:
                if term is AssetExists() and not include_asset_exists:
                    continue
                add_term_node(f, term)

        # Write intermediate results.
        for term in topological_sort(g):
            # Inputs and outputs were already handled in their clusters.
            if term in in_nodes or term in out_nodes:
                continue
            add_term_node(f, term)

        # Write edges
        for source, dest in g.edges():
            # Skip edges leaving AssetExists() when that node is hidden.
            if source is AssetExists() and not include_asset_exists:
                continue
            add_edge(f, id(source), id(dest))

    cmd = ['dot', '-T', format_]
    try:
        proc = Popen(cmd, stdin=PIPE, stdout=PIPE, stderr=PIPE)
    except OSError as e:
        # ENOENT means the executable itself is missing; anything else is
        # re-raised untouched.
        if e.errno == errno.ENOENT:
            raise RuntimeError(
                "Couldn't find `dot` graph layout program. "
                "Make sure Graphviz is installed and `dot` is on your path.")
        else:
            raise

    # Rewind so the whole description is fed to dot's stdin.
    f.seek(0)
    proc_stdout, proc_stderr = proc.communicate(f.read())
    # Any stderr output at all is treated as a failure.
    if proc_stderr:
        raise RuntimeError("Error(s) while rendering graph: %s" %
                           proc_stderr.decode('utf-8'))

    out.write(proc_stdout)
class BoundColumn(LoadableTerm):
    """
    A column of data that's been concretely bound to a particular dataset.

    Instances of this class are dynamically created upon access to attributes
    of DataSets.

    Attributes
    ----------
    dtype : numpy.dtype
        The dtype of data produced when this column is loaded.
    latest : zipline.pipeline.data.Factor or zipline.pipeline.data.Filter
        A Filter/Factor computing the most recently known value of this column
        on each date.

        Produces a Filter if self.dtype == ``np.bool_``, otherwise produces a
        Factor.
    dataset : zipline.pipeline.data.DataSet
        The dataset to which this column is bound.
    name : str
        The name of this column.
    """
    # Class-level term attributes: columns are masked by AssetExists(), need
    # no extra input rows, and have no inputs of their own.
    mask = AssetExists()
    extra_input_rows = 0
    inputs = ()

    def __new__(cls, dtype, missing_value, dataset, name):
        """
        Construct a column, taking its domain from ``dataset.domain``.

        All values are forwarded as keywords to the base-class ``__new__``.
        """
        return super(BoundColumn, cls).__new__(
            cls,
            domain=dataset.domain,
            dtype=dtype,
            missing_value=missing_value,
            dataset=dataset,
            name=name,
        )

    def _init(self, dataset, name, *args, **kwargs):
        """
        Store the column-specific state, then delegate the remaining
        arguments to the base-class ``_init``.
        """
        # NOTE(review): presumably invoked by the term construction machinery
        # with the keywords passed to __new__ -- confirm against the base.
        self._dataset = dataset
        self._name = name
        return super(BoundColumn, self)._init(*args, **kwargs)

    @classmethod
    def static_identity(cls, dataset, name, *args, **kwargs):
        """
        Return a tuple of (base-class identity, dataset, name).
        """
        return (
            super(BoundColumn, cls).static_identity(*args, **kwargs),
            dataset,
            name,
        )

    @property
    def dataset(self):
        """
        The dataset to which this column is bound.
        """
        return self._dataset

    @property
    def name(self):
        """
        The name of this column.
        """
        return self._name

    @property
    def qualname(self):
        """
        The fully-qualified name of this column.

        Generated by doing '.'.join([self.dataset.__name__, self.name]).
        """
        return '.'.join([self.dataset.__name__, self.name])

    @property
    def latest(self):
        """
        A ``Latest`` term over this column, carrying this column's ``dtype``
        and ``missing_value``.

        The filter flavour is used for ``bool_dtype`` columns and the factor
        flavour for everything else.
        """
        # Imported at call time rather than at module level.
        # NOTE(review): presumably to avoid a circular import -- confirm.
        if self.dtype == bool_dtype:
            from zipline.pipeline.filters import Latest
        else:
            from zipline.pipeline.factors import Latest

        return Latest(
            inputs=(self, ),
            dtype=self.dtype,
            missing_value=self.missing_value,
        )

    def __repr__(self):
        # Rendered as "<qualname>::<dtype name>".
        return "{qualname}::{dtype}".format(
            qualname=self.qualname,
            dtype=self.dtype.name,
        )

    def short_repr(self):
        """
        Short representation: just the qualified name.
        """
        return self.qualname