示例#1
0
    def test_single_factor_instance_args(self):
        """
        Test dependency resolution for a single factor with arguments passed to
        the constructor.

        The execution plan is expected to hold four terms: the factor, its
        two inputs specialized to ``self.DOMAIN``, and ``AssetExists()``.
        """
        bar, buzz = SomeDataSet.bar, SomeDataSet.buzz

        factor = SomeFactor([bar, buzz], window_length=5)
        graph = self.make_execution_plan(to_dict([factor]))

        resolution_order = list(graph.ordered())

        # SomeFactor, its inputs, and AssetExists()
        self.assertEqual(len(resolution_order), 4)
        self.check_dependency_order(resolution_order)
        self.assertIn(AssetExists(), resolution_order)
        self.assertEqual(graph.extra_rows[AssetExists()], 4)

        # LoadableTerms should be specialized to our domain in the execution
        # order.
        specialized_bar = bar.specialize(self.DOMAIN)
        specialized_buzz = buzz.specialize(self.DOMAIN)
        self.assertIn(specialized_bar, resolution_order)
        self.assertIn(specialized_buzz, resolution_order)

        # ComputableTerms don't yet have a notion of specialization, so they
        # should appear unchanged in the execution order.
        self.assertIn(SomeFactor([bar, buzz], window_length=5),
                      resolution_order)

        # Check the extra rows of *both* inputs; previously `bar` was checked
        # twice and `buzz` was never verified.
        self.assertEqual(graph.extra_rows[specialized_bar], 4)
        self.assertEqual(graph.extra_rows[specialized_buzz], 4)
示例#2
0
    def test_single_factor_instance_args(self):
        """
        Check the resolution order of a graph built from one factor whose
        inputs and window length are supplied to the constructor.
        """
        bar = SomeDataSet.bar
        buzz = SomeDataSet.buzz
        terms = to_dict([SomeFactor([bar, buzz], window_length=5)])
        graph = TermGraph(terms)

        resolution_order = list(graph.ordered())

        # Four terms in total: AssetExists(), both inputs, and SomeFactor.
        self.assertEqual(len(resolution_order), 4)

        # The root mask is resolved first.
        root_term = resolution_order[0]
        self.assertIs(root_term, AssetExists())
        self.assertEqual(graph.extra_rows[AssetExists()], 4)

        # The two raw inputs come next, in either order.
        middle_terms = {resolution_order[1], resolution_order[2]}
        self.assertEqual(middle_terms, {bar, buzz})

        # The factor itself is resolved last.
        final_term = resolution_order[-1]
        self.assertEqual(final_term, SomeFactor([bar, buzz], window_length=5))

        self.assertEqual(graph.extra_rows[bar], 4)
        self.assertEqual(graph.extra_rows[buzz], 4)
示例#3
0
class BoundColumn(Term):
    """
    A single named column that belongs to one specific dataset.

    The column's domain is read from ``dataset.domain`` when the column is
    constructed.
    """
    mask = AssetExists()
    extra_input_rows = 0
    inputs = ()

    def __new__(cls, dtype, dataset, name):
        # Forward the dataset's domain together with the column's own
        # attributes to the parent constructor.
        parent = super(BoundColumn, cls)
        return parent.__new__(
            cls,
            domain=dataset.domain,
            dtype=dtype,
            dataset=dataset,
            name=name,
        )

    def _init(self, dataset, name, *args, **kwargs):
        # Record the column-specific state, then let the parent class handle
        # whatever arguments remain.
        self._dataset = dataset
        self._name = name
        parent = super(BoundColumn, self)
        return parent._init(*args, **kwargs)

    @classmethod
    def static_identity(cls, dataset, name, *args, **kwargs):
        # The identity is the parent's identity extended with the dataset
        # and the column name.
        base = super(BoundColumn, cls).static_identity(*args, **kwargs)
        return (base, dataset, name)

    @property
    def dataset(self):
        """
        The dataset this column is bound to.
        """
        return self._dataset

    @property
    def name(self):
        """
        The name of this column.
        """
        return self._name

    @property
    def qualname(self):
        """
        Fully qualified name of this column: ``<dataset name>.<column name>``.
        """
        parts = (self.dataset.__name__, self.name)
        return '.'.join(parts)

    @property
    def latest(self):
        """
        A ``Latest`` term that takes this column as its only input.
        """
        # FIXME: No dtype is forwarded here.  Until non-float dtypes are
        # supported we rely on inputs coercing safely to float.
        only_input = (self, )
        return Latest(inputs=only_input)

    def __repr__(self):
        fields = {
            'qualname': self.qualname,
            'dtype': self.dtype.__name__,
        }
        return "{qualname}::{dtype}".format(**fields)

    def short_repr(self):
        """
        Abbreviated representation: just the qualified name.
        """
        return self.qualname
示例#4
0
    def test_reuse_atomic_terms(self):
        """
        Check that a raw input shared by two factors is placed in the
        dependency graph only once.
        """
        foo, bar, buzz = SomeDataSet.foo, SomeDataSet.bar, SomeDataSet.buzz
        f1 = SomeFactor([foo, bar])
        f2 = SomeOtherFactor([bar, buzz])

        graph = TermGraph(to_dict([f1, f2]))
        resolution_order = list(graph.ordered())

        # Two factors, three distinct inputs and AssetExists(): `bar` is
        # shared, so it must only be counted once.
        self.assertEqual(len(resolution_order), 6)

        # Map every term to the first position at which it is resolved.
        indices = {}
        for position, term in enumerate(resolution_order):
            indices.setdefault(term, position)

        self.assertEqual(indices[AssetExists()], 0)

        # Each factor must be computed after every one of its inputs.
        for factor, factor_inputs in ((f1, (foo, bar)), (f2, (bar, buzz))):
            for column in factor_inputs:
                self.assertLess(indices[column], indices[factor])
示例#5
0
class BoundColumn(Term):
    """
    A single named column that belongs to one specific dataset.

    The column's domain is read from ``dataset.domain`` when the column is
    constructed.
    """
    mask = AssetExists()
    extra_input_rows = 0
    inputs = ()

    def __new__(cls, dtype, dataset, name):
        # Forward the dataset's domain together with the column's own
        # attributes to the parent constructor.
        parent = super(BoundColumn, cls)
        return parent.__new__(
            cls,
            domain=dataset.domain,
            dtype=dtype,
            dataset=dataset,
            name=name,
        )

    def _init(self, dataset, name, *args, **kwargs):
        # Record the column-specific state, then let the parent class handle
        # whatever arguments remain.
        self._dataset = dataset
        self._name = name
        parent = super(BoundColumn, self)
        return parent._init(*args, **kwargs)

    @classmethod
    def static_identity(cls, dataset, name, *args, **kwargs):
        # The identity is the parent's identity extended with the dataset
        # and the column name.
        base = super(BoundColumn, cls).static_identity(*args, **kwargs)
        return (base, dataset, name)

    @property
    def dataset(self):
        """
        The dataset this column is bound to.
        """
        return self._dataset

    @property
    def name(self):
        """
        The name of this column.
        """
        return self._name

    @property
    def qualname(self):
        """
        Fully qualified name of this column: ``<dataset name>.<column name>``.
        """
        parts = (self.dataset.__name__, self.name)
        return '.'.join(parts)

    @property
    def latest(self):
        """
        A ``Latest`` term over this column that carries the column's dtype.
        """
        # Imported inside the property rather than at module level.
        from zipline.pipeline.factors import Latest
        only_input = (self, )
        return Latest(inputs=only_input, dtype=self.dtype)

    def __repr__(self):
        fields = {
            'qualname': self.qualname,
            'dtype': self.dtype.name,
        }
        return "{qualname}::{dtype}".format(**fields)

    def short_repr(self):
        """
        Abbreviated representation: just the qualified name.
        """
        return self.qualname
示例#6
0
 def run_graph(self, graph, initial_workspace, mask_sid):
     """
     Run `graph` through ``self.engine.compute_chunk``.

     If `initial_workspace` has no entry for ``AssetExists()`` yet, one is
     added in place: ``self.asset_exists_masked`` when `mask_sid` is truthy,
     ``self.asset_exists`` otherwise.
     """
     root_mask = AssetExists()
     if mask_sid:
         exists_values = self.asset_exists_masked
     else:
         exists_values = self.asset_exists
     initial_workspace.setdefault(root_mask, exists_values)

     engine = self.engine
     return engine.compute_chunk(
         graph, self.dates, self.assets, initial_workspace,
     )
示例#7
0
        def check_output(graph):
            """
            Assert that `graph` resolves to AssetExists(), the foo and bar
            columns and SomeFactor(), in a valid dependency order.
            """
            resolution_order = list(graph.ordered())

            self.assertEqual(len(resolution_order), 4)
            self.check_dependency_order(resolution_order)

            # Every expected term has to be present in the ordering.
            expected_terms = (
                AssetExists(),
                SomeDataSet.foo,
                SomeDataSet.bar,
                SomeFactor(),
            )
            for expected in expected_terms:
                self.assertIn(expected, resolution_order)

            # Both raw inputs are annotated with four extra rows.
            for column in (SomeDataSet.foo, SomeDataSet.bar):
                self.assertEqual(graph.node[column]['extra_rows'], 4)
示例#8
0
        def check_output(graph):
            """
            Assert that `graph` resolves AssetExists() first, then the foo
            and bar columns in either order, and SomeFactor() last.
            """
            resolution_order = list(graph.ordered())

            self.assertEqual(len(resolution_order), 4)

            # The root mask leads the ordering.
            leading_term = resolution_order[0]
            self.assertIs(leading_term, AssetExists())

            # The raw inputs occupy the two middle slots, in any order.
            middle_terms = {resolution_order[1], resolution_order[2]}
            self.assertEqual(middle_terms, {SomeDataSet.foo, SomeDataSet.bar})

            # The factor closes the ordering.
            trailing_term = resolution_order[-1]
            self.assertEqual(trailing_term, SomeFactor())

            # Both raw inputs are annotated with four extra rows.
            for column in (SomeDataSet.foo, SomeDataSet.bar):
                self.assertEqual(graph.node[column]['extra_rows'], 4)
示例#9
0
    def __init__(self,
                 list_symbols,
                 calendar=None,
                 populate_initial_workspace=None):
        """
        Store the engine's configuration.

        Parameters
        ----------
        list_symbols
            Stored unchanged on ``self._list_symbols``.
        calendar : optional
            Stored on ``self._calendar``.  When None, the ``all_sessions`` of
            the 'NYSE' calendar are used instead.
        populate_initial_workspace : optional
            Stored on ``self._populate_initial_workspace``.  Any falsy value
            is replaced by ``default_populate_initial_workspace``.
        """
        self._list_symbols = list_symbols
        self._calendar = (
            get_calendar('NYSE').all_sessions if calendar is None else calendar
        )

        self._root_mask_term = AssetExists()
        self._root_mask_dates_term = InputDates()

        populate = populate_initial_workspace
        if not populate:
            populate = default_populate_initial_workspace
        self._populate_initial_workspace = populate
示例#10
0
    def run_graph(self, graph, initial_workspace, mask=None):
        """
        Compute `graph` with a fresh SimplePipelineEngine whose workspace is
        seeded from `initial_workspace`.

        Parameters
        ----------
        graph : zipline.pipeline.graph.ExecutionPlan
            Graph to run.
        initial_workspace : dict
            Initial workspace forwarded to SimplePipelineEngine.compute_chunk.
            Entries for ``AssetExists()`` and ``InputDates()`` are added in
            place when they are not already present.
        mask : DataFrame, optional
            Source of the dates, sids and ``AssetExists()`` values for the
            run.  Defaults to ``self.default_asset_exists_mask``.

        Returns
        -------
        results
            Whatever ``SimplePipelineEngine.compute_chunk`` returns.
        """
        def get_loader(c):
            # Every input is expected to already be in the workspace.
            raise AssertionError("run_graph() should not require any loaders!")

        engine = SimplePipelineEngine(
            get_loader,
            self.asset_finder,
            default_domain=US_EQUITIES,
        )

        effective_mask = self.default_asset_exists_mask if mask is None else mask
        dates, sids, mask_values = explode(effective_mask)

        # Seed the root terms without overriding caller-supplied values.
        for root_term, root_value in ((AssetExists(), mask_values),
                                      (InputDates(), dates)):
            initial_workspace.setdefault(root_term, root_value)

        refcounts = graph.initial_refcounts(initial_workspace)
        execution_order = graph.execution_order(initial_workspace, refcounts)

        results = engine.compute_chunk(
            graph=graph,
            dates=dates,
            sids=sids,
            workspace=initial_workspace,
            execution_order=execution_order,
            refcounts=refcounts,
            hooks=NoHooks(),
        )
        return results
示例#11
0
    def test_single_factor_instance_args(self):
        """
        Check the execution plan built from one factor whose inputs and
        window length are supplied to the constructor.
        """
        bar = SomeDataSet.bar
        buzz = SomeDataSet.buzz

        factor = SomeFactor([bar, buzz], window_length=5)
        graph = self.make_execution_plan(to_dict([factor]))

        resolution_order = list(graph.ordered())

        # Four terms in total: AssetExists(), both inputs, and SomeFactor.
        self.assertEqual(len(resolution_order), 4)
        self.check_dependency_order(resolution_order)
        self.assertIn(AssetExists(), resolution_order)
        self.assertEqual(graph.extra_rows[AssetExists()], 4)

        # Both inputs and an equal factor instance must be in the ordering.
        columns = (bar, buzz)
        for column in columns:
            self.assertIn(column, resolution_order)
        self.assertIn(SomeFactor([bar, buzz], window_length=5),
                      resolution_order)

        # Each input is annotated with four extra rows.
        for column in columns:
            self.assertEqual(graph.extra_rows[column], 4)
示例#12
0
 def __new__(cls,
             target,
             regression_length,
             allowed_missing_percentage=0.25):
     """
     Build the term from two-day returns and the `target` slice of those
     returns, over a window of `regression_length`.

     ``allowed_missing_count`` is `allowed_missing_percentage` times
     `regression_length`, truncated with ``int``.
     """
     # OR-ing with SingleAsset keeps the target asset inside the mask.
     returns_mask = AssetExists() | SingleAsset(asset=target)
     daily_returns = Returns(window_length=2, mask=returns_mask)

     allowed_missing_count = int(
         allowed_missing_percentage * regression_length
     )
     target_returns = daily_returns[target]

     return super().__new__(
         cls,
         inputs=[daily_returns, target_returns],
         window_length=regression_length,
         allowed_missing_count=allowed_missing_count,
     )
示例#13
0
 def __new__(cls,
             target,
             returns_length,
             regression_length,
             mask=NotSpecified):
     """
     Build the term with returns over `returns_length` as the dependent
     input and the `target` slice of those returns as the independent one.

     `regression_length` and `mask` are forwarded unchanged to the parent
     constructor.
     """
     # The `SingleAsset` filter is included because it guards against a
     # target asset that does not exist.
     returns_mask = AssetExists() | SingleAsset(asset=target)
     returns = Returns(window_length=returns_length, mask=returns_mask)
     target_returns = returns[target]

     return super().__new__(
         cls,
         dependent=returns,
         independent=target_returns,
         regression_length=regression_length,
         mask=mask,
     )
示例#14
0
 def __new__(cls,
             target,
             returns_length,
             correlation_length,
             mask=NotSpecified):
     """
     Build the term with returns over `returns_length` as the base factor
     and the `target` slice of those returns as the correlation target.

     `correlation_length` and `mask` are forwarded unchanged to the parent
     constructor.
     """
     # The `SingleAsset` filter is included because it guards against a
     # target asset that does not exist.
     returns_mask = AssetExists() | SingleAsset(asset=target)
     returns = Returns(window_length=returns_length, mask=returns_mask)
     target_returns = returns[target]

     parent = super(RollingSpearmanOfReturns, cls)
     return parent.__new__(
         cls,
         base_factor=returns,
         target=target_returns,
         correlation_length=correlation_length,
         mask=mask,
     )
示例#15
0
    def __new__(cls,
                target,
                returns_length,
                regression_length,
                mask=NotSpecified,
                **kwargs):
        """
        Build the term over returns of length `returns_length`, using a
        window of `regression_length`.

        When `mask` is NotSpecified, ``AssetExists()`` is used as the base
        mask.  Extra keyword arguments are forwarded to the parent
        constructor.
        """
        base_mask = AssetExists() if mask is NotSpecified else mask

        # OR-ing with SingleAsset ensures the asset of interest is never
        # filtered out.
        combined_mask = base_mask | SingleAsset(asset=target)

        parent = super(RollingLinearRegressionOfReturns, cls)
        return parent.__new__(
            cls,
            target=target,
            inputs=[Returns(window_length=returns_length)],
            window_length=regression_length,
            mask=combined_mask,
            **kwargs)
示例#16
0
        def check_output(graph):
            """
            Assert that `graph` resolves to AssetExists(), the specialized
            foo and bar columns and SomeFactor(), in a valid dependency
            order, with four extra rows recorded for each column.
            """
            resolution_order = list(graph.ordered())

            # Loadable terms should get specialized during graph construction.
            specialized_foo = SomeDataSet.foo.specialize(self.DOMAIN)
            # This was previously built from `foo` as well, so `bar` was
            # never actually checked.
            specialized_bar = SomeDataSet.bar.specialize(self.DOMAIN)

            self.assertEqual(len(resolution_order), 4)
            self.check_dependency_order(resolution_order)
            self.assertIn(AssetExists(), resolution_order)
            self.assertIn(specialized_foo, resolution_order)
            self.assertIn(specialized_bar, resolution_order)
            self.assertIn(SomeFactor(), resolution_order)

            self.assertEqual(
                graph.graph.node[specialized_foo]['extra_rows'], 4,
            )
            self.assertEqual(
                graph.graph.node[specialized_bar]['extra_rows'], 4,
            )
示例#17
0
    def run_graph(self, graph, initial_workspace, mask=None):
        """
        Compute `graph` with a fresh SimplePipelineEngine whose workspace is
        seeded from `initial_workspace`.

        Parameters
        ----------
        graph : zipline.pipeline.graph.TermGraph
            Graph to run.
        initial_workspace : dict
            Initial workspace forwarded to SimplePipelineEngine.compute_chunk.
            Entries for ``AssetExists()`` and ``InputDates()`` are added in
            place when they are not already present.
        mask : DataFrame, optional
            Source of the dates, assets and ``AssetExists()`` values for the
            run.  Defaults to ``self.default_asset_exists_mask``.

        Returns
        -------
        results
            Whatever ``SimplePipelineEngine.compute_chunk`` returns.
        """
        def get_loader(column):
            # Any loader handed out by this engine is an ExplodingObject.
            return ExplodingObject()

        engine = SimplePipelineEngine(
            get_loader,
            self.nyse_sessions,
            self.asset_finder,
        )

        effective_mask = self.default_asset_exists_mask if mask is None else mask
        dates, assets, mask_values = explode(effective_mask)

        # Seed the root terms without overriding caller-supplied values.
        for root_term, root_value in ((AssetExists(), mask_values),
                                      (InputDates(), dates)):
            initial_workspace.setdefault(root_term, root_value)

        results = engine.compute_chunk(graph, dates, assets, initial_workspace)
        return results
示例#18
0
class BoundColumn(LoadableTerm):
    """
    A column of data attached to one specific dataset.

    Instances are created dynamically when attributes of a DataSet are
    accessed (for example, USEquityPricing.close is an instance of this
    class).

    Attributes
    ----------
    dtype : numpy.dtype
        The dtype of data produced when this column is loaded.
    latest : Filter, Classifier or Factor
        A term computing the most recently known value of this column on
        each date.  Its type is chosen from ``self.dtype``: a LatestFilter
        for dtypes in ``Filter.ALLOWED_DTYPES``, a LatestClassifier for
        dtypes in ``Classifier.ALLOWED_DTYPES``, otherwise a LatestFactor.
    dataset : zipline.pipeline.data.DataSet
        The dataset to which this column is bound.
    name : str
        The name of this column.
    metadata : dict
        Extra metadata associated with this column.
    """
    mask = AssetExists()
    window_safe = True

    def __new__(cls, dtype, missing_value, dataset, name, doc, metadata):
        # The domain and ndim are taken from the owning dataset.
        parent = super(BoundColumn, cls)
        return parent.__new__(
            cls,
            domain=dataset.domain,
            dtype=dtype,
            missing_value=missing_value,
            dataset=dataset,
            name=name,
            ndim=dataset.ndim,
            doc=doc,
            metadata=metadata,
        )

    def _init(self, dataset, name, doc, metadata, *args, **kwargs):
        # Record the column-specific state, then let the parent class handle
        # whatever arguments remain.
        self._dataset = dataset
        self._name = name
        self.__doc__ = doc
        self._metadata = metadata
        parent = super(BoundColumn, self)
        return parent._init(*args, **kwargs)

    @classmethod
    def _static_identity(cls, dataset, name, doc, metadata, *args, **kwargs):
        base = super(BoundColumn, cls)._static_identity(*args, **kwargs)
        # Freeze the metadata items so that they can be part of the identity.
        frozen_metadata = frozenset(sorted(metadata.items(), key=first))
        return (base, dataset, name, doc, frozen_metadata)

    @property
    def dataset(self):
        """
        The dataset this column is bound to.
        """
        return self._dataset

    @property
    def name(self):
        """
        This column's name.
        """
        return self._name

    @property
    def metadata(self):
        """
        A copy of this column's metadata.
        """
        return self._metadata.copy()

    @property
    def qualname(self):
        """
        The fully-qualified name of this column.

        Built by joining ``self.dataset.__name__`` and ``self.name`` with
        a '.'.
        """
        parts = (self.dataset.__name__, self.name)
        return '.'.join(parts)

    @property
    def latest(self):
        """
        A term yielding this column's most recent value, typed by dtype.
        """
        dtype = self.dtype

        # Choose the Latest* class that accepts this column's dtype.
        if dtype in Filter.ALLOWED_DTYPES:
            latest_type = LatestFilter
        elif dtype in Classifier.ALLOWED_DTYPES:
            latest_type = LatestClassifier
        else:
            assert dtype in Factor.ALLOWED_DTYPES, "Unknown dtype %s." % dtype
            latest_type = LatestFactor

        return latest_type(
            inputs=(self,),
            dtype=dtype,
            missing_value=self.missing_value,
            ndim=self.ndim,
        )

    def __repr__(self):
        fields = {
            'qualname': self.qualname,
            'dtype': self.dtype.name,
        }
        return "{qualname}::{dtype}".format(**fields)

    def short_repr(self):
        """Compact repr for use when rendering Pipeline graphs."""
        return self.qualname
示例#19
0
def _render(g, out, format_, include_asset_exists=False):
    """
    Render the graph `g` with Graphviz's `dot` and write the result to `out`.

    Parameters
    ----------
    g : zipline.pipeline.graph.TermGraph
        Graph to render.
    out : file-like object
        Receives the bytes that `dot` writes to its stdout.
    format_ : str {'png', 'svg'}
        Output format, handed to `dot` through ``-T``.
    include_asset_exists : bool
        Forwarded to `filter_nodes` for every group of nodes.  When False,
        edges whose source is `AssetExists()` are also left out.

    Raises
    ------
    RuntimeError
        If the `dot` executable cannot be found, or if `dot` writes anything
        to its stderr.
    """
    cluster_style = {"style": "filled", "color": "lightgoldenrod1"}

    inputs = g.loadable_terms
    outputs = list(g.outputs.values())

    dot_source = BytesIO()
    with graph(dot_source, "G", rankdir="TB", splines="ortho"):

        # Outputs get a cluster of their own, labelled at the bottom.
        with cluster(dot_source, "Output", labelloc="b", **cluster_style):
            for term in filter_nodes(include_asset_exists, outputs):
                add_term_node(dot_source, term)

        # So do the inputs.
        with cluster(dot_source, "Input", **cluster_style):
            for term in filter_nodes(include_asset_exists, inputs):
                add_term_node(dot_source, term)

        # Everything that is neither an input nor an output is an
        # intermediate result and is drawn outside of the clusters.
        ordered_terms = topological_sort(g.graph)
        for term in filter_nodes(include_asset_exists, ordered_terms):
            if term not in inputs and term not in outputs:
                add_term_node(dot_source, term)

        # Edges are keyed by the ids of the terms they connect.
        for source, dest in g.graph.edges():
            if source is not AssetExists() or include_asset_exists:
                add_edge(dot_source, id(source), id(dest))

    dot_command = ["dot", "-T", format_]
    try:
        proc = Popen(dot_command, stdin=PIPE, stdout=PIPE, stderr=PIPE)
    except OSError as e:
        # Only a missing executable gets a friendlier error message.
        if e.errno != errno.ENOENT:
            raise
        raise RuntimeError(
            "Couldn't find `dot` graph layout program. "
            "Make sure Graphviz is installed and `dot` is on your path."
        )

    # Feed the whole generated description to `dot`.
    dot_source.seek(0)
    rendered, errors = proc.communicate(dot_source.read())
    if errors:
        raise RuntimeError(
            "Error(s) while rendering graph: %s" % errors.decode("utf-8")
        )

    out.write(rendered)
示例#20
0
class BoundColumn(LoadableTerm):
    """
    A column of data attached to one specific dataset.

    Attributes
    ----------
    dtype : numpy.dtype
        The dtype of data produced when this column is loaded.
    latest : zipline.pipeline.LoadableTerm
        A :class:`~zipline.pipeline.Filter`, :class:`~zipline.pipeline.Factor`,
        or :class:`~zipline.pipeline.Classifier` computing the most recently
        known value of this column on each date.
        See :class:`zipline.pipeline.mixins.LatestMixin` for more details.
    dataset : zipline.pipeline.data.DataSet
        The dataset to which this column is bound.
    name : str
        The name of this column.
    metadata : dict
        Extra metadata associated with this column.

    Notes
    -----
    Instances of this class are created dynamically when attributes of a
    :class:`~zipline.pipeline.data.DataSet` are accessed. For example,
    :attr:`~zipline.pipeline.data.EquityPricing.close` is an instance of this
    class. Pipeline API users should never construct instances of this
    directly.
    """
    mask = AssetExists()
    window_safe = True

    def __new__(cls, dtype, missing_value, dataset, name, doc, metadata):
        # The domain and ndim are taken from the owning dataset.
        parent = super(BoundColumn, cls)
        return parent.__new__(
            cls,
            domain=dataset.domain,
            dtype=dtype,
            missing_value=missing_value,
            dataset=dataset,
            name=name,
            ndim=dataset.ndim,
            doc=doc,
            metadata=metadata,
        )

    def _init(self, dataset, name, doc, metadata, *args, **kwargs):
        # Record the column-specific state, then let the parent class handle
        # whatever arguments remain.
        self._dataset = dataset
        self._name = name
        self.__doc__ = doc
        self._metadata = metadata
        parent = super(BoundColumn, self)
        return parent._init(*args, **kwargs)

    @classmethod
    def _static_identity(cls, dataset, name, doc, metadata, *args, **kwargs):
        base = super(BoundColumn, cls)._static_identity(*args, **kwargs)
        # Freeze the metadata items so that they can be part of the identity.
        frozen_metadata = frozenset(sorted(metadata.items(), key=first))
        return (base, dataset, name, doc, frozen_metadata)

    def specialize(self, domain):
        """Return this column bound to its dataset specialized to ``domain``.

        ``self`` is returned as-is when it already has that domain.
        """
        if domain == self.domain:
            return self

        column_type = type(self)
        return column_type(
            dtype=self.dtype,
            missing_value=self.missing_value,
            dataset=self._dataset.specialize(domain),
            name=self._name,
            doc=self.__doc__,
            metadata=self._metadata,
        )

    def unspecialize(self):
        """
        Return the generic form of this column.

        This is the same as calling ``column.specialize(GENERIC)``.
        """
        return self.specialize(GENERIC)

    @property
    def dataset(self):
        """
        The dataset this column is bound to.
        """
        return self._dataset

    @property
    def name(self):
        """
        This column's name.
        """
        return self._name

    @property
    def metadata(self):
        """
        A copy of this column's metadata.
        """
        return self._metadata.copy()

    @property
    def qualname(self):
        """
        The fully-qualified name of this column.

        Built by joining ``self.dataset.qualname`` and ``self.name`` with
        a '.'.
        """
        parts = (self.dataset.qualname, self.name)
        return '.'.join(parts)

    @property
    def latest(self):
        """
        A term yielding this column's most recent value, typed by dtype.
        """
        dtype = self.dtype

        # Choose the Latest* class that accepts this column's dtype.
        if dtype in Filter.ALLOWED_DTYPES:
            latest_type = LatestFilter
        elif dtype in Classifier.ALLOWED_DTYPES:
            latest_type = LatestClassifier
        else:
            assert dtype in Factor.ALLOWED_DTYPES, "Unknown dtype %s." % dtype
            latest_type = LatestFactor

        return latest_type(
            inputs=(self, ),
            dtype=dtype,
            missing_value=self.missing_value,
            ndim=self.ndim,
        )

    def __repr__(self):
        fields = {
            'qualname': self.qualname,
            'dtype': self.dtype.name,
        }
        return "{qualname}::{dtype}".format(**fields)

    def graph_repr(self):
        """Compact repr for use when rendering Pipeline graphs."""
        # `\l` tells Graphviz to split the label into left-justified lines.
        template = "BoundColumn:\\l  Dataset: {}\\l  Column: {}\\l"
        return template.format(self.dataset.__name__, self.name)

    def recursive_repr(self):
        """Compact repr for rendering inside recursive contexts."""
        return self.qualname
示例#21
0
    def verify_trace(self, trace, pipeline_start_date, pipeline_end_date,
                     expected_loads, expected_computes, expected_chunks):
        """
        Check that `trace` describes a well-formed pipeline execution.

        The checks are: progress never decreases, the first update is a
        'loading' update for the first chunk, the last update is a 100%
        'success' update for the last chunk, every update in between is a
        load or a compute, and each chunk ends at its expected progress.

        `expected_loads` and `expected_computes` are accepted but not
        inspected here.
        """
        execution_bounds = (pipeline_start_date, pipeline_end_date)
        TraceState = TestingProgressPublisher.TraceState

        # Progress must never go backwards at any point of the execution.
        for earlier, later in toolz.sliding_window(2, trace):
            self.assertGreaterEqual(
                later.percent_complete,
                earlier.percent_complete,
            )

        # The opening publish carries the precomputed terms of chunk one.
        first = trace[0]
        expected_first = TraceState(
            state='loading',
            percent_complete=instance_of(float),
            execution_bounds=execution_bounds,
            current_chunk_bounds=expected_chunks[0],
            current_work=instance_of(list))
        self.assertEqual(first, expected_first)
        self.assertGreater(first.percent_complete, 0.0)
        self.assertEqual(
            set(first.current_work),
            {AssetExists(), PREPOPULATED_TERM},
        )

        # The closing publish reports success at 100%.
        last = trace[-1]
        expected_last = TraceState(
            state='success',
            percent_complete=100.0,
            execution_bounds=execution_bounds,
            current_chunk_bounds=expected_chunks[-1],
            # The final work item is unknown in advance, but it has to be a
            # single ComputableTerm: ComputableTerms run one at a time, and a
            # LoadableTerm is only in the graph when some ComputableTerm
            # depends on it.
            current_work=[instance_of(ComputableTerm)],
        )
        self.assertEqual(last, expected_last)

        # Everything between the two must be a load or a compute.
        middle = trace[1:-1]
        loadable_types = (LoadableTerm, AssetExists, PrepopulatedFactor)
        for update in middle:
            self.assertIsInstance(update.current_work, list)
            if update.state == 'loading':
                allowed_types = loadable_types
            elif update.state == 'computing':
                allowed_types = ComputableTerm
            else:
                raise AssertionError(
                    "Unexpected state: {}".format(update.state), )
            for term in update.current_work:
                self.assertIsInstance(term, allowed_types)

        # Group the middle updates by chunk and check where each chunk ends.
        all_chunks = []
        by_chunk = itertools.groupby(middle, attrgetter('current_chunk_bounds'))
        for chunk_bounds, chunk_updates in by_chunk:
            chunk_start, chunk_stop = chunk_bounds
            all_chunks.append((chunk_start, chunk_stop))

            final_update = list(chunk_updates)[-1]
            expected_end_progress = self.expected_chunk_progress(
                pipeline_start_date,
                pipeline_end_date,
                chunk_stop,
            )
            assert_almost_equal(
                final_update.percent_complete,
                expected_end_progress,
            )

        self.assertEqual(all_chunks, expected_chunks)
示例#22
0
class BoundColumn(LoadableTerm):
    """
    A column of data that's been concretely bound to a particular dataset.

    Attributes
    ----------
    dtype : numpy.dtype
        The dtype of data produced when this column is loaded.
    latest : Filter, Factor, or Classifier
        A :class:`~zipline.pipeline.Filter`, :class:`~zipline.pipeline.Factor`,
        or :class:`~zipline.pipeline.Classifier` computing the most recently
        known value of this column on each date. Which one is produced
        depends on this column's dtype.
        See :class:`zipline.pipeline.mixins.LatestMixin` for more details.
    dataset : zipline.pipeline.data.DataSet
        The dataset to which this column is bound.
    name : str
        The name of this column.
    metadata : dict
        Extra metadata associated with this column.
    currency_conversion : CurrencyConversion or None
        Currency conversion applied to this column's data, if any.
    currency_aware : bool
        Whether or not this column produces currency-denominated data.

    Notes
    -----
    Instances of this class are dynamically created upon access to attributes
    of :class:`~zipline.pipeline.data.DataSet`. For example,
    :attr:`~zipline.pipeline.data.EquityPricing.close` is an instance of this
    class. Pipeline API users should never construct instances of this
    directly.
    """
    mask = AssetExists()
    # NOTE(review): presumably marks columns as usable inside windowed
    # computations -- confirm against the Term base classes.
    window_safe = True

    def __new__(cls, dtype, missing_value, dataset, name, doc, metadata,
                currency_conversion, currency_aware):
        """
        Construct a column, taking its domain and ndim from ``dataset``.

        Raises
        ------
        AssertionError
            If ``currency_aware`` is truthy but ``dtype`` is not float64.
        """
        if currency_aware and dtype != float64_dtype:
            raise AssertionError(
                'The {} column on dataset {} cannot be constructed with '
                'currency_aware={}, dtype={}. Currency aware columns must '
                'have a float64 dtype.'.format(
                    name,
                    dataset,
                    currency_aware,
                    dtype,
                ))

        return super(BoundColumn, cls).__new__(
            cls,
            domain=dataset.domain,
            dtype=dtype,
            missing_value=missing_value,
            dataset=dataset,
            name=name,
            ndim=dataset.ndim,
            doc=doc,
            metadata=metadata,
            currency_conversion=currency_conversion,
            currency_aware=currency_aware,
        )

    def _init(self, dataset, name, doc, metadata, currency_conversion,
              currency_aware, *args, **kwargs):
        """
        Store the column-specific attributes, then delegate the remaining
        arguments to the parent class's ``_init``.
        """
        self._dataset = dataset
        self._name = name
        self.__doc__ = doc
        self._metadata = metadata
        self._currency_conversion = currency_conversion
        self._currency_aware = currency_aware
        return super(BoundColumn, self)._init(*args, **kwargs)

    @classmethod
    def _static_identity(cls, dataset, name, doc, metadata,
                         currency_conversion, currency_aware, *args, **kwargs):
        """
        Return a hashable tuple combining the parent class's static identity
        with the column-specific construction arguments.

        NOTE(review): presumably consumed by the base class to decide when
        two constructions denote the same term -- confirm.
        """
        return (
            super(BoundColumn, cls)._static_identity(*args, **kwargs),
            dataset,
            name,
            doc,
            # A dict is unhashable, so freeze its items. A frozenset is
            # order-insensitive, so the items do not need to be sorted first.
            frozenset(metadata.items()),
            currency_conversion,
            currency_aware,
        )

    def __lt__(self, other):
        """
        Always raise: ordering comparisons are not supported on raw columns.

        Raises
        ------
        TypeError
            Unconditionally, with a hint to use ``.latest`` instead.
        """
        msg = "Can't compare '{}' with '{}'. (Did you mean to use '.latest'?)"
        raise TypeError(msg.format(self.qualname, other.__class__.__name__))

    __gt__ = __le__ = __ge__ = __lt__

    def _replace(self, **kwargs):
        """
        Build a new column of the same type from this column's construction
        arguments, with any of them overridden by ``kwargs``.
        """
        kw = dict(
            dtype=self.dtype,
            missing_value=self.missing_value,
            dataset=self._dataset,
            name=self._name,
            doc=self.__doc__,
            metadata=self._metadata,
            currency_conversion=self._currency_conversion,
            currency_aware=self._currency_aware,
        )
        kw.update(kwargs)

        return type(self)(**kw)

    def specialize(self, domain):
        """Specialize ``self`` to a concrete domain.

        Returns ``self`` unchanged when ``domain`` already equals this
        column's domain; otherwise returns a copy bound to the dataset
        specialized to ``domain``.
        """
        if domain == self.domain:
            return self

        return self._replace(dataset=self._dataset.specialize(domain))

    def unspecialize(self):
        """
        Unspecialize a column to its generic form.

        This is equivalent to ``column.specialize(GENERIC)``.
        """
        return self.specialize(GENERIC)

    @coerce_types(currency=(str, Currency))
    def fx(self, currency):
        """
        Construct a currency-converted version of this column.

        Parameters
        ----------
        currency : str or zipline.currency.Currency
            Currency into which to convert this column's data.

        Returns
        -------
        column : BoundColumn
            Column producing the same data as ``self``, but currency-converted
            into ``currency``. ``self`` is returned as-is when it already
            converts into ``currency``.

        Raises
        ------
        TypeError
            If this column is not currency-aware.
        """
        conversion = self._currency_conversion

        if not self._currency_aware:
            raise TypeError(
                'The .fx() method cannot be called on {} because it does not '
                'produce currency-denominated data.'.format(self.qualname))
        elif conversion is not None and conversion.currency == currency:
            return self

        return self._replace(currency_conversion=CurrencyConversion(
            currency=currency,
            field=DEFAULT_FX_RATE,
        ))

    @property
    def currency_conversion(self):
        """Specification for currency conversions applied for this term.
        """
        return self._currency_conversion

    @property
    def currency_aware(self):
        """
        Whether or not this column produces currency-denominated data.
        """
        return self._currency_aware

    @property
    def dataset(self):
        """
        The dataset to which this column is bound.
        """
        return self._dataset

    @property
    def name(self):
        """
        The name of this column.
        """
        return self._name

    @property
    def metadata(self):
        """
        A copy of the metadata for this column.
        """
        return self._metadata.copy()

    @property
    def qualname(self):
        """The fully-qualified name of this column.

        Built from the dataset's qualname and the column name, with a
        ``.fx('<code>')`` suffix when a currency conversion is set.
        """
        out = '.'.join([self.dataset.qualname, self.name])
        conversion = self._currency_conversion
        if conversion is not None:
            out += '.fx({!r})'.format(conversion.currency.code)
        return out

    @property
    def latest(self):
        """
        A term computing the most recently known value of this column.

        The term class is chosen from this column's dtype: a filter,
        classifier, or factor, checked in that order.

        Raises
        ------
        AssertionError
            If the dtype is not allowed by any of the three term kinds.
        """
        dtype = self.dtype
        if dtype in Filter.ALLOWED_DTYPES:
            Latest = LatestFilter
        elif dtype in Classifier.ALLOWED_DTYPES:
            Latest = LatestClassifier
        elif dtype in Factor.ALLOWED_DTYPES:
            Latest = LatestFactor
        else:
            # Raised explicitly (rather than via ``assert``) so the check is
            # not stripped when Python runs with -O.
            raise AssertionError("Unknown dtype %s." % dtype)

        return Latest(
            inputs=(self, ),
            dtype=dtype,
            missing_value=self.missing_value,
            ndim=self.ndim,
        )

    def __repr__(self):
        return "{qualname}::{dtype}".format(
            qualname=self.qualname,
            dtype=self.dtype.name,
        )

    def graph_repr(self):
        """Short repr to use when rendering Pipeline graphs."""
        # Graphviz interprets `\l` as "divide label into lines, left-justified"
        return "BoundColumn:\\l  Dataset: {}\\l  Column: {}\\l".format(
            self.dataset.__name__, self.name)

    def recursive_repr(self):
        """Short repr used to render in recursive contexts."""
        return self.qualname
示例#23
0
def filter_nodes(include_asset_exists, nodes):
    """
    Optionally drop the `AssetExists()` term from a collection of nodes.

    Parameters
    ----------
    include_asset_exists : bool
        When truthy, `nodes` is handed back untouched.
    nodes : iterable
        Graph terms to filter.

    Returns
    -------
    filtered
        `nodes` itself when `include_asset_exists` is truthy; otherwise the
        result of the builtin ``filter`` with `AssetExists()` removed.
    """
    if not include_asset_exists:
        def is_not_asset_exists(node):
            # Compared by identity, not equality.
            return node is not AssetExists()

        return filter(is_not_asset_exists, nodes)
    return nodes
示例#24
0
def _render(g, out, format_, include_asset_exists=False):
    """
    Draw `g` as a graph to `out`, in format `format_`.

    The graph is first written as `dot` source into an in-memory buffer,
    which is then piped through the external ``dot`` program.

    Parameters
    ----------
    g : zipline.pipeline.graph.TermGraph
        Graph to render.
    out : file-like object
        Receives the bytes that ``dot`` writes to its stdout.
    format_ : str {'png', 'svg'}
        Output format, passed to ``dot`` via ``-T``.
    include_asset_exists : bool
        Whether to draw the `AssetExists()` input node and the edges that
        start at it. They are left out by default.

    Raises
    ------
    RuntimeError
        If the ``dot`` executable cannot be found, or if ``dot`` writes
        anything to its stderr.
    """
    graph_attrs = dict(rankdir='TB', splines='ortho')
    cluster_attrs = dict(style='filled', color='lightgoldenrod1')

    in_nodes = [term for term in g if term.atomic]
    out_nodes = list(g.outputs.values())
    hide_asset_exists = not include_asset_exists

    buf = BytesIO()
    with graph(buf, "G", **graph_attrs):

        # Outputs go in their own cluster, labelled at the bottom.
        with cluster(buf, 'Output', labelloc='b', **cluster_attrs):
            for term in out_nodes:
                add_term_node(buf, term)

        # Atomic terms form the inputs cluster.
        with cluster(buf, 'Input', **cluster_attrs):
            for term in in_nodes:
                if term is AssetExists() and hide_asset_exists:
                    continue
                add_term_node(buf, term)

        # Everything that is neither an input nor an output is an
        # intermediate result.
        for term in topological_sort(g):
            if term not in in_nodes and term not in out_nodes:
                add_term_node(buf, term)

        # Edges are keyed by the object ids of their endpoints.
        for source, dest in g.edges():
            if source is AssetExists() and hide_asset_exists:
                continue
            add_edge(buf, id(source), id(dest))

    try:
        proc = Popen(
            ['dot', '-T', format_],
            stdin=PIPE,
            stdout=PIPE,
            stderr=PIPE,
        )
    except OSError as e:
        if e.errno != errno.ENOENT:
            raise
        # ENOENT here means the `dot` executable itself was not found.
        raise RuntimeError(
            "Couldn't find `dot` graph layout program. "
            "Make sure Graphviz is installed and `dot` is on your path.")

    rendered, errors = proc.communicate(buf.getvalue())
    if errors:
        raise RuntimeError("Error(s) while rendering graph: %s" %
                           errors.decode('utf-8'))

    out.write(rendered)
示例#25
0
文件: dataset.py 项目: uniwin/zipline
class BoundColumn(LoadableTerm):
    """
    A column of data that's been concretely bound to a particular dataset.

    Instances of this class are dynamically created upon access to attributes
    of DataSets.

    Attributes
    ----------
    dtype : numpy.dtype
        The dtype of data produced when this column is loaded.
    latest : zipline.pipeline.factors.Latest or zipline.pipeline.filters.Latest
        A Filter/Factor computing the most recently known value of this column
        on each date. Produces a Filter if self.dtype is the boolean dtype,
        otherwise produces a Factor.
    dataset : zipline.pipeline.data.DataSet
        The dataset to which this column is bound.
    name : str
        The name of this column.
    """
    # NOTE(review): presumably restricts this column to assets that exist on
    # each date -- confirm against LoadableTerm.
    mask = AssetExists()
    # A bound column declares no inputs and requests no extra input rows.
    extra_input_rows = 0
    inputs = ()

    def __new__(cls, dtype, missing_value, dataset, name):
        """
        Construct a column, taking its domain from ``dataset.domain`` and
        forwarding everything to the parent class's ``__new__``.
        """
        return super(BoundColumn, cls).__new__(
            cls,
            domain=dataset.domain,
            dtype=dtype,
            missing_value=missing_value,
            dataset=dataset,
            name=name,
        )

    def _init(self, dataset, name, *args, **kwargs):
        """
        Store ``dataset`` and ``name`` on the instance, then delegate the
        remaining arguments to the parent class's ``_init``.
        """
        self._dataset = dataset
        self._name = name
        return super(BoundColumn, self)._init(*args, **kwargs)

    @classmethod
    def static_identity(cls, dataset, name, *args, **kwargs):
        """
        Return a tuple of the parent class's static identity, ``dataset`` and
        ``name``.

        NOTE(review): presumably used by the base class to tell when two
        constructions denote the same term -- confirm.
        """
        return (
            super(BoundColumn, cls).static_identity(*args, **kwargs),
            dataset,
            name,
        )

    @property
    def dataset(self):
        """
        The dataset to which this column is bound.
        """
        return self._dataset

    @property
    def name(self):
        """
        The name of this column.
        """
        return self._name

    @property
    def qualname(self):
        """
        The fully-qualified name of this column.

        Generated by doing '.'.join([self.dataset.__name__, self.name]).
        """
        return '.'.join([self.dataset.__name__, self.name])

    @property
    def latest(self):
        """
        A ``Latest`` term taking this column as its only input.

        ``zipline.pipeline.filters.Latest`` is used when this column's dtype
        equals ``bool_dtype``; ``zipline.pipeline.factors.Latest`` otherwise.
        """
        # Imported at call time rather than at module level.
        # NOTE(review): presumably to avoid a circular import -- confirm.
        if self.dtype == bool_dtype:
            from zipline.pipeline.filters import Latest
        else:
            from zipline.pipeline.factors import Latest
        return Latest(
            inputs=(self, ),
            dtype=self.dtype,
            missing_value=self.missing_value,
        )

    def __repr__(self):
        """Return ``'<qualname>::<dtype name>'``."""
        return "{qualname}::{dtype}".format(
            qualname=self.qualname,
            dtype=self.dtype.name,
        )

    def short_repr(self):
        """Return the column's qualname as a compact representation."""
        return self.qualname