示例#1
0
    def to_pipeline(self):
        """Run the notebook-to-pipeline conversion steps on this processor.

        The notebook is parsed into three pieces (pipeline parameters source,
        pipeline metrics source, imports and functions) and each piece is
        handed to the matching processing method, in order.
        """
        parsed_notebook = self.parse_notebook()
        params_source, metrics_source, shared_code = parsed_notebook

        self.parse_pipeline_parameters(params_source)

        # variables that need to be logged as pipeline metrics
        metrics = astutils.parse_metrics_print_statements(metrics_source)

        # static analysis over the source code, then metrics assignment
        self.dependencies_detection(shared_code)
        self.assign_metrics(metrics)
示例#2
0
文件: nb.py 项目: ydataai/kale
def get_pipeline_metrics(request, source_notebook_path):
    """Get the pipeline metrics tagged in the notebook.

    Args:
        request: RPC request object. Its ``log`` attribute is used for
            logging when present (the module-level ``logger`` otherwise),
            and its ``trans_id`` is attached to any error raised.
        source_notebook_path: Path to the notebook; a leading ``~`` is
            expanded before the notebook is opened.

    Returns:
        The result of ``astutils.parse_metrics_print_statements`` applied to
        the source of the metrics cell.

    Raises:
        RPCInternalError: If the metrics source is empty, or if a
            ``ValueError`` is raised while processing the notebook. The
            original ``ValueError`` is chained as the cause.
    """
    log = getattr(request, "log", logger)
    try:
        # read notebook
        processor = NotebookProcessor(os.path.expanduser(source_notebook_path),
                                      skip_validation=True)
        metrics_source = processor.get_pipeline_metrics_source()
        if metrics_source == '':
            raise ValueError("No pipeline metrics found. Please tag a cell"
                             " of the notebook with the `pipeline-metrics`"
                             " tag.")
        # parse the metrics out of the 'pipeline metrics' cell source code
        metrics = astutils.parse_metrics_print_statements(metrics_source)
    except ValueError as e:
        log.exception("Failed to parse pipeline metrics")
        # Chain explicitly so the traceback marks the ValueError as the
        # direct cause of the RPC error.
        raise RPCInternalError(details=str(e),
                               trans_id=request.trans_id) from e
    log.info("Pipeline metrics: {}".format(metrics))
    return metrics
示例#3
0
    def to_pipeline(self):
        """Convert an annotated Notebook to a Pipeline object.

        Returns:
            ``self.pipeline``, after parameters, metrics, dependencies, the
            optional final snapshot step and the step defaults have been
            processed.
        """
        parsed_notebook = self.parse_notebook()
        params_source, metrics_source, shared_code = parsed_notebook

        self.parse_pipeline_parameters(params_source)
        self.pipeline.set_volume_pipeline_parameters()

        # variables that need to be logged as pipeline metrics
        metrics = astutils.parse_metrics_print_statements(metrics_source)

        # run static analysis over the source code
        self.dependencies_detection(shared_code)
        self.assign_metrics(metrics)

        # With autosnapshot enabled and more than one DAG leaf, append an
        # empty step at the end of the pipeline for the final snapshot and
        # link every leaf step to it.
        leaves = self.pipeline.get_leaf_steps()
        if self.config.autosnapshot and len(leaves) > 1:
            snapshot_step_name = "final_auto_snapshot"
            self.pipeline.add_step(Step(name=snapshot_step_name, source=[]))
            for leaf in leaves:
                self.pipeline.add_edge(leaf.name, snapshot_step_name)

        # FIXME: Move this to a base class Processor, to be executed by default
        #  after `to_pipeline`, so that it is agnostic to the type of
        #  processor.
        for pipeline_step in self.pipeline.steps:
            pipeline_step.config.update(self.pipeline.config.steps_defaults)

        # TODO: Additional action required:
        #  Run a static analysis over every step to check that pipeline
        #  parameters are not assigned with new values.
        return self.pipeline
示例#4
0
def test_parse_metrics_print_statements_exc(code):
    """Check that parse_metrics_print_statements raises ValueError on `code`."""
    parse = kale_ast.parse_metrics_print_statements
    with pytest.raises(ValueError):
        parse(code)
示例#5
0
def test_parse_metrics_print_statements(code, target):
    """Check that parse_metrics_print_statements(code) equals `target`."""
    assert kale_ast.parse_metrics_print_statements(code) == target