def to_pipeline(self):
    """Convert an annotated Notebook to a Pipeline object.

    Parses the notebook into its tagged sections, registers the pipeline
    parameters, derives the pipeline metrics from the metrics cell, and
    runs static dependency analysis over the step sources.
    """
    # split the notebook into its tagged sections (parameters cell,
    # metrics cell, and the remaining imports/functions code)
    (pipeline_parameters_source, pipeline_metrics_source,
     imports_and_functions) = self.parse_notebook()

    self.parse_pipeline_parameters(pipeline_parameters_source)

    # get a list of variables that need to be logged as pipeline metrics
    pipeline_metrics = astutils.parse_metrics_print_statements(
        pipeline_metrics_source)

    # run static analysis over the source code
    self.dependencies_detection(imports_and_functions)
    self.assign_metrics(pipeline_metrics)
def get_pipeline_metrics(request, source_notebook_path):
    """Get the pipeline metrics tagged in the notebook.

    Args:
        request: RPC request object. May carry a ``log`` attribute
            (falls back to the module logger) and a ``trans_id`` used
            when reporting errors.
        source_notebook_path: Path to the notebook file; ``~`` is
            expanded to the user's home directory.

    Returns:
        The metrics mapping parsed from the ``pipeline-metrics`` cell.

    Raises:
        RPCInternalError: If no ``pipeline-metrics`` cell is tagged or
            the metrics source fails to parse.
    """
    # read notebook
    log = getattr(request, "log", logger)
    try:
        processor = NotebookProcessor(os.path.expanduser(source_notebook_path),
                                      skip_validation=True)
        metrics_source = processor.get_pipeline_metrics_source()
        # truthiness check also guards an empty/missing metrics cell
        if not metrics_source:
            raise ValueError("No pipeline metrics found. Please tag a cell"
                             " of the notebook with the `pipeline-metrics`"
                             " tag.")
        # get a dict from the 'pipeline-metrics' cell source code
        metrics = astutils.parse_metrics_print_statements(metrics_source)
    except ValueError as e:
        log.exception("Failed to parse pipeline metrics")
        raise RPCInternalError(details=str(e), trans_id=request.trans_id)
    # lazy %-formatting: only rendered if the INFO level is enabled
    log.info("Pipeline metrics: %s", metrics)
    return metrics
def to_pipeline(self):
    """Convert an annotated Notebook to a Pipeline object.

    Parses the notebook's tagged sections, registers pipeline and volume
    parameters, derives pipeline metrics, runs static dependency
    analysis, and finalizes the DAG (adding a trailing auto-snapshot
    step when needed) before returning the assembled pipeline.

    Returns:
        The fully assembled Pipeline object.
    """
    # split the notebook into its tagged sections (parameters cell,
    # metrics cell, and the remaining imports/functions code)
    (pipeline_parameters_source, pipeline_metrics_source,
     imports_and_functions) = self.parse_notebook()

    self.parse_pipeline_parameters(pipeline_parameters_source)
    self.pipeline.set_volume_pipeline_parameters()

    # get a list of variables that need to be logged as pipeline metrics
    pipeline_metrics = astutils.parse_metrics_print_statements(
        pipeline_metrics_source)

    # run static analysis over the source code
    self.dependencies_detection(imports_and_functions)
    self.assign_metrics(pipeline_metrics)

    # if there are multiple DAG leaves, add an empty step at the end of the
    # pipeline for final snapshot
    leaf_steps = self.pipeline.get_leaf_steps()
    if self.config.autosnapshot and len(leaf_steps) > 1:
        _name = "final_auto_snapshot"
        # empty-source step exists only to trigger the snapshot
        self.pipeline.add_step(Step(name=_name, source=[]))
        # add a link from all the last steps of the pipeline to
        # the final auto snapshot one.
        for step in leaf_steps:
            self.pipeline.add_edge(step.name, _name)

    # FIXME: Move this to a base class Processor, to be executed by default
    #        after `to_pipeline`, so that it is agnostic to the type of
    #        processor.
    # apply pipeline-level step defaults onto every step's config
    for step in self.pipeline.steps:
        step.config.update(self.pipeline.config.steps_defaults)

    # TODO: Additional action required:
    #       Run a static analysis over every step to check that pipeline
    #       parameters are not assigned with new values.
    return self.pipeline
def test_parse_metrics_print_statements_exc(code):
    """Verify that parse_metrics_print_statements rejects invalid sources."""
    # call-form of pytest.raises: asserts the callable raises ValueError
    pytest.raises(ValueError, kale_ast.parse_metrics_print_statements, code)
def test_parse_metrics_print_statements(code, target):
    """Verify parse_metrics_print_statements output matches expectation."""
    assert kale_ast.parse_metrics_print_statements(code) == target