def notebook_to_graph(self):
    """Convert an annotated Notebook to a Graph."""
    # convert notebook to nx graph
    (pipeline_graph, pipeline_parameters_source, pipeline_metrics_source,
     imports_and_functions) = parser.parse_notebook(
        self.notebook, self.pipeline_metadata)

    # get a dict from the 'pipeline parameters' cell source code
    pipeline_parameters_dict = ast.parse_assignments_expressions(
        pipeline_parameters_source)

    # get a list of variables that need to be logged as pipeline metrics
    pipeline_metrics = ast.parse_metrics_print_statements(
        pipeline_metrics_source)

    # run static analysis over the source code
    dependencies.dependencies_detection(
        pipeline_graph,
        pipeline_parameters=pipeline_parameters_dict,
        imports_and_functions=imports_and_functions)
    dependencies.assign_metrics(pipeline_graph, pipeline_metrics)

    # if there are multiple DAG leaves, add an empty step at the end of the
    # pipeline for final snapshot
    leaf_steps = graphutils.get_leaf_nodes(pipeline_graph)
    if self.pipeline_metadata.get("autosnapshot") and len(leaf_steps) > 1:
        auto_snapshot_name = 'final_auto_snapshot'
        # add a link from all the last steps of the pipeline to
        # the final auto snapshot one.
        for node in leaf_steps:
            pipeline_graph.add_edge(node, auto_snapshot_name)
        step_defaults = parser.parse_steps_defaults(
            self.pipeline_metadata.get("steps_defaults", []))
        data = {
            auto_snapshot_name: {
                "source": "",
                "ins": [],
                "outs": [],
                "annotations": step_defaults.get("annotations"),
                "labels": step_defaults.get("labels"),
                "limits": step_defaults.get("limits")
            }
        }
        nx.set_node_attributes(pipeline_graph, data)

    # TODO: Additional Step required:
    # Run a static analysis over every step to check that pipeline
    # parameters are not assigned with new values.
    return pipeline_graph, pipeline_parameters_dict
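
# The leaf-merging logic above is easiest to see on a toy graph. A minimal,
# self-contained sketch using only networkx; the step names and node
# attributes are illustrative, not Kale's real ones.
import networkx as nx

# toy pipeline DAG with two leaves: step1 -> step2, step1 -> step3
g = nx.DiGraph()
g.add_edges_from([("step1", "step2"), ("step1", "step3")])

# leaf nodes are the ones with no outgoing edges, which is what a helper
# like graphutils.get_leaf_nodes is expected to return
leaves = [n for n in g.nodes() if g.out_degree(n) == 0]
assert sorted(leaves) == ["step2", "step3"]

# fan all leaves into a single snapshot step, as the method above does
for node in leaves:
    g.add_edge(node, "final_auto_snapshot")
nx.set_node_attributes(
    g, {"final_auto_snapshot": {"source": "", "ins": [], "outs": []}})

# the snapshot step is now the only leaf of the DAG
assert [n for n in g.nodes() if g.out_degree(n) == 0] == [
    "final_auto_snapshot"]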
def get_pipeline_metrics(request, source_notebook_path):
    """Get the pipeline metrics tagged in the notebook."""
    # read notebook
    log = request.log if hasattr(request, "log") else logger
    try:
        notebook = nbformat.read(source_notebook_path,
                                 as_version=nbformat.NO_CONVERT)
        metrics_source = parser.get_pipeline_metrics_source(notebook)
        if metrics_source == '':
            raise ValueError("No pipeline metrics found. Please tag a cell"
                             " of the notebook with the `pipeline-metrics`"
                             " tag.")
        # get the pipeline metrics from the 'pipeline metrics' cell source
        metrics = ast.parse_metrics_print_statements(metrics_source)
    except ValueError as e:
        log.exception("Failed to parse pipeline metrics")
        raise RPCInternalError(details=str(e), trans_id=request.trans_id)
    log.info("Pipeline metrics: {}".format(metrics))
    return metrics
def get_pipeline_metrics(request, source_notebook_path):
    """Get the pipeline metrics tagged in the notebook."""
    # read notebook
    log = request.log if hasattr(request, "log") else logger
    try:
        processor = NotebookProcessor(
            os.path.expanduser(source_notebook_path),
            skip_validation=True)
        metrics_source = processor.get_pipeline_metrics_source()
        if metrics_source == '':
            raise ValueError("No pipeline metrics found. Please tag a cell"
                             " of the notebook with the `pipeline-metrics`"
                             " tag.")
        # get the pipeline metrics from the 'pipeline metrics' cell source
        metrics = ast.parse_metrics_print_statements(metrics_source)
    except ValueError as e:
        log.exception("Failed to parse pipeline metrics")
        raise RPCInternalError(details=str(e), trans_id=request.trans_id)
    log.info("Pipeline metrics: {}".format(metrics))
    return metrics
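
# Both versions above delegate the parsing to ast.parse_metrics_print_statements
# (Kale's own `ast` module, not the stdlib one). A minimal sketch of the idea
# using the standard-library ast, assuming a metrics cell may only contain bare
# `print(variable)` calls; the list return shape is illustrative, not Kale's
# exact API.
import ast as std_ast


def extract_printed_names(source):
    """Collect variable names printed as `print(var)` in a metrics cell."""
    names = []
    for node in std_ast.walk(std_ast.parse(source)):
        if (isinstance(node, std_ast.Call)
                and isinstance(node.func, std_ast.Name)
                and node.func.id == "print"):
            for arg in node.args:
                if not isinstance(arg, std_ast.Name):
                    raise ValueError("Only `print(<variable>)` statements"
                                     " are allowed in a metrics cell")
                names.append(arg.id)
    return names


assert extract_printed_names("print(accuracy)\nprint(loss)") == [
    "accuracy", "loss"]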
def to_pipeline(self):
    """Convert an annotated Notebook to a Pipeline object."""
    (pipeline_parameters_source, pipeline_metrics_source,
     imports_and_functions) = self.parse_notebook()
    self.parse_pipeline_parameters(pipeline_parameters_source)
    self.pipeline.set_volume_pipeline_parameters()

    # get a list of variables that need to be logged as pipeline metrics
    pipeline_metrics = ast.parse_metrics_print_statements(
        pipeline_metrics_source)

    # run static analysis over the source code
    dependencies.dependencies_detection(
        self.pipeline, imports_and_functions=imports_and_functions)
    dependencies.assign_metrics(self.pipeline, pipeline_metrics)

    # if there are multiple DAG leaves, add an empty step at the end of the
    # pipeline for final snapshot
    leaf_steps = self.pipeline.get_leaf_steps()
    if self.config.autosnapshot and len(leaf_steps) > 1:
        _name = "final_auto_snapshot"
        self.pipeline.add_step(Step(name=_name, source=[]))
        # add a link from all the last steps of the pipeline to
        # the final auto snapshot one.
        for step in leaf_steps:
            self.pipeline.add_edge(step.name, _name)

    # FIXME: Move this to a base class Processor, to be executed by default
    #        after `to_pipeline`, so that it is agnostic to the type of
    #        processor.
    for step in self.pipeline.steps:
        step.config.update(self.pipeline.config.steps_defaults)

    # TODO: Additional action required:
    # Run a static analysis over every step to check that pipeline
    # parameters are not assigned with new values.
    return self.pipeline
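
# The steps_defaults loop at the end of to_pipeline is, at its core, a dict
# merge: pipeline-level defaults are pushed into every step's config. A
# minimal sketch with plain dicts; note that under this assumption `update`
# lets the defaults overwrite per-step values, whereas Kale's Config objects
# may merge differently. All keys and values here are illustrative.
steps_defaults = {"labels": {"app": "kale"}, "annotations": {}}
steps = [{"name": "train", "config": {}},
         {"name": "eval", "config": {"labels": {"tier": "test"}}}]

for step in steps:
    # push the pipeline-level defaults into every step's config
    step["config"].update(steps_defaults)

# the per-step "labels" value was replaced wholesale by the default
assert steps[1]["config"]["labels"] == {"app": "kale"}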
def notebook_to_graph(self):
    """Convert an annotated Notebook to a Graph."""
    # convert notebook to nx graph
    (pipeline_graph, pipeline_parameters_source, pipeline_metrics_source,
     imports_and_functions) = parser.parse_notebook(self.notebook)

    # get a dict from the 'pipeline parameters' cell source code
    pipeline_parameters_dict = ast.parse_assignments_expressions(
        pipeline_parameters_source)

    # get a list of variables that need to be logged as pipeline metrics
    pipeline_metrics = ast.parse_metrics_print_statements(
        pipeline_metrics_source)

    # run static analysis over the source code
    dependencies.dependencies_detection(
        pipeline_graph,
        pipeline_parameters=pipeline_parameters_dict,
        imports_and_functions=imports_and_functions)
    dependencies.assign_metrics(pipeline_graph, pipeline_metrics)

    # add an empty step at the end of the pipeline for final snapshot
    if self.auto_snapshot:
        auto_snapshot_name = 'final_auto_snapshot'
        # add a link from all the last steps of the pipeline to
        # the final auto snapshot one.
        leaf_steps = graph_utils.get_leaf_nodes(pipeline_graph)
        for node in leaf_steps:
            pipeline_graph.add_edge(node, auto_snapshot_name)
        data = {auto_snapshot_name: {'source': '', 'ins': [], 'outs': []}}
        nx.set_node_attributes(pipeline_graph, data)

    # TODO: Additional Step required:
    # Run a static analysis over every step to check that pipeline
    # parameters are not assigned with new values.
    return pipeline_graph, pipeline_parameters_dict
def notebook_to_graph(self):
    """Convert an annotated Notebook to a Graph."""
    # convert notebook to nx graph
    (pipeline_graph, pipeline_parameters_source,
     pipeline_metrics_source) = parser.parse_notebook(self.notebook)

    # get a dict from the 'pipeline parameters' cell source code
    pipeline_parameters_dict = ast.parse_assignments_expressions(
        pipeline_parameters_source)

    # get a list of variables that need to be logged as pipeline metrics
    pipeline_metrics = ast.parse_metrics_print_statements(
        pipeline_metrics_source)

    # if there are some pipeline metrics, create an additional step at the
    # end of the pipeline to log them.
    # By adding this step before dependencies detection, we make sure that
    # the necessary variables are marshalled at the beginning of the step.
    if len(pipeline_metrics):
        pipeline_metrics_name = "pipeline_metrics"
        # add a link from all the last steps of the pipeline to
        # the pipeline metrics one.
        leaf_steps = [
            x for x in pipeline_graph.nodes()
            if pipeline_graph.out_degree(x) == 0
        ]
        for node in leaf_steps:
            pipeline_graph.add_edge(node, pipeline_metrics_name)

        # generate the code that dumps the pipeline metrics to file
        template_env = _initialize_templating_env()
        metrics_template = template_env.get_template(
            'pipeline_metrics_template.jinja2')
        # need to be a list since it will be treated as a code cell and
        # passed to the ipykernel
        metrics_source = [
            metrics_template.render(pipeline_metrics=pipeline_metrics)
        ]
        data = {
            pipeline_metrics_name: {
                'source': metrics_source,
                'ins': [],
                'outs': []
            }
        }
        nx.set_node_attributes(pipeline_graph, data)

    # run static analysis over the source code
    dependencies.dependencies_detection(
        pipeline_graph, pipeline_parameters=pipeline_parameters_dict)

    # add an empty step at the end of the pipeline for final snapshot
    if self.auto_snapshot:
        auto_snapshot_name = 'final_auto_snapshot'
        # add a link from all the last steps of the pipeline to
        # the final auto snapshot one.
        leaf_steps = [
            x for x in pipeline_graph.nodes()
            if pipeline_graph.out_degree(x) == 0
        ]
        for node in leaf_steps:
            pipeline_graph.add_edge(node, auto_snapshot_name)
        data = {auto_snapshot_name: {'source': '', 'ins': [], 'outs': []}}
        nx.set_node_attributes(pipeline_graph, data)

    # TODO: Additional Step required:
    # Run a static analysis over every step to check that pipeline
    # parameters are not assigned with new values.
    return pipeline_graph, pipeline_parameters_dict
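
# The metrics step's source above is produced by rendering a Jinja2 template
# over the metric names. A sketch of the same pattern with an inline template
# that dumps each metric to a JSON file; the real
# pipeline_metrics_template.jinja2 shipped with Kale is not shown here, so
# both the template body and the output path are illustrative.
from jinja2 import Template

metrics_template = Template(
    "import json\n"
    "metrics = {\n"
    "{% for m in pipeline_metrics %}"
    "    \"{{ m }}\": {{ m }},\n"
    "{% endfor %}"
    "}\n"
    "with open(\"/tmp/mlpipeline-metrics.json\", \"w\") as f:\n"
    "    json.dump(metrics, f)\n")

# rendered as a single string and wrapped in a list, so that it can be
# treated as a notebook code cell, mirroring the method above
metrics_source = [
    metrics_template.render(pipeline_metrics=["accuracy", "loss"])
]
print(metrics_source[0])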
def test_parse_metrics_print_statements_exc(code):
    """Test exception cases for the parse_metrics_print_statements function."""
    with pytest.raises(ValueError):
        kale_ast.parse_metrics_print_statements(code)
def test_parse_metrics_print_statements(code, target):
    """Test the parse_metrics_print_statements function."""
    res = kale_ast.parse_metrics_print_statements(code)
    assert res == target
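
# Both tests above take `code` (and `target`) as arguments, which suggests
# they are driven by pytest parametrization. A hedged sketch of what such a
# parametrization might look like for the exception test; the concrete inputs
# are hypothetical, not Kale's real fixtures.
import pytest


@pytest.mark.parametrize("code", [
    "accuracy = 1",            # assignment instead of a print statement
    "print(accuracy + loss)",  # expression instead of a bare variable name
])
def test_parse_metrics_print_statements_exc_sketch(code):
    with pytest.raises(ValueError):
        kale_ast.parse_metrics_print_statements(code)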