def run_pipeline(self, arg, line='', cell='', local_ns=None):
    """Run notebooks sequentially in a pipeline on one shared kernel.

    A dictionary called ``_pipeline_workspace`` is created in the kernel
    before the first notebook runs and is shared by all the notebooks in
    the pipeline. The state can contain DataFrames, lists, dictionaries
    and objects. Notebook parameterization can be used to load and read
    from the shared state. After each notebook, every user variable other
    than ``_pipeline_workspace`` is deleted from the kernel namespace.

    The pipeline supports execution of parameterized notebooks. If
    parameters are used, the first code cell is treated as containing
    only parameter assignments. Parameters can be a string, number, list
    or dictionary. Run parameters only change their equivalent parameters
    in the first code cell; unknown parameters are ignored. Adding
    parameters to an execution is optional.

    To save a notebook's execution in the pipeline, the save name should
    be specified along with the execution notebook, separated by a colon.

    # simple pipeline
    Example1:
        %%run_pipeline
        first notebook in pipeline;
        second notebook in pipeline;
        third notebook in pipeline

    # pipeline with parameterized notebooks
    Example2:
        %%run_pipeline
        first notebook in pipeline key01=int key02=string key03={'key01': param01};
        second notebook in pipeline;
        third notebook in pipeline:your save name key01=int key02=string key03=[param01, param02]

    Args:
        arg: Raw argument string. When neither ``line`` nor ``cell`` is
            given and it does not start with ``-``, it is used as the
            pipeline definition instead of as options.
        line: Line-magic text; used as the pipeline when ``cell`` is empty.
        cell: Cell-magic text holding ``;``-separated run commands.
        local_ns: Accepted for signature compatibility; not used here.

    Raises:
        CellExecutionError: Propagated when a notebook cell fails.
        Exception: Any other failure (unreadable notebook, kernel or save
            error) also propagates; the kernel is shut down in every case.
    """
    # Executed after every notebook: wipe all user variables except the
    # shared workspace so notebooks only communicate through it.
    clear_namespace_cell = nbformat.v4.new_code_cell(
        source="from IPython import get_ipython\n" +
        "_ip = get_ipython()\n" +
        "_user_vars = %who_ls\n" +
        "for _var in _user_vars:\n" +
        "    if _var != '_pipeline_workspace':\n" +
        "        del _ip.user_ns[_var]\n" +
        "import gc\n" +
        "gc.collect()")
    pipeline_state_cell = nbformat.v4.new_code_cell(
        source="_pipeline_workspace = {'frames': list()}")

    if not (line or cell):
        if not arg.startswith("-"):
            line = arg
            arg = ''
    # NOTE(review): options are parsed against `self.run`'s argument spec,
    # presumably because both magics share the same options -- confirm.
    args = ParameterArgs(parse_argstring(self.run, arg))

    if not cell:
        cell = line
    # Drop empty commands so a trailing or doubled ';' is harmless.
    notebook_run_cmds = [
        notebook_run_cmd.strip() for notebook_run_cmd in cell.split(';')
    ]
    notebook_run_cmds = [
        notebook_run_cmd for notebook_run_cmd in notebook_run_cmds
        if notebook_run_cmd
    ]
    if not notebook_run_cmds:
        # Nothing to run; avoid starting a kernel for an empty pipeline.
        return

    execute_preprocessor = ExecutePreprocessor(
        kernel_name='python3', timeout=args.get('cell_timeout'))
    kernel_manager, kernel_comm = start_new_kernel(kernel_name='python3')
    execute_preprocessor.km = kernel_manager
    execute_preprocessor.kc = kernel_comm

    def execute_cell(nb4_cell):
        """Run one helper cell on the shared kernel, propagating failures."""
        # Errors are deliberately not swallowed: continuing the pipeline
        # after a failed helper cell would run against a broken kernel.
        execute_preprocessor.run_cell(nb4_cell)

    def execute_notebook(notebook_filename, notebook_save_filename, params):
        """Execute one notebook cell by cell, updating a progress bar."""
        with open(notebook_filename, encoding='utf-8') as file_handler:
            notebook = nbformat.read(file_handler, as_version=4)

        if params:
            # Only the first code cell carries parameter assignments.
            for nb_cell in notebook.cells:
                if nb_cell.cell_type == 'code':
                    nb_cell.source = utils.substitute_params(
                        nb_cell.source, params)
                    break

        execute_preprocessor.nb = notebook
        progress_bar = widgets.IntProgress(
            value=0,
            min=0,
            max=len(notebook.cells),
            step=1,
            bar_style='info',  # 'success', 'info', 'warning', 'danger' or ''
            orientation='horizontal')
        display_label = notebook_filename
        if notebook_save_filename:
            display_label = display_label + ' : ' + notebook_save_filename
        display(
            widgets.HBox([widgets.Label(display_label), progress_bar]))

        try:
            for idx, nb_cell in enumerate(notebook.cells):
                execute_preprocessor.preprocess_cell(
                    nb_cell, resources={'metadata': {}}, cell_index=idx)
                progress_bar.value = idx + 1
        except BaseException:
            # Any failure (not only CellExecutionError) marks the bar red.
            progress_bar.bar_style = 'danger'
            raise
        finally:
            # Save even on failure so partial outputs can be inspected.
            if notebook_save_filename:
                with open(notebook_save_filename, mode='wt',
                          encoding='utf-8') as file_handler:
                    nbformat.write(notebook, file_handler)
        # Reached only when every cell ran and the save (if any) succeeded.
        progress_bar.bar_style = 'success'

    try:
        execute_cell(pipeline_state_cell)
        for notebook_run_cmd in notebook_run_cmds:
            run_notebook_name, notebook_save_name, nb_params = (
                utils.parse_run_str(notebook_run_cmd))
            execute_notebook(run_notebook_name, notebook_save_name,
                             nb_params)
            execute_cell(clear_namespace_cell)
    finally:
        # Single shutdown point: the kernel is released exactly once,
        # whether the pipeline finished or failed part-way.
        kernel_comm.stop_channels()
        kernel_manager.shutdown_kernel()
def execute_notebook(notebook_filename, notebook_save_filename, params):
    """Execute a single notebook, optionally saving the executed copy.

    Reads options from the enclosing scope's ``args``: ``cell_timeout``,
    ``allow_errors`` and ``enable_progress_bar``. With the progress bar
    enabled, a kernel named by the notebook's kernelspec metadata is
    started and cells are run one at a time; otherwise the whole notebook
    is handed to ``ExecutePreprocessor.preprocess`` and progress is
    reported through log messages.

    Args:
        notebook_filename: Path of the notebook to execute.
        notebook_save_filename: Path to write the executed notebook to;
            nothing is saved when falsy. The save is attempted even if
            execution fails.
        params: Parameters substituted into the first code cell; skipped
            when falsy.

    Raises:
        Exception: Any error from reading, executing or saving the
            notebook propagates after the kernel has been shut down and
            the failure has been reported.
    """
    log = UserMessages()
    with open(notebook_filename, encoding='utf-8') as file_handler:
        notebook = nbformat.read(file_handler, as_version=4)

    execute_preprocessor = ExecutePreprocessor(
        timeout=args.get('cell_timeout'),
        allow_errors=args.get('allow_errors'))
    kernel_manager = None
    kernel_comm = None
    # Keep the option flag and the widget in separate names so the
    # reporting code below can tell "disabled" from "not yet created".
    show_progress = args.get('enable_progress_bar')
    progress_bar = None
    succeeded = False

    if params:
        # Only the first code cell carries parameter assignments.
        for nb_cell in notebook.cells:
            if nb_cell.cell_type == 'code':
                nb_cell.source = utils.substitute_params(
                    nb_cell.source, params)
                break

    try:
        try:
            if show_progress:
                progress_bar = widgets.IntProgress(
                    value=0,
                    min=0,
                    max=len(notebook.cells),
                    step=1,
                    # 'success', 'info', 'warning', 'danger' or ''
                    bar_style='info',
                    orientation='horizontal')
                kernel_manager, kernel_comm = start_new_kernel(
                    kernel_name=notebook['metadata']['kernelspec']['name'])
                execute_preprocessor.km = kernel_manager
                execute_preprocessor.kc = kernel_comm
                execute_preprocessor.nb = notebook

                display_label = notebook_filename
                if notebook_save_filename:
                    display_label = (
                        display_label + ' : ' + notebook_save_filename)
                display(
                    widgets.HBox(
                        [widgets.Label(display_label), progress_bar]))

                for idx, nb_cell in enumerate(notebook.cells):
                    execute_preprocessor.preprocess_cell(
                        nb_cell,
                        resources={'metadata': {}},
                        cell_index=idx)
                    progress_bar.value = idx + 1
            else:
                log.info("Running Notebook: " + notebook_filename)
                execute_preprocessor.preprocess(notebook, {'metadata': {}})
        finally:
            # Save even on failure so partial outputs can be inspected.
            if notebook_save_filename:
                with open(notebook_save_filename, mode='wt',
                          encoding='utf-8') as file_handler:
                    nbformat.write(notebook, file_handler)
        # Only reached when execution and the save both completed, so any
        # exception type (not just CellExecutionError) counts as failure.
        succeeded = True
    finally:
        # Runs even if the save above raised, so the kernel never leaks.
        if kernel_comm is not None:
            kernel_comm.stop_channels()
        if kernel_manager is not None:
            kernel_manager.shutdown_kernel()

        if progress_bar is not None:
            progress_bar.bar_style = 'success' if succeeded else 'danger'
        elif not show_progress:
            if succeeded:
                log.info(notebook_filename + " was executed successfully.")
            else:
                log.error(notebook_filename + " execution failed.")