def jupytext(args=None):
    """Internal implementation of Jupytext command line

    Parses the command line arguments, then reads, optionally transforms
    (metadata update, kernel selection, pipe/check commands, execution) and
    writes each notebook given on the command line.

    :param args: command line arguments, handed to ``parse_jupytext_args``
    :return: the number of round trip conversion errors found by
        ``--test``/``--test-strict``; ``0`` right after ``--version`` and
        ``1`` right after ``--paired-paths``
    :raise ValueError: on inconsistent arguments (e.g. no action selected,
        notebooks passed together with ``--pre-commit``, several notebooks
        with ``--output``). Errors raised while processing one notebook are
        only reported on stderr when ``--warn-only`` is set.
    """
    args = parse_jupytext_args(args)

    def log(text):
        # Messages go to stdout unless --quiet is set (or we write the notebook to stdout)
        if not args.quiet:
            sys.stdout.write(text + '\n')

    if args.version:
        log(__version__)
        return 0

    if args.pre_commit:
        if args.notebooks:
            raise ValueError(
                '--pre-commit takes notebooks from the git index. Do not pass any notebook here.'
            )
        args.notebooks = notebooks_in_git_index(args.input_format)
        log('[jupytext] Notebooks in git index are:')
        for nb_file in args.notebooks:
            log(nb_file)

    def writef_git_add(notebook_, nb_file_, fmt_):
        # Write the notebook and, in pre-commit mode, stage the written file
        write(notebook_, nb_file_, fmt=fmt_)
        if args.pre_commit:
            system('git', 'add', nb_file_)

    # Read notebook from stdin
    if not args.notebooks:
        if not args.pre_commit:
            args.notebooks = ['-']

    if args.set_formats is not None:
        # Replace empty string with None
        args.update_metadata = recursive_update(
            args.update_metadata,
            {'jupytext': {'formats': args.set_formats or None}})
        args.sync = True

    if args.paired_paths:
        if len(args.notebooks) != 1:
            raise ValueError('--paired-paths applies to a single notebook')
        print_paired_paths(args.notebooks[0], args.input_format)
        return 1

    if not args.to and not args.output and not args.sync \
            and not args.pipe and not args.check \
            and not args.test and not args.test_strict \
            and not args.update_metadata and not args.set_kernel:
        raise ValueError('Please select an action')

    if args.output and len(args.notebooks) != 1:
        raise ValueError('Please input a single notebook when using --output')

    if args.input_format:
        args.input_format = long_form_one_format(args.input_format)

    if args.to:
        args.to = long_form_one_format(args.to)
        set_format_options(args.to, args.format_options)

    # Main loop
    round_trip_conversion_errors = 0

    # Wildcard extension on Windows #202
    notebooks = []
    for pattern in args.notebooks:
        if '*' in pattern or '?' in pattern:
            notebooks.extend(glob.glob(pattern))
        else:
            notebooks.append(pattern)

    for nb_file in notebooks:
        try:
            if nb_file == '-' and args.sync:
                raise ValueError('Cannot sync a notebook on stdin')

            nb_dest = args.output or (None if not args.to else (
                '-' if nb_file == '-' else full_path(
                    base_path(nb_file, args.input_format), args.to)))

            # Just acting on metadata / pipe => save in place
            if not nb_dest and not args.sync:
                nb_dest = nb_file

            if nb_dest == '-':
                args.quiet = True

            # I. ### Read the notebook ###
            fmt = copy(args.input_format) or {}
            set_format_options(fmt, args.format_options)
            log('[jupytext] Reading {}{}'.format(
                nb_file if nb_file != '-' else 'stdin',
                ' in format {}'.format(
                    short_form_one_format(fmt)) if 'extension' in fmt else ''))

            notebook = read(nb_file, fmt=fmt)
            if not fmt:
                text_representation = notebook.metadata.get(
                    'jupytext', {}).get('text_representation', {})
                ext = os.path.splitext(nb_file)[1]
                if text_representation.get('extension') == ext:
                    fmt = {
                        key: text_representation[key]
                        for key in text_representation
                        if key in ['extension', 'format_name']
                    }
                elif ext:
                    fmt = {'extension': ext}

            # Compute actual extension when using script/auto, and update nb_dest if necessary
            dest_fmt = args.to
            if dest_fmt and dest_fmt['extension'] == '.auto':
                auto_ext = auto_ext_from_metadata(notebook.metadata)
                if not auto_ext:
                    raise ValueError(
                        'The notebook has no language information. '
                        'Please provide an explicit script extension.')
                dest_fmt['extension'] = auto_ext
                if not args.output and nb_file != '-':
                    nb_dest = full_path(base_path(nb_file, args.input_format), dest_fmt)

            # Set the kernel
            set_kernel = args.set_kernel
            if args.execute and notebook.metadata.get('kernelspec', {}).get('name') is None:
                log("[jupytext] Setting default kernel with --set-kernel -")
                set_kernel = '-'

            if set_kernel:
                if set_kernel == '-':
                    # Use .get() on the kernelspec: the notebook may have none at all
                    # (that is precisely when --execute selects '-'), and we want the
                    # explicit ValueError below rather than an uncaught KeyError.
                    language = notebook.metadata.get('jupytext', {}).get('main_language') \
                        or notebook.metadata.get('kernelspec', {}).get('language')
                    if not language:
                        raise ValueError(
                            'Cannot infer a kernel as notebook language is not defined'
                        )

                    kernelspec = kernelspec_from_language(language)
                    if not kernelspec:
                        raise ValueError(
                            'Found no kernel for {}'.format(language))
                else:
                    try:
                        kernelspec = get_kernel_spec(set_kernel)
                    except KeyError:
                        raise KeyError(
                            'Please choose a kernel name among {}'.format(
                                [name for name in find_kernel_specs()]))
                    kernelspec = {
                        'name': args.set_kernel,
                        'language': kernelspec.language,
                        'display_name': kernelspec.display_name
                    }

                args.update_metadata['kernelspec'] = kernelspec

            # Update the metadata
            if args.update_metadata:
                log("[jupytext] Updating notebook metadata with '{}'".format(
                    json.dumps(args.update_metadata)))
                # Are we updating a text file that has a metadata filter? #212
                if fmt['extension'] != '.ipynb' and \
                        notebook.metadata.get('jupytext', {}).get('notebook_metadata_filter') == '-all':
                    notebook.metadata.get('jupytext', {}).pop('notebook_metadata_filter')
                recursive_update(notebook.metadata, args.update_metadata)

                if 'kernelspec' in args.update_metadata and 'main_language' in notebook.metadata.get(
                        'jupytext', {}):
                    notebook.metadata['jupytext'].pop('main_language')

            # Read paired notebooks
            if args.sync:
                set_prefix_and_suffix(fmt, notebook, nb_file)
                try:
                    notebook, inputs_nb_file, outputs_nb_file = load_paired_notebook(
                        notebook, fmt, nb_file, log)
                except NotAPairedNotebook as err:
                    sys.stderr.write('[jupytext] Warning: ' + str(err) + '\n')
                    continue

            # II. ### Apply commands onto the notebook ###
            # Pipe the notebook into the desired commands
            for cmd in args.pipe or []:
                notebook = pipe_notebook(notebook, cmd, args.pipe_fmt)

            # and/or test the desired commands onto the notebook
            for cmd in args.check or []:
                pipe_notebook(notebook, cmd, args.pipe_fmt, update=False)

            # Execute the notebook
            if args.execute:
                log("[jupytext] Executing notebook")
                kernel_name = notebook.metadata.get('kernelspec', {}).get('name')
                exec_proc = ExecutePreprocessor(timeout=None, kernel_name=kernel_name)
                exec_proc.preprocess(notebook, resources={})

            # III. ### Possible actions ###
            modified = args.update_metadata or args.pipe or args.execute
            # a. Test round trip conversion
            if args.test or args.test_strict:
                try:
                    test_round_trip_conversion(
                        notebook,
                        dest_fmt,
                        update=args.update,
                        allow_expected_differences=not args.test_strict,
                        stop_on_first_error=args.stop_on_first_error)
                except NotebookDifference as err:
                    round_trip_conversion_errors += 1
                    sys.stdout.write('{}: {}'.format(nb_file, str(err)))
                # In test mode nothing is written: move on to the next notebook
                continue

            # b. Output to the desired file or format
            if nb_dest:
                if nb_dest == nb_file and not dest_fmt:
                    dest_fmt = fmt

                # Test consistency between dest name and output format
                if dest_fmt and nb_dest != '-':
                    base_path(nb_dest, dest_fmt)

                # Describe what jupytext is doing
                if os.path.isfile(nb_dest) and args.update:
                    if not nb_dest.endswith('.ipynb'):
                        raise ValueError('--update is only for ipynb files')
                    action = ' (destination file updated)'
                    check_file_version(notebook, nb_file, nb_dest)
                    combine_inputs_with_outputs(notebook, read(nb_dest), fmt=fmt)
                elif os.path.isfile(nb_dest):
                    action = ' (destination file replaced)'
                else:
                    action = ''

                log('[jupytext] Writing {nb_dest}{format}{action}'.format(
                    nb_dest=nb_dest,
                    format=' in format ' + short_form_one_format(dest_fmt)
                    if dest_fmt and 'format_name' in dest_fmt else '',
                    action=action))
                writef_git_add(notebook, nb_dest, dest_fmt)

            # c. Synchronize paired notebooks
            if args.sync:
                # Also update the original notebook if the notebook was modified
                if modified:
                    inputs_nb_file = outputs_nb_file = None
                formats = notebook.metadata['jupytext']['formats']

                for ipynb in [True, False]:
                    # Write first format last so that it is the most recent file
                    for alt_path, alt_fmt in paired_paths(
                            nb_file, fmt, formats)[::-1]:
                        # Write ipynb first for compatibility with our contents manager
                        if alt_path.endswith('.ipynb') != ipynb:
                            continue
                        # Do not write the ipynb file if it was not modified
                        # But, always write text representations to make sure they are the most recent
                        if alt_path == inputs_nb_file and alt_path == outputs_nb_file:
                            continue
                        log("[jupytext] Updating '{}'".format(alt_path))
                        writef_git_add(notebook, alt_path, alt_fmt)

        except (ValueError, TypeError, IOError) as err:
            if args.warn_only:
                sys.stderr.write('[jupytext] Error: {}\n'.format(str(err)))
            else:
                # Bare raise keeps the original traceback untouched
                raise

    return round_trip_conversion_errors
import nbformat from nbconvert.preprocessors import ExecutePreprocessor, CellExecutionError import traceback import os if __name__ == '__main__': notebook_filename = "C:/Users/1/Desktop/abc.ipynb" # notebook_filename = "D:/jupyter/宏观团队/HG/1.高频跟踪/3.周度策略观点@/3.周度策略观点.ipynb" with open(notebook_filename, encoding='utf8') as f: nb = nbformat.read(f, as_version=4) ep = ExecutePreprocessor(timeout=1000, kernel_name='python3') try: out = ep.preprocess( nb, {'metadata': { 'C:/Users/1/Desktop/': 'notebooks/' }}) print("成功") except CellExecutionError as e: # out = None msg = '报告执行出错 "%s".\n' % notebook_filename msg += '请查看 "%s"' % 'C:/Users/1/Desktop/abc_123.ipynb' print(msg) print('-------------begin-------') exc = traceback.format_exc() print(exc.replace("\n", "<br/>")) print('-------------end-------') with open('C:/Users/1/Desktop/abc_123.ipynb',
def execute_nb(nb, path, nbargs=None):
    """Run ``nb`` through an ``ExecutePreprocessor`` and return it.

    :param nb: notebook object handed to ``ExecutePreprocessor.preprocess``
    :param path: value stored under ``metadata.path`` in the resources
    :param nbargs: optional dict of keyword arguments for the
        ``ExecutePreprocessor`` constructor (defaults to no arguments)
    :return: the same ``nb`` object that was passed in
    """
    if nbargs is None:
        nbargs = {}
    resources = {'metadata': {'path': path}}
    ExecutePreprocessor(**nbargs).preprocess(nb, resources)
    return nb
import string
import os
import sys
import json
import urllib.parse
import nbformat as nbf
import pandas as pd
# Extend the module search path before the remaining imports below.
# NOTE(review): presumably project helper modules live in app/static/py - confirm.
sys.path.append('app/static/py')
from nbconvert.preprocessors import ExecutePreprocessor
from nbconvert.preprocessors.execute import executenb

#############################################
########## 2. Variables
#############################################

##### 1. Notebook Execution #####
# Module-level notebook executor: 600 second timeout, kernel named 'venv'.
# NOTE(review): assumes a kernelspec called 'venv' is installed - confirm.
ep = ExecutePreprocessor(timeout=600, kernel_name='venv')

###
from nbconvert import HTMLExporter
from traitlets.config import Config
# HTML exporter configured with the ExtractOutputPreprocessor enabled.
c = Config()
c.HTMLExporter.preprocessors = [
    'nbconvert.preprocessors.ExtractOutputPreprocessor'
]
html_exporter_with_figs = HTMLExporter(config=c)

#################################################################
#################################################################
############### 1. Functions ####################################
#################################################################
#################################################################
def test_cli_datasource_new(mock_subprocess, caplog, monkeypatch,
                            empty_data_context, filesystem_csv_2):
    """Check that ``datasource new`` writes a notebook which, once run, adds a datasource.

    The CLI is driven with canned answers, the generated notebook is executed
    with ``ExecutePreprocessor`` and the data context is reloaded from disk to
    verify the datasource configuration that the notebook saved.
    """
    # Configuration expected in the reloaded context after the notebook ran
    data_connector_config = {
        "default_regex": {
            "group_names": "data_asset_name",
            "pattern": "(.*)",
        },
        "module_name": "great_expectations.datasource.data_connector",
        "base_directory": "../../filesystem_csv_2",
        "class_name": "InferredAssetFilesystemDataConnector",
    }
    expected_datasources = [{
        "name": "my_datasource",
        "class_name": "Datasource",
        "module_name": "great_expectations.datasource",
        "execution_engine": {
            "module_name": "great_expectations.execution_engine",
            "class_name": "PandasExecutionEngine",
        },
        "data_connectors": {
            "my_datasource_example_data_connector": data_connector_config,
        },
    }]

    context = empty_data_context
    root_dir = context.root_directory
    assert context.list_datasources() == []

    # Drive the CLI from the parent of the project root
    cli_runner = CliRunner(mix_stderr=False)
    monkeypatch.chdir(os.path.dirname(root_dir))
    result = cli_runner.invoke(
        cli,
        "--v3-api datasource new",
        input=f"1\n1\n{filesystem_csv_2}\n",
        catch_exceptions=False,
    )
    output = result.stdout

    # The CLI itself must not have created the datasource yet
    assert context.list_datasources() == []
    for prompt in (
            "What data would you like Great Expectations to connect to?",
            "What are you processing your files with?",
    ):
        assert prompt in output
    assert result.exit_code == 0

    uncommitted_dir = os.path.join(root_dir, context.GE_UNCOMMITTED_DIR)
    expected_notebook = os.path.join(uncommitted_dir, "datasource_new.ipynb")
    assert os.path.isfile(expected_notebook)
    mock_subprocess.assert_called_once_with(
        ["jupyter", "notebook", expected_notebook])

    # Run notebook
    with open(expected_notebook) as notebook_file:
        notebook = nbformat.read(notebook_file, as_version=4)
    executor = ExecutePreprocessor(timeout=600, kernel_name="python3")
    executor.preprocess(notebook, {"metadata": {"path": uncommitted_dir}})

    # Reload the context from disk to see what the notebook saved
    del context
    context = DataContext(root_dir)
    assert len(context.list_datasources()) == 1
    assert context.list_datasources() == expected_datasources

    assert_no_logging_messages_or_tracebacks(caplog, result)
def test_cli_datasource_new_connection_string(
    mock_subprocess, mock_emit, empty_data_context, empty_sqlite_db, caplog, monkeypatch
):
    """Check ``datasource new`` for a SQL datasource configured by connection string.

    Verifies the usage statistics events emitted by the CLI, then patches a
    connection string into the generated notebook, executes it and checks the
    datasource stored in the reloaded data context.
    """

    def usage_event(name, payload):
        # One expected call of the usage statistics emitter
        return mock.call(
            {"event": name, "event_payload": payload, "success": True}
        )

    # Undo the project-wide test default
    monkeypatch.delenv("GE_USAGE_STATS", raising=False)

    root_dir = empty_data_context.root_directory
    context: DataContext = empty_data_context
    assert context.list_datasources() == []

    cli_runner = CliRunner(mix_stderr=False)
    monkeypatch.chdir(os.path.dirname(context.root_directory))
    result = cli_runner.invoke(
        cli,
        "--v3-api datasource new",
        input="2\n6\n",
        catch_exceptions=False,
    )
    output = result.stdout

    assert "What data would you like Great Expectations to connect to?" in output
    assert result.exit_code == 0

    uncommitted_dir = os.path.join(root_dir, context.GE_UNCOMMITTED_DIR)
    expected_notebook = os.path.join(uncommitted_dir, "datasource_new.ipynb")
    assert os.path.isfile(expected_notebook)
    mock_subprocess.assert_called_once_with(["jupyter", "notebook", expected_notebook])

    expected_call_args_list = [
        usage_event("data_context.__init__", {}),
        usage_event("cli.datasource.new.begin", {"api_version": "v3"}),
        usage_event(
            "cli.new_ds_choice",
            {"type": "sqlalchemy", "db": "other", "api_version": "v3"},
        ),
        usage_event("cli.datasource.new.end", {"api_version": "v3"}),
    ]
    assert mock_emit.call_args_list == expected_call_args_list
    assert mock_emit.call_count == len(expected_call_args_list)

    # Run notebook
    with open(expected_notebook) as notebook_file:
        notebook = nbformat.read(notebook_file, as_version=4)

    # mock the user adding a connection string into the notebook by overwriting the right cell
    target_cell = notebook["cells"][5]
    assert "connection_string" in target_cell["source"]
    target_cell["source"] = ' connection_string = "sqlite://"'

    executor = ExecutePreprocessor(timeout=60, kernel_name="python3")
    executor.preprocess(notebook, {"metadata": {"path": uncommitted_dir}})

    # Reload the context from disk to see what the notebook saved
    del context
    context = DataContext(root_dir)

    connector_module = "great_expectations.datasource.data_connector"
    expected_datasources = [
        {
            "module_name": "great_expectations.datasource",
            "execution_engine": {
                "module_name": "great_expectations.execution_engine",
                "connection_string": "sqlite://",
                "class_name": "SqlAlchemyExecutionEngine",
            },
            "class_name": "Datasource",
            "data_connectors": {
                "default_runtime_data_connector_name": {
                    "batch_identifiers": ["default_identifier_name"],
                    "class_name": "RuntimeDataConnector",
                    "module_name": connector_module,
                },
                "default_inferred_data_connector_name": {
                    "class_name": "InferredAssetSqlDataConnector",
                    "module_name": connector_module,
                    "include_schema_name": True,
                },
            },
            "name": "my_datasource",
        }
    ]
    assert context.list_datasources() == expected_datasources

    assert_no_logging_messages_or_tracebacks(caplog, result)
# Find notebooks but not notebooks previously output from this script if f.endswith('.ipynb') and not f.endswith('_out.ipynb'): print(" - ", f) notebooks.append(f[:-6]) # Want the filename without '.ipynb' # Execute notebooks and output num_notebooks = len(notebooks) print('*****') for i, n in enumerate(notebooks): n_out = n + '_out' if not args.inplace else n with open(n + '.ipynb') as f: nb = nbformat.read(f, as_version=4) ep = ExecutePreprocessor(timeout=int(args.timeout), kernel_name=args.kernel) try: print('Running', n, ':', i, '/', num_notebooks) out = ep.preprocess(nb, {'metadata': {'path': args.run_path}}) except CellExecutionError as e: out = None msg = 'Error executing the notebook "%s".\n' % n msg += "{traceback}".format(traceback=e.traceback) # msg += 'See notebook "%s" for the traceback.' % n_out print(msg) except TimeoutError: msg = 'Timeout executing the notebook "%s".\n' % n print(msg) finally: # Write output file with open(n_out + '.ipynb', mode='wt') as f:
'IE', [85.8, 84.6, 84.7, 74.5, 66, 58.6, 54.7, 44.8, 36.2, 26.6, 20.1]) line_chart.add('Others', [14.2, 15.4, 15.3, 8.9, 9, 10.4, 8.9, 5.8, 6.7, 6.8, 7.5]) longtext = html_pygal.format(pygal_render=line_chart.render()) line_chart.render_to_png('./test.png') #send_html_email(subject, longtext) #send_html_email(subject, html_png) #send_html_email(subject, line_chart.render_response()) #line_chart.render_to_file('test.svg') import nbformat from nbconvert.preprocessors import ExecutePreprocessor with open('/home/pi/practise/python_learning/test1.ipynb') as f: nb = nbformat.read(f, as_version=4) ep = ExecutePreprocessor(timeout=600) ep.preprocess(nb, {'metadata': {'path': '/home/pi/practise/python_learning'}}) from traitlets.config import Config from nbconvert import HTMLExporter html_exporter = HTMLExporter() #html_exporter.template_file = 'basic' (body, resources) = html_exporter.from_notebook_node(nb) send_html_email(subject, body)
def test_example_notebooks(tmp_path, ipynb):
    """Execute the notebook at ``ipynb`` using ``tmp_path`` as execution path.

    The test passes when ``ExecutePreprocessor.preprocess`` returns without
    raising.
    """
    with open(ipynb) as notebook_file:
        notebook = nbformat.read(notebook_file, as_version=4)

    resources = {"metadata": {"path": tmp_path}}
    executor = ExecutePreprocessor(timeout=600, kernel_name="python3")
    executor.preprocess(notebook, resources)
def execute_jupyter_notebook(notebook):
    """Execute a jupyter notebook and return the execution result.

    Args:
        notebook: path of the notebook file; it is opened for reading and
            also used as the file name of the first stack entry on error.

    Returns:
        A dict describing the outcome:
        - ``{STATUS_CODE: SUCCESS}`` when every cell ran;
        - ``{STATUS_CODE: TIMEOUT}`` when a ``TimeoutError`` was raised;
        - a dict with ``STATUS_CODE: EXCEPTION`` plus the exception name,
          message, file name, line number, line and stack when a cell failed;
        - the result of ``_get_exception_information(code=UNKNOWN_EXCEPTION)``
          for any other exception, including failure to find the error output.
    """
    # Import Jupyter tools. Done in Jupyter scope so that they do not need to
    # be installed while validating plain Python snippets.
    import nbformat
    from nbconvert.preprocessors import CellExecutionError, ExecutePreprocessor

    # Execute
    try:
        # Open notebook file and read into a notebook object
        # The notebook object is automatically converted to version 4 because
        # nbconvert will only handle the most recent notebook format
        # https://github.com/ipython/ipython/issues/6992#issuecomment-63746907
        with open(notebook, 'r') as fd:
            original_nb = nbformat.read(fd, as_version=NBFORMAT_V4)

        # Make sure that the cells entry exists
        if CELLS not in original_nb:
            original_nb[CELLS] = []

        # Create a copy of the original notebook and clean any previous
        # execution outputs and metadata from cells. We do this so that after
        # execution, we're guaranteed that the outputs present are from us
        # running the cells in order, not old execution results.
        nb = deepcopy(original_nb)
        for cell in nb[CELLS]:
            if cell[CELL_TYPE] == CODE:
                cell[EXECUTION_COUNT] = None
                cell[METADATA] = {}
                cell[OUTPUTS] = []

        # Create execution preprocessor.
        # timeout=None disables cell execution timeout. We disable cell timeout
        # in Jupyter because we want to measure timeout of the entire notebook.
        # NOTE(review): the 'exception_handler' extension presumably formats
        # tracebacks so that they can be parsed below - confirm.
        preprocessor = ExecutePreprocessor(
            kernel_name=KERNEL,
            timeout=None,
            extra_arguments=[
                '--InteractiveShellApp.extra_extension=exception_handler',
                '--colors=NoColor',
            ]
        )

        # Run notebook
        try:
            # Set allowed timeout seconds to be JUPYTER_BASE_TIMEOUT_SECONDS
            # plus an additional JUPYTER_CELL_TIMEOUT_SECONDS for each cell.
            # Jupyter notebooks often take longer to run than snippets.
            seconds = (
                JUPYTER_BASE_TIMEOUT_SECONDS +
                JUPYTER_CELL_TIMEOUT_SECONDS * len(nb[CELLS])
            )

            # Execute with timeout.
            logger.info(
                'Running ExecutePreprocessor on notebook with {} '
                'second timeout.'.format(seconds)
            )
            with Timeout(seconds=seconds):
                preprocessor.preprocess(nb, {})

            # Return success
            logger.info('Execution succeeded')
            return {STATUS_CODE: SUCCESS}

        except TimeoutError:
            logger.info('Execution timed out')
            return {STATUS_CODE: TIMEOUT}

        except CellExecutionError:
            # CellExecutionError indicates that one of the cells from
            # the notebook has an error output. Look for the first error
            # output from an executed code cell. We do this to get the
            # error name, message, and traceback in a structured format.
            logger.info('CellExecutionError, parsing root error')

            # Get all notebook code cells
            code_cells = list(
                cell for cell in nb[CELLS] if cell[CELL_TYPE] == CODE
            )

            # Find the first error output
            lines = 0
            error = None
            for cell in code_cells:
                # Look for an error output from the cell
                error_output = next(
                    (o for o in cell[OUTPUTS] if o[OUTPUT_TYPE] == ERROR),
                    None
                )

                # If there was an error output, save it. Otherwise, increment
                # the total number of source lines seen.
                if error_output:
                    error = error_output
                    break
                else:
                    lines += len(cell[SOURCE].split('\n'))

            # Raise exception if unable to find the error output.
            # This is caught by the outer ``except BaseException`` below.
            if not error:
                raise Exception('Unable to find notebook error output')

            # And parse the stack: every traceback entry is a Python literal
            stack = list(map(ast.literal_eval, error[TRACEBACK]))

            # Override the file name for the input script
            # NOTE(review): item assignment assumes entries are lists - confirm.
            stack[0][0] = notebook

            # Increment the line number to include all lines in earlier cells
            stack[0][1] += lines

            # Get the summary for the line that raised the exception
            e_filename, e_lineno, _, e_line = stack[-1]

            # Return status
            return {
                STATUS_CODE: EXCEPTION,
                EXCEPTION_NAME: error[ENAME],
                EXCEPTION_MESSAGE: error[EVALUE],
                EXCEPTION_FILE_NAME: e_filename,
                EXCEPTION_LINE_NUMBER: e_lineno,
                EXCEPTION_LINE: e_line,
                EXCEPTION_STACK: stack
            }

    except BaseException as e:
        # Anything not handled above is reported as an unknown exception
        logger.info('Execution produced an exception.')
        logger.error(e)
        return _get_exception_information(code=UNKNOWN_EXCEPTION)
def execute(self, force=False):
    """
    Execute the notebook at ``self.nb_path`` and write the executed
    notebook to ``self.executed_nb_path``.

    The notebook metadata key ``docs_executed`` records the outcome
    (``'executed'`` or ``'errored'``). A notebook already marked as
    ``'executed'`` is skipped unless ``force`` is set.

    Parameters
    ----------
    force : bool, optional
        Execute the notebook even when its metadata says it was already
        executed.

    Returns
    -------
    executed_nb_path : str
        ``self.executed_nb_path``, whether or not the notebook was run.
    """
    # Notebooks are written below as UTF-8, so read them the same way
    # instead of relying on the platform default encoding.
    with open(self.nb_path, encoding='utf-8') as f:
        nb = nbformat.read(f, as_version=IPYTHON_VERSION)

    is_executed = nb['metadata'].get('docs_executed')

    if is_executed == 'executed' and not force:
        _logger.info(
            f"Notebook {self.nb} in {self.nb_dir} already executed, skipping"
        )
    else:
        # Execute the notebook
        _logger.info(f"Executing notebook {self.nb} in {self.nb_dir}")
        t0 = time.time()

        clear_executor = ClearOutputPreprocessor()
        executor = ExecutePreprocessor(**self.execute_kwargs)

        # First clean up the notebook and remove any cells that have been run
        clear_executor.preprocess(nb, {})

        try:
            executor.preprocess(nb, {'metadata': {'path': self.nb_dir}})
            execute_dict = {'docs_executed': 'executed'}
            nb['metadata'].update(execute_dict)
        except CellExecutionError as err:
            # A failing cell is recorded in the metadata and logged; the
            # partially executed notebook is still written below.
            execute_dict = {'docs_executed': 'errored'}
            nb['metadata'].update(execute_dict)
            _logger.error(f"Error executing notebook {self.nb}")
            _logger.error(err)

        _logger.info(f"Finished running notebook ({time.time() - t0})")

        _logger.info(
            f"Writing executed notebook to {self.executed_nb_path}")

        # Makes sure original notebook isn't left blank in case of error during writing
        if self.overwrite:
            # Write to a temporary file first, then copy it over the target
            with open(self.temp_nb_path, 'w', encoding='utf-8') as f:
                nbformat.write(nb, f)
            shutil.copyfile(self.temp_nb_path, self.executed_nb_path)
            os.remove(self.temp_nb_path)
        else:
            with open(self.executed_nb_path, 'w', encoding='utf-8') as f:
                nbformat.write(nb, f)

    return self.executed_nb_path
def execute(self, path):
    """Read the notebook stored at ``path`` (as version 4) and execute it.

    Execution uses the ``python3`` kernel with a 600 second timeout.
    Nothing is returned and the executed notebook is not saved.
    """
    with open(path) as notebook_file:
        notebook = nbformat.read(notebook_file, as_version=4)

    executor = ExecutePreprocessor(timeout=600, kernel_name='python3')
    executor.preprocess(notebook)
def notebook_tester(fname, kernelspec='python'):
    """Load the notebook ``fname`` with ``Exporter`` and execute it.

    The kernel name in the notebook metadata is overridden with
    ``kernelspec`` before execution. The preprocessor is created with
    ``timeout=-1``.
    """
    exported = Exporter().from_filename(fname)

    # Force the requested kernel, creating the kernelspec entry if needed
    kernel_metadata = exported[0].metadata.setdefault('kernelspec', {})
    kernel_metadata['name'] = kernelspec

    ExecutePreprocessor(timeout=-1).preprocess(*exported)
def execute_notebook(nb_path, serial_number=None, baud=None, allow_errors=True, SCOPETYPE='OPENADC', PLATFORM='CWLITEARM', **kwargs):
    """Execute a notebook via nbconvert and collect output.

    The notebook parameters are replaced with ``SCOPETYPE``, ``PLATFORM``
    and ``kwargs``; some source substitutions are applied, then the notebook
    is executed from its own directory.

    :param nb_path: path of the notebook to execute
    :param serial_number: when set, scope creation calls found in the
        notebook are rewritten to pass ``sn='<serial_number>'``
    :param baud: when set, ``program_target(...)`` calls get an extra
        ``baud`` argument
    :param allow_errors: forwarded to ``ExecutePreprocessor``
    :param SCOPETYPE: value of the ``SCOPETYPE`` notebook parameter
    :param PLATFORM: value of the ``PLATFORM`` notebook parameter
    :param kwargs: additional notebook parameters
    :returns (parsed nb object, execution errors, export kwargs) where the
        errors are ``[cell number, error output]`` pairs (cell numbers start
        at 1) and export kwargs is a dict with ``SCOPETYPE`` and ``PLATFORM``
    """
    notebook_dir = os.path.split(nb_path)[0]
    real_path = Path(nb_path).absolute()

    with open(real_path, encoding='utf-8') as nbfile:
        nb = nbformat.read(nbfile, as_version=4)

    orig_parameters = extract_parameters(nb)
    params = parameter_values(orig_parameters, SCOPETYPE=SCOPETYPE, PLATFORM=PLATFORM, **kwargs)
    kwargs['SCOPETYPE'] = SCOPETYPE
    kwargs['PLATFORM'] = PLATFORM
    put_all_kwargs_in_notebook(params, **kwargs)
    nb = replace_definitions(nb, params, execute=False)

    ep = ExecutePreprocessor(timeout=None, kernel_name='python3', allow_errors=allow_errors)

    if serial_number or baud:
        ip = InLineCodePreprocessor(notebook_dir)
        # inline all code before doing any replacements
        nb, _ = ip.preprocess(nb, {})

    # Maps a regular expression to its replacement
    replacements = {}

    if serial_number:
        replacements.update({
            r'cw.scope(\(\))': 'cw.scope(sn=\'{}\')'.format(serial_number),
            # The parentheses must be escaped: an unescaped "()" is an empty
            # group, which leaves the notebook's own "()" behind and yields
            # "chipwhisperer.scope(sn='...')()".
            r'chipwhisperer\.scope\(\)': 'chipwhisperer.scope(sn=\'{}\')'.format(serial_number)
        })

    if baud:
        # NOTE(review): the inserted baud rate is hard-coded to 38400 and the
        # value of ``baud`` is ignored - confirm whether that is intended.
        replacements.update({
            r'program_target\(((?:[\w=\+/*\s]+\s*,\s*)*[\w=+/*]+)': r"program_target(\g<1>, baud=38400"
        })

    # %matplotlib notebook won't show up in blank plots
    # so replace with %matplotlib inline for now
    replacements.update({'%matplotlib notebook': '%matplotlib inline'})

    # complete all regex substitutions
    if replacements:
        rp = RegexReplacePreprocessor(replacements)
        nb, _ = rp.preprocess(nb, {})

    # Execute from the notebook's directory when the path has one
    if notebook_dir:
        with cd(notebook_dir):
            nb, _ = ep.preprocess(nb, {'metadata': {'path': './'}})
    else:
        nb, _ = ep.preprocess(nb, {'metadata': {'path': './'}})

    errors = [
        [i + 1, output]
        for i, cell in enumerate(nb.cells) if "outputs" in cell
        for output in cell["outputs"]
        if output.output_type == "error"
    ]

    export_kwargs = {'SCOPETYPE': SCOPETYPE, 'PLATFORM': PLATFORM}

    return nb, errors, export_kwargs
tutorials = [] for fname in sorted(glob.glob1(src_dir, '*.ipynb')): basename = fname[:-6] output_ipynb_fname = os.path.join(target_dir, fname) output_rst_fname = os.path.join(target_dir, basename + '.rst') print('Running', fname) with open(os.path.join(src_dir, fname), 'r') as f: notebook = reads(f.read()) # The first line of the tutorial file should give the title title = notebook.cells[0]['source'].split('\n')[0].strip('# ') tutorials.append((basename, title)) # Execute the notebook preprocessor = ExecutePreprocessor() preprocessor.allow_errors = True notebook, _ = preprocessor.preprocess(notebook, {'metadata': { 'path': src_dir }}) print('Saving notebook and converting to RST') exporter = NotebookExporter() output, _ = exporter.from_notebook_node(notebook) with codecs.open(output_ipynb_fname, 'w', encoding='utf-8') as f: f.write(output) # Insert a note about ipython notebooks with a download link note = deindent(u''' .. only:: html
def execute_notebook(filename):
    """Execute the notebook stored in ``filename``.

    The notebook is read as version 4 and run with the ``python3`` kernel
    (600 second timeout), using the directory part of ``filename`` as
    execution path. The executed notebook is neither returned nor saved.
    """
    with open(filename) as notebook_file:
        notebook = nbformat.read(notebook_file, as_version=4)

    executor = ExecutePreprocessor(timeout=600, kernel_name='python3')
    resources = {'metadata': {'path': os.path.dirname(filename)}}
    executor.preprocess(notebook, resources)
def run_notebook(notebook_path, out_path=None, nb_suffix='-out',
                 nb_kwargs=None, hide_input=True, insert_pos=1,
                 timeout=3600, execute_kwargs=None,
                 ):
    """Runs a notebook and saves the output in a new notebook.

    Executes a notebook, optionally passing "arguments" in a way roughly
    similar to passing arguments to a function.
    Notebook arguments are passed in a dictionary (`nb_kwargs`) which is
    converted to a string containing python code, then inserted in the
    notebook as a code cell. The code contains only assignments of variables
    which can be used to control the execution of a suitably written notebook.
    When calling a notebook, you need to know which arguments (variables) to
    pass. Differently from functions, no check on the input arguments is
    performed. The "notebook signature" is only informally declared in a
    conventional markdown cell at the beginning of the notebook.

    The output notebook is written even when the execution fails; the
    timestamp cell is only added after a successful execution.

    Arguments:
        notebook_path (path-like object): path of the notebook to be
            executed. Valid values are strings or pathlib.Path objects.
        out_path (path-like or None): folder where to save the output
            notebook. If None, saves the notebook in the same folder as the
            template. Valid values are strings or pathlib.Path objects.
        nb_suffix (string): suffix to append to the file name of the executed
            notebook.
        nb_kwargs (dict or None): If not None, this dict is converted to a
            string of python assignments with keys representing variables
            names and values variables content. This string is inserted as
            code-cell in the notebook to be executed. With None or an empty
            dict no cell is inserted.
        hide_input (bool): whether to create a notebook with input cells
            hidden (useful to remind user that the auto-generated output
            is not meant to have the code edited.
        insert_pos (int): position of insertion of the code-cell containing
            the input arguments. Default is 1 (i.e. second cell). With this
            default, the input notebook can define, in the first cell, default
            values of input arguments (used when the notebook is executed
            with no arguments or through the Notebook GUI).
        timeout (int): timeout in seconds after which the execution is
            aborted.
        execute_kwargs (dict): additional arguments passed to
            `ExecutePreprocessor`.

    Raises:
        FileNotFoundError: if `notebook_path` is not a file or `out_path`
            does not exist.
        Exception: any error raised while executing the notebook is
            re-raised after the output notebook has been saved.
    """
    timestamp_cell = ("**Executed:** %s\n\n**Duration:** %d seconds.\n\n"
                      "**Autogenerated from:** [%s](%s)")
    if nb_kwargs is not None:
        header = '# Cell inserted during automated execution.'
        code = dict_to_code(nb_kwargs)
        code_cell = '\n'.join((header, code))

    notebook_path = Path(notebook_path)
    if not notebook_path.is_file():
        raise FileNotFoundError("Path '%s' not found." % notebook_path)
    if out_path is None:
        out_path = notebook_path.parent
    out_path = Path(out_path)
    if not out_path.exists():
        raise FileNotFoundError("Output path '%s' not found." % out_path)
    out_notebook_path = (out_path /
                         ('%s%s.ipynb' % (notebook_path.stem, nb_suffix)))
    display(FileLink(str(notebook_path)))

    if execute_kwargs is None:
        execute_kwargs = {}
    ep = ExecutePreprocessor(timeout=timeout, **execute_kwargs)
    nb = nbformat.read(str(notebook_path), as_version=4)
    if hide_input:
        nb["metadata"].update({"hide_input": True})

    # Truthiness check instead of len(): nb_kwargs defaults to None, for
    # which len() raises a TypeError. An empty dict still inserts nothing.
    if nb_kwargs:
        nb['cells'].insert(insert_pos, nbformat.v4.new_code_cell(code_cell))

    start_time = time.time()
    try:
        # Execute the notebook
        ep.preprocess(nb, {'metadata': {'path': './'}})
    except BaseException:
        # Execution failed, print a message then raise.
        msg = 'Error executing the notebook "%s".\n\n' % notebook_path
        msg += 'See notebook "%s" for the traceback.' % out_notebook_path
        print(msg)
        raise
    else:
        # On successful execution, add timestamping cell
        duration = time.time() - start_time
        timestamp_cell = timestamp_cell % (time.ctime(start_time), duration,
                                           notebook_path, out_notebook_path)
        nb['cells'].insert(0, nbformat.v4.new_markdown_cell(timestamp_cell))
    finally:
        # Save the notebook even when it raises an error
        nbformat.write(nb, str(out_notebook_path))
        display(FileLink(str(out_notebook_path)))
def gen_tutorials(repo_dir: str, exec_tutorials: bool, kernel_name: Optional[str] = None) -> None:
    """Generate HTML tutorials for Docusaurus Ax site from Jupyter notebooks.

    Also create ipynb and py versions of tutorial in Docusaurus site for
    download.

    Args:
        repo_dir: root of the repository; the configuration is read from
            ``website/tutorials.json`` and notebooks from ``tutorials/``.
        exec_tutorials: whether to execute notebooks before converting them.
            A tutorial is only executed if its config entry does not set
            ``exec_on_build`` to a false value.
        kernel_name: if given, passed to ``ExecutePreprocessor`` as
            ``kernel_name``.

    Raises:
        Exception: after all tutorials were processed, if the execution of
            at least one of them failed.
    """
    has_errors = False

    with open(os.path.join(repo_dir, "website", "tutorials.json"), "r") as infile:
        tutorial_config = json.loads(infile.read())

    # flatten config dict
    tutorial_configs = [
        config for category in tutorial_config.values() for config in category
    ]

    # prepare paths for converted tutorials & files
    os.makedirs(os.path.join(repo_dir, "website", "_tutorials"), exist_ok=True)
    os.makedirs(os.path.join(repo_dir, "website", "static", "files"), exist_ok=True)

    for config in tutorial_configs:
        tid = config["id"]
        t_dir = config.get("dir")
        exec_on_build = config.get("exec_on_build", True)

        print("Generating {} tutorial".format(tid))

        if t_dir is not None:
            # tutorial lives in its own sub-directory: mirror that directory
            # in every output location
            tutorial_dir = os.path.join(repo_dir, "tutorials", t_dir)
            html_dir = os.path.join(repo_dir, "website", "_tutorials", t_dir)
            js_dir = os.path.join(repo_dir, "website", "pages", "tutorials", t_dir)
            py_dir = os.path.join(repo_dir, "website", "static", "files", t_dir)

            for d in [tutorial_dir, html_dir, js_dir, py_dir]:
                os.makedirs(d, exist_ok=True)

            tutorial_path = os.path.join(tutorial_dir, "{}.ipynb".format(tid))
            html_path = os.path.join(html_dir, "{}.html".format(tid))
            js_path = os.path.join(js_dir, "{}.js".format(tid))
            ipynb_path = os.path.join(py_dir, "{}.ipynb".format(tid))
            py_path = os.path.join(py_dir, "{}.py".format(tid))
            # only defined (and only used) for tutorials with a directory
            tar_path = os.path.join(py_dir, "{}.tar.gz".format(tid))
        else:
            tutorial_dir = os.path.join(repo_dir, "tutorials")
            tutorial_path = os.path.join(repo_dir, "tutorials", "{}.ipynb".format(tid))
            html_path = os.path.join(repo_dir, "website", "_tutorials", "{}.html".format(tid))
            js_path = os.path.join(repo_dir, "website", "pages", "tutorials", "{}.js".format(tid))
            ipynb_path = os.path.join(repo_dir, "website", "static", "files", "{}.ipynb".format(tid))
            py_path = os.path.join(repo_dir, "website", "static", "files", "{}.py".format(tid))

        # load notebook
        with open(tutorial_path, "r") as infile:
            nb_str = infile.read()
            nb = nbformat.reads(nb_str, nbformat.NO_CONVERT)

        # track total exec time (non-None if exec_on_build=True)
        total_time = None

        if exec_tutorials and exec_on_build:
            print("Executing tutorial {}".format(tid))
            kwargs = {
                "kernel_name": kernel_name
            } if kernel_name is not None else {}
            # 2.5 hours, in seconds
            timeout = int(60 * 60 * 2.5)
            ep = ExecutePreprocessor(timeout=timeout, **kwargs)
            start_time = time.time()

            # try / catch failures for now
            # will re-raise at the end
            try:
                # execute notebook, using `tutorial_dir` as working directory
                ep.preprocess(nb, {"metadata": {"path": tutorial_dir}})
                total_time = time.time() - start_time
                print(
                    "Done executing tutorial {}. Took {:.2f} seconds.".format(
                        tid, total_time))
            except Exception as exc:
                has_errors = True
                print("Couldn't execute tutorial {}!".format(tid))
                print(exc)
                total_time = None

        # convert notebook to HTML
        exporter = HTMLExporter()
        html, meta = exporter.from_notebook_node(nb)

        # pull out html div for notebook
        soup = BeautifulSoup(html, "html.parser")
        nb_meat = soup.find("div", {"id": "notebook-container"})
        del nb_meat.attrs["id"]
        nb_meat.attrs["class"] = ["notebook"]

        # when output html, iframe it (useful for Ax reports)
        for html_div in nb_meat.findAll("div", {"class": "output_html"}):
            if html_div.html is not None:
                iframe = soup.new_tag("iframe")
                iframe.attrs["src"] = "data:text/html;charset=utf-8," + str(
                    html_div.html)
                # replace `#` in CSS
                iframe.attrs["src"] = iframe.attrs["src"].replace("#", "%23")
                html_div.contents = [iframe]

        html_out = MOCK_JS_REQUIRES + str(nb_meat)

        # generate HTML file
        with open(html_path, "w") as html_outfile:
            html_outfile.write(html_out)

        # generate JS file
        t_dir_js = t_dir if t_dir else ""
        script = TEMPLATE.format(
            t_dir=t_dir_js,
            tid=tid,
            total_time=total_time if total_time is not None else "null",
        )
        with open(js_path, "w") as js_outfile:
            js_outfile.write(script)

        # output tutorial in both ipynb & py form
        nbformat.write(nb, ipynb_path)
        exporter = ScriptExporter()
        script, meta = exporter.from_notebook_node(nb)
        with open(py_path, "w") as py_outfile:
            py_outfile.write(script)

        # create .tar archive (if necessary)
        if t_dir is not None:
            with tarfile.open(tar_path, "w:gz") as tar:
                tar.add(tutorial_dir, arcname=os.path.basename(tutorial_dir))

    # execution failures were only recorded above; fail once everything
    # has been generated
    if has_errors:
        raise Exception(
            "There are errors in tutorials, will not continue to publish")
def run_nb(fn):
    """Execute every cell of the notebook stored at ``fn``.

    The notebook is parsed without any format-version conversion and then run
    through an ``ExecutePreprocessor`` created with ``timeout=600``. The
    executed notebook is neither written back to disk nor returned; errors
    raised while reading or executing propagate to the caller.

    Args:
        fn: Path of the notebook file to run.
    """
    # Read inside a context manager so the file handle is closed as soon as
    # the notebook is parsed, instead of being left to the garbage collector.
    with open(fn) as nb_file:
        nb = nbformat.read(nb_file, as_version=nbformat.NO_CONVERT)
    # TODO: filter out export cells
    print(f"Doing {fn}")
    ExecutePreprocessor(timeout=600).preprocess(nb, {})
# Get the desired ipynb file path and parse into components _, fpath, outdir = sys.argv basedir, fname = os.path.split(fpath) fstem = fname[:-6] # Read the notebook with open(fpath) as f: nb = nbformat.read(f, as_version=4) # Run the notebook kernel = os.environ.get("NB_KERNEL", None) if kernel is None: kernel = nb["metadata"]["kernelspec"]["name"] ep = ExecutePreprocessor(timeout=600, kernel_name=kernel, extra_arguments=["--rc figure.dpi=88"]) ep.preprocess(nb, {"metadata": {"path": basedir}}) # Remove plain text execution result outputs for cell in nb.get("cells", {}): if "show-output" in cell["metadata"].get("tags", []): continue fields = cell.get("outputs", []) for field in fields: if field["output_type"] == "execute_result": data_keys = field["data"].keys() for key in list(data_keys): if key == "text/plain": field["data"].pop(key) if not field["data"]:
default="png") parser.add_argument('--execute', type=bool, help='execute notebook?', default=True) parser.parse_args() try: args = parser.parse_args() except SystemExit: sys.exit(0) inputFile = args.input outputFile = args.output with open(inputFile) as fpin: text = fpin.read() text += """ # <markdowncell> # If you can read this, reads_py() is no longer broken! """ nbook = v3.reads_py(text) nb = v4.upgrade(nbook) # Upgrade v3 to v4 if args.execute: ep = ExecutePreprocessor(timeout=-1,\ extra_arguments=["--InlineBackend.figure_format="+args.image]) ep.preprocess(nb, {}) with open(outputFile, 'w', encoding='utf-8') as f: nbformat.write(nb, f)
def _preproc():
    """Return an ExecutePreprocessor targeting the running Python's major version.

    The kernel name is ``python`` followed by the interpreter's major version
    number. The preprocessor is created with ``timeout=120`` and
    ``interrupt_on_timeout=True``.
    """
    major_version = sys.version_info[0]
    kernel = 'python{}'.format(major_version)
    return ExecutePreprocessor(
        timeout=120,
        kernel_name=kernel,
        interrupt_on_timeout=True,
    )
def setUp(self):
    """Attach a fresh ExecutePreprocessor to the test case as ``self.ep``.

    The kernel name is ``python`` followed by the major version of the
    interpreter running the tests.
    """
    # see http://nbconvert.readthedocs.io/en/stable/execute_api.html
    kernel = 'python{}'.format(sys.version_info[0])
    self.ep = ExecutePreprocessor(
        timeout=600,
        kernel_name=kernel,
        interrupt_on_timeout=True,
    )
def main(arglist):
    """Execute the given notebooks, then write instructor and student versions.

    Only paths ending in ``.ipynb`` that do not contain ``student/`` are
    processed. Each notebook is executed top to bottom; failures are collected
    per path instead of aborting the run. If any notebook failed, or
    ``args.check_only`` is set, the collected errors are handed to ``exit``
    right after execution. Otherwise each executed notebook is written back in
    place, and a student version, solution images and solution snippets are
    written to the ``student``, ``static`` and ``solutions`` sub-directories.

    Args:
        arglist: Command-line arguments, forwarded to ``parse_args``.
    """
    args = parse_args(arglist)

    # Keep only notebook files that are not already student versions
    nb_paths = []
    for candidate in args.files:
        if candidate.endswith(".ipynb") and "student/" not in candidate:
            nb_paths.append(candidate)

    if not nb_paths:
        print("No notebook files found")
        sys.exit(0)

    # The environment may override the kernel name stored in the notebook
    exec_kws = {"timeout": 600}
    if "NB_KERNEL" in os.environ:
        exec_kws["kernel_name"] = os.environ["NB_KERNEL"]

    # Failures are recorded here and reported once every notebook was tried
    errors = {}
    notebooks = {}

    for nb_path in nb_paths:

        # Parse the notebook without converting its format version
        with open(nb_path) as nb_file:
            nb = nbformat.read(nb_file, nbformat.NO_CONVERT)

        if not sequentially_executed(nb) and args.require_sequntial:
            errors[nb_path] = (
                "Notebook is not sequentially executed on a fresh kernel."
                "\n"
                "Please do 'Restart and run all' before pushing to Github."
            )
            continue

        # Execute from top to bottom; record the failure and move on
        print(f"Executing {nb_path}")
        executor = ExecutePreprocessor(**exec_kws)
        try:
            executor.preprocess(nb)
        except Exception as err:
            errors[nb_path] = err
            continue
        notebooks[nb_path] = nb

    if errors or args.check_only:
        exit(errors)

    # TODO Check compliancy with PEP8, generate a report, but don't fail

    # TODO Check notebook name format?
    # (If implemented, update the CI workflow to only run on tutorials)

    # Strip solution code and write out both versions of each notebook
    for nb_path, nb in notebooks.items():

        # Split the path into directory, file name and extension-less name
        nb_dir, nb_fname = os.path.split(nb_path)
        nb_name = os.path.splitext(nb_fname)[0]

        # Point every Colab badge in the notebook at the master branch
        for cell in nb.get("cells", []):
            if has_colab_badge(cell):
                redirect_colab_badge_to_master_branch(cell)

        # Make the Colab metadata name follow the file name
        if "colab" in nb["metadata"]:
            nb["metadata"]["colab"]["name"] = f"NeuromatchAcademy_{nb_name}"

        # Overwrite the original file with its executed version
        print(f"Writing complete notebook to {nb_path}")
        with open(nb_path, "w") as out_file:
            nbformat.write(nb, out_file)

        # Create the output sub-directories if they are missing
        student_dir = make_sub_dir(nb_dir, "student")
        static_dir = make_sub_dir(nb_dir, "static")
        solutions_dir = make_sub_dir(nb_dir, "solutions")

        # Build the student version together with the extracted artefacts
        print(f"Extracting solutions from {nb_path}")
        student_nb, static_images, solution_snippets = extract_solutions(
            nb, nb_dir, nb_name
        )

        # In the student copy the Colab badge points at the student version
        for cell in student_nb.get("cells", []):
            if has_colab_badge(cell):
                redirect_colab_badge_to_student_version(cell)

        # Save the student notebook under the same file name
        student_nb_path = os.path.join(student_dir, nb_fname)
        print(f"Writing student notebook to {student_nb_path}")
        with open(student_nb_path, "w") as out_file:
            nbformat.write(student_nb, out_file)

        # Save the images that were pulled out of the solution cells
        print(f"Writing solution images to {static_dir}")
        for image_name, image in static_images.items():
            image.save(image_name.replace("static", static_dir))

        # Save the solution code snippets
        print(f"Writing solution snippets to {solutions_dir}")
        for snippet_name, snippet in solution_snippets.items():
            snippet_path = snippet_name.replace("solutions", solutions_dir)
            with open(snippet_path, "w") as out_file:
                out_file.write(snippet)

    exit(errors)
def jupytext_single_file(nb_file, args, log):
    """Apply the jupytext command, with the given arguments, to a single file.

    The notebook is read, optionally given a kernel and updated metadata,
    optionally loaded from its paired files, piped through external commands,
    executed, and finally either round-trip tested, written to a destination
    and/or synchronized with its paired representations.

    Args:
        nb_file: Path of the input notebook, or ``'-'`` to read from stdin.
        args: Parsed command line arguments. The attributes read here include
            ``output``, ``to``, ``input_format``, ``format_options``, ``sync``,
            ``set_kernel``, ``set_formats``, ``update_metadata``, ``pipe``,
            ``pipe_fmt``, ``check``, ``execute``, ``test``, ``test_strict``,
            ``update``, ``stop_on_first_error`` and ``pre_commit``.
            ``args.quiet`` is set to True when the destination is stdout, and
            ``args.update_metadata`` receives a ``'kernelspec'`` entry when a
            kernel is set.
        log: Callable that takes a message string.

    Returns:
        ``1`` when the round trip test reports a difference, ``0`` otherwise
        (including when ``--sync`` is requested on a notebook that is not
        paired).

    Raises:
        ValueError: When asked to sync a notebook on stdin, when no language
            or no kernel can be found for ``--set-kernel -``, or when
            ``--update`` targets a file that is not an ``.ipynb`` file.
        KeyError: When the requested kernel name is unknown.
    """
    if nb_file == '-' and args.sync:
        raise ValueError('Cannot sync a notebook on stdin')

    # Destination priority: explicit --output, then the path derived from
    # --to (stdout when reading from stdin), else no destination yet.
    nb_dest = args.output or (None if not args.to else (
        '-' if nb_file == '-' else full_path(
            base_path(nb_file, args.input_format), args.to)))

    # Just acting on metadata / pipe => save in place
    if not nb_dest and not args.sync:
        nb_dest = nb_file

    # The notebook itself goes to stdout, so log messages are silenced
    if nb_dest == '-':
        args.quiet = True

    # I. ### Read the notebook ###
    fmt = copy(args.input_format) or {}
    set_format_options(fmt, args.format_options)
    log('[jupytext] Reading {}{}'.format(
        nb_file if nb_file != '-' else 'stdin', ' in format {}'.format(
            short_form_one_format(fmt)) if 'extension' in fmt else ''))

    notebook = read(nb_file, fmt=fmt)
    if not fmt:
        # No input format was given: take it from the notebook's
        # text_representation metadata when its extension matches the file,
        # otherwise fall back to the bare file extension.
        text_representation = notebook.metadata.get('jupytext', {}).get(
            'text_representation', {})
        ext = os.path.splitext(nb_file)[1]
        if text_representation.get('extension') == ext:
            fmt = {
                key: text_representation[key]
                for key in text_representation
                if key in ['extension', 'format_name']
            }
        elif ext:
            fmt = {'extension': ext}

    # Compute actual extension when using script/auto, and update nb_dest if necessary
    dest_fmt = args.to
    if dest_fmt and dest_fmt['extension'] == '.auto':
        dest_fmt = check_auto_ext(dest_fmt, notebook.metadata, '--to')
        if not args.output and nb_file != '-':
            nb_dest = full_path(base_path(nb_file, args.input_format),
                                dest_fmt)

    # Set the kernel
    set_kernel = args.set_kernel
    # Executing a notebook that has no kernel name behaves like '--set-kernel -'
    if (not set_kernel) and args.execute and notebook.metadata.get(
            'kernelspec', {}).get('name') is None:
        set_kernel = '-'

    if set_kernel:
        if set_kernel == '-':
            # Infer the kernel from the notebook language.
            # NOTE(review): when 'main_language' is missing and the notebook
            # has no 'kernelspec' entry, the subscript below looks like it
            # fails before the ValueError is reached - confirm.
            language = notebook.metadata.get('jupytext', {}).get('main_language') \
                or notebook.metadata['kernelspec']['language']
            if not language:
                raise ValueError(
                    'Cannot infer a kernel as notebook language is not defined'
                )
            kernelspec = kernelspec_from_language(language)
            if not kernelspec:
                raise ValueError('Found no kernel for {}'.format(language))
        else:
            # An explicit kernel name was given: look it up, then keep only
            # its name, language and display name.
            try:
                kernelspec = get_kernel_spec(set_kernel)
            except KeyError:
                raise KeyError('Please choose a kernel name among {}'.format(
                    find_kernel_specs().keys()))
            kernelspec = {
                'name': args.set_kernel,
                'language': kernelspec.language,
                'display_name': kernelspec.display_name
            }
        log("[jupytext] Setting kernel {}".format(kernelspec.get('name')))
        # The kernel is applied through the generic metadata update below
        args.update_metadata['kernelspec'] = kernelspec

    # Update the metadata
    if args.update_metadata:
        log("[jupytext] Updating notebook metadata with '{}'".format(
            json.dumps(args.update_metadata)))
        # Are we updating a text file that has a metadata filter? #212
        if notebook.metadata.get('jupytext', {}).get('notebook_metadata_filter') == '-all':
            notebook.metadata.get('jupytext', {}).pop('notebook_metadata_filter')
        recursive_update(notebook.metadata, args.update_metadata)

        # Once a kernelspec is set, the 'main_language' entry is dropped
        if 'kernelspec' in args.update_metadata and 'main_language' in notebook.metadata.get(
                'jupytext', {}):
            notebook.metadata['jupytext'].pop('main_language')

    # Read paired notebooks, except if the pair is being created
    if args.sync:
        set_prefix_and_suffix(fmt, notebook, nb_file)
        if args.set_formats is None:
            try:
                notebook, inputs_nb_file, outputs_nb_file = load_paired_notebook(
                    notebook, fmt, nb_file, log)
            except NotAPairedNotebook as err:
                # Not an error: warn on stderr and report success
                sys.stderr.write('[jupytext] Warning: ' + str(err) + '\n')
                return 0

    # II. ### Apply commands onto the notebook ###
    # Pipe the notebook into the desired commands
    prefix = None if nb_file == '-' else os.path.splitext(
        os.path.basename(nb_file))[0]
    for cmd in args.pipe or []:
        notebook = pipe_notebook(notebook, cmd, args.pipe_fmt, prefix=prefix)

    # and/or test the desired commands onto the notebook
    for cmd in args.check or []:
        pipe_notebook(notebook,
                      cmd,
                      args.pipe_fmt,
                      update=False,
                      prefix=prefix)

    # Execute the notebook
    if args.execute:
        kernel_name = notebook.metadata.get('kernelspec', {}).get('name')
        log("[jupytext] Executing notebook with kernel {}".format(kernel_name))
        exec_proc = ExecutePreprocessor(timeout=None, kernel_name=kernel_name)
        # The 'path' resource is the directory of the destination file when
        # there is one, else the directory of the input file; no path is
        # passed when both are stdin/stdout.
        if nb_dest is not None and nb_dest != '-':
            resources = {'metadata': {'path': str(os.path.dirname(nb_dest))}}
        elif nb_file != '-':
            resources = {'metadata': {'path': str(os.path.dirname(nb_file))}}
        else:
            resources = {}
        exec_proc.preprocess(notebook, resources=resources)

    # III. ### Possible actions ###
    # Truthy when the in-memory notebook may differ from what was read
    modified = args.update_metadata or args.pipe or args.execute
    # a. Test round trip conversion
    if args.test or args.test_strict:
        try:
            # Round trip from an ipynb document
            if fmt['extension'] == '.ipynb':
                test_round_trip_conversion(
                    notebook,
                    dest_fmt,
                    update=args.update,
                    allow_expected_differences=not args.test_strict,
                    stop_on_first_error=args.stop_on_first_error)

            # Round trip from a text file
            else:
                with open(nb_file) as fp:
                    org_text = fp.read()

                # If the destination is not ipynb, we convert to/back that format
                if dest_fmt['extension'] != '.ipynb':
                    dest_text = writes(notebook, fmt=dest_fmt)
                    notebook = reads(dest_text, fmt=dest_fmt)

                text = writes(notebook, fmt=fmt)

                if args.test_strict:
                    compare(text, org_text)
                else:
                    # we ignore the YAML header in the comparison #414
                    comment = _SCRIPT_EXTENSIONS.get(fmt['extension'],
                                                     {}).get('comment', '')
                    # white spaces between the comment char and the YAML delimiters are allowed
                    if comment:
                        comment = comment + r'\s*'
                    yaml_header = re.compile(
                        r'^{comment}---\s*\n.*\n{comment}---\s*\n'.format(
                            comment=comment), re.MULTILINE | re.DOTALL)
                    compare(re.sub(yaml_header, '', text),
                            re.sub(yaml_header, '', org_text))

        except (NotebookDifference, AssertionError) as err:
            # Report the difference on stdout and signal failure
            sys.stdout.write('{}: {}'.format(nb_file, str(err)))
            return 1
        # In test mode nothing is written: stop here
        return 0

    # b. Output to the desired file or format
    if nb_dest:
        # Saving in place without --to: keep the format of the input file
        if nb_dest == nb_file and not dest_fmt:
            dest_fmt = fmt

        # Test consistency between dest name and output format
        if dest_fmt and nb_dest != '-':
            base_path(nb_dest, dest_fmt)

        # Describe what jupytext is doing
        if os.path.isfile(nb_dest) and args.update:
            if not nb_dest.endswith('.ipynb'):
                raise ValueError('--update is only for ipynb files')
            action = ' (destination file updated)'
            check_file_version(notebook, nb_file, nb_dest)
            combine_inputs_with_outputs(notebook, read(nb_dest), fmt=fmt)
        elif os.path.isfile(nb_dest):
            action = ' (destination file replaced)'
        else:
            action = ''

        log('[jupytext] Writing {nb_dest}{format}{action}'.format(
            nb_dest=nb_dest,
            format=' in format ' + short_form_one_format(dest_fmt)
            if dest_fmt and 'format_name' in dest_fmt else '',
            action=action))
        write(notebook, nb_dest, fmt=dest_fmt)
        if args.pre_commit:
            system('git', 'add', nb_dest)

    # c. Synchronize paired notebooks
    if args.sync:
        # Also update the original notebook if the notebook was modified
        if modified:
            # Resetting both names disables the "skip unmodified file" test
            # in the loop below, so every paired file is rewritten.
            inputs_nb_file = outputs_nb_file = None
        formats = notebook.metadata['jupytext']['formats']

        # Two passes over the paired paths: .ipynb files first, then the others
        for ipynb in [True, False]:
            # Write first format last so that it is the most recent file
            for alt_path, alt_fmt in paired_paths(nb_file, fmt, formats)[::-1]:
                # Write ipynb first for compatibility with our contents manager
                if alt_path.endswith('.ipynb') != ipynb:
                    continue
                # Do not write the ipynb file if it was not modified
                # But, always write text representations to make sure they are the most recent
                if alt_path == inputs_nb_file and alt_path == outputs_nb_file:
                    continue
                log("[jupytext] Updating '{}'".format(alt_path))
                write(notebook, alt_path, fmt=alt_fmt)
                if args.pre_commit:
                    system('git', 'add', alt_path)

    elif os.path.isfile(nb_file) and nb_dest.endswith('.ipynb') and not nb_file.endswith('.ipynb') and \
            notebook.metadata.get('jupytext', {}).get('formats') is not None:
        # Update the original text file timestamp, as required by our Content Manager
        # Otherwise Jupyter will refuse to open the paired notebook #335
        log("[jupytext] Sync timestamp of '{}'".format(nb_file))
        os.utime(nb_file, None)

    return 0
def run_notebook(path):
    """Execute the notebook stored at ``path`` and print ``done``.

    The notebook is parsed without any format-version conversion and run
    through an ``ExecutePreprocessor`` with its default settings. The executed
    notebook is neither saved nor returned; errors raised while reading or
    executing propagate to the caller.

    Args:
        path: Path of the notebook file to run.
    """
    # Read inside a context manager so the file handle is closed as soon as
    # the notebook is parsed, instead of being left to the garbage collector.
    with open(path) as nb_file:
        nb = nbformat.read(nb_file, as_version=nbformat.NO_CONVERT)
    ExecutePreprocessor().preprocess(nb, {})
    print('done')
def compile_tutorial(tutorial_name, force_recompile=False):
    """Compile one tutorial notebook to reStructuredText and create its thumbnail.

    The notebook ``tutorial_notebooks/<name>/<name>.ipynb`` is read, executed
    if none of its code cells has outputs or an execution count, exported to
    ``tutorials/<name>/<name>.rst`` and one of its output figures is turned
    into ``tutorials/<name>/thumb.png``.

    Args:
        tutorial_name: Name of the tutorial; used both as directory name and
            as file stem.
        force_recompile: When False, a tutorial whose ``.rst`` file is newer
            than its notebook is skipped.

    Returns:
        A tuple ``(title, level, description, thumbnail)`` where ``title`` and
        ``description`` are scraped from the first cell, ``level`` comes from
        the ``level`` or ``difficulty`` notebook metadata (``'Unknown'`` if
        neither is present) and ``thumbnail`` is the thumbnail path with its
        first path component removed.
    """
    print('- Compiling tutorial ' + tutorial_name + '...')

    notebook_path = 'tutorial_notebooks/' + tutorial_name + '/' + tutorial_name + '.ipynb'
    export_path = 'tutorials/' + tutorial_name + '/' + tutorial_name
    export_dir = os.path.dirname(export_path)
    thumb_dest = export_dir + '/thumb.png'

    if not os.path.exists(export_dir):
        os.makedirs(export_dir)

    # Read in notebook
    notebook = nbformat.read(notebook_path, 4)

    # Scrape title, description and thumbnail
    first_cell = notebook.cells[0]

    # The title is the first line of the first cell, without '#' characters
    title = first_cell.source.splitlines()[0]
    if '#' in title:
        title = title.replace('#', '').strip()

    # The description is the first non-blank line after the title
    description = ''
    for line in first_cell.source.splitlines()[1:]:
        if line.strip():
            description = line.strip()
            break

    if not description:
        print(' Description could not be found in the notebook.')

    # Index -1 selects the last figure when the metadata does not say otherwise
    if 'thumbnail_figure_index' in notebook.metadata:
        thumbnail_figure_index = notebook.metadata['thumbnail_figure_index']
    else:
        thumbnail_figure_index = -1

    if 'level' in notebook.metadata:
        level = notebook.metadata['level'].capitalize()
    elif 'difficulty' in notebook.metadata:
        level = notebook.metadata['difficulty'].capitalize()
    else:
        level = 'Unknown'

    # Check if the tutorial was already compiled.
    if os.path.exists(export_path + '.rst'):
        if os.path.getmtime(export_path + '.rst') > os.path.getmtime(notebook_path):
            if force_recompile:
                print(' Already compiled. Recompiling anyway...')
            else:
                print(' Already compiled. Skipping...')
                return title, level, description, thumb_dest.split('/', 1)[-1]

    # Execute notebook if not already executed
    already_executed = any(
        c.get('outputs') or c.get('execution_count')
        for c in notebook.cells if c.cell_type == 'code')

    resources = {}

    if not already_executed:
        ep = ExecutePreprocessor(timeout=600, kernel_name='python3')
        try:
            start = time.time()

            # Two helper cells that configure inline matplotlib output are
            # inserted after the first cell and removed again after the run.
            additional_cell_1 = {
                "cell_type": "code",
                "execution_count": None,
                "metadata": {},
                "outputs": [],
                "source": r"%matplotlib inline" + '\n' + r"%config InlineBackend.print_figure_kwargs = {'bbox_inches': None, 'figsize': (8, 6)}"
            }

            additional_cell_2 = {
                "cell_type": "code",
                "execution_count": None,
                "metadata": {},
                "outputs": [],
                "source": "import matplotlib as mpl\nmpl.rcParams['figure.figsize'] = (8, 6)\nmpl.rcParams['figure.dpi'] = 150\nmpl.rcParams['savefig.dpi'] = 150"
            }

            notebook.cells.insert(1, nbformat.from_dict(additional_cell_1))
            notebook.cells.insert(2, nbformat.from_dict(additional_cell_2))

            # Run the kernel from the notebook's own directory
            km, kc = ep.start_new_kernel(
                cwd=os.path.abspath(os.path.dirname(notebook_path)))
            try:
                kc.allow_stdin = False
                notebook, resources = ep.preprocess(notebook, km=km)
            finally:
                # Always stop the kernel, so that a failing cell does not
                # leave a kernel process running in the background.
                km.shutdown_kernel()

            notebook.cells.pop(2)
            notebook.cells.pop(1)

            end = time.time()

            print(' Compilation took %d seconds.' % (end - start))
        except CellExecutionError as err:
            # A failing cell is reported; the export below still runs on
            # whatever the notebook contains at this point.
            print(' Error while processing notebook:')
            print(' ', err)
    else:
        print(' Notebook was already executed.')

    exporter = RSTExporter()
    output, resources = exporter.from_notebook_node(notebook, resources)

    writer = FilesWriter(build_directory=export_dir)
    writer.write(output, resources, notebook_name=os.path.basename(export_path))

    # Order the extracted output files by their position in the RST text
    pictures = sorted(resources['outputs'], key=output.find)

    try:
        thumbnail_source = pictures[thumbnail_figure_index]

        # Read in thumbnail source image
        img = Image.open(export_dir + '/' + thumbnail_source)

        # Trim whitespace
        bg = Image.new(img.mode, img.size, img.getpixel((0, 0)))
        diff = ImageChops.difference(img, bg)
        diff = ImageChops.add(diff, diff)
        bbox = diff.getbbox()
        if bbox:
            img = img.crop(bbox)

        # Resize image to have a width of 400px
        img.thumbnail([400, 1000])

        # Save thumbnail
        img.save(thumb_dest)
    except Exception:
        # Any failure (e.g. no figure in the notebook) falls back to the
        # placeholder image. Catching Exception rather than everything keeps
        # KeyboardInterrupt and SystemExit working.
        shutil.copyfile('_static/no_thumb.png', thumb_dest)

    print(' Done!')

    return title, level, description, thumb_dest.split('/', 1)[-1]
def test_notebook_execution_with_pandas_backend(
    titanic_data_context_no_data_docs_no_checkpoint_store,
):
    """
    Check that a scaffold notebook is written to disk and runs without error.

    Setup:
    - create an expectation suite and check that nothing was validated yet
    - render the scaffold notebook for that suite to disk

    Exercise and verify:
    - execute the notebook (a failing cell raises, e.g. CellExecutionError)
    - load a new context from disk
    - check that a validation ran against the suite and that the suite now
      holds expectations
    """
    # The notebook is executed for real, so the context has no data docs:
    # building and opening docs is the renderer's default behavior and is not
    # part of this test.
    context = titanic_data_context_no_data_docs_no_checkpoint_store
    root_dir = context.root_directory
    uncommitted_dir = os.path.join(root_dir, "uncommitted")

    suite_name = "my_suite"
    suite = context.create_expectation_suite(suite_name)

    batch_kwargs = {
        "datasource": "mydatasource",
        "path": os.path.join(root_dir, "..", "data", "Titanic.csv"),
    }

    # Sanity check test setup
    expected_datasource = {
        "module_name": "great_expectations.datasource",
        "class_name": "PandasDatasource",
        "data_asset_type": {
            "module_name": "great_expectations.dataset",
            "class_name": "PandasDataset",
        },
        "batch_kwargs_generators": {
            "mygenerator": {
                "class_name": "SubdirReaderBatchKwargsGenerator",
                "base_directory": "../data",
            }
        },
        "name": "mydatasource",
    }
    assert context.list_expectation_suite_names() == [suite_name]
    assert context.list_datasources() == [expected_datasource]
    assert context.get_validation_result(suite_name) == {}

    notebook_path = os.path.join(uncommitted_dir, suite_name + ".ipynb")
    assert not os.path.isfile(notebook_path)

    # Create notebook
    SuiteScaffoldNotebookRenderer(context, suite, batch_kwargs).render_to_disk(
        notebook_path
    )
    assert os.path.isfile(notebook_path)

    with open(notebook_path) as nb_file:
        nb = nbformat.read(nb_file, as_version=4)

    # Run notebook, using the uncommitted directory as working directory
    executor = ExecutePreprocessor(timeout=600, kernel_name="python3")
    executor.preprocess(nb, {"metadata": {"path": uncommitted_dir}})

    # Useful to inspect executed notebook
    with open(os.path.join(uncommitted_dir, "output.ipynb"), "w") as out_file:
        nbformat.write(nb, out_file)

    # Assertions about output
    reloaded_context = DataContext(root_dir)
    validation_result = reloaded_context.get_validation_result(suite_name)
    expected_statistics = {
        "evaluated_expectations": 3,
        "successful_expectations": 3,
        "unsuccessful_expectations": 0,
        "success_percent": 100,
    }
    assert validation_result.statistics == expected_statistics

    reloaded_suite = reloaded_context.get_expectation_suite(suite_name)
    assert reloaded_suite.expectations
def setUp(self):
    """Attach an enabled ExecutePreprocessor to the test case as ``self.preprocessor``.

    The preprocessor is created with ``timeout=600`` and ``allow_errors=False``.
    """
    from nbconvert.preprocessors import ExecutePreprocessor

    options = {
        'timeout': 600,
        'enabled': True,
        'allow_errors': False,
    }
    self.preprocessor = ExecutePreprocessor(**options)
def run_pipeline(self, arg, line='', cell='', local_ns=None):
    """Run notebooks sequentially in a pipeline.

    A dictionary called _pipeline_workspace is created by the magic that will be shared by all the
    notebooks in the pipeline. The state can contain DataFrames, Lists, Dictionaries and objects.
    Notebook parameterization can be used to load and read from the shared state.

    The pipeline supports execution of parameterized notebooks. If parameters are used, the first
    code cell will be treated to contain only parameter assignments. Parameters can be a string,
    number, list or dictionary.

    To save a notebook's execution in the pipeline, the save name should be specified along with
    the execution notebook separated with a colon.

    Run parameters will only change their equivalent parameters from the first code cell.
    Unknown parameters will be ignored. Adding parameters on an execution is optional.

    # simple pipeline
    Example1:
        %%run_pipeline
        first notebook in pipeline;
        second notebook in pipeline;
        third notebook in pipeline

    # pipeline with parameterized notebooks
    Example2:
        %%run_pipeline
        first notebook in pipeline key01=int key01=string key02={'key01': param01};
        second notebook in pipeline;
        third notebook in pipeline:your save name key01=int key02=string key03=[param01, param02]

    All notebooks run in one ``python3`` kernel that is started here and shut
    down at the end, or as soon as a cell fails. A ``CellExecutionError``
    raised by a notebook cell is re-raised after the kernel is stopped.
    """
    # Cell run after each notebook: deletes every user variable except the
    # shared '_pipeline_workspace'. The source is Python code, so the nested
    # 'if' and 'del' lines must carry real block indentation.
    clear_namespace_cell = nbformat.v4.new_code_cell(
        source="from IPython import get_ipython\n" +
               "_ip = get_ipython()\n" +
               "_user_vars = %who_ls\n" +
               "for _var in _user_vars:\n" +
               "    if _var != '_pipeline_workspace':\n" +
               "        del _ip.user_ns[_var]\n" +
               "import gc\n" +
               "gc.collect()"
    )
    # Cell run once, before the first notebook: creates the shared state
    pipeline_state_cell = nbformat.v4.new_code_cell(source="_pipeline_workspace = {'frames': list()}")

    # Without line or cell text, an argument that is not an option is the
    # pipeline description itself.
    if not (line or cell):
        if not arg.startswith("-"):
            line = arg
            arg = ''

    args = ParameterArgs(parse_argstring(self.run, arg))

    # save globals and locals so they can be referenced in bind vars
    # (this merged namespace is not read again inside this method)
    user_ns = self.shell.user_ns.copy()
    if local_ns:
        user_ns.update(local_ns)

    if not cell:
        cell = line

    # One run command per ';'-separated entry
    notebook_run_cmds = [notebook_run_cmd.strip() for notebook_run_cmd in cell.split(';')]

    execute_preprocessor = ExecutePreprocessor(kernel_name='python3', timeout=args.get('cell_timeout'))
    kernel_manager, kernel_comm = start_new_kernel(kernel_name='python3')
    execute_preprocessor.km = kernel_manager
    execute_preprocessor.kc = kernel_comm

    def shutdown_kernel():
        # Stop the client channels, then the kernel shared by the pipeline
        if kernel_manager or kernel_comm:
            kernel_comm.stop_channels()
            kernel_manager.shutdown_kernel()

    def execute_cell(nb4_cell):
        # Run one helper cell; on any failure the kernel is stopped and the
        # exception is not propagated.
        try:
            execute_preprocessor.run_cell(nb4_cell)
        except BaseException:
            shutdown_kernel()

    def execute_notebook(notebook_filename, notebook_save_filename, params):
        # Run one notebook cell by cell in the shared kernel, showing a
        # progress bar, and save the result when a save name was given.
        with open(notebook_filename) as file_handler:
            notebook = nbformat.read(file_handler, as_version=4)
        b_errors = False
        if params:
            # Parameters are substituted in the first code cell only
            for nb_cell in notebook.cells:
                if nb_cell.cell_type == 'code':
                    new_cell_source = utils.substitute_params(nb_cell.source, params)
                    nb_cell.source = new_cell_source
                    break

        try:
            execute_preprocessor.nb = notebook
            progress_bar = widgets.IntProgress(
                value=0,
                min=0,
                max=len(notebook.cells),
                step=1,
                bar_style='info',  # 'success', 'info', 'warning', 'danger' or ''
                orientation='horizontal'
            )
            display_label = notebook_filename
            if notebook_save_filename:
                display_label = display_label + ' : ' + notebook_save_filename
            display(widgets.HBox([widgets.Label(display_label), progress_bar]))

            for idx, nb_cell in enumerate(notebook.cells):
                execute_preprocessor.preprocess_cell(nb_cell, resources={'metadata': {}}, cell_index=idx)
                progress_bar.value = idx + 1
        except CellExecutionError:
            b_errors = True
            progress_bar.bar_style = 'danger'
            shutdown_kernel()
            raise
        finally:
            # The notebook is saved whether or not its execution succeeded
            if notebook_save_filename:
                with open(notebook_save_filename, mode='wt') as file_handler:
                    nbformat.write(notebook, file_handler)

        # Reached only when no exception escaped the cell loop
        if not b_errors:
            progress_bar.bar_style = 'success'

    execute_cell(pipeline_state_cell)
    for notebook_run_cmd in notebook_run_cmds:
        run_notebook_name, notebook_save_name, nb_params = utils.parse_run_str(notebook_run_cmd)
        execute_notebook(run_notebook_name, notebook_save_name, nb_params)
        # Wipe the namespace so the next notebook only sees the shared state
        execute_cell(clear_namespace_cell)

    shutdown_kernel()