def test__output_conv_ipynb(self):
    nb_cell = nbformat.NotebookNode({
        'cell_type': 'markdown',
        'metadata': {},
        'source': "## Test"
    })
    nb = nbformat.NotebookNode({
        'cells': [nb_cell],
        'metadata': {},
        'nbformat': 4,
        'nbformat_minor': 4
    })

    ret = self.output._output_conv(nb)
    self.assertEqual(type(ret).__name__, 'HTML')
def test_add_parameters_tag(source):
    with open("jovian_papermill/tests/resources/notebook1.ipynb") as f:
        nb = nbformat.read(f, as_version=4)

    nb.cells[0]["source"] = source
    nb = nbformat.NotebookNode(
        json.loads(add_parameters_tag(json.dumps(nb))))

    assert "parameters" in nb.cells[0]["metadata"]["tags"]
async def async_exec_code(self, source, write_cell=settings.dev_mode):
    """
    Execute code in a Kernel

    Parameters
    ----------
    source (str):
        Execute this code
    write_cell (bool, default=settings.dev_mode):
        Write a new cell to the notebook

    Returns
    -------
    NotebookNode
    """
    cell = nbformat.NotebookNode()
    cell.cell_type = "code"
    cell.execution_count = self.code_cells_executed + 1
    cell.metadata = {}
    cell.outputs = []
    cell.source = source

    self.nb["cells"].append(cell)
    cell_index = len(self.nb["cells"]) - 1

    cell = await self.async_execute_cell(
        cell, cell_index, execution_count=cell.execution_count
    )

    if write_cell == False:
        # Delete created cell
        del self.nb["cells"][cell_index]

    return cell
async def fix_notebook(self, notebook):
    """Returns a notebook object with a valid kernelspec.

    In case the kernel is not found, we search for a matching kernel based on
    the language.
    """
    # Fetch kernel name from the notebook metadata
    if 'kernelspec' not in notebook.metadata:
        notebook.metadata.kernelspec = nbformat.NotebookNode()
    kernelspec = notebook.metadata.kernelspec
    kernel_name = kernelspec.get('name', self.kernel_manager.default_kernel_name)

    # We use `maybe_future` to support RemoteKernelSpecManager
    all_kernel_specs = await tornado.gen.maybe_future(
        self.kernel_spec_manager.get_all_specs())

    # Find a spec matching the language if the kernel name does not exist in the kernelspecs
    if kernel_name not in all_kernel_specs:
        missing_kernel_name = kernel_name
        kernel_name = await self.find_kernel_name_for_language(
            kernelspec.language.lower(), kernel_specs=all_kernel_specs)
        self.log.warning('Could not find a kernel named %r, will use %r',
                         missing_kernel_name, kernel_name)

    # We make sure the notebook's kernelspec is correct
    notebook.metadata.kernelspec.name = kernel_name
    notebook.metadata.kernelspec.display_name = all_kernel_specs[
        kernel_name]['spec']['display_name']
    notebook.metadata.kernelspec.language = all_kernel_specs[kernel_name][
        'spec']['language']

    return notebook
def test_add_parameters_tag_raises_exception(source):
    with open("jovian_papermill/tests/resources/notebook1.ipynb") as f:
        nb = nbformat.read(f, as_version=4)

    nb.cells[0]["source"] = source

    with pytest.raises(Exception):
        nb = nbformat.NotebookNode(
            json.loads(add_parameters_tag(json.dumps(nb))))
def recombine(directory):
    directory = pathlib.Path(directory)
    with (directory / 'metadata.json').open() as f:
        metadata = json.load(f)
    nb = nbf.v4.new_notebook(metadata=metadata)

    with (directory / 'cells_sequence').open() as f:
        cells_sequence = f.read().splitlines()

    for cell_id in cells_sequence:
        cell_dir = directory / cell_id
        source_file = list(cell_dir.glob('source.*'))[0]
        if source_file.suffix == '.md':
            with source_file.open() as f:
                cell = nbf.v4.new_markdown_cell(f.read())
        elif source_file.suffix == '.txt':
            with source_file.open() as f:
                cell = nbf.NotebookNode(cell_type='raw', source=f.read(),
                                        metadata=nbf.NotebookNode())
        else:
            with source_file.open() as f:
                cell = nbf.v4.new_code_cell(f.read())
        nb.cells.append(cell)

        if (cell_dir / 'metadata.json').exists():
            with (cell_dir / 'metadata.json').open() as f:
                cell.metadata = nbf.from_dict(json.load(f))
        cell.metadata['nbexplode_cell_id'] = cell_id

        if not (cell_dir / 'outputs_sequence').exists():
            continue

        with (cell_dir / 'outputs_sequence').open() as f:
            outputs_seq = f.read().splitlines()

        cell.outputs = [
            recombine_output(cell_dir, i, info)
            for (i, info) in enumerate(outputs_seq, start=1)
        ]

    return nb
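A minimal usage sketch for the recombine example above, assuming a directory with the metadata.json / cells_sequence layout the function expects and the nbf alias being nbformat; the paths are placeholders, not from the source.

import nbformat as nbf

# Rebuild a notebook from an exploded directory and write it back to disk.
# 'exploded_nb' and 'recombined.ipynb' are illustrative paths only.
nb = recombine('exploded_nb')
nbf.write(nb, 'recombined.ipynb')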
def preprocess_cell(self, cell, resources, cell_index):
    if cell.cell_type != 'code':
        return cell, resources

    key = self.cache_key(cell.source, cell_index)
    if key in self.cache:
        self.log.debug("Cache hit[%i]: %s", cell_index, key)
        cell.outputs = [
            nbformat.NotebookNode(output) for output in self.cache[key]
        ]
    else:
        # Apply the warnings filter. Fix for the edx converter.
        if self.warnings != "default":
            self.kc.execute("import warnings; "
                            "warnings.simplefilter('{}')".format(self.warnings))

        outputs = self.run_cell(cell, cell_index)

        # allow_errors inherited from ExecutePreprocessor: by default, is False.
        if not self.allow_errors:
            for out in outputs:
                if out.output_type == 'error':
                    # If the current cell is not a setup cell, inform of error
                    # and continue to the next cell without storing output
                    if cell_index >= self.setup_cells:
                        pattern = """\
                        An error occurred while executing the following cell:
                        ------------------
                        {cell.source}
                        ------------------
                        {out.ename}: {out.evalue}
                        """
                        print(dedent(pattern).format(out=out, cell=cell),
                              file=sys.stderr)
                        return cell, resources
                    else:
                        # If current cell is a setup cell, do not run more cells
                        pattern = """\
                        An error occurred while executing setup cell number {cell_index}.
                        No further cells will be run.
                        ------------------
                        {out.ename}: {out.evalue}
                        """
                        msg = dedent(pattern).format(out=out, cell_index=cell_index)
                        raise CellExecutionError(msg)

            # If no error, store output of cell
            cell.outputs = outputs
            self.cache[key] = cell.outputs
        else:
            # If we don't check for errors, store output of cell
            cell.outputs = outputs
            self.cache[key] = cell.outputs

    return cell, resources
def run_cell(shell, iopub, cell, kc):
    # print cell.source
    # shell.execute(cell.source)
    kc.execute(cell.source)
    # wait for finish, maximum 20s
    shell.get_msg(timeout=1)  # was 20
    outs = []

    while True:
        try:
            msg = iopub.get_msg(timeout=0.2)
        except Empty:
            break
        msg_type = msg['msg_type']
        if msg_type in ('status', 'execute_input'):
            continue
        elif msg_type == 'clear_output':
            outs = []
            continue
        content = msg['content']
        # print msg_type, content
        out = nbformat.NotebookNode(output_type=msg_type)

        if msg_type == 'stream':
            out.stream = content['name']
            out.text = content['text']
            out.data = content['text']
            out.name = content['name']
        elif msg_type in ('display_data', 'pyout', 'execute_result'):
            out['metadata'] = content['metadata']
            for mime, data in content['data'].items():
                attr = mime.split('/')[-1].lower()
                # this gets most right, but fix svg+html, plain
                attr = attr.replace('+xml', '').replace('plain', 'text')
                setattr(out, attr, data)
            out.data = content['data']
            if msg_type in ('execute_result', 'pyout'):
                out.execution_count = content['execution_count']
        elif msg_type in ('pyerr', 'error'):
            out.ename = content['ename']
            out.evalue = content['evalue']
            out.traceback = content['traceback']
        else:
            print("unhandled iopub msg:", msg_type)
        outs.append(out)

    return outs
def insertRef(self):
    nbDoc = nbformat.read(self.nbFileName, as_version=4)
    nbCells = nbDoc['cells']
    markdownCells = [x for x in nbCells if x['cell_type'] == 'markdown']

    hasRef = False
    for mdc in markdownCells:
        print(mdc['source'])
        if 'pluto.studio' in mdc['source']:
            hasRef = True
            break

    if not hasRef:
        outObj = nbformat.NotebookNode(
            cell_type='markdown',
            metadata={},
            source=["This notebook was created using [pluto](http://pluto.studio). Learn more [here](https://github.com/shyams80/pluto)"])
        nbCells.append(outObj)

    nbformat.write(nbDoc, self.nbFileName, version=4)
def split_cells():
    cells = dropwhile((lambda cell: cell.cell_type != 'markdown'), nb.cells)
    for cell in cells:
        if cell.cell_type != 'markdown':
            yield cell
        else:
            split_sources = re.split('(^# .*$)', cell.source, flags=re.MULTILINE)
            for src in split_sources:
                yield nbformat.NotebookNode(
                    source=src,
                    cell_type='markdown',
                    metadata={},
                )
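A minimal usage sketch, assuming split_cells closes over a module-level notebook named nb as in the generator above (with dropwhile, re, and nbformat imported); the file names are placeholders.

import re
import nbformat
from itertools import dropwhile

# Load a notebook, split markdown cells on top-level '# ' headings, and save a copy.
# Note: the generator's dropwhile drops any cells before the first markdown cell.
nb = nbformat.read('input.ipynb', as_version=4)
nb.cells = list(split_cells())
nbformat.write(nb, 'split.ipynb')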
def post_process(language, cell, nodes, cell_config) -> nbformat.NotebookNode:
    """Construct what should be written to the contents for this cell.

    This simply creates a new raw cell containing everything - not because it's
    the best solution but the easiest for my use case.
    TODO figure out a better way that also accommodates potential
    HTML/interactive output better

    Modifies cell in-place (#4).
    """
    out = [config["cell.source"].format(language=language, cell=cell)]
    for node in nodes:
        assert isinstance(node, nbformat.NotebookNode)
        # https://nbformat.readthedocs.io/en/latest/format_description.html
        if node.output_type == "execute_result":
            out.append(config["node.execute_result"].format(node=node))
        elif node.output_type == "stream":
            # TODO use tags/raises-exception like in pytest (not raising raises error)
            # if <wherever that tags thing is> is set:
            #     raise DidNotRaise(f"should have raised but didn't:\n{node}")
            out.append(config["node.stream"].format(node=node))
        elif node.output_type == "error":
            if not cell_config["metadata.allow_errors"]:
                raise ErrorsNotAllowed(f"raised but errors not allowed:\n{node}")
            if cell_config["metadata.full_traceback"]:
                # TODO handle ANSI terminal colors stuff
                # see if how jupyter does it is reusable
                # to keep colours this would need to be HTML though
                out.append("".join(node.traceback))
            else:
                out.append(config["node.exception"].format(node=node))
        else:
            raise UnhandledOutputType(f"{node.output_type=} unknown - {cell.source=}")

    return nbformat.NotebookNode({
        "cell_type": "raw",
        "metadata": {},
        "source": "\n".join(out)
    })
This script can strip solution cells or insert grading cells in Jupyter notebooks.
"""

import argparse
import nbformat as nb
import os.path

# Configuration Variables
SOLN_SUFFIX = '-solutions.ipynb'
SOLN_HEADER = '#### SOLUTION'  # header for solution cells
EXER_HEADER = '__Exercise'  # header for exercise cells
GRADE_HEADER = '<strong style="color:#F00">\nGrade: \n</strong>'
GRADE_CELL = nb.NotebookNode(cell_type='markdown', metadata={}, source=GRADE_HEADER)


def nb_strip(path):
    if not path.endswith(SOLN_SUFFIX):
        msg = "Error: input path '{}' doesn't end with '{}'."
        print(msg.format(path, SOLN_SUFFIX))
        return

    out_path = path[:-len(SOLN_SUFFIX)] + '.ipynb'
    if os.path.exists(out_path):
        print("Error: output path '{}' already exists.".format(out_path))
        return

    # Strip solution cells.
def insert_cell(self, index: int, cell: dict):
    new_cell = nbformat.NotebookNode(cell)
    self.nb_node.cells.insert(index, new_cell)
def run_cell(kernel_client, cell, timeout=300):
    if not hasattr(cell, 'source'):
        return [], False
    kernel_client.execute(cell.source)
    # wait for finish, maximum 5min by default
    reply = kernel_client.get_shell_msg(timeout=timeout)['content']
    if reply['status'] == 'error':
        failed = True
        print("\nFAILURE:")
        print(cell.source)
        print('-----')
        print("raised:")
        print('\n'.join(reply['traceback']))
    else:
        failed = False

    # Collect the outputs of the cell execution
    outs = []
    while True:
        try:
            msg = kernel_client.get_iopub_msg(timeout=0.2)
        except Empty:
            break
        msg_type = msg['msg_type']
        if msg_type in ('status', 'execute_input'):
            continue
        elif msg_type == 'clear_output':
            outs = []
            continue
        content = msg['content']
        out = nbformat.NotebookNode(output_type=msg_type)

        if msg_type == 'stream':
            out.name = content['name']
            out.text = content['text']
        elif msg_type in ('display_data', 'execute_result'):
            for mime, data in content['data'].items():
                attr = mime.split('/')[-1].lower()
                # this gets most right, but fix svg+html, plain
                attr = attr.replace('+xml', '').replace('plain', 'text')
                setattr(out, attr, data)
            if msg_type == 'execute_result':
                out.execution_count = content['execution_count']
        elif msg_type == 'error':
            out.ename = content['ename']
            out.evalue = content['evalue']
            out.traceback = content['traceback']
        elif msg_type == 'execute_input':
            print(content)
        else:
            print("unhandled iopub msg: %s" % msg_type)
        outs.append(out)

    # Special handling of ipcluster restarts
    if '!ipcluster stop' in cell.source:
        # wait some time for cluster commands to complete
        for i in range(10):
            try:
                if len(Client()) == 0:
                    break
            except FileNotFoundError:
                pass
            sys.stdout.write("@")
            sys.stdout.flush()
            time.sleep(5)
    if '!ipcluster start' in cell.source:
        # wait some time for cluster commands to complete
        for i in range(10):
            try:
                if len(Client()) > 0:
                    break
            except FileNotFoundError:
                pass
            sys.stdout.write("#")
            sys.stdout.flush()
            time.sleep(5)
    return outs, failed
original_nb = nbformat.read(file_path, nbformat.NO_CONVERT)
nb_sans_cells = notebook_without_cells(original_nb)
nb_sans_cells['cells'] = []

for cell in original_nb.cells:
    # only include cells that do not contain the following pattern
    if remove_cell_with(cell['source'], '# To avoid duplication'):
        # a list for all the cells that should be preserved
        new_lines = []
        # go through the cell source and remove all unneeded lines and text
        for line in iterlines(cell['source']):
            new_line = remove_ex_comment(line)
            new_line = remove_ex(new_line)
            new_line = remove_line_with(new_line, '#remove_next')
            new_lines.append(new_line)
        # combine preserved lines into single string
        new_source = combine_lines(new_lines)
        # construct a new cell (copy everything except the source)
        new_cell = {k: v for k, v in cell.items() if k != 'source'}
        # add the cell source
        new_cell['source'] = new_source
        # convert cell to NotebookNode
        new_cell = nbformat.NotebookNode(new_cell)
        # add cell to the new notebook
        nb_sans_cells['cells'].append(new_cell)

new_nb = nbformat.NotebookNode(nb_sans_cells)
new_path = path.splitext(file_path)
new_path = new_path[0] + '_clean' + new_path[1]
nbformat.write(new_nb, new_path)
def md2nbcell(md: str) -> NotebookNode:
    """Convert markdown to Jupyter notebook cell."""
    data = {"cell_type": "markdown", "metadata": {}, "source": md}
    cell = nbformat.NotebookNode(**data)
    return cell
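A minimal usage sketch for md2nbcell above: wrap the markdown cell in a fresh v4 notebook and write it out with the nbformat API ("demo.ipynb" is a placeholder path).

import nbformat

# Build a one-cell notebook from the helper and save it to disk.
nb = nbformat.v4.new_notebook(cells=[md2nbcell("## A heading\n\nSome text.")])
nbformat.write(nb, "demo.ipynb")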
def run_code(self, code):
    "Useful for debugging: run an arbitrary line of code."
    cell = nbformat.NotebookNode(source=code)
    return self.run_cell(cell)
def run_cell(kc, cell, timeout=300):
    if not hasattr(cell, 'input'):
        return [], False
    kc.execute(cell.input)
    # wait for finish, maximum 5min by default
    reply = kc.get_shell_msg(timeout=timeout)['content']
    if reply['status'] == 'error':
        failed = True
        print("\nFAILURE:")
        print(cell.input)
        print('-----')
        print("raised:")
        print('\n'.join(reply['traceback']))
    else:
        failed = False

    # Collect the outputs of the cell execution
    outs = []
    while True:
        try:
            msg = kc.get_iopub_msg(timeout=0.2)
        except Empty:
            break
        msg_type = msg['msg_type']
        if msg_type in ('status', 'pyin', 'execute_input'):
            continue
        elif msg_type == 'clear_output':
            outs = []
            continue
        content = msg['content']

        # IPython 3 writes pyerr/pyout in the notebook format but uses
        # error/execute_result in the message spec. This does the translation
        # needed for tests to pass with IPython 3
        notebook3_format_conversions = {
            'error': 'pyerr',
            'execute_result': 'pyout'
        }
        msg_type = notebook3_format_conversions.get(msg_type, msg_type)

        out = nbformat.NotebookNode(output_type=msg_type)

        if 'execution_count' in content:
            cell['prompt_number'] = content['execution_count']
            out.prompt_number = content['execution_count']

        if msg_type == 'stream':
            out.stream = content['name']
            # in msgspec 5, this is name, text
            # in msgspec 4, this is name, data
            if 'text' in content:
                out.text = content['text']
            else:
                out.text = content['data']
        elif msg_type in ('display_data', 'pyout'):
            for mime, data in content['data'].items():
                attr = mime.split('/')[-1].lower()
                # this gets most right, but fix svg+html, plain
                attr = attr.replace('+xml', '').replace('plain', 'text')
                setattr(out, attr, data)
        elif msg_type == 'pyerr':
            out.ename = content['ename']
            out.evalue = content['evalue']
            out.traceback = content['traceback']
        else:
            print("unhandled iopub msg: %s" % msg_type)
        outs.append(out)

    # Special handling of ipcluster restarts
    if '!ipcluster stop' in cell.input:
        # wait some time for cluster commands to complete
        for i in range(10):
            try:
                if len(Client()) == 0:
                    break
            except OSError:
                pass
            sys.stdout.write("@")
            sys.stdout.flush()
            time.sleep(5)
    if '!ipcluster start' in cell.input:
        # wait some time for cluster commands to complete
        for i in range(10):
            try:
                if len(Client()) > 0:
                    break
            except OSError:
                pass
            sys.stdout.write("#")
            sys.stdout.flush()
            time.sleep(5)
    return outs, failed
"Nbconvert is not installed. To install it use: \n pip install nbconvert \n or visit: http://nbconvert.readthedocs.io/en/latest/install.html. \n OS error: {0}" .format(err)) raise from StringIO import StringIO import codecs from nbformat.v4.nbbase import (new_code_cell, new_markdown_cell, new_notebook, new_output, new_raw_cell) infilename = sys.argv[1] outfilename = sys.argv[2] f = open(infilename) # lines = f.readlines() # f.close() # text = "".join(lines) nb = nbformat.read(infilename, 4) nb_new = nbformat.NotebookNode() cells_new = [] offset = 0 reright = 0 for cell in nb.cells: if cell["cell_type"] == "code": if cell["source"] == '%jsroot on': offset = -1 elif cell["source"] == '%jsroot off': offset = -1 else: # print cell.execution_count # print type(cell.execution_count) cells_new.append( new_code_cell(source=cell.source,
def overwrite_cell(self, index: int, cell: dict):
    new_cell = nbformat.NotebookNode(cell)
    self.nb_node.cells[index] = new_cell
def loop():
    request = db.q.find_one({'isProcessed': False})
    if request == None:
        return

    subprocess.run(shlex.split("ufw allow out to any port 443"), env=os.environ, errors=True)
    subprocess.run(shlex.split("ufw deny out to any port 27017"), env=os.environ, errors=True)

    print(request['_id'])
    githubAcc = Github(request['githubTok'])
    user = githubAcc.get_user()
    repo = user.get_repo("plutons")
    githubUserName = request['githubUser']
    print(f"processing for: {githubUserName}")

    qId = ObjectId(request['_id'])
    fullPath = request['file']
    notebook = gzip.decompress(request['notebook'])
    tempFileName = plutoPath + fullPath[fullPath.rfind('/') + 1:]
    print(tempFileName)
    with open(tempFileName, mode='wb') as file:
        file.write(notebook)

    subprocess.run(shlex.split(f"chmod 666 {tempFileName}"), env=os.environ, errors=True)
    insertRef(tempFileName)

    subprocess.run(shlex.split("ufw deny out to any port 443"), env=os.environ, errors=True)

    cmdLine = f"sudo -E -H -u pluto jupyter nbconvert --to notebook --execute {tempFileName} --inplace --allow-errors"
    cpi = subprocess.run(shlex.split(cmdLine), env=os.environ, errors=True)

    textLength = getOutputLength(tempFileName)
    print(f"total output length: {textLength}")
    if githubUserName != 'shyams80' and textLength > 10000:
        cmdLine = f"jupyter nbconvert --ClearOutputPreprocessor.enabled=True --inplace {tempFileName}"
        cpi = subprocess.run(shlex.split(cmdLine), env=os.environ, errors=True)

        nbDoc = nbformat.read(tempFileName, as_version=4)
        for nbCell in nbDoc['cells']:
            if nbCell['cell_type'] != 'code' and nbCell['source'] != None:
                continue
            nbCell['execution_count'] = 1
            outObj = nbformat.NotebookNode(
                output_type='stream',
                name='stderr',
                text=['total output string length exceeded 10000 characters. please stay within the limit.'])
            nbCell['outputs'].append(outObj)
            break

        nbformat.write(nbDoc, tempFileName, version=4)

    with open(tempFileName, mode='rb') as file:
        outFileContent = file.read()

    tooBig = False
    subprocess.run(shlex.split("ufw allow out to any port 443"), env=os.environ, errors=True)
    try:
        fileContent = repo.get_contents(fullPath)
        repo.update_file(fullPath, "response", outFileContent, fileContent.sha)
    except Exception as exp:
        print(exp)
        if exp.data["errors"][0]['code'] == 'too_large':
            tooBig = True
    subprocess.run(shlex.split("ufw deny out to any port 443"), env=os.environ, errors=True)

    if tooBig:
        cmdLine = f"sudo -E -H -u pluto jupyter nbconvert --to markdown --execute {tempFileName} --allow-errors"
        cpi = subprocess.run(shlex.split(cmdLine), env=os.environ, errors=True)
        filePattern = tempFileName.replace(".ipynb", "")

        subprocess.run(shlex.split("ufw allow out to any port 443"), env=os.environ, errors=True)
        upsertGithub(filePattern + ".md", repo)
        for fname in os.listdir(filePattern + "_files"):
            upsertGithub(filePattern + "_files/" + fname, repo)
        subprocess.run(shlex.split("ufw deny out to any port 443"), env=os.environ, errors=True)

    os.remove(tempFileName)

    subprocess.run(shlex.split("ufw allow out to any port 27017"), env=os.environ, errors=True)
    db.q.update_one({'_id': qId}, {
        '$set': {
            'isProcessed': True,
            'processedOn': datetime.now(),
            'notebook': gzip.compress(outFileContent)
        }
    })
def preprocess(self, nb, resources, path):
    # Record initial loaded modules, including the proveit defaults and
    # proveit.magics. All other modules will be deleted when
    # we are done so we can "recycle" our Kernel to be used cleanly
    # for the next notebook.
    init_modules_source = """
import sys
from proveit import *
import proveit.magics
__init_modules = list(sys.modules.keys())
__init_modules # avoid Prove-It magic assignment
"""
    init_modules_cell = nbformat.NotebookNode(cell_type='code',
                                              source=init_modules_source,
                                              metadata=dict())
    cell, _ = self.preprocess_cell(init_modules_cell, resources, 0)

    # change the working directory
    cd_source = 'import os\nos.chdir(r\"' + path + '")'
    cd_cell = nbformat.NotebookNode(cell_type='code', source=cd_source,
                                    metadata=dict())
    self.preprocess_cell(cd_cell, resources, 0)

    # Execute each cell. We must correct the execution count so we treat this
    # like it was the only notebook executed in this session (even though we
    # are actually recycling the Kernel).
    exec_count = 0
    for index, cell in enumerate(nb.cells):
        if hasattr(cell, 'source') and cell['source'].strip() != '':
            cell, resources = self.preprocess_cell(cell, resources, index)
            if 'execution_count' in cell:
                # make proper execution counts
                exec_count += 1
                cell['execution_count'] = exec_count
            if 'outputs' in cell:
                for output in cell['outputs']:
                    if 'execution_count' in output:
                        output['execution_count'] = exec_count
            # write the processed cell back into the notebook
            nb.cells[index] = cell

    # "reset" the stored Prove-It data. Also,
    # Delete all modules except those that were initially loaded.
    # Also, %reset local variables and history.
    # We are preparing the Kernel to be recycled.
    reset_source = """
import sys
import proveit
proveit.reset()
# delete all modules but initial modules and proveit._core_ modules
for m in list(sys.modules.keys()):
    if m not in __init_modules:
        if '.' in m:
            parent, child = m.rsplit('.', 1)
            if parent in __init_modules:
                # remove the module being deleted from its parent package
                sys.modules[parent].__dict__.pop(child)
        del(sys.modules[m])
%reset
%reset in
%reset out
"""
    reset_cell = nbformat.NotebookNode(cell_type='code', source=reset_source,
                                       metadata=dict())
    cell, _ = self.preprocess_cell(reset_cell, resources, 0)

    # Garbage collect.
    garbage_collect_source = """import sys
import gc
gc.collect()
len(gc.get_objects()) # used to check for memory leaks
"""
    garbage_collect_cell = nbformat.NotebookNode(cell_type='code',
                                                 source=garbage_collect_source,
                                                 metadata=dict())
    cell, _ = self.preprocess_cell(garbage_collect_cell, resources, 0)
    # Useful debugging to check for memory leaks:
    # print('num gc objects', cell['outputs'][0]['data']['text/plain'])
    return nb, resources
def run(self, cell, use_timeout=None):
    """
    Run a notebook cell in the IPythonKernel

    Parameters
    ----------
    cell : IPython.notebook.Cell
        the cell to be run
    use_timeout : int or None (default)
        the time in seconds after which a cell is stopped and assumed to have
        timed out. If set to None the value in `default_timeout` is used

    Returns
    -------
    list of ex_cell_outputs
        a list of NotebookNodes of the returned types. This is similar to the
        list of outputs generated when a cell is run
    """
    if use_timeout is None:
        use_timeout = self.default_timeout

    if hasattr(cell, 'source'):
        uid = self.execute(cell.source)
    else:
        raise AttributeError('No source/input key')

    outs = []
    stdout_cells = {}
    while True:
        msg = self.listen(uid, use_timeout)
        msg_type = msg['msg_type']
        if msg_type == 'execute_input':
            continue
        elif msg_type == 'clear_output':
            outs = []
            continue
        elif msg_type == 'status':
            if msg['content']['execution_state'] == 'idle':
                # we are done with the cell, let's compare
                break
            continue

        out_cell = nbformat.NotebookNode(output_type=msg_type)
        content = msg['content']

        if msg_type == 'stream':
            name = content['name']
            if name not in stdout_cells:
                out_cell.name = name
                out_cell.text = content['text']
                stdout_cells[name] = out_cell
                outs.append(out_cell)
            else:
                # we already have a stdout cell, so append to it
                stdout_cells[name].text += content['text']
        elif msg_type in ('display_data', 'execute_result'):
            if hasattr(content, 'execution_count'):
                out_cell['execution_count'] = content['execution_count']
            else:
                out_cell['execution_count'] = None
            out_cell['data'] = content['data']
            out_cell['metadata'] = content['metadata']
            outs.append(out_cell)
        elif msg_type == 'error':
            out_cell.ename = content['ename']
            out_cell.evalue = content['evalue']
            out_cell.traceback = content['traceback']
            outs.append(out_cell)
        elif msg_type.startswith('comm_'):
            # messages used to initialize, close and update widgets;
            # we will ignore these and hope for the best
            pass
        else:
            tv.warning("Unhandled iopub msg of type `%s`" % msg_type)

    return outs
def Execute(self, meta):
    print(meta)
    qId = ObjectId(meta['id'])

    print('acquiring egg')
    self.status.Update(qId, 'acquiring egg')
    egg = self.cm.GetProcessor(qId, meta['githubUser'])

    request = self.db.q.find_one({'_id': qId})
    githubUserName = request['githubUser']
    print(f"processing for: {githubUserName}")
    self.status.Update(qId, 'processing')

    githubAcc = Github(request['githubTok'])
    user = githubAcc.get_user()
    self.repo = user.get_repo("plutons")

    self.plutoPath = "/home/pluto/notebook-temp/" + meta['id'] + "/"
    try:
        os.makedirs(self.plutoPath)
    except FileExistsError:
        pass

    fullPath = request['file']
    notebook = gzip.decompress(request['notebook'])
    githubFileName = fullPath[fullPath.rfind('/') + 1:]
    githubPath = fullPath[:fullPath.rfind('/')]
    self.nbFileName = self.plutoPath + githubFileName
    print(f"processing notebook: {self.nbFileName}")

    with open(self.nbFileName, mode='wb') as file:
        file.write(notebook)

    cmdLine = f"jupyter nbconvert --ClearOutputPreprocessor.enabled=True --inplace {self.nbFileName}"
    subprocess.run(shlex.split(cmdLine), env=os.environ, errors=True)
    self.insertRef()

    egg.files.recursive_put(self.plutoPath, "/home/pluto/")

    print("executing in egg")
    self.status.Update(qId, 'executing in egg')
    egg.execute(shlex.split(f"jupyter nbconvert --to notebook --execute /home/pluto/{githubFileName} --inplace --allow-errors --ExecutePreprocessor.timeout=1200"))
    resp = egg.files.get(f"/home/pluto/{githubFileName}")
    with open(self.nbFileName, mode='wb') as file:
        file.write(resp)

    textLength = self.getOutputLength()
    print(f"total output length: {textLength}")
    if githubUserName != 'shyams80' and textLength > 10000:
        cmdLine = f"jupyter nbconvert --ClearOutputPreprocessor.enabled=True --inplace {self.nbFileName}"
        subprocess.run(shlex.split(cmdLine), env=os.environ, errors=True)

        nbDoc = nbformat.read(self.nbFileName, as_version=4)
        for nbCell in nbDoc['cells']:
            if nbCell['cell_type'] != 'code' and nbCell['source'] != None:
                continue
            nbCell['execution_count'] = 1
            outObj = nbformat.NotebookNode(
                output_type='stream', name='stderr',
                text=['total output string length exceeded 10000 characters. please stay within the limit.'])
            nbCell['outputs'].append(outObj)
            break

        nbformat.write(nbDoc, self.nbFileName, version=4)

    with open(self.nbFileName, mode='rb') as file:
        outFileContent = file.read()

    tooBig = False
    try:
        fileContent = self.repo.get_contents(fullPath)
        self.repo.update_file(fullPath, "response", outFileContent, fileContent.sha)
    except Exception as exp:
        print(exp)
        if exp.data["errors"][0]['code'] == 'too_large':
            tooBig = True

    if tooBig:
        print("file is too big!")
        self.status.Update(qId, 'file is too big!')
        egg.execute(shlex.split(f"jupyter nbconvert --to markdown --execute /home/pluto/{githubFileName} --allow-errors --ExecutePreprocessor.timeout=1200"))
        filePattern = githubFileName.replace(".ipynb", "")

        self.status.Update(qId, 'creating markdown...')
        egg.execute(shlex.split(f"./tard.sh {filePattern}"))
        resp = egg.files.get(f"/home/pluto/{filePattern}.tar.gz")
        with open(f"{self.plutoPath}{filePattern}.tar.gz", mode='wb') as file:
            file.write(resp)
        subprocess.run(shlex.split(f"tar xvf {self.plutoPath}{filePattern}.tar.gz -C {self.plutoPath}"), env=os.environ, errors=True)

        self.status.Update(qId, 'uploading markdown...')
        self.upsertGithub(f"{self.plutoPath}{filePattern}.md", f"{githubPath}/{filePattern}.md")

        if os.path.isdir(f"{self.plutoPath}{filePattern}_files"):
            self.status.Update(qId, 'uploading images...')
            for fname in os.listdir(f"{self.plutoPath}{filePattern}_files"):
                self.upsertGithub(f"{self.plutoPath}{filePattern}_files/{fname}",
                                  f"{githubPath}/{filePattern}_files/" + fname)

    egg.files.delete(f"/home/pluto/{githubFileName}")
    shutil.rmtree(self.plutoPath)

    self.db.q.update_one({'_id': qId}, {'$set': {'isProcessed': True,
                                                 'processedOn': datetime.now(),
                                                 'notebook': gzip.compress(outFileContent)}})
    self.status.Update(qId, 'finished')