def test_remove_one_cell_beginning():
    """Dropping the first cell should produce a DELETE followed by an EQUAL."""
    # Cell [0] is left out on purpose; the two survivors are re-indexed from 0
    # and have their output cleared.
    surviving_cells = [
        attr.evolve(cell, output=None, index=position)
        for position, cell in enumerate(THREE_CELL_CONTENTS.cells[1:])
    ]
    new_contents = NotebookContents(cells=surviving_cells)

    opcodes = opcode_merge_cell_contents(THREE_CELL_CONTENTS, new_contents)

    assert len(opcodes) == 2
    assert [step.op_code for step in opcodes] == [OpCodes.DELETE, OpCodes.EQUAL]

    delete_step, equal_step = opcodes[0], opcodes[1]
    assert delete_step.current == (0, 1)
    assert equal_step.current == (1, 3)
def merge_notebooks(comm: Comm, result: Dict[str, Any]) -> None:
    """Diff the notebook described by ``result`` against its replacement and apply the opcodes.

    Args:
        comm: Passed through unchanged to ``perform_op_code`` for every opcode.
        result: Mapping that must contain ``"javascript_cells"`` (an iterable of
            cell dicts with ``"cell_type"`` and ``"source"`` keys) and
            ``"new_notebook"`` (an iterable of keyword dicts for ``JupyterCell``).
    """
    existing_cells = []
    for position, raw_cell in enumerate(result["javascript_cells"]):
        # NOTE: cell metadata is not forwarded; only type, source and output text are.
        existing_cells.append(
            JupyterCell(
                index=position,
                cell_type=raw_cell["cell_type"],
                source=raw_cell["source"],
                output=get_output_text(raw_cell),
            )
        )
    current_notebook = NotebookContents(cells=existing_cells)

    replacement_cells = [JupyterCell(**cell_kwargs) for cell_kwargs in result["new_notebook"]]
    new_notebook = NotebookContents(cells=replacement_cells)

    opcodes = opcode_merge_cell_contents(current_notebook, new_notebook)

    J_LOGGER.info("Performing Opcodes...")
    J_LOGGER.info(opcodes)

    # Each call returns the shift value that is handed to the next call.
    net_shift = 0
    for op_action in opcodes:
        net_shift = perform_op_code(comm, op_action, current_notebook, new_notebook, net_shift)
def test_remove_one_cell():
    """Dropping the last cell should produce an EQUAL followed by a DELETE."""
    # Cell [2] is left out on purpose; the first two keep their index but lose
    # their output.
    kept_cells = [attr.evolve(cell, output=None) for cell in THREE_CELL_CONTENTS.cells[:2]]
    new_contents = NotebookContents(cells=kept_cells)

    opcodes = opcode_merge_cell_contents(THREE_CELL_CONTENTS, new_contents)

    assert len(opcodes) == 2
    assert [step.op_code for step in opcodes] == [OpCodes.EQUAL, OpCodes.DELETE]

    equal_step = opcodes[0]
    assert equal_step.current_start_idx == 0
    assert equal_step.current_final_idx == 2
    assert equal_step.updated_start_idx == 0
    assert equal_step.updated_final_idx == 2
def test_reorder_multiple_values():
    """Swapping the first two cells should end with a COPY_OUTPUT opcode."""
    original_cells = THREE_CELL_CONTENTS.cells

    # Cells [0] and [1] trade places (output cleared); cell [2] is reused as is.
    swapped_cells = [
        attr.evolve(original_cells[1], index=0, output=None),
        attr.evolve(original_cells[0], index=1, output=None),
        original_cells[2],
    ]
    new_contents = NotebookContents(cells=swapped_cells)

    opcodes = opcode_merge_cell_contents(THREE_CELL_CONTENTS, new_contents)

    expected_sequence = [
        OpCodes.INSERT,
        OpCodes.EQUAL,
        OpCodes.DELETE,
        OpCodes.EQUAL,
        OpCodes.COPY_OUTPUT,
    ]
    assert len(opcodes) == 5
    assert [step.op_code for step in opcodes] == expected_sequence

    copy_output_step = opcodes[-1]
    assert copy_output_step.current_final_idx == 1
    assert copy_output_step.updated_final_idx == 0
def test_remove_one_cell_and_update_another():
    """Removing the last cell while editing the middle one yields EQUAL then REPLACE."""
    original_cells = THREE_CELL_CONTENTS.cells

    # The cell at index 1 gets new source; cell [2] is dropped entirely.
    modified_source = ["y = 3; y"]
    unchanged_cell = attr.evolve(original_cells[0], output=None, index=0)
    edited_cell = attr.evolve(original_cells[1], source=modified_source, output=None, index=1)
    new_contents = NotebookContents(cells=[unchanged_cell, edited_cell])

    opcodes = opcode_merge_cell_contents(THREE_CELL_CONTENTS, new_contents)

    assert [step.op_code for step in opcodes] == [OpCodes.EQUAL, OpCodes.REPLACE]

    equal_step, replace_step = opcodes[0], opcodes[1]
    assert equal_step.current == (0, 1)
    assert equal_step.updated == (0, 1)

    # Note: the replace consumes two current cells and collapses them into one.
    assert replace_step.current == (1, 3)
    assert replace_step.updated == (1, 2)
import attr

from jupyter_ascending.notebook.data_types import JupyterCell
from jupyter_ascending.notebook.data_types import NotebookContents
from jupyter_ascending.notebook.evolve import evolve_cell_source
from jupyter_ascending.notebook.evolve import evolve_notebook_cells
from jupyter_ascending.notebook.merge import OpCodes
from jupyter_ascending.notebook.merge import opcode_merge_cell_contents

# Fixture: a notebook with a single code cell (index 0) that has both source and output.
SIMPLE_CONTENTS = NotebookContents(cells=[JupyterCell(cell_type="code", index=0, source=["x = 1; x"], output=["1"])])

# Fixture: a notebook with three code cells at indexes 0, 1 and 2, each with
# its own source and output.
THREE_CELL_CONTENTS = NotebookContents(
    cells=[
        JupyterCell(cell_type="code", index=0, source=["x = 1; x"], output=["1"]),
        JupyterCell(cell_type="code", index=1, source=["y = 2; y"], output=["2"]),
        JupyterCell(cell_type="code", index=2, source=["z = 3; z"], output=["3"]),
    ]
)


# Test helper that walks the notebook's cells and collects them, together with
# `new_cell`, into a fresh list ordered around `new_cell.index`.
# NOTE(review): only the start of this function is visible here. No `return`
# statement is shown even though the signature promises a NotebookContents;
# confirm the remainder against the full file before relying on it.
def _insert_notebook_cell(notebook: NotebookContents, new_cell: JupyterCell) -> NotebookContents:
    # TODO: Should this be moved somewhere?
    original_cells = notebook.cells
    new_cells = []
    for cell in original_cells:
        if cell.index < new_cell.index:
            # Cells positioned before the insertion point are kept as they are.
            new_cells.append(cell)
        else:
            # On reaching the insertion point, add the new cell exactly once.
            if new_cell not in new_cells:
                new_cells.append(new_cell)
def merge_cell_contents(
    current_notebook: NotebookContents, updated_notebook: NotebookContents
) -> Tuple[NotebookContents, CellMovements]:
    """Merge the cells of ``updated_notebook`` into ``current_notebook``.

    The merge runs in three passes:

    1. Cells whose ``source`` is identical in both notebooks keep the current
       cell's data and take the updated cell's index. A changed index is
       recorded as a ``Movement``.
    2. For each current cell still unmatched, the distance between its joined
       source and that of every unmatched updated cell is computed and stored,
       sorted with ``LevenshteinDistance.sort_function``.
    3. Unmatched current cells are paired one at a time with their
       first-ranked updated cell that is still available, taking over its
       index and source.

    Updated cells left without a partner are appended unchanged. Current cells
    left without a partner do not appear in the result.

    Returns:
        A tuple of the notebook built by ``evolve_notebook_cells`` from the
        merged cells, and a ``CellMovements`` with the recorded movements.
        When ``content_equals`` reports no difference, ``current_notebook``
        itself is returned together with an empty movement list.
    """
    # TODO: rename current_notebook (e.g. "remote notebook") so its role is obvious.
    if current_notebook.content_equals(updated_notebook):
        return current_notebook, CellMovements(movements=[])

    unmatched_current = list(current_notebook.cells)
    unmatched_updated = list(updated_notebook.cells)

    movements = []
    final_cells = []

    # Pass 1: cells with exactly the same source only need their index updated.
    # Iterate over a snapshot so that removing from the live list is safe.
    for existing in tuple(unmatched_current):
        twin = next(
            (candidate for candidate in unmatched_updated if existing.source == candidate.source),
            None,
        )
        if twin is None:
            continue

        if existing.index != twin.index:
            movements.append(Movement(previous=existing.index, current=twin.index))

        final_cells.append(attr.evolve(existing, index=twin.index))
        unmatched_current.remove(existing)
        unmatched_updated.remove(twin)

    # Pass 2: rank every remaining updated cell against each remaining current cell.
    distancer = LevenshteinDistance
    distance_between_cells: Dict[JupyterCell, List[CellDistance]] = defaultdict(list)

    for current_cell in unmatched_current:
        # TODO: what if every distance is bad?
        # TODO: maybe use another measure, e.g. a confidence that two cells are the same.
        ranking = distance_between_cells[current_cell]

        for updated_cell in unmatched_updated:
            # Identical sources must already have been consumed by pass 1.
            if current_cell.source == updated_cell.source:
                assert False, f"{current_cell} / {updated_cell}::\n{final_cells}"

            ranking.append(
                CellDistance(
                    distancer.find_distance(current_cell.joined_source, updated_cell.joined_source),
                    updated_cell,
                )
            )

        distance_between_cells[current_cell] = sorted(ranking, key=distancer.sort_function)

    # Pass 3: pair up the most closely related cells (small distance first).
    # Running out of updated cells means there is nothing left to hand out.
    # Running out of current cells means the leftover updated cells are inserts.
    def best_distance_key(cell):
        return distancer.sort_function(distance_between_cells[cell][0])

    # TODO: this might be way too many loops?
    while unmatched_current and distance_between_cells and unmatched_updated:
        chosen_current = sorted(unmatched_current, key=best_distance_key)[0]
        ranked_candidates = distance_between_cells.pop(chosen_current)

        # First-ranked candidate that has not been claimed by another cell yet.
        chosen_updated = next(
            entry.cell for entry in ranked_candidates if entry.cell in unmatched_updated
        )

        assert chosen_current in unmatched_current
        unmatched_current.remove(chosen_current)

        assert chosen_updated in unmatched_updated
        unmatched_updated.remove(chosen_updated)

        final_cells.append(
            attr.evolve(chosen_current, index=chosen_updated.index, source=chosen_updated.source)
        )

    # Whatever updated cells remain must be insertions, so they are taken over
    # unchanged, last one first (the order repeated pops from the end give).
    final_cells.extend(reversed(unmatched_updated))
    unmatched_updated.clear()

    # Should have no more updated cells to input
    assert not unmatched_updated

    return evolve_notebook_cells(current_notebook, final_cells), CellMovements(movements=movements)