def _process_replace_chunk(self, chunk1, chunk2):
    """Split a *replace* chunk pair into finer-grained sub-chunks.

    Both chunks are converted with :func:`str` and encoded as big-endian
    UTF-32 before being handed to the Patience algorithm, so that every
    character has a fixed width of four bytes.  The offsets of each opcode
    are then scaled back by that width (``FlatSlice(...) // 4``) before
    they are used to index *chunk1* and *chunk2*.

    Return the list of ``TwoWayChunk*`` instances, one per opcode, in the
    order the opcodes were produced.

    Raise :exc:`ValueError` if the matcher yields an opcode tag other than
    ``'equal'``, ``'delete'``, ``'insert'`` or ``'replace'``.
    """

    # NOTE(review): a second definition of this method follows this one in
    # the file; if both live in the same class body, the later definition
    # replaces this one.  Consider removing the duplicate.

    bytes_per_character = 4  # width of one character in UTF-32

    # Dispatch table instead of an if/elif chain: an unrecognised tag now
    # fails loudly instead of silently re-appending the previous sub-chunk.
    chunk_classes = {
        'equal': TwoWayChunkEqual,
        'delete': TwoWayChunkDelete,
        'insert': TwoWayChunkInsert,
        'replace': TwoWayChunkReplace,
    }

    # An explicit byte order ("BE") keeps the codec from prepending a BOM,
    # so byte offset 0 is the first character of the chunk.
    text1, text2 = [str(chunk).encode('utf_32-BE') for chunk in (chunk1, chunk2)]
    sequence_matcher = PatienceSequenceMatcher(None, text1, text2)

    sub_chunks = []
    for tag, start_1, stop_1, start_2, stop_2 in sequence_matcher.get_opcodes():
        try:
            chunk_class = chunk_classes[tag]
        except KeyError:
            raise ValueError("Unexpected opcode tag {!r}".format(tag)) from None

        # Opcode offsets are byte offsets into the encoded texts.
        slice1 = FlatSlice(start_1, stop_1) // bytes_per_character
        slice2 = FlatSlice(start_2, stop_2) // bytes_per_character
        sub_chunks.append(chunk_class(chunk1[slice1], chunk2[slice2]))

    return sub_chunks
def _process_replace_chunk(self, chunk1, chunk2):
    """Split a *replace* chunk pair into finer-grained sub-chunks.

    Both chunks are converted with :func:`str` and encoded as big-endian
    UTF-32 before being handed to the Patience algorithm, so that every
    character has a fixed width of four bytes.  The offsets of each opcode
    are then scaled back by that width (``FlatSlice(...) // 4``) before
    they are used to index *chunk1* and *chunk2*.

    Return the list of ``TwoWayChunk*`` instances, one per opcode, in the
    order the opcodes were produced.

    Raise :exc:`ValueError` if the matcher yields an opcode tag other than
    ``'equal'``, ``'delete'``, ``'insert'`` or ``'replace'``.
    """

    # NOTE(review): an earlier definition with the same name and logic
    # precedes this one in the file; if both live in the same class body,
    # this later definition is the one that takes effect.  Consider
    # removing the duplicate.

    bytes_per_character = 4  # width of one character in UTF-32

    # Dispatch table instead of an if/elif chain: an unrecognised tag now
    # fails loudly instead of silently re-appending the previous sub-chunk.
    chunk_classes = {
        'equal': TwoWayChunkEqual,
        'delete': TwoWayChunkDelete,
        'insert': TwoWayChunkInsert,
        'replace': TwoWayChunkReplace,
    }

    # An explicit byte order ("BE") keeps the codec from prepending a BOM,
    # so byte offset 0 is the first character of the chunk.
    text1, text2 = [str(chunk).encode('utf_32-BE') for chunk in (chunk1, chunk2)]
    sequence_matcher = PatienceSequenceMatcher(None, text1, text2)

    sub_chunks = []
    for tag, start_1, stop_1, start_2, stop_2 in sequence_matcher.get_opcodes():
        try:
            chunk_class = chunk_classes[tag]
        except KeyError:
            raise ValueError("Unexpected opcode tag {!r}".format(tag)) from None

        # Opcode offsets are byte offsets into the encoded texts.
        slice1 = FlatSlice(start_1, stop_1) // bytes_per_character
        slice2 = FlatSlice(start_2, stop_2) // bytes_per_character
        sub_chunks.append(chunk_class(chunk1[slice1], chunk2[slice2]))

    return sub_chunks
def process(self, document1, document2, number_of_lines_of_context=3):
    """Diff two :class:`RawTextDocument` documents with the Patience
    algorithm.

    The lines of *document1* and *document2* are compared, the resulting
    opcodes are grouped using *number_of_lines_of_context* lines of
    context, and each group is passed to :meth:`_process_group` together
    with the diff object being built.

    Return the :class:`TwoWayFileDiff` instance created for the two
    documents.
    """

    file_diff = TwoWayFileDiff(document1, document2)

    lines1 = document1.lines()
    lines2 = document2.lines()
    matcher = PatienceSequenceMatcher(None, lines1, lines2)

    for opcode_group in matcher.get_grouped_opcodes(number_of_lines_of_context):
        self._process_group(file_diff, opcode_group)

    return file_diff