def _process_replace_chunk(self, chunk1, chunk2):
    """Process a 'replace' chunk pair and return the list of sub-chunks.

    Both chunks are encoded to UTF-32 big-endian before being passed to
    the Patience algorithm so that every character occupies a fixed
    4-byte span; the byte offsets returned by the matcher can then be
    converted back to character offsets by an integer division by 4.

    Raises :exc:`ValueError` if the sequence matcher yields an unknown
    opcode tag (previously an unknown tag silently appended a stale —
    or undefined — sub-chunk).
    """
    # Fixed-width encoding: one character <-> exactly 4 bytes.
    text1, text2 = [
        str(chunk).encode('utf_32-BE')
        for chunk in (chunk1, chunk2)
    ]
    sequence_matcher = PatienceSequenceMatcher(None, text1, text2)
    # Dispatch table: opcode tag -> two-way chunk class.
    chunk_class_for_tag = {
        'equal': TwoWayChunkEqual,
        'delete': TwoWayChunkDelete,
        'insert': TwoWayChunkInsert,
        'replace': TwoWayChunkReplace,
    }
    sub_chunks = []
    for tag, start_1, stop_1, start_2, stop_2 in sequence_matcher.get_opcodes():
        try:
            chunk_class = chunk_class_for_tag[tag]
        except KeyError:
            raise ValueError('Unknown opcode tag: {}'.format(tag))
        # Convert byte offsets back to character offsets (4-byte encoding).
        sub_chunk1 = chunk1[FlatSlice(start_1, stop_1) // 4]
        sub_chunk2 = chunk2[FlatSlice(start_2, stop_2) // 4]
        sub_chunks.append(chunk_class(sub_chunk1, sub_chunk2))
    return sub_chunks
def line_slice_iterator(self, new_line_separator=True):
    """Iterate over the flat slices of the document's lines.

    When *new_line_separator* is true, each yielded slice covers the
    line separator as well; otherwise the separator is excluded.
    """
    line_pairs = pairwise(self._line_start_locations)
    for index, (start, stop) in enumerate(line_pairs):
        if not new_line_separator:
            # Trim the separator of this particular line (its length
            # may vary, e.g. '\n' vs '\r\n').
            stop -= len(self._line_separators[index])
        yield FlatSlice(start, stop)
def test_view(self):
    # Build a ten-line buffer: 'azerty0\n' ... 'azerty9\n'.
    text_buffer = ''.join(['azerty' + str(i) + '\n' for i in range(10)])
    raw_text_document = RawTextDocument(text_buffer)
    # A line-slice view must reproduce the corresponding source lines.
    view = raw_text_document[LineSlice(1, 3)]
    self.assertEqual(str(view).splitlines(),
                     text_buffer.splitlines()[1:3])
    self.assertEqual(view.substring(FlatSlice(0, 6)), 'azerty')
    # A flat-slice view that starts and ends in the middle of lines.
    text_buffer = "012\n45\n78\n01"
    raw_text_document = RawTextDocument(text_buffer)
    view = raw_text_document[FlatSlice(5, 11)]
    stripped_lines = [line.strip() for line in view.lines()]
    self.assertEqual(str(view).splitlines(), stripped_lines)
def flat_slice(self):
    """Return a copy of the flat slice corresponding to the text chunk."""
    # Fixme: copy? check consistency elsewhere.
    slice_copy = FlatSlice(self._flat_slice)
    return slice_copy
def _lex(self, lexer):
    """Lex the document with *lexer*, appending one highlighted
    fragment per Pygments token, in document order.
    """
    start = 0
    for token, text in pygments.lex(str(self.raw_text_document), lexer):
        stop = start + len(text)
        # Each fragment covers the token's span in the flat text.
        self.append(HighlightedTextFragment(FlatSlice(start, stop), token))
        start = stop
def test_light_view(self):
    # In light view mode, a flat-slice view is just a plain substring.
    buffer_text = 'azertyuiopqsdfghjklm'
    document = RawTextDocument(buffer_text)
    document.light_view_mode = True
    selection = FlatSlice(5, 10)
    self.assertEqual(str(document[selection]), buffer_text[selection()])
def _test_text(self, text_buffer, line_slice, sub_string):
    document = RawTextDocument(text_buffer)
    # Without separators, the line count matches str.splitlines().
    stripped_lines = document.lines(new_line_separator=False)
    self.assertEqual(len(stripped_lines), len(text_buffer.splitlines()))
    # With separators, the lines cover the whole buffer exactly.
    full_lines = document.lines()
    self.assertEqual(len(text_buffer), sum(len(line) for line in full_lines))
    self.assertEqual(document.substring(FlatSlice(5, 10)), text_buffer[5:10])
    self.assertEqual(document.substring(line_slice), sub_string)
def __init__(self, text_buffer):
    """The parameter *text_buffer* specifies the text buffer,
    cf. :class:`RawTextDocumentAbc` for explanations.
    """
    # The document's flat slice covers the whole buffer.
    whole_slice = FlatSlice(0, len(text_buffer))
    super(RawTextDocument, self).__init__(text_buffer, whole_slice,
                                          *self._split_lines(text_buffer))
    self.line_slice = self.flat_to_line_slice(self._flat_slice)
def line_to_flat_slice(self, line_slice):
    """Convert a line slice to a flat slice and return it."""
    starts = self._line_start_locations
    # A line slice [start, stop) maps to the flat span between the
    # start locations of its first and one-past-last lines.
    return FlatSlice(starts[line_slice.start], starts[line_slice.stop])