def diff(self, request, *args, **kwargs):
    """Return an HTML diff between this version and the one before it.

    The version being viewed comes from ``self.get_object()``; the version
    it is compared against is the first row of ``self.get_queryset()``
    whose id is lower. When there is no such earlier version the differ is
    given ``None`` as the old tree.

    The response can be cached because the underlying data won't change
    (although the formatting might).

    Returns:
        Response with ``content`` (the new version's HTML, serialised
        after the differ has processed it) and ``n_changes``.
    """
    differ = AttributeDiffer()

    def render_for_diff(revision):
        # Preprocess the stored document's XML for diffing, then render it.
        document = revision._object_version.object
        document.document_xml = differ.preprocess_document_diff(
            document.document_xml)
        return document.to_html()

    current = self.get_object()
    # most recent version just before this one
    # NOTE(review): .first() only yields the *latest* earlier version if the
    # queryset is ordered newest-first -- confirm against get_queryset().
    previous = self.get_queryset().filter(id__lt=current.id).first()

    before_html = render_for_diff(previous) if previous else ""
    after_html = render_for_diff(current)

    before_tree = None
    if before_html:
        before_tree = lxml.html.fromstring(before_html)
    after_tree = lxml.html.fromstring(after_html)

    change_count = differ.diff_document_html(before_tree, after_tree)
    content = lxml.html.tostring(after_tree, encoding='unicode')

    # TODO: include other diff'd attributes
    return Response({
        'content': content,
        'n_changes': change_count,
    })
def post(self, request):
    """Diff a submitted, unsaved document against a stored document.

    The request body is validated with ``DocumentAPISerializer`` (with the
    nested document ``content`` field made required) and must also carry
    ``comparison_doc_id``, the id of the stored ``Document`` to compare
    against.

    Returns:
        Response with ``content`` (the submitted document's HTML as text,
        serialised after the differ has processed it) and ``n_changes``.

    Raises:
        Http404: if ``comparison_doc_id`` is missing, is not a usable id,
            or does not match a stored document.
    """
    serializer = DocumentAPISerializer(data=self.request.data)
    serializer.fields['document'].fields['content'].required = True
    serializer.is_valid(raise_exception=True)

    differ = AttributeDiffer()

    # Build a throwaway Document from the submitted data; it is not saved here.
    current_document = serializer.fields['document'].update_document(
        Document(), serializer.validated_data['document'])
    current_document.document_xml = differ.preprocess_document_diff(
        current_document.document_xml)
    current_html = current_document.to_html()

    # A missing or malformed id previously escaped as KeyError / ValueError
    # (an HTTP 500); treat it the same as "no such document".
    comparison_doc_id = request.data.get('comparison_doc_id')
    if comparison_doc_id is None:
        raise Http404()
    try:
        comparison_document = Document.objects.get(id=comparison_doc_id)
    except (Document.DoesNotExist, ValueError, TypeError):
        raise Http404()

    comparison_document.document_xml = differ.preprocess_document_diff(
        comparison_document.document_xml)
    comparison_document_html = comparison_document.to_html()

    current_tree = lxml.html.fromstring(current_html)
    comparison_tree = lxml.html.fromstring(comparison_document_html)
    n_changes = differ.diff_document_html(comparison_tree, current_tree)

    # Serialise to str rather than utf-8 bytes so the payload is plain text.
    diff = lxml.html.tostring(current_tree, encoding='unicode')

    # TODO: include other diff'd attributes
    return Response({
        'content': diff,
        'n_changes': n_changes,
    })
def post(self, request, document_id):
    """Diff the submitted changes to ``self.document`` against its stored copy.

    The submitted data is validated with ``DocumentDiffSerializer`` and
    applied to ``self.document`` (the "local" side). A second copy of the
    same document is loaded by primary key to act as the "remote" side.

    If the validated data contains ``element_id`` only that element is
    diffed; otherwise the whole document is. When the element is absent on
    the local side an empty ``<div></div>`` is diffed, and when it is absent
    on the remote side the differ receives ``None`` as the old tree.

    Returns:
        Response with ``html_diff`` (always text) and ``n_changes``.
    """
    serializer = DocumentDiffSerializer(
        instance=self.document, data=self.request.data)
    serializer.is_valid(raise_exception=True)

    differ = AttributeDiffer()

    local_doc = self.document
    # Load the unmodified copy before the submitted changes are applied to
    # local_doc below.
    remote_doc = Document.objects.get(pk=local_doc.pk)
    # set this up to be the modified document
    serializer.fields['document'].update_document(
        local_doc, serializer.validated_data['document'])

    local_doc.content = differ.preprocess_document_diff(
        local_doc.document_xml).decode('utf-8')
    remote_doc.content = differ.preprocess_document_diff(
        remote_doc.document_xml).decode('utf-8')

    element_id = serializer.validated_data.get('element_id')
    if element_id:
        namespaces = {'a': local_doc.doc.namespace}
        # handle certain elements that don't have ids
        if element_id in ('preface', 'preamble', 'components'):
            # Safe to interpolate: the value is one of the literals above.
            xpath = f'//a:{element_id}'
            variables = {}
        else:
            # Pass the client-supplied id as an XPath variable instead of
            # splicing it into the expression, so quotes or other XPath
            # syntax in the value cannot break or alter the query.
            xpath = '//a:*[@eId=$eid]'
            variables = {'eid': element_id}

        # diff just this element
        local_element = local_doc.doc.root.xpath(
            xpath, namespaces=namespaces, **variables)
        remote_element = remote_doc.doc.root.xpath(
            xpath, namespaces=namespaces, **variables)

        local_html = (
            local_doc.to_html(element=local_element[0])
            if local_element else None)
        remote_html = (
            remote_doc.to_html(element=remote_element[0])
            if remote_element else None)
    else:
        # diff the whole document
        local_html = local_doc.to_html()
        remote_html = remote_doc.to_html()

    local_tree = lxml.html.fromstring(local_html or "<div></div>")
    remote_tree = lxml.html.fromstring(remote_html) if remote_html else None
    n_changes, diff = differ.diff_document_html(remote_tree, local_tree)

    if not isinstance(diff, str):
        # Serialise to str (not utf-8 bytes) so html_diff has one type
        # regardless of what the differ returned.
        diff = lxml.html.tostring(diff, encoding='unicode')

    # TODO: include other diff'd attributes
    return Response({
        'html_diff': diff,
        'n_changes': n_changes,
    })
class AttributeDifferTestCase(TestCase):
    """Tests for ``AttributeDiffer``: HTML tree diffs and list diffs."""

    def setUp(self):
        self.differ = AttributeDiffer()

    def _assert_html_diff(self, old_html, new_html, expected_html):
        """Diff old against new and compare the resulting markup of new."""
        old_tree = as_tree(old_html)
        new_tree = as_tree(new_html)
        self.differ.diff_document_html(old_tree, new_tree)
        self.assertEqual(as_html(new_tree), expected_html)

    def test_text_changed(self):
        self._assert_html_diff(
            '<p>abc 123</p>',
            '<p>def 456</p>',
            '<p><del>abc</del><ins>def</ins> <del>123</del><ins>456</ins></p>',
        )

    def test_text_partially_changed(self):
        self._assert_html_diff(
            '<p>some old text</p>',
            '<p>some new text</p>',
            '<p>some <del>old</del><ins>new</ins> text</p>',
        )

    def test_text_partially_changed_with_elements(self):
        self._assert_html_diff(
            '<p>some old text <b>no change</b> text <i>no change</i></p>',
            '<p>some new text <b>no change</b> text <i>no change</i></p>',
            '<p>some <del>old</del><ins>new</ins> text <b>no change</b> text <i>no change</i></p>',
        )

    def test_tail_changed(self):
        self._assert_html_diff(
            '<p>something <b>bold</b> 123 xx <i>and</i> same </p>',
            '<p>something <b>bold</b> 456 xx <i>and</i> same </p>',
            '<p>something <b>bold</b> <del>123</del><ins>456</ins> xx <i>and</i> same </p>',
        )

    def test_inline_tag_removed(self):
        self._assert_html_diff(
            '<p>Some text <b>bold text</b> and a tail.</p>',
            '<p>Some text bold text and a tail.</p>',
            '<p>Some text <ins>bold text and a tail.</ins><b class="del">bold text</b><del> and a tail.</del></p>',
        )

    def test_inline_tag_added(self):
        self._assert_html_diff(
            '<p>Some text bold text and a tail.</p>',
            '<p>Some text <b>bold text</b> and a tail.</p>',
            '<p>Some text <del>bold text and a tail.</del><b class="ins">bold text</b><ins> and a tail.</ins></p>',
        )

    def test_diff_lists_deleted(self):
        # The middle item is removed; its neighbours are unchanged.
        expected = {
            'attr': 'test',
            'title': 'Test',
            'type': 'list',
            'changes': [
                {'html_new': '1', 'html_old': '1'},
                {'html_new': '', 'html_old': '<del>2</del>', 'new': None, 'old': '2'},
                {'html_new': '3', 'html_old': '3'},
            ],
        }
        actual = self.differ.diff_lists('test', 'Test', ['1', '2', '3'], ['1', '3'])
        self.assertEqual(expected, actual)

    def test_diff_lists_empty(self):
        # The new list is empty, so every old item is reported as deleted.
        expected = {
            'attr': 'test',
            'title': 'Test',
            'type': 'list',
            'changes': [
                {'html_new': '', 'html_old': '<del>1</del>', 'new': None, 'old': '1'},
                {'html_new': '', 'html_old': '<del>2</del>', 'new': None, 'old': '2'},
                {'html_new': '', 'html_old': '<del>3</del>', 'new': None, 'old': '3'},
            ],
        }
        actual = self.differ.diff_lists('test', 'Test', ['1', '2', '3'], [])
        self.assertEqual(expected, actual)

    def test_diff_lists_added(self):
        # A new middle item is inserted; its neighbours are unchanged.
        expected = {
            'attr': 'test',
            'title': 'Test',
            'type': 'list',
            'changes': [
                {'html_new': '1', 'html_old': '1'},
                {'html_new': '<ins>2</ins>', 'html_old': '', 'new': '2', 'old': None},
                {'html_new': '3', 'html_old': '3'},
            ],
        }
        actual = self.differ.diff_lists('test', 'Test', ['1', '3'], ['1', '2', '3'])
        self.assertEqual(expected, actual)