示例#1
0
 def test_edit_distance_text_empty2(self):
     # Compares an empty first text with a three-line second text. The
     # test expects no aligned line pair at all and a distance of 10.
     left = ""
     right = "AA\nCC\nDD"
     # Expected final mapping: (0, None) followed by (None, k) for each
     # of the three line indices of the second text.
     expected_final = [(0, None)] + [(None, k) for k in range(3)]

     dist, aligned, final = edit_distance_text(left, right)  # pylint: disable=W0632

     self.assertEqual(len(aligned), 0)
     self.assertEqual(aligned, [])
     self.assertEqual(final, expected_final)
     self.assertEqual(dist, 10.0)
示例#2
0
 def test_edit_distance_text(self):
     # Compares a four-line text with the same text minus its "BB" line,
     # then runs the comparison again with the two arguments swapped.
     full = "AA\nBB\nCC\nDD"
     short = "AA\nCC\nDD"
     # Character pairs expected inside every aligned line pair.
     chars = [(0, 0), (1, 1)]
     # Expected (index in full, index in short) for the aligned lines.
     pairs = ((0, 0), (2, 1), (3, 2))

     # full -> short: index 1 of the first text is paired with None.
     dist, aligned, final = edit_distance_text(full, short)  # pylint: disable=W0632
     wanted = [(i, j, 0.0, chars) for i, j in pairs]
     self.assertEqual(len(aligned), 3)
     self.assertEqual(aligned, wanted)
     self.assertEqual(final, [(0, 0), (1, None), (2, 1), (3, 2)])
     self.assertEqual(dist, 3.0)

     # short -> full: same expectations with both indices mirrored.
     dist, aligned, final = edit_distance_text(short, full)  # pylint: disable=W0632
     wanted = [(j, i, 0.0, chars) for i, j in pairs]
     self.assertEqual(len(aligned), 3)
     self.assertEqual(aligned, wanted)
     self.assertEqual(final, [(0, 0), (None, 1), (1, 2), (2, 3)])
     self.assertEqual(dist, 3.0)
示例#3
0
 def test_edit_distance_text_space(self):
     # Compares a text containing an empty middle line with an identical
     # copy: the distance must be 0 and each line is paired with itself.
     text = "AA\n\nCC"
     chars = [(0, 0), (1, 1)]
     # The empty line (index 1) is expected to carry no character pairs.
     expected_aligned = [
         (0, 0, 0.0, chars),
         (1, 1, 0.0, []),
         (2, 2, 0.0, chars),
     ]

     dist, aligned, final = edit_distance_text(text, text)  # pylint: disable=W0632

     self.assertEqual(dist, 0)
     self.assertEqual(len(aligned), 3)
     self.assertEqual(aligned, expected_aligned)
     self.assertEqual(final, [(k, k) for k in range(3)])
示例#4
0
 def test_edit_distance_html2(self):
     # Renders the diff of two five-line texts as two-column HTML and
     # checks that a few expected markup fragments appear in the output.
     before = "AA\nBB\nCC\nZZZZZA\nDD"
     after = "AA\nCC\nDD\nZZZZZB\nEE"
     # The distance itself is not checked by this test.
     _, aligned, final = edit_distance_text(before, after)  # pylint: disable=W0632
     html = diff2html(before, after, aligned, final, two_columns=True)

     # Checked in this order; the last fragment spans two adjacent cells
     # that both contain "AA".
     fragments = (
         '<tr style="1px solid black;">',
         '<td style="background-color:#ABEBC6;">',
         '<td style="background-color:#E5E7E9;">',
         'AA</code></td><td style="background-color:#FFFFFF;">'
         '<code style="background-color:#FFFFFF;">AA',
     )
     for fragment in fragments:
         self.assertIn(fragment, html)
示例#5
0
 def test_edit_distance_no_align2(self):
     # Two five-line texts that differ only on their fourth line. The
     # test expects 4 aligned pairs, 6 entries in the final mapping and
     # 2 returned matrices when return_matrices=True.
     left = "AA\nBB\nCC\nreturn d\nDD"
     right = "AA\nBB\nCC\nfor i in jjjjjjjjj\nDD"

     outcome = edit_distance_text(left, right, return_matrices=True)
     dist, aligned, final, matrices = outcome
     # String-level comparison of the two differing lines; only the first
     # element of the returned value is checked below.
     line_result = edit_distance_string("return d", "for i in jjjjjjjjj")

     self.assertEqual(len(final), 6)
     self.assertEqual(len(matrices), 2)
     self.assertEqual(len(aligned), 4)
     self.assertGreater(dist, 0)
     self.assertGreater(line_result[0], 0)
示例#6
0
    def test_edit_distance_text_big(self):
        # Profiles edit_distance_text on two variants of the source code of
        # a function, then checks a few loose properties of the result.

        # First variant: initialises the j border before the i border,
        # loops over j then i, and returns only the distance.
        source_a = dedent('''
            def edit_distance_string(s1, s2):
                """
                Computes the edit distance between strings *s1* and *s2*.

                :param s1: first string
                :param s2: second string
                :return: dist, list of tuples of aligned characters
                """
                n1 = len(s1) + 1
                n2 = len(s2) + 1
                dist = numpy.full((n1, n2), n1 * n2, dtype=numpy.float64)
                pred = numpy.full(dist.shape, 0, dtype=numpy.int32)

                for j in range(1, n2):
                    dist[0, j] = j
                    pred[0, j] = 2
                for i in range(0, n1):
                    dist[i, 0] = i
                    pred[i, 0] = 1
                pred[0, 0] = -1

                for j in range(1, n2):
                    for i in range(1, n1):
                        c = dist[i, j]

                        p = 0
                        if dist[i - 1, j] + 1 < c:
                            c = dist[i - 1, j] + 1
                            p = 1
                        if dist[i, j - 1] + 1 < c:
                            c = dist[i, j - 1] + 1
                            p = 2
                        d = 0 if s1[i - 1] == s2[j - 1] else 1
                        if dist[i - 1, j - 1] + d < c:
                            c = dist[i - 1, j - 1] + d
                            p = 3
                        if p == 0:
                            raise RuntimeError(
                                "Unexpected value for p=%d at position=%r." % (p, (i, j)))

                        dist[i, j] = c
                        pred[i, j] = p

                d = dist[len(s1), len(s2)]
                return d
            ''')

        # Second variant: initialises the i border first, loops over i then
        # j, and additionally walks back through `pred` to build `equals`.
        source_b = dedent('''
            def edit_distance_string(s1, s2):
                """
                Computes the edit distance between strings *s1* and *s2*.

                :param s1: first string
                :param s2: second string
                :return: dist, list of tuples of aligned characters
                """
                n1 = len(s1) + 1
                n2 = len(s2) + 1
                dist = numpy.full((n1, n2), n1 * n2, dtype=numpy.float64)
                pred = numpy.full(dist.shape, 0, dtype=numpy.int32)

                for i in range(0, n1):
                    dist[i, 0] = i
                    pred[i, 0] = 1
                for j in range(1, n2):
                    dist[0, j] = j
                    pred[0, j] = 2
                pred[0, 0] = -1

                for i in range(1, n1):
                    for j in range(1, n2):
                        c = dist[i, j]

                        p = 0
                        if dist[i - 1, j] + 1 < c:
                            c = dist[i - 1, j] + 1
                            p = 1
                        if dist[i, j - 1] + 1 < c:
                            c = dist[i, j - 1] + 1
                            p = 2
                        d = 0 if s1[i - 1] == s2[j - 1] else 1
                        if dist[i - 1, j - 1] + d < c:
                            c = dist[i - 1, j - 1] + d
                            p = 3
                        if p == 0:
                            raise RuntimeError(
                                "Unexpected value for p=%d at position=%r." % (p, (i, j)))

                        dist[i, j] = c
                        pred[i, j] = p

                d = dist[len(s1), len(s2)]
                equals = []
                i, j = len(s1), len(s2)
                p = pred[i, j]
                while p != -1:
                    if p == 3:
                        equals.append((i - 1, j - 1))
                        i -= 1
                        j -= 1
                    elif p == 2:
                        j -= 1
                    elif p == 1:
                        i -= 1
                    else:
                        raise RuntimeError(
                            "Unexpected value for p=%d at position=%r." % (p, (i, j)))
                    p = pred[i, j]
                return d, list(reversed(equals))
            ''')

        # Path three levels above this file, handed to the profiler as
        # `rootrem` (presumably a prefix stripped from reported paths --
        # confirm against the profile helper).
        root_prefix = self.abs_path_join(__file__, '..', '..', '..')
        profiled = self.profile(
            lambda: edit_distance_text(source_a, source_b, verbose=False),
            return_results=True,
            rootrem=root_prefix)

        # The first element holds the value returned by the profiled call.
        dist, aligned, final = profiled[0]
        if __name__ == '__main__':
            # Only shown when the test file is executed as a script; the
            # last element is presumably the profiling report.
            print(profiled[-1])

        self.assertGreater(dist, 0)
        self.assertGreater(len(aligned), 10)
        self.assertIn((1, 1), final)