def test_edit_distance_text_empty2(self):
    # Compares an empty text against a three-line text: nothing is
    # expected in the aligned list, and the final pairing is expected to
    # hold one (0, None) entry followed by one (None, k) entry per line
    # of the second text.
    empty_text = ""
    three_lines = "AA\nCC\nDD"
    expected_final = [(0, None), (None, 0), (None, 1), (None, 2)]

    dist, aligned, final = edit_distance_text(empty_text, three_lines)  # pylint: disable=W0632

    self.assertEqual(len(aligned), 0)
    self.assertEqual(aligned, [])
    self.assertEqual(final, expected_final)
    self.assertEqual(dist, 10.)
def test_edit_distance_text(self):
    # Compares a four-line text with the same text minus its second line,
    # first in one direction and then with the arguments swapped.
    longer = "AA\nBB\nCC\nDD"
    shorter = "AA\nCC\nDD"
    # Character pairs expected for every couple of identical 2-letter lines.
    same_line = [(0, 0), (1, 1)]

    # longer -> shorter: line 1 of the first text is expected to stay unmatched.
    dist, aligned, final = edit_distance_text(longer, shorter)  # pylint: disable=W0632
    expected_aligned = [
        (0, 0, 0.0, same_line),
        (2, 1, 0.0, same_line),
        (3, 2, 0.0, same_line),
    ]
    self.assertEqual(len(aligned), 3)
    self.assertEqual(aligned, expected_aligned)
    self.assertEqual(final, [(0, 0), (1, None), (2, 1), (3, 2)])
    self.assertEqual(dist, 3.)

    # shorter -> longer: the expected indices are mirrored.
    dist, aligned, final = edit_distance_text(shorter, longer)  # pylint: disable=W0632
    expected_aligned = [
        (0, 0, 0.0, same_line),
        (1, 2, 0.0, same_line),
        (2, 3, 0.0, same_line),
    ]
    self.assertEqual(len(aligned), 3)
    self.assertEqual(aligned, expected_aligned)
    self.assertEqual(final, [(0, 0), (None, 1), (1, 2), (2, 3)])
    self.assertEqual(dist, 3.)
def test_edit_distance_text_space(self):
    # Two identical texts whose middle line is empty: the expected distance
    # is 0 and the empty line is expected to be aligned with an empty list
    # of character pairs.
    first = "AA\n\nCC"
    second = "AA\n\nCC"
    char_pairs = [(0, 0), (1, 1)]
    expected_aligned = [
        (0, 0, 0.0, char_pairs),
        (1, 1, 0.0, []),
        (2, 2, 0.0, char_pairs),
    ]

    dist, aligned, final = edit_distance_text(first, second)  # pylint: disable=W0632

    self.assertEqual(dist, 0)
    self.assertEqual(len(aligned), 3)
    self.assertEqual(aligned, expected_aligned)
    self.assertEqual(final, [(0, 0), (1, 1), (2, 2)])
def test_edit_distance_html2(self):
    # Aligns two five-line texts, renders the result with diff2html using
    # two_columns=True and looks for the expected markup fragments.
    before = "AA\nBB\nCC\nZZZZZA\nDD"
    after = "AA\nCC\nDD\nZZZZZB\nEE"

    _, aligned, final = edit_distance_text(before, after)  # pylint: disable=W0632
    html = diff2html(before, after, aligned, final, two_columns=True)

    # Row and cell openings that must appear somewhere in the output.
    fragments = (
        '<tr style="1px solid black;">',
        '<td style="background-color:#ABEBC6;">',
        '<td style="background-color:#E5E7E9;">',
    )
    for fragment in fragments:
        self.assertIn(fragment, html)

    # The identical first line "AA" must show up on both sides of a cell
    # boundary with a white background.
    self.assertIn(
        'AA</code></td><td style="background-color:#FFFFFF;">'
        '<code style="background-color:#FFFFFF;">AA', html)
def test_edit_distance_no_align2(self):
    # The two texts differ only on their fourth line; the call also asks
    # for the matrices through return_matrices=True.
    left = "AA\nBB\nCC\nreturn d\nDD"
    right = "AA\nBB\nCC\nfor i in jjjjjjjjj\nDD"

    dist, aligned, final, matrices = edit_distance_text(
        left, right, return_matrices=True)
    # Distance between the two differing lines taken on their own.
    line_result = edit_distance_string("return d", "for i in jjjjjjjjj")

    self.assertEqual(len(final), 6)
    self.assertEqual(len(matrices), 2)
    self.assertEqual(len(aligned), 4)
    self.assertGreater(dist, 0)
    self.assertGreater(line_result[0], 0)
# Runs edit_distance_text on two larger fixtures and profiles the call.
#
# f1 and f2 are dedent-ed texts holding two variants of the source of an
# ``edit_distance_string`` function. Compared with f1, f2 swaps the order of
# the two initialisation loops and of the nested i/j loops, and it also
# walks back through ``pred`` to build and return the list of aligned
# positions, whereas f1 only returns the distance.
#
# The comparison is executed through ``self.profile(..., return_results=True)``
# with ``verbose=False``; ``res[0]`` is unpacked into (d, aligned, final) and
# ``res[-1]`` is printed only when the file is run as a script
# (presumably the profiling report -- confirm against the profile helper).
#
# Checks: the distance is strictly positive, more than 10 entries are
# aligned, and the pair (1, 1) belongs to the final alignment.
def test_edit_distance_text_big(self): f1 = dedent(''' def edit_distance_string(s1, s2): """ Computes the edit distance between strings *s1* and *s2*. :param s1: first string :param s2: second string :return: dist, list of tuples of aligned characters """ n1 = len(s1) + 1 n2 = len(s2) + 1 dist = numpy.full((n1, n2), n1 * n2, dtype=numpy.float64) pred = numpy.full(dist.shape, 0, dtype=numpy.int32) for j in range(1, n2): dist[0, j] = j pred[0, j] = 2 for i in range(0, n1): dist[i, 0] = i pred[i, 0] = 1 pred[0, 0] = -1 for j in range(1, n2): for i in range(1, n1): c = dist[i, j] p = 0 if dist[i - 1, j] + 1 < c: c = dist[i - 1, j] + 1 p = 1 if dist[i, j - 1] + 1 < c: c = dist[i, j - 1] + 1 p = 2 d = 0 if s1[i - 1] == s2[j - 1] else 1 if dist[i - 1, j - 1] + d < c: c = dist[i - 1, j - 1] + d p = 3 if p == 0: raise RuntimeError( "Unexpected value for p=%d at position=%r." % (p, (i, j))) dist[i, j] = c pred[i, j] = p d = dist[len(s1), len(s2)] return d ''') f2 = dedent(''' def edit_distance_string(s1, s2): """ Computes the edit distance between strings *s1* and *s2*. :param s1: first string :param s2: second string :return: dist, list of tuples of aligned characters """ n1 = len(s1) + 1 n2 = len(s2) + 1 dist = numpy.full((n1, n2), n1 * n2, dtype=numpy.float64) pred = numpy.full(dist.shape, 0, dtype=numpy.int32) for i in range(0, n1): dist[i, 0] = i pred[i, 0] = 1 for j in range(1, n2): dist[0, j] = j pred[0, j] = 2 pred[0, 0] = -1 for i in range(1, n1): for j in range(1, n2): c = dist[i, j] p = 0 if dist[i - 1, j] + 1 < c: c = dist[i - 1, j] + 1 p = 1 if dist[i, j - 1] + 1 < c: c = dist[i, j - 1] + 1 p = 2 d = 0 if s1[i - 1] == s2[j - 1] else 1 if dist[i - 1, j - 1] + d < c: c = dist[i - 1, j - 1] + d p = 3 if p == 0: raise RuntimeError( "Unexpected value for p=%d at position=%r." 
% (p, (i, j))) dist[i, j] = c pred[i, j] = p d = dist[len(s1), len(s2)] equals = [] i, j = len(s1), len(s2) p = pred[i, j] while p != -1: if p == 3: equals.append((i - 1, j - 1)) i -= 1 j -= 1 elif p == 2: j -= 1 elif p == 1: i -= 1 else: raise RuntimeError( "Unexpected value for p=%d at position=%r." % (p, (i, j))) p = pred[i, j] return d, list(reversed(equals)) ''') res = self.profile(lambda: edit_distance_text(f1, f2, verbose=False), return_results=True, rootrem=self.abs_path_join(__file__, '..', '..', '..')) d, aligned, final = res[0] if __name__ == '__main__': print(res[-1]) self.assertGreater(d, 0) self.assertGreater(len(aligned), 10) self.assertIn((1, 1), final)