def test_set_operations(self):
    """Exercise item removal and in-place intersection on an NGram index."""
    first_items = {"abcde", "cdefg", "fghijk", "ijklm"}
    second_items = {"cdefg", "lmnop"}
    first_index = NGram(first_items)
    second_index = NGram(second_items)

    def matched(hits):
        # Keep only the first element of each search hit, in sorted order.
        return sorted(hit[0] for hit in hits)

    # Removing an item must drop it from subsequent search results.
    self.assertEqual(matched(first_index.search('cde')), ["abcde", "cdefg"])
    first_index.remove('abcde')
    self.assertEqual(matched(first_index.search('cde')), ["cdefg"])

    # In-place intersection: mirror the removal on the plain set, then
    # compare the index against the plain-set intersection.
    first_items.remove('abcde')
    first_index.intersection_update(second_index)
    self.assertEqual(first_index, first_items.intersection(second_items))
    for query, expected in (('lmn', []), ('ijk', []), ('def', ['cdefg'])):
        self.assertEqual(matched(first_index.search(query)), expected)
def test_set_operations(self):
    """Check remove() and intersection_update() against plain-set results."""
    items1 = {"abcde", "cdefg", "fghijk", "ijklm"}
    items2 = {"cdefg", "lmnop"}
    idx1, idx2 = NGram(items1), NGram(items2)

    def found(query):
        # Sorted first elements of whatever idx1.search() yields for query.
        names = [match[0] for match in idx1.search(query)]
        names.sort()
        return names

    # --- Item removal ---
    self.assertEqual(found('cde'), ["abcde", "cdefg"])
    idx1.remove('abcde')
    self.assertEqual(found('cde'), ["cdefg"])

    # --- Set intersection operation ---
    # Keep the reference set in step with the index before intersecting.
    items1.remove('abcde')
    idx1.intersection_update(idx2)
    expected_members = items1.intersection(items2)
    self.assertEqual(idx1, expected_members)
    self.assertEqual(found('lmn'), [])
    self.assertEqual(found('ijk'), [])
    self.assertEqual(found('def'), ['cdefg'])
def ngram(w1, w2, n):
    """Return an n-gram based distance between ``w1`` and ``w2``.

    Each input is wrapped in ``#`` boundary markers and split into its
    overlapping character n-grams. The distance is then

        d(a, b) = |a| + |b| - 2 * |a intersection b|

    where ``|a|`` and ``|b|`` count every n-gram occurrence (duplicates
    included) and the intersection counts each distinct shared n-gram once.

    NOTE: because of that asymmetry, a word whose padded form repeats an
    n-gram has a nonzero distance to itself, e.g. ``ngram("a", "a", 1) == 2``.

    Args:
        w1: First word; formatted into ``"#{}#"`` before splitting.
        w2: Second word; formatted the same way.
        n: Length of each n-gram. If it exceeds the padded length of a
            word, that word contributes no n-grams.

    Returns:
        The integer distance computed by the formula above.
    """
    padded1 = "#{}#".format(w1)
    padded2 = "#{}#".format(w2)
    grams1 = [padded1[i:i + n] for i in range(len(padded1) - n + 1)]
    grams2 = [padded2[i:i + n] for i in range(len(padded2) - n + 1)]

    # Only the distinct n-grams common to both words are needed, so a plain
    # set intersection is sufficient here.
    shared = set(grams1).intersection(grams2)
    return len(grams1) + len(grams2) - 2 * len(shared)