def console_test():
    """Run a short manual exercise of the ``SimhashIndex`` API.

    Builds an index (``k=10``) from four hard-coded sentences keyed by
    their stringified ids, hashes one further probe sentence, and then
    calls ``get_near_dups``, ``get_near_dups2`` and ``remove`` on the
    index with that probe.

    Nothing is printed or returned; the lookup results are only bound to
    a local name, so this is meant to be stepped through or inspected
    from a console/debugger.

    NOTE(review): ``get_near_dups2`` and ``remove`` are not defined in
    this view -- confirm the installed ``simhash`` package provides them.
    """
    # Imported locally so the dependency is only needed when this runs.
    from simhash import Simhash, SimhashIndex

    corpus = {
        1: 'How are you? I Am fine. blar blar blar blar blar Thanks.',
        2: 'How are you i am fine. blar blar blar blar blar than',
        3: 'This is simhash test.',
        4: 'How are you i am fine. blar blar blar blar blar thank1',
    }

    # The index takes (string id, Simhash) pairs.
    entries = []
    for doc_id, text in corpus.items():
        entries.append((str(doc_id), Simhash(text)))
    index = SimhashIndex(entries, k=10)

    probe = Simhash(
        u'How are you i am fine.ablar ablar xyz blar blar blar blar blar blar blar thank'
    )

    # Both lookups use the same probe; the second result replaces the first.
    dups = index.get_near_dups(probe)
    dups = index.get_near_dups2(probe, 5)

    # The probe itself was never added to the index above.
    index.remove(probe)