class MongoTest(BaseTest, unittest.TestCase):
    """Test the Mongo client.

    Each test gets its own ``Client`` built for the 'mongo' backend in
    ``setUp``; ``tearDown`` calls ``delete()`` on it afterwards.
    """

    def setUp(self):
        # Backend-specific keyword options, kept apart from the positional
        # arguments so they are easy to spot and adjust.
        backend_options = {'host': 'localhost', 'weeks': 3}
        self.client = Client('mongo', 'testing', 6, 3, **backend_options)

    def tearDown(self):
        # Presumably removes whatever the test stored under 'testing';
        # confirm against Client.delete.
        self.client.delete()
def find_duplicates(bithash_corpus, table_name, ntables, nmatchbits,
                    ninsert=20000):
    """Look up matches for the tail of a corpus against its inserted head.

    The first ``ninsert`` entries of ``bithash_corpus`` are inserted through
    a 'redis' ``Client``; every remaining entry is then passed to the
    client's ``find_all``.

    Args:
        bithash_corpus: Sliceable sequence of hashes to index and query.
        table_name: Forwarded to ``Client`` as its second argument.
        ntables: Forwarded to ``Client`` as its third argument.
        nmatchbits: Forwarded to ``Client`` as its fourth argument.
        ninsert: Number of leading corpus entries to insert before querying
            the rest. Defaults to 20000.

    Returns:
        A list of ``(corpus_index, matches)`` tuples, one for each queried
        entry whose ``find_all`` result is non-empty, where ``corpus_index``
        is the entry's position in ``bithash_corpus``. Returns ``-1`` if a
        ``ConnectionError`` is raised while creating or resetting the client.
    """
    try:
        redis_client = Client('redis', table_name, ntables, nmatchbits)
        # Presumably clears data left under table_name by a previous run;
        # confirm against Client.delete.
        redis_client.delete()
    except ConnectionError:
        # Single-argument call form prints identically whether or not
        # print_function is in effect.
        print('Connection error. Is redis running? Execute:\n>> redis-server')
        return -1

    # Index the head of the corpus.
    redis_client.insert(bithash_corpus[:ninsert])

    # Query everything that was not inserted. The slice starts exactly at
    # ``ninsert`` so the entry at that position is not silently skipped, and
    # the same offset converts query positions back to corpus indices.
    dup_lists = redis_client.find_all(bithash_corpus[ninsert:])

    return [(ninsert + idx, matches)
            for idx, matches in enumerate(dup_lists)
            if len(matches) > 0]