示例#1
0
class MongoTest(BaseTest, unittest.TestCase):
    """Test the Mongo client."""

    def setUp(self):
        """Create the 'mongo' client named 'testing' used by each test."""
        # Keyword options are forwarded to Client unchanged; their meaning
        # is defined by Client, which is not visible from this class.
        client_options = dict(host='localhost', weeks=3)
        self.client = Client('mongo', 'testing', 6, 3, **client_options)

    def tearDown(self):
        """Call delete() on the client that setUp created."""
        self.client.delete()
示例#2
0
def find_duplicates(bithash_corpus, table_name, ntables, nmatchbits):
    """Insert the head of a corpus into a redis client and query the rest.

    The first 20000 entries of ``bithash_corpus`` are inserted through a
    ``Client('redis', ...)`` instance; every remaining entry is then looked
    up with ``find_all``.

    Args:
        bithash_corpus: Sliceable sequence of hashes to insert and query.
        table_name: Passed to ``Client`` as its second positional argument.
        ntables: Passed to ``Client`` as its third positional argument.
        nmatchbits: Passed to ``Client`` as its fourth positional argument.

    Returns:
        A list of ``(corpus_index, matches)`` tuples, one for every queried
        entry whose ``find_all`` result is non-empty, where ``corpus_index``
        is the entry's position in ``bithash_corpus``. Returns ``-1`` if
        creating or resetting the client raises ``ConnectionError``.
    """
    # Number of leading corpus entries inserted before querying begins.
    n_seed = 20000

    try:
        redis_client = Client('redis', table_name, ntables, nmatchbits)
        # NOTE(review): presumably clears data left over from an earlier
        # run under the same table name -- confirm against Client.delete.
        redis_client.delete()
    except ConnectionError:
        print('Connection error. Is redis running? Execute:\n>> redis-server')
        return -1

    # Insert the first 20k documents.
    redis_client.insert(bithash_corpus[:n_seed])

    # Query the remaining documents. The slice starts at n_seed itself: the
    # insert above covers indices 0..n_seed-1, so starting at n_seed + 1
    # would leave the entry at index n_seed neither inserted nor checked.
    # Assumes find_all returns one result per queried entry, in order.
    dup_lists = redis_client.find_all(bithash_corpus[n_seed:])

    # Translate positions within the queried slice back to corpus indices.
    return [(n_seed + offset, matches)
            for offset, matches in enumerate(dup_lists) if len(matches) > 0]