Пример #1
0
connection = Connection()
db = connection[DB_NAME]
coll = db[COLL_NAME]

# Remove everything in the collection so the run starts from an empty state.
coll.remove()

# Populate the database from data/biz.dat: one "<name>\t<url>" record per line.
# The context manager closes the file once loading is done.
with open('data/biz.dat') as biz_file:
    for line in biz_file:
        # Build a real list: on Python 3 map() is lazy and has no len().
        binfo_l = [s.strip() for s in line.split('\t')]
        assert len(binfo_l) == 2, "expected '<name>\\t<url>', got %r" % (line,)
        coll.insert({'name': binfo_l[0], 'url': binfo_l[1]})

# Generate bloom filter based on 'url'.
bfcoll = db[COLL_NAME]  # get another collection object
bfcoll = bloomify(bfcoll, 'url')

# Generate the test file. Closing it here guarantees the generated records are
# flushed to disk before anything reads data/testset back.
T1 = 'data/testset'
with open(T1, 'w') as testset_file:
    gendata(1000, 0.5, output_file=testset_file)

#function to perform timed tests
def timed_test(testf_path, coll):
    """Time one ``find_one`` lookup by URL for every record in a test file.

    Each line of the file must hold exactly two tab-separated fields,
    ``<name>\\t<url>``; surrounding whitespace on each field is stripped.
    Only the URL is used, as the query ``{'url': <url>}``.

    Args:
        testf_path: Path of the test file to read.
        coll: Object exposing ``find_one(query)``; called once per line.

    Returns:
        Elapsed time in seconds (float) covering opening the file, reading
        it and performing all lookups.

    Raises:
        AssertionError: If a line does not contain exactly two fields.
    """
    # Imported locally so this function does not need a new file-level import.
    # time.clock() was removed in Python 3.8; default_timer works on both
    # Python 2 and 3.
    from timeit import default_timer

    tic = default_timer()
    with open(testf_path) as testf:
        for line in testf:
            # A real list (not a lazy map object) so len() works on Python 3.
            binfo_l = [field.strip() for field in line.split("\t")]
            assert len(binfo_l) == 2, \
                "expected '<name>\\t<url>', got %r" % (line,)
            burl = binfo_l[1]
            coll.find_one({'url': burl})
    toc = default_timer()
    return toc - tic
Пример #2
0
connection = Connection()
db = connection[DB_NAME]
coll = db[COLL_NAME]

# Start from an empty collection.
coll.remove()

# Sample documents: four users, each with a distinct name and value.
data = [
    {"name": "user_a", "value": 1},
    {"name": "user_b", "value": 2},
    {"name": "user_c", "value": 3},
    {"name": "user_d", "value": 4},
]

# Store the sample documents in the collection.
coll.insert(data)

# Build a bloom filter keyed on the 'name' field.
# NOTE(review): the checks below read the filter through coll._bf, so
# bloomify presumably attaches it to the collection it is given -- confirm.
bfcoll = bloomify(coll, "name")

# Every inserted document must be reported by the filter and be retrievable.
for doc in data:
    assert coll._bf.contains(doc["name"])      # filter reports the name
    assert coll.find_one(doc) is not None      # document is found

# A name that was never inserted: the filter should reject it and the
# lookup should come back empty.
qry = {"name": "XXXy"}
assert not coll._bf.contains("XXXy")
assert coll.find_one(qry) is None