def interactive_tests_CPPR(): while True: queryid, user_id = map(int, raw_input("QueryID UserId?").split()) print "Computing CPPR(u,q)..." pr = CPPR(user_id, queryid) print "Done. Computing result..." result = [0] * 100 i = 0 for _ in g.vertices(): # print "PR for vertex", i, "is\t", pr[_] pp(result, (pr[_], i)) i += 1 print "Top 100 PR scores=" print nlargest(100, result)
def interactive_tests_CCPPR(): while True: print "New CPPR computation..." queryid, user_id = map(int, raw_input("QueryID UserId?").split()) print "Computing CCPPR(u,q)..." t0 = time() pr = CCPPR(user_id, queryid) print "Done in", time()-t0 print "Computing top result..." result = [0] * 100 i = 0 for _ in g.vertices(): # print "PR for vertex", i, "is\t", pr[_] pp(result, (pr[_], i)) i += 1 print "Top 100 CPR scores=" print nlargest(100, result)
def compute_user_sim_batch(start): """ This function will be executed by the worker process. It computes a batch of user similarities, starting at start-th user_id in the global variabl `users_set` and until the (start+BATCH_SIZE)-th user_id For every user_id it will compute the similarity against _ALL_ the other users (except the same user) and will keep the `TOP_N_SIMILAR_USERS`-th top ones in terms of similarity value. It will then commit them directly to mongodb before terminating. """ from heapq import heappushpop as pp print "Starting compute_user_sim_batch(", start, ")" # Initializing heaps for every user we are going to deal with results = {uid:([0] * TOP_N_SIMILAR_USERS) for uid in list(users_set)[start:start+BATCH_SIZE]} t0 = time() t1 = time() i = 0 print_interval = int(BATCH_SIZE*len(users_set)/N_OF_PRINTS_PER_BATCH) # So that we approximately print the state X times for u1, u2 in user_pair(start): # print "Executing process", start, "for", u1, u2 try: sim_res = us.sim(u1, u2) if abs(sim_res - 1.0) < ZERO_FLOAT: continue # Do not keep similarities too close to 1.0 (too similar users) pp(results[u1], (sim_res, u2)) except KeyError as err: print err print "Huh? KeyError u1=", u1, "start=", start, "BATCH_SIZE=", BATCH_SIZE i += 1 if i % print_interval is 0: print i+1, \ "sim() calls\t in %5.1f" % (time()-t0), \ "\t\t avg %.4e" % ((time()-t0)/float(i+1)), \ "\t local_avg %.4e" % ((time()-t1)/float(i+1)), \ "\t(process", start, ")" t1 = time() print "Computation from", start, "to", start + BATCH_SIZE, "finished in", time()-t0,". Committing..." t0 = time() commit_user_sims(start, results) print "Finished committing in", time()-t0 return True # Just to show we finished things properly