import heapq
import logging
import random
import sys


def recommend(self):
    # Rank the user's friends by train-set similarity and keep the K nearest.
    # BasicNetworkAnalyzer is defined elsewhere in this repo.
    close_users = BasicNetworkAnalyzer.compute_knearest_neighbors(
        self.usercircle, self.netdata.get_friends_nodes(self.usercircle),
        self.interact_type, self.K, data_type="learn")
    if len(close_users) < self.K:
        logging.warning("Cannot find k closest friends for recommend")
        return None
    self.rec_items = self.usercircle.compute_weighted_popular_recs(
        close_users, self.max_items)
    return self.rec_items
def recommend(self):
    # Same as above, but draw the K nearest neighbors from the global pool
    # of non-friends instead of the user's friends.
    close_users = BasicNetworkAnalyzer.compute_knearest_neighbors(
        self.usercircle, self.netdata.get_nonfriends_nodes(self.usercircle),
        self.interact_type, self.K, data_type="learn")
    if len(close_users) < self.K:
        logging.warning("Cannot find k closest global users for recommend")
        return None
    # The rec list can be empty when the close users have too few
    # train interactions.
    self.rec_items = self.usercircle.compute_weighted_popular_recs(
        close_users, self.max_items)
    return self.rec_items
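# A minimal usage sketch for the two recommend() variants above. The class
# names (FriendRecommender), the constructor signature, and the helpers
# netdata.get_nodes_list() / evaluate_recs() are assumptions for illustration
# only; the repo's real class and loader names may differ.
K = 10
max_items = 20
for user in netdata.get_nodes_list():  # assumed iterator over user nodes
    recommender = FriendRecommender(netdata, user, interact_type,
                                    K, max_items)  # hypothetical constructor
    items = recommender.recommend()
    if items is None:
        continue  # fewer than K scorable neighbors; skip or fall back
    evaluate_recs(user, items)  # hypothetical evaluation hook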
def compute_susceptibility_randomselect(netdata, nodes_list, interact_type,
                                        cutoff_rating, control_divider,
                                        min_interactions_per_user, time_diff,
                                        time_scale, max_tries,
                                        max_node_computes,
                                        max_interact_ratio_error, nonfr_match,
                                        allow_duplicates):
    # Find similarity on the training set.
    max_sim_ratio_error = 0.1
    triplet_nodes = []
    counter = 0
    failed_counter = 0
    eligible_nodes_counter = 0
    count_success = 0
    edges_counter = 0
    total_tries_counter = 0
    time_saved_counter = 0
    if max_tries is None:
        max_tries = netdata.get_total_num_nodes()
    randomized_node_ids = random.sample(
        xrange(1, netdata.get_total_num_nodes() + 1), max_tries)
    data_type = "compare_train"
    data_type_code = ord(data_type[0])
    for node in nodes_list:
        nonfr_ids = {}
        # Per-node cache of pairwise similarities, keyed by (uid, uid).
        sim_dict = {}
        # Returns all interactions; no check for duplicates.
        num_node_interacts = node.get_num_interactions(interact_type)
        # Skip nodes with too few train/test interactions or no friends.
        if (node.length_train_ids < min_interactions_per_user
                or node.length_test_ids < min_interactions_per_user
                or not node.has_friends()):
            counter += 1
            continue
        eligible_nodes_counter += 1
        fnodes = netdata.get_friends_nodes(node)
        control_nonfr_nodes = []
        avg_fsim = 0
        avg_rsim = 0
        num_eligible_friends = 0
        selected_friends = []
        friend_ids = node.get_friend_ids()
        edges_counter += len(friend_ids)
        for fobj in fnodes:
            num_fobj_interacts = fobj.get_num_interactions(interact_type)
            if (fobj.length_train_ids >= min_interactions_per_user
                    and fobj.length_test_ids >= min_interactions_per_user):
                # Node-friend similarity: probe the cache in both key orders.
                if (fobj.uid, node.uid) in sim_dict:
                    fsim = sim_dict[(fobj.uid, node.uid)]
                elif (node.uid, fobj.uid) in sim_dict:
                    fsim = sim_dict[(node.uid, fobj.uid)]
                else:
                    fsim = node.compute_node_similarity(
                        fobj, interact_type, cutoff_rating, data_type_code,
                        min_interactions_per_user, time_diff=-1,
                        time_scale=time_scale)
                    sim_dict[(fobj.uid, node.uid)] = fsim
                found = False
                if fsim is not None and fsim != -1:
                    num_eligible_friends += 1
                    total_tries_counter += 1
                    tries = 0
                    # Build the pool of candidate non-friends for this friend.
                    if nonfr_match == "random":
                        randomized_node_ids = random.sample(
                            xrange(1, netdata.get_total_num_nodes() + 1),
                            max_tries)
                    elif nonfr_match == "kbest":
                        global_candidates = netdata.get_othernodes_iterable(
                            fobj, should_have_interactions=True)
                        globalk_neighbors = BasicNetworkAnalyzer.compute_knearest_neighbors(
                            fobj, global_candidates, interact_type, 1000,
                            data_type=data_type, cutoff_rating=-1,
                            min_interactions_per_user=min_interactions_per_user,
                            time_diff=-1, time_scale=ord('w'))
                        # Drain the heap, then reverse so the most similar
                        # candidates are tried first.
                        randomized_node_ids = [
                            heapq.heappop(globalk_neighbors)[1].uid
                            for h in xrange(len(globalk_neighbors))
                        ]
                        randomized_node_ids.reverse()
                    elif nonfr_match == "serial":
                        randomized_node_ids = range(1, max_tries + 1)
                    else:
                        print "Error in parameter"
                        sys.exit(1)
                    r_index = 0
                    while (not found and r_index < max_tries
                           and r_index < len(randomized_node_ids)):
                        rand_node_id = randomized_node_ids[r_index]
                        r_index += 1
                        if rand_node_id in nonfr_ids:
                            continue
                        rand_node = netdata.nodes[rand_node_id]
                        if (rand_node.length_train_ids >= min_interactions_per_user
                                and rand_node.length_test_ids >= min_interactions_per_user):
                            # Require a comparable number of train interactions.
                            ratio_train = abs(
                                rand_node.length_train_ids -
                                fobj.length_train_ids) / float(
                                    fobj.length_train_ids)
                            if ratio_train <= max_interact_ratio_error:
                                if (rand_node.uid not in friend_ids
                                        and rand_node.uid != node.uid):
                                    # Node/non-friend similarity, cached the
                                    # same way as fsim above.
                                    if (rand_node.uid, node.uid) in sim_dict:
                                        rsim = sim_dict[(rand_node.uid, node.uid)]
                                        time_saved_counter += 1
                                    elif (node.uid, rand_node.uid) in sim_dict:
                                        rsim = sim_dict[(node.uid, rand_node.uid)]
                                        time_saved_counter += 1
                                    else:
                                        rsim = node.compute_node_similarity(
                                            rand_node, interact_type,
                                            cutoff_rating, data_type_code,
                                            min_interactions_per_user,
                                            time_diff=-1,
                                            time_scale=time_scale)
                                        sim_dict[(rand_node.uid, node.uid)] = rsim
                                    num_rnode_interacts = rand_node.get_num_interactions(
                                        interact_type)
                                    if rsim is not None and rsim != -1:
                                        # Accept the non-friend if its
                                        # similarity to the node is within
                                        # tolerance of the friend's.
                                        sim_diff = abs(rsim - fsim)
                                        if ((fsim == 0 and sim_diff <= 0.00001)
                                                or (fsim > 0 and sim_diff / fsim
                                                    <= max_sim_ratio_error)):
                                            found = True
                                            avg_fsim += fsim
                                            avg_rsim += rsim
                                            nonfr_ids[rand_node_id] = True
                                            control_nonfr_nodes.append(rand_node)
                                            selected_friends.append(fobj)
                        tries += 1
                    if not found:
                        # Could not match this friend with a similar non-friend.
                        failed_counter += 1
        if (num_eligible_friends > 0
                and len(control_nonfr_nodes) >= num_eligible_friends):
            avg_fsim = avg_fsim / float(len(control_nonfr_nodes))
            avg_rsim = avg_rsim / float(len(control_nonfr_nodes))
            if len(selected_friends) != len(control_nonfr_nodes):
                print "ALERT: Something is wrong here!!"
                sys.exit(2)
            if len(control_nonfr_nodes) != num_eligible_friends:
                print "WARN: Cannot match all eligible friends", \
                    num_eligible_friends, len(control_nonfr_nodes)
            triplet_nodes.append((node, selected_friends, control_nonfr_nodes,
                                  0, 0, 0, avg_fsim, avg_rsim))
            count_success += 1
        if counter % 10 == 0:
            print "Done counter", counter
        if max_node_computes is not None:
            if counter > max_node_computes:
                print counter, max_node_computes
                break
        counter += 1
    print "\n--Number of nodes assigned to me (with interactions and friends):", \
        len(nodes_list)
    print "--Eligible nodes (with train/test interactions >= %d):" % \
        min_interactions_per_user, eligible_nodes_counter
    print "--Total edges from eligible nodes:", edges_counter
    print "--Number of tries (and successful caches) to find random non-friend:", \
        total_tries_counter, time_saved_counter
    print "--Number of successful nodes (can find rnodes):", count_success
    print "--Successful triplets:", len(triplet_nodes)
    # Now compare the influencer effect on the test set.
    data_type = "influence_effect"
    data_type_code = ord(data_type[0])
    influence_arr = compare_susceptibility_effect(
        triplet_nodes, interact_type, cutoff_rating,
        min_interactions_per_user, time_diff, time_scale, data_type_code,
        allow_duplicates)
    return influence_arr
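# Illustrative call to the function above. Every value below is a placeholder,
# not a recommended setting; netdata, nodes_list, and interact_type are
# assumed to be already constructed by the repo's data-loading code.
influence_arr = compute_susceptibility_randomselect(
    netdata, nodes_list, interact_type,
    cutoff_rating=-1,             # -1 is used elsewhere in this code as "no cutoff"
    control_divider=None,         # accepted but unused by this function
    min_interactions_per_user=10,
    time_diff=-1,
    time_scale=ord('w'),          # ord('w') matches the weekly scale used above
    max_tries=None,               # None means "up to the total node count"
    max_node_computes=None,
    max_interact_ratio_error=0.1,
    nonfr_match="random",         # alternatives: "kbest", "serial"
    allow_duplicates=False)
# Design note: the per-node sim_dict stores each pairwise similarity under a
# single (uid, uid) key and is probed in both orders, so a non-friend retried
# for several friends of the same node is scored only once;
# time_saved_counter reports how often that cache hit.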