def recommend(self):
    """Recommend items based on the user's K nearest friends.

    The candidate pool is whatever ``self.netdata.get_friends_nodes``
    returns for ``self.usercircle``; neighbours are selected by
    ``BasicNetworkAnalyzer.compute_knearest_neighbors`` with
    ``data_type="learn"``.

    Returns:
        The result of ``compute_weighted_popular_recs`` (also stored in
        ``self.rec_items``), or ``None`` when fewer than ``self.K``
        neighbours were found. In the ``None`` case ``self.rec_items`` is
        left unchanged.
    """
    friend_nodes = self.netdata.get_friends_nodes(self.usercircle)
    neighbors = BasicNetworkAnalyzer.compute_knearest_neighbors(
        self.usercircle, friend_nodes, self.interact_type, self.K,
        data_type="learn")

    if len(neighbors) >= self.K:
        self.rec_items = self.usercircle.compute_weighted_popular_recs(
            neighbors, self.max_items)
        return self.rec_items

    # Not enough neighbours to build a recommendation from.
    logging.warning("Cannot find k closest friends for recommend")
    return None
 def recommend(self):
     """Recommend items based on the K nearest non-friend nodes.

     The candidate pool is whatever ``self.netdata.get_nonfriends_nodes``
     returns for ``self.usercircle``; neighbours are selected by
     ``BasicNetworkAnalyzer.compute_knearest_neighbors`` with
     ``data_type="learn"``.

     Returns:
         The result of ``compute_weighted_popular_recs`` (also stored in
         ``self.rec_items``), or ``None`` when fewer than ``self.K``
         neighbours were found. In the ``None`` case ``self.rec_items`` is
         left unchanged.
     """
     nonfriend_nodes = self.netdata.get_nonfriends_nodes(self.usercircle)
     nearest = BasicNetworkAnalyzer.compute_knearest_neighbors(
         self.usercircle, nonfriend_nodes, self.interact_type, self.K,
         data_type="learn")

     if len(nearest) >= self.K:
         # The recommendation list can legitimately come back empty when the
         # close users have very few training interactions.
         self.rec_items = self.usercircle.compute_weighted_popular_recs(
             nearest, self.max_items)
         return self.rec_items

     logging.warning("Cannot find k closest global for recommend")
     return None
# Example #3
# 0
 def recommend(self):
     """Build recommendations from the user's K most similar friends.

     Friend nodes come from ``self.netdata.get_friends_nodes`` and are
     ranked by ``BasicNetworkAnalyzer.compute_knearest_neighbors`` using
     ``data_type="learn"``.

     Returns:
         ``self.rec_items`` after it has been refreshed through
         ``compute_weighted_popular_recs``, or ``None`` (with
         ``self.rec_items`` untouched) if fewer than ``self.K`` neighbours
         are available.
     """
     user = self.usercircle
     top_friends = BasicNetworkAnalyzer.compute_knearest_neighbors(
         user, self.netdata.get_friends_nodes(user), self.interact_type,
         self.K, data_type="learn")

     enough_neighbors = len(top_friends) >= self.K
     if not enough_neighbors:
         logging.warning("Cannot find k closest friends for recommend")
         return None

     recs = self.usercircle.compute_weighted_popular_recs(
         top_friends, self.max_items)
     self.rec_items = recs
     return self.rec_items
# Example #4
# 0
    def recommend(self):
        """Build recommendations from the K most similar non-friend nodes.

        Non-friend nodes come from ``self.netdata.get_nonfriends_nodes`` and
        are ranked by ``BasicNetworkAnalyzer.compute_knearest_neighbors``
        using ``data_type="learn"``.

        Returns:
            ``self.rec_items`` after it has been refreshed through
            ``compute_weighted_popular_recs``, or ``None`` (with
            ``self.rec_items`` untouched) if fewer than ``self.K``
            neighbours are available.
        """
        user = self.usercircle
        top_global = BasicNetworkAnalyzer.compute_knearest_neighbors(
            user, self.netdata.get_nonfriends_nodes(user),
            self.interact_type, self.K, data_type="learn")

        enough_neighbors = len(top_global) >= self.K
        if not enough_neighbors:
            logging.warning("Cannot find k closest global for recommend")
            return None

        # An empty result is possible here: the close users may have too few
        # training interactions to yield any recommendation.
        recs = self.usercircle.compute_weighted_popular_recs(
            top_global, self.max_items)
        self.rec_items = recs
        return self.rec_items
def compute_susceptibility_randomselect(
        netdata, nodes_list, interact_type, cutoff_rating, control_divider,
        min_interactions_per_user, time_diff, time_scale, max_tries,
        max_node_computes, max_interact_ratio_error, nonfr_match,
        allow_duplicates):
    # Find similarity on training set
    max_sim_ratio_error = 0.1
    triplet_nodes = []
    counter = 0
    failed_counter = 0
    eligible_nodes_counter = 0
    count_success = 0
    edges_counter = 0
    total_tries_counter = 0
    time_saved_counter = 0

    if max_tries is None:
        max_tries = netdata.get_total_num_nodes()
    randomized_node_ids = random.sample(
        xrange(1,
               netdata.get_total_num_nodes() + 1), max_tries)

    data_type = "compare_train"
    data_type_code = ord(data_type[0])
    #sim_dict = {}
    for node in nodes_list:
        nonfr_ids = {}
        sim_dict = {}
        num_node_interacts = node.get_num_interactions(
            interact_type)  # return all interactions, no check for duplicates
        #if not node.has_interactions(interact_type) or not node.has_friends():
        if node.length_train_ids < min_interactions_per_user or node.length_test_ids < min_interactions_per_user or not node.has_friends(
        ):
            #print "Node has no interactions. Skipping!"
            counter += 1
            continue
        eligible_nodes_counter += 1
        fnodes = netdata.get_friends_nodes(node)
        control_nonfr_nodes = []
        avg_fsim = 0
        avg_rsim = 0
        num_eligible_friends = 0
        selected_friends = []
        friend_ids = node.get_friend_ids()
        edges_counter += len(friend_ids)
        for fobj in fnodes:
            num_fobj_interacts = fobj.get_num_interactions(interact_type)
            if fobj.length_train_ids >= min_interactions_per_user and fobj.length_test_ids >= min_interactions_per_user:
                """
                fsim2 = node.compute_node_similarity(fobj, interact_type, 
                        cutoff_rating, data_type_code, 
                        min_interactions_per_user, time_diff=500000, time_scale=ord('w'))#time_diff=-1, time_scale=time_scale)
                """
                if (fobj.uid, node.uid) in sim_dict:
                    fsim = sim_dict[(fobj.uid, node.uid)]
                elif (node.uid, fobj.uid) in sim_dict:
                    fsim = sim_dict[(node.uid, fobj.uid)]
                else:
                    fsim = node.compute_node_similarity(
                        fobj,
                        interact_type,
                        cutoff_rating,
                        data_type_code,
                        min_interactions_per_user,
                        time_diff=-1,
                        time_scale=time_scale)
                    sim_dict[(fobj.uid, node.uid)] = fsim


#if fsim is None:
#                        print "Error:fsim cannot be None"
#print fsim
                found = False
                if fsim is not None and fsim != -1:
                    num_eligible_friends += 1
                    total_tries_counter += 1
                    tries = 0
                    if nonfr_match == "random":
                        randomized_node_ids = random.sample(
                            xrange(1,
                                   netdata.get_total_num_nodes() + 1),
                            max_tries)
                    elif nonfr_match == "kbest":
                        global_candidates = netdata.get_othernodes_iterable(
                            fobj, should_have_interactions=True)
                        globalk_neighbors = BasicNetworkAnalyzer.compute_knearest_neighbors(
                            fobj,
                            global_candidates,
                            interact_type,
                            1000,
                            data_type=data_type,
                            cutoff_rating=-1,
                            min_interactions_per_user=min_interactions_per_user,
                            time_diff=-1,
                            time_scale=ord('w'))
                        randomized_node_ids = [
                            heapq.heappop(globalk_neighbors)[1].uid
                            for h in xrange(len(globalk_neighbors))
                        ]
                        randomized_node_ids.reverse()
                    elif nonfr_match == "serial":
                        randomized_node_ids = range(1, max_tries + 1)
                    else:
                        print "Error in parameter"
                        sys.exit(1)
                    r_index = 0

                    while not found and r_index < max_tries and r_index < len(
                            randomized_node_ids):
                        rand_node_id = randomized_node_ids[r_index]
                        r_index += 1
                        if rand_node_id in nonfr_ids:
                            continue
                        rand_node = netdata.nodes[rand_node_id]
                        if rand_node.length_train_ids >= min_interactions_per_user and rand_node.length_test_ids >= min_interactions_per_user:
                            ratio_train = abs(rand_node.length_train_ids -
                                              fobj.length_train_ids) / float(
                                                  fobj.length_train_ids)
                            if ratio_train <= max_interact_ratio_error:
                                if rand_node.uid not in friend_ids and rand_node.uid != node.uid:
                                    if (rand_node.uid, node.uid) in sim_dict:
                                        rsim = sim_dict[(rand_node.uid,
                                                         node.uid)]
                                        time_saved_counter += 1
                                    elif (node.uid, rand_node.uid) in sim_dict:
                                        rsim = sim_dict[(node.uid,
                                                         rand_node.uid)]
                                        time_saved_counter += 1
                                    else:
                                        rsim = node.compute_node_similarity(
                                            rand_node,
                                            interact_type,
                                            cutoff_rating,
                                            data_type_code,
                                            min_interactions_per_user,
                                            time_diff=-1,
                                            time_scale=time_scale)
                                        sim_dict[(rand_node.uid,
                                                  node.uid)] = rsim
                                        """
                                        rsim2 = node.compute_node_similarity(rand_node, interact_type, 
                                                        cutoff_rating, data_type_code, min_interactions_per_user, 
                                                        time_diff=500000, time_scale=ord('w'))#time_diff=-1, time_scale=time_scale)
                                                        #time_diff=-1, time_scale=time_scale)
                                        """
                                    num_rnode_interacts = rand_node.get_num_interactions(
                                        interact_type)
                                    if rsim is not None and rsim != -1:
                                        sim_diff = abs(rsim - fsim)
                                        if (
                                                fsim == 0
                                                and sim_diff <= 0.00001
                                        ) or (
                                                fsim > 0 and sim_diff / fsim <=
                                                max_sim_ratio_error
                                        ):  # and (fsim2 >0 and abs(rsim2-fsim2)/fsim2<=max_sim_ratio_error)):
                                            """
                                            fr_nonfr_sim = fobj.compute_node_similarity(rand_node, interact_type, 
                                                        cutoff_rating, data_type_code, min_interactions_per_user, 
                                                        time_diff=-1, time_scale=time_scale)
                                            print fr_nonfr_sim, node.length_train_ids, fobj.length_train_ids, rand_node.length_train_ids, fsim, rsim, r_index, max_tries
                                            if fr_nonfr_sim > 2*fsim:
                                            """
                                            if True:
                                                found = True
                                                avg_fsim += fsim
                                                avg_rsim += rsim
                                                nonfr_ids[rand_node_id] = True
                                                control_nonfr_nodes.append(
                                                    rand_node)
                                                selected_friends.append(fobj)
                        tries += 1
                    if not found:
                        #print "Could not get random non-friend with sim", fsim, "in %d tries" %tries
                        failed_counter += 1
        #print "SEE:", len(control_nonfr_nodes), num_eligible_friends
        if num_eligible_friends > 0 and len(
                control_nonfr_nodes) >= 1 * num_eligible_friends:
            avg_fsim = avg_fsim / float(len(control_nonfr_nodes))
            avg_rsim = avg_rsim / float(len(control_nonfr_nodes))
            #print num_eligible_friends, len(selected_friends)
            if len(selected_friends) != len(control_nonfr_nodes):
                print "ALERT: Something is wrong here!!"
                sys.exit(2)
            if len(control_nonfr_nodes) != num_eligible_friends:
                print "WARN: Cannot match all eligible friends", num_eligible_friends, len(
                    control_nonfr_nodes)
            #print node.uid, [fr.uid for fr in selected_friends]
            triplet_nodes.append((node, selected_friends, control_nonfr_nodes,
                                  0, 0, 0, avg_fsim, avg_rsim))
            count_success += 1
        if counter % 10 == 0:
            print "Done counter", counter
        if max_node_computes is not None:
            if counter > max_node_computes:
                print counter, max_node_computes
                break
        counter += 1
    print "\n--Number of nodes assigned to me(with interactions and friends):", len(
        nodes_list)
    print "--Eligible nodes (with interactions > %d): " % min_interactions_per_user, eligible_nodes_counter
    print "--Total Edges from eligible nodes:", edges_counter
    #print "--Eligible friend-edges (with friend hving interactions >%d): " %min_interactions_per_user, eligible_edges_counter
    print "--Number of tries (and successful caches) to find random non-friend:", total_tries_counter, time_saved_counter
    print "--Number of  successful nodes (can find rnodes):", count_success
    print "--Successful triplets:", len(triplet_nodes)

    # Now compare influencer effect on test set
    data_type = "influence_effect"
    data_type_code = ord(data_type[0])
    influence_arr = compare_susceptibility_effect(
        triplet_nodes, interact_type, cutoff_rating, min_interactions_per_user,
        time_diff, time_scale, data_type_code, allow_duplicates)
    return influence_arr
def compute_susceptibility_randomselect(netdata, nodes_list, interact_type, 
                                            cutoff_rating, control_divider, min_interactions_per_user, 
                                            time_diff, time_scale, max_tries, max_node_computes,
                                            max_interact_ratio_error, nonfr_match,
                                            allow_duplicates):   
    # Find similarity on training set
    max_sim_ratio_error = 0.1
    triplet_nodes = []
    counter = 0
    failed_counter = 0
    eligible_nodes_counter = 0
    count_success = 0
    edges_counter = 0
    total_tries_counter = 0
    time_saved_counter = 0
   
    if max_tries is None:
        max_tries = netdata.get_total_num_nodes()
    randomized_node_ids = random.sample(xrange(1, netdata.get_total_num_nodes()+1), max_tries)
    
    data_type="compare_train"
    data_type_code=ord(data_type[0]) 
    #sim_dict = {}
    for node in nodes_list:
        nonfr_ids = {}
        sim_dict = {}
        num_node_interacts = node.get_num_interactions(interact_type) # return all interactions, no check for duplicates
        #if not node.has_interactions(interact_type) or not node.has_friends():
        if node.length_train_ids < min_interactions_per_user or node.length_test_ids <min_interactions_per_user or not node.has_friends():
            #print "Node has no interactions. Skipping!"
            counter +=1
            continue
        eligible_nodes_counter += 1
        fnodes = netdata.get_friends_nodes(node)
        control_nonfr_nodes = []
        avg_fsim = 0
        avg_rsim = 0
        num_eligible_friends = 0
        selected_friends = []
        friend_ids = node.get_friend_ids()
        edges_counter += len(friend_ids)
        for fobj in fnodes:
            num_fobj_interacts = fobj.get_num_interactions(interact_type)
            if fobj.length_train_ids >=min_interactions_per_user and fobj.length_test_ids >=min_interactions_per_user:
                """
                fsim2 = node.compute_node_similarity(fobj, interact_type, 
                        cutoff_rating, data_type_code, 
                        min_interactions_per_user, time_diff=500000, time_scale=ord('w'))#time_diff=-1, time_scale=time_scale)
                """
                if (fobj.uid,node.uid) in sim_dict:
                    fsim = sim_dict[(fobj.uid,node.uid)]
                elif (node.uid,fobj.uid) in sim_dict:
                    fsim = sim_dict[(node.uid,fobj.uid)]
                else:
                    fsim = node.compute_node_similarity(fobj, interact_type, 
                            cutoff_rating, data_type_code, 
                            min_interactions_per_user, time_diff=-1, time_scale=time_scale)
                    sim_dict[(fobj.uid, node.uid)] = fsim
#if fsim is None:
#                        print "Error:fsim cannot be None"
                #print fsim
                found = False
                if fsim is not None and fsim!=-1:
                    num_eligible_friends += 1
                    total_tries_counter += 1
                    tries=0
                    if nonfr_match=="random":
                        randomized_node_ids = random.sample(xrange(1, netdata.get_total_num_nodes()+1), max_tries)
                    elif nonfr_match=="kbest":
                        global_candidates = netdata.get_othernodes_iterable(fobj, should_have_interactions=True)
                        globalk_neighbors = BasicNetworkAnalyzer.compute_knearest_neighbors(fobj, global_candidates, 
                                                                interact_type,1000, data_type=data_type, 
                                                                cutoff_rating = -1,
                                                                min_interactions_per_user=min_interactions_per_user,
                                                                time_diff=-1, time_scale=ord('w'))
                        randomized_node_ids = [heapq.heappop(globalk_neighbors)[1].uid for h in xrange(len(globalk_neighbors))]
                        randomized_node_ids.reverse()
                    elif nonfr_match=="serial":
                        randomized_node_ids = range(1, max_tries+1)
                    else:
                        print "Error in parameter"; sys.exit(1)
                    r_index = 0
                   
                    while not found and r_index < max_tries and r_index<len(randomized_node_ids):
                        rand_node_id = randomized_node_ids[r_index]
                        r_index += 1
                        if rand_node_id in nonfr_ids:
                            continue
                        rand_node = netdata.nodes[rand_node_id]
                        if rand_node.length_train_ids >=min_interactions_per_user and rand_node.length_test_ids >=min_interactions_per_user:
                            ratio_train = abs(rand_node.length_train_ids-fobj.length_train_ids)/float(fobj.length_train_ids)
                            if ratio_train <= max_interact_ratio_error: 
                                if rand_node.uid not in friend_ids and rand_node.uid!=node.uid:
                                    if (rand_node.uid,node.uid) in sim_dict: 
                                        rsim = sim_dict[(rand_node.uid,node.uid)]
                                        time_saved_counter += 1
                                    elif (node.uid,rand_node.uid) in sim_dict:
                                        rsim = sim_dict[(node.uid,rand_node.uid)]
                                        time_saved_counter += 1
                                    else:
                                        rsim = node.compute_node_similarity(rand_node, interact_type, 
                                                        cutoff_rating, data_type_code, min_interactions_per_user, 
                                                        time_diff=-1, time_scale=time_scale)
                                        sim_dict[(rand_node.uid, node.uid)] = rsim
                                        """
                                        rsim2 = node.compute_node_similarity(rand_node, interact_type, 
                                                        cutoff_rating, data_type_code, min_interactions_per_user, 
                                                        time_diff=500000, time_scale=ord('w'))#time_diff=-1, time_scale=time_scale)
                                                        #time_diff=-1, time_scale=time_scale)
                                        """
                                    num_rnode_interacts = rand_node.get_num_interactions(interact_type)
                                    if rsim is not None and rsim!=-1:
                                        sim_diff = abs(rsim-fsim)
                                        if (fsim==0 and sim_diff<=0.00001) or (fsim>0 and
                                                sim_diff/fsim <= max_sim_ratio_error):# and (fsim2 >0 and abs(rsim2-fsim2)/fsim2<=max_sim_ratio_error)):
                                            """
                                            fr_nonfr_sim = fobj.compute_node_similarity(rand_node, interact_type, 
                                                        cutoff_rating, data_type_code, min_interactions_per_user, 
                                                        time_diff=-1, time_scale=time_scale)
                                            print fr_nonfr_sim, node.length_train_ids, fobj.length_train_ids, rand_node.length_train_ids, fsim, rsim, r_index, max_tries
                                            if fr_nonfr_sim > 2*fsim:
                                            """
                                            if True:
                                                found = True
                                                avg_fsim += fsim
                                                avg_rsim += rsim
                                                nonfr_ids[rand_node_id] = True
                                                control_nonfr_nodes.append(rand_node)
                                                selected_friends.append(fobj)
                        tries += 1
                    if not found:
                        #print "Could not get random non-friend with sim", fsim, "in %d tries" %tries
                        failed_counter += 1
        #print "SEE:", len(control_nonfr_nodes), num_eligible_friends
        if num_eligible_friends >0 and len(control_nonfr_nodes) >= 1*num_eligible_friends:
            avg_fsim = avg_fsim/float(len(control_nonfr_nodes))
            avg_rsim = avg_rsim/float(len(control_nonfr_nodes))
            #print num_eligible_friends, len(selected_friends)
            if len(selected_friends) != len(control_nonfr_nodes):
                print "ALERT: Something is wrong here!!"; sys.exit(2)
            if len(control_nonfr_nodes) != num_eligible_friends:
                print "WARN: Cannot match all eligible friends", num_eligible_friends, len(control_nonfr_nodes)
            #print node.uid, [fr.uid for fr in selected_friends]
            triplet_nodes.append((node, selected_friends, control_nonfr_nodes, 
                                 0, 0, 0, avg_fsim, avg_rsim))
            count_success +=1
        if counter %10==0:
            print "Done counter", counter
        if max_node_computes is not None:
            if counter > max_node_computes:
                print counter, max_node_computes
                break
        counter += 1
    print "\n--Number of nodes assigned to me(with interactions and friends):", len(nodes_list)
    print "--Eligible nodes (with interactions > %d): " %min_interactions_per_user, eligible_nodes_counter
    print "--Total Edges from eligible nodes:", edges_counter
    #print "--Eligible friend-edges (with friend hving interactions >%d): " %min_interactions_per_user, eligible_edges_counter
    print "--Number of tries (and successful caches) to find random non-friend:", total_tries_counter, time_saved_counter
    print "--Number of  successful nodes (can find rnodes):", count_success
    print "--Successful triplets:", len(triplet_nodes) 


    # Now compare influencer effect on test set
    data_type="influence_effect"
    data_type_code=ord(data_type[0]) 
    influence_arr = compare_susceptibility_effect(triplet_nodes, interact_type, 
                                              cutoff_rating, min_interactions_per_user, 
                                              time_diff, time_scale, data_type_code,
                                              allow_duplicates)
    return influence_arr