import sys          # needed for the sys.exit calls below
import adopt_time   # helper module used throughout (assumed import path)
# Helpers such as get_initial_items_pop, process_interaction and the
# select_item_* functions are assumed to be defined elsewhere in this module.


def generate_adoption_data(netdata,
                           interact_type,
                           split_timestamp,
                           min_interactions_per_user=1,
                           time_window=None,
                           time_scale=ord('o')):
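    """Replay interactions that occur after split_timestamp in chronological
    order. For each interaction by a user who has friends, write a
    tab-separated line to adoptions.dat with the friend count, top-k
    neighbor count and item popularity returned by process_interaction."""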
    print "ALERT: Generating adoption data chronologically"
    core_nodes2 = netdata.get_nodes_list(should_have_interactions=True)
    # caution: should do it for all users with interactions to be safe
    interactions_stream, eligible_nodes = adopt_time.get_interactions_stream(
        core_nodes2,
        interact_type,
        split_timestamp,
        min_interactions_per_user,
        after=True)
    print "Number of interactions %d after time %d" % (
        len(interactions_stream), split_timestamp)
    counter = 0
    kbest_lastm_heaps = {}  # Heap storing last M interactions by neighbors (see the sketch after this function)
    friends_lastm_heaps = {}
    rnonfr_lastm_heaps = {}
    # care only about friends of eligible_nodes
    incoming_friends_dict = adopt_time.compute_incoming_friends(eligible_nodes)
    # consider only k-nearest neighbors of eligible nodes
    incoming_kbest_dict = adopt_time.compute_globalk_neighbors3(
        netdata,
        eligible_nodes,
        interact_type,
        k=10,
        min_interactions_per_user=min_interactions_per_user)

    # build the last-M heaps only for eligible nodes
    friends_lastm_heaps = adopt_time.compute_initial_lastm_heaps(
        eligible_nodes, incoming_friends_dict, interact_type, split_timestamp,
        min_interactions_per_user, time_window)
    kbest_lastm_heaps = adopt_time.compute_initial_lastm_heaps(
        eligible_nodes, incoming_kbest_dict, interact_type, split_timestamp,
        min_interactions_per_user, time_window)

    items_pop = get_initial_items_pop(eligible_nodes, interact_type)
    # we need friends_dict for only those that appear in the test set
    # and more importantly, in the interaction stream (i.e. >=min_interactions_per_user),
    # since that is what is compared in the susceptibility test.
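    # adoptions.dat columns (tab-separated):
    # uid  item_id  timestamp  rating  fr_count  kbest_count  itempop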
    outf = open("adoptions.dat", "w")
    for node, item_id, timestamp, rating in interactions_stream:
        fr_count, kbest_count, itempop = process_interaction(
            node, item_id, timestamp, rating, friends_lastm_heaps,
            kbest_lastm_heaps, rnonfr_lastm_heaps,
            incoming_friends_dict[node.uid], incoming_kbest_dict[node.uid],
            items_pop)
        if node.has_friends():
            outf.write("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\n".format(
                node.uid, item_id, timestamp, rating, fr_count, kbest_count,
                itempop))
    outf.close()
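
# ----------------------------------------------------------------------
# Illustrative sketch (not part of the original module): one way the
# "last M interactions" heaps above could be maintained. This assumes
# each heap entry is a (timestamp, item_id) tuple and that each heap is
# capped at M = time_window entries, mirroring the commented-out
# initialization [(0, -1)] * time_window in generate_fake_preferences.
import heapq

def _push_lastm(heap, timestamp, item_id, m):
    # Keep only the m most recent entries; the heap root is the oldest one.
    if len(heap) < m:
        heapq.heappush(heap, (timestamp, item_id))
    else:
        # Push the new entry, then drop whichever entry is now the oldest.
        heapq.heappushpop(heap, (timestamp, item_id))

# Example: a window of the 3 most recent interactions by one neighbor.
_example_heap = []
for _ts, _item in [(10, 101), (12, 102), (15, 103), (20, 104)]:
    _push_lastm(_example_heap, _ts, _item, 3)
# _example_heap now holds (12, 102), (15, 103) and (20, 104).
# ----------------------------------------------------------------------
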
def generate_fake_preferences(
    netdata,
    interact_type,
    split_timestamp,
    min_interactions_beforeaftersplit_per_user=1,
    time_window=None,
    time_scale=ord("o"),
    method="random",
):
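    """Replace each interaction after split_timestamp with a synthetically
    chosen item: "random" samples from the item-popularity array,
    "influence" picks via select_item_from_neighbors using the friends'
    last-M heaps, and "homophily" does the same using the top-k most
    similar neighbors' heaps. Interactions the chosen method could not
    reproduce are counted in the num_not_* globals."""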
    global num_not_influence, num_not_homophily
    global friends_share, nonfriends_share
    num_not_homophily = 0
    num_not_influence = 0
    print "ALERT: Generating some fake data..."
    """
    core_nodes = netdata.get_nodes_iterable(should_have_interactions=True,
            should_have_friends=True)
    # caution: should do it for all users with interactions to be safe
    interactions_stream  = get_interactions_stream(core_nodes, interact_type, split_timestamp, min_interactions_per_user)
    """
    core_nodes2 = netdata.get_nodes_iterable(should_have_interactions=True)
    # caution: should do it for all users with interactions to be safe
    interactions_stream, eligible_nodes = adopt_time.get_interactions_stream(
        core_nodes2, interact_type, split_timestamp, min_interactions_beforeaftersplit_per_user
    )
    print "Number of interactions to change", len(interactions_stream)
    counter = 0
    lastm_heaps = {}
    if method == "homophily":
        """
        globalk_neighbors_dict = compute_globalk_neighbors(netdata, all_future_nodes, 
                interact_type, k=10, min_interactions_per_user=min_interactions_per_user)
        print "Generated k-best neighbors for all"
        """
        globalk_incoming_dict = adopt_time.compute_globalk_neighbors3(
            netdata,
            eligible_nodes,
            interact_type,
            k=10,
            min_interactions_per_user=min_interactions_beforeaftersplit_per_user,
        )
        lastm_heaps = adopt_time.compute_initial_lastm_heaps(
            eligible_nodes,
            globalk_incoming_dict,
            interact_type,
            split_timestamp,
            min_interactions_beforeaftersplit_per_user,
            time_window,
        )
        """
        for node in netdata.get_nodes_iterable(should_have_interactions=True):
            lastm_heaps[node.uid] = [(0,-1)]*time_window
            heapq.heapify(lastm_heaps[node.uid])
        """
    elif method == "random":
        items_pop = get_items_dup_array(netdata, interact_type)
        print "Generated items popularity dict"
    elif method == "influence":
        # we need friends_dict for only those that appear in the test set
        # and more importantly, in the interaction stream (i.e. >=min_interactions_per_user),
        # since that is what is compared in the suscept test.
        incoming_friends_dict = adopt_time.compute_incoming_friends(eligible_nodes)

        lastm_heaps = adopt_time.compute_initial_lastm_heaps(
            eligible_nodes,
            incoming_friends_dict,
            interact_type,
            split_timestamp,
            min_interactions_beforeaftersplit_per_user,
            time_window,
        )
        """
        for node in netdata.get_nodes_iterable(should_have_interactions=True):
            lastm_heaps[node.uid] = [(0,-1)]*time_window
            heapq.heapify(lastm_heaps[node.uid])
        """
        print "Built last-M heaps for", len(lastm_heaps), "nodes"
        # print(incoming_friends_dict)
    else:
        print "Invalid fake prefs method"
        sys.exit(1)

    for node, item_id, timestamp, rating in interactions_stream:
        if method == "random":
            new_item_id = select_item_dueto_random(items_pop)  # .randint(1, netdata.total_num_items)
        elif method == "influence":
            new_item_id = select_item_from_neighbors(
                netdata, lastm_heaps, node, timestamp, time_window, incoming_friends_dict[node.uid]
            )
        elif method == "homophily":
            new_item_id = select_item_from_neighbors(
                netdata, lastm_heaps, node, timestamp, time_window, globalk_incoming_dict[node.uid]
            )
        else:
            print "Invalid fake prefs method"
            sys.exit(1)
        # print node.uid, new_item_id, timestamp
        ret = node.change_interacted_item(interact_type, item_id, new_item_id, timestamp)
        if ret == -1:
            print node.get_interactions(interact_type)
            print "Cannot find in the above array", item_id
            print "Big Error: Item_id not found in interactions list."
            sys.exit(1)
        counter += 1
        if counter % 100000 == 0:
            print "Done faked", counter
    if method == "influence":
        print "In influence: number of interactions that were not generated by influence:", num_not_influence
    elif method == "homophily":
        print "In homophily: friends share, and nonfriends share of fake data:", friends_share, nonfriends_share
        print "In homophily: number of interactions that were not generated by influence:", num_not_homophily
    print "Fake data generated"