Example #1
def compute_indexes(G: nx.Graph, method, negative, positive):
    if method == 'resource_allocation':
        return nx.resource_allocation_index(
            G, negative), nx.resource_allocation_index(G, positive)
    elif method == 'jaccard_coefficient':
        return nx.jaccard_coefficient(G, negative), nx.jaccard_coefficient(
            G, positive)
    elif method == 'adamic_adar':
        return nx.adamic_adar_index(G, negative), nx.adamic_adar_index(
            G, positive)
    elif method == 'preferential_attachment':
        return nx.preferential_attachment(
            G, negative), nx.preferential_attachment(G, positive)
    elif method == 'sorensen_neighbours':
        return ([(u, v, sorensen_index(G, u, v)) for u, v in negative],
                [(u, v, sorensen_index(G, u, v)) for u, v in positive])
    elif method == 'community':
        c = louvain(G)
        commLabels = c.communities
        comms = c.to_node_community_map()
        return ([(u, v, community_index(G, u, v, commLabels, comms))
                 for u, v in negative],
                [(u, v, community_index(G, u, v, commLabels, comms))
                 for u, v in positive])
    else:
        raise NameError('The given method is not supported')
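A minimal, hedged usage sketch for compute_indexes above. It assumes the helpers referenced in the snippet (sorensen_index, community_index, louvain) exist at module level; the graph, sampling, and variable names below are illustrative only.
import random
import networkx as nx

G = nx.karate_club_graph()
# illustrative ebunches: in a real evaluation the positive pairs would be
# held-out edges removed from G, and the negatives sampled non-edges
positive = random.sample(list(G.edges()), 20)
negative = random.sample(list(nx.non_edges(G)), 20)

neg_scores, pos_scores = compute_indexes(G, 'jaccard_coefficient', negative, positive)
print(list(pos_scores)[:3])  # (u, v, score) triples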
Example #2
def get_link_pred_auc(graph, pos_test, neg_test):

    jc_pos_test_pred = nx.jaccard_coefficient(graph, pos_test)
    jc_neg_test_pred = nx.jaccard_coefficient(graph, neg_test)

    jc_pos_score = [p for _, _, p in jc_pos_test_pred]
    jc_neg_score = [n for _, _, n in jc_neg_test_pred]

    jc_all_labels = [1] * len(jc_pos_score) + [0] * len(jc_neg_score)
    jc_all_scores = jc_pos_score + jc_neg_score

    jc_auc = metrics.roc_auc_score(jc_all_labels, jc_all_scores)

    # note: despite the "aa_" prefix, these scores use the resource allocation index
    aa_pos_test_pred = nx.resource_allocation_index(graph, pos_test)
    aa_neg_test_pred = nx.resource_allocation_index(graph, neg_test)

    aa_pos_score = [p for _, _, p in aa_pos_test_pred]
    aa_neg_score = [n for _, _, n in aa_neg_test_pred]

    aa_all_labels = [1] * len(aa_pos_score) + [0] * len(aa_neg_score)
    aa_all_scores = aa_pos_score + aa_neg_score

    aa_auc = metrics.roc_auc_score(aa_all_labels, aa_all_scores)

    return jc_auc, aa_auc
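For orientation, a hedged sketch of how pos_test and neg_test might be built before calling get_link_pred_auc (assuming the module-level `from sklearn import metrics` import used inside the function): hold out some true edges, sample an equal number of non-edges, and score both on the reduced graph.
import random
import networkx as nx

full_graph = nx.karate_club_graph()
pos_test = random.sample(list(full_graph.edges()), 15)        # held-out positives
neg_test = random.sample(list(nx.non_edges(full_graph)), 15)  # sampled negatives

graph = full_graph.copy()
graph.remove_edges_from(pos_test)  # predict the positives on the reduced graph
jc_auc, aa_auc = get_link_pred_auc(graph, pos_test, neg_test)
print(jc_auc, aa_auc)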
Example #3
def get_feature(nonedge, G, df):
    #=========common_neigh===========
    common_neigh = compute_common_neigh(G, nonedge)
    v = get_list_value(common_neigh)
    df['common_neigh'] = v
    print_output(common_neigh)

    #=========jaccard_coefficient===========

    jaccard = list(nx.jaccard_coefficient(G, nonedge))
    v = get_list_value(jaccard)
    df['jaccard'] = v
    print_output(jaccard)

    resource_alloc = list(nx.resource_allocation_index(G, nonedge))
    v = get_list_value(resource_alloc)
    df['resource_alloc'] = v
    print_output(resource_alloc)

    adamic_adar = list(nx.adamic_adar_index(G, nonedge))
    v = get_list_value(adamic_adar)
    df['adamic_adar'] = v
    print_output(adamic_adar)

    pref_attach = list(nx.preferential_attachment(G, nonedge))
    v = get_list_value(pref_attach)
    df['pref_attach'] = v
    print_output(pref_attach)
Example #4
def Link_Precision(Graph):
    """Step1------构造网络"""
    G = Graph

    # plt.figure(1)
    # plt.subplot(211)
    # pos = nx.circular_layout(G)
    # nx.draw_networkx(G, pos, with_labels=True)

    # remove some edges from the constructed network
    test_set, non_edges_set = delete_link(G)

    # plt.subplot(212)
    # pos = nx.circular_layout(G)
    # nx.draw_networkx(G, pos, with_labels=True)
    # plt.show()
    """Step2------对网络缺失边预测得分值"""
    # preds = nx.adamic_adar_index(G)
    # preds = nx.common_neighbors(G)
    # preds = nx.jaccard_coefficient(G)
    preds = nx.resource_allocation_index(G)
    preds = sorted(preds, key=itemgetter(2), reverse=True)  # sort candidate edges by predicted score, descending
    """Step 3: compute the AUC and Precision metrics"""
    auc_result = AUC_Indx(preds, test_set, non_edges_set)
    precision_result = Precision_Index(preds, test_set)
    """输出结果"""
    print "AUC:", auc_result, "Precision:", precision_result
    return auc_result, precision_result
Example #5
def feature_calculate(g, data, column_names, save_to):
    pairs = list(map(lambda x: (x[0], x[1]), data))
    jaccard = nx.jaccard_coefficient(g, pairs)
    preferential = nx.preferential_attachment(g, pairs)
    rai = nx.resource_allocation_index(g, pairs)

    # shortest path
    total = len(data)
    current = 0
    for row_data in zip(data, jaccard, preferential, rai):
        row = row_data[0]
        try:
            thisjaccard = row_data[1][2]
        except:
            thisjaccard = -1

        try:
            thispreferential = row_data[2][2]
        except:
            thispreferential = -1

        try:
            thisrai = row_data[3][2]
        except:
            thisrai = -1

        # pred = row_data[1]
        # resource_allocation_index = pred[2]
        if current % 1000 == 0:
            ut.log("calculating {}/{}...".format(current, total))
        path_length = 99999
        try:
            path = nx.shortest_path(g, row[0], row[1], 'weight')
            path_length = len(path)
        except:
            pass

        # shortest path
        row.insert(-1, path_length)

        # jaccard
        row.insert(-1, thisjaccard)

        # preferential
        row.insert(-1, thispreferential)

        # rai
        row.insert(-1, thisrai)

        current += 1

    original_columns_titles = list(column_names)
    original_columns_titles.insert(-1, "shortest_path_count")
    original_columns_titles.insert(-1, "jaccard")
    original_columns_titles.insert(-1, "preferential")
    original_columns_titles.insert(-1, "rai")

    data.insert(0, original_columns_titles)

    ut.write_list_csv(save_to, data)
Example #6
def sort_edges_by_resource_allocation(graph, edges):
    edges_sorted = sorted(list(nx.resource_allocation_index(graph, edges)),
                          key=lambda l: l[2],
                          reverse=True,
                          cmp=compare_with_ties)
    return [(row[0], row[1])
            for row in edges_sorted], [row[2] for row in edges_sorted]
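The `cmp=` keyword above only exists in Python 2. A rough Python 3 equivalent, with a hypothetical stand-in for compare_with_ties (the original comparator is not shown), wraps the comparator with functools.cmp_to_key:
import functools
import networkx as nx

def compare_with_ties(a, b):
    # hypothetical comparator over scores; the original implementation is not shown
    return (a > b) - (a < b)

def sort_edges_by_resource_allocation_py3(graph, edges):
    score_key = functools.cmp_to_key(compare_with_ties)
    edges_sorted = sorted(nx.resource_allocation_index(graph, edges),
                          key=lambda triple: score_key(triple[2]),
                          reverse=True)
    return ([(u, v) for u, v, _ in edges_sorted],
            [s for _, _, s in edges_sorted])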
Example #7
def link_prediction_with_metrics(subgraph, tuples, df):
    jaccard_coefficient_list = list(nx.jaccard_coefficient(subgraph, tuples))
    y_test = create_test_data(jaccard_coefficient_list)
    print(
        f"ROC AUC Score with Jaccard Coefficient: {roc_auc_score(df['link'], y_test)}\n"
        f"Average Precision with Jaccard Coefficient: {average_precision_score(df['link'], y_test)}"
    )

    adamic_adar_list = list(nx.adamic_adar_index(subgraph, tuples))
    y_test = create_test_data(adamic_adar_list)
    print(
        f"ROC AUC Score with Adamic Adar Index: {roc_auc_score(df['link'], y_test)}\n"
        f"Average Precision with Adamic Adar Index: {average_precision_score(df['link'], y_test)}"
    )

    preferential_attachment_list = list(
        nx.preferential_attachment(subgraph, tuples))
    y_test = create_test_data(preferential_attachment_list)
    print(
        f"ROC AUC Score with Preferential Attachment: {roc_auc_score(df['link'], y_test)}\n"
        f"Average Precision with Preferential Attachment: {average_precision_score(df['link'], y_test)}"
    )

    resource_allocation_list = list(
        nx.resource_allocation_index(subgraph, tuples))
    y_test = create_test_data(resource_allocation_list)
    print(
        f"ROC AUC Score with Resource Allocation Index: {roc_auc_score(df['link'], y_test)}\n"
        f"Average Precision with Resource Allocation Index: {average_precision_score(df['link'], y_test)}"
    )
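create_test_data is not shown above; since its output goes straight into roc_auc_score and average_precision_score, it presumably just extracts the raw score from each (u, v, score) triple. A minimal sketch under that assumption:
def create_test_data(score_triples):
    # keep only the predicted score for each candidate pair
    return [score for _, _, score in score_triples]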
Example #8
def new_connections_predictions():
    df = future_connections
    df['jaccard_coefficient'] = [
        x[2] for x in nx.jaccard_coefficient(G, df.index)
    ]
    df['resource_allocation_index'] = [
        x[2] for x in nx.resource_allocation_index(G, df.index)
    ]
    df['preferential_attachment'] = [
        x[2] for x in nx.preferential_attachment(G, df.index)
    ]
    df['common_neighbors'] = df.index.map(
        lambda ind: len(list(nx.common_neighbors(G, ind[0], ind[1]))))
    print('.......we have extracted all the features......')
    df_train = df[~pd.isnull(df['Future Connection'])]
    df_test = df[pd.isnull(df['Future Connection'])]
    features = [
        'jaccard_coefficient', 'resource_allocation_index',
        'preferential_attachment', 'common_neighbors'
    ]
    X_train = df_train[features]
    Y_train = df_train['Future Connection']
    X_test = df_test[features]
    scaler = MinMaxScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    clf = LogisticRegression(solver='liblinear', random_state=14)
    clf.fit(X_train_scaled, Y_train)
    predictions = np.round(clf.predict_proba(X_test_scaled)[:, 1], 2)
    results = pd.Series(data=predictions, index=X_test.index)
    results = results.sort_values(ascending=False)
    return results


# print (new_connections_predictions())
Example #9
    def extract_features(self, prediction_set=None):
        edge_features = defaultdict(dict)
        print '{0} | extract_features: res_alloc'.format(str(datetime.now()))
        res_alloc = nx.resource_allocation_index(self.G, ebunch=prediction_set)
        self.append_features(edge_features,
                             feature_name='res_alloc',
                             feature_list=res_alloc)

        print '{0} | extract_features: jaccard_coef'.format(str(
            datetime.now()))
        jaccard_coef = nx.jaccard_coefficient(self.G, ebunch=prediction_set)
        self.append_features(edge_features,
                             feature_name='jaccard_coef',
                             feature_list=jaccard_coef)

        print '{0} | extract_features: adamic_adar'.format(str(datetime.now()))
        adamic_adar = nx.adamic_adar_index(self.G, ebunch=prediction_set)
        self.append_features(edge_features,
                             feature_name='adamic_adar',
                             feature_list=adamic_adar)

        print '{0} | extract_features: pref_attachment'.format(
            str(datetime.now()))
        pref_attachment = nx.preferential_attachment(self.G,
                                                     ebunch=prediction_set)
        self.append_features(edge_features,
                             feature_name='pref_attachment',
                             feature_list=pref_attachment)

        # reformat feature dictionary to a dataframe object
        df, feature_names = self.feature_dict_to_df(edge_features)

        return df, feature_names
Example #10
def SimilarityMeasures(G):

    # resource_allocation_index
    preds = nx.resource_allocation_index(G, [(1, 2), (3, 4), (1, 4), (5, 6),
                                             (3, 5)])
    for u, v, p in preds:
        print('(%d, %d) -> %.8f' % (u, v, p))

    print('****************************')

    # Common neighours
    print(sorted(nx.common_neighbors(G, 1, 2)))
    print('****************************')

    # jaccard coefficient
    preds = nx.jaccard_coefficient(G, [(1, 2), (3, 4), (1, 4), (5, 6), (3, 5)])
    for u, v, p in preds:
        print('(%d, %d) -> %.8f' % (u, v, p))

    print('****************************')

    # AdamicAdar
    preds = nx.adamic_adar_index(G, [(1, 2), (3, 4), (1, 4), (5, 6), (3, 5)])
    for u, v, p in preds:
        print('(%d, %d) -> %.8f' % (u, v, p))

    print('****************************')

    # Preferential Attachment (PA),
    preds = nx.preferential_attachment(G, [(1, 2), (3, 4), (1, 4), (5, 6),
                                           (3, 5)])
    for u, v, p in preds:
        print('(%d, %d) -> %.8f' % (u, v, p))

    print('****************************')
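A small driver for SimilarityMeasures, assuming any undirected graph that contains nodes 1 through 6; the toy graph below is purely illustrative.
import networkx as nx

G = nx.Graph()
G.add_edges_from([(1, 2), (2, 3), (3, 4), (4, 5), (5, 6), (1, 3), (2, 4)])
SimilarityMeasures(G)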
Example #11
def L_P_RA(network):
    num_add = 0  # the number of egdes to be added
    nodes_pair_without_edge = []  # the pairs of nodes without edges
    probability_add = []  # the probabilities of the pairs of nodes to be added
    score = 0.0  # the score of each pair of nodes in link prediction model
    total_score_without_edge = 0.0  # the sum of scores of pairs of nodes without edge

    #  calculate the score of each pair of nodes
    for i, elei in enumerate(network.nodes(), 1):
        for j, elej in enumerate(network.nodes(), 1):

            if i >= j:
                continue
            if not network.has_edge(elei, elej):
                try:
                    pre = nx.resource_allocation_index(network, [(elei, elej)])
                    for u, v, s in pre:
                        score = s
                except:
                    continue
                total_score_without_edge += score
                nodes_pair_without_edge.append((elei, elej, score))

    for a, b, c in nodes_pair_without_edge:
        probability_add.append(
            c / total_score_without_edge
        )  # calculate the probabilities of edges to be added
    # select edges to be added according to probabilities
    edges_add = calculate_param.prob_select_distinct(nodes_pair_without_edge,
                                                     probability_add, num_add)
    for a, b, c in edges_add:
        network.add_edge(a, b)  # add selected edges

    return True
Example #12
def new_connections_predictions():
    future_connections['preferential_attachment'] = [
        i[2] for i in nx.preferential_attachment(G, future_connections.index)
    ]
    future_connections['Common Neighbors'] = future_connections.index.map(
        lambda x: len(list(nx.common_neighbors(G, x[0], x[1]))))
    future_connections['resource_allocation'] = [
        i[2] for i in nx.resource_allocation_index(G, future_connections.index)
    ]
    future_connections['jaccard'] = [
        i[2] for i in nx.jaccard_coefficient(G, future_connections.index)
    ]
    final_test = future_connections[
        future_connections['Future Connection'].isnull()]
    train = future_connections.dropna()
    final_test.drop(['Future Connection'], axis=1, inplace=True)
    X = train.drop(['Future Connection'], axis=1)
    y = train['Future Connection']
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.model_selection import train_test_split
    from sklearn.metrics import roc_auc_score
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=3)
    clf = RandomForestClassifier(n_estimators=100,
                                 max_depth=5,
                                 max_features=None,
                                 random_state=0)
    ran = clf.fit(X, y)
    pred = ran.predict_proba(final_test)
    pred1 = [i[1] for i in pred]
    final_test['pred'] = pred1

    return final_test['pred']
Example #13
def get_test_features():
    features = []
    count = 0
    print("Generating test data features......")
    for temp_data in test_edges:
        if count % 100 == 0:
            print(count)
        count += 1
        feature = []
        try:
            preds = nx.resource_allocation_index(G, [temp_data])
            for u, v, p in preds:
                feature.append(p)

            preds = nx.jaccard_coefficient(G, [temp_data])
            for u, v, p in preds:
                feature.append(p)
        except:
            print("one error at: " + str(count))

        features.append(feature)
    print("test features: " + str(len(features)))
    return features
Example #14
def link_scores(graph, all_dfs, labels, g_undirected):
    predictions1 = nx.preferential_attachment(g_undirected,
                                              g_undirected.edges())
    predictions1 = {(u, v): p for u, v, p in predictions1}

    all_dfs['Preferential_Attachment'] = all_dfs.apply(
        lambda x: map_predictions_to_df(predictions1, x), axis=1)

    predictions3 = nx.resource_allocation_index(g_undirected,
                                                g_undirected.edges())

    try:
        predictions3 = {(u, v): p for u, v, p in predictions3}

        all_dfs['Resource_allocation'] = all_dfs.apply(
            lambda x: map_predictions_to_df(predictions3, x), axis=1)

    except ZeroDivisionError:
        print("ZeroDivisionError: float division by zero")

    return all_dfs
Example #15
def extract_network_based(data):
    print("Extracting Network Based Fearure...")
    tid1 = data['tid1'].values
    tid2 = data['tid2'].values
    num_nodes = np.max((tid1.max(), tid2.max())) + 1

    G = nx.Graph()
    G.add_nodes_from(range(num_nodes))
    for u, v in zip(np.concatenate([tid1, tid2]), np.concatenate([tid2,
                                                                  tid1])):
        G.add_edge(u, v)

    preds = nx.resource_allocation_index(G, list(zip(tid1, tid2)))
    data['nx1'] = [p for (u, v, p) in preds]
    preds = nx.jaccard_coefficient(G, list(zip(tid1, tid2)))
    data['nx2'] = [p for (u, v, p) in preds]
    preds = nx.preferential_attachment(G, list(zip(tid1, tid2)))
    data['nx3'] = [p for (u, v, p) in preds]

    G.add_node(num_nodes)
    for i in range(num_nodes):
        G.add_edge(i, num_nodes)
        G.add_edge(num_nodes, i)
    preds = nx.adamic_adar_index(G, list(zip(tid1, tid2)))
    data['nx4'] = [p for (u, v, p) in preds]
    return data
Example #16
def prepare_data_ft():
    df = future_connections
    # Triadic Measurements return (NodeA, NodeB, coef). use i[2] to access the coef
    df['AAI'] = [i[2] for i in nx.adamic_adar_index(G, df.index)]
    df['JCI'] = [i[2] for i in nx.jaccard_coefficient(G, df.index)]
    df['RA'] = [i[2] for i in nx.resource_allocation_index(G, df.index)]
    df['PA'] = [i[2] for i in nx.preferential_attachment(G, df.index)]
    return df
Example #17
def resource_allocation(G, feat, ledge):
    feat['R_Allocation'] = 0.0
    for i in feat.index.values:
        if (G.has_node(feat['Node_1'][i]) and G.has_node(feat['Node_2'][i])):
            values = nx.resource_allocation_index(
                G, [(feat['Node_1'][i], feat['Node_2'][i])])
            for v in values:
                feat['R_Allocation'][i] = v[2]
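A hedged usage sketch for the resource_allocation feature extractor above, assuming feat is a pandas DataFrame with Node_1 and Node_2 columns (as referenced in the code); the ledge argument is unused in the snippet, so None is passed.
import pandas as pd
import networkx as nx

G = nx.karate_club_graph()
feat = pd.DataFrame({'Node_1': [0, 1, 33], 'Node_2': [5, 2, 99]})  # 99 is deliberately absent from G
resource_allocation(G, feat, None)
print(feat['R_Allocation'])  # rows with a node missing from G keep the default 0.0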
Example #18
def new_connections_predictions():

    # Your Code Here
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.ensemble import GradientBoostingClassifier
    from sklearn.metrics import roc_auc_score
    from sklearn.model_selection import train_test_split
    for node in G.nodes():
        G.node[node]['community'] = G.node[node]['Department']
    preferential_attachment = list(nx.preferential_attachment(G))
    df = pd.DataFrame(index=[(x[0], x[1]) for x in preferential_attachment])
    df['preferential_attachment'] = [x[2] for x in preferential_attachment]
    cn_soundarajan_hopcroft = list(nx.cn_soundarajan_hopcroft(G))
    df_cn_soundarajan_hopcroft = pd.DataFrame(
        index=[(x[0], x[1]) for x in cn_soundarajan_hopcroft])
    df_cn_soundarajan_hopcroft['cn_soundarajan_hopcroft'] = [
        x[2] for x in cn_soundarajan_hopcroft
    ]
    df = df.join(df_cn_soundarajan_hopcroft, how='outer')
    df['cn_soundarajan_hopcroft'] = df['cn_soundarajan_hopcroft'].fillna(
        value=0)
    df['resource_allocation_index'] = [
        x[2] for x in list(nx.resource_allocation_index(G))
    ]
    df['jaccard_coefficient'] = [x[2] for x in list(nx.jaccard_coefficient(G))]
    df = future_connections.join(df, how='outer')
    df_train = df[~pd.isnull(df['Future Connection'])]
    df_test = df[pd.isnull(df['Future Connection'])]
    features = [
        'cn_soundarajan_hopcroft', 'preferential_attachment',
        'resource_allocation_index', 'jaccard_coefficient'
    ]
    df_test = df_test[features]
    X_train, X_test, y_train, y_test = train_test_split(
        df_train[features],
        df_train['Future Connection'],
        random_state=0,
        test_size=0.5)
    clf_RF = RandomForestClassifier(max_features=3,
                                    random_state=0,
                                    max_depth=3,
                                    min_samples_leaf=3,
                                    criterion='entropy')
    clf_RF.fit(X_train, y_train)
    clf_GDBT = GradientBoostingClassifier(learning_rate=0.01,
                                          max_depth=8,
                                          random_state=0,
                                          n_estimators=30)
    clf_GDBT.fit(X_train, y_train)
    roc_score_forest = roc_auc_score(y_test,
                                     clf_RF.predict_proba(X_test)[:, 1])
    roc_score = roc_auc_score(y_test, clf_GDBT.predict_proba(X_test)[:, 1])
    print(roc_score_forest)
    print(roc_score)
    #test_proba = clf_RF.predict_proba(X_test)[:, 1]
    preds = pd.Series(data=clf_GDBT.predict_proba(df_test)[:, 1],
                      index=df_test.index)

    return preds  # Your Answer Here
Example #19
def new_connections_predictions():
    pref_attach = list(nx.preferential_attachment(G))
    df = pd.DataFrame(index=[(x[0], x[1]) for x in pref_attach])
    df['pref_attch'] = [x[2] for x in pref_attach]

    common_neigh = [(e[0], e[1], len(list(nx.common_neighbors(G, e[0], e[1]))))
                    for e in nx.non_edges(G)]
    df1 = pd.DataFrame(index=[(x[0], x[1]) for x in common_neigh])
    df1['common_neigh'] = [x[2] for x in common_neigh]
    df = df.join(df1, how='outer')
    df['common_neigh'] = df['common_neigh'].fillna(value=0)
    del df1

    community_common_neigh = list(
        nx.cn_soundarajan_hopcroft(G, community='Department'))
    df1 = pd.DataFrame(index=[(x[0], x[1]) for x in community_common_neigh])
    df1['community_common_neigh'] = [x[2] for x in community_common_neigh]
    df = df.join(df1, how='outer')
    df['community_common_neigh'] = df['community_common_neigh'].fillna(value=0)
    del df1

    community_res_alloc = list(
        nx.ra_index_soundarajan_hopcroft(G, community='Department'))
    df1 = pd.DataFrame(index=[(x[0], x[1]) for x in community_res_alloc])
    df1['community_res_alloc'] = [x[2] for x in community_res_alloc]
    df = df.join(df1, how='outer')
    df['community_res_alloc'] = df['community_res_alloc'].fillna(value=0)
    del df1

    df['res_alloc'] = [x[2] for x in list(nx.resource_allocation_index(G))]
    df['jaccard_coeff'] = [x[2] for x in list(nx.jaccard_coefficient(G))]

    features = [
        'jaccard_coeff', 'res_alloc', 'pref_attch', 'common_neigh',
        'community_common_neigh', 'community_res_alloc'
    ]

    df = future_connections.join(df, how='outer')
    df_train = df[~pd.isnull(df['Future Connection'])]
    df_test = df[pd.isnull(df['Future Connection'])]

    X_train = df_train[features]
    X_test = df_test[features]
    y_train = df_train['Future Connection']

    scalar = MinMaxScaler()
    X_train_scaled = scalar.fit_transform(X_train)
    X_test_scaled = scalar.transform(X_test)

    clf = RandomForestClassifier(n_estimators=100,
                                 n_jobs=-1,
                                 max_depth=10,
                                 random_state=0).fit(X_train_scaled, y_train)
    test_proba = clf.predict_proba(X_test_scaled)[:, 1]
    predictions = pd.Series(test_proba, X_test.index)
    # target = future_connections[pd.isnull(future_connections['Future Connection'])]
    # target['proba'] = [predictions[x] for x in target.index]
    return predictions
Example #20
def new_connections_predictions():

    # Your Code Here
    for n in G.nodes():
        G.node[n]['community'] = G.node[n]['Department']
    #df = pd.DataFrame(index=[(x[0], x[1]) for x in list(nx.preferential_attachment(G))])
    future_connections['common_neighbors'] = [
        len(list(nx.common_neighbors(G, x[0], x[1])))
        for x in future_connections.index
    ]
    future_connections['jaccard_coefficient'] = [
        list(nx.jaccard_coefficient(G, [x]))[0][2]
        for x in future_connections.index
    ]
    future_connections['resource_allocation_index'] = [
        list(nx.resource_allocation_index(G, [x]))[0][2]
        for x in future_connections.index
    ]
    future_connections['adamic_adar_index'] = [
        list(nx.adamic_adar_index(G, [x]))[0][2]
        for x in future_connections.index
    ]
    future_connections['preferential_attachment'] = [
        list(nx.preferential_attachment(G, [x]))[0][2]
        for x in future_connections.index
    ]
    future_connections['cn_soundarajan_hopcroft'] = [
        list(nx.cn_soundarajan_hopcroft(G, [x]))[0][2]
        for x in future_connections.index
    ]
    #future_connections['ra_soundarajan_hopcroft'] = [list(nx.ra_soundarajan_hopcroft(G, [x]))[0][2] for x in future_connections.index]
    future_connections['cn_soundarajan_hopcroft'] = future_connections[
        'cn_soundarajan_hopcroft'].fillna(value=0)
    #future_connections['ra_soundarajan_hopcroft'] = df['cn_soundarajan_hopcroft'].fillna(value=0)
    #future_connections.join(df,how='outer')

    features = [
        'jaccard_coefficient', 'resource_allocation_index',
        'adamic_adar_index', 'preferential_attachment',
        'cn_soundarajan_hopcroft'
    ]

    X_train = future_connections.loc[
        ~pd.isnull(future_connections['Future Connection']), features]
    y_train = future_connections.loc[
        ~pd.isnull(future_connections['Future Connection']),
        ['Future Connection']]
    X_test = future_connections.loc[(
        pd.isnull(future_connections['Future Connection'])), features]

    classifier = MLPClassifier(hidden_layer_sizes=[10, 5],
                               solver='lbfgs',
                               alpha=10)
    classifier.fit(X_train, y_train)

    y_predicted = classifier.predict_proba(X_test)[:, 1]

    return pd.Series(y_predicted, X_test.index)  # Your Answer Here
Example #21
def resource_allocation_index(graph, train_set, test_set):
    if not os.path.isfile("./data/resource_allocation_training.csv"):
        print("Computing training resource-allocation index")

        t0 = clock()
        results = []

        for i in range(0, len(train_set)):
            preds = nx.resource_allocation_index(graph, [(train_set[i][0], train_set[i][1])])
            results.append([float(p) for u, v, p in preds])
            if i % 5000 == 0:
                print(i)

        print("Some elements of results:", results[0:5])

        with open("./data/resource_allocation_training.csv", 'w') as file:
            csv_out = csv.writer(file)
            for i in range(0, len(train_set)):
                csv_out.writerow(results[i])
                if i % 100000 == 0:
                    print(i)

        print(clock() - t0)

    if not os.path.isfile("./data/resource_allocation_testing.csv"):
        print("Computing testing resource-allocation index")
        t0 = clock()
        results = []

        for i in range(0, len(test_set)):
            preds = nx.resource_allocation_index(graph, [(test_set[i][0], test_set[i][1])])
            results.append([float(p) for u, v, p in preds])
            if i % 5000 == 0:
                print(i)

        print("Some elements of results:", results[0:5])

        with open("./data/resource_allocation_testing.csv", 'w') as file:
            csv_out = csv.writer(file)
            for i in range(0, len(test_set)):
                csv_out.writerow(results[i])
                if i % 100000 == 0:
                    print(i)

        print(clock() - t0)
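One caveat about the timings above: time.clock was removed in Python 3.8, so the module-level import (presumably `from time import clock`) fails on current interpreters. A drop-in fallback using time.perf_counter:
try:
    from time import clock                   # Python <= 3.7
except ImportError:
    from time import perf_counter as clock   # Python >= 3.8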
Example #22
    def edge_feature(self, e):
        return numpy.array([
            # square root of the number of common neighbours
            math.sqrt(float(len(set(self.G[e[0]]).intersection(self.G[e[1]])))),
            # absolute difference of the endpoints' clustering coefficients
            float(abs(nx.clustering(self.G, e[0]) - nx.clustering(self.G, e[1]))),
            # Jaccard coefficient of the node pair
            float(list(nx.jaccard_coefficient(self.G, [(e[0], e[1])]))[0][2]),
            # resource allocation index of the node pair
            float(list(nx.resource_allocation_index(self.G, [(e[0], e[1])]))[0][2]),
            # ratio of the smaller to the larger endpoint degree
            float(min(len(self.G[e[0]]), len(self.G[e[1]]))) / float(max(len(self.G[e[0]]), len(self.G[e[1]]))),
            # bias term
            1.0
        ])
Example #23
    def predict(self, node_pairs):
        predictions = resource_allocation_index(self.graph, node_pairs)
        return list(predictions)

    def __repr__(self):
        return self.__str__()

    def __str__(self):
        return 'ResourceAllocation'
Example #24
def new_connections_predictions():

    pref_atch = list(nx.preferential_attachment(G))
    new_df = pd.DataFrame(index=[(x[0], x[1]) for x in pref_atch])
    new_df["PrefrentialAttachment"] = [x[2] for x in pref_atch]

    cn_soundarajan_hopcroft = list(
        nx.cn_soundarajan_hopcroft(G, community="Department"))
    df_cn_soundarajan_hopcroft = pd.DataFrame(
        index=[(x[0], x[1]) for x in cn_soundarajan_hopcroft])
    df_cn_soundarajan_hopcroft['CommunityCommonNeighbor'] = [
        x[2] for x in cn_soundarajan_hopcroft
    ]
    new_df = new_df.join(df_cn_soundarajan_hopcroft, how='outer')
    new_df['CommunityCommonNeighbor'] = new_df[
        'CommunityCommonNeighbor'].fillna(value=0)

    res_alo = list(nx.resource_allocation_index(G))
    df_res_alo = pd.DataFrame(index=[(x[0], x[1]) for x in res_alo])
    df_res_alo["ResourceAllocationIndex"] = [x[2] for x in res_alo]
    new_df = new_df.join(df_res_alo, how='outer')
    new_df['ResourceAllocationIndex'] = new_df[
        'ResourceAllocationIndex'].fillna(value=0)

    jac_coef = list(nx.jaccard_coefficient(G))
    df_jac_coef = pd.DataFrame(index=[(x[0], x[1]) for x in jac_coef])
    df_jac_coef["JaccardCoeffiecient"] = [x[2] for x in jac_coef]
    new_df = new_df.join(df_jac_coef, how='outer')
    new_df['JaccardCoefficient'] = new_df['JaccardCoefficient'].fillna(
        value=0)

    new_df = new_df.join(future_connections, how='outer')
    train_df = new_df[~new_df["Future Connection"].isnull()]
    test_df = new_df[new_df["Future Connection"].isnull()]

    features = [
        "PreferentialAttachment", "CommunityCommonNeighbor",
        "ResourceAllocationIndex", "JaccardCoefficient"
    ]
    X_train = train_df[features]
    X_test = test_df[features]
    y_train = train_df["Future Connection"]

    scaler = MinMaxScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    clf = MLPClassifier(hidden_layer_sizes=[10, 5],
                        alpha=5,
                        random_state=0,
                        solver='lbfgs',
                        verbose=0)
    clf.fit(X_train_scaled, y_train)
    rslt = clf.predict_proba(X_test_scaled)[:, 1]
    final_rslt = pd.Series(rslt, index=X_test.index)

    return final_rslt
Example #25
def resourceAllocation(G, X):
    resource_allocation = []
    for i in range(X.shape[0]):
        try:
            coef = [[u, v, p] for u, v, p in nx.resource_allocation_index(
                G, [(X[i][0], X[i][1])])][0]
            resource_allocation.append(coef[2])
        except:
            resource_allocation.append(0)
    return resource_allocation
Example #26
    def resourceAllocationIndex(self, buyerPairs):
        # Keep node pairs whose resource allocation index exceeds the threshold, then de-duplicate.
        allocation = nx.resource_allocation_index(self.networkG, buyerPairs)
        allocation = [entry for entry in allocation if entry[2] > 1.5]
        allocation = list(set(allocation))

        #Print allocation indexes
        for entry in allocation:
            u, v, p = entry
            print('Node pair: [', u, ']', '[', v, '] -> ', 'Index = ', p)
Example #27
def calRA(nodeA, nodeB):
    if (UDG.has_node(nodeA) and UDG.has_node(nodeB)):
        try:
            RA = nx.resource_allocation_index(UDG, [(nodeA, nodeB)])
            for u, v, p in RA:
                return p
        except ZeroDivisionError:
            return 0
    else:
        return 0
Example #28
def generate_algo(graph, X):
    res_alloc_index = np.asarray(list(nx.resource_allocation_index(graph,
                                                                   X)))[:, 2]
    jac_coef = np.asarray(list(nx.jaccard_coefficient(graph, X)))[:, 2]
    ad_adar_idx = np.asarray(list(nx.adamic_adar_index(graph, X)))[:, 2]
    pref_att = np.asarray(list(nx.preferential_attachment(graph, X)))[:, 2]
    #cn_sound_hop=list(nx.cn_soundarajan_hopcroft(graph, X))
    #ra_sound_hop  =list( nx.ra_index_soundarajan_hopcroft(graph, X))
    #within = list(nx.within_inter_cluster(graph, X))
    return list(res_alloc_index), list(jac_coef), list(ad_adar_idx), list(
        pref_att)
Example #29
    def resourceAllocationIndex(self, buyerPairs):
        # Keep node pairs whose resource allocation index exceeds the threshold, then de-duplicate.
        print('Computing similarity scores using the Resource Allocation Index ...')
        allocation = nx.resource_allocation_index(self.networkG, buyerPairs)
        allocation = [entry for entry in allocation if entry[2] > self.threshold]
        allocation = list(set(allocation))

        print('Computation Done!')
        print('\n')

        return(list(set(allocation)))
Example #30
def prob_in_net(n, m, graphs_test, train_edges_0, test_edges_0, matrixes,
                M_test):

    feature_train = []
    feature_test = []
    feature_train_1 = []
    feature_test_1 = []
    pred_prob = []
    pred = []
    roc_auc = []
    roc_avg = 0
    truth_test = []
    for i in range(k_net):
        # resource_allocation_index
        feature_train.append(
            nx.resource_allocation_index(graphs_test[i], train_edges_0))
        feature_test.append(
            nx.resource_allocation_index(graphs_test[i], test_edges_0))

        A2 = M_test[i] * M_test[i]
        A3 = A2 * M_test[i]
        Lp_matrix = A2 + (0.001 * A3)
        (features_train,
         truth_train) = similarity_Features(Lp_matrix, matrixes[i],
                                            train_edges_0)
        (features_test, truth) = similarity_Features(Lp_matrix, matrixes[i],
                                                     test_edges_0)
        truth_test.append(truth)

        LR = LogisticRegression(class_weight='balanced')
        LR.fit(features_train, truth_train)
        pred.append(LR.predict(features_test))
        x = LR.predict_proba(features_test)[:, 1]
        pred_prob.append(x)
        fpr, tpr, thrshold = metrics.roc_curve(truth, pred[i])
        roc = metrics.auc(fpr, tpr)
        roc_auc.append(roc)
        print('Net', i, roc)
        roc_avg += roc
    roc_auc.append(roc_avg / k_net)
    return (roc_auc, matrixes, pred_prob, truth_test)
Example #31
def sort_edges_by_resource_allocation(graph, edges):
    edges_sorted = sorted(list(nx.resource_allocation_index(
        graph, edges)), key=lambda l: l[2], reverse=True, cmp=compare_with_ties)
    return [(row[0], row[1]) for row in edges_sorted], [row[2] for row in edges_sorted]
Example #32
def link_prediction(G, query_nodes, target_nodes, n_edges, start_dist, alg = "ra"):
    """Selects a random set of links between based on the scores calculated by 
    a standard link-prediction algorithm from networkx library
    Parameters
    ----------
    G : Networkx graph
        The graph on which candidate links are scored.
    query_nodes : list
        The set of nodes from which the random walker starts.
    target_nodes : list
        The set of nodes where the random walker ends.
    n_edges : integer
        the number of links to be added
    start_dist: list
        The starting distribution over the query set
    alg: string
        A string describing the link-prediction algorithm to be used
    Returns
    -------
    links : list
        The set of links that reduce the absorbing RW centrality
    ac_scores: list
        The set of scores of adding the links
    """
    assert alg in ["ra", "pa", "jaccard", "aa"], "alg must be one of [\"ra\", \"pa\", \"jaccard\", \"aa\"]."
          
    H = G.copy()
    query_set_size = len(query_nodes)
    map_query_to_org = dict(zip(query_nodes, range(query_set_size)))
    P = csc_matrix(nx.google_matrix(H, alpha=1))
    P_abs = P[list(query_nodes),:][:,list(query_nodes)]
    F = compute_fundamental(P_abs)
    row_sums = start_dist.dot(F.sum())[0,0]
    candidates = list(product(query_nodes, target_nodes))
    eligible = [candidates[i] for i in range(len(candidates)) 
                if H.has_edge(candidates[i][0], candidates[i][1]) == False]
    links_to_add = []
    if alg == 'ra':
        preds = nx.resource_allocation_index(H, eligible)
    elif alg == 'jaccard':
        preds = nx.jaccard_coefficient(H, eligible)
    elif alg == 'aa':
        preds = nx.adamic_adar_index(H, eligible)
    elif alg == 'pa':
        preds = nx.preferential_attachment(H, eligible)
        
    for u,v,p in preds:
        links_to_add.append((u,v,p))
    links_to_add.sort(key=lambda x: x[2], reverse = True)
    
    ac_scores = []
    ac_scores.append(row_sums)
    i = 0
    while i < n_edges:
        F_updated = update_fundamental_mat(F, H, map_query_to_org, links_to_add[i][0])
        H.add_edge(links_to_add[i][0], links_to_add[i][1])
        abs_cen = start_dist.dot(F_updated.sum(axis = 1))[0,0]
        F = F_updated            
        ac_scores.append(abs_cen)
        i += 1
    return links_to_add, ac_scores