Пример #1
0
def expLP(digraph,
          graph_embedding,
          n_sample_nodes,
          rounds,
          res_pre,
          m_summ,
          train_ratio=0.8,
          no_python=False,
          is_undirected=True):
    """Run repeated link-prediction evaluations and persist the results.

    Calls ``evaluateStaticLinkPrediction`` ``rounds`` times, writes a
    tab-separated summary to ``<res_pre>_<m_summ>.lpsumm`` and pickles
    ``[MAP, prec_curv]`` to ``<res_pre>_<m_summ>.lp``.

    Args:
        digraph: Graph handed unchanged to the evaluator.
        graph_embedding: Embedding object handed unchanged to the evaluator.
        n_sample_nodes: Forwarded to the evaluator as ``n_sample_nodes``
            (presumably the number of nodes to sample per round).
        rounds (int): Number of evaluation rounds. Must be at least 1,
            because the summary reports the first round's precision curve.
        res_pre (str): Prefix of both output file names.
        m_summ (str): Second component of both output file names.
        train_ratio (float): Forwarded to the evaluator.
        no_python (bool): Forwarded to the evaluator.
        is_undirected (bool): Forwarded to the evaluator.

    Returns:
        None. Results are only written to disk.
    """
    print('\tLink Prediction')
    MAP = [None] * rounds
    prec_curv = [None] * rounds
    # The context manager guarantees the summary file is closed even when an
    # evaluation round raises.
    with open('%s_%s.lpsumm' % (res_pre, m_summ), 'w') as summ_file:
        summ_file.write('Method\t%s\n' % metrics.getMetricsHeader())
        for round_id in range(rounds):
            # Honour the caller's sample size; it used to be silently
            # overridden by a hard-coded 1024.
            MAP[round_id], prec_curv[round_id] = \
                evaluateStaticLinkPrediction(digraph, graph_embedding,
                                             train_ratio=train_ratio,
                                             n_sample_nodes=n_sample_nodes,
                                             no_python=no_python,
                                             is_undirected=is_undirected)
        # Mean/std of MAP over all rounds, plus the precision report of the
        # first round only.
        summ_file.write(
            '\t%f/%f\t%s\n' %
            (np.mean(MAP), np.std(MAP),
             metrics.getPrecisionReport(prec_curv[0], len(prec_curv[0]))))
    with open('%s_%s.lp' % (res_pre, m_summ), 'wb') as res_file:
        pickle.dump([MAP, prec_curv], res_file)
def expGR(digraph, graph_embedding,
          X, n_sampled_nodes, rounds,
          res_pre, m_summ,
          is_undirected=True):
    """Run repeated graph-reconstruction evaluations and persist the results.

    Each round samples a subgraph with ``graph_util.sample_graph``, selects
    the matching rows of ``X`` and calls
    ``evaluateStaticGraphReconstruction``. A summary is written to
    ``<res_pre>_<m_summ>.grsumm`` and
    ``[n_nodes, n_edges, MAP, prec_curv, err, err_b]`` is pickled to
    ``<res_pre>_<m_summ>.gr``.

    Args:
        digraph: Graph to sample from (a networkx graph, judging by the
            ``number_of_nodes`` API used here).
        graph_embedding: Embedding object handed unchanged to the evaluator.
        X: Node embeddings; indexed as ``X[node_l]`` with the sampled node
            list (presumably a NumPy array with one row per node).
        n_sampled_nodes (int): Number of nodes requested per sample.
        rounds (int): Number of rounds. Forced to 1 when the graph has no
            more than ``n_sampled_nodes`` nodes. Must be at least 1 because
            the summary reports the first round's precision curve.
        res_pre (str): Prefix of both output file names.
        m_summ (str): Second component of both output file names.
        is_undirected (bool): Forwarded to the evaluator.

    Returns:
        None. Results are only written to disk.
    """
    print('\tGraph Reconstruction')
    if digraph.number_of_nodes() <= n_sampled_nodes:
        # The whole graph fits in a single sample, so one round suffices.
        rounds = 1
    MAP = [None] * rounds
    prec_curv = [None] * rounds
    err = [None] * rounds
    err_b = [None] * rounds
    n_nodes = [None] * rounds
    n_edges = [None] * rounds
    # The context manager closes the summary file on every exit path; the
    # handle used to be left open.
    with open('%s_%s.grsumm' % (res_pre, m_summ), 'w') as summ_file:
        summ_file.write('Method\t%s\n' % metrics.getMetricsHeader())
        for round_id in range(rounds):
            sampled_digraph, node_l = graph_util.sample_graph(
                digraph,
                n_sampled_nodes=n_sampled_nodes
            )
            n_nodes[round_id] = sampled_digraph.number_of_nodes()
            n_edges[round_id] = sampled_digraph.number_of_edges()
            print('\t\tRound: %d, n_nodes: %d, n_edges:%d\n' % (
                round_id, n_nodes[round_id], n_edges[round_id]))
            sampled_X = X[node_l]
            (MAP[round_id], prec_curv[round_id],
             err[round_id], err_b[round_id]) = \
                evaluateStaticGraphReconstruction(sampled_digraph,
                                                  graph_embedding,
                                                  sampled_X, node_l,
                                                  is_undirected=is_undirected)
        try:
            summ_file.write('Err: %f/%f\n' % (np.mean(err), np.std(err)))
            summ_file.write('Err_b: %f/%f\n' % (np.mean(err_b),
                                                np.std(err_b)))
        except TypeError:
            # Best effort: the error figures cannot be averaged (presumably
            # because the evaluator returned None for them), so these lines
            # are simply left out of the summary.
            pass
        summ_file.write('%f/%f\t%s\n' % (
            np.mean(MAP), np.std(MAP),
            metrics.getPrecisionReport(prec_curv[0], n_edges[0])))
    with open('%s_%s.gr' % (res_pre, m_summ), 'wb') as res_file:
        pickle.dump([n_nodes, n_edges, MAP, prec_curv, err, err_b], res_file)
Пример #3
0
def expGR(digraph,
          graph_embedding,
          X,
          n_sampled_nodes_l,
          rounds,
          res_pre,
          m_summ,
          K=10000,
          is_undirected=True,
          sampling_scheme="u_rand"):
    """Run graph-reconstruction experiments for several sample sizes.

    For every requested sample size, ``rounds`` subgraphs are sampled and
    evaluated with ``evaluateStaticGraphReconstruction``. A summary is
    written to ``<res_pre>_<m_summ>_<sampling_scheme>.grsumm`` and the raw
    results are pickled to ``<res_pre>_<m_summ>_<sampling_scheme>.gr``.

    Args:
        digraph (object): networkx graph to sample from.
        graph_embedding (object): Embedding object handed unchanged to the
            evaluator.
        X (object): Node embeddings; indexed as ``X[node_l]`` with the
            sampled node list (presumably a NumPy array with one row per
            node).
        n_sampled_nodes_l (list): Requested sample sizes. Each entry is
            converted to ``int`` and capped at the number of nodes in
            ``digraph``. ``None`` or an empty list means a single run using
            every node.
        rounds (int): Number of rounds per sample size. Must be at least 1
            because the summary reports the first round's precision curve.
        res_pre (str): Prefix of both output file names.
        m_summ (str): Second component of both output file names.
        K (int): Maximum number of precision-curve entries kept per round.
        is_undirected (bool): Forwarded to the evaluator.
        sampling_scheme (str): ``"u_rand"`` selects
            ``graph_util.sample_graph``; any other value selects
            ``graph_util.sample_graph_rw``.

    Returns:
        list: The per-round MAP values of the first sample size.
    """
    print('\tGraph Reconstruction')
    node_num = digraph.number_of_nodes()
    # Normalise the caller's list once. The body used to read a misspelled,
    # never-assigned name here and failed on every call.
    if n_sampled_nodes_l is None:
        sample_sizes = []
    else:
        sample_sizes = [min(int(n), node_num) for n in n_sampled_nodes_l]
    if not sample_sizes:
        sample_sizes = [node_num]
    MAP = {}
    prec_curv = {}
    err = {}
    err_b = {}
    n_nodes = {}
    n_edges = {}
    summ_path = '%s_%s_%s.grsumm' % (res_pre, m_summ, sampling_scheme)
    # The context manager closes the summary file on every exit path; the
    # handle used to be left open.
    with open(summ_path, 'w') as summ_file:
        summ_file.write('Method\t%s\n' % metrics.getMetricsHeader())
        for n_s in sample_sizes:
            MAP[n_s] = [None] * rounds
            prec_curv[n_s] = [None] * rounds
            err[n_s] = [None] * rounds
            err_b[n_s] = [None] * rounds
            n_nodes[n_s] = [None] * rounds
            n_edges[n_s] = [None] * rounds
            for rid in range(rounds):
                if sampling_scheme == "u_rand":
                    sampled_digraph, node_l = graph_util.sample_graph(
                        digraph, n_sampled_nodes=n_s)
                else:
                    sampled_digraph, node_l = graph_util.sample_graph_rw(
                        digraph, n_sampled_nodes=n_s)
                n_nodes[n_s][rid] = sampled_digraph.number_of_nodes()
                n_edges[n_s][rid] = sampled_digraph.number_of_edges()
                print('\t\tRound: %d/%d, n_nodes: %d, n_edges:%d\n' %
                      (rid, rounds, n_nodes[n_s][rid], n_edges[n_s][rid]))
                sampled_X = X[node_l]
                (MAP[n_s][rid], prec_curv[n_s][rid],
                 err[n_s][rid], err_b[n_s][rid]) = \
                    evaluateStaticGraphReconstruction(
                        sampled_digraph, graph_embedding,
                        sampled_X, node_l,
                        is_undirected=is_undirected)
                # Keep at most K entries of the precision curve.
                prec_curv[n_s][rid] = prec_curv[n_s][rid][:K]
            summ_file.write('n_s:%d' % n_s)
            try:
                summ_file.write('\tErr: %f/%f\n' %
                                (np.mean(err[n_s]), np.std(err[n_s])))
                summ_file.write('\tErr_b: %f/%f\n' %
                                (np.mean(err_b[n_s]), np.std(err_b[n_s])))
            except TypeError:
                # Best effort: the error figures cannot be averaged
                # (presumably because the evaluator returned None for them),
                # so these lines are left out of the summary.
                pass
            summ_file.write(
                '\t%f/%f\t%s\n' %
                (np.mean(MAP[n_s]), np.std(MAP[n_s]),
                 metrics.getPrecisionReport(prec_curv[n_s][0],
                                            n_edges[n_s][0])))
    res_path = '%s_%s_%s.gr' % (res_pre, m_summ, sampling_scheme)
    with open(res_path, 'wb') as res_file:
        pickle.dump(
            [n_nodes, n_edges, MAP, prec_curv, err, err_b, sample_sizes],
            res_file)
    # The first sample size is also the first key inserted into MAP.
    return MAP[sample_sizes[0]]
Пример #4
0
def expLP(digraph, graph_embedding,
          n_sample_nodes_l, rounds,
          res_pre, m_summ, train_ratio=0.8,
          no_python=True, K=32768,
          is_undirected=True, sampling_scheme="u_rand"):
    """Run link-prediction experiments for several sample sizes.

    The graph is split into train and test graphs, the embedding is learned
    once on the (largest weakly connected component of the) train graph, and
    for every sample size ``rounds`` sampled subgraphs are evaluated with
    ``evaluateStaticLinkPrediction``.

    Files written:
        * ``gem/nodeListMap/lp_lcc.pickle``: node relabelling applied to the
          train graph (identity mapping when no relabelling was needed).
        * ``gem/nodeListMap/lp_lcc_samp.pickle``: relabelling of the most
          recent sample (overwritten every round).
        * ``<res_pre>_<m_summ>_<sampling_scheme>.lpsumm``: text summary.
        * ``<res_pre>_<m_summ>_<sampling_scheme>_<train_ratio>.lp``: pickled
          ``[MAP, prec_curv, n_sample_nodes_l]``.
    The ``gem/nodeListMap`` directory is not created here and must exist.

    Args:
        digraph: networkx graph to evaluate on.
        graph_embedding: Object exposing ``learn_embedding(graph=...,
            no_python=...)`` that returns the embedding as its first value.
        n_sample_nodes_l (list): Requested sample sizes. Each entry is
            converted to ``int`` and capped at the number of nodes; an empty
            list means a single run with every node.
        rounds (int): Number of rounds per sample size. Must be at least 1
            because the summary reports the first round's precision curve.
        res_pre (str): Prefix of the summary and result file names.
        m_summ (str): Second component of those file names.
        train_ratio (float): Forwarded to
            ``evaluation_util.splitDiGraphToTrainTest``.
        no_python (bool): Forwarded to ``learn_embedding``.
        K (int): Maximum number of precision-curve entries kept per round.
        is_undirected (bool): Forwarded to the splitter and the evaluator.
        sampling_scheme (str): ``"u_rand"`` selects
            ``graph_util.sample_graph``; any other value selects
            ``graph_util.sample_graph_rw``.

    Returns:
        list: The per-round MAP values of the first sample size.

    Raises:
        ValueError: If the learned embedding does not have one row per node
            of the train graph.
    """
    print('\tLink Prediction')
    MAP = {}
    prec_curv = {}
    node_num = digraph.number_of_nodes()
    n_sample_nodes_l = [min(int(n), node_num) for n in n_sample_nodes_l]

    # Randomly hide (1-train_ratio)*100% of links
    train_digraph, test_digraph = evaluation_util.splitDiGraphToTrainTest(
        digraph,
        train_ratio=train_ratio,
        is_undirected=is_undirected
    )

    # Ensure the resulting train subgraph is connected
    if not nx.is_connected(train_digraph.to_undirected()):
        # weakly_connected_component_subgraphs() was removed in NetworkX 2.4;
        # take the largest component's node set and copy its subgraph.
        largest_cc = max(nx.weakly_connected_components(train_digraph),
                         key=len)
        train_digraph = train_digraph.subgraph(largest_cc).copy()
        tdl_nodes = list(train_digraph.nodes())
        nodeListMap = dict(zip(tdl_nodes, range(len(tdl_nodes))))
        train_digraph = nx.relabel_nodes(train_digraph, nodeListMap, copy=True)
        # Build a new, unfrozen graph from the subgraph before relabelling.
        # NOTE(review): nx.Graph() drops edge direction, and only on this
        # branch - confirm this is intended when is_undirected is False.
        test_digraph = nx.Graph(test_digraph.subgraph(tdl_nodes))
        test_digraph = nx.relabel_nodes(test_digraph, nodeListMap, copy=True)
    else:
        # Nothing was relabelled. Record an identity mapping so the dump
        # below works; nodeListMap used to be undefined on this path.
        nodeListMap = {node: node for node in train_digraph.nodes()}

    with open('gem/nodeListMap/lp_lcc.pickle', 'wb') as map_file:
        pickle.dump(nodeListMap, map_file)

    t1 = time()
    # learn graph embedding on train subgraph
    print(
        'Link Prediction train graph n_nodes: %d, n_edges: %d' % (
            train_digraph.number_of_nodes(),
            train_digraph.number_of_edges())
    )
    X, _ = graph_embedding.learn_embedding(
        graph=train_digraph,
        no_python=no_python
    )
    if X is not None and X.shape[0] != train_digraph.number_of_nodes():
        # Fail loudly instead of dropping into an interactive debugger.
        raise ValueError(
            'Embedding has %d rows but the train graph has %d nodes' % (
                X.shape[0], train_digraph.number_of_nodes()))
    print('Time taken to learn the embedding: %f sec' % (time() - t1))

    # sample test graph for evaluation and store results
    if not n_sample_nodes_l:
        n_sample_nodes_l = [node_num]
    summ_path = '%s_%s_%s.lpsumm' % (res_pre, m_summ, sampling_scheme)
    with open(summ_path, 'w') as summ_file:
        summ_file.write('Method\t%s\n' % metrics.getMetricsHeader())
        for n_s in n_sample_nodes_l:
            # The train graph may be smaller than the original graph after
            # the connected-component reduction above.
            n_s = min(int(n_s), train_digraph.number_of_nodes())
            MAP[n_s] = [None] * rounds
            prec_curv[n_s] = [None] * rounds
            for round_id in range(rounds):
                if sampling_scheme == "u_rand":
                    train_digraph_s, node_l = graph_util.sample_graph(
                        train_digraph,
                        n_s
                    )
                else:
                    train_digraph_s, node_l = graph_util.sample_graph_rw(
                        train_digraph,
                        n_s
                    )
                X_sub = X[node_l] if X is not None else None
                test_digraph_s = test_digraph.subgraph(node_l)
                # Relabel the sampled test graph to 0..len(node_l)-1 in
                # node_l order.
                nodeListMap = dict(zip(node_l, range(len(node_l))))
                with open('gem/nodeListMap/lp_lcc_samp.pickle',
                          'wb') as map_file:
                    pickle.dump(nodeListMap, map_file)
                test_digraph_s = nx.relabel_nodes(test_digraph_s,
                                                  nodeListMap, copy=True)
                MAP[n_s][round_id], prec_curv[n_s][round_id] = \
                    evaluateStaticLinkPrediction(train_digraph_s,
                                                 test_digraph_s,
                                                 graph_embedding, X_sub,
                                                 node_l=node_l,
                                                 is_undirected=is_undirected)
                # Keep at most K entries of the precision curve.
                prec_curv[n_s][round_id] = prec_curv[n_s][round_id][:K]
            summ_file.write('\tn_s:%d, %f/%f\t%s\n' % (
                n_s,
                np.mean(MAP[n_s]),
                np.std(MAP[n_s]),
                metrics.getPrecisionReport(
                    prec_curv[n_s][0],
                    len(prec_curv[n_s][0])
                )
            ))
    res_path = '%s_%s_%s_%s.lp' % (res_pre, m_summ, sampling_scheme,
                                   str(train_ratio))
    with open(res_path, 'wb') as res_file:
        pickle.dump([MAP, prec_curv, n_sample_nodes_l], res_file)
    print('Link prediction evaluation complete. Time: %f sec' % (time() - t1))
    return MAP[list(MAP.keys())[0]]