Example #1
def test_ae_static():
    # Parameters for Stochastic block model graph
    # Total of 100 nodes
    node_num = 100
    # Test with two communities
    community_num = 2
    # At each iteration, migrate 2 nodes from one community to another
    node_change_num = 2
    # Length of total time steps the graph will dynamically change
    length = 7
    # Output directories for results and intermediate files
    outdir = './output'
    intr = './intermediate'
    if not os.path.exists(outdir):
        os.mkdir(outdir)
    if not os.path.exists(intr):
        os.mkdir(intr)
    testDataType = 'sbm_cd'
    # Generate the dynamic graph
    dynamic_sbm_series = list(sbm.get_community_diminish_series_v2(node_num,
                                                                   community_num,
                                                                   length,
                                                                   1,  # community ID to perturb
                                                                   node_change_num))
    graphs = [g[0] for g in dynamic_sbm_series]
    # parameters for the dynamic embedding
    # dimension of the embedding
    dim_emb = 8
    lookback = 2

    # AE Static
    embedding = AE(d=dim_emb,
                   beta=5,
                   nu1=1e-6,
                   nu2=1e-6,
                   K=3,
                   n_units=[500, 300],
                   n_iter=2,
                   xeta=1e-4,
                   n_batch=100,
                   modelfile=['./intermediate/enc_modelsbm.json',
                              './intermediate/dec_modelsbm.json'],
                   weightfile=['./intermediate/enc_weightssbm.hdf5',
                               './intermediate/dec_weightssbm.hdf5'])
    embs = []
    t1 = time()
    # ae static
    for temp_var in range(length):
        emb, _ = embedding.learn_embeddings(graphs[temp_var])
        embs.append(emb)
    print(embedding._method_name + ':\n\tTraining time: %f' % (time() - t1))

    viz.plot_static_sbm_embedding(embs[-4:], dynamic_sbm_series[-4:])
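This example assumes imports along the following lines; the module paths follow the usual dynamicgem package layout and are an assumption, not part of the original snippet:

import os
from time import time

# Assumed dynamicgem module paths; adjust to your installation.
from dynamicgem.embedding.ae_static import AE
from dynamicgem.graph_generation import dynamic_SBM_graph as sbm
from dynamicgem.evaluation import visualize_embedding as viz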
Example #2
embedding = AE(d          = dim_emb,
               beta       = 5,
               nu1        = 1e-6,
               nu2        = 1e-6,
               K          = 3,
               n_units    = [500, 300],
               n_iter     = 200,
               xeta       = 1e-4,
               n_batch    = 100,
               modelfile  = ['./intermediate/enc_modelsbm.json',
                             './intermediate/dec_modelsbm.json'],
               weightfile = ['./intermediate/enc_weightssbm.hdf5',
                             './intermediate/dec_weightssbm.hdf5'])
embs = []
t1 = time()
# AE static
for temp_var in range(length):
    emb, _ = embedding.learn_embeddings(graphs[temp_var])
    embs.append(emb)
print(embedding._method_name + ':\n\tTraining time: %f' % (time() - t1))

viz.plot_static_sbm_embedding(embs[-4:], dynamic_sbm_series[-4:])

# TIMERS
datafile = dataprep_util.prep_input_TIMERS(graphs, length, testDataType)
embedding = TIMERS(K             = dim_emb,
                   Theta         = 0.5,
                   datafile      = datafile,
                   length        = length,
                   nodemigration = node_change_num,
                   resultdir     = outdir,
                   datatype      = testDataType)
if not os.path.exists(outdir):
    os.mkdir(outdir)
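The exists-then-mkdir pattern used in these examples is racy and only creates one path level; a minimal standard-library alternative:

import os

# makedirs creates intermediate directories as needed, and exist_ok=True
# makes the call a no-op when the directory already exists.
for d in ('./output', './intermediate'):
    os.makedirs(d, exist_ok=True)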
Example #3
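This example assumes roughly the following imports; the dyngraph2vec model classes are guessed at the usual dynamicgem paths, and third_party_utils is a project-local helper module, so treat all paths as assumptions:

import os
from time import time

import numpy as np
import networkx as nx

# Assumed module paths for the models used below.
from dynamicgem.embedding.ae_static import AE
from dynamicgem.embedding.dynAE import DynAE
from dynamicgem.embedding.dynRNN import DynRNN
from dynamicgem.embedding.dynAERNN import DynAERNN

import third_party_utils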
def main(args):

    # Set seeds
    np.random.seed(args.seed)
    from tensorflow import set_random_seed  # TF 1.x API; TF 2.x uses tf.random.set_seed
    set_random_seed(args.seed)

    # Set the number of timesteps in the sequence
    num_timesteps = args.seq_len - 1  # one timestep per pair of consecutive graphs
    num_training_loops = num_timesteps - 1  # Number of training loops to run (the final graph is held out for val/test)
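    # Worked example of the indexing: with args.seq_len = 10 there are
    # snapshots t0..t9; num_timesteps = 9, so t0..t8 are preloaded below,
    # t9 is held out for val/test, and num_training_loops = 8.

    # dim_emb and lookback are used further down but never defined in this
    # excerpt; illustrative values (an assumption, not from the original):
    dim_emb = 128  # embedding dimensionality
    lookback = 2   # number of previous snapshots the dyn* models condition on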

    data_loc = os.path.join(args.data_loc, args.dataset)

    # Preload the training graphs into memory. Not very scalable, but it
    # eases CPU load during training.
    # Preload all but the last graph, which is held out for val/test.
    graphs = []
    for i in range(num_timesteps):
        adj_train, features = third_party_utils.load_adj_graph(
            f'{data_loc}_t{i}.npz')  # Load the input graph
        graphs.append(
            nx.from_scipy_sparse_matrix(adj_train, create_using=nx.DiGraph()))
        print(f'{args.dataset}_t{i} Loaded')
    # One graph per timestep (indexing starts from zero)
    assert len(graphs) == num_timesteps
    print("Training graphs loaded into memory")

    # Extract the val/test graph which is the final one in the sequence
    val_test_graph_previous, _ = third_party_utils.load_adj_graph(
        f'{data_loc}_t{num_timesteps-1}.npz')
    val_test_graph, _ = third_party_utils.load_adj_graph(
        f'{data_loc}_t{num_timesteps}.npz')
    val_test_graph_adj, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = third_party_utils.mask_test_edges(
        val_test_graph)
    # Keep only the positive edge sets from the previous snapshot; discarding
    # its false-edge samples avoids overwriting the ones computed above.
    _, train_edges_pre, val_edges_pre, _, test_edges_pre, _ = third_party_utils.mask_test_edges(
        val_test_graph_previous)

    pos_edges = np.concatenate((val_edges, test_edges, train_edges)).tolist()
    pos_edges = set(map(tuple, pos_edges))
    pos_edges_pre = np.concatenate(
        (val_edges_pre, test_edges_pre, train_edges_pre)).tolist()
    pos_edges_pre = set(map(tuple, pos_edges_pre))
    new_edges = np.array(list(pos_edges - pos_edges_pre))

    num_edges = len(new_edges)
    # Negative samples for the new-edge evaluation (drawn from the held-out false edges)
    new_edges_false = test_edges_false[:num_edges]

    print(
        f"Validation and test edges captured from graph {args.dataset}_t{args.seq_len-1} in the sequence"
    )
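    # The set difference above isolates edges present in the final graph
    # but absent from the previous one, e.g.:
    #   pos_edges     = {(0, 1), (1, 2), (2, 3)}
    #   pos_edges_pre = {(0, 1), (1, 2)}
    #   pos_edges - pos_edges_pre == {(2, 3)}   # the newly formed edge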

    # Choose the model to run
    # AE Static ----------------------------------------------------------------------------
    # Non-offset (static) autoencoder baseline
    if args.model == "AE":

        embedding = AE(d=dim_emb,
                       beta=5,
                       nu1=1e-6,
                       nu2=1e-6,
                       K=3,
                       n_units=[
                           500,
                           300,
                       ],
                       n_iter=100,
                       xeta=1e-6,
                       n_batch=100,
                       modelfile=[
                           './intermediate/enc_modelsbm.json',
                           './intermediate/dec_modelsbm.json'
                       ],
                       weightfile=[
                           './intermediate/enc_weightssbm.hdf5',
                           './intermediate/dec_weightssbm.hdf5'
                       ])
        t1 = time()
        #ae static

        # Loop through each of the graphs in the time series and train model
        print("Starting training AE")
        # for temp_var in range(num_training_loops):
        #     emb, _= embedding.learn_embeddings(graphs[temp_var])

        emb, _ = embedding.learn_embeddings(graphs[:num_training_loops])
        print(embedding._method_name + ':\n\tTraining time: %f' %
              (time() - t1))
        print(
            third_party_utils.eval_gae(test_edges, test_edges_false,
                                       embedding))

        accuracy, roc_score, ap_score, tn, fp, fn, tp = third_party_utils.eval_gae(
            new_edges, new_edges_false, embedding, use_embeddings=False)
        ae_accuracy, ae_roc_score, ae_ap_score, ae_tn, ae_fp, ae_fn, ae_tp = third_party_utils.eval_gae(
            test_edges, test_edges_false, embedding, use_embeddings=False)
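        # eval_gae is third-party; its signature is inferred from these
        # call sites: it scores a positive and a negative edge set against
        # the trained embedding and returns
        #   (accuracy, roc_score, ap_score, tn, fp, fn, tp),
        # where the last four entries are the binary confusion matrix.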

    #dynAE ------------------------------------------------------------------------------
    # As proposed in the dyngraph2vec paper: essentially an offset dense autoencoder trained to predict the next graph.
    elif args.model == "DynAE":

        embedding = DynAE(d=dim_emb,
                          beta=5,
                          n_prev_graphs=lookback,
                          nu1=1e-6,
                          nu2=1e-6,
                          n_units=[
                              500,
                              300,
                          ],
                          rho=0.3,
                          n_iter=150,
                          xeta=1e-5,
                          n_batch=100,
                          modelfile=[
                              './intermediate/enc_model_dynAE.json',
                              './intermediate/dec_model_dynAE.json'
                          ],
                          weightfile=[
                              './intermediate/enc_weights_dynAE.hdf5',
                              './intermediate/dec_weights_dynAE.hdf5'
                          ],
                          savefilesuffix="testing")
        t1 = time()
        # for temp_var in range(lookback+1, num_training_loops+1):
        #     print(temp_var)
        #     print(graphs[:temp_var])
        #     emb, _ = embedding.learn_embeddings(graphs[:temp_var])

        emb, _ = embedding.learn_embeddings(graphs[:num_training_loops])

        if new_edges.size != 0:
            print("Here yo")
            accuracy, roc_score, ap_score, tn, fp, fn, tp = third_party_utils.eval_gae(
                new_edges, new_edges_false, embedding, use_embeddings=False)
            print(
                third_party_utils.eval_gae(new_edges,
                                           new_edges_false,
                                           embedding,
                                           use_embeddings=False))
        else:
            accuracy, roc_score, ap_score, tn, fp, fn, tp = 0, 0, 0, 0, 0, 0, 0

        ae_accuracy, ae_roc_score, ae_ap_score, ae_tn, ae_fp, ae_fn, ae_tp = third_party_utils.eval_gae(
            test_edges, test_edges_false, embedding, use_embeddings=False)
        print(
            third_party_utils.eval_gae(test_edges,
                                       test_edges_false,
                                       embedding,
                                       use_embeddings=False))

    #dynRNN ------------------------------------------------------------------------------
    # As proposed in the dyngraph2vec paper: LSTM cells applied directly, with no compression beforehand.
    elif args.model == "DynRNN":

        embedding = DynRNN(d=dim_emb,
                           beta=5,
                           n_prev_graphs=lookback,
                           nu1=1e-6,
                           nu2=1e-6,
                           n_enc_units=[500, 200],
                           n_dec_units=[200, 500],
                           rho=0.3,
                           n_iter=150,
                           xeta=1e-4,
                           n_batch=100,
                           modelfile=[
                               './intermediate/enc_model_dynRNN.json',
                               './intermediate/dec_model_dynRNN.json'
                           ],
                           weightfile=[
                               './intermediate/enc_weights_dynRNN.hdf5',
                               './intermediate/dec_weights_dynRNN.hdf5'
                           ],
                           savefilesuffix="testing")

        t1 = time()
        # for temp_var in range(lookback+1, num_training_loops+1):
        #     emb, _ = embedding.learn_embeddings(graphs[:temp_var])

        emb, _ = embedding.learn_embeddings(graphs[:num_training_loops])

        if new_edges.size != 0:
            accuracy, roc_score, ap_score, tn, fp, fn, tp = third_party_utils.eval_gae(
                new_edges, new_edges_false, embedding, use_embeddings=False)
            print(
                third_party_utils.eval_gae(new_edges,
                                           new_edges_false,
                                           embedding,
                                           use_embeddings=False))
        else:
            accuracy, roc_score, ap_score, tn, fp, fn, tp = 0, 0, 0, 0, 0, 0, 0

        ae_accuracy, ae_roc_score, ae_ap_score, ae_tn, ae_fp, ae_fn, ae_tp = third_party_utils.eval_gae(
            test_edges, test_edges_false, embedding, use_embeddings=False)
        print(
            third_party_utils.eval_gae(test_edges,
                                       test_edges_false,
                                       embedding,
                                       use_embeddings=False))

    #dynAERNN ------------------------------------------------------------------------------
    # As proposed in the dyngraph2vec paper: an autoencoder whose output feeds an LSTM cell.
    elif args.model == "DynAERNN":

        embedding = DynAERNN(d=dim_emb,
                             beta=5,
                             n_prev_graphs=lookback,
                             nu1=1e-6,
                             nu2=1e-6,
                             n_aeunits=[500, 300],
                             n_lstmunits=[300, dim_emb],
                             rho=0.3,
                             n_iter=150,
                             xeta=1e-3,
                             n_batch=100,
                             modelfile=[
                                 './intermediate/enc_model_dynAERNN.json',
                                 './intermediate/dec_model_dynAERNN.json'
                             ],
                             weightfile=[
                                 './intermediate/enc_weights_dynAERNN.hdf5',
                                 './intermediate/dec_weights_dynAERNN.hdf5'
                             ],
                             savefilesuffix="testing")

        t1 = time()
        # for temp_var in range(lookback+1, num_training_loops+1):
        #     emb, _ = embedding.learn_embeddings(graphs[:temp_var])

        # lp.expLP(graphs, embedding, 2, 0, 0)

        emb, _ = embedding.learn_embeddings(graphs[:num_training_loops])

        if new_edges.size != 0:
            accuracy, roc_score, ap_score, tn, fp, fn, tp = third_party_utils.eval_gae(
                new_edges, new_edges_false, embedding, use_embeddings=False)
            print(
                third_party_utils.eval_gae(new_edges,
                                           new_edges_false,
                                           embedding,
                                           use_embeddings=False))
        else:
            accuracy, roc_score, ap_score, tn, fp, fn, tp = 0, 0, 0, 0, 0, 0, 0

        ae_accuracy, ae_roc_score, ae_ap_score, ae_tn, ae_fp, ae_fn, ae_tp = third_party_utils.eval_gae(
            test_edges, test_edges_false, embedding, use_embeddings=False)
        print(
            third_party_utils.eval_gae(test_edges,
                                       test_edges_false,
                                       embedding,
                                       use_embeddings=False))

    return accuracy, roc_score, ap_score, tn, fp, fn, tp, ae_accuracy, ae_roc_score, ae_ap_score, ae_tn, ae_fp, ae_fn, ae_tp
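
# Minimal driver for main(), sketched from the argument names the function
# actually reads above; the defaults are illustrative assumptions.
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', default='DynAE',
                        choices=['AE', 'DynAE', 'DynRNN', 'DynAERNN'])
    parser.add_argument('--dataset', required=True)
    parser.add_argument('--data_loc', default='./data')
    parser.add_argument('--seq_len', type=int, default=10)
    parser.add_argument('--seed', type=int, default=42)
    main(parser.parse_args())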