Example #1
def main():
    """
    Parsing command line parameters, reading data, fitting and scoring a SimGNN model.
    """
    args = parameter_parser()
    tab_printer(args)
    trainer = SimGNNTrainer(args)

    if args.measure_time:
        trainer.measure_time()
    else:
        if args.load:
            trainer.load()
        else:
            trainer.fit()
        trainer.score()
        if args.save:
            trainer.save()

    if args.notify:
        import os
        import sys

        if sys.platform == "linux":
            os.system('notify-send SimGNN "Program is finished."')
        elif sys.platform == "darwin":
            os.system("""
                      osascript -e 'display notification "Program is finished." with title "SimGNN"'
                      """)
        else:
            raise NotImplementedError("No notification support for this OS.")
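
Every example in this listing begins with parameter_parser() and tab_printer(args). For context, a minimal sketch of what such a parser might look like; the flag names below are inferred from the usage in Example #1 and are assumptions, not the project's actual definitions:

import argparse

def parameter_parser():
    # Hypothetical sketch: flag names inferred from Example #1,
    # not the real SimGNN argument parser.
    parser = argparse.ArgumentParser(description="Run SimGNN.")
    parser.add_argument("--measure-time", dest="measure_time", action="store_true")
    parser.add_argument("--load", action="store_true")
    parser.add_argument("--save", action="store_true")
    parser.add_argument("--notify", action="store_true")
    return parser.parse_args()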
Example #2
def main():
    """
    Parsing command line parameters.
    Reading data, embedding base graph, creating persona graph and learning a splitter.
    Saving the persona mapping and the embedding.
    """
    args = parameter_parser()
    torch.manual_seed(args.seed)
    tab_printer(args)
    """
    1. read graph and load as torch dataset
    """
    graph, graph_ingr_only = graph_reader(args.input_nodes, args.input_edges)
    """
    2. Metapath2vec with MetaPathWalker - Ingredient-Ingredient / Ingredient-Food-like Compound / Ingredient-Drug-like Compound
    """

    if args.idx_embed == 'Node2vec':
        node2vec = Node2Vec(args, graph)
        node2vec.train()

    else:
        metapath2vec = Metapath2Vec(args, graph)
        metapath2vec.train()
    """
    3. Plot your embedding if you like
    """
    plot_embedding(args, graph)
    """
    4. Evaluate Node Classification & Node Clustering
    """
    evaluate(args, graph)
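
Step 2 above walks the heterogeneous graph along typed metapaths before feeding the corpus to skip-gram. A toy sketch of metapath-guided walking, assuming a networkx graph whose nodes carry a "type" attribute (the attribute name and the metapath itself are assumptions):

import random

def metapath_walk(graph, start, metapath, walk_length):
    # At each step, only move to neighbors whose type matches the next
    # entry of the metapath, cycling through the metapath.
    walk = [start]
    for step in range(walk_length - 1):
        wanted = metapath[(step + 1) % len(metapath)]
        candidates = [n for n in graph.neighbors(walk[-1])
                      if graph.nodes[n]["type"] == wanted]
        if not candidates:
            break
        walk.append(random.choice(candidates))
    return walk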
Example #3
def main():
    """
    Parsing command lines, creating target matrix, fitting SINE and saving the embedding.
    """
    args = parameter_parser()
    tab_printer(args)
    model = SINETrainer(args)
    model.fit()
    model.save_embedding()
Example #4
def main():
    """
    Parsing command lines, creating target matrix, fitting an Attention Walker and saving the embedding.
    """
    args = parameter_parser()
    tab_printer(args)
    model = AttentionWalkTrainer(args)
    model.fit()
    model.save_model()
Example #5
def main():
    """
    Parsing command line parameters, reading data, fitting and scoring a SimGNN model.
    """
    args = parameter_parser()
    tab_printer(args)
    trainer = SimGNNTrainer(args)
    trainer.fit()
    trainer.score()
Example #6
def main():
    """
    Parsing command line parameters, processing graphs, fitting a CapsGNN.
    """
    args = parameter_parser()
    tab_printer(args)
    model = CapsGNNTrainer(args)
    model.fit()
    model.score()
    model.save_predictions()
Example #7
def main():
    """
    Parsing command line parameters, reading data, fitting an EdMot clustering and saving the memberships.
    """
    args = parameter_parser()
    tab_printer(args)
    graph = graph_reader(args.edge_path)
    model = EdMot(graph, args.components, args.cutoff)
    memberships = model.fit()
    membership_saver(args.membership_path, memberships)
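
EdMot builds its clusters on a motif-based reweighting of the graph. As a toy illustration of the idea (not the project's implementation), the triangle-motif adjacency of a graph with adjacency matrix A can be computed as (A @ A) * A, which counts the triangles each edge participates in:

import numpy as np

# Toy sketch of a triangle-motif adjacency, not EdMot's actual code.
A = np.array([[0, 1, 1, 0],
              [1, 0, 1, 1],
              [1, 1, 0, 0],
              [0, 1, 0, 0]])
motif_adj = (A @ A) * A  # entry (i, j): triangles through edge (i, j)
print(motif_adj)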
Example #8
File: main.py Project: kdwcse/GAM
def main():
    """
    Parsing command line parameters, processing graphs, fitting a GAM.
    """
    args = parameter_parser()
    tab_printer(args)
    model = GAMTrainer(args)
    model.fit()
    model.score()
    model.save_predictions_and_logs()
Example #9
def main():
    """
    Parsing command lines, creating target matrix, fitting BANE and saving the embedding.
    """
    args = parameter_parser()
    tab_printer(args)
    P = read_graph(args)
    X = read_features(args)
    model = BANE(args, P, X)
    model.fit()
    model.save_embedding()
Example #10
File: main.py Project: tjufan/DANMF
def main():
    """
    Parsing command lines, creating target matrix, fitting DANMF and saving the embedding.
    """
    args = parameter_parser()
    tab_printer(args)
    graph = read_graph(args)
    model = DANMF(graph, args)
    model.pre_training()
    model.training()
    if args.calculate_loss:
        loss_printer(model.loss)
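
pre_training initializes DANMF layer by layer before the joint training phase. A minimal sketch of the layer-wise idea using scikit-learn's NMF (the layer dimensions and initialization scheme are assumptions, not DANMF's exact procedure):

from sklearn.decomposition import NMF

def layerwise_pretrain(V, layer_dims):
    # Factorize the target repeatedly: V ~ W_1 H_1, then H_1 ~ W_2 H_2, ...
    factors, Z = [], V
    for dim in layer_dims:
        nmf = NMF(n_components=dim, init="nndsvd", max_iter=200)
        W = nmf.fit_transform(Z)
        factors.append(W)
        Z = nmf.components_
    return factors, Z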
Example #11
def main():
    """
    Parsing command line parameters, reading data, fitting an NGCN and scoring the model.
    """
    args = parameter_parser()
    torch.manual_seed(args.seed)
    tab_printer(args)
    graph = graph_reader(args.edge_path)
    features = feature_reader(args.features_path)
    target = target_reader(args.target_path)
    trainer = Trainer(args, graph, features, target)
    trainer.fit()
Example #12
def main():
    """
    Parsing command line parameters, creating EgoNets, creating a partition of the persona graph. Saving the memberships.
    """
    args = parameter_parser()
    tab_printer(args)
    graph = graph_reader(args.edge_path)
    splitter = EgoNetSplitter(graph, args.resolution)
    splitter.create_egonets()
    splitter.map_personalities()
    splitter.create_persona_graph()
    splitter.create_partitions()
    membership_saver(args.output_path, splitter.overlapping_partitions)
Example #13
def execute_factorization():
    """
    Reading the target matrix, running optimization and saving to hard drive.
    """
    args = parameter_parser()
    tab_printer(args)
    X = read_features(args.input_path)
    print("\nTraining started.\n")
    model = ADMM_NMF(X, args)
    model.optimize()
    print("\nFactors saved.\n")
    model.save_user_factors()
    model.save_item_factors()
Example #14
def main():
    """
    Parsing command lines, creating target matrix, fitting an SGCN, predicting edge signs, and saving the embedding.
    """
    args = parameter_parser()
    tab_printer(args)
    edges = read_graph(args)
    trainer = SignedGCNTrainer(args, edges)
    trainer.setup_dataset()
    trainer.create_and_train_model()
    if args.test_size > 0:
        trainer.save_model()
        score_printer(trainer.logs)
        save_logs(args, trainer.logs)
Example #15
def __init__(self):
    """
    Enumerating the unique node labels across the training and testing graph pairs.
    """
    self.args = parameter_parser()
    print("\nEnumerating unique labels.\n")
    self.training_graphs = glob.glob(self.args.training_graphs + "*.json")
    self.testing_graphs = glob.glob(self.args.testing_graphs + "*.json")
    self.graph_pairs = self.training_graphs + self.testing_graphs
    self.global_labels = set()
    for graph_pair in tqdm(self.graph_pairs):
        data = process(graph_pair)
        self.global_labels = self.global_labels.union(set(data["labels_1"]))
        self.global_labels = self.global_labels.union(set(data["labels_2"]))
    self.global_labels = {val: index for index, val in enumerate(self.global_labels)}
    self.number_of_labels = len(self.global_labels)
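
The resulting global_labels dictionary maps each node label to a fixed index, which downstream code can use to one-hot encode node features. A small sketch of that encoding (the helper below is illustrative, not from the repository):

import numpy as np

def one_hot_features(labels, global_labels):
    # Row i is the one-hot encoding of node i's label.
    features = np.zeros((len(labels), len(global_labels)))
    for i, label in enumerate(labels):
        features[i, global_labels[label]] = 1.0
    return features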
Example #16
def main():
    """
    Parsing command line parameters.
    Reading data, embedding base graph, creating persona graph and learning a splitter.
    Saving the persona mapping and the embedding.
    """
    args = parameter_parser()
    torch.manual_seed(args.seed)
    tab_printer(args)
    graph = graph_reader(args.edge_path)
    trainer = SplitterTrainer(graph, args)
    trainer.fit()
    trainer.save_embedding()
    trainer.save_persona_graph_mapping()
Example #17
def main():
    """
    Parsing command line parameters, reading data, graph decomposition, fitting a ClusterGCN and scoring the model.
    """
    args = parameter_parser()
    torch.manual_seed(args.seed)
    tab_printer(args)
    graph = graph_reader(args.edge_path)
    features = feature_reader(args.features_path)
    target = target_reader(args.target_path)
    clustering_machine = ClusteringMachine(args, graph, features, target)
    clustering_machine.decompose()
    gcn_trainer = ClusterGCNTrainer(args, clustering_machine)
    gcn_trainer.train()
    gcn_trainer.test()
Example #18
def main():
    """
    Parsing command line parameters, reading data, doing sparsification, fitting a GWNN and saving the logs.
    """
    args = parameter_parser()
    tab_printer(args)
    graph = graph_reader(args.edge_path)
    features = feature_reader(args.features_path)
    target = target_reader(args.target_path)
    sparsifier = WaveletSparsifier(graph, args.scale, args.approximation_order,
                                   args.tolerance)
    sparsifier.calculate_all_wavelets()
    trainer = GWNNTrainer(args, sparsifier, features, target)
    trainer.fit()
    trainer.score()
    save_logs(args, trainer.logs)
Example #19
def main():
    """
    Parsing command line parameters, reading data, fitting an NGCN and scoring the model.
    """
    args = parameter_parser()
    torch.manual_seed(args.seed)
    tab_printer(args)
    graph = graph_reader(args.edge_path)
    features = feature_reader(args.features_path)
    target = target_reader(args.target_path)
    trainer = Trainer(args, graph, features, target, True)
    trainer.fit()
    if args.model == "mixhop":
        trainer.evaluate_architecture()
        args = trainer.reset_architecture()
        trainer = Trainer(args, graph, features, target, False)
        trainer.fit()
Example #20
def tab_printer(args):
    """
    Function to print the parameters in a nice tabular format.
    :param args: Parameters used for the model.
    """
    args = vars(args)
    keys = sorted(args.keys())
    tab = Texttable()
    tab.add_rows([["Parameter", "Value"]] +
                 [[k.replace("_", " ").capitalize(), args[k]] for k in keys])
    print(tab.draw())

def read_graph(settings):
    """
    Reading the edge list from the path and returning a networkx graph object.
    :param settings: Arguments object with the input path and format flag.
    :return graph: Graph built from the edge list.
    """
    if settings.edgelist_input:
        graph = nx.read_edgelist(settings.input)
    else:
        edge_list = pd.read_csv(settings.input).values.tolist()
        graph = nx.from_edgelist(edge_list)
        graph.remove_edges_from(nx.selfloop_edges(graph))  # Graph.selfloop_edges() was removed in networkx 2.4
    return graph

if __name__ == "__main__":
    settings = parameter_parser()
    tab_printer(settings)
    G = read_graph(settings)
    machine = WaveletMachine(G,settings)
    machine.create_embedding()
    machine.transform_and_save_embedding()
Example #21
""" Main SimGNN model """
from tensorflow import keras
import numpy as np
from tqdm import tqdm, trange
from parser import parameter_parser
from utilities import data2, convert_to_keras, process, find_loss
from simgnn import simgnn

parser = parameter_parser()


def train(model, x):
    """
    Training the network: take every graph pair and train it as a batch.
    """
    batches = x.create_batches()
    global_labels = x.getlabels()
    print(global_labels)

    for epoch in range(0, parser.epochs):
        for index, batch in tqdm(enumerate(batches),
                                 total=len(batches),
                                 desc="Batches"):
            for graph_pair in batch:
                data = process(graph_pair)
                data = convert_to_keras(data, global_labels)
                x = np.array([data["features_1"]])
                y = np.array([data["features_2"]])
                a = np.array([data["edge_index_1"]])
                b = np.array([data["edge_index_2"]])
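                # The snippet is cut off here. A plausible continuation feeds
                # the four arrays and the pair's similarity target to the Keras
                # model; this is a guess at the original code, and the "target"
                # key and input order are assumptions.
                target = np.array([data["target"]])
                loss = model.train_on_batch([x, y, a, b], target)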
Example #22
    out.to_csv(output_path, index=False)


def main(args):
    """
    Main function to read the graph list, extract features, learn the embedding and save it.
    :param args: Object with the arguments.
    """
    graphs = glob.glob(args.input_path + "*.json")
    print("\nFeature extraction started.\n")
    document_collections = Parallel(n_jobs=args.workers)(
        delayed(feature_extractor)(g, args.wl_iterations)
        for g in tqdm(graphs))
    print("\nOptimization started.\n")
    model = Doc2Vec(document_collections,
                    vector_size=args.dimensions,  # "size" in gensim < 4.0
                    window=0,
                    min_count=args.min_count,
                    dm=0,
                    sample=args.down_sampling,
                    workers=args.workers,
                    epochs=args.epochs,  # "iter" in gensim < 4.0
                    alpha=args.learning_rate)

    save_embedding(args.output_path, model, graphs, args.dimensions)


if __name__ == "__main__":
    args = parameter_parser()
    main(args)
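
Here feature_extractor produces, for each graph, a document of Weisfeiler-Lehman structural features that Doc2Vec then embeds. A compact sketch of WL relabeling on a networkx graph (the hash choice and feature format are assumptions, not the repository's exact implementation):

import hashlib

def wl_features(graph, iterations):
    # Start from degree labels, then repeatedly hash each node's label
    # together with its sorted neighbor labels.
    labels = {n: str(graph.degree(n)) for n in graph.nodes()}
    features = list(labels.values())
    for _ in range(iterations):
        new_labels = {}
        for n in graph.nodes():
            neighborhood = labels[n] + "".join(sorted(labels[m] for m in graph.neighbors(n)))
            new_labels[n] = hashlib.md5(neighborhood.encode()).hexdigest()
        labels = new_labels
        features.extend(labels.values())
    return features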
Example #23
def main():
    print("Program start, environment initializing ...")
    torch.autograd.set_detect_anomaly(True)
    args = parameter_parser()
    utils.print2file(str(args), args.logDir, True)

    if args.device >= 0:
        os.environ['CUDA_VISIBLE_DEVICES'] = str(args.device)

    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
    pic = {}

    # check if pickles, otherwise load data
    # pickle_name = args.data_prefix+args.dataset+"-"+str(args.bsize)+"-"+str(args.num_clusters)+"_main"+".pickle"
    # if os.path.isfile(pickle_name):
    #     print("Loading Pickle.")
    #     load_time = time.time()
    #     pic = pickle.load(open(pickle_name, "rb"))
    #     print("Loading Done. " + str(time.time()-load_time) + " seconds.")
    # else:
    if True:
        print("Data Pre-processing")
        # Load data
        (pic["train_adj"], full_adj, pic["train_feats"], pic["test_feats"],
         pic["y_train"], y_val, y_test, pic["train_mask"], pic["val_mask"],
         test_mask, _, pic["val_data"], pic["test_data"], num_data,
         visible_data) = utils.load_data(args.data_prefix,
                                         args.dataset,
                                         args.precalc,
                                         amazon=True)

        print("Partition graph and do preprocessing")
        if args.bsize > 1:
            _, pic["parts"] = partition_utils.partition_graph(
                pic["train_adj"], visible_data, args.num_clusters)
            pic["parts"] = [np.array(pt) for pt in pic["parts"]]

            (pic["features_batches"], pic["support_batches"],
             pic["y_train_batches"],
             pic["train_mask_batches"]) = utils.preprocess_multicluster_v2(
                 pic["train_adj"], pic["parts"], pic["train_feats"],
                 pic["y_train"], pic["train_mask"], args.num_clusters,
                 args.bsize, args.diag_lambda)

        else:
            (pic["parts"], pic["features_batches"], pic["support_batches"],
             pic["y_train_batches"],
             pic["train_mask_batches"]) = utils.preprocess(
                 pic["train_adj"], pic["train_feats"], pic["y_train"],
                 pic["train_mask"], visible_data, args.num_clusters,
                 args.diag_lambda)

        (_, pic["val_features_batches"], pic["val_support_batches"],
         pic["y_val_batches"], pic["val_mask_batches"]) = utils.preprocess(
             full_adj, pic["test_feats"], y_val, pic["val_mask"],
             np.arange(num_data), args.num_clusters_val, args.diag_lambda)

        (_, pic["test_features_batches"], pic["test_support_batches"],
         pic["y_test_batches"], pic["test_mask_batches"]) = utils.preprocess(
             full_adj, pic["test_feats"], y_test, test_mask,
             np.arange(num_data), args.num_clusters_test, args.diag_lambda)

        # pickle.dump(pic, open(pickle_name, "wb"))

    idx_parts = list(range(len(pic["parts"])))
    print("Preparing model ...")
    model = StackedGCN(args,
                       pic["test_feats"].shape[1],
                       pic["y_train"].shape[1],
                       precalc=args.precalc,
                       num_layers=args.num_layers,
                       norm=args.layernorm)

    w_server = model.cpu().state_dict()

    print("Start training ...")
    model_saved = "./model/" + args.dataset + "-" + args.logDir[6:-4] + ".pt"

    best_val_acc = 0

    try:
        for epoch in range(args.epochs):
            # Training process
            w_locals, loss_locals, epoch_acc = [], [], []
            all_time = []

            for pid in range(len(pic["features_batches"])):
                # for pid in range(10):
                # Use preprocessed batch data
                package = {
                    "features": pic["features_batches"][pid],
                    "support": pic["support_batches"][pid],
                    "y_train": pic["y_train_batches"][pid],
                    "train_mask": pic["train_mask_batches"][pid]
                }

                model.load_state_dict(w_server)
                out_dict = slave_run_train(model, args, package, pid)

                w_locals.append(copy.deepcopy(out_dict['params']))
                loss_locals.append(copy.deepcopy(out_dict['loss']))
                all_time.append(out_dict["time"])
                epoch_acc.append(out_dict["acc"])

            # update global weights
            a_start_time = time.time()
            if args.agg == 'avg':
                w_server = average_agg(w_locals, args.dp)
            elif args.agg == 'att':
                w_server = weighted_agg(w_locals,
                                        w_server,
                                        args.epsilon,
                                        args.ord,
                                        dp=args.dp)
            else:
                exit('Unrecognized aggregation')

            model.load_state_dict(w_server)
            # agg_time = time.time() - a_start_time
            # print(str(sum(all_time)/len(all_time) + agg_time))
            print2file(
                'Epoch: ' + str(epoch) + ' Average Train acc: ' +
                str(sum(epoch_acc) / len(epoch_acc)), args.logDir, True)

            if epoch % args.val_freq == 0:
                val_cost, val_acc, val_micro, val_macro = evaluate(
                    model,
                    args,
                    pic["val_features_batches"],
                    pic["val_support_batches"],
                    pic["y_val_batches"],
                    pic["val_mask_batches"],
                    pic["val_data"],
                    pid="validation")

                log_str = 'Validation set results: ' + 'cost= {:.5f} '.format(
                    val_cost) + 'accuracy= {:.5f} '.format(
                        val_acc) + 'mi F1= {:.5f} ma F1= {:.5f}'.format(
                            val_micro, val_macro)
                print2file(log_str, args.logDir, True)

                if val_acc > best_val_acc:
                    best_val_acc = val_acc
                    torch.save(model.state_dict(), model_saved)
                    print2file(
                        "Best val_acc: " + str(best_val_acc) +
                        " with epoch: " + str(epoch), args.logDir, True)

        torch.save(
            model.state_dict(),
            "./model/" + args.dataset + "-" + args.logDir[6:-4] + "Done.pt")
        print2file("Training Done. Model Saved.", args.logDir, True)
        # Test Model
        # Perform two test, one with last model, another with best val_acc model
        # 1)
        test_cost, test_acc, micro, macro = evaluate(
            model,
            args,
            pic["test_features_batches"],
            pic["test_support_batches"],
            pic["y_test_batches"],
            pic["test_mask_batches"],
            pic["test_data"],
            pid="Final test")

        log_str = 'Test set results: ' + 'cost= {:.5f} '.format(
            test_cost) + 'accuracy= {:.5f} '.format(
                test_acc) + 'mi F1= {:.5f} ma F1= {:.5f}'.format(micro, macro)
        print2file(log_str, args.logDir, True)

        # 2)
        test_model = StackedGCN(args,
                                pic["test_feats"].shape[1],
                                pic["y_train"].shape[1],
                                precalc=args.precalc,
                                num_layers=args.num_layers,
                                norm=args.layernorm)
        test_model.load_state_dict(torch.load(model_saved))
        test_model.eval()
        test_cost, test_acc, micro, macro = evaluate(
            test_model,
            args,
            pic["test_features_batches"],
            pic["test_support_batches"],
            pic["y_test_batches"],
            pic["test_mask_batches"],
            pic["test_data"],
            pid="Best test")

        log_str = 'Test set results: ' + 'cost= {:.5f} '.format(
            test_cost) + 'accuracy= {:.5f} '.format(
                test_acc) + 'mi F1= {:.5f} ma F1= {:.5f}'.format(micro, macro)
        print2file(log_str, args.logDir, True)

    except KeyboardInterrupt:
        print("==" * 20)
        print("Existing from training earlier than the plan.")

    print("End..so far so good.")