def preprocessing(data, emb_file, seed, trans):
    num_graphs = len(data[0])
    nx_graphs = [data[0][i].g for i in range(num_graphs)]
    dgl_graphs = [dgl.from_networkx(graph) for graph in nx_graphs]
    batch_graphs = dgl.batch(dgl_graphs)
    num_nodes = len(batch_graphs.nodes())
    graph_size = [len(g.nodes()) for g in nx_graphs]
    
    emb = np.loadtxt(emb_file)
    if trans:
        emb = np.dot(emb, DCT(num_nodes).T)
    G = batch_graphs.to_networkx()
    Sub = {}
    for i in range(num_graphs):
        # nodes of graph i occupy a contiguous index range in the batched graph
        node_start = sum(graph_size[:i])
        node_end = node_start + graph_size[i]
        nbunch = list(range(node_start, node_end))
        subgraph = nx.subgraph(G, nbunch)
        Sub[data[0][i]] = np.dot(emb, encode(G, subgraph))
    
    idx_list = separate_data(data[0], seed=seed)
    return Sub, idx_list, data[0]
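
The per-graph node ranges above follow directly from a cumulative sum of graph_size; a minimal standalone check (the graph sizes here are illustrative):

import numpy as np

graph_size = [3, 5, 2]  # illustrative sizes of three batched graphs
offsets = np.concatenate(([0], np.cumsum(graph_size)))
for i, size in enumerate(graph_size):
    node_start, node_end = offsets[i], offsets[i + 1]
    assert node_end - node_start == size
    print(i, list(range(node_start, node_end)))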
def prepare_data_order(args):
    graphs, num_classes, max_deg = load_data_order(args.dataset, args.degree_as_tag)
    if USE_SAVE_IDX:
        try:
            train_idx = np.loadtxt('./dataset/%s/10fold_idx/train_idx-%d.txt' % (args.dataset, args.fold_idx + 1), dtype=np.int32).tolist()
            test_idx = np.loadtxt('./dataset/%s/10fold_idx/test_idx-%d.txt' % (args.dataset, args.fold_idx + 1), dtype=np.int32).tolist()
            train_graphs, test_graphs = [graphs[i] for i in train_idx], [graphs[i] for i in test_idx]
        except OSError:
            print("Could not load dataset indices")
            if num_classes >= len(graphs) or args.dont_split:
                train_graphs, test_graphs, train_idx, test_idx = graphs, [], 0, 0
            else:
                train_graphs, test_graphs, train_idx, test_idx = separate_data(graphs, args.seed, args.fold_idx)
    else:
        if num_classes >= len(graphs) or args.dont_split:
            train_graphs, test_graphs, train_idx, test_idx = graphs, [], 0, 0
        else:
            train_graphs, test_graphs, train_idx, test_idx = separate_data(graphs, args.seed, args.fold_idx)
    return train_graphs, test_graphs, train_idx, test_idx, num_classes, max_deg
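
separate_data(graphs, seed, fold_idx) is called with this signature in most of the examples on this page, but its body is never shown. As used, it selects one fold of a stratified 10-fold split; a plausible sketch (not necessarily the repository's exact implementation):

import numpy as np
from sklearn.model_selection import StratifiedKFold

def separate_data(graph_list, seed, fold_idx):
    assert 0 <= fold_idx < 10, "fold_idx must be in [0, 9]"
    skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
    labels = [g.label for g in graph_list]
    # materialize the 10 (train, test) index pairs and pick the fold_idx-th
    idx_list = list(skf.split(np.zeros(len(labels)), labels))
    train_idx, test_idx = idx_list[fold_idx]
    train_graph_list = [graph_list[i] for i in train_idx]
    test_graph_list = [graph_list[i] for i in test_idx]
    return train_graph_list, test_graph_list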
Example #3
def main():
    # Training settings
    # Note: Hyper-parameters need to be tuned in order to obtain results reported in the paper.
    parser = argparse.ArgumentParser(description='PyTorch GIN fMRI')
    parser.add_argument('--device',
                        type=int,
                        default=0,
                        help='which gpu to use if any')
    parser.add_argument('--sourcedir',
                        type=str,
                        default='data',
                        help='path to the data directory')
    parser.add_argument('--sparsity',
                        type=int,
                        default=30,
                        help='sparsity M of graph adjacency')
    parser.add_argument('--input_feature',
                        type=str,
                        default='one_hot',
                        help='input feature type',
                        choices=['one_hot', 'coordinate', 'mean_bold'])
    parser.add_argument('--batch_size',
                        type=int,
                        default=32,
                        help='input minibatch size for training')
    parser.add_argument('--iters_per_epoch',
                        type=int,
                        default=50,
                        help='number of iterations per each epoch')
    parser.add_argument('--epochs',
                        type=int,
                        default=150,
                        help='number of epochs to train')
    parser.add_argument('--lr',
                        type=float,
                        default=0.005,
                        help='initial learning rate')
    parser.add_argument('--lr_step',
                        type=int,
                        default=5,
                        help='learning rate decay step')
    parser.add_argument('--lr_rate',
                        type=float,
                        default=0.8,
                        help='learning rate decay rate')
    parser.add_argument('--fold_seed',
                        type=int,
                        default=0,
                        help='random seed for splitting the dataset')
    parser.add_argument('--fold_idx',
                        type=int,
                        default=0,
                        help='index of the fold in 10-fold validation')
    parser.add_argument('--num_layers',
                        type=int,
                        default=5,
                        help='number of the GNN layers')
    parser.add_argument(
        '--num_mlp_layers',
        type=int,
        default=2,
        help='number of layers for the MLP. 1 means linear model.')
    parser.add_argument('--hidden_dim',
                        type=int,
                        default=64,
                        help='number of hidden units')
    parser.add_argument('--beta',
                        type=float,
                        default=0.05,
                        help='coefficient for infograph regularizer')
    parser.add_argument('--final_dropout',
                        type=float,
                        default=0.5,
                        help='final layer dropout')
    parser.add_argument(
        '--graph_pooling_type',
        type=str,
        default="sum",
        choices=["sum", "average"],
        help='Pooling over nodes in a graph: sum or average')
    parser.add_argument(
        '--neighbor_pooling_type',
        type=str,
        default="sum",
        choices=["sum", "average", "max"],
        help='Pooling over neighboring nodes: sum, average or max')
    parser.add_argument(
        '--learn_eps',
        action="store_true",
        help=
        'whether to learn the epsilon weighting for the center nodes. Does not affect training accuracy though.'
    )
    parser.add_argument('--exp',
                        type=str,
                        default="graph_neural_mapping",
                        help='experiment name')
    args = parser.parse_args()

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    graphs, num_classes = load_data(args.sourcedir, args.sparsity,
                                    args.input_feature)

    os.makedirs('results/{}/saliency/{}'.format(args.exp, args.fold_idx),
                exist_ok=True)
    os.makedirs('results/{}/latent/{}'.format(args.exp, args.fold_idx),
                exist_ok=True)
    os.makedirs('results/{}/model/{}'.format(args.exp, args.fold_idx),
                exist_ok=True)

    train_graphs, test_graphs = separate_data(graphs, args.fold_seed,
                                              args.fold_idx)

    model = GIN_InfoMaxReg(args.num_layers, args.num_mlp_layers,
                           train_graphs[0].node_features.shape[1],
                           args.hidden_dim, num_classes, args.final_dropout,
                           args.learn_eps, args.graph_pooling_type,
                           args.neighbor_pooling_type, device).to(device)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    scheduler = optim.lr_scheduler.StepLR(optimizer,
                                          step_size=args.lr_step,
                                          gamma=args.lr_rate)

    train_summary_writer = SummaryWriter('results/{}/summary/{}/train'.format(
        args.exp, args.fold_idx),
                                         flush_secs=1,
                                         max_queue=1)
    test_summary_writer = SummaryWriter('results/{}/summary/{}/test'.format(
        args.exp, args.fold_idx),
                                        flush_secs=1,
                                        max_queue=1)
    with open('results/{}/argv.csv'.format(args.exp), 'a', newline='') as f:
        writer = csv.writer(f)
        writer.writerows(vars(args).items())

    latent_space_initial, labels = get_latent_space(model, test_graphs)
    np.save(
        'results/{}/latent/{}/latent_space_initial.npy'.format(
            args.exp, args.fold_idx), latent_space_initial)
    np.save('results/{}/latent/{}/labels.npy'.format(args.exp, args.fold_idx),
            labels)
    del latent_space_initial
    del labels

    for epoch in tqdm(range(args.epochs), ncols=50, desc=f'{args.fold_idx}'):
        loss_train = train(args, model, device, train_graphs, optimizer,
                           args.beta, epoch)
        scheduler.step()
        acc_train, precision_train, recall_train = test(
            args, model, device, train_graphs)

        train_summary_writer.add_scalar('loss/total', loss_train, epoch)
        train_summary_writer.add_scalar('metrics/accuracy', acc_train, epoch)
        train_summary_writer.add_scalar('metrics/precision', precision_train,
                                        epoch)
        train_summary_writer.add_scalar('metrics/recall', recall_train, epoch)

    acc_test, precision_test, recall_test = test(args, model, device,
                                                 test_graphs)
    test_summary_writer.add_scalar('metrics/accuracy', acc_test, epoch)
    test_summary_writer.add_scalar('metrics/precision', precision_test, epoch)
    test_summary_writer.add_scalar('metrics/recall', recall_test, epoch)

    torch.save(model.state_dict(),
               'results/{}/model/{}/model.pt'.format(args.exp, args.fold_idx))
    latent_space, labels = get_latent_space(model, test_graphs)
    saliency_map_0 = get_saliency_map(model, test_graphs, 0)
    saliency_map_1 = get_saliency_map(model, test_graphs, 1)
    np.save(
        'results/{}/latent/{}/latent_space.npy'.format(args.exp,
                                                       args.fold_idx),
        latent_space)
    np.save(
        'results/{}/saliency/{}/saliency_female.npy'.format(
            args.exp, args.fold_idx), saliency_map_0)
    np.save(
        'results/{}/saliency/{}/saliency_male.npy'.format(
            args.exp, args.fold_idx), saliency_map_1)
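
get_saliency_map is not shown above; gradient-based saliency for a GNN is typically computed by backpropagating a class logit to the node features. A sketch under the assumptions that the model maps a list of graphs to logits and that each graph object carries a node_features tensor (names here are illustrative, not the repository's):

import torch

def saliency_sketch(model, graphs, target_class):
    model.eval()
    saliencies = []
    for g in graphs:
        feats = g.node_features.clone().detach().requires_grad_(True)
        g.node_features = feats
        logits = model([g])                 # assumed shape: (1, num_classes)
        logits[0, target_class].backward()
        saliencies.append(feats.grad.abs().cpu().numpy())
        g.node_features = feats.detach()    # restore plain features
    return saliencies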
Example #4
def main():
    # Training settings
    # Note: Hyper-parameters need to be tuned in order to obtain results reported in the paper.
    parser = argparse.ArgumentParser(
        description=
        'PyTorch graph convolutional neural net for whole-graph classification'
    )
    parser.add_argument('--dataset',
                        type=str,
                        default="NCI1",
                        help='name of dataset (default: NCI1)')
    parser.add_argument('--device',
                        type=int,
                        default=0,
                        help='which gpu to use if any (default: 0)')
    parser.add_argument('--batch_size',
                        type=int,
                        default=32,
                        help='input batch size for training (default: 32)')
    parser.add_argument(
        '--iters_per_epoch',
        type=int,
        default=50,
        help='number of iterations per each epoch (default: 50)')
    parser.add_argument(
        '--epochs',
        type=int,
        default=500,  # paper default: 350
        help='number of epochs to train (default: 350)')
    parser.add_argument('--lr',
                        type=float,
                        default=0.01,
                        help='learning rate (default: 0.01)')
    parser.add_argument('--wl2',
                        type=float,
                        default=0.0,
                        help='weight decay (default: 0.0)')
    parser.add_argument(
        '--seed',
        type=int,
        default=0,
        help='random seed for splitting the dataset into 10 (default: 0)')
    parser.add_argument(
        '--fold_idx',
        type=int,
        default=0,
        help='the index of fold in 10-fold validation. Should be less than 10.'
    )
    parser.add_argument(
        '--num_layers',
        type=int,
        default=6,
        help='number of layers INCLUDING the input one (default: 5)')
    parser.add_argument(
        '--num_mlp_layers',
        type=int,
        default=2,
        help=
        'number of layers for MLP EXCLUDING the input one (default: 2). 1 means linear model.'
    )
    parser.add_argument('--hidden_dim',
                        type=int,
                        default=64,
                        help='number of hidden units (default: 64)')
    parser.add_argument('--final_dropout',
                        type=float,
                        default=0.5,
                        help='final layer dropout (default: 0.5)')
    parser.add_argument(
        '--graph_pooling_type',
        type=str,
        default="sum",
        choices=["sum", "average"],
        help='Pooling over nodes in a graph: sum or average')
    parser.add_argument(
        '--neighbor_pooling_type',
        type=str,
        default="sum",
        choices=["sum", "average", "max"],
        help='Pooling over neighboring nodes: sum, average or max')
    parser.add_argument(
        '--learn_eps',
        action="store_true",
        help=
        'Whether to learn the epsilon weighting for the center nodes. Does not affect training accuracy though.'
    )
    parser.add_argument(
        '--degree_as_tag',
        action="store_true",
        help=
        'let the input node features be the degree of nodes (heuristics for unlabeled graph)'
    )
    parser.add_argument('--filename',
                        type=str,
                        default="meta-guass-pow10",
                        help='output file')
    parser.add_argument(
        '--attention',
        type=bool,  # NOTE: argparse type=bool treats any non-empty string as True
        default=True,  # original default: False
        help='use attention (default: False)')
    parser.add_argument('--tqdm', type=bool, default=False, help='whether to use tqdm')
    parser.add_argument(
        '--multi_head',
        type=int,
        default=1,  # original default: 3
        help='number of attention heads')
    parser.add_argument(
        '--sum_flag',
        type=int,
        default=1,  # default: 1
        help="if 0: don't sum")
    parser.add_argument(
        '--inter',
        type=int,
        default=1,  # original default: 0
        help='if 0: no interaction in attention')

    parser.add_argument(
        '--dire_sigmod',
        type=int,
        default=0,  # default: 0
        help='if 0: softmax in dire attention; if 1: sigmoid')

    parser.add_argument(
        '--attention_type',
        type=str,
        default="mlp-sigmod",
        help='attention type: dire (sum or not), mlp-softmax, mlp-sigmod')
    args = parser.parse_args()

    ISOTIMEFORMAT = '%Y-%m-%d-%H-%M'
    theTime = datetime.datetime.now().strftime(ISOTIMEFORMAT)
    save_fold_path = "result/save_model/" + args.filename + str(
        args.num_layers) + str(theTime)  # result save path

    writer_path = str(theTime) + args.filename + 'TBX'
    writer = SummaryWriter(log_dir=writer_path)

    #set up seeds and gpu device
    print("lr:", args.lr)
    print("attention: ", args.attention)
    print("attention_type:", args.attention_type)
    print("sum_flag:", args.sum_flag)
    print("inter:", args.inter)
    print("filename:", args.filename)
    if args.attention:  # attention requires sum graph pooling
        args.graph_pooling_type = 'sum'
    print("data sets:", args.dataset)
    print("degree as tag:", args.degree_as_tag)
    print("fold_idx:", args.fold_idx)
    if args.sum_flag == 1:
        print("using direct sum attention together with the sigmoid attention model")

    f = open(args.filename + "_train", 'w')
    if args.fold_idx == -1:
        acc = []
        for idx in range(10):
            acc_i = cross_val(args, writer, idx, f)
            acc.append(acc_i)
        writer.close()
        np.save("result/" + args.filename + "_all.numpy",
                np.array(acc))  # save
    else:
        torch.manual_seed(0)
        np.random.seed(0)
        device = torch.device("cuda:" +
                              str(args.device)) if torch.cuda.is_available(
                              ) else torch.device("cpu")
        if torch.cuda.is_available():
            torch.cuda.manual_seed_all(0)

        graphs, num_classes = load_data(args.dataset, args.degree_as_tag)

        ##10-fold cross validation. Conduct an experiment on the fold specified by args.fold_idx.
        train_graphs, test_graphs = separate_data(graphs, args.seed,
                                                  args.fold_idx)

        model = GraphCNN(args.num_layers,
                         args.num_mlp_layers,
                         train_graphs[0].node_features.shape[1],
                         args.hidden_dim,
                         num_classes,
                         args.final_dropout,
                         args.learn_eps,
                         args.graph_pooling_type,
                         args.neighbor_pooling_type,
                         device,
                         attention=args.attention,
                         multi_head=args.multi_head,
                         sum_flag=args.sum_flag,
                         inter=args.inter,
                         attention_type=args.attention_type,
                         dire_sigmod=args.dire_sigmod).to(device)

        optimizer = optim.Adam(model.parameters(),
                               lr=args.lr,
                               weight_decay=args.wl2)
        scheduler = optim.lr_scheduler.StepLR(optimizer,
                                              step_size=50,
                                              gamma=0.5)

        #args.epoch = 1
        acc = []
        max_acc = 0
        for epoch in range(1, args.epochs + 1):
            avg_loss = train(args, model, device, train_graphs, optimizer,
                             epoch)
            scheduler.step()  # step the LR schedule after the epoch's updates
            acc_train, acc_test = ftest(args, model, device, train_graphs,
                                        test_graphs, epoch)
            max_acc = max(acc_test, max_acc)
            writer.add_scalars(
                str(args.fold_idx) + '/scalar/acc', {
                    'train': acc_train,
                    'val': acc_test
                }, epoch)
            acc.append(acc_test)
            f.write("%f %f %f" % (avg_loss, acc_train, acc_test))
            f.write("\n")
            print("")
            if epoch % 50 == 0:
                torch.save(model.state_dict(),
                           save_fold_path + "_" + str(epoch) + ".pt")
        print("****************max acc:", max_acc)
        try:
            torch.save(model.state_dict(), save_fold_path + "_last.pt")
            np.save(
                "result/" + args.filename + "_" + str(args.fold_idx) +
                "_val_acc.npy", np.array(acc))
            writer.close()
        except OSError:
            print("acc all:", acc)
        #print(model.eps)
        f.close()
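
Several flags above (--attention, --tqdm) use type=bool, a known argparse pitfall: bool("False") is True, so any value passed on the command line enables the flag. A conventional workaround (the helper name is ours, not the repository's):

import argparse

def str2bool(v):
    # argparse calls this with the raw command-line string
    if isinstance(v, bool):
        return v
    if v.lower() in ("yes", "true", "t", "1"):
        return True
    if v.lower() in ("no", "false", "f", "0"):
        return False
    raise argparse.ArgumentTypeError("boolean value expected")

# usage: parser.add_argument('--attention', type=str2bool, default=True)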
Example #5
def cross_val(args, writer, idx, f):
    fold_idx = idx
    print(
        "**********************fold:{}**************************************************"
        .format(fold_idx))
    torch.manual_seed(0)
    np.random.seed(0)
    device = torch.device(
        "cuda:" +
        str(args.device)) if torch.cuda.is_available() else torch.device("cpu")
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(0)

    graphs, num_classes = load_data(args.dataset, args.degree_as_tag)

    ##10-fold cross validation. Conduct an experiment on the fold specified by args.fold_idx.
    train_graphs, test_graphs = separate_data(graphs, args.seed, fold_idx)

    model = GraphCNN(args.num_layers,
                     args.num_mlp_layers,
                     train_graphs[0].node_features.shape[1],
                     args.hidden_dim,
                     num_classes,
                     args.final_dropout,
                     args.learn_eps,
                     args.graph_pooling_type,
                     args.neighbor_pooling_type,
                     device,
                     attention=args.attention,
                     multi_head=args.multi_head,
                     sum_flag=args.sum_flag,
                     inter=args.inter,
                     attention_type=args.attention_type,
                     dire_sigmod=args.dire_sigmod).to(device)

    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.5)
    f.write("************************************** %d *********" % fold_idx)
    max_acc = 0
    acc = []
    for epoch in range(1, args.epochs + 1):
        avg_loss = train(args, model, device, train_graphs, optimizer, epoch)
        scheduler.step()  # step the LR schedule after the epoch's updates
        acc_train, acc_test = test(args, model, device, train_graphs,
                                   test_graphs, epoch)
        writer.add_scalars('/scalar/acc' + str(fold_idx), {
            'train': acc_train,
            'val': acc_test
        }, epoch)
        acc.append(acc_test)
        if acc_test > max_acc:
            max_acc = acc_test

        f.write("%f %f %f" % (avg_loss, acc_train, acc_test))
        f.write("\n")
        print("")
    print("acc:", acc)
    try:
        f.write(
            "************************************** fold_idx:{}, best:{} *********"
            .format(fold_idx, max_acc))
        np.save(
            "result/" + args.filename + "_" + str(fold_idx) + "_val_acc.npy",
            np.array(acc))
        print(
            "************************************** fold_idx:{}, best:{} *********"
            .format(fold_idx, max_acc))
    except OSError:
        pass
    return acc
Example #6
import numpy as np
from sklearn import svm
from sklearn.model_selection import StratifiedKFold
from sklearn.naive_bayes import GaussianNB

data_dir = '../../data'
cleaned_file = 'trauma_los_cleaned.csv'
extract_file = 'trauma_los_cleaned_extract.csv'
desired_headings = [
    'sex', 'normalvitals', 'gcs1', 'iss8', 'age65', 'transfer', 'penetrating',
    'mechcode', 'bodyregions', 'headany', 'faceany', 'neckany', 'chestany',
    'abdoany', 'spineany', 'upperlimbany', 'lowerlimbany', 'head3', 'face3',
    'neck3', 'chest3', 'abdo3', 'spine3', 'upper3', 'lower3', 'operation',
    'neurosurgery', 'laparotomy', 'thoracotomy', 'married', 'english',
    'mentalhealth', 'comorbidity', 'ssa'
]

# trim the input file into only the features we want to use
extract_features(data_dir, cleaned_file, extract_file, desired_headings)
X, y, headings = separate_data(True, data_dir, extract_file)
# convert all strings to ints
X = np.array([[int(v) for v in row] for row in X])
y = np.array([int(v) for v in y])
n_samples, n_features = X.shape

# create a stratified 10-fold cross-validation iterator that generates the
# indices for us (modern sklearn API; older releases took y= and n_folds=)
cv = StratifiedKFold(n_splits=10, shuffle=True)

# create a support vector machine classifier
clf_svm = svm.SVC(kernel='linear', probability=True)

# create a Gaussian naive Bayes classifier
clf_gauss_nb = GaussianNB()
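
With the modern sklearn API the fold indices come from cv.split(X, y); evaluating both classifiers is simplest with cross_val_score, which accepts the cv object directly (a minimal usage sketch):

from sklearn.model_selection import cross_val_score

for name, clf in [('SVM', clf_svm), ('GaussianNB', clf_gauss_nb)]:
    scores = cross_val_score(clf, X, y, cv=cv)
    print('%s: %.3f +/- %.3f' % (name, scores.mean(), scores.std()))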
Example #7
def main():
    # Training settings
    parser = argparse.ArgumentParser(description='WL subtree kernel')
    parser.add_argument('--dataset',
                        type=str,
                        default="MUTAG",
                        help='name of dataset (default: MUTAG)')
    parser.add_argument(
        '--seed',
        type=int,
        default=0,
        help='random seed for splitting the dataset into 10 (default: 0)')
    parser.add_argument(
        '--fold_idx',
        type=int,
        default=0,
        help='the index of fold in 10-fold validation. Should be less than 10.'
    )
    parser.add_argument('--iter',
                        type=int,
                        default=5,
                        help='number of iterations for the WL')
    parser.add_argument('--normalize',
                        action="store_true",
                        help='normalize the feature or not')
    parser.add_argument('--filename', type=str, default="", help='output file')
    args = parser.parse_args()

    np.random.seed(0)
    graphs, num_classes = load_data(args.dataset, False)

    ##10-fold cross validation, consider the particular fold.
    train_graphs, test_graphs = separate_data(graphs, args.seed, args.fold_idx)

    #SVM hyper-parameter to tune
    C_list = [0.01, 0.1, 1, 10, 100]
    X_train, y_train = convert(train_graphs)
    X_test, y_test = convert(test_graphs)

    wl_kernel = GraphKernel(kernel=[{
        "name": "weisfeiler_lehman",
        "niter": args.iter
    }, {
        "name": "subtree_wl"
    }],
                            normalize=args.normalize)
    K_train = wl_kernel.fit_transform(X_train)
    K_test = wl_kernel.transform(X_test)

    train_acc = []
    test_acc = []
    for C in C_list:
        clf = SVC(kernel='precomputed', C=C)
        clf.fit(K_train, y_train)
        y_pred_test = clf.predict(K_test)
        y_pred_train = clf.predict(K_train)
        train_acc.append(accuracy_score(y_train, y_pred_train) * 100)
        test_acc.append(accuracy_score(y_test, y_pred_test) * 100)

    print(train_acc)
    print(test_acc)

    if not args.filename == "":
        np.savetxt(args.filename, np.array([train_acc, test_acc]).transpose())
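
convert is not shown; GraKeL accepts graphs given as [edges, node_labels] pairs, so a plausible version for the S2V-style graph objects used in these examples (each assumed to carry a networkx graph g, integer node_tags, and a label) is the following sketch, not the repository's actual code:

def convert(graphs):
    X, y = [], []
    for graph in graphs:
        edges = set(graph.g.edges())
        edges |= {(v, u) for (u, v) in edges}   # make the edge set symmetric
        node_labels = dict(zip(graph.g.nodes(), graph.node_tags))
        X.append([edges, node_labels])
        y.append(graph.label)
    return X, y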
Example #8
def main():
    # Training settings
    # Note: Hyper-parameters need to be tuned in order to obtain results reported in the paper.
    parser = argparse.ArgumentParser(description='PyTorch graph convolutional neural net for whole-graph classification')
    parser.add_argument('--dataset', type=str, default="MUTAG",
                        help='name of dataset (default: MUTAG)')
    parser.add_argument('--device', type=int, default=0,
                        help='which gpu to use if any (default: 0)')
    parser.add_argument('--batch_size', type=int, default=32,
                        help='input batch size for training (default: 32)')
    parser.add_argument('--iters_per_epoch', type=int, default=50,
                        help='number of iterations per each epoch (default: 50)')
    parser.add_argument('--epochs', type=int, default=350,
                        help='number of epochs to train (default: 350)')
    parser.add_argument('--lr', type=float, default=0.01,
                        help='learning rate (default: 0.01)')
    parser.add_argument('--seed', type=int, default=0,
                        help='random seed for splitting the dataset into 10 (default: 0)')
    parser.add_argument('--fold_idx', type=int, default=0,
                        help='the index of fold in 10-fold validation. Should be less than 10.')
    parser.add_argument('--num_layers', type=int, default=5,
                        help='number of layers INCLUDING the input one (default: 5)')
    parser.add_argument('--num_mlp_layers', type=int, default=2,
                        help='number of layers for MLP EXCLUDING the input one (default: 2). 1 means linear model.')
    parser.add_argument('--hidden_dim', type=int, default=64,
                        help='number of hidden units (default: 64)')
    parser.add_argument('--final_dropout', type=float, default=0.5,
                        help='final layer dropout (default: 0.5)')
    parser.add_argument('--graph_pooling_type', type=str, default="sum", choices=["sum", "average"],
                        help='Pooling over nodes in a graph: sum or average')
    parser.add_argument('--neighbor_pooling_type', type=str, default="sum", choices=["sum", "average", "max"],
                        help='Pooling over neighboring nodes: sum, average or max')
    parser.add_argument('--opt', type=str, default="adam", choices=["adam", "sgd"])
    parser.add_argument('--learn_eps', action="store_true",
                                        help='Whether to learn the epsilon weighting for the center nodes. Does not affect training accuracy though.')
    parser.add_argument('--degree_as_tag', action="store_true",
    					help='let the input node features be the degree of nodes (heuristics for unlabeled graph)')
    parser.add_argument('--filename', type = str, default = "",
                                        help='output file')
    parser.add_argument('--random', type=int, default=None,
                                        help='the range of random features (default: None). None means it does not add random features.')
    args = parser.parse_args()

    #set up seeds and gpu device
    torch.manual_seed(0)
    np.random.seed(0)    
    device = torch.device("cuda:" + str(args.device)) if torch.cuda.is_available() else torch.device("cpu")
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(0)

    if args.dataset in ['TRIANGLE', 'TRIANGLE_EX', 'LCC', 'LCC_EX', 'MDS', 'MDS_EX']:
        node_classification = True
        train_graphs, _ = load_data(f'dataset/{args.dataset}/{args.dataset}_train.txt', args.degree_as_tag)
        test_graphs, _ = load_data(f'dataset/{args.dataset}/{args.dataset}_test.txt', args.degree_as_tag)
        for g in train_graphs + test_graphs:
            if args.random:
                g.node_features = torch.ones(g.node_features.shape[0], 0)
            else:
                g.node_features = torch.ones(g.node_features.shape[0], 1)
        if args.dataset in ['TRIANGLE', 'TRIANGLE_EX', 'MDS', 'MDS_EX']:
            num_classes = 2
        elif args.dataset in ['LCC', 'LCC_EX']:
            num_classes = 3
        else:
            assert False
        if args.dataset in ['MDS', 'MDS_EX']:
            get_labels = lambda batch_graph, model: torch.LongTensor(MDS_LOCAL(model, batch_graph))
            criterion = nn.CrossEntropyLoss()
        else:
            get_labels = lambda batch_graph, model: torch.LongTensor(sum([graph.node_tags for graph in batch_graph], []))
            bc = [0 for i in range(num_classes)]
            for G in train_graphs:
                for t in G.node_tags:
                    bc[t] += 1
            w = torch.FloatTensor([max(bc) / bc[i] for i in range(num_classes)]).to(device)
            criterion = nn.CrossEntropyLoss(weight=w)
    else:
        node_classification = False
        graphs, num_classes = load_data(f'dataset/{args.dataset}/{args.dataset}.txt', args.degree_as_tag)
        
        ##10-fold cross validation. Conduct an experiment on the fold specified by args.fold_idx.
        train_graphs, test_graphs = separate_data(graphs, args.seed, args.fold_idx)
        
        criterion = nn.CrossEntropyLoss()
        get_labels = lambda batch_graph, model: torch.LongTensor([graph.label for graph in batch_graph])

    model = GraphCNN(args.num_layers, args.num_mlp_layers, train_graphs[0].node_features.shape[1], args.hidden_dim, num_classes, args.final_dropout, args.learn_eps, args.graph_pooling_type, args.neighbor_pooling_type, args.random, node_classification, device).to(device)

    if args.opt == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=args.lr)
        scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.5)
    elif args.opt == 'sgd':
        optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=0.9)
        scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=args.epochs, gamma=0.5)

    for epoch in range(1, args.epochs + 1):
        avg_loss = train(args, model, device, train_graphs, optimizer, criterion, get_labels, epoch)
        scheduler.step()  # step the LR schedule after the epoch's updates
        acc_train, acc_test = test(args, model, device, train_graphs, test_graphs, num_classes, get_labels, epoch)

        if not args.filename == "":
            with open(args.filename, 'w') as f:
                f.write("%f %f %f" % (avg_loss, acc_train, acc_test))
                f.write("\n")
        print("")

        print(model.eps)
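
The inverse-frequency class weights built in the loop above (w[i] = max(bc) / bc[i]) can be written more compactly; an equivalent sketch with torch.bincount, reusing train_graphs, num_classes, device, and nn from the example:

import torch
import torch.nn as nn

tags = torch.tensor([t for G in train_graphs for t in G.node_tags])
bc = torch.bincount(tags, minlength=num_classes).float()
w = (bc.max() / bc).to(device)   # weight rare classes up to the majority class
criterion = nn.CrossEntropyLoss(weight=w)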
Example #9
def main():
    # Training settings
    # Note: Check experiment scripts for hyperparameters
    parser = argparse.ArgumentParser(description='PyTorch graph convolutional\
                                                  neural net for whole-graph \
                                                  classification')
    parser.add_argument('--dataset',
                        type=str,
                        default="MUTAG",
                        help='name of dataset (default: MUTAG)')
    parser.add_argument('--device',
                        type=str,
                        default="0",
                        help='which gpu to use if any (default: 0)')
    parser.add_argument('--batch_size',
                        type=int,
                        default=32,
                        help='input batch size for training (default: 32)')
    parser.add_argument(
        '--iters_per_epoch',
        type=int,
        default=50,
        help='number of iterations per each epoch (default: 50)')
    parser.add_argument('--epochs',
                        type=int,
                        default=350,
                        help='number of epochs to train (default: 350)')
    parser.add_argument('--lr',
                        type=float,
                        default=0.01,
                        help='learning rate (default: 0.01)')
    parser.add_argument('--seed',
                        type=int,
                        default=0,
                        help='random seed for splitting the dataset into 10\
                              (default: 0)')
    parser.add_argument('--fold_idx',
                        type=int,
                        default=0,
                        help='the index of fold in 10-fold validation. \
                              Should be less than 10.')
    parser.add_argument('--num_layers',
                        type=int,
                        default=5,
                        help='number of layers INCLUDING the input one \
                              (default: 5)')
    parser.add_argument(
        '--num_mlp_layers',
        type=int,
        default=2,
        help='number of layers for MLP EXCLUDING the input one \
                              (default: 2). 1 means linear model.')
    parser.add_argument('--hidden_dim',
                        type=int,
                        default=64,
                        help='number of hidden units (default: 64)')
    parser.add_argument('--final_dropout',
                        type=float,
                        default=0.5,
                        help='final layer dropout (default: 0.5)')
    parser.add_argument(
        '--graph_pooling_type',
        type=str,
        default="sum",
        choices=["sum", "average"],
        help='Pooling for over nodes in a graph: sum or average')
    parser.add_argument('--neighbor_pooling_type',
                        type=str,
                        default="sum",
                        choices=["sum", "average", "max"],
                        help='Pooling for over neighboring nodes: sum, average\
                              or max')
    parser.add_argument('--learn_eps',
                        action="store_true",
                        help='Whether to learn the epsilon \
                                              weighting for the center nodes.')
    parser.add_argument('--degree_as_tag',
                        action="store_true",
                        help='let the input node features be the degree of \
                              nodes (heuristics for unlabeled graph)')
    parser.add_argument('--filename', type=str, default="", help='output file')
    parser.add_argument('--bn',
                        type=bool,  # NOTE: argparse type=bool treats any non-empty string as True
                        default=True,
                        help="Enable batchnorm for MLP")
    parser.add_argument('--gbn',
                        type=bool,
                        default=True,
                        help="Enable batchnorm for graph")
    parser.add_argument('--corrupt_label',
                        action="store_true",
                        help="Enable label corruption")
    parser.add_argument('--N',
                        type=str,
                        default="",
                        help="Label noise configuration N. Should be passed \
                              as a flattened string in row order, or a \
                              single value for a symmetric noise config.")
    parser.add_argument('--denoise',
                        type=str,
                        default="",
                        choices=["estimate", "anchors", "exact"],
                        help="Method to recover the noise matrix C.")
    parser.add_argument('--correction',
                        type=str,
                        default="backward",
                        choices=["backward", "forward", "compound"],
                        help="Type of loss correction function.")
    parser.add_argument('--anchors',
                        type=str,
                        default="",
                        help="List of representative train data.")
    parser.add_argument('--est_mode',
                        default="max",
                        choices=["max", "min"],
                        help="Type of estimator for C")
    parser.add_argument('--skip_new',
                        action="store_true",
                        help="Train new model for estimating noise")
    args = parser.parse_args()

    #set up seeds and gpu device
    torch.manual_seed(0)
    np.random.seed(0)
    if args.device != "cpu":
        device = torch.device("cuda:" + args.device)\
                 if torch.cuda.is_available() else torch.device("cpu")
    else:
        device = torch.device("cpu")
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(0)

    graphs, num_classes = load_data(args.dataset, args.degree_as_tag)

    ##10-fold cross validation. Conduct an experiment on the fold specified by args.fold_idx.
    train_graphs, test_graphs = separate_data(graphs, args.seed, args.fold_idx)

    # Corrupt data
    if args.corrupt_label:
        assert len(args.N) != 0, "Need to pass noise matrix!"
        N = np.fromstring(args.N, sep=" ", dtype=float)
        if len(N) == 1:
            self_prob = N[0]
            N = np.ones((num_classes, num_classes)) * \
                ((1 - self_prob) / (num_classes-1))
            np.fill_diagonal(N, self_prob)
            # Note: this could potentially cause some numerical problem
        elif len(N) == num_classes**2:
            N = N.reshape(num_classes, -1)
        else:
            raise ValueError("N needs to be a single value or square matrix.")
        print("Corrupting training label with N:")
        print(N)
        train_graphs = corrupt_label(train_graphs, N)

    if args.denoise != "exact":
        model = GraphCNN(args.num_layers, args.num_mlp_layers,
                         train_graphs[0].node_features.shape[1],
                         args.hidden_dim, num_classes, args.final_dropout,
                         args.learn_eps, args.graph_pooling_type,
                         args.neighbor_pooling_type, device, args.bn,
                         args.gbn).to(device)

        optimizer = optim.Adam(model.parameters(), lr=args.lr)
        scheduler = optim.lr_scheduler.StepLR(optimizer,
                                              step_size=50,
                                              gamma=0.5)

        for epoch in range(1, args.epochs + 1):
            avg_loss = train(args, model, device, train_graphs, optimizer,
                             epoch)
            scheduler.step()  # step the LR schedule after the epoch's updates
            acc_train, acc_test = test(args, model, device, train_graphs,
                                       test_graphs, epoch)

            if not args.filename == "":
                with open(args.filename, 'w') as f:
                    f.write("%f %f %f" % (avg_loss, acc_train, acc_test))
                    f.write("\n")
            print("")

            print(model.eps)
    else:
        model = None

    if args.denoise in ["estimate", "anchors", "exact"]:
        C = None
        anchors = None
        if args.denoise == "estimate" or args.denoise == "anchors":
            anchors = _parse_anchors(args.anchors, train_graphs)
            C = estimate_C(model,
                           train_graphs,
                           anchors,
                           est_mode=args.est_mode)
        elif args.denoise == "exact":
            C = estimate_C(model, train_graphs, anchors, N)

        criterion = None
        if args.correction == "backward":
            criterion = lambda x, y: backward_correction(
                x, y, C, device, model.num_classes)
        elif args.correction == "forward":
            criterion = lambda x, y: forward_correction_xentropy(
                x, y, C, device, model.num_classes)
        elif args.correction == "compound":
            criterion = lambda x, y: compound_correction(
                x, y, C, device, model.num_classes)
        del model
        if not args.skip_new:
            print("Training new denoising model")
            model = GraphCNN(args.num_layers, args.num_mlp_layers,
                             train_graphs[0].node_features.shape[1],
                             args.hidden_dim, num_classes, args.final_dropout,
                             args.learn_eps, args.graph_pooling_type,
                             args.neighbor_pooling_type, device, args.bn,
                             args.gbn).to(device)
            optimizer = optim.Adam(model.parameters(), lr=args.lr)
            scheduler = optim.lr_scheduler.StepLR(optimizer,
                                                  step_size=50,
                                                  gamma=0.5)
            for epoch in range(1, args.epochs + 1):
                avg_loss = train(args, model, device, train_graphs, optimizer,
                                 epoch, criterion)
                scheduler.step()  # step the LR schedule after the epoch's updates
                acc_train, acc_test = test(args, model, device, train_graphs,
                                           test_graphs, epoch)
                if not args.filename == "":
                    with open(args.denoise+'_'+args.correction+'_'+args.est_mode\
                              +'_'+args.filename, 'w') as f:
                        f.write("%f %f %f" % (avg_loss, acc_train, acc_test))
                        f.write("\n")
                print("")
                print(model.eps)
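
The backward_correction loss used above presumably implements the backward correction of Patrini et al. (2017): pre-multiply the vector of per-class losses by C^-1, then index with the noisy label. A minimal sketch (our naming and signature, assuming C is a (K, K) torch tensor):

import torch
import torch.nn.functional as F

def backward_corrected_loss(logits, noisy_labels, C):
    # per-class cross-entropy losses: loss_all[i, k] = -log p_k(x_i)
    loss_all = -F.log_softmax(logits, dim=1)          # (batch, K)
    C_inv = torch.inverse(C)                          # (K, K)
    # corrected[i, j] = sum_k (C^-1)[j, k] * loss_all[i, k]
    corrected = loss_all @ C_inv.t()
    return corrected.gather(1, noisy_labels.view(-1, 1)).mean()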
Example #10
def main():
    #
    #set up seeds and gpu device
    torch.manual_seed(0)
    np.random.seed(0)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(0)

    dataset = 'MUTAG'  # alternatives: 'IMDBMULTI', 'COLLAB', 'NCI1', 'REDDITBINARY', 'PTC', 'PROTEINS'
    degree_as_tag = False
    graphs, num_classes = load_data(dataset, degree_as_tag)
    print(dataset)
    ##10-fold cross validation. Conduct an experiment on the fold specified by args.fold_idx.
    accuracies = {}
    for fold_id in range(10):
        accuracies[fold_id] = 0
        train_graphs, test_graphs = separate_data(graphs, 0, fold_id)

        hidden_dim = 32
        final_dropout = 0.5
        learn_eps = False
        graph_pooling_type = 'sum'
        neighbor_pooling_type = 'sum'  # 'attn1' 'attn2' 'sum' 'average' 'max'
        device = torch.device(
            "cuda") if torch.cuda.is_available() else torch.device("cpu")
        # device = torch.device('cpu')

        model = GPN(2, 2, train_graphs[0].node_features.shape[1], hidden_dim,
                    num_classes)
        model.to(device)

        optimizer = optim.Adam(model.parameters(), lr=0.005)
        # optimizer = optim.RMSprop(model.parameters(), lr=0.005)
        scheduler = optim.lr_scheduler.StepLR(optimizer,
                                              step_size=50,
                                              gamma=0.5)

        epochs = 100
        from collections import namedtuple
        ARGS = namedtuple('ARGS', ['batch_size', 'iters_per_epoch'])
        args = ARGS(batch_size=128, iters_per_epoch=50)  # build once, not every epoch
        for epoch in range(1, epochs + 1):
            avg_loss = train(args, model, device, train_graphs, optimizer,
                             epoch)
            scheduler.step()  # step the LR schedule after the epoch's updates
            acc_train, acc_test = model_test(args, model, device, train_graphs,
                                             test_graphs, epoch)
            print('epoch:{} acc_train: {}, acc_test: {}'.format(
                epoch, acc_train, acc_test))
            # if not args.filename == "":
            #     with open(args.filename, 'w') as f:
            #         f.write("%f %f %f" % (avg_loss, acc_train, acc_test))
            #         f.write("\n")
            # print("")
            accuracies[fold_id] = max(accuracies[fold_id], acc_test)
            print('fold_id: {} current max acc test {}'.format(
                fold_id, accuracies[fold_id]))
            # print(model.eps)

    print(accuracies)
    print(np.mean(list(accuracies.values())),
          np.std(list(accuracies.values())))
Example #11
def main():
    # Training settings
    # Note: Hyper-parameters need to be tuned in order to obtain results reported in the paper.
    parser = argparse.ArgumentParser(description='PyTorch graph convolutional neural net for whole-graph classification')
    parser.add_argument('--dataset', type=str, default="NCI1",
                        help='name of dataset (default: MUTAG)')
    parser.add_argument('--device', type=int, default=0,
                        help='which gpu to use if any (default: 0)')
    parser.add_argument('--batch_size', type=int, default=32,
                        help='input batch size for training (default: 32)')
    parser.add_argument('--iters_per_epoch', type=int, default=50,
                        help='number of iterations per each epoch (default: 50)')
    parser.add_argument('--epochs', type=int, default=300,   # paper default: 350
                        help='number of epochs to train (default: 350)')
    parser.add_argument('--lr', type=float, default=0.01,
                        help='learning rate (default: 0.01)')
    parser.add_argument('--wl2', type=float, default=0.0,
                        help='weight decay (default: 0.0)')
    parser.add_argument('--seed', type=int, default=0,
                        help='random seed for splitting the dataset into 10 (default: 0)')
    parser.add_argument('--fold_idx', type=int, default=0,
                        help='the index of fold in 10-fold validation. Should be less than 10.')
    parser.add_argument('--num_layers', type=int, default=6,
                        help='number of layers INCLUDING the input one (default: 5)')
    parser.add_argument('--num_mlp_layers', type=int, default=2,
                        help='number of layers for MLP EXCLUDING the input one (default: 2). 1 means linear model.')
    parser.add_argument('--hidden_dim', type=int, default=64,
                        help='number of hidden units (default: 64)')
    parser.add_argument('--final_dropout', type=float, default=0.5,
                        help='final layer dropout (default: 0.5)')
    parser.add_argument('--graph_pooling_type', type=str, default="sum", choices=["sum", "average"],
                        help='Pooling over nodes in a graph: sum or average')
    parser.add_argument('--neighbor_pooling_type', type=str, default="sum", choices=["sum", "average", "max"],
                        help='Pooling over neighboring nodes: sum, average or max')
    parser.add_argument('--learn_eps', action="store_true",
                                        help='Whether to learn the epsilon weighting for the center nodes. Does not affect training accuracy though.')
    parser.add_argument('--degree_as_tag', action="store_true",
    					help='let the input node features be the degree of nodes (heuristics for unlabeled graph)')
    parser.add_argument('--filename', type = str, default = "yanzheng-MUTAG_sumsoftmax",
                                        help='output file')
    parser.add_argument('--attention', type=bool, default=True,  # original default: False; note the argparse type=bool pitfall
                       help='use attention (default: False)')
    parser.add_argument('--tqdm', type=bool, default=False,
                        help='whether to use tqdm')
    parser.add_argument('--multi_head', type=int, default=1,  # original default: 3
                        help='number of attention heads')
    parser.add_argument('--sum_flag', type=int, default=1,     # default: 1
                        help="if 0: don't sum")
    parser.add_argument('--inter', type=int, default=1,        # original default: 0
                        help='if 0: no interaction in attention')

    parser.add_argument('--dire_sigmod', type=int, default=0,  # default: 0
                        help='if 0: softmax in dire attention; if 1: sigmoid')

    parser.add_argument('--attention_type', type=str, default="mlp-sigmod",
                        help='attention type: dire (sum or not), mlp-softmax, mlp-sigmod')
    args = parser.parse_args()
    writer_path = args.filename + 'TBX'
    writer = SummaryWriter(log_dir=writer_path)
    ISOTIMEFORMAT = '%Y-%m-%d-%H-%M'
    theTime = datetime.datetime.now().strftime(ISOTIMEFORMAT)
    save_fold_path = "result/save_model/" + args.filename + str(theTime)  # result save path

    #set up seeds and gpu device
    print("lr:", args.lr)
    print("attention: ", args.attention)
    print("attention_type:", args.attention_type)
    print("sum_flag:", args.sum_flag)
    print("inter:", args.inter)
    print("filename:", args.filename)
    if args.attention:   # attention requires sum graph pooling
        args.graph_pooling_type = 'sum'
    print("data sets:", args.dataset)
    print("degree as tag:", args.degree_as_tag)
    print("fold_idx:", args.fold_idx)
    if args.sum_flag == 1:
        print("using direct sum attention together with the sigmoid attention model")

    f = open(args.filename+"_train", 'w')

    torch.manual_seed(0)
    np.random.seed(0)
    device = torch.device("cuda:" + str(args.device)) if torch.cuda.is_available() else torch.device("cpu")
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(0)

    graphs, num_classes = load_data(args.dataset, args.degree_as_tag)

    ##10-fold cross validation. Conduct an experiment on the fold specified by args.fold_idx.
    train_graphs, test_graphs = separate_data(graphs, args.seed, args.fold_idx)

    model = GraphCNN(args.num_layers, args.num_mlp_layers,
                     train_graphs[0].node_features.shape[1], args.hidden_dim,
                     num_classes, args.final_dropout, args.learn_eps,
                     args.graph_pooling_type, args.neighbor_pooling_type,
                     device, attention=args.attention,
                     multi_head=args.multi_head, sum_flag=args.sum_flag,
                     inter=args.inter, attention_type=args.attention_type,
                     dire_sigmod=args.dire_sigmod).to(device)
    model.load_state_dict(torch.load("result/save_model/meta-guass-pow1062019-07-15-13-48_last.pt"))
    model.eval()
    # acc_train, acc_test = ftest(args, model, device, train_graphs, test_graphs, epoch)
    # choose here which split to evaluate
    test_graphs = train_graphs  # evaluating on the training split
    output = pass_data_iteratively(model, test_graphs)
    pred = output.max(1, keepdim=True)[1]
    labels = torch.LongTensor([graph.label for graph in test_graphs]).to(device)
    correct = pred.eq(labels.view_as(pred)).sum().cpu().item()
    acc_test = correct / float(len(test_graphs))
    print("acc test: ",acc_test*100)
Example #12
def main():
    # Training settings
    # Note: Hyper-parameters need to be tuned in order to obtain results reported in the paper.
    parser = argparse.ArgumentParser(
        description=
        'PyTorch graph convolutional neural net for whole-graph classification'
    )
    parser.add_argument('--dataset',
                        type=str,
                        default="PROTEINS",
                        help='name of dataset (default: PROTEINS)')
    parser.add_argument('--device',
                        type=int,
                        default=1,
                        help='which gpu to use if any (default: 1)')
    parser.add_argument('--batch_size',
                        type=int,
                        default=32,
                        help='input batch size for training (default: 32)')
    parser.add_argument(
        '--iters_per_epoch',
        type=int,
        default=50,
        help='number of iterations per each epoch (default: 50)')
    parser.add_argument('--epochs',
                        type=int,
                        default=350,
                        help='number of epochs to train (default: 350)')
    parser.add_argument('--lr',
                        type=float,
                        default=1e-2,
                        help='learning rate (default: 0.01)')
    # parser.add_argument('--lr_decay', type=float, default=0.8,
    #                     help='learning rate decay with epochs')
    parser.add_argument(
        '--seed',
        type=int,
        default=0,
        help='random seed for splitting the dataset into 10 (default: 0)')
    parser.add_argument(
        '--fold_idx',
        type=int,
        default=0,
        help='the index of fold in 10-fold validation. Should be less than 10.'
    )
    parser.add_argument(
        '--num_layers',
        type=int,
        default=5,
        help='number of layers INCLUDING the input one (default: 5)')
    parser.add_argument(
        '--num_mlp_layers',
        type=int,
        default=2,
        help=
        'number of layers for MLP EXCLUDING the input one (default: 2). 1 means linear model.'
    )
    parser.add_argument('--hidden_dim',
                        type=int,
                        default=64,
                        help='number of hidden units (default: 64)')
    parser.add_argument(
        '--embedded_dim',
        type=int,
        default=4,
        help=
        'number of the embedding dimension using shortest path length to sources'
    )
    parser.add_argument('--orders',
                        type=int,
                        default=1,
                        help='number of neighbors order use in conv layers')
    parser.add_argument('--final_dropout',
                        type=float,
                        default=0.5,
                        help='final layer dropout (default: 0.5)')
    parser.add_argument(
        '--graph_pooling_type',
        type=str,
        default="sum",
        choices=["sum", "average"],
        help='Pooling over nodes in a graph: sum or average')
    parser.add_argument(
        '--neighbor_pooling_type',
        type=str,
        default="sum",
        choices=["sum", "average", "max"],
        help='Pooling over neighboring nodes: sum, average or max')
    parser.add_argument(
        '--learn_eps',
        action="store_true",
        help=
        'Whether to learn the epsilon weighting for the center nodes. Does not affect training accuracy though.'
    )
    parser.add_argument('--res_connection',
                        type=bool,  # NOTE: argparse type=bool treats any non-empty string as True
                        default=False,
                        help='whether to add a shortcut / residual connection')
    parser.add_argument(
        '--degree_as_tag',
        type=bool,
        default=True,
        help=
        'let the input node features be the degree of nodes (heuristics for unlabeled graph)'
    )
    parser.add_argument('--filename', type=str, default="", help='output file')
    parser.add_argument('--use_tb',
                        type=bool,
                        default=False,
                        help='use tensorboard to record loss')
    args = parser.parse_args()

    #       SOTA Performance Hyperparameters
    #   dataset    model    lr   batchsize   epoch   layers    dropout   orders    test_acc     train_acc
    #   PROTEINS   AGAT    5e-3    32        10~20     3        0.5        1        0.794           0.82
    #   PROTEINS   SPGNN   1e-3    32         ~30      2        0.5        2        0.771(15epochs)  ~0.77
    #   PROTEINS   SPGNN   1e-3    32         ~80      2        0.5        2        0.786            0.80
    #
    use_default = False
    if not use_default:
        args.dataset = 'PTC'
        args.device = 0
        args.batch_size = 32
        args.lr = 1e-2
        args.num_layers = 3
        args.num_mlp_layers = 3
        args.embedded_dim = 6
        args.hidden_dim = 32
        args.final_dropout = 0.5
        args.orders = 3
        args.degree_as_tag = True
        args.res_connection = True
        args.use_tb = True
        args.iters_per_epoch = 30

    model_ = AGCN

    #set up seeds and gpu device
    torch.manual_seed(0)
    np.random.seed(0)
    device = torch.device(
        "cuda:" +
        str(args.device)) if torch.cuda.is_available() else torch.device("cpu")
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(0)

    graphs, num_classes = load_data(
        args.dataset,
        args.degree_as_tag,
        args.embedded_dim,
    )
    # graphs, num_classes = load_pkl(args.dataset, args.degree_as_tag, args.embedded_dim)

    ##10-fold cross validation. Conduct an experiment on the fold specified by args.fold_idx.
    train_graphs, test_graphs = separate_data(graphs, args.seed, args.fold_idx)

    model = model_(args.num_layers, args.num_mlp_layers,
                   train_graphs[0].node_features.shape[1], args.hidden_dim,
                   num_classes, args.embedded_dim, args.final_dropout,
                   args.res_connection, device, args.orders).to(device)
    # model = AGAT(args.num_layers, args.num_mlp_layers, train_graphs[0].node_features.shape[1], args.hidden_dim, num_classes, args.embedded_dim, args.final_dropout, args.res_connection, device).to(device)
    # model = GIN(args.num_layers, args.num_mlp_layers, train_graphs[0].node_features.shape[1], args.hidden_dim, num_classes, args.final_dropout, args.res_connection, device).to(device)

    # pytorch_total_params = sum(p.numel() for p in model.parameters())
    print(model)
    # log_path is assumed to be defined elsewhere in the original module,
    # e.g. log_path = './runs/' + args.dataset
    writer = SummaryWriter(log_dir=log_path) if args.use_tb else None

    count_step_size = 15
    optimizer = optim.SGD(model.parameters(), lr=args.lr, weight_decay=1e-3)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=60, gamma=0.75)
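    # StepLR decays the learning rate by gamma=0.75 every 60 epochs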

    acc_test_meter = AverageValueMeter()
    for epoch in range(1, args.epochs + 1):
        avg_loss = train(args, model, writer, device, train_graphs, optimizer,
                         epoch)
        acc_train, acc_test = test(args, model, writer, device, train_graphs,
                                   test_graphs, epoch)
        # step the scheduler after the epoch's optimizer updates (calling it
        # first skips the initial learning rate since PyTorch 1.1)
        scheduler.step()

        acc_test_meter.add(acc_test)

        if epoch % count_step_size == 0:
            print('------last {} epochs test acc:{}'.format(
                count_step_size,
                acc_test_meter.value()[0]))
            acc_test_meter.reset()

        if not args.filename == "":
            # append rather than overwrite, so every epoch's numbers are kept
            with open(args.filename, 'a') as f:
                f.write("%f %f %f\n" % (avg_loss, acc_train, acc_test))
        print("")
Example #13
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset',
                        type=str,
                        default="PROTEINS",
                        help='name of dataset')
    parser.add_argument(
        '--mod',
        type=str,
        default="f-scaled",
        choices=["origin", "additive", "scaled", "f-additive", "f-scaled"],
        help='model to be used: origin, additive, scaled, f-additive, f-scaled'
    )
    parser.add_argument('--seed', type=int, default=809, help='random seed')
    parser.add_argument('--epochs',
                        type=int,
                        default=300,
                        help='number of epochs to train')
    parser.add_argument('--lr',
                        type=float,
                        default=1e-2,
                        help='initial learning rate')
    parser.add_argument('--wd',
                        type=float,
                        default=1e-3,
                        help='weight decay value')
    parser.add_argument('--n_layer',
                        type=int,
                        default=4,
                        help='number of hidden layers')
    parser.add_argument('--hid',
                        type=int,
                        default=32,
                        help='number of hidden units')
    parser.add_argument('--heads',
                        type=int,
                        default=1,
                        help='number of attention heads')
    parser.add_argument('--dropout',
                        type=float,
                        default=0.0,
                        help='dropout rate')
    parser.add_argument('--alpha',
                        type=float,
                        default=0.2,
                        help='alpha for the leaky_relu')
    parser.add_argument('--kfold',
                        type=int,
                        default=10,
                        help='number of folds for cross-validation')
    parser.add_argument('--batch_size',
                        type=int,
                        default=32,
                        help='batch size')
    parser.add_argument('--readout',
                        type=str,
                        default="add",
                        choices=["add", "mean"],
                        help='readout function: add, mean')
    args = parser.parse_args()

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    set_seed(args.seed)

    path = osp.join(osp.dirname(osp.realpath(__file__)), '.', 'data',
                    args.dataset)
    dataset = TUDataset(path, name=args.dataset,
                        pre_transform=Constant()).shuffle()

    train_graphs, test_graphs = separate_data(len(dataset), args.kfold)
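    # this separate_data variant takes the dataset size and fold count and
    # returns, for each fold, the train / test index arrays used below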

    kfold_num = args.kfold
    print('Dataset:', args.dataset)
    print('# of graphs:', len(dataset))
    print('# of classes:', dataset.num_classes)

    test_acc_values = torch.zeros(kfold_num, args.epochs)

    for idx in range(kfold_num):
        print(
            '============================================================================='
        )
        print(kfold_num, 'fold cross validation:', idx + 1)

        idx_train = train_graphs[idx]
        idx_test = test_graphs[idx]

        train_dataset = dataset[idx_train]
        test_dataset = dataset[idx_test]
        train_loader = DataLoader(train_dataset,
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  # worker_init_fn must be a callable taking
                                  # the worker id, not a bare seed value
                                  worker_init_fn=lambda wid: set_seed(args.seed + wid))
        test_loader = DataLoader(test_dataset, batch_size=args.batch_size)

        t_start = time.time()

        config = Config(mod=args.mod,
                        nhid=args.hid,
                        nclass=dataset.num_classes,
                        nfeat=dataset.num_features,
                        dropout=args.dropout,
                        heads=args.heads,
                        alpha=args.alpha,
                        n_layer=args.n_layer,
                        readout=args.readout)

        model = CPA(config).to(device)
        optimizer = optim.Adam(model.parameters(),
                               lr=args.lr,
                               weight_decay=args.wd,
                               amsgrad=False)
        scheduler = MultiStepLR(
            optimizer,
            milestones=[50, 100, 150, 200, 250, 300, 350, 400, 450, 500],
            gamma=0.5)
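        # halve the learning rate at each milestone epoch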

        for epoch in range(args.epochs):
            train_loss = train(model, train_loader, optimizer, device)
            train_acc = test(model, train_loader, device)
            test_acc = test(model, test_loader, device)
            test_acc_values[idx, epoch] = test_acc
            scheduler.step()

            print('Epoch {:03d}'.format(epoch + 1),
                  'train_loss: {:.4f}'.format(train_loss),
                  'train_acc: {:.4f}'.format(train_acc),
                  'test_acc: {:.4f}'.format(test_acc))

        print("Optimization Finished!")
        print("Total time elapsed: {:.4f}s".format(time.time() - t_start))

    print(
        '============================================================================='
    )
    mean_test_acc = torch.mean(test_acc_values, dim=0)
    best_epoch = int(torch.argmax(mean_test_acc).data)
    print('Best Epoch:', best_epoch + 1)
    print('Best Testing Accs:')
    for i in test_acc_values[:, best_epoch]:
        print('{:0.4f},'.format(i.item()), end='')
    print('\n')
    print('Averaged Best Testing Acc:')
    print('{:0.4f}'.format(mean_test_acc[best_epoch].item()))
Example #14
# (snippet truncated in the source: the opening of this color-map dict,
#  including key 0, is missing)
    1: 'orange',
    2: 'yellow',
    3: 'green',
    4: 'lime',
    5: 'blue',
    6: 'magenta'
}
#
#cmap = plt.cm.rainbow
#norm = matplotlib.colors.Normalize(vmin=0, vmax=5)

g_att = []
i = 0
np.random.seed(0)
graphs, num_classes = load_data("NCI1", False)
train_graphs, test_graphs = separate_data(graphs, 0, 0)
Gs = train_graphs[0:64]
load_path = input("Enter the attention file to load: ")
att5 = np.load(load_path)
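# att5[0] is treated as a flat per-node attention vector over the batched
# graphs; the loop below slices out each graph's block of nodes in order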
for g in Gs:
    n = g.g.number_of_nodes()
    g_att.append(list(att5[0, i:i + n]))
    i = i + n

i = 0
while True:
    se = input("Enter your selection (w = next graph): ")
    print(se)
    if se == "w":
        i = min(len(Gs) - 1, i + 1)
    else:
Example #15
    "fold_idx": 0,
    "num_layers": 5,
    "num_mlp_layers": 2,
    "hidden_dim": 64,
    "final_dropout": 0.5,
    "graph_pooling_type": 'sum',
    "neighbor_pooling_type": 'sum',
    "learn_eps": 'store_true',
    'degree_as_tag': 'store_true',
    'filename': 'output.txt'
})

loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
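# from_logits=True: the model is expected to output raw scores, so the
# softmax is folded into the loss computation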

graphs, num_classes = load_data(args.dataset, args.degree_as_tag)
train_graphs, test_graphs = separate_data(graphs, args.seed, args.fold_idx)
labels = tf.constant([graph.label for graph in train_graphs])

model = GraphCNN(args.num_layers, args.num_mlp_layers, args.hidden_dim,
                 num_classes, args.final_dropout, args.learn_eps,
                 args.graph_pooling_type, args.neighbor_pooling_type)

optimizer = tf.keras.optimizers.Adam(learning_rate=args.lr)  # 'lr' is a deprecated alias


#def train(loss,model,opt,original):
def train(args, model, train_graphs, opt, epoch):
    total_iters = args.iters_per_epoch
    pbar = tqdm(range(total_iters), unit='batch')

    loss_accum = 0
Example #16
from sklearn.metrics import roc_auc_score
# sklearn.cross_validation was removed in scikit-learn 0.20
from sklearn.model_selection import StratifiedKFold, cross_val_score

data_dir = '../../data'
cleaned_file = 'trauma_los_cleaned.csv'
extract_file = 'trauma_los_cleaned_extract.csv'
desired_headings = ['sex', 'normalvitals', 'gcs1', 'iss8', 'age65', 'transfer',
    'penetrating', 'mechcode', 'bodyregions', 'headany', 'faceany', 'neckany',
    'chestany', 'abdoany', 'spineany', 'upperlimbany', 'lowerlimbany', 'head3',
    'face3', 'neck3', 'chest3', 'abdo3', 'spine3', 'upper3', 'lower3',
    'operation', 'neurosurgery', 'laparotomy', 'thoracotomy', 'married',
    'english', 'mentalhealth', 'comorbidity', 'ssa']

# trim the input file into only the features we want to use
extract_features(data_dir, cleaned_file, extract_file, desired_headings)
X, y, headings = separate_data(True, data_dir, extract_file)
# convert all strings to ints and pack into arrays
X = np.array([[int(v) for v in row] for row in X])
y = np.array([int(v) for v in y])
n_samples, n_features = X.shape

# create a stratified 10-fold cross-validation iterator that generates the
# indices for us (in the model_selection API the labels are passed to
# split(), not to the constructor)
cv = StratifiedKFold(n_splits=10, shuffle=True)

# create a support vector machine classifier
clf_svm = svm.SVC(kernel='linear', probability=True)

# create a Gaussian naive Bayes classifier
clf_gauss_nb = GaussianNB()
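
The imports at the top of this snippet hint at how it continues; a plausible next step (an assumption, not shown in the source) is:

# scores = cross_val_score(clf_svm, X, y, cv=cv, scoring='roc_auc')
# print('SVM mean AUC: %.3f' % scores.mean())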
Example #17
def main():
    # Training settings
    # Note: Hyper-parameters need to be tuned in order to obtain results reported in the paper.
    parser = argparse.ArgumentParser(
        description=
        'PyTorch graph convolutional neural net for whole-graph classification'
    )
    parser.add_argument('--dataset',
                        type=str,
                        default="DPGraphGAN_Resampled_IMDB_MULTI",
                        help='name of dataset')
    parser.add_argument('--device',
                        type=int,
                        default=0,
                        help='which gpu to use if any (default: 0)')
    parser.add_argument('--batch_size',
                        type=int,
                        default=32,
                        help='input batch size for training (default: 32)')
    parser.add_argument(
        '--iters_per_epoch',
        type=int,
        default=50,
        help='number of iterations per each epoch (default: 50)')
    parser.add_argument('--epochs',
                        type=int,
                        default=50,
                        help='number of epochs to train (default: 50)')
    parser.add_argument('--lr',
                        type=float,
                        default=0.01,
                        help='learning rate (default: 0.01)')
    parser.add_argument(
        '--seed',
        type=int,
        default=0,
        help='random seed for splitting the dataset into 10 (default: 0)')
    parser.add_argument(
        '--fold_idx',
        type=int,
        default=0,
        help='the index of the fold in 10-fold validation. Should be less than 10.'
    )
    parser.add_argument(
        '--num_layers',
        type=int,
        default=5,
        help='number of layers INCLUDING the input one (default: 5)')
    parser.add_argument(
        '--num_mlp_layers',
        type=int,
        default=2,
        help=
        'number of layers for MLP EXCLUDING the input one (default: 2). 1 means linear model.'
    )
    parser.add_argument('--hidden_dim',
                        type=int,
                        default=64,
                        help='number of hidden units (default: 64)')
    parser.add_argument('--final_dropout',
                        type=float,
                        default=0.5,
                        help='final layer dropout (default: 0.5)')
    parser.add_argument(
        '--graph_pooling_type',
        type=str,
        default="sum",
        choices=["sum", "average"],
        help='Pooling over nodes in a graph: sum or average')
    parser.add_argument(
        '--neighbor_pooling_type',
        type=str,
        default="sum",
        choices=["sum", "average", "max"],
        help='Pooling over neighboring nodes: sum, average or max')
    parser.add_argument(
        '--learn_eps',
        action="store_true",
        help=
        'Whether to learn the epsilon weighting for the center nodes. Does not affect training accuracy though.'
    )
    parser.add_argument(
        '--degree_as_tag',
        type=int,
        default=1,
        help=
        'let the input node features be the degree of nodes (a heuristic for unlabeled graphs)'
    )
    parser.add_argument('--filename',
                        type=str,
                        default="log.txt",
                        help='output file')
    args = parser.parse_args()

    #set up seeds and gpu device
    torch.manual_seed(0)
    np.random.seed(0)
    device = torch.device(
        "cuda:" +
        str(args.device)) if torch.cuda.is_available() else torch.device("cpu")
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(0)

    graphs, num_classes = load_data(args.dataset, args.degree_as_tag)

    ##10-fold cross validation. Conduct an experiment on the fold specified by args.fold_idx.
    train_graphs, test_graphs = separate_data(graphs, args.seed, args.fold_idx)

    model = GraphCNN(args.num_layers, args.num_mlp_layers,
                     train_graphs[0].node_features.shape[1], args.hidden_dim,
                     num_classes, args.final_dropout, args.learn_eps,
                     args.graph_pooling_type, args.neighbor_pooling_type,
                     device).to(device)

    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.5)
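    # with epochs=50 and step_size=50, this StepLR decay fires at most once,
    # at the very last epoch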

    best_acc_test = 0
    # dir_path is assumed to be defined at module level in the original,
    # e.g. dir_path = os.path.dirname(os.path.realpath(__file__))
    file_path = dir_path + '/logs/' + args.dataset + '_' + args.filename
    if os.path.exists(file_path):
        os.remove(file_path)
    for epoch in range(1, args.epochs + 1):
        avg_loss = train(args, model, device, train_graphs, optimizer, epoch)
        acc_train, acc_test = test(args, model, device, train_graphs,
                                   test_graphs, epoch)
        # step the scheduler after the epoch's optimizer updates
        scheduler.step()
        print("%f %f %f" % (avg_loss, acc_train, acc_test))

        if acc_test > best_acc_test:
            with open(file_path, 'a') as f:
                f.write("%f %f %f" % (avg_loss, acc_train, acc_test))
                f.write("\n")
                best_acc_test = acc_test
        print("")

        print(model.eps)
Example #18
def main():
    # Training settings
    # Note: Hyper-parameters need to be tuned in order to obtain results reported in the paper.
    parser = argparse.ArgumentParser(description='PyTorch graph convolutional neural net for whole-graph classification')
    parser.add_argument('--dataset', type=str, default="MUTAG",
                        help='name of dataset (default: MUTAG)')
    parser.add_argument('--device', type=int, default=0,
                        help='which gpu to use if any (default: 0)')
    parser.add_argument('--batch_size', type=int, default=32,
                        help='input batch size for training (default: 32)')
    parser.add_argument('--iters_per_epoch', type=int, default=50,
                        help='number of iterations per each epoch (default: 50)')
    parser.add_argument('--epochs', type=int, default=50,
                        help='number of epochs to train (default: 50)')
    parser.add_argument('--lr', type=float, default=0.01,
                        help='learning rate (default: 0.01)')
    parser.add_argument('--seed', type=int, default=0,
                        help='random seed for splitting the dataset into 10 (default: 0)')
    parser.add_argument('--fold_idx', type=int, default=0,
                        help='the index of the fold in 10-fold validation. Should be less than 10.')
    parser.add_argument('--num_layers', type=int, default=5,
                        help='number of layers INCLUDING the input one (default: 5)')
    parser.add_argument('--num_mlp_layers', type=int, default=2,
                        help='number of layers for MLP EXCLUDING the input one (default: 2). 1 means linear model.')
    parser.add_argument('--hidden_dim', type=int, default=64,
                        help='number of hidden units (default: 64)')
    parser.add_argument('--final_dropout', type=float, default=0.5,
                        help='final layer dropout (default: 0.5)')
    parser.add_argument('--graph_pooling_type', type=str, default="sum", choices=["sum", "average"],
                        help='Pooling over nodes in a graph: sum or average')
    parser.add_argument('--neighbor_pooling_type', type=str, default="sum", choices=["sum", "average", "max"],
                        help='Pooling over neighboring nodes: sum, average or max')
    parser.add_argument('--learn_eps', action="store_true",
                        help='Whether to learn the epsilon weighting for the center nodes. Does not affect training accuracy though.')
    parser.add_argument('--degree_as_tag', action="store_true",
                        help='let the input node features be the degree of nodes (a heuristic for unlabeled graphs)')
    parser.add_argument('--filename', type=str, default="",
                        help='output file')
    # note: argparse's type=bool parses any non-empty string as True, so these
    # two switches are effectively always-on unless passed an empty string
    parser.add_argument('--bn', type=bool, default=True, help="Enable batchnorm for MLP")
    parser.add_argument('--gbn', type=bool, default=True, help="Enable batchnorm for graph")
    parser.add_argument('--corrupt_label', action="store_true",
                        help="Enable label corruption")
    parser.add_argument('--T', type=str, default="",
                        help="Label noise configuration T. Should be passed as a flattened string in row order, or as a single value for a symmetric noise config.")
    args = parser.parse_args()

    if args.dataset in {"MUTAG", "ENZYMES"}:
        load_data = load_tud_data

    if args.dataset in {"BIPARTITE"}:
        load_data = gen_bipartite 

    #set up seeds and gpu device
    torch.manual_seed(0)
    np.random.seed(0)    
    device = torch.device("cuda:" + str(args.device)) if torch.cuda.is_available() else torch.device("cpu")
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(0)

    if args.dataset in {"BIPARTITE"}:
        graphs, num_classes = load_data(200, perm_frac=0.0, p=0.2)
    else:
        graphs, num_classes = load_data(args.dataset, args.degree_as_tag)

    acc = []
    ##10-fold cross validation. Conduct an experiment on the fold specified by args.fold_idx.
    for fid in range(10):
        # use the loop variable as the fold index; the original passed
        # args.fold_idx, which evaluated the same fold on every iteration
        train_graphs, test_graphs = separate_data(graphs, args.seed, fid)

        model = GraphCNN(args.num_layers, args.num_mlp_layers, train_graphs[0].node_tags.shape[1], args.hidden_dim, num_classes, args.final_dropout, args.learn_eps, args.graph_pooling_type, args.neighbor_pooling_type, device, args.bn, args.gbn).to(device)

        optimizer = optim.Adam(model.parameters(), lr=args.lr)
        scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.5)

        for epoch in range(1, args.epochs + 1):

            avg_loss = train(args, model, device, train_graphs, optimizer, epoch)
            scheduler.step()
            acc_train, acc_test = test(args, model, device, train_graphs, test_graphs, epoch)

            if not args.filename == "":
                # append so per-epoch results accumulate instead of being
                # overwritten every epoch
                with open(args.filename, 'a') as f:
                    f.write("%f %f %f\n" % (avg_loss, acc_train, acc_test))
            # print("")

            # print(model.eps)
        acc.append(acc_test)

    print(np.mean(acc), np.std(acc))