Example #1
def run(is_train, hidden, data_name, random_state, use_cuda, save_path):

    if not is_train and save_path is None:
        raise RuntimeError("No saved model file to test: provide save_path or enable training.")

    device = "cpu"
    if use_cuda:
        device = "cuda"

    # Load Train data
    train_data = load_data(data_name=data_name,
                           train=True,
                           random_state=random_state)
    dim_features = train_data[0].shape[1]
    num_labels = max(train_data[2]).item() + 1

    model = GCN(dim_features, hidden, num_labels)

    # Train Phase
    if is_train:
        print("---Training Start---")
        train(model, train_data, device, save_path)
        print("---Training Done--- \n")

    if save_path is not None:
        # Load model
        model.load_state_dict(torch.load(save_path))

    # Test Phase
    print("---Test Start---")
    test_data = load_data(data_name=data_name, train=False, random_state=42)
    print("Test Accuracy: %2.2f %%" % get_acc(model, test_data, device))
    print("---Test Done--- \n")
Example #2
def main():
    data_generator = DataGenerator(args)
    meta_model = GCN(nfeat=args.in_f_d,
                     nhid=args.hidden,
                     nclass=args.nclasses,
                     dropout=args.dropout).to(device)
    proto_model = GCN_Proto(args, nfeat=args.hidden, dropout=args.dropout).to(device)
    structure_model = GCN_Structure(args, nfeat=args.hidden, nhid=args.structure_dim, dropout=args.dropout).to(
        device)

    if args.train:
        meta_optimiser = torch.optim.Adam(
            list(meta_model.parameters()) + list(proto_model.parameters()) + list(structure_model.parameters()),
            lr=args.meta_lr, weight_decay=args.weight_decay)
        train(args, meta_model, meta_optimiser, proto_model, structure_model,
              metatrain_iterations=args.metatrain_iterations,
              data_generator=data_generator, fit_function=meta_gradient_step,
              fit_function_kwargs={'train': True, 'inner_train_steps': args.inner_train_steps,
                                   'inner_lr': args.inner_lr, 'batch_n': args.batch_n, 'device': device})

    else:
        if args.test_load_epoch > 0:
            meta_model.load_state_dict(
                torch.load(args.logdir + '/' + exp_string + '/' + 'model_epoch_{}'.format(args.test_load_epoch)))
            proto_model.load_state_dict(
                torch.load(
                    args.logdir + '/' + exp_string + '/' + 'proto_model_epoch_{}'.format(args.test_load_epoch)))
            structure_model.load_state_dict(
                torch.load(
                    args.logdir + '/' + exp_string + '/' + 'structure_model_epoch_{}'.format(
                        args.test_load_epoch)))
        meta_optimiser = torch.optim.Adam(list(meta_model.parameters()) + list(proto_model.parameters()),
                                          lr=args.meta_lr, weight_decay=args.weight_decay)
        evaluate(args, meta_model, meta_optimiser, proto_model, structure_model, data_generator=data_generator,
                 fit_function=meta_gradient_step,
                 fit_function_kwargs={'train': False, 'inner_train_steps': args.inner_train_steps,
                                      'inner_lr': args.inner_lr_test, 'batch_n': args.test_sample_g_n,
                                      'device': device})
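The three checkpoint paths above are assembled by string concatenation; an equivalent helper built on `os.path.join` (a sketch, with the hypothetical name `ckpt_path`) keeps the path logic in one place:

import os

def ckpt_path(logdir, exp_string, name, epoch):
    # e.g. ckpt_path('logs', 'exp1', 'proto_model', 7) -> 'logs/exp1/proto_model_epoch_7'
    return os.path.join(logdir, exp_string, '{}_epoch_{}'.format(name, epoch))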
Example #3
            else:
                bad_counter += 1

            if bad_counter >= args.early_stopping and loss_val < args.loss_threshold:
                print("Early stopping...")
                break

    print("Optimization Finished!")
    total_time = time.time() - t_total
    mean_time = np.mean(epoch_time_list)
    print("Total time elapsed: {:.4f}s".format(total_time))
    print("Time per epoch: {:.4f}s".format(mean_time))

    if args.early_stopping and args.save:
        print('Loading {}th epoch'.format(best_epoch))
        model.load_state_dict(torch.load('{}.pkl'.format(run_id)))

    # Testing
    acc = test()
    valacc_list.append(acc_val)
    acc_list.append(acc)
    total_time_list.append(total_time)
    mean_time_list.append(mean_time)
avgvalacc = np.mean(valacc_list)
avgacc = np.mean(acc_list)
avg_total_time = np.mean(total_time_list)
avg_mean_time = np.mean(mean_time_list)
stdvalacc = np.std(valacc_list)
stdacc = np.std(acc_list)

print(
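The `bad_counter` pattern above only stops once the validation loss has both stalled for `args.early_stopping` epochs and already dropped below `args.loss_threshold`. The same logic in isolation, with stand-in values for the two thresholds:

import numpy as np

patience, loss_threshold = 10, 0.5        # stand-ins for args.early_stopping / args.loss_threshold
best_loss, bad_counter = np.inf, 0
for epoch, loss_val in enumerate(np.random.rand(200)):   # dummy validation losses
    if loss_val < best_loss:
        best_loss, bad_counter = loss_val, 0
    else:
        bad_counter += 1
    if bad_counter >= patience and loss_val < loss_threshold:
        print("Early stopping at epoch", epoch)
        break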
Example #4
            epoch_nb = int(file.split('.')[0])
            if epoch_nb < best_epoch:
                os.remove(file)

files = glob.glob('*.pkl')
for file in files:
    epoch_nb = int(file.split('.')[0])
    if epoch_nb > best_epoch:
        os.remove(file)

print("Optimization Finished!")
print("Total time elapsed: {:.4f}s".format(time.time() - t_total))

# Restore best model
print('Loading {}th epoch'.format(best_epoch))
model.load_state_dict(torch.load('{}.pkl'.format(best_epoch)))

# Testing
acc_sum = 0
loss_sum = 0
with torch.no_grad():
    model.eval()
    for subgraph in content_g:
        index = content_g[subgraph]['index_subgraph']
        idx_test = content_g[subgraph]['idx_test']
        adj = content_g[subgraph]['adj']
        adj = torch.FloatTensor(np.array(adj.todense()))
        if args.cuda:
            adj = adj.cuda()
        output = model(features[index], adj)
        labels_test = labels[index]
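The snippet cuts off before the per-subgraph loss and accuracy are accumulated into `loss_sum`/`acc_sum`; a sketch of a helper for that step, assuming log-probability outputs as in the other examples:

import torch
import torch.nn.functional as F

def masked_metrics(output, labels, idx):
    # Hypothetical helper: NLL loss and accuracy restricted to one subgraph's test indices.
    loss = F.nll_loss(output[idx], labels[idx])
    acc = (output[idx].argmax(dim=1) == labels[idx]).float().mean()
    return loss.item(), acc.item()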
Example #5
    node_heat_map = np.array(node_heat_map[:mol.GetNumAtoms()]).reshape(-1, 1)
    pos_node_heat_map = MinMaxScaler(feature_range=(0, 1)).fit_transform(
        node_heat_map * (node_heat_map >= 0)).reshape(-1, )
    neg_node_heat_map = MinMaxScaler(feature_range=(-1, 0)).fit_transform(
        node_heat_map * (node_heat_map < 0)).reshape(-1, )
    return pos_node_heat_map + neg_node_heat_map


dataset = load_bbbp(hp.N)
random.Random(hp.shuffle_seed).shuffle(dataset)
split_idx = int(np.floor(len(dataset) * hp.train_frac))
test_dataset = dataset[split_idx:]
loader = DataLoader(test_dataset, batch_size=1, shuffle=False)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = GCN(hp.H_0, hp.H_1, hp.H_2, hp.H_3).to(device)
model.load_state_dict(torch.load('gcn_state_dict.pt'))
model.eval()

optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

print(model)
model.train()
total_loss = 0
for data in tqdm(loader):
    # breakpoint()
    data = data.to(device)
    optimizer.zero_grad()
    out = model(data)
    loss = F.binary_cross_entropy(out, data.y)
    loss.backward()
    try:
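The heat-map post-processing at the top rescales positive and negative atom contributions separately so both land in [-1, 1]. A toy run of the same transform:

import numpy as np
from sklearn.preprocessing import MinMaxScaler

heat = np.array([[-3.0], [-1.0], [0.0], [2.0], [4.0]])
pos = MinMaxScaler(feature_range=(0, 1)).fit_transform(heat * (heat >= 0)).reshape(-1)
neg = MinMaxScaler(feature_range=(-1, 0)).fit_transform(heat * (heat < 0)).reshape(-1)
print(pos + neg)   # negatives map into [-1, 0], positives into [0, 1]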
Example #6
    torch.cuda.manual_seed(args.seed)

# Load data
#adj, features, labels, idx_train, idx_val, idx_test = load_data()
adj, A_tilde, adj_sct1, adj_sct2, adj_sct4, features, labels, idx_train, idx_val, idx_test = load_citation(
    args.dataset, args.normalization, args.cuda)
# Model and optimizer
model = GCN(nfeat=features.shape[1],
            para3=args.hid1,
            para4=args.hid2,
            nclass=labels.max().item() + 1,
            dropout=args.dropout,
            smoo=args.smoo)

PATH = "state_dict_model.pt"
model.load_state_dict(torch.load(PATH))
if args.cuda:
    model = model.cuda()
    features = features.cuda()
    A_tilde = A_tilde.cuda()
    adj = adj.cuda()
    labels = labels.cuda()
    idx_train = idx_train.cuda()
    idx_val = idx_val.cuda()
    idx_test = idx_test.cuda()

optimizer = optim.Adam(model.parameters(),
                       lr=args.lr,
                       weight_decay=args.weight_decay)
scheduler = StepLR(optimizer, step_size=50, gamma=0.9)
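`StepLR(optimizer, step_size=50, gamma=0.9)` multiplies every parameter group's learning rate by 0.9 each 50 calls to `scheduler.step()`, i.e. every 50 epochs if stepped once per epoch. A minimal demonstration with a stand-in parameter:

import torch
from torch.optim.lr_scheduler import StepLR

opt = torch.optim.Adam([torch.nn.Parameter(torch.zeros(1))], lr=0.01)
sched = StepLR(opt, step_size=50, gamma=0.9)
for epoch in range(150):
    opt.step()       # the training step would go here
    sched.step()
print(opt.param_groups[0]['lr'])   # 0.01 * 0.9**3 = 0.00729 after 150 epochs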
Example #7
parser.add_argument('n_bits', type=int, nargs='?', default=32)  # nargs='?' lets the default apply
args = parser.parse_args()

n_anchor = 1000
n_bits = args.n_bits
n_class = 21
n_epoch = 10
topk = 15
# dataset: 'cifar10', 'nuswide', 'ImageNet', 'sun'
dataset = 'nuswide'
dset = load_data(dataset)

meta = torch.load('nuswide_2000_32_0.4454_0.5912')
anchor = meta['anchor']
gcn = GCN(500, n_bits, n_class, meta['anchor_affnty'], 40)
gcn.load_state_dict(meta['state_dict'])
gcn.cuda()

test_loader = data.DataLoader(dataset=db(dset.testdata, dset.testlabel),
                              batch_size=100,
                              shuffle=False,
                              num_workers=4)
tH = []
gcn.eval()
for images, _ in test_loader:
    in_aff, out_aff = rbf_affnty(images, anchor, topk=topk)
    images = Variable(images).cuda()
    in_aff = Variable(in_aff).cuda()
    out_aff = Variable(out_aff).cuda()

    out, _ = gcn(images, in_aff, out_aff)
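The loop is truncated before `tH` is filled; in hashing pipelines like this one, the real-valued outputs are typically binarized by their sign before retrieval metrics are computed, roughly:

import torch

out = torch.randn(100, 32)               # stand-in for the network output (n_bits = 32)
codes = torch.sign(out).cpu().numpy()    # one +/-1 binary code per test image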
Example #8
normalise = softmax_normalisation  #manual entry

# Model and optimizer
model = GCN(dims=dims,
            dropout=dropout,
            adj=adj,
            nrm_mthd=nrm_mthd,
            learnable=blearnable,
            projection=bprojection)
optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
#results on the untrained version of the model.
#model(features)
#over_smoothing(model.embeddings_dict)

checkpoint = torch.load(path + 'model-optimised.pt')
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

print(values.shape)
nrm = SphToAdj(indices, model.edge_weights.detach(), size)

print('-')
for node in range(2, 10):
    print(nrm[torch.where(indices[0] == node)[0]])
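Each print dumps the normalised weights of one node's outgoing edges; under the softmax normalisation chosen at the top, every such row should sum to 1. A self-contained check of that property on a toy sparse adjacency:

import torch

indices = torch.tensor([[0, 0, 1, 1, 1], [1, 2, 0, 2, 3]])   # toy COO edge list
adj = torch.sparse_coo_tensor(indices, torch.randn(5), size=(4, 4))
nrm = torch.sparse.softmax(adj, dim=1).coalesce().values()
print(nrm[torch.where(indices[0] == 1)[0]].sum())            # tensor(1.)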
Example #9
def train_gcn(dataset,
              test_ratio=0.5,
              val_ratio=0.2,
              seed=1,
              n_hidden=64,
              n_epochs=200,
              lr=1e-2,
              weight_decay=5e-4,
              dropout=0.5,
              use_embs=False,
              verbose=True,
              cuda=False):
    data = dataset.get_data()
    # train text embs
    if use_embs:
        pad_ix, n_tokens, matrix, pretrained_embs = data['features']
        if pretrained_embs is not None:
            pretrained_embs = torch.FloatTensor(pretrained_embs)
        features = torch.LongTensor(matrix)
    else:
        pad_ix = None
        n_tokens = None
        pretrained_embs = None
        features = torch.FloatTensor(data['features'])

    labels = torch.LongTensor(data['labels'])
    n = len(data['ids'])
    train_mask, val_mask, test_mask = get_masks(n,
                                                data['main_ids'],
                                                data['main_labels'],
                                                test_ratio=test_ratio,
                                                val_ratio=val_ratio,
                                                seed=seed)

    train_mask = torch.BoolTensor(train_mask)
    val_mask = torch.BoolTensor(val_mask)
    test_mask = torch.BoolTensor(test_mask)

    if cuda:
        torch.cuda.set_device("cuda:0")
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()

    g = DGLGraph(data['graph'])
    g = dgl.transform.add_self_loop(g)
    n_edges = g.number_of_edges()

    degs = g.in_degrees().float()
    norm = torch.pow(degs, -0.5)
    norm[torch.isinf(norm)] = 0

    if cuda:
        norm = norm.cuda()

    g.ndata['norm'] = norm.unsqueeze(1)

    if use_embs:
        if pretrained_embs is not None:
            in_feats = 100
        else:
            in_feats = 64
    else:
        in_feats = features.shape[1]

    # + 1 for unknown class
    n_classes = data['n_classes'] + 1
    model = GCN(g,
                in_feats=in_feats,
                n_hidden=n_hidden,
                n_classes=n_classes,
                activation=F.relu,
                dropout=dropout,
                use_embs=use_embs,
                pretrained_embs=pretrained_embs,
                pad_ix=pad_ix,
                n_tokens=n_tokens)

    if cuda:
        model.cuda()

    loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=lr,
                                 weight_decay=weight_decay)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           mode='min',
                                                           factor=0.9,
                                                           patience=20,
                                                           min_lr=1e-10)

    best_f1 = -100
    # initialize graph
    dur = []
    for epoch in range(n_epochs):
        model.train()
        if epoch >= 3:
            t0 = time.time()
        # forward
        mask_probs = torch.empty(features.shape).uniform_(0, 1)
        if cuda:
            mask_probs = mask_probs.cuda()

        mask_features = torch.where(mask_probs > 0.2, features,
                                    torch.zeros_like(features))
        logits = model(mask_features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)

        f1 = evaluate(model, features, labels, val_mask)
        scheduler.step(1 - f1)
        if f1 > best_f1:
            best_f1 = f1
            torch.save(model.state_dict(), 'best_model.pt')

        if verbose:
            print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | F1 {:.4f} | "
                  "ETputs(KTEPS) {:.2f}".format(epoch, np.mean(dur),
                                                loss.item(), f1,
                                                n_edges / np.mean(dur) / 1000))

    model.load_state_dict(torch.load('best_model.pt'))
    f1 = evaluate(model, features, labels, test_mask)

    if verbose:
        print()
        print("Test F1 {:.2}".format(f1))

    return f1
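One detail worth isolating from the training loop above: the input features are randomly zeroed with probability 0.2 every epoch, an input-dropout-style augmentation. The trick on its own:

import torch

features = torch.randn(5, 8)
mask_probs = torch.empty(features.shape).uniform_(0, 1)
masked = torch.where(mask_probs > 0.2, features, torch.zeros_like(features))
# each entry survives with probability 0.8 and is zeroed otherwise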
Example #10
        loss.backward()
        optimizer.step()

        if (i + 1) % 100 == 0:
            print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f' %
                  (epoch + 1, n_epoch, i + 1, len(unlabeled_loader),
                   loss.item()))  # .item() extracts the scalar loss

torch.save(
    {
        'state_dict': gcn.state_dict(),
        'mean_val': mean_val,
        'anchor': anchor,
        'anchor_affnty': anchor_affnty
    }, './ImageNet_%d_%d' % (n_labeled, n_bits))
'''
model = torch.load('./ImageNet_%d_%d' % (n_labeled, n_bits))
print(model.keys())
anchor = model['anchor']
anchor_affnty = model['anchor_affnty']
gcn = GCN(4096, n_bits, 1000, anchor_affnty, 40)
gcn.load_state_dict(model['state_dict'])
gcn.cuda()
'''

traindata, testdata = load_ImageNet_full(mean_val)

train_loader = data.DataLoader(dataset=db(traindata, None),
                               batch_size=100,
                               shuffle=False,
                               num_workers=4)
Example #11
def main(args):
    # convert boolean type for args
    assert args.use_ist in ['True', 'False'], ["Only True or False for use_ist, get ",
                                               args.use_ist]
    assert args.split_input in ['True', 'False'], ["Only True or False for split_input, get ",
                                                   args.split_input]
    assert args.split_output in ['True', 'False'], ["Only True or False for split_output, get ",
                                                   args.split_output]
    assert args.self_loop in ['True', 'False'], ["Only True or False for self_loop, get ",
                                                 args.self_loop]
    assert args.use_layernorm in ['True', 'False'], ["Only True or False for use_layernorm, get ",
                                                     args.use_layernorm]
    assert args.use_random_proj in ['True', 'False'], ["Only True or False for use_random_proj, get ",
                                                       args.use_random_proj]
    use_ist = (args.use_ist == 'True')
    split_input = (args.split_input == 'True')
    split_output = (args.split_output == 'True')
    self_loop = (args.self_loop == 'True')
    use_layernorm = (args.use_layernorm == 'True')
    use_random_proj = (args.use_random_proj == 'True')

    # make sure hidden layer is the correct shape
    assert (args.n_hidden % args.num_subnet) == 0

    # load and preprocess dataset
    global t0
    if args.dataset in {'cora', 'citeseer', 'pubmed'}:
        data = load_data(args)
    else:
        raise NotImplementedError(f'{args.dataset} is not a valid dataset')

    # randomly project the input to make it dense
    if use_random_proj:
        # densify input features with random projection
        from sklearn import random_projection

        # make sure input features are divisible by number of subnets
        # otherwise some parameters of the last subnet will be handled improperly
        n_components = int(data.features.shape[-1] / args.num_subnet) * args.num_subnet
        transformer = random_projection.GaussianRandomProjection(n_components=n_components)
        new_feature = transformer.fit_transform(data.features)
        features = torch.FloatTensor(new_feature)
    else:
        assert (data.features.shape[-1] % args.num_subnet) == 0.
        features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    train_mask = torch.ByteTensor(data.train_mask)
    val_mask = torch.ByteTensor(data.val_mask)
    test_mask = torch.ByteTensor(data.test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.sum().item(),
           val_mask.sum().item(),
           test_mask.sum().item()))
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    features = features.to(device)
    labels = labels.to(device)
    train_mask = train_mask.to(device)
    val_mask = val_mask.to(device)
    test_mask = test_mask.to(device)

    # graph preprocess and calculate normalization factor
    g = data.graph
    # add self loop
    if self_loop:
        g.remove_edges_from(nx.selfloop_edges(g))
        g.add_edges_from(zip(g.nodes(), g.nodes()))
    g = DGLGraph(g)
    g = g.to(device)
    n_edges = g.number_of_edges()
    
    # normalization
    degs = g.in_degrees().float()
    norm = torch.pow(degs, -0.5)
    norm[torch.isinf(norm)] = 0
    norm = norm.to(device)
    g.ndata['norm'] = norm.unsqueeze(1)

    # create GCN model
    model = GCN(
            g, in_feats, args.n_hidden, n_classes, args.n_layers, F.relu,
            args.dropout, use_layernorm)
    model = model.to(device)
    loss_fcn = torch.nn.CrossEntropyLoss()

    # initialize graph
    dur = []
    record = []
    sub_models = []
    opt_list = []
    sub_dict_list = []
    main_dict = None
    for epoch in range(args.n_epochs):
        if epoch >= 3:
            t0 = time.time()
        if use_ist:
            model.eval()
            # IST training:
            # Distribute parameter to sub networks
            num_subnet = args.num_subnet
            if (epoch % args.iter_per_site) == 0.:
                main_dict = model.state_dict()
                feats_idx = [] # store all layer indices within a single list

                # create input partition
                if split_input:
                    feats_idx.append(torch.chunk(torch.randperm(in_feats), num_subnet))
                else:
                    feats_idx.append(None)

                # create hidden layer partitions
                for i in range(1, args.n_layers):
                    feats_idx.append(torch.chunk(torch.randperm(args.n_hidden), num_subnet))

                # create output layer partitions
                if split_output:
                    feats_idx.append(torch.chunk(torch.randperm(args.n_hidden), num_subnet))
                else:
                    feats_idx.append(None)

            for subnet_id in range(args.num_subnet):
                if (epoch % args.iter_per_site) == 0.:
                    # create the sub model to train
                    sub_model = GCN(
                            g, in_feats, args.n_hidden, n_classes,
                            args.n_layers, F.relu, args.dropout, use_layernorm,
                            split_input, split_output, args.num_subnet) 
                    sub_model = sub_model.to(device)
                    sub_dict = main_dict.copy()

                    # split input params
                    if split_input:
                        idx = feats_idx[0][subnet_id]
                        sub_dict['layers.0.weight'] = main_dict['layers.0.weight'][idx, :]

                    # split hidden params (and output params)
                    for i in range(1, args.n_layers + 1):
                        if i == args.n_layers and not split_output:
                            pass # params stay the same 
                        else:
                            idx = feats_idx[i][subnet_id]
                            sub_dict[f'layers.{i - 1}.weight'] = sub_dict[f'layers.{i - 1}.weight'][:, idx]
                            sub_dict[f'layers.{i - 1}.bias'] = main_dict[f'layers.{i - 1}.bias'][idx]
                            sub_dict[f'layers.{i}.weight'] = main_dict[f'layers.{i}.weight'][idx, :]

                    # use a lr scheduler
                    curr_lr = args.lr
                    if epoch >= int(args.n_epochs*0.5):
                        curr_lr /= 10
                    if epoch >= int(args.n_epochs*0.75):
                        curr_lr /= 10

                    # import params into subnet for training
                    sub_model.load_state_dict(sub_dict)
                    sub_models.append(sub_model)
                    sub_models = sub_models[-num_subnet:]
                    optimizer = torch.optim.Adam(
                            sub_model.parameters(), lr=curr_lr,
                            weight_decay=args.weight_decay)
                    opt_list.append(optimizer)
                    opt_list = opt_list[-num_subnet:]
                else:
                    sub_model = sub_models[subnet_id]
                    optimizer = opt_list[subnet_id]

                # train a sub network
                optimizer.zero_grad()
                sub_model.train()
                if split_input:
                    model_input = features[:, feats_idx[0][subnet_id]]
                else:
                    model_input = features
                logits = sub_model(model_input)
                loss = loss_fcn(logits[train_mask], labels[train_mask])

                # reset optimization for every sub training
                loss.backward()
                optimizer.step()

                # save sub model parameter
                if (
                        ((epoch + 1) % args.iter_per_site == 0.)
                        or (epoch == args.n_epochs - 1)):
                    sub_dict = sub_model.state_dict()
                    sub_dict_list.append(sub_dict)
                    sub_dict_list = sub_dict_list[-num_subnet:]

            # Merge parameter to main network:
            # force aggregation if training about to end
            if (
                    ((epoch + 1) % args.iter_per_site == 0.)
                    or (epoch == args.n_epochs - 1)):
                #keys = main_dict.keys()
                update_dict = main_dict.copy()

                # copy in the input parameters
                if split_input:
                    if args.n_layers <= 1 and not split_output:
                        for idx, sub_dict in zip(feats_idx[0], sub_dict_list):
                            update_dict['layers.0.weight'][idx, :] = sub_dict['layers.0.weight']
                    else:
                        for i, sub_dict in enumerate(sub_dict_list):
                            curr_idx = feats_idx[0][i]
                            next_idx = feats_idx[1][i]
                            correct_rows = update_dict['layers.0.weight'][curr_idx, :]
                            correct_rows[:, next_idx] = sub_dict['layers.0.weight']
                            update_dict['layers.0.weight'][curr_idx, :] = correct_rows
                else:
                    if args.n_layers <= 1 and not split_output:
                        update_dict['layers.0.weight'] = sum(sub_dict['layers.0.weight'] for sub_dict in sub_dict_list) / len(sub_dict_list)
                    else:
                        for i, sub_dict in enumerate(sub_dict_list):
                            next_idx = feats_idx[1][i]
                            update_dict['layers.0.weight'][:, next_idx] = sub_dict['layers.0.weight']

                # copy the rest of the parameters
                for i in range(1, args.n_layers + 1):
                    if i == args.n_layers:
                        if not split_output:
                            update_dict[f'layers.{i-1}.bias'] = sum(sub_dict[f'layers.{i-1}.bias'] for sub_dict in sub_dict_list) / len(sub_dict_list)
                            update_dict[f'layers.{i}.weight'] = sum(sub_dict[f'layers.{i}.weight'] for sub_dict in sub_dict_list) / len(sub_dict_list)
                        else:
                            for idx, sub_dict in zip(feats_idx[i], sub_dict_list):
                                update_dict[f'layers.{i-1}.bias'][idx] = sub_dict[f'layers.{i-1}.bias']
                                update_dict[f'layers.{i}.weight'][idx, :] = sub_dict[f'layers.{i}.weight']
                    else:
                        if i >= args.n_layers - 1 and not split_output:
                            for idx, sub_dict in zip(feats_idx[i], sub_dict_list):
                                update_dict[f'layers.{i-1}.bias'][idx] = sub_dict[f'layers.{i-1}.bias']
                                update_dict[f'layers.{i}.weight'][idx, :] = sub_dict[f'layers.{i}.weight']
                        else:
                            for sub_id, sub_dict in enumerate(sub_dict_list):
                                curr_idx = feats_idx[i][sub_id]
                                next_idx = feats_idx[i + 1][sub_id]
                                update_dict[f'layers.{i-1}.bias'][curr_idx] = sub_dict[f'layers.{i-1}.bias']
                                correct_rows = update_dict[f'layers.{i}.weight'][curr_idx, :]
                                correct_rows[:, next_idx] = sub_dict[f'layers.{i}.weight']
                                update_dict[f'layers.{i}.weight'][curr_idx, :] = correct_rows
                model.load_state_dict(update_dict)

        else:
            raise NotImplementedError('Should train with IST')

        if epoch >= 3:
            dur.append(time.time() - t0)

        acc_val = evaluate(model, features, labels, val_mask)
        acc_test = evaluate(model, features, labels, test_mask)
        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Val Accuracy {:.4f} | Test Accuracy {:.4f} |"
              "ETputs(KTEPS) {:.2f}".format(epoch, np.mean(dur), loss.item(),
                                            acc_val, acc_test, n_edges / np.mean(dur) / 1000))
        record.append([acc_val, acc_test])

    all_test_acc = [v[1] for v in record]
    all_val_acc = [v[0] for v in record]
    acc = evaluate(model, features, labels, test_mask)
    print(f"Final Test Accuracy: {acc:.4f}")
    print(f"Best Val Accuracy: {max(all_val_acc):.4f}")
    print(f"Best Test Accuracy: {max(all_test_acc):.4f}")
Example #12

# Train model
t_total = time.time()
loss_values = []
bad_counter = 0
best_loss = np.inf
best_epoch = 0

for epoch in range(args.epochs):
    loss_values.append(train(epoch))

    if loss_values[-1] < best_loss:
        best_loss = loss_values[-1]
        best_epoch = epoch
        bad_counter = 0
        best_state_dict = copy.deepcopy(model.state_dict())
    else:
        bad_counter += 1

    if bad_counter == args.patience:
        break

print("Optimization Finished!")
print("Total time elapsed: {:.4f}s".format(time.time() - t_total))

# Restore best model
print('Loading {}th epoch'.format(best_epoch))
model.load_state_dict(best_state_dict)
acc_test = test()
Example #13
          'loss_train: {:.4f}'.format(loss_train.item()),
          'acc_train: {:.4f}'.format(acc_train.item()),
          'loss_val: {:.4f}'.format(loss_val.item()),
          'acc_val: {:.4f}'.format(acc_val.item()))

    return loss_val.item(), acc_val.item()


def test():
    model.eval()
    output = model(features, adj)
    loss_test = F.nll_loss(output[idx_test], labels[idx_test])
    acc_test = accuracy(output[idx_test], labels[idx_test])
    print("Test set results:", "loss= {:.4f}".format(loss_test.item()),
          "accuracy= {:.4f}".format(acc_test.item()))


stopping_args = Stop_args(patience=args.patience, max_epochs=args.epochs)
early_stopping = EarlyStopping(model, **stopping_args)
for epoch in range(args.epochs):
    loss_val, acc_val = train(epoch)
    if early_stopping.check([acc_val, loss_val], epoch):
        break

print("Optimization Finished!")

# Restore best model
print('Loading {}th epoch'.format(early_stopping.best_epoch))
model.load_state_dict(early_stopping.best_state)
test()
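`Stop_args` and `EarlyStopping` come from the project and are not shown; a minimal stand-in consistent with the calls above (`check([acc_val, loss_val], epoch)`, `best_epoch`, `best_state`) might be:

import copy

class EarlyStopping:
    # Hypothetical sketch: stop once val accuracy hasn't improved for `patience` epochs.
    def __init__(self, model, patience=100, max_epochs=1000):
        self.model, self.patience, self.max_epochs = model, patience, max_epochs
        self.best_acc, self.best_epoch, self.best_state = -1.0, 0, None
        self.bad_epochs = 0

    def check(self, metrics, epoch):
        acc_val, _loss_val = metrics
        if acc_val > self.best_acc:
            self.best_acc, self.best_epoch = acc_val, epoch
            self.best_state = copy.deepcopy(self.model.state_dict())
            self.bad_epochs = 0
        else:
            self.bad_epochs += 1
        return self.bad_epochs >= self.patience or epoch + 1 >= self.max_epochs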
Example #14
def main():
    net = GCN(num_classes=num_classes,
              input_size=train_args['input_size']).cuda()
    if len(train_args['snapshot']) == 0:
        curr_epoch = 0
    else:
        print('training resumes from ' + train_args['snapshot'])
        net.load_state_dict(
            torch.load(
                os.path.join(ckpt_path, exp_name, train_args['snapshot'])))
        split_snapshot = train_args['snapshot'].split('_')
        curr_epoch = int(split_snapshot[1])
        train_record['best_val_loss'] = float(split_snapshot[3])
        train_record['corr_mean_iu'] = float(split_snapshot[6])
        train_record['corr_epoch'] = curr_epoch

    net.train()

    mean_std = ([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    train_simul_transform = simul_transforms.Compose([
        simul_transforms.Scale(int(train_args['input_size'][0] / 0.875)),
        simul_transforms.RandomCrop(train_args['input_size']),
        simul_transforms.RandomHorizontallyFlip()
    ])
    val_simul_transform = simul_transforms.Compose([
        simul_transforms.Scale(int(train_args['input_size'][0] / 0.875)),
        simul_transforms.CenterCrop(train_args['input_size'])
    ])
    img_transform = standard_transforms.Compose([
        standard_transforms.ToTensor(),
        standard_transforms.Normalize(*mean_std)
    ])
    target_transform = standard_transforms.Compose([
        expanded_transforms.MaskToTensor(),
        expanded_transforms.ChangeLabel(ignored_label, num_classes - 1)
    ])
    restore_transform = standard_transforms.Compose([
        expanded_transforms.DeNormalize(*mean_std),
        standard_transforms.ToPILImage()
    ])

    train_set = CityScapes('train',
                           simul_transform=train_simul_transform,
                           transform=img_transform,
                           target_transform=target_transform)
    train_loader = DataLoader(train_set,
                              batch_size=train_args['batch_size'],
                              num_workers=16,
                              shuffle=True)
    val_set = CityScapes('val',
                         simul_transform=val_simul_transform,
                         transform=img_transform,
                         target_transform=target_transform)
    val_loader = DataLoader(val_set,
                            batch_size=val_args['batch_size'],
                            num_workers=16,
                            shuffle=False)

    weight = torch.ones(num_classes)
    weight[num_classes - 1] = 0
    criterion = CrossEntropyLoss2d(weight).cuda()

    # don't use weight_decay for bias
    optimizer = optim.SGD([{
        'params': [
            param for name, param in net.named_parameters()
            if name[-4:] == 'bias' and ('gcm' in name or 'brm' in name)
        ],
        'lr':
        2 * train_args['new_lr']
    }, {
        'params': [
            param for name, param in net.named_parameters()
            if name[-4:] != 'bias' and ('gcm' in name or 'brm' in name)
        ],
        'lr':
        train_args['new_lr'],
        'weight_decay':
        train_args['weight_decay']
    }, {
        'params': [
            param for name, param in net.named_parameters()
            if name[-4:] == 'bias' and not ('gcm' in name or 'brm' in name)
        ],
        'lr':
        2 * train_args['pretrained_lr']
    }, {
        'params': [
            param for name, param in net.named_parameters()
            if name[-4:] != 'bias' and not ('gcm' in name or 'brm' in name)
        ],
        'lr':
        train_args['pretrained_lr'],
        'weight_decay':
        train_args['weight_decay']
    }],
                          momentum=0.9,
                          nesterov=True)

    if len(train_args['snapshot']) > 0:
        optimizer.load_state_dict(
            torch.load(
                os.path.join(ckpt_path, exp_name,
                             'opt_' + train_args['snapshot'])))
        optimizer.param_groups[0]['lr'] = 2 * train_args['new_lr']
        optimizer.param_groups[1]['lr'] = train_args['new_lr']
        optimizer.param_groups[2]['lr'] = 2 * train_args['pretrained_lr']
        optimizer.param_groups[3]['lr'] = train_args['pretrained_lr']

    if not os.path.exists(ckpt_path):
        os.mkdir(ckpt_path)
    if not os.path.exists(os.path.join(ckpt_path, exp_name)):
        os.mkdir(os.path.join(ckpt_path, exp_name))

    for epoch in range(curr_epoch, train_args['epoch_num']):
        train(train_loader, net, criterion, optimizer, epoch)
        validate(val_loader, net, criterion, optimizer, epoch,
                 restore_transform)
Example #15
    acc_test = accuracy(output[idx_test], labels[idx_test])
    print("Test set results:", "loss= {:.4f}".format(loss_test.item()),
          "accuracy= {:.4f}".format(acc_test.item()))
    return acc_test


model_file = 'model_save/' + args.dataset + '.pkl'
# Train model
t_total = time.time()
max_acc = 0
acc_list = []
for epoch in range(args.epochs):
    val_acc = train(epoch)
    acc_list.append(val_acc)  # record every epoch so max(acc_list) is always defined
    if val_acc > max_acc:
        max_acc = val_acc
        torch.save(model.state_dict(), model_file)

if args.load_best:
    model.load_state_dict(torch.load(model_file))
print(max(acc_list))
print("Optimization Finished!")
print("Total time elapsed: {:.4f}s".format(time.time() - t_total))

# Testing
acc_test = test()
if len(args.save_file) > 0:
    with open(args.save_file, 'a') as f:
        f.write('GCN %.4f' % acc_test)
        f.write('\n')
Example #16
File: train.py Project: victai/SDML
            vecs = []
            cnt += sum(res)
            for j in res:
                print(int(j), file=f)
    print("")
    print(cnt, "/", len(test_data))

    #loss_test = F.nll_loss(output[idx_test], labels[idx_test])
    #acc_test = accuracy(output[idx_test], labels[idx_test])
    #print("Test set results:",
    #      "loss= {:.4f}".format(loss_test.item()),
    #      "accuracy= {:.4f}".format(acc_test.item()))


# Train model
t_total = time.time()
if args.train:
    for epoch in range(args.epochs):
        train(epoch)
    print("Optimization Finished!")
    print("Total time elapsed: {:.4f}s".format(time.time() - t_total))
    torch.save(model.state_dict(), 'model.mdl')
else:
    model.load_state_dict(torch.load('model.mdl'))

# Extract Embedding
#emb = torch.nn.Sequential(*list(model.children())[:-1])

# Testing
test()
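The commented-out `torch.nn.Sequential(*list(model.children())[:-1])` idea only works for strictly sequential models; a forward hook extracts penultimate-layer embeddings without rebuilding the network. A sketch on a stand-in module:

import torch

net = torch.nn.Sequential(torch.nn.Linear(16, 8), torch.nn.ReLU(), torch.nn.Linear(8, 4))
embeddings = {}

def save_embedding(module, inputs, output):
    embeddings['penultimate'] = output.detach()

net[-2].register_forward_hook(save_embedding)   # hook the layer just before the head
net(torch.randn(3, 16))
print(embeddings['penultimate'].shape)          # torch.Size([3, 8])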