Example #1
def gen_training_accuracy(args):
    # load data and model
    params = utils.load_params(args.model_dir)
    ckpt_dir = os.path.join(args.model_dir, 'checkpoints')
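    # checkpoint files are assumed to be named 'model-epoch{N}.pt'
    # (an 11-character prefix before the epoch number, plus a '.pt' suffix)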
    ckpt_paths = [int(e[11:-3]) for e in os.listdir(ckpt_dir) if e[-3:] == ".pt"]
    ckpt_paths = np.sort(ckpt_paths)
    
    # csv
    headers = ["epoch", "acc_train", "acc_test"]
    csv_path = utils.create_csv(args.model_dir, 'accuracy.csv', headers)

    for epoch, ckpt_path in enumerate(ckpt_paths):
        if epoch % 5 != 0:
            continue
        net, epoch = tf.load_checkpoint(args.model_dir, epoch=epoch, eval_=True)
        # load data
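        # deterministic 'test' transforms are used even for the training
        # split, since features are only extracted here for evaluation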
        train_transforms = tf.load_transforms('test')
        trainset = tf.load_trainset(params['data'], train_transforms, train=True)
        trainloader = DataLoader(trainset, batch_size=500, num_workers=4)
        train_features, train_labels = tf.get_features(net, trainloader, verbose=False)

        test_transforms = tf.load_transforms('test')
        testset = tf.load_trainset(params['data'], test_transforms, train=False)
        testloader = DataLoader(testset, batch_size=500, num_workers=4)
        test_features, test_labels = tf.get_features(net, testloader, verbose=False)

        acc_train, acc_test = svm(args, train_features, train_labels, test_features, test_labels)
        utils.save_state(args.model_dir, epoch, acc_train, acc_test, filename='accuracy.csv')
    print("Finished generating accuracy.")
Example #2
def gen_testloss(args):
    # load data and model
    params = utils.load_params(args.model_dir)
    ckpt_dir = os.path.join(args.model_dir, 'checkpoints')
    ckpt_paths = [int(e[11:-3]) for e in os.listdir(ckpt_dir) if e[-3:] == ".pt"]
    ckpt_paths = np.sort(ckpt_paths)
    
    # csv
    headers = ["epoch", "step", "loss", "discrimn_loss_e", "compress_loss_e", 
        "discrimn_loss_t",  "compress_loss_t"]
    csv_path = utils.create_csv(args.model_dir, 'losses_test.csv', headers)
    print('writing to:', csv_path)

    # load data
    test_transforms = tf.load_transforms('test')
    testset = tf.load_trainset(params['data'], test_transforms, train=False)
    testloader = DataLoader(testset, batch_size=params['bs'], shuffle=False, num_workers=4)
    
    # save loss
    criterion = MaximalCodingRateReduction(gam1=params['gam1'], gam2=params['gam2'], eps=params['eps'])
    for epoch, ckpt_path in enumerate(ckpt_paths):
        net, epoch = tf.load_checkpoint(args.model_dir, epoch=epoch, eval_=True)
        for step, (batch_imgs, batch_lbls) in enumerate(testloader):
            with torch.no_grad():  # evaluation only; no gradient graph needed
                features = net(batch_imgs.cuda())
            loss, loss_empi, loss_theo = criterion(features, batch_lbls,
                                            num_classes=testset.num_classes)
            utils.save_state(args.model_dir, epoch, step, loss.item(), 
                *loss_empi, *loss_theo, filename='losses_test.csv')
    print("Finished generating test loss.")
Example #3
if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='Extract features from model and data')
    parser.add_argument('--model_dir',
                        type=str,
                        help='base directory for saving PyTorch model.')
    parser.add_argument('--epoch',
                        type=int,
                        default=None,
                        help='which epoch for evaluation')
    parser.add_argument('--save_dir', type=str, default="./extractions/")
    parser.add_argument('--tail',
                        type=str,
                        default='',
                        help='extra information to add to file name')

    args = parser.parse_args()
    params = utils.load_params(args.model_dir)
    net, epoch = tf.load_checkpoint(args.model_dir, args.epoch, eval_=True)
    train_transforms = tf.load_transforms('test')
    trainset = tf.load_trainset(params['data'], train_transforms, train=True)
    trainloader = DataLoader(trainset, batch_size=200, num_workers=4)
    features, labels = tf.get_features(net, trainloader)

    os.makedirs(args.save_dir, exist_ok=True)
    np.save(os.path.join(args.save_dir, "features.npy"),
            features.cpu().detach().numpy())
    np.save(os.path.join(args.save_dir, "labels.npy"), labels.numpy())
    make_tarfile("./extractions.tgz", args.save_dir)
Example #4
    parser.add_argument('--cpb',
                        type=int,
                        default=10,
                        help='number of classes in each learning batch (default: 10)')

    parser.add_argument('--save', action='store_true', help='save labels')
    parser.add_argument('--data_dir',
                        default='./data/',
                        help='path to dataset')
    args = parser.parse_args()

    print("evaluate using label_batch: {}".format(args.label_batch))

    params = utils.load_params(args.model_dir)
    # get train features and labels
    train_transforms = tf.load_transforms('test')
    trainset = tf.load_trainset(params['data'],
                                train_transforms,
                                train=True,
                                path=args.data_dir)
    if 'lcr' in params.keys():  # supervised corruption case
        trainset = tf.corrupt_labels(trainset, params['lcr'], params['lcs'])
    new_labels = trainset.targets
    assert (trainset.num_classes %
            args.cpb == 0), "Number of classes not divisible by cpb"
    ## load model
    net, epoch = tf.load_checkpoint_ce(args.model_dir,
                                       trainset.num_classes,
                                       args.epoch,
                                       eval_=True,
                                       label_batch_id=args.label_batch)
    net = net.cuda().eval()

    classes = np.unique(trainset.targets)
Example #5
model_dir = os.path.join(
    args.save_dir,
    'selfsup_{}+{}_{}_epo{}_bs{}_aug{}+{}_lr{}_mom{}_wd{}_gam1{}_gam2{}_eps{}{}'
    .format(args.arch, args.fd, args.data, args.epo, args.bs, args.aug,
            args.transform, args.lr, args.mom, args.wd, args.gam1, args.gam2,
            args.eps, args.tail))
utils.init_pipeline(model_dir)

## Prepare for Training
if args.pretrain_dir is not None:
    net, _ = tf.load_checkpoint(args.pretrain_dir, args.pretrain_epo)
    utils.update_params(model_dir, args.pretrain_dir)
else:
    net = tf.load_architectures(args.arch, args.fd)
transforms = tf.load_transforms(args.transform)
trainset = tf.load_trainset(args.data, path=args.data_dir)
trainloader = AugmentLoader(trainset,
                            transforms=transforms,
                            sampler=args.sampler,
                            batch_size=args.bs,
                            num_aug=args.aug)

criterion = MaximalCodingRateReduction(gam1=args.gam1,
                                       gam2=args.gam2,
                                       eps=args.eps)
optimizer = optim.SGD(net.parameters(),
                      lr=args.lr,
                      momentum=args.mom,
                      weight_decay=args.wd)
scheduler = lr_scheduler.MultiStepLR(optimizer, [30, 60], gamma=0.1)
utils.save_params(model_dir, vars(args))
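
The example stops after setup. A minimal sketch of the training loop such a setup feeds, patterned on the supervised loop in Example #7 (the batch structure AugmentLoader yields and the num_classes argument are assumptions here):

for epoch in range(args.epo):
    for step, (batch_imgs, batch_lbls) in enumerate(trainloader):
        features = net(batch_imgs.cuda())
        loss, loss_empi, loss_theo = criterion(features, batch_lbls,
                                               num_classes=trainset.num_classes)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    scheduler.step()  # MultiStepLR drops the LR at epochs 30 and 60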
Example #6
## per model functions
def lr_schedule(epoch, optimizer):
    """decrease the learning rate"""
    lr = args.lr
    if epoch >= 400:
        lr = args.lr * 0.01
    elif epoch >= 200:
        lr = args.lr * 0.1
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr


## Prepare for Training
transforms = tf.load_transforms(args.transform)
trainset = tf.load_trainset(args.data, transforms, path=args.data_dir)
#trainset = tf.corrupt_labels(trainset, args.lcr, args.lcs)
if args.pretrain_dir is not None:
    net, _ = tf.load_checkpoint(args.pretrain_dir, args.pretrain_epo)
    utils.update_params(model_dir, args.pretrain_dir)
else:
    net = tf.load_architectures_ce(args.arch, trainset.num_classes)
assert (trainset.num_classes %
        args.cpb == 0), "Number of classes not divisible by cpb"
classes = np.unique(trainset.targets)
class_batch_num = trainset.num_classes // args.cpb
class_batch_list = classes.reshape(class_batch_num, args.cpb)
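# e.g. 10 classes with cpb=5 -> class_batch_list = [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]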

#trainloader = DataLoader(trainset, batch_size=args.bs, drop_last=True, num_workers=4)
criterion = nn.CrossEntropyLoss()
optimizer = SGD(net.parameters(),
Example #7
            elif epoch >= 200:
                lr = args.lr * 0.1
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr


        ## Prepare for Training
        if args.pretrain_dir is not None:
            pretrain_model_dir = os.path.join(args.pretrain_dir,
                                'sup_expert_resnet18+128_{}_epo200_bs1000_lr0.001_mom0.9_wd0.0005_gam11.0_gam21.0_eps0.5_lcr0.0'.format(source_name))
            net, _ = tf.load_checkpoint(pretrain_model_dir, args.pretrain_epo)
            utils.update_params(model_dir, pretrain_model_dir)
        else:
            net = tf.load_architectures(args.arch, args.fd)
        transforms = tf.load_transforms(args.transform)
        trainset = tf.load_trainset(ds_name, transforms, path=args.data_dir)
        print("Number of classes in {} is: {}".format(ds_name,trainset.num_classes))
        trainset = tf.corrupt_labels(trainset, args.lcr, args.lcs)
        trainloader = DataLoader(trainset, batch_size=args.bs, drop_last=True, num_workers=4)
        criterion = MaximalCodingRateReduction(gam1=args.gam1, gam2=args.gam2, eps=args.eps)
        optimizer = SGD(net.parameters(), lr=args.lr, momentum=args.mom, weight_decay=args.wd)


        ## Training
        for epoch in range(args.epo):
            lr_schedule(epoch, optimizer)
            for step, (batch_imgs, batch_lbls) in enumerate(trainloader):
                features = net(batch_imgs.cuda())
                loss, loss_empi, loss_theo = criterion(features, batch_lbls, num_classes=trainset.num_classes)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
Example #8
def plot_pca_epoch(args):
    """Plot PCA for different epochs in the same plot. """
    EPOCHS = [0, 10, 100, 500]

    params = utils.load_params(args.model_dir)
    transforms = tf.load_transforms('test')
    trainset = tf.load_trainset(params['data'], transforms)
    trainloader = DataLoader(trainset, batch_size=200, num_workers=4)

    sig_vals = []
    for epoch in EPOCHS:
        epoch_ = epoch - 1
        if epoch_ == -1:  # randomly initialized
            net = tf.load_architectures(params['arch'], params['fd'])
        else:
            net, _ = tf.load_checkpoint(args.model_dir,
                                        epoch=epoch_,
                                        eval_=True)
        features, labels = tf.get_features(net, trainloader)
        if args.class_ is not None:
            features_sort, _ = utils.sort_dataset(
                features.numpy(),
                labels.numpy(),
                num_classes=trainset.num_classes,
                stack=False)
            features_ = features_sort[args.class_]
        else:
            features_ = features.numpy()
        n_comp = np.min([args.comp, features.shape[1]])
        pca = PCA(n_components=n_comp).fit(features_)
        sig_vals.append(pca.singular_values_)

    ## plot singular values
    plt.rc('text', usetex=True)
    plt.rcParams['font.family'] = 'serif'
    plt.rcParams['font.serif'] = ['Times New Roman']
    fig, ax = plt.subplots(1, 1, figsize=(7, 5), dpi=400)
    x_min = np.min([len(sig_val) for sig_val in sig_vals])
    if args.class_ is not None:
        ax.set_xticks(np.arange(0, x_min, 10))
        ax.set_yticks(np.linspace(0, 40, 9))
        ax.set_ylim(0, 40)
    else:
        ax.set_xticks(np.arange(0, x_min, 10))
        ax.set_yticks(np.linspace(0, 80, 9))
        ax.set_ylim(0, 90)
    for epoch, sig_val in zip(EPOCHS, sig_vals):
        ax.plot(np.arange(x_min),
                sig_val[:x_min],
                marker='',
                markersize=5,
                label=f'epoch - {epoch}',
                alpha=0.6)
    ax.legend(loc='upper right',
              frameon=True,
              fancybox=True,
              prop={"size": 8},
              ncol=1,
              framealpha=0.5)
    ax.set_xlabel("components")
    ax.set_ylabel("sigular values")
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.spines['bottom'].set_visible(False)
    ax.spines['left'].set_visible(False)
    ax.tick_params(axis='both', which='major', labelsize=12)
    ax.grid(True, color='white')
    ax.set_facecolor('whitesmoke')
    fig.tight_layout()

    ## save
    save_dir = os.path.join(args.model_dir, 'figures', 'pca')
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    np.save(os.path.join(save_dir, "sig_vals_epoch.npy"), sig_vals)
    file_name = os.path.join(save_dir, f"pca_class{args.class_}.png")
    fig.savefig(file_name)
    print("Plot saved to: {}".format(file_name))
    file_name = os.path.join(save_dir, f"pca_class{args.class_}.pdf")
    fig.savefig(file_name)
    print("Plot saved to: {}".format(file_name))
    plt.close()
Example #9
        path = os.path.join(args.model_dir, 'losses_test.csv')
        if not os.path.exists(path):
            gen_testloss(args)
        plot_traintest(args, path)
    if args.acc:
        path = os.path.join(args.model_dir, 'accuracy.csv')
        if not os.path.exists(path):
            gen_training_accuracy(args)
        plot_accuracy(args, path)

    if args.pca or args.hist or args.heat or args.nearcomp_sup or args.nearcomp_unsup or args.nearcomp_class:
        ## load data and model
        params = utils.load_params(args.model_dir)
        net, epoch = tf.load_checkpoint(args.model_dir, args.epoch, eval_=True)
        transforms = tf.load_transforms('test')
        trainset = tf.load_trainset(params['data'], transforms)
        if 'lcr' in params.keys():  # supervised corruption case
            trainset = tf.corrupt_labels(trainset, params['lcr'],
                                         params['lcs'])
        trainloader = DataLoader(trainset, batch_size=200, num_workers=4)
        features, labels = tf.get_features(net, trainloader)

    if args.pca:
        plot_pca(args, features, labels, epoch)
    if args.nearcomp_sup:
        plot_nearest_component_supervised(args, features, labels, epoch,
                                          trainset)
    if args.nearcomp_unsup:
        plot_nearest_component_unsupervised(args, features, labels, epoch,
                                            trainset)
    if args.nearcomp_class:
Example #10
# Prepare data loaders
transform_train = transforms.Compose([
    transforms.Resize(72),
    transforms.RandomCrop(64),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
])
transform_test = transforms.Compose([
    transforms.Resize(72),
    transforms.CenterCrop(64),
    transforms.ToTensor(),
])

trainset = tf.load_trainset(args.dataset[0],
                            transform_train,
                            train=True,
                            path=args.datadir)
testset = tf.load_trainset(args.dataset[0],
                           transform_test,
                           train=False,
                           path=args.datadir)

trainloader = DataLoader(trainset, batch_size=128)
testloader = DataLoader(testset, batch_size=100)

train_loaders, val_loaders = [trainloader], [testloader]
args.num_classes = [trainset.num_classes]

# Load checkpoint and initialize the networks with the weights of a pretrained network
print('==> Resuming from checkpoint..')
net_old, _ = tf.load_checkpoint(args.source, None, eval_=True)
Example #11
    print("Use pretrained network on: {}".format(source_ds_name))
    pretrained_model_dir = args.model_dir.split(
        "{}")[0] + source_ds_name + args.model_dir.split("{}")[1]
    ## load model
    params = utils.load_params(pretrained_model_dir)
    net, epoch = tf.load_checkpoint(pretrained_model_dir,
                                    args.epoch,
                                    eval_=True)
    net = net.cuda().eval()

    for j, target_ds_name in enumerate(dataset_list):
        stats_dict = {}
        # get train features and labels
        train_transforms = tf.load_transforms('transfer')  #('test')
        trainset = tf.load_trainset(target_ds_name,
                                    train_transforms,
                                    train=True,
                                    path=args.data_dir)
        if 'lcr' in params.keys():  # supervised corruption case
            trainset = tf.corrupt_labels(trainset, params['lcr'],
                                         params['lcs'])
        new_labels = trainset.targets
        trainloader = DataLoader(trainset, batch_size=200)
        print("Target task on: {}".format(target_ds_name))
        train_features, train_labels = tf.get_features(net,
                                                       trainloader,
                                                       verbose=False)

        # get test features and labels
        test_transforms = tf.load_transforms('transfer')  #('test')
        testset = tf.load_trainset(target_ds_name,
                                   test_transforms,