Example #1
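# Fragment: the __init__ method of a trainer class (the class statement is
# elided in the source). Assumes `import torch as T`, `torch.nn as nn`, `os`,
# plus ConvNet, get_train_valid_loader, get_test_loader, and SummaryWriter
# (torch.utils.tensorboard) imported elsewhere in the file.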
    def __init__(self, config):
        self.device = config["device"]

        self.model = ConvNet(num_classes=config["num-classes"])
        self.model.to(self.device)

        if config["resume"]:
            print("> Loading Checkpoint")
            self.model.load_state_dict(T.load(config["load-path"]))

        self.train_loader, self.val_loader = get_train_valid_loader(
            config["data-path"], config["num-classes"], config["batch-size"],
            config["val-batch-size"], config["augment"], config["seed"],
            config["valid-size"], config["shuffle"], config["num-workers"])

        self.test_loader = get_test_loader(
            config["data-path"], config["num-classes"], config["batch-size"],
            config["shuffle"], config["num-workers"], config["pin-memory"])

        self.criterion = nn.CrossEntropyLoss()
        self.optim = T.optim.AdamW(self.model.parameters(),
                                   lr=config["lr-init"],
                                   weight_decay=config["weight-decay"])

        self.writer = SummaryWriter(
            log_dir=os.path.join("logs", config["run-title"]))
        self.reduce_lr = T.optim.lr_scheduler.ReduceLROnPlateau(
            self.optim,
            factor=config["lr-factor"],
            patience=config["lr-patience"],
            min_lr=config["lr-min"])

        self.stopping_patience = config["stopping-patience"]
        self.stopping_delta = config["stopping-delta"]

        self.filepath = os.path.join(config["save-path"], config["run-title"],
                                     config["run-title"] + ".pt")
Example #2
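# Fragment: assumes `args` comes from argparse, and that `torch`, `os`,
# `preresnet`, `get_train_valid_loader`, and `get_test_loader` are imported
# earlier in the file.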
if args.valid:
    valid_len = 60000
else:
    valid_len = 0

kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
train_loader, valid_loader = get_train_valid_loader(data_dir='./data.svhn',
                                                    batch_size=args.batch_size,
                                                    augment=True,
                                                    random_seed=args.seed,
                                                    valid_len=valid_len,
                                                    shuffle=True,
                                                    show_sample=False,
                                                    **kwargs)
test_loader = get_test_loader(data_dir='./data.svhn',
                              batch_size=args.batch_size,
                              shuffle=True,
                              **kwargs)

last_prec1 = 0
model = None
cfg = None
if args.model:
    if os.path.isfile(args.model):
        checkpoint = torch.load(args.model)
        cfg = checkpoint['cfg']
        model = preresnet(dataset=args.dataset, depth=args.depth, cfg=cfg)
        model.load_state_dict(checkpoint['state_dict'])
        last_prec1 = checkpoint['best_prec1']
Example #3
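# Fragment: begins with the tail of a sibling model constructor. Assumes
# `torch`, `torch.nn as nn`, `torch.utils.model_zoo as model_zoo`,
# `collections.OrderedDict`, and the `model_urls`, `make_layers`, `CIFAR`,
# and `get_test_loader` definitions appear earlier in the file.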
    return model

def cifar100(n_channel, pretrained=None):
    cfg = [n_channel, n_channel, 'M', 2*n_channel, 2*n_channel, 'M', 4*n_channel, 4*n_channel, 'M', (8*n_channel, 0), 'M']
    layers = make_layers(cfg, batch_norm=True)
    model = CIFAR(layers, n_channel=8*n_channel, num_classes=100)
    if pretrained is not None:
        m = model_zoo.load_url(model_urls['cifar100'])
        state_dict = m.state_dict() if isinstance(m, nn.Module) else m
        assert isinstance(state_dict, (dict, OrderedDict)), type(state_dict)
        model.load_state_dict(state_dict)
    return model

if __name__ == "__main__":
    device = 'cuda'

    model = cifar100(128, pretrained=True).to(device)
    model.eval()
    
    test_loader = get_test_loader("./data", num_classes=100, batch_size=32)

    total, correct = 0, 0
    with torch.no_grad():  # inference only; skip autograd bookkeeping
        for data in test_loader:
            images, labels = data[0].to(device), data[1].to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    acc = correct / total
    print(f"Testing Accuracy: {acc*100:.4f}%")
Example #4
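# Assumed imports for this fragment (argparse, numpy, and torch are used
# below but their imports are not shown in the source):
import argparse
import numpy as np
import torch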
from models import UNet, contour_SEResUNet

parser = argparse.ArgumentParser(description='Kaggle Cdiscounts Training')
parser.add_argument('--gpu', default=1, type=int, 
                    help='which gpu to run')
parser.add_argument('--batch_size', default=16, type=int, 
                    help='size of batches')
parser.add_argument('--img_size', default=448, type=int,
                    help='height and width of images to use')
args = parser.parse_args()

net = contour_SEResUNet().cuda()
net.load_state_dict(torch.load('../models-pytorch/best_SEResUNet_Contour_flips_l1_lamb0.5.pth'))
net.eval()

test_loader = get_test_loader(imsize=args.img_size)

# Run-length encoding stolen from https://www.kaggle.com/rakhlin/fast-run-length-encoding-python
def rle_encoding(x):
    dots = np.where(x.T.flatten() == 1)[0]
    run_lengths = []
    prev = -2
    for b in dots:
        if b > prev + 1:
            run_lengths.extend((b + 1, 0))
        run_lengths[-1] += 1
        prev = b
    return run_lengths

def prob_to_rles(x):
    # watershed instance generation
    #x = np.where(x > 0.5, 1, 0)
Example #5
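# Fragment: begins inside a top-k accuracy helper (`correct`, `topk`, and
# `batch_size` are its locals). Assumes `args`, `utils`, `myresnet`, and the
# torchvision model constructors referenced via globals() are in scope.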
    res = []
    for k in topk:
        # reshape(): the sliced tensor may be non-contiguous, where view() fails
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res
#%%
train_loader, val_loader, train_dataset = utils.get_train_valid_loader(args.train_dir,
        batch_size=args.batch_size, crop_size=args.crop_size,
        augment=args.augment, random_seed=111,
        shuffle=True, valid_size=args.valid_size,
        filtering=args.filtering, 
        num_channels=args.num_channels,
        l2_loss=args.l2_loss, same_crop=args.same_crop,
        num_workers=args.num_workers)
test_loader, test_dataset = utils.get_test_loader(TEST_DIR,
        batch_size=args.batch_size, crop_size=args.crop_size,
        filtering=args.filtering, num_channels=args.num_channels,
        l2_loss=args.l2_loss, num_workers=args.num_workers)
#val_loader, val_dataset = utils.get_val_loader(VAL_DIR, batch_size=args.batch_size, crop_size=args.crop_size, filtering=args.filtering, num_channels=args.num_channels, l2_loss=args.l2_loss)
print(train_dataset.classes)


print(args)
if args.arch.startswith('my'):
    model = myresnet.ResNet18()
else:
    #original_model = models.resnet101(pretrained=True)
    original_model = globals()[args.arch](pretrained=args.pretrained)
    if args.finetune:
        for param in original_model.parameters():
            param.requires_grad = False

    model = utils.FineTuneModel(original_model, args.arch, 10, num_channels=args.num_channels)
Example #6
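# DANN-style domain-adaptation setup: a shared feature extractor with separate
# source (MNIST) and target (MNIST-M) classifiers.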
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import time
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import pylab
from tensorboardX import SummaryWriter
import torchvision.utils as vutils
import utils
import models
import params
import train, test

src_train_dataloader = utils.get_train_loader('MNIST')
src_test_dataloader = utils.get_test_loader('MNIST')
tgt_train_dataloader = utils.get_train_loader('MNIST_M')
tgt_test_dataloader = utils.get_test_loader('MNIST_M')

common_net = models.Extractor()
src_net = models.Classifier()
tgt_net = models.Classifier()

src_dataiter = iter(src_train_dataloader)
tgt_dataiter = iter(tgt_train_dataloader)
src_imgs, src_labels = next(src_dataiter)
tgt_imgs, tgt_labels = next(tgt_dataiter)

src_imgs_show = src_imgs[:4]
tgt_imgs_show = tgt_imgs[:4]
Example #7
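# Fragment: assumes `os`, `json`, `numpy as np`, `torch`, `root_path`,
# `eval_path`, `get_network`, `get_test_loader`, and `evaluate` are defined
# earlier in the file.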
with open(os.path.join(root_path, 'setting.json')) as f:
    SETTINGS = json.load(f)

model_ckp = os.path.join(root_path, '{net}-{idx}-{epoch}-{type}.pth')

net = get_network(net=SETTINGS['NET'], num_classes=SETTINGS['STEP_CLASSES'], input_channels=3)

os.makedirs(eval_path, exist_ok=True)
os.makedirs(eval_path + '/OldAccuracy', exist_ok=True)
os.makedirs(eval_path + '/NewAccuracy', exist_ok=True)
os.makedirs(eval_path + '/AvgIncrementalAccuracy', exist_ok=True)

incremental_accuracy = []
for iteration, sequence in enumerate(SETTINGS['TRAINING_BATCHES']):
    test_loader = get_test_loader(dataset=SETTINGS['DATASET'], accepted_class_labels=sequence, num_workers=0)
    cum_old_accuracies = [0]
    new_accuracies = []

    for epoch in range(SETTINGS['EPOCH']):
        net.load_state_dict(torch.load(model_ckp.format(net=SETTINGS['NET'], idx=iteration, epoch=epoch, type='end')))
        current_acc = evaluate(net, test_loader, label_correction=iteration*SETTINGS['STEP_CLASSES'])
        old_accuracies = []
        for old_iteration in range(iteration):
            old_sequence = SETTINGS['TRAINING_BATCHES'][old_iteration]
            old_test_loader = get_test_loader(dataset=SETTINGS['DATASET'], accepted_class_labels=old_sequence, num_workers=0)
            acc = evaluate(net, old_test_loader, label_correction=old_iteration*SETTINGS['STEP_CLASSES'])
            old_accuracies.append(acc.cpu().numpy())
        new_accuracies.append(current_acc.cpu().numpy())
        if iteration > 0:
            cum_old_accuracies.append(np.mean(np.asarray(old_accuracies)))
Example #8
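# Variant of the DANN setup above with BERT as the shared encoder. Assumes the
# local `utils` and `models` modules are imported earlier in the file; the
# tokenizer is passed into the loader helpers.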
from transformers import BertModel, BertConfig, BertTokenizer, BertTokenizerFast, AdamW, get_linear_schedule_with_warmup

### BERT model instead of the Extractor
# create the BertConfig, BertTokenizerFast, and BertModel
model_name = "bert-base-uncased"
config = BertConfig.from_pretrained(model_name,
                                    output_hidden_states=True,
                                    return_dict=True)
tokenizer = BertTokenizerFast.from_pretrained(model_name, do_lower_case=True)
bert = BertModel.from_pretrained(model_name, config=config)

src_train_dataloader = utils.get_train_loader(
    '/content/drive/My Drive/Data_summarization/pytorch_DAN/data/books.csv',
    tokenizer)
src_test_dataloader = utils.get_test_loader(
    '/content/drive/My Drive/Data_summarization/pytorch_DAN/data/books.csv',
    tokenizer)
tgt_train_dataloader = utils.get_train_loader(
    '/content/drive/My Drive/Data_summarization/pytorch_DAN/data/dvd.csv',
    tokenizer)
tgt_test_dataloader = utils.get_test_loader(
    '/content/drive/My Drive/Data_summarization/pytorch_DAN/data/dvd.csv',
    tokenizer)

common_net = bert
src_net = models.BertForSequenceClassification(config, common_net)
tgt_net = models.BertForSequenceClassification(config, common_net)

src_dataiter = iter(src_train_dataloader)
tgt_dataiter = iter(tgt_train_dataloader)
Example #9
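# DARTS-style architecture search loop. Assumes module-level definitions of
# `args`, `CIFAR_CLASSES`, `fix_seed`, `Network`, `Architect`, `train`,
# `infer`, and `utils`, plus imports of torch, torch.nn as nn, logging, sys,
# os, and time.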
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    fix_seed(args.seed)

    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    model = Network(args.init_channels,
                    CIFAR_CLASSES,
                    args.layers,
                    criterion,
                    space=args.search_space)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    train_queue, train_sampler, valid_queue = utils.get_train_validation_loader(
        args)

    test_queue = utils.get_test_loader(args)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    architect = Architect(model, args)

    best_acc = 0
    total_train_time, total_valid_time, total_test_time = 0, 0, 0
    for epoch in range(args.epochs):
        lr = scheduler.get_last_lr()[0]  # get_last_lr() replaces deprecated get_lr()
        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)

        #print(F.softmax(model.alphas_normal, dim=-1))
        #print(F.softmax(model.alphas_reduce, dim=-1))

        # training
        architect.alpha_forward = 0
        architect.alpha_backward = 0
        start_time = time.time()
        train_acc, train_obj, alphas_time, forward_time, backward_time = \
            train(train_queue, valid_queue, model, architect, criterion,
                  optimizer, lr, epoch)
        end_time = time.time()
        search_time = end_time - start_time
        total_train_time += search_time
        logging.info("train time %f", end_time - start_time)
        logging.info("alphas_time %f ", alphas_time)
        logging.info("forward_time %f", forward_time)
        logging.info("backward_time %f", backward_time)
        logging.info("alpha_forward %f", architect.alpha_forward)
        logging.info("alpha_backward %f", architect.alpha_backward)
        logging.info('train_acc %f', train_acc)

        # validation
        # if args.epochs-epoch<=1:
        #   valid_acc, valid_obj = infer(valid_queue, model, criterion)
        #   logging.info('valid_acc %f', valid_acc)

        # utils.save(model, os.path.join(args.save, 'weights.pt'))
        start_time2 = time.time()
        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        end_time2 = time.time()
        valid_time = end_time2 - start_time2
        total_valid_time += valid_time
        logging.info("inference time %f", end_time2 - start_time2)
        logging.info('valid_acc %f', valid_acc)

        # test
        start = time.time()
        test_acc, test_obj = infer(test_queue, model, criterion)
        end = time.time()
        test_time = end - start
        total_test_time += test_time
        logging.info("inference time %f", end - start)
        logging.info('test_acc %f, test_obj %f', test_acc, test_obj)

        # update learning rate
        scheduler.step()

        is_best = valid_acc > best_acc
        best_acc = max(valid_acc, best_acc)
        if is_best:
            logging.info(
                'best valid_acc: {} at epoch: {}, test_acc: {}'.format(
                    best_acc, epoch, test_acc))
            logging.info('Current best genotype = {}'.format(model.genotype()))
    return total_train_time, total_valid_time, total_test_time
Example #10
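# Variant of the search loop above that also logs scalars via
# tensorboard_logger's `log_value` and checkpoints the best model. Assumes the
# same module-level definitions as the previous example.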
def main():
    root = logging.getLogger()

    if not torch.cuda.is_available():
        root.info('no gpu device available')
        sys.exit(1)

    # Fix seed
    utils.fix_seed(args.seed)

    root.info('gpu device = %d' % args.gpu)
    root.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion,
                    args.greedy, args.l2)
    model = model.cuda()
    root.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # Data loading code
    train_queue, train_sampler, valid_queue = utils.get_train_validation_loader(
        args)
    test_queue = utils.get_test_loader(args)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    architect = Architect(model, args)

    best_acc = 0
    for epoch in range(args.epochs):
        lr = scheduler.get_last_lr()[0]  # get_last_lr() replaces deprecated get_lr()
        log_value("lr", lr, epoch)
        root.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        root.info('genotype = %s', genotype)

        # training
        architect.alpha_forward = 0
        architect.alpha_backward = 0
        start_time = time.time()
        train_acc, train_obj, alphas_time, forward_time, backward_time = \
            train(train_queue, valid_queue, model, architect, criterion,
                  optimizer, lr, epoch)
        end_time = time.time()
        root.info("train time %f", end_time - start_time)
        root.info("alphas_time %f ", alphas_time)
        root.info("forward_time %f", forward_time)
        root.info("backward_time %f", backward_time)
        root.info("alpha_forward %f", architect.alpha_forward)
        root.info("alpha_backward %f", architect.alpha_backward)
        log_value('train_acc', train_acc, epoch)
        root.info('train_acc %f', train_acc)

        # validation
        start_time2 = time.time()
        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        end_time2 = time.time()
        root.info("inference time %f", end_time2 - start_time2)
        log_value('valid_acc', valid_acc, epoch)
        root.info('valid_acc %f', valid_acc)

        # test
        start = time.time()
        test_acc, test_obj = infer(test_queue, model, criterion)
        end = time.time()
        root.info("inference time %f", end - start)
        log_value('test_acc', test_acc, epoch)
        root.info('test_acc %f, test_obj %f', test_acc, test_obj)

        # update learning rate
        scheduler.step()

        is_best = valid_acc > best_acc
        best_acc = max(valid_acc, best_acc)
        if is_best:
            root.info('best valid_acc: {} at epoch: {}, test_acc: {}'.format(
                best_acc, epoch, test_acc))
            root.info('Current best genotype = {}'.format(model.genotype()))
            utils.save(model, os.path.join(args.save, 'best_weights.pt'))
Example #11
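# Class-incremental training loop. Assumes module-level definitions of
# SETTINGS, MODE/mode, DummyLoss, get_network, get_train_loader,
# get_test_loader, evaluate, get_gradient_magnitudes, save_setting, and the
# plotting helpers, plus imports of torch, torch.optim as optim, os, and
# pathlib.Path.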
def main():

    classes = list(range(SETTINGS['NUM_CLASSES']))
    training_batches = [
        classes[i:i + SETTINGS['STEP_CLASSES']]
        for i in range(0, len(classes), SETTINGS['STEP_CLASSES'])
    ]
    SETTINGS['TRAINING_BATCHES'] = training_batches
    checkpoint_path = os.path.join(
        SETTINGS['CHECKPOINT_ROOT'], SETTINGS['DATASET'],
        'StepClasses-{}'.format(str(SETTINGS['STEP_CLASSES'])),
        'BufferSamples-{}'.format(str(SETTINGS['K_SHOT'])), SETTINGS['NET'],
        SETTINGS['TIME_NOW'])

    Path(checkpoint_path).mkdir(parents=True, exist_ok=True)
    model_ckp_path = os.path.join(checkpoint_path,
                                  '{net}-{idx}-{epoch}-{type}.pth')
    save_setting(SETTINGS, checkpoint_path)

    net = get_network(net=SETTINGS['NET'],
                      num_classes=SETTINGS['STEP_CLASSES'],
                      input_channels=3)

    norm_alpha_loss = torch.nn.MSELoss()
    norm_triangle_loss = torch.nn.MSELoss()
    ce_criterion = torch.nn.CrossEntropyLoss()
    zero_img = torch.zeros(size=[1, 3, 32, 32])
    zero_label = torch.zeros(size=[1, 512])

    old_classes = []

    for iteration, training_sequence in enumerate(training_batches):
        base_path = os.path.join(checkpoint_path, 'Plots', str(iteration))
        base_gradients_path = os.path.join(base_path, 'Gradients')
        g_zero_path = os.path.join(base_gradients_path, 'L_Zero')
        g_alpha_path = os.path.join(base_gradients_path, 'L_Alpha')
        g_triangle_path = os.path.join(base_gradients_path, 'L_Triangle')
        loss_path = os.path.join(base_path, 'LossPlots')
        embedding_path = os.path.join(base_path, 'EmbeddingPlots')
        # Define the paths unconditionally: they are used below even when the
        # directories already exist (mkdir with exist_ok=True is a no-op then).
        Path(loss_path).mkdir(parents=True, exist_ok=True)
        Path(embedding_path).mkdir(parents=True, exist_ok=True)
        Path(g_zero_path).mkdir(parents=True, exist_ok=True)
        Path(g_alpha_path).mkdir(parents=True, exist_ok=True)
        Path(g_triangle_path).mkdir(parents=True, exist_ok=True)

        training_loader = get_train_loader(
            SETTINGS['DATASET'],
            accepted_class_labels=training_sequence,
            norm_lambda=SETTINGS['NORM_LAMBDA'],
            batch_size=SETTINGS['BATCH_SIZE'])
        old_classes.extend(training_sequence)
        test_loader = get_test_loader(SETTINGS['DATASET'],
                                      accepted_class_labels=old_classes,
                                      batch_size=5 * SETTINGS['BATCH_SIZE'])

        if iteration == 0:
            EPOCH = SETTINGS['STARTING_EPOCH']
            lr = SETTINGS['STARTING_LEARNING_RATE']
        else:
            EPOCH = SETTINGS['OTHER_EPOCHS']
            lr = SETTINGS['OTHER_LEARNING_RATE']

        ce_optimizer = optim.SGD(params=net.parameters(), lr=lr, momentum=0.9)
        triangle_optimizer = optim.SGD(params=net.parameters(),
                                       lr=lr,
                                       momentum=0.9)
        zero_optimizer = optim.SGD(params=net.parameters(),
                                   lr=lr,
                                   momentum=0.9)

        for epoch in range(EPOCH):
            print('Processing iteration: {}\nEpoch:{}'.format(
                iteration, epoch))
            for batch_idx, data in enumerate(training_loader):
                x, y, alpha, x2, y2, x_alpha, x_convex = data
                y = y - iteration * SETTINGS['STEP_CLASSES']

                if mode == MODE.SUPER_DEBUG:
                    print('---INPUT SHAPES---')
                    print(x.shape, y.shape, alpha.shape, x2.shape, y2.shape,
                          x_alpha.shape, x_convex.shape)

                net.eval()
                with torch.no_grad():
                    _, x2_features = net(x2.cuda())
                    _, alpha_x_features = net(x_alpha.cuda())

                alpha_sq = torch.unsqueeze(alpha, dim=1)
                if 'CE' in SETTINGS['LOSSES']:
                    net.train()
                    net.zero_grad()
                    preds, x_features = net(x.cuda())
                    l_ce = ce_criterion(preds, y.cuda())
                    l_ce.backward(retain_graph=True)
                    ce_gradients = get_gradient_magnitudes(net)
                    plot_gradients(ce_gradients, g_alpha_path,
                                   '{}--{}'.format(epoch, batch_idx))
                    del ce_gradients
                    ce_optimizer.step()
                else:
                    l_ce = DummyLoss()
                    net.eval()
                    with torch.no_grad():
                        # x_features is still needed by the TRIANGLE loss below
                        _, x_features = net(x.cuda())
                """net.train()
                net.zero_grad()
                _, x_features = net(x.cuda())
                x_norm = torch.unsqueeze(torch.norm(x_features, p=2, dim=1), dim=1)
                alpha_sq = torch.unsqueeze(alpha, dim=1)
                alpha_x_norm = torch.unsqueeze(torch.norm(alpha_x_features, p=2, dim=1), dim=1)
                # print(alpha_sq.shape, x_norm.shape, alpha_x_norm.shape)
                l_a = norm_alpha_loss(x_norm*alpha_sq.cuda(), alpha_x_norm)
                l_a.backward(retain_graph=True)
                alpha_gradients = get_gradient_magnitudes(net)
                plot_gradients(alpha_gradients, g_alpha_path, '{}--{}'.format(epoch, batch_idx))
                del alpha_gradients
                ce_optimizer.step()"""

                if 'TRIANGLE' in SETTINGS['LOSSES']:
                    net.train()
                    net.zero_grad()
                    _, cvx_features = net(x_convex.cuda())
                    l_t = norm_triangle_loss(
                        torch.log(
                            torch.unsqueeze(torch.norm(cvx_features,
                                                       p=2,
                                                       dim=1),
                                            dim=1)),
                        torch.log(
                            alpha_sq.cuda() * torch.unsqueeze(
                                torch.norm(x_features, p=2, dim=1), dim=1) +
                            (1 - alpha_sq.cuda()) * torch.unsqueeze(
                                torch.norm(x2_features, p=2, dim=1), dim=1)))
                    l_t.backward()
                    triangle_gradients = get_gradient_magnitudes(net)
                    plot_gradients(triangle_gradients, g_triangle_path,
                                   '{}--{}'.format(epoch, batch_idx))
                    del triangle_gradients
                    triangle_optimizer.step()
                else:
                    l_t = DummyLoss()
                """net.zero_grad()
                _, zero_features = net(zero_img.cuda())
                l_z = zero_loss(zero_features)/SETTINGS['BATCH_SIZE']
                l_z.backward()
                zero_gradients = get_gradient_magnitudes(net)
                plot_gradients(zero_gradients, g_zero_path, '{}--{}'.format(epoch, batch_idx))
                del zero_gradients
                zero_optimizer.step()"""
                plot_norm_losses(l_ce.item(),
                                 l_t.item(),
                                 0,
                                 path=loss_path,
                                 fid='Epoch:{}--BatchNo:{}'.format(
                                     epoch, batch_idx))

            train_acc = evaluate(net,
                                 training_loader,
                                 label_correction=iteration *
                                 SETTINGS['STEP_CLASSES'])
            print('Training accuracy: {}'.format(train_acc))
            test_features = None
            test_labels = None
            for data in test_loader:
                x_test, y_test, _, _, _, _, _ = data
                net.eval()
                with torch.no_grad():
                    _, x_test_features = net(x_test.cuda())
                    if test_features is None:
                        test_features = x_test_features.cpu()
                        test_labels = y_test.cpu()
                    else:
                        test_features = torch.cat(
                            [test_features,
                             x_test_features.cpu()], dim=0)
                        test_labels = torch.cat(
                            [test_labels, y_test.cpu()], dim=0)
            plot_embedding(test_features.numpy(),
                           test_labels.numpy(),
                           num_classes=len(old_classes),
                           filepath=embedding_path,
                           filename='Epoch:{}'.format(epoch))
            torch.save(
                net.state_dict(),
                model_ckp_path.format(net=SETTINGS['NET'],
                                      idx=iteration,
                                      epoch=epoch,
                                      type='end'))