Example #1
def test(opt, model, dataloader):
    # Logging
    logger = logging.Logger(opt.ckpt_path, opt.split)
    stats = logging.Statistics(opt.ckpt_path, opt.split)
    logger.log(opt)

    model.load(opt.load_ckpt_paths, opt.load_opts, opt.load_epoch)
    all_scores = []
    video_names = []
    for step, data in enumerate(dataloader, 1):
        inputs, label, vid_name = data
        info_acc, logits, scores = model.test(inputs, label, opt.timestep)

        all_scores.append(scores)
        video_names.append(vid_name[0])
        update = stats.update(logits.shape[0], info_acc)
        if utils.is_due(step, opt.print_every):
            utils.info('step {}/{}: {}'.format(step, len(dataloader), update))

    logger.log('[Summary] {}'.format(stats.summarize()))

    # Evaluate
    iou_thresholds = [0.1, 0.3, 0.5]
    groundtruth_dir = os.path.join(opt.dset_path, opt.dset, 'groundtruth',
                                   'validation/cross-subject')
    assert os.path.exists(groundtruth_dir), '{} does not exist'.format(
        groundtruth_dir)
    mean_aps = calc_map(opt, all_scores, video_names, groundtruth_dir,
                        iou_thresholds)

    for i in range(len(iou_thresholds)):
        logger.log('IoU: {}, mAP: {}'.format(iou_thresholds[i], mean_aps[i]))
Example #2
def train(opt, model, dataloader):
    # Logging
    logger = logging.Logger(opt.ckpt_path, opt.split)
    stats = logging.Statistics(opt.ckpt_path, opt.split)
    logger.log(opt)

    model.load(opt.load_ckpt_paths, opt.load_opts, opt.load_epoch)
    for epoch in range(1, opt.n_epochs + 1):
        for step, data in enumerate(dataloader, 1):
            # inputs is a list of input of each modality
            inputs, label, _ = data
            ret = model.train(inputs, label)
            update = stats.update(len(label), ret)
            if utils.is_due(step, opt.print_every):
                utils.info('epoch {}/{}, step {}/{}: {}'.format(
                    epoch, opt.n_epochs, step, len(dataloader), update))

        logger.log('[Summary] epoch {}/{}: {}'.format(epoch, opt.n_epochs,
                                                      stats.summarize()))

        if utils.is_due(epoch, opt.n_epochs, opt.save_every):
            model.save(epoch)
            stats.save()
            logger.log('***** saved *****')

        if utils.is_due(epoch, opt.lr_decay_at):
            lrs = model.lr_decay()
            logger.log('***** lr decay *****: {}'.format(lrs))
Example #3
def init_config(conf):
    conf.is_finished = False
    assert conf.ptl in conf.model

    # configure the training device.
    assert conf.world is not None, "Please specify the gpu ids."
    conf.world = ([int(x) for x in conf.world.split(",")]
                  if "," in conf.world else [int(conf.world)])
    conf.n_sub_process = len(conf.world)

    # init the masking scheduler.
    conf.masking_scheduler_conf_ = (param_parser.dict_parser(
        conf.masking_scheduler_conf) if conf.masking_scheduler_conf is not None
                                    else None)
    if conf.masking_scheduler_conf is not None:
        for k, v in conf.masking_scheduler_conf_.items():
            setattr(conf, f"masking_scheduler_{k}", v)

    # init the layers to mask.
    assert conf.layers_to_mask is not None, "Please specify which BERT layers to mask."
    conf.layers_to_mask_ = ([
        int(x) for x in conf.layers_to_mask.split(",")
    ] if "," in conf.layers_to_mask else [int(conf.layers_to_mask)])

    # init the params for structure pruning.
    if (conf.structured_masking is not None
            and conf.structured_masking_types is not None):
        conf.structured_masking_types_ = conf.structured_masking_types.split(
            ",")
    else:
        conf.structured_masking_types_ = None

    # init the params for do_tuning_on_MS_scheme
    if conf.do_tuning_on_MS:
        assert conf.do_tuning_on_MS_scheme is not None
        conf.do_tuning_on_MS_scheme_ = conf.do_tuning_on_MS_scheme.split(",")

    # re-configure batch_size if sub_process > 1.
    if conf.n_sub_process > 1:
        conf.batch_size = conf.batch_size * conf.n_sub_process

    # configure cuda related.
    assert torch.cuda.is_available()
    torch.manual_seed(conf.manual_seed)
    torch.cuda.manual_seed(conf.manual_seed)
    torch.cuda.set_device(conf.world[0])
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = True
    torch.backends.cudnn.deterministic = True if conf.train_fast else False

    # define checkpoint for logging.
    checkpoint.init_checkpoint(conf)

    # display the arguments' info.
    logging.display_args(conf)

    # configure logger.
    conf.logger = logging.Logger(conf.checkpoint_root)
Example #4
def main(args):
    # s_ = time.time()

    save_dir = args.save_dir          # directory where the model is saved
    mkdir_if_missing(save_dir)        # create the directory if it does not exist (helper from utils)

    sys.stdout = logging.Logger(os.path.join(save_dir, 'log.txt'))
    display(args)                                                   # print the arguments of the current training run
    start = 0

    model = models.create(args.net, pretrained=False, model_path=None, normalized=True)   # create the model; pretrained=True would load an existing pretrained model (factory in models/)


    model = torch.nn.DataParallel(model)    # wrap the model for multi-GPU data-parallel training
    model = model.cuda()                    # move the model to GPU

    print('initial model is saved at %s' % save_dir)

    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                 weight_decay=args.weight_decay)              # optimizer

    criterion = losses.create(args.loss, margin_same=args.margin_same, margin_diff=args.margin_diff).cuda()  #TWConstrativeloss

    data = DataSet.create(name=args.data, root=args.data_root, set_name=args.set_name)  # dataset; set_name is "test" or "train"

    train_loader = torch.utils.data.DataLoader(
        data.train, batch_size=args.batch_size,shuffle = True,
        drop_last=True, pin_memory=True, num_workers=args.nThreads)

    losses_ = []                   # per-epoch training losses, plotted below
    for epoch in range(start, 50): #args.epochs

        L = train(epoch=epoch, model=model, criterion=criterion,
              optimizer=optimizer, train_loader=train_loader, args=args)
        losses_.append(L)


        if (epoch+1) % args.save_step == 0 or epoch==0:
            if use_gpu:
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()

            save_checkpoint({
                'state_dict': state_dict,
                'epoch': (epoch+1),
            }, is_best=False, fpath=osp.join(args.save_dir, 'ckp_ep' + str(epoch + 1) + '.pth.tar'))

    # added
    batch_nums = range(1, len(losses_) + 1)
    import matplotlib.pyplot as plt
    plt.plot(batch_nums, losses_)
    plt.show()
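Many of these examples redirect sys.stdout to logging.Logger(path) so that everything printed also lands in log.txt. That only works if the Logger object is file-like, i.e. implements write() and flush(). Below is a minimal tee-style sketch of such a class (an illustrative assumption, not the actual utils.logging implementation used by these repositories):

import sys

class Logger(object):
    """Write messages to the console and, optionally, to a log file."""

    def __init__(self, fpath=None):
        self.console = sys.stdout
        self.file = open(fpath, 'w') if fpath is not None else None

    def write(self, msg):
        self.console.write(msg)
        if self.file is not None:
            self.file.write(msg)

    def flush(self):
        self.console.flush()
        if self.file is not None:
            self.file.flush()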
Example #5
    def test_with_fields(self):
        """It should append the `extras` dictionary to the logger.
        """
        logger = logging.Logger('app.testing', {'user': '12345'})
        logger.with_fields(pid=10)

        with self.assertLogs(logger.name, level=pylogging.INFO) as cm:
            logger.info('This is the first log.')
            logger.info('This is the second log.')

        self.assertEqual(cm.output, [
            'INFO:app.testing:(user: 12345) (pid: 10) This is the first log.',
            'INFO:app.testing:(user: 12345) (pid: 10) This is the second log.',
        ])
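A minimal Logger consistent with the records this test expects might look like the sketch below (an illustrative assumption; the project's actual app logger is not shown here). It prefixes each message with the extras passed at construction time and via with_fields():

import logging as pylogging

class Logger:
    def __init__(self, name, extras=None):
        self.name = name
        self._extras = dict(extras or {})
        self._log = pylogging.getLogger(name)

    def with_fields(self, **fields):
        # merge additional structured fields into the message prefix
        self._extras.update(fields)
        return self

    def info(self, msg):
        prefix = ' '.join('({}: {})'.format(k, v) for k, v in self._extras.items())
        self._log.info('%s %s', prefix, msg)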
Example #6
    def __init__(self, app_id):
        self.app_id = app_id
        super().__init__(application_id=self.app_id,
                         flags=Gio.ApplicationFlags.HANDLES_COMMAND_LINE)
        self.connect('command-line', self.do_command_line)
        GLib.set_application_name(_("Good Old Mupen64+"))
        GLib.set_prgname('gom64p')
        #GLib.setenv("")
        #self.settings = Gio.Settings.new('org.mupen64plus.good-old-m64p')

        self.main = None
        self.args = None
        self.frontend_conf = None
        self.logger = u_log.Logger()
Example #7
                    default=0e-3,
                    help='try to make the last linear weight matrix to '
                    'approximate the orthogonal matrix')

args = parser.parse_args()

print(args.nums)
print(type(args.nums))

if args.log_dir is None:
    log_dir = os.path.join('checkpoints', args.loss)
else:
    log_dir = os.path.join('checkpoints', args.log_dir)
mkdir_if_missing(log_dir)
# write log
sys.stdout = logging.Logger(os.path.join(log_dir, 'log.txt'))

#  display information of current training
print('train on dataset %s' % args.data)
print('batch size is: %d' % args.BatchSize)
print('num_instance is %d' % args.num_instances)
print('dimension of the embedding space is %d' % args.dim)
print('log dir is: %s' % args.log_dir)
print('the network is: %s' % args.net)
print('loss function for training is: %s' % args.loss)
print('learning rate: %f' % args.lr)
print('learning rate for base parameters: %f' % args.base)
print('the orthogonal weight regularization is %f' % args.orth_cof)

#  load pretrained models
if args.r is not None:
Example #8
                    default=4,
                    type=float,
                    help='triplet loss margin')
parser.add_argument('--file_name',
                    default='result',
                    type=str,
                    help='file name to save')
parser.add_argument('--pmap', default=False, help='use part_map')
parser.add_argument('--mat',
                    default='',
                    type=str,
                    help='name for saving representation')
opt = parser.parse_args()

sys.stdout = logging.Logger(
    os.path.join(
        '/home/guojianyuan/ReID_Duke/' + opt.file_name + '/' + opt.name + '/',
        'log.txt'))
tripletloss = TripletLoss(opt.margin)

gpu_ids = []
str_gpu_ids = opt.gpu_ids.split(',')
for str_id in str_gpu_ids:
    gpu_ids.append(int(str_id))
torch.cuda.set_device(gpu_ids[0])

# Load Data
if opt.pmap:
    transform_train_list = [
        transforms.Resize((384, 128), interpolation=3),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
Example #9
def train_fun(args, train_loader, feat_loader, current_task, fisher={}, prototype={}):

    log_dir = args.log_dir
    mkdir_if_missing(log_dir)

    sys.stdout = logging.Logger(os.path.join(log_dir, 'log.txt'))
    display(args)

    model = models.create(args.net, Embed_dim=args.dim)
    # load part of the model
    if args.method == 'Independent' or current_task == 0:
        model_dict = model.state_dict()

        if args.net == 'resnet32':
            pickle.load = partial(pickle.load, encoding="latin1")
            pickle.Unpickler = partial(pickle.Unpickler, encoding="latin1")
            pretrained_dict = torch.load(
                'pretrained_models/Finetuning_0_task_0_200_model_task2_cifar100_seed1993.pkl',
                map_location=lambda storage, loc: storage, pickle_module=pickle)
            pretrained_dict = pretrained_dict.state_dict()
            pretrained_dict = {k: v for k, v in pretrained_dict.items(
            ) if k in model_dict and 'fc' not in k}
            model_dict.update(pretrained_dict)
            model.load_state_dict(model_dict)

        elif args.net == 'resnet18' and args.data == 'imagenet_sub':
            pickle.load = partial(pickle.load, encoding="latin1")
            pickle.Unpickler = partial(pickle.Unpickler, encoding="latin1")
            pretrained_dict = torch.load(
                'pretrained_models/Finetuning_0_task_0_200_model_task2_imagenet_sub_seed1993.pkl',
                map_location=lambda storage, loc: storage, pickle_module=pickle)
            pretrained_dict = pretrained_dict.state_dict()
            pretrained_dict = {k: v for k, v in pretrained_dict.items(
            ) if k in model_dict and 'fc' not in k}
            model_dict.update(pretrained_dict)
            model.load_state_dict(model_dict)

        else:
            print('Oops! That is not a valid model.')

    if args.method != 'Independent' and current_task > 0:
        model = torch.load(os.path.join(log_dir, args.method + '_' + args.exp +
                                        '_task_' + str(current_task-1) + '_%d_model.pkl' % int(args.epochs-1)))
        model_old = deepcopy(model)
        model_old.eval()
        model_old = freeze_model(model_old)

    model = model.cuda()
    torch.save(model, os.path.join(log_dir, args.method + '_' +
                                   args.exp + '_task_' + str(current_task) + '_pre_model.pkl'))
    print('initial model is saved at %s' % log_dir)

    # fine tune the model: the learning rate for pre-trained parameter is 1/10
    new_param_ids = set(map(id, model.Embed.parameters()))

    new_params = [p for p in model.parameters() if
                  id(p) in new_param_ids]

    base_params = [p for p in model.parameters() if
                   id(p) not in new_param_ids]
    param_groups = [
        {'params': base_params, 'lr_mult': 0.1},
        {'params': new_params, 'lr_mult': 1.0}]

    criterion = losses.create(args.loss, margin=args.margin, num_instances=args.num_instances).cuda()
    optimizer = torch.optim.Adam(
        param_groups, lr=args.lr, weight_decay=args.weight_decay)
    scheduler = StepLR(optimizer, step_size=200, gamma=0.1)

    if args.data == 'cifar100' or args.data == 'imagenet_sub':
        if current_task > 0:
            model.eval()

    for epoch in range(args.start, args.epochs):

        running_loss = 0.0
        running_lwf = 0.0
        scheduler.step()

        for i, data in enumerate(train_loader, 0):
            inputs, labels = data
            # wrap them in Variable
            inputs = Variable(inputs.cuda())
            labels = Variable(labels).cuda()
            optimizer.zero_grad()
            _, embed_feat = model(inputs)

            if current_task == 0:
                loss_aug = 0*torch.sum(embed_feat)
            else:
                if args.method == 'Finetuning' or args.method == 'Independent':
                    loss_aug = 0*torch.sum(embed_feat)
                elif args.method == 'LwF':
                    _, embed_feat_old = model_old(inputs)
                    loss_aug = args.tradeoff * \
                        torch.sum((embed_feat-embed_feat_old).pow(2))/2.
                elif args.method == 'EWC' or args.method == 'MAS':
                    loss_aug = 0
                    for (name, param), (_, param_old) in zip(model.named_parameters(), model_old.named_parameters()):
                        loss_aug += args.tradeoff * \
                            torch.sum(fisher[name]*(param_old-param).pow(2))/2.

            if args.loss == 'MSLoss':
                loss = criterion(embed_feat, labels)
                inter_ = 0
                dist_ap = 0
                dist_an = 0
            else:
                loss, inter_, dist_ap, dist_an = criterion(embed_feat, labels)
            loss += loss_aug

            loss.backward()
            optimizer.step()
            running_loss += loss.data[0]
            running_lwf += loss_aug.data[0]
            if epoch == 0 and i == 0:
                print(50*'#')
                print('Train Begin -- HA-HA-HA')

        print('[Epoch %05d]\t Total Loss: %.3f \t LwF Loss: %.3f \t Accuracy: %.3f \t Pos-Dist: %.3f \t Neg-Dist: %.3f'
              % (epoch + 1,  running_loss, running_lwf, inter_, dist_ap, dist_an))

        if epoch % args.save_step == 0:
            torch.save(model, os.path.join(log_dir, args.method + '_' +
                                           args.exp + '_task_' + str(current_task) + '_%d_model.pkl' % epoch))

    if args.method == 'EWC' or args.method == 'MAS':
        fisher = fisher_matrix_diag(
            model, criterion, train_loader, number_samples=500)
        return fisher
Example #10
def main(args):

    #  save the training log
    log_dir = os.path.join('checkpoints', args.log_dir)
    mkdir_if_missing(log_dir)

    sys.stdout = logging.Logger(os.path.join(log_dir, 'log.txt'))
    display(args)

    if args.r is None:
        model = models.create(args.net, Embed_dim=args.dim)
        # load part of the model
        model_dict = model.state_dict()
        # print(model_dict)
        if args.net == 'bn':
            pretrained_dict = torch.load(
                'pretrained_models/bn_inception-239d2248.pth')
        else:
            pretrained_dict = torch.load(
                'pretrained_models/inception_v3_google-1a9a5a14.pth')

        pretrained_dict = {
            k: v
            for k, v in pretrained_dict.items() if k in model_dict
        }

        model_dict.update(pretrained_dict)

        model.load_state_dict(model_dict)
    else:
        # resume model
        model = torch.load(args.r)

    model = model.cuda()

    torch.save(model, os.path.join(log_dir, 'model.pkl'))
    print('initial model is saved at %s' % log_dir)

    # fine tune the model: the learning rate for pre-trained parameter is 1/10
    new_param_ids = set(map(id, model.Embed.parameters()))

    new_params = [p for p in model.parameters() if id(p) in new_param_ids]

    base_params = [p for p in model.parameters() if id(p) not in new_param_ids]
    param_groups = [{
        'params': base_params,
        'lr_mult': 0.1
    }, {
        'params': new_params,
        'lr_mult': 1.0
    }]

    optimizer = torch.optim.Adam(param_groups,
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)
    criterion = losses.create(args.loss, alpha=args.alpha, k=args.k).cuda()

    data = DataSet.create(args.data, root=None, test=False)
    train_loader = torch.utils.data.DataLoader(
        data.train,
        batch_size=args.BatchSize,
        sampler=RandomIdentitySampler(data.train,
                                      num_instances=args.num_instances),
        drop_last=True,
        num_workers=args.nThreads)

    for epoch in range(args.start, args.epochs):
        running_loss = 0.0
        for i, data in enumerate(train_loader, 0):
            inputs, labels = data
            # wrap them in Variable
            inputs = Variable(inputs.cuda())
            labels = Variable(labels).cuda()

            optimizer.zero_grad()

            embed_feat = model(inputs)

            loss, inter_, dist_ap, dist_an = criterion(embed_feat, labels)
            if args.orth > 0:
                loss = orth_reg(model, loss, cof=args.orth)
            loss.backward()
            optimizer.step()
            running_loss += loss.data[0]
            if epoch == 0 and i == 0:
                print(50 * '#')
                print('Train Begin -- HA-HA-HA')

        print(
            '[Epoch %05d]\t Loss: %.3f \t Accuracy: %.3f \t Pos-Dist: %.3f \t Neg-Dist: %.3f'
            % (epoch + 1, running_loss, inter_, dist_ap, dist_an))

        if epoch % args.save_step == 0:
            torch.save(model, os.path.join(log_dir, '%d_model.pkl' % epoch))
Example #11
def train_task(args, train_loader, current_task, prototype={}, pre_index=0):
    num_class_per_task = (args.num_class-args.nb_cl_fg) // args.num_task
    task_range = list(range(args.nb_cl_fg + (current_task - 1) * num_class_per_task, args.nb_cl_fg + current_task * num_class_per_task))
    if num_class_per_task==0:
        pass  # JT
    else:
        old_task_factor = args.nb_cl_fg // num_class_per_task + current_task - 1
    log_dir = os.path.join(args.ckpt_dir, args.log_dir)
    mkdir_if_missing(log_dir)

    sys.stdout = logging.Logger(os.path.join(log_dir, 'log_task{}.txt'.format(current_task)))
    tb_writer = SummaryWriter(log_dir)
    display(args)

    if 'miniimagenet' in args.data:
        model = models.create('resnet18_imagenet', pretrained=False, feat_dim=args.feat_dim,embed_dim=args.num_class)
    elif 'cifar' in args.data:
        model = models.create('resnet18_cifar', pretrained=False, feat_dim=args.feat_dim,embed_dim=args.num_class)

    # mlp = ClassifierMLP()

    if current_task > 0:
        model = torch.load(os.path.join(log_dir, 'task_' + str(current_task - 1).zfill(2) + '_%d_model.pkl' % int(args.epochs - 1)))
        model_old = deepcopy(model)
        model_old.eval()
        model_old = freeze_model(model_old)
        # mlp_old = deepcopy(mlp)
        # mlp_old.eval()
        # mlp_old = freeze_model(mlp_old)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # model = model.cuda()
    model = model.to(device)
    # mlp = mlp.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    scheduler = StepLR(optimizer, step_size=args.lr_decay_step, gamma=args.lr_decay)
    # optimizer_mlp = torch.optim.Adam(mlp.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    # scheduler_mlp = StepLR(optimizer_mlp, step_size=args.lr_decay_step, gamma=args.lr_decay)

    loss_mse = torch.nn.MSELoss(reduction='sum')

    # # Loss weight for gradient penalty used in W-GAN
    lambda_gp = args.lambda_gp
    lambda_lwf = args.gan_tradeoff
    # Initialize generator and discriminator
    if current_task == 0:
        generator = Generator(feat_dim=args.feat_dim,latent_dim=args.latent_dim, hidden_dim=args.hidden_dim, class_dim=args.num_class)
        discriminator = Discriminator(feat_dim=args.feat_dim,hidden_dim=args.hidden_dim, class_dim=args.num_class)
    else:
        generator = torch.load(os.path.join(log_dir, 'task_' + str(current_task - 1).zfill(2) + '_%d_model_generator.pkl' % int(args.epochs_gan - 1)))
        discriminator = torch.load(os.path.join(log_dir, 'task_' + str(current_task - 1).zfill(2) + '_%d_model_discriminator.pkl' % int(args.epochs_gan - 1)))
        generator_old = deepcopy(generator)
        generator_old.eval()
        generator_old = freeze_model(generator_old)

    FloatTensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor

    # if args.learn_inner_lr:
    #         learned_lrs = []
    #         for i in range(args.update_steps):
    #             gen_lrs =[Variable(FloatTensor(1).fill_(args.update_lr), requires_grad=True)]*len(generator.parameters())
    #             # nway_lrs = [Variable(self.FloatTensor(1).fill_(self.update_lr), requires_grad=True)]*len(self.nway_net.parameters())
    #             discrim_lrs = [Variable(FloatTensor(1).fill_(args.update_lr), requires_grad=True)]*len(discriminator.parameters())

    #             learned_lrs.append((discrim_lrs, gen_lrs))

    generator = generator.to(device)
    discriminator = discriminator.to(device)

    optimizer_G = torch.optim.Adam(generator.parameters(), lr=args.gan_lr, betas=(0.5, 0.999))
    optimizer_D = torch.optim.Adam(discriminator.parameters(), lr=args.gan_lr, betas=(0.5, 0.999))
    # optimizer_lr = torch.optim.Adam(learned_lrs, lr=args.gan_lr, betas=(0.5, 0.999))
    # scheduler_G = StepLR(optimizer_G, step_size=200, gamma=0.3)
    # scheduler_D = StepLR(optimizer_D, step_size=200, gamma=0.3)

    y_onehot = torch.FloatTensor(args.meta_batch_size, args.num_class)

    for p in generator.parameters():  # set requires_grad to False
        p.requires_grad = False

    if current_task>0:
        model = model.eval()

    for epoch in range(args.epochs):

        loss_log = {'C/loss': 0.0,
                    'C/loss_aug': 0.0,
                    'C/loss_cls': 0.0,
                    'C/loss_cls_q':0.0}
        scheduler.step()


        ##### MAML on feature extraction
        # db = DataLoader(mini, args.meta_batch_size, shuffle=True, num_workers=1, pin_memory=True)

        for step, (x_spt, y_spt, x_qry, y_qry) in enumerate(train_loader):
            x_spt, y_spt, x_qry, y_qry = x_spt.to(device), y_spt.to(device), x_qry.to(device), y_qry.to(device)

            loss = torch.zeros(1).to(device)
            loss_cls = torch.zeros(1).to(device)
            loss_aug = torch.zeros(1).to(device)
            loss_tmp = torch.zeros(1).to(device)

            meta_batch_size, setsz, c_, h, w = x_spt.size()
            querysz = x_qry.size(1)

            losses_q = [0 for _ in range(args.update_step + 1)]  # losses_q[i] is the loss on step i
            corrects = [0 for _ in range(args.update_step + 1)]


            for i in range(args.meta_batch_size):
            	# 1. run the i-th task and compute loss for k=0
            	embed_feat = model(x_spt[i])

            	# $$$$$$$$$$$$$$$$
            	if current_task == 0:
            	    soft_feat = model.embed(embed_feat)
            	    # y_pred = mlp(soft_feat)
            	    loss_cls = torch.nn.CrossEntropyLoss()(soft_feat, y_spt[i])
            	    loss += loss_cls
            	else:                       
            	    embed_feat_old = model_old(x_spt[i]) 

            	### Feature Extractor Loss
            	if current_task > 0:                                    
            	    loss_aug = torch.dist(embed_feat, embed_feat_old , 2)  
            	    # loss_tmp += args.tradeoff * loss_aug * old_task_factor                  
            	    loss += args.tradeoff * loss_aug * old_task_factor
            	
            	### Replay and Classification Loss
            	if current_task > 0: 
            	    embed_sythesis = []
            	    embed_label_sythesis = []
            	    ind = list(range(len(pre_index)))

            	    if args.mean_replay:
            	        for _ in range(setsz):                        
            	            np.random.shuffle(ind)
            	            tmp = prototype['class_mean'][ind[0]]+np.random.normal()*prototype['class_std'][ind[0]]
            	            embed_sythesis.append(tmp)
            	            embed_label_sythesis.append(prototype['class_label'][ind[0]])
            	        embed_sythesis = np.asarray(embed_sythesis)
            	        embed_label_sythesis=np.asarray(embed_label_sythesis)
            	        embed_sythesis = torch.from_numpy(embed_sythesis).to(device)
            	        embed_label_sythesis = torch.from_numpy(embed_label_sythesis)
            	    else:
            	        for _ in range(setsz):
            	            np.random.shuffle(ind)
            	            embed_label_sythesis.append(pre_index[ind[0]])
            	        embed_label_sythesis = np.asarray(embed_label_sythesis)
            	        embed_label_sythesis = torch.from_numpy(embed_label_sythesis)
            	        y_onehot.zero_()
            	        y_onehot.scatter_(1, embed_label_sythesis[:, None], 1)
            	        syn_label_pre = y_onehot.to(device)

            	        z = torch.Tensor(np.random.normal(0, 1, (setsz, args.latent_dim))).to(device)
            	        
            	        embed_sythesis = generator(z, syn_label_pre)

            	    embed_sythesis = torch.cat((embed_feat,embed_sythesis))
            	    embed_label_sythesis = torch.cat((y_spt[i],embed_label_sythesis.to(device)))
            	    soft_feat_syt = model.embed(embed_sythesis)
            	    
            	    batch_size1 = inputs1.shape[0]
            	    batch_size2 = embed_feat.shape[0]
                    
                    # soft_feat_syt = mlp(soft_feat_syt)

            	    loss_cls = torch.nn.CrossEntropyLoss()(soft_feat_syt[:batch_size1], embed_label_sythesis[:batch_size1])

            	    loss_cls_old = torch.nn.CrossEntropyLoss()(soft_feat_syt[batch_size2:], embed_label_sythesis[batch_size2:])
            	    
            	    loss_cls += loss_cls_old * old_task_factor
            	    loss_cls /= args.nb_cl_fg // num_class_per_task + current_task
            	    loss += loss_cls
            	# $$$$$$$$$$$$$$$$
            	# loss = F.cross_entropy(embed_feat, y_spt[i])
            	grad = torch.autograd.grad(loss, model.parameters(),create_graph=True, retain_graph=True)
            	# fast_weights = list(map(lambda p: p[1] - args.update_lr * p[0], zip(grad, model.parameters())))
            	fast_weights_dict = fast_weights(grad,model.state_dict(),args.update_lr)
            	# this is the loss and accuracy before first update
            	with torch.no_grad():
            	    # [setsz, nway]
            	    embed_feat_q = model(x_qry[i])
            	    soft_feat_q = model.embed(embed_feat_q)
            	    # y_pred_q = mlp(soft_feat_q)
            	    loss_q = torch.nn.CrossEntropyLoss()(soft_feat_q, y_qry[i])
            	    # loss_q = F.cross_entropy(embed_feat_q, y_qry[i])
            	    # loss_q = torch.nn.CrossEntropyLoss()(soft_feat_q, y_qry[i])
            	    losses_q[0] += loss_q

            	    pred_q = F.softmax(soft_feat_q, dim=1).argmax(dim=1)
            	    correct = torch.eq(pred_q, y_qry[i]).sum().item()
            	    corrects[0] = corrects[0] + correct
            	# this is the loss and accuracy after the first update
            	with torch.no_grad():
            	    # [setsz, nway]
            	    model.load_state_dict(fast_weights_dict)
            	    embed_feat_q = model(x_qry[i])
            	    soft_feat_q = model.embed(embed_feat_q)
            	    # y_pred_q = mlp(soft_feat_q)
            	    loss_q = torch.nn.CrossEntropyLoss()(soft_feat_q, y_qry[i])
            	    # loss_q = torch.nn.cross_entropy(soft_feat_q, y_qry[i])
            	    losses_q[1] += loss_q
            	    # [setsz]
            	    pred_q = F.softmax(soft_feat_q, dim=1).argmax(dim=1)
            	    correct = torch.eq(pred_q, y_qry[i]).sum().item()
            	    corrects[1] = corrects[1] + correct

            	for k in range(1, args.update_step):
            	    # 1. run the i-th task and compute loss for k=1~K-1
            	    model.load_state_dict(fast_weights_dict)
            	    embed_feat = model(x_spt[i])
            	    # loss = torch.nn.cross_entropy(embed_feat, y_spt[i])
            	    loss = torch.zeros(1).to(device)
            	    if current_task>0:
            	    	embed_feat_old = model_old(x_spt[i])
            	    	loss_aug = torch.dist(embed_feat, embed_feat_old , 2)                    
            	    	loss += args.tradeoff * loss_aug * old_task_factor
            	    	soft_feat_syt = model.embed(embed_sythesis)
            	        batch_size1 = inputs1.shape[0]
            	        batch_size2 = embed_feat.shape[0]

            	        # soft_feat_syt = mlp(soft_feat_syt)

            	        loss_cls = torch.nn.CrossEntropyLoss()(soft_feat_syt[:batch_size1], embed_label_sythesis[:batch_size1])

            	        loss_cls_old = torch.nn.CrossEntropyLoss()(soft_feat_syt[batch_size2:], embed_label_sythesis[batch_size2:])
            	        
            	        loss_cls += loss_cls_old * old_task_factor
            	        loss_cls /= args.nb_cl_fg // num_class_per_task + current_task
            	        loss += loss_cls
            	    else:
            	    	soft_feat = model.embed(embed_feat)
            	    	# y_pred = mlp(soft_feat)
            	    	# loss_cls = torch.nn.CrossEntropyLoss()(y_pred, y_spt[i])
            	    	loss_cls = torch.nn.CrossEntropyLoss()(soft_feat, y_spt[i])
            	    	loss += loss_cls
            	    # 2. compute grad on theta_pi
            	    grad = torch.autograd.grad(loss, model.parameters(),create_graph=True, retain_graph=True)
            	    # 3. theta_pi = theta_pi - train_lr * grad
            	    # fast_weights = list(map(lambda p: p[1] - args.update_lr * p[0], zip(grad, fast_weights)))
            	    fast_weights_dict = fast_weights(grad,model.state_dict(),args.update_lr)
            	    model.load_state_dict(fast_weights_dict)
            	    embed_feat_q = model(x_qry[i])
            	    soft_feat_q = model.embed(embed_feat_q)
            	    # loss_q will be overwritten and just keep the loss_q on last update step.
            	    # soft_feat_q = mlp(soft_feat_q)
            	    loss_q = F.cross_entropy(soft_feat_q, y_qry[i])
            	    losses_q[k + 1] += loss_q

            	    with torch.no_grad():
            	        pred_q = F.softmax(soft_feat_q, dim=1).argmax(dim=1)
            	        correct = torch.eq(pred_q, y_qry[i]).sum().item()  # convert to numpy
            	        corrects[k + 1] = corrects[k + 1] + correct



            # end of all tasks
            # sum over all losses on query set across all tasks
            loss_q = losses_q[-1] / meta_batch_size
            # loss += loss_q
            # optimize theta parameters
            optimizer.zero_grad()
            # optimizer_mlp.zero_grad()
            # loss.backward()
            loss_q.backward()
            # print('meta update')
            # for p in self.net.parameters()[:5]:
            # 	print(torch.norm(p).item())
            optimizer.step()
            # optimizer_mlp.step()
            accs = np.array(corrects) / (querysz * meta_batch_size)
            loss_log['C/loss'] += loss.item()
            loss_log['C/loss_cls'] += loss_cls.item()
            loss_log['C/loss_aug'] += args.tradeoff*loss_aug.item() if args.tradeoff != 0 else 0
            loss_log['C/loss_cls_q'] += loss_q.item()
            del loss_cls
            del loss_q
            if epoch == 0 and i == 0:
                print(50 * '#')
Example #12
parser.add_argument('--batch_size', type=int, default=64)
parser.add_argument('--z_dim', type=int, default=100)
parser.add_argument('--lr_adam', type=float, default=2e-4)
parser.add_argument('--lr_rmsprop', type=float, default=2e-4)
parser.add_argument('--beta1', type=float, default=0.5, help='for adam')
parser.add_argument('--slope', type=float, default=0.2, help='for leaky ReLU')
parser.add_argument('--std', type=float, default=0.02, help='for weight')
parser.add_argument('--dropout', type=float, default=0.2)
parser.add_argument('--clamp', type=float, default=1e-2)
parser.add_argument('--wasserstein', type=bool, default=False)

opt = parser.parse_args()
if opt.clean_ckpt:
  shutil.rmtree(opt.ckpt_path)
os.makedirs(opt.ckpt_path, exist_ok=True)
logger = logging.Logger(opt.ckpt_path)
opt.seed = 1
torch.manual_seed(opt.seed)
torch.cuda.manual_seed(opt.seed)
cudnn.benchmark = True
EPS = 1e-12

transform = transforms.Compose([transforms.Scale(opt.image_size),
                                transforms.ToTensor(),
                                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
dataset = dset.CIFAR10(root=opt.dataset_path, train=True, download=False, transform=transform)
data_loader = torch.utils.data.DataLoader(dataset, batch_size=opt.batch_size, shuffle=True, num_workers=opt.num_workers)

D = model.Discriminator(opt).cuda()
G = model.Generator(opt).cuda()
Example #13
def main(args):
    s_ = time.time()

    #  save the training log
    save_dir = args.save_dir
    mkdir_if_missing(save_dir)

    sys.stdout = logging.Logger(os.path.join(save_dir, 'log.txt'))
    display(args)
    start = 0

    model = models.create(args.net, pretrained=True, dim=args.dim)

    if args.r is None:
        model_dict = model.state_dict()
        # orthogonal init
        if args.init == 'orth':
            w = model_dict['classifier.0.weight']
            model_dict['classifier.0.weight'] = torch.nn.init.orthogonal_(w)
        else:
            print('initialize the FC layer with Kaiming initialization')
            w = model_dict['classifier.0.weight']
            model_dict['classifier.0.weight'] = torch.nn.init.kaiming_normal_(
                w)

        # zero bias
        model_dict['classifier.0.bias'] = torch.zeros(args.dim)
        model.load_state_dict(model_dict)
    else:
        # resume model
        chk_pt = load_checkpoint(args.r)
        weight = chk_pt['state_dict']
        start = chk_pt['epoch']
        model.load_state_dict(weight)
    model = torch.nn.DataParallel(model)
    model = model.cuda()

    # freeze BN
    if args.BN == 1:
        print(40 * '#', 'BatchNorm frozen')
        model.apply(set_bn_eval)
    else:
        print(40 * '#', 'BatchNorm NOT frozen')
    # Fine-tune the model: the learning rate for pre-trained parameter is 1/10

    new_param_ids = set(map(id, model.module.classifier.parameters()))

    new_params = [
        p for p in model.module.parameters() if id(p) in new_param_ids
    ]

    base_params = [
        p for p in model.module.parameters() if id(p) not in new_param_ids
    ]

    param_groups = [{
        'params': base_params,
        'lr_mult': 0.0
    }, {
        'params': new_params,
        'lr_mult': 1.0
    }]

    print('initial model is saved at %s' % save_dir)

    optimizer = torch.optim.Adam(param_groups,
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    if args.loss == 'center-nca':
        criterion = losses.create(args.loss, alpha=args.alpha).cuda()
    elif args.loss == 'cluster-nca':
        criterion = losses.create(args.loss, alpha=args.alpha,
                                  beta=args.beta).cuda()
    elif args.loss == 'neighbour':
        criterion = losses.create(args.loss, k=args.k,
                                  margin=args.margin).cuda()
    elif args.loss == 'nca':
        criterion = losses.create(args.loss, alpha=args.alpha, k=args.k).cuda()
    elif args.loss == 'triplet':
        criterion = losses.create(args.loss, alpha=args.alpha).cuda()
    elif args.loss == 'bin' or args.loss == 'ori_bin':
        criterion = losses.create(args.loss,
                                  margin=args.margin,
                                  alpha=args.alpha)
    else:
        criterion = losses.create(args.loss).cuda()

    # Decor_loss = losses.create('decor').cuda()
    data = DataSet.create(args.data, root=None)

    train_loader = torch.utils.data.DataLoader(
        data.train,
        batch_size=args.BatchSize,
        sampler=FastRandomIdentitySampler(data.train,
                                          num_instances=args.num_instances),
        drop_last=True,
        pin_memory=True,
        num_workers=args.nThreads)

    # save the train information
    epoch_list = list()
    loss_list = list()
    pos_list = list()
    neg_list = list()

    for epoch in range(start, args.epochs):
        epoch_list.append(epoch)

        running_loss = 0.0
        running_pos = 0.0
        running_neg = 0.0

        if epoch == 1:
            optimizer.param_groups[0]['lr_mul'] = 0.1

        if (epoch == 1000 and args.data == 'car') or \
                (epoch == 550 and args.data == 'cub') or \
                (epoch == 100 and args.data in ['shop', 'jd']):

            param_groups = [{
                'params': base_params,
                'lr_mult': 0.1
            }, {
                'params': new_params,
                'lr_mult': 1.0
            }]

            optimizer = torch.optim.Adam(param_groups,
                                         lr=0.1 * args.lr,
                                         weight_decay=args.weight_decay)

        for i, data in enumerate(train_loader, 0):
            inputs, labels = data
            # wrap them in Variable
            inputs = Variable(inputs.cuda())

            # type of labels is Variable cuda.Longtensor
            labels = Variable(labels).cuda()

            optimizer.zero_grad()

            embed_feat = model(inputs)

            loss, inter_, dist_ap, dist_an = criterion(embed_feat, labels)

            # decor_loss = Decor_loss(embed_feat)

            # loss += args.theta * decor_loss

            if not type(loss) == torch.Tensor:
                print('Loss is not a tensor; skipping backward for this batch')
                continue

            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            running_neg += dist_an
            running_pos += dist_ap

            if epoch == 0 and i == 0:
                print(50 * '#')
                print('Train Begin -- HA-HA-HA-HA-AH-AH-AH-AH --')

        loss_list.append(running_loss)
        pos_list.append(running_pos / (i + 1))
        neg_list.append(running_neg / (i + 1))

        print(
            '[Epoch %03d]\t Loss: %.3f \t Accuracy: %.3f \t Pos-Dist: %.3f \t Neg-Dist: %.3f'
            % (epoch + 1, running_loss / (i + 1), inter_, dist_ap, dist_an))

        if (epoch + 1) % args.save_step == 0:
            if use_gpu:
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()

            save_checkpoint({
                'state_dict': state_dict,
                'epoch': (epoch + 1),
            },
                            is_best=False,
                            fpath=osp.join(
                                args.save_dir,
                                'ckp_ep' + str(epoch + 1) + '.pth.tar'))

    np.savez(os.path.join(save_dir, "result.npz"),
             epoch=epoch_list,
             loss=loss_list,
             pos=pos_list,
             neg=neg_list)
    t = time.time() - s_
    print('training takes %.2f hour' % (t / 3600))
Example #14
def main(args):
    # s_ = time.time()
    print(torch.cuda.get_device_properties(device=0).total_memory)
    torch.cuda.empty_cache()
    print(args)
    save_dir = args.save_dir
    mkdir_if_missing(save_dir)
    num_txt = len(glob.glob(save_dir + "/*.txt"))
    sys.stdout = logging.Logger(
        os.path.join(save_dir, "log_" + str(num_txt) + ".txt"))
    display(args)
    start = 0

    model = models.create(args.net,
                          pretrained=args.pretrained,
                          dim=args.dim,
                          self_supervision_rot=args.self_supervision_rot)
    all_pretrained = glob.glob(save_dir + "/*.pth.tar")

    if (args.resume is None) or (len(all_pretrained) == 0):
        model_dict = model.state_dict()

    else:
        # resume model
        all_pretrained_epochs = sorted(
            [int(x.split("/")[-1][6:-8]) for x in all_pretrained])
        args.resume = os.path.join(
            save_dir, "ckp_ep" + str(all_pretrained_epochs[-1]) + ".pth.tar")
        print('load model from {}'.format(args.resume))
        chk_pt = load_checkpoint(args.resume)
        weight = chk_pt['state_dict']
        start = chk_pt['epoch']
        model.load_state_dict(weight)

    model = torch.nn.DataParallel(model)
    model = model.cuda()
    fake_centers_dir = os.path.join(args.save_dir, "fake_center.npy")

    if np.sum(["train_1.txt" in x
               for x in glob.glob(args.save_dir + "/**/*")]) == 0:
        if args.rot_only:
            create_fake_labels(None, None, args)

        else:
            data = dataset.Dataset(args.data,
                                   ratio=args.ratio,
                                   width=args.width,
                                   origin_width=args.origin_width,
                                   root=args.data_root,
                                   self_supervision_rot=0,
                                   mode="test",
                                   rot_bt=args.rot_bt,
                                   corruption=args.corruption,
                                   args=args)

            fake_train_loader = torch.utils.data.DataLoader(
                data.train,
                batch_size=100,
                shuffle=False,
                drop_last=False,
                pin_memory=True,
                num_workers=args.nThreads)

            train_feature, train_labels = extract_features(
                model,
                fake_train_loader,
                print_freq=1e5,
                metric=None,
                pool_feature=args.pool_feature,
                org_feature=True)

            create_fake_labels(train_feature, train_labels, args)

            del train_feature

            fake_centers = "k-means++"

            torch.cuda.empty_cache()

    elif os.path.exists(fake_centers_dir):
        fake_centers = np.load(fake_centers_dir)
    else:
        fake_centers = "k-means++"

    time.sleep(60)

    model.train()

    # freeze BN
    if (args.freeze_BN is True) and (args.pretrained):
        print(40 * '#', '\n BatchNorm frozen')
        model.apply(set_bn_eval)
    else:
        print(40 * '#', 'BatchNorm NOT frozen')

    # Fine-tune the model: the learning rate for pre-trained parameter is 1/10
    new_param_ids = set(map(id, model.module.classifier.parameters()))
    new_rot_param_ids = set()
    if args.self_supervision_rot:
        new_rot_param_ids = set(
            map(id, model.module.classifier_rot.parameters()))
        print(new_rot_param_ids)

    new_params = [
        p for p in model.module.parameters() if id(p) in new_param_ids
    ]

    new_rot_params = [
        p for p in model.module.parameters() if id(p) in new_rot_param_ids
    ]

    base_params = [
        p for p in model.module.parameters()
        if (id(p) not in new_param_ids) and (id(p) not in new_rot_param_ids)
    ]

    param_groups = [{
        'params': base_params
    }, {
        'params': new_params
    }, {
        'params': new_rot_params,
        'lr': args.rot_lr
    }]

    print('initial model is saved at %s' % save_dir)

    optimizer = torch.optim.Adam(param_groups,
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)
    criterion = losses.create(args.loss,
                              margin=args.margin,
                              alpha=args.alpha,
                              beta=args.beta,
                              base=args.loss_base).cuda()

    data = dataset.Dataset(args.data,
                           ratio=args.ratio,
                           width=args.width,
                           origin_width=args.origin_width,
                           root=args.save_dir,
                           self_supervision_rot=args.self_supervision_rot,
                           rot_bt=args.rot_bt,
                           corruption=1,
                           args=args)
    train_loader = torch.utils.data.DataLoader(
        data.train,
        batch_size=args.batch_size,
        sampler=FastRandomIdentitySampler(data.train,
                                          num_instances=args.num_instances),
        drop_last=True,
        pin_memory=True,
        num_workers=args.nThreads)

    # save the train information

    for epoch in range(start, args.epochs):

        train(epoch=epoch,
              model=model,
              criterion=criterion,
              optimizer=optimizer,
              train_loader=train_loader,
              args=args)

        if (epoch + 1) % args.save_step == 0 or epoch == 0:
            if use_gpu:
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()

            save_checkpoint({
                'state_dict': state_dict,
                'epoch': (epoch + 1),
            },
                            is_best=False,
                            fpath=osp.join(
                                args.save_dir,
                                'ckp_ep' + str(epoch + 1) + '.pth.tar'))

        if ((epoch + 1) % args.up_step == 0) and (not args.rot_only):
            # rewrite train_1.txt file
            data = dataset.Dataset(args.data,
                                   ratio=args.ratio,
                                   width=args.width,
                                   origin_width=args.origin_width,
                                   root=args.data_root,
                                   self_supervision_rot=0,
                                   mode="test",
                                   rot_bt=args.rot_bt,
                                   corruption=args.corruption,
                                   args=args)
            fake_train_loader = torch.utils.data.DataLoader(
                data.train,
                batch_size=args.batch_size,
                shuffle=False,
                drop_last=False,
                pin_memory=True,
                num_workers=args.nThreads)
            train_feature, train_labels = extract_features(
                model,
                fake_train_loader,
                print_freq=1e5,
                metric=None,
                pool_feature=args.pool_feature,
                org_feature=(args.dim % 64 != 0))
            fake_centers = create_fake_labels(train_feature,
                                              train_labels,
                                              args,
                                              init_centers=fake_centers)
            del train_feature
            torch.cuda.empty_cache()
            time.sleep(60)
            np.save(fake_centers_dir, fake_centers)
            # reload data
            data = dataset.Dataset(
                args.data,
                ratio=args.ratio,
                width=args.width,
                origin_width=args.origin_width,
                root=args.save_dir,
                self_supervision_rot=args.self_supervision_rot,
                rot_bt=args.rot_bt,
                corruption=1,
                args=args)

            train_loader = torch.utils.data.DataLoader(
                data.train,
                batch_size=args.batch_size,
                sampler=FastRandomIdentitySampler(
                    data.train, num_instances=args.num_instances),
                drop_last=True,
                pin_memory=True,
                num_workers=args.nThreads)

            # test on testing data
            # extract_recalls(data=args.data, data_root=args.data_root, width=args.width, net=args.net, checkpoint=None,
            #         dim=args.dim, batch_size=args.batch_size, nThreads=args.nThreads, pool_feature=args.pool_feature,
            #         gallery_eq_query=args.gallery_eq_query, model=model)
            model.train()
            if (args.freeze_BN is True) and (args.pretrained):
                print(40 * '#', '\n BatchNorm frozen')
                model.apply(set_bn_eval)
Example #15
# -*- coding: utf-8 -*-
import json
import utils.logging as logging

from django.core.files.storage import default_storage
from django.core.files.base import ContentFile

from utils.gzip import GzipFile, gunzip_bytes, is_gzipped
from utils.encoders import DjangoPartialModelJsonEncoder


logger = logging.Logger(__name__)


def get_object(prefix):
    """Retrieve an object from S3 and load into memory.
    """
    with default_storage.open(prefix) as fo:
        content = fo.read()
        if is_gzipped(content):
            content = gunzip_bytes(content)
    if isinstance(content, (bytes,)):
        content = content.decode()
    return json.loads(content)


def put_object(prefix, content, gzipped=True):
    """Load a blob into S3.
    """
    FileClass = GzipFile if gzipped else ContentFile
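    # NOTE: the body of put_object is truncated in the original snippet. A plausible
    # completion, assuming GzipFile wraps a raw payload the same way Django's
    # ContentFile does, would hand the blob to the default storage backend:
    return default_storage.save(prefix, FileClass(content))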
Example #16
def main(args):
    print(args.p_lambda)
    save_dir = args.save_dir
    mkdir_if_missing(save_dir)

    print("DRO:", args.DRO)

    # sys.stdout: output from console
    # sys.stderr: exceptions from python
    sys.stdout = logging.Logger(os.path.join(save_dir, 'log.txt')) #sys.stdout --> 'log.txt'
    sys.stderr = logging.Logger(os.path.join(save_dir, 'error.txt')) #sys.stderr --> 'error.txt'

    display(args)
    start = 0

    model = models.create(args.net, pretrained=True, dim=args.dim)


    save_checkpoint({
        'state_dict': model.state_dict(),
        'epoch': 0,
    }, is_best=False, fpath=osp.join(args.save_dir, 'ckp_ep'+ str(start) + '.pth.tar'))
    # for vgg and densenet

    if args.resume is None:
        model_dict = model.state_dict()
    else:
        # resume model
        print('load model from {}'.format(args.resume))
        chk_pt = load_checkpoint(args.resume)
        weight = chk_pt['state_dict']
        start = chk_pt['epoch']
        model.load_state_dict(weight)


    model = torch.nn.DataParallel(model)
    model = model.cuda()

    # freeze BN
    if args.freeze_BN is True:
        print(40 * '#', '\n BatchNorm frozen')
        model.apply(set_bn_eval)  # apply() passes every submodule to set_bn_eval, putting BatchNorm layers in eval mode
    else:
        print(40*'#', 'BatchNorm NOT frozen')


    optimizer = torch.optim.Adam(model.module.parameters(), lr=args.lr,
                                 weight_decay=args.weight_decay)

    print("--------------------------:", args.p_lambda)
    criterion = DRO.create(args.DRO, loss = args.loss, margin=args.margin, alpha=args.alpha,
                           beta = args.beta,
                           p_lambda = args.p_lambda, p_lambda_neg = args.p_lambda_neg, K = args.K,
                           select_TOPK_all = args.select_TOPK_all, p_choice = args.p_choice,
                           truncate_p = args.truncate_p).cuda()

    # Decor_loss = losses.create('decode').cuda()
    print("Train, RAE:", args.mode)
    data = DataSet.create(args.data, ratio=args.ratio, width=args.width, origin_width=args.origin_width, root=args.data_root, RAE=args.mode)

    train_loader = torch.utils.data.DataLoader(
        data.train, batch_size=args.batch_size,
        sampler=FastRandomIdentitySampler(data.train, num_instances=args.num_instances),
        drop_last=True, pin_memory=True, num_workers=args.nThreads)


    # save the train information

    for epoch in range(start, args.epochs):


        train(epoch=epoch, model=model, criterion=criterion,
              optimizer=optimizer, train_loader=train_loader, args=args)

        if epoch == 1:
            optimizer.param_groups[0]['lr_mul'] = 0.1
        
        if (epoch+1) % args.save_step == 0 or epoch==0:
            if use_gpu:
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()

            save_checkpoint({
                'state_dict': state_dict,
                'epoch': (epoch+1),
            }, is_best=False, fpath=osp.join(args.save_dir, 'ckp_ep' + str(epoch + 1) + '.pth.tar'))
Example #17
def main(args):
    num_class_dict = {'cub': int(100), 'car': int(98)}
    #  save the training log
    log_dir = os.path.join(args.checkpoints, args.log_dir)
    mkdir_if_missing(log_dir)

    sys.stdout = logging.Logger(os.path.join(log_dir, 'log.txt'))
    display(args)

    if args.r is None:
        model = models.create(args.net, Embed_dim=args.dim)
        # load part of the model
        model_dict = model.state_dict()
        # print(model_dict)
        if args.net == 'bn':
            pretrained_dict = torch.load('pretrained_models/bn_inception-239d2248.pth')
        else:
            pretrained_dict = torch.load('pretrained_models/inception_v3_google-1a9a5a14.pth')

        pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}

        model_dict.update(pretrained_dict)

        # orth init
        if args.init == 'orth':
            print('initialize the FC layer orthogonally')
            _, _, v = torch.svd(model_dict['Embed.linear.weight'])
            model_dict['Embed.linear.weight'] = v.t()

        # zero bias
        model_dict['Embed.linear.bias'] = torch.zeros(args.dim)

        model.load_state_dict(model_dict)
    else:
        # resume model
        model = torch.load(args.r)

    model = model.cuda()

    # compute the cluster centers for each class here

    def normalize(x):
        norm = x.norm(dim=1, p=2, keepdim=True)
        x = x.div(norm.expand_as(x))
        return x

    data = DataSet.create(args.data, root=None, test=False)

    if args.center_init == 'cluster':
        data_loader = torch.utils.data.DataLoader(
            data.train, batch_size=args.BatchSize, shuffle=False, drop_last=False)

        features, labels = extract_features(model, data_loader, print_freq=32, metric=None)
        features = [feature.resize_(1, args.dim) for feature in features]
        features = torch.cat(features)
        features = features.numpy()
        labels = np.array(labels)

        centers, center_labels = cluster_(features, labels, n_clusters=args.n_cluster)
        center_labels = [int(center_label) for center_label in center_labels]

        centers = Variable(torch.FloatTensor(centers).cuda(),  requires_grad=True)
        center_labels = Variable(torch.LongTensor(center_labels)).cuda()
        print(40*'#', '\n Clustering Done')

    else:
        center_labels = int(args.n_cluster) * list(range(num_class_dict[args.data]))
        center_labels = Variable(torch.LongTensor(center_labels).cuda())

        centers = normalize(torch.rand(num_class_dict[args.data]*args.n_cluster, args.dim))
        centers = Variable(centers.cuda(), requires_grad=True)

    torch.save(model, os.path.join(log_dir, 'model.pkl'))
    print('initial model is saved at %s' % log_dir)

    # fine tune the model: the learning rate for pre-trained parameter is 1/10
    new_param_ids = set(map(id, model.Embed.parameters()))

    new_params = [p for p in model.parameters() if
                  id(p) in new_param_ids]

    base_params = [p for p in model.parameters() if
                   id(p) not in new_param_ids]
    param_groups = [
                {'params': base_params, 'lr_mult': 0.1},
                {'params': new_params, 'lr_mult': 1.0},
                {'params': centers, 'lr_mult': 1.0}]

    optimizer = torch.optim.Adam(param_groups, lr=args.lr,
                                 weight_decay=args.weight_decay)

    cluster_counter = np.zeros([num_class_dict[args.data], args.n_cluster])
    criterion = losses.create(args.loss, alpha=args.alpha, centers=centers,
                              center_labels=center_labels, cluster_counter=cluster_counter).cuda()

    # random sampling to generate mini-batch
    train_loader = torch.utils.data.DataLoader(
        data.train, batch_size=args.BatchSize, shuffle=True, drop_last=False)

    # save the train information
    epoch_list = list()
    loss_list = list()
    pos_list = list()
    neg_list = list()

    # _mask = Variable(torch.ByteTensor(np.ones([2, 4]))).cuda()
    dtype = torch.ByteTensor
    _mask = torch.ones(int(num_class_dict[args.data]), args.n_cluster).type(dtype)
    _mask = Variable(_mask).cuda()

    for epoch in range(args.start, args.epochs):
        epoch_list.append(epoch)

        running_loss = 0.0
        running_pos = 0.0
        running_neg = 0.0
        to_zero(cluster_counter)

        for i, data in enumerate(train_loader, 0):
            inputs, labels = data
            # wrap them in Variable
            inputs = Variable(inputs.cuda())

            # type of labels is Variable cuda.Longtensor
            labels = Variable(labels).cuda()
            optimizer.zero_grad()
            # centers.zero_grad()
            embed_feat = model(inputs)

            # update network weight
            loss, inter_, dist_ap, dist_an = criterion(embed_feat, labels, _mask)
            loss.backward()
            optimizer.step()

            centers.data = normalize(centers.data)

            running_loss += loss.data[0]
            running_neg += dist_an
            running_pos += dist_ap

            if epoch == 0 and i == 0:
                print(50 * '#')
                print('Train Begin -- HA-HA-HA')
            if i % 10 == 9:
                print('[Epoch %05d Iteration %2d]\t Loss: %.3f \t Accuracy: %.3f \t Pos-Dist: %.3f \t Neg-Dist: %.3f'
                      % (epoch + 1,  i+1, loss.data[0], inter_, dist_ap, dist_an))
        # cluster number counter show here
        print(cluster_counter)
        loss_list.append(running_loss)
        pos_list.append(running_pos / i)
        neg_list.append(running_neg / i)
        # update the _mask to make the cluster with only 1 or no member to be silent
        # _mask = Variable(torch.FloatTensor(cluster_counter) > 1).cuda()
        # cluster_distribution = torch.sum(_mask, 1).cpu().data.numpy().tolist()
        # print(cluster_distribution)
        # print('[Epoch %05d]\t Loss: %.3f \t Accuracy: %.3f \t Pos-Dist: %.3f \t Neg-Dist: %.3f'
        #       % (epoch + 1, running_loss, inter_, dist_ap, dist_an))

        if epoch % args.save_step == 0:
            torch.save(model, os.path.join(log_dir, '%d_model.pkl' % epoch))
    np.savez(os.path.join(log_dir, "result.npz"), epoch=epoch_list, loss=loss_list, pos=pos_list, neg=neg_list)
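The "# orth init" branch above seeds the embedding layer with orthonormal rows taken from the right singular vectors of its current weight. A minimal, self-contained sketch of the same idea (the names and shapes here are illustrative, not from the repo):

import torch
import torch.nn as nn

def orthogonalize_linear(layer: nn.Linear) -> None:
    """Re-initialize a Linear layer so its weight has orthonormal rows
    (assumes out_features <= in_features, as in the snippet above)."""
    with torch.no_grad():
        _, _, v = torch.svd(layer.weight)   # v: [in_features, out_features]
        layer.weight.copy_(v.t())           # rows of v.t() are orthonormal
        if layer.bias is not None:
            layer.bias.zero_()

embed = nn.Linear(1024, 512)                # e.g. a 512-d embedding head
orthogonalize_linear(embed)
print(torch.allclose(embed.weight @ embed.weight.t(),
                     torch.eye(512), atol=1e-4))  # ~True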
예제 #18
0
def main(args):
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    dir = '%s_%s_dis_%s_%s_%s_%0.2f_%s' % (args.data, args.loss, args.net,
                                           args.TNet, args.Ttype, args.lamda,
                                           args.lr)
    log_dir = os.path.join('checkpoints', dir)
    mkdir_if_missing(log_dir)

    sys.stdout = logging.Logger(os.path.join(log_dir, 'log.txt'))
    display(args)
    # Teacher Network
    if args.r is None:
        Network_T = args.TNet
        model_T = models.create(Network_T, Embed_dim=args.dim)
        model_dict_T = model_T.state_dict()

        if args.data == 'cub':
            model_T = torch.load('checkpoints/cub_Tmodel.pkl')
        elif args.data == 'car':
            model_T = torch.load('checkpoints/car_Tmodel.pkl')
        elif args.data == 'product':
            model_T = torch.load('checkpoints/product_Tmodel.pkl')

    else:
        model_T = torch.load(args.r)

    model_T = model_T.cuda()
    model_T.eval()

    # Student network
    if args.r is None:
        model = models.create(args.net, Embed_dim=args.dim)
        model_dict = model.state_dict()
        if args.net == 'bn':
            pretrained_dict = torch.load(
                'pretrained_models/bn_inception-239d2248.pth')
        elif args.net == 'resnet101':
            pretrained_dict = torch.load(
                'pretrained_models/resnet101-5d3b4d8f.pth')
        elif args.net == 'resnet50':
            pretrained_dict = torch.load(
                'pretrained_models/resnet50-19c8e357.pth')
        elif args.net == 'resnet34':
            pretrained_dict = torch.load(
                'pretrained_models/resnet34-333f7ec4.pth')
        elif args.net == 'resnet18':
            pretrained_dict = torch.load(
                'pretrained_models/resnet18-5c106cde.pth')
        elif args.net == 'inception':
            pretrained_dict = torch.load(
                'pretrained_models/inception_v3_google-1a9a5a14.pth')
        else:
            raise ValueError('Oops! {} is not a valid model name.'.format(args.net))

        pretrained_dict = {
            k: v
            for k, v in pretrained_dict.items() if k in model_dict
        }
        model_dict.update(pretrained_dict)
        model.load_state_dict(model_dict)

    else:
        model = torch.load(args.r)

    if args.continue_train:
        model = torch.load(log_dir + '/%d_model.pkl' % (args.start))

    model = model.cuda()

    torch.save(model, os.path.join(log_dir, 'model.pkl'))
    print('initial model is saved at %s' % log_dir)

    new_param_ids = set(map(id, model.Embed.parameters()))

    new_params = [p for p in model.parameters() if id(p) in new_param_ids]

    base_params = [p for p in model.parameters() if id(p) not in new_param_ids]
    param_groups = [{
        'params': base_params,
        'lr_mult': 0.1
    }, {
        'params': new_params,
        'lr_mult': 1.0
    }]

    optimizer = torch.optim.Adam(param_groups,
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)
    if args.loss == 'knnsoftmax':
        criterion = losses.create(args.loss, alpha=args.alpha, k=args.k).cuda()
    else:
        criterion = losses.create(args.loss).cuda()

    data = DataSet.create(args.data, root=None, test=False)
    train_loader = torch.utils.data.DataLoader(
        data.train,
        batch_size=args.BatchSize,
        sampler=RandomIdentitySampler(data.train,
                                      num_instances=args.num_instances),
        drop_last=True,
        num_workers=args.nThreads)

    loss_log = []
    for i in range(3):
        loss_log.append([])
    loss_dis = []
    for i in range(3):
        loss_dis.append([])
    for epoch in range(args.start, args.epochs):
        running_loss = 0.0
        for i, data in enumerate(train_loader, 0):
            inputs, labels = data
            inputs = Variable(inputs.cuda())
            labels = Variable(labels).cuda()

            optimizer.zero_grad()

            embed_feat = model(inputs)
            embed_feat_T = model_T(inputs)

            loss_net, inter_, dist_ap, dist_an, dis_pos, dis_neg, dis = criterion(
                embed_feat, labels)
            loss_net_T, inter_T, dist_ap_T, dist_an_T, dis_pos_T, dis_neg_T, dis_T = criterion(
                embed_feat_T, labels)

            lamda = args.lamda

            if args.Ttype == 'relative':
                loss_dis[0].append(
                    torch.mean(torch.norm(dis - dis_T, p=2)).data[0])
                loss_dis[1].append(0.0)
                loss_dis[2].append(0.0)

                loss_distillation = 0.0 * torch.mean(
                    F.pairwise_distance(embed_feat, embed_feat_T))
                loss_distillation += torch.mean(torch.norm(dis - dis_T, p=2))
                loss = loss_net + lamda * loss_distillation

            elif args.Ttype == 'absolute':
                loss_dis[0].append(0.0)
                loss_dis[1].append(0.0)
                loss_dis[2].append(
                    torch.mean(F.pairwise_distance(embed_feat,
                                                   embed_feat_T)).data[0])
                loss_distillation = torch.mean(
                    F.pairwise_distance(embed_feat, embed_feat_T))
                loss = loss_net + lamda * loss_distillation

            else:
                raise ValueError('Unknown Ttype: {}'.format(args.Ttype))

            loss.backward()
            optimizer.step()

            running_loss += loss.data[0]
            loss_log[0].append(loss.data[0])
            loss_log[1].append(loss_net.data[0])
            loss_log[2].append(lamda * loss_distillation.data[0])

            if epoch == 0 and i == 0:
                print(50 * '#')
                print('Train Begin -- HA-HA-HA')

        print(
            '[Epoch %05d]\t  Loss_net: %.3f \t Loss_distillation: %.3f \t Accuracy: %.3f \t Pos-Dist: %.3f \t Neg-Dist: %.3f'
            % (epoch + 1, loss_net, lamda * loss_distillation, inter_, dist_ap,
               dist_an))

        if epoch % args.save_step == 0:
            torch.save(model, os.path.join(log_dir, '%d_model.pkl' % epoch))

    #plot loss
    line1, = plt.plot(
        loss_log[0],
        'r-',
        label="Total loss",
    )
    line2, = plt.plot(loss_log[1], 'b-', label="KNNsoftmax loss")
    line3, = plt.plot(loss_log[2], 'g--', label="Distillation loss")
    plt.title(
        '%s_%s_dis_%s_%s_%s_%0.2f' %
        (args.data, args.loss, args.net, args.TNet, args.Ttype, args.lamda))
    plt.legend([line1, line2, line3],
               ['Total loss', 'Contrastive loss', 'Distance loss'])
    plt.savefig(
        './fig/%s_%s_dis_%s_%s_%s_%0.2f.jpg' %
        (args.data, args.loss, args.net, args.TNet, args.Ttype, args.lamda))
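The 'absolute' and 'relative' Ttype branches above are two common distillation signals: matching each student embedding to the corresponding teacher embedding, versus matching the pairwise-distance structure of the two embedding spaces. A standalone sketch of both (it uses torch.cdist rather than the repo's criterion outputs, so constants and scaling may differ):

import torch
import torch.nn.functional as F

def absolute_distillation(student, teacher):
    # pull each student embedding toward the matching teacher embedding
    return F.pairwise_distance(student, teacher).mean()

def relative_distillation(student, teacher):
    # match the pairwise-distance structure of the two embedding spaces
    d_s = torch.cdist(student, student)   # [B, B] student distances
    d_t = torch.cdist(teacher, teacher)   # [B, B] teacher distances
    return torch.norm(d_s - d_t, p=2)

student = torch.randn(8, 64, requires_grad=True)   # toy batch
teacher = torch.randn(8, 64)
loss = absolute_distillation(student, teacher) \
       + 0.5 * relative_distillation(student, teacher)
loss.backward()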
예제 #19
0
def main(args):
    s_ = time.time()

    # save the training logs
    log_dir = args.log_dir
    mkdir_if_missing(log_dir)

    sys.stdout = logging.Logger(os.path.join(log_dir, 'log.txt'))
    display(args)

    if args.r is None:
        model = models.create(args.net)
        model = load_parameter(model)

    else:
        # resume model
        print('Resume from model at Epoch %d' % args.start)
        model = torch.load(args.r)

    model = model.cuda()
    torch.save(model, os.path.join(log_dir, 'model.pkl'))
    print('initial model is saved at %s' % log_dir)
    # fine tune the model: the learning rate for pre-trained parameter is 1/10
    new_param_ids = set.union(
        set(map(id, model.Embedding.parameters())),
        set(map(id, model.attention_blocks.parameters())))

    new_params = [p for p in model.parameters() if id(p) in new_param_ids]

    base_params = [p for p in model.parameters() if id(p) not in new_param_ids]
    param_groups = [{
        'params': base_params,
        'lr_mult': 0.0
    }, {
        'params': new_params,
        'lr_mult': 1.0
    }]

    optimizer = torch.optim.Adam(param_groups,
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    if args.loss == 'bin':
        criterion = losses.create(args.loss,
                                  margin=args.margin,
                                  alpha=args.alpha).cuda()
        Div = losses.create('div').cuda()
    else:
        criterion = losses.create(args.loss).cuda()

    data = DataSet.create(args.data, root=None)
    train_loader = torch.utils.data.DataLoader(
        data.train,
        batch_size=args.BatchSize,
        sampler=RandomIdentitySampler(data.train,
                                      num_instances=args.num_instances),
        drop_last=True,
        num_workers=args.nThreads)

    # save the train information
    epoch_list = list()
    loss_list = list()
    pos_list = list()
    neg_list = list()

    for epoch in range(args.start, args.epochs):
        epoch_list.append(epoch)

        running_loss = 0.0
        divergence = 0.0
        running_pos = 0.0
        running_neg = 0.0

        if epoch == 2:
            param_groups[0]['lr_mult'] = 0.1

        for i, data in enumerate(train_loader, 0):
            inputs, labels = data
            # wrap them in Variable
            inputs = Variable(inputs.cuda())

            # type of labels is Variable cuda.Longtensor
            labels = Variable(labels).cuda()

            optimizer.zero_grad()

            embed_feat = model(inputs)

            loss, inter_, dist_ap, dist_an = criterion(embed_feat, labels)
            div = Div(embed_feat)

            loss_ = loss + args.theta * div
            if not isinstance(loss, torch.Tensor):
                print('Loss is not a Tensor; skipping backward for this batch')
                continue

            loss_.backward()
            optimizer.step()

            running_loss += loss.item()
            divergence += div.item()
            running_neg += dist_an
            running_pos += dist_ap

            if epoch == 0 and i == 0:
                print(50 * '#')
                print('Train Begin -- HA-HA-HA-HA-AH-AH-AH-AH --')

        loss_list.append(running_loss)
        pos_list.append(running_pos / i)
        neg_list.append(running_neg / i)

        print(
            '[Epoch %05d]\t Loss: %.2f \t Divergence: %.2f \t Accuracy: %.2f \t Pos-Dist: %.2f \t Neg-Dist: %.2f'
            % (epoch + 1, running_loss, divergence, inter_, dist_ap, dist_an))

        if epoch % args.save_step == 0:
            torch.save(model, os.path.join(log_dir, '%d_model.pkl' % epoch))
    np.savez(os.path.join(log_dir, "result.npz"),
             epoch=epoch_list,
             loss=loss_list,
             pos=pos_list,
             neg=neg_list)
    t = time.time() - s_
    print('training took %.2f hours' % (t / 3600))
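Several of these scripts attach a custom lr_mult key to each parameter group (this one even bumps it from 0.0 to 0.1 at epoch 2), but Adam itself never reads that key. A hypothetical helper along these lines (not shown in the snippets) is what typically turns lr_mult into the group's effective learning rate:

import torch
import torch.nn as nn

def apply_lr_mult(optimizer, base_lr):
    # scale each group's lr by its custom 'lr_mult' key (default 1.0)
    for group in optimizer.param_groups:
        group['lr'] = base_lr * group.get('lr_mult', 1.0)

backbone, head = nn.Linear(16, 16), nn.Linear(16, 4)
optimizer = torch.optim.Adam(
    [{'params': backbone.parameters(), 'lr_mult': 0.0},
     {'params': head.parameters(), 'lr_mult': 1.0}], lr=1e-3)

for epoch in range(5):
    if epoch == 2:                                  # start fine-tuning the backbone
        optimizer.param_groups[0]['lr_mult'] = 0.1
    apply_lr_mult(optimizer, base_lr=1e-3)
    # ... forward / backward / optimizer.step() ...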
예제 #20
0
def main(args):

    # s_ = time.time()
    save_dir = args.save_dir
    mkdir_if_missing(save_dir)

    sys.stdout = logging.Logger(os.path.join(save_dir, 'log.txt'))
    display(args)
    start = 0

    model = models.create(args.net, pretrained=True, dim=args.dim)  #
    model_frozen = models.create(args.net, pretrained=True, dim=args.dim)  #

    # for vgg and densenet
    if args.resume is None:
        model_dict = model.state_dict()
    else:
        # resume model
        print('load model from {}'.format(args.resume))

        model_dict = model.state_dict()
        model_dict_frozen = model_frozen.state_dict()
        chk_pt = torch.load(args.resume)
        weight = chk_pt['state_dict']
        start = chk_pt['epoch']
        pretrained_dict = {k: v for k, v in weight.items() if k in model_dict}
        model_dict.update(pretrained_dict)
        model.load_state_dict(model_dict)

        pretrained_dict_frozen = {
            k: v
            for k, v in weight.items() if k in model_dict_frozen
        }
        model_dict_frozen.update(pretrained_dict_frozen)
        model_frozen.load_state_dict(model_dict_frozen)
        model_frozen.eval()

    model = torch.nn.DataParallel(model)
    model = model.cuda()

    model_frozen = torch.nn.DataParallel(model_frozen)
    model_frozen = model_frozen.cuda()

    # freeze BN
    if args.freeze_BN is True:
        print(40 * '#', '\n BatchNorm frozen')
        model.apply(set_bn_eval)
    else:
        print(40 * '#', 'BatchNorm NOT frozen')

    # Fine-tune the model: the learning rate for pre-trained parameter is 1/10
    new_param_ids_fc_layer = set(map(id, model.module.fc_layer.parameters()))

    new_param_ids = new_param_ids_fc_layer

    new_params_fc = [
        p for p in model.module.parameters() if id(p) in new_param_ids_fc_layer
    ]
    base_params = [
        p for p in model.module.parameters() if id(p) not in new_param_ids
    ]

    # freeze the copied model: its parameters receive no gradient (learning rate effectively 0.0)
    frozen_params = [p for p in model_frozen.module.parameters()]
    for p in frozen_params:
        p.requires_grad = False

    # to fine-tune the base network set lr_mult to 0.1; with lr_mult = 0.0 the base network is not updated
    param_groups = [{
        'params': base_params,
        'lr_mult': 0.1
    }, {
        'params': new_params_fc,
        'lr_mult': 1.0
    }]

    print('initial model is saved at %s' % save_dir)

    optimizer = torch.optim.Adam(param_groups,
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    criterion_loss = losses.create(args.loss,
                                   margin=args.margin,
                                   alpha=args.alpha,
                                   base=args.loss_base).cuda()
    CE_loss = nn.CrossEntropyLoss().cuda()
    l2_loss = L2Norm().cuda()
    similarity_loss = Similarity_preserving().cuda()

    criterion = [criterion_loss, CE_loss, l2_loss, similarity_loss]

    # Decor_loss = losses.create('decor').cuda()
    data = DataSet.create(args.data,
                          ratio=args.ratio,
                          width=args.width,
                          origin_width=args.origin_width,
                          root=args.data_root)

    train_loader = torch.utils.data.DataLoader(
        data.train,
        batch_size=args.batch_size,
        sampler=FastRandomIdentitySampler(data.train,
                                          num_instances=args.num_instances),
        drop_last=True,
        pin_memory=True,
        num_workers=args.nThreads)

    # save the train information
    best_accuracy = 0

    model_list = [model, model_frozen]

    if args.Incremental_flag is False:
        print(
            "######################This is non-incremental learning! ########################"
        )
    elif args.Incremental_flag is True:
        print(
            "#########################This is incremental learning! #########################"
        )
    else:
        raise NotImplementedError()

    for epoch in range(start, args.epochs):

        accuracy = train(epoch=epoch,
                         model=model_list,
                         criterion=criterion,
                         optimizer=optimizer,
                         train_loader=train_loader,
                         args=args)

        if (epoch + 1) % args.save_step == 0 or epoch == 0:
            if use_gpu:
                state_dict = model.module.state_dict(
                )  # save the parameters from updated model
            else:
                state_dict = model.state_dict()

            is_best = accuracy > best_accuracy
            best_accuracy = max(accuracy, best_accuracy)

            save_checkpoint({
                'state_dict': state_dict,
                'epoch': (epoch + 1),
            },
                            is_best,
                            fpath=osp.join(
                                args.save_dir,
                                'ckp_ep' + str(epoch + 1) + '.pth.tar'))
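This script (and the next one) freezes BatchNorm with model.apply(set_bn_eval); that helper is not shown here, but a minimal version (an assumption, not the repo's exact code) simply switches every BatchNorm module to eval mode so its running statistics stop updating:

import torch.nn as nn

def set_bn_eval(module):
    # keep BatchNorm running statistics frozen during fine-tuning
    if isinstance(module, nn.modules.batchnorm._BatchNorm):
        module.eval()

# usage: re-apply after every model.train() call so BN stays frozen
# model.train()
# model.apply(set_bn_eval)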
예제 #21
0
File: train.py  Project: yyht/Deep_metric
def main(args):
    # s_ = time.time()

    save_dir = args.save_dir
    mkdir_if_missing(save_dir)

    sys.stdout = logging.Logger(os.path.join(save_dir, 'log.txt'))
    display(args)
    start = 0

    model = models.create(args.net, pretrained=True, dim=args.dim)

    # for vgg and densenet
    if args.resume is None:
        model_dict = model.state_dict()

    else:
        # resume model
        print('load model from {}'.format(args.resume))
        chk_pt = load_checkpoint(args.resume)
        weight = chk_pt['state_dict']
        start = chk_pt['epoch']
        model.load_state_dict(weight)

    model = torch.nn.DataParallel(model)
    model = model.cuda()

    # freeze BN
    if args.freeze_BN is True:
        print(40 * '#', '\n BatchNorm frozen')
        model.apply(set_bn_eval)
    else:
        print(40 * '#', 'BatchNorm NOT frozen')

    # Fine-tune the model: the learning rate for pre-trained parameter is 1/10
    new_param_ids = set(map(id, model.module.classifier.parameters()))

    new_params = [
        p for p in model.module.parameters() if id(p) in new_param_ids
    ]

    base_params = [
        p for p in model.module.parameters() if id(p) not in new_param_ids
    ]

    param_groups = [{
        'params': base_params,
        'lr_mult': 0.0
    }, {
        'params': new_params,
        'lr_mult': 1.0
    }]

    print('initial model is saved at %s' % save_dir)

    optimizer = torch.optim.Adam(param_groups,
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    criterion = losses.create(args.loss,
                              margin=args.margin,
                              alpha=args.alpha,
                              base=args.loss_base).cuda()

    # Decor_loss = losses.create('decor').cuda()
    data = DataSet.create(args.data,
                          ratio=args.ratio,
                          width=args.width,
                          origin_width=args.origin_width,
                          root=args.data_root)

    train_loader = torch.utils.data.DataLoader(
        data.train,
        batch_size=args.batch_size,
        sampler=FastRandomIdentitySampler(data.train,
                                          num_instances=args.num_instances),
        drop_last=True,
        pin_memory=True,
        num_workers=args.nThreads)

    # save the train information

    for epoch in range(start, args.epochs):

        train(epoch=epoch,
              model=model,
              criterion=criterion,
              optimizer=optimizer,
              train_loader=train_loader,
              args=args)

        if epoch == 1:
            optimizer.param_groups[0]['lr_mult'] = 0.1

        if (epoch + 1) % args.save_step == 0 or epoch == 0:
            if use_gpu:
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()

            save_checkpoint({
                'state_dict': state_dict,
                'epoch': (epoch + 1),
            },
                            is_best=False,
                            fpath=osp.join(
                                args.save_dir,
                                'ckp_ep' + str(epoch + 1) + '.pth.tar'))
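The two checkpointing helpers used above, save_checkpoint and load_checkpoint, are not included in these snippets. A minimal pair consistent with how they are called (the 'model_best' filename convention and toy model here are assumptions):

import os
import os.path as osp
import shutil
import torch
import torch.nn as nn

def save_checkpoint(state, is_best, fpath='checkpoint.pth.tar'):
    # write the training state; keep a copy of the best checkpoint so far
    os.makedirs(osp.dirname(fpath) or '.', exist_ok=True)
    torch.save(state, fpath)
    if is_best:
        shutil.copy(fpath, osp.join(osp.dirname(fpath) or '.', 'model_best.pth.tar'))

def load_checkpoint(fpath):
    return torch.load(fpath, map_location='cpu')

model = nn.Linear(8, 2)                              # toy model for the round trip
save_checkpoint({'state_dict': model.state_dict(), 'epoch': 5},
                is_best=False, fpath='checkpoints/ckp_ep5.pth.tar')
chk = load_checkpoint('checkpoints/ckp_ep5.pth.tar')
model.load_state_dict(chk['state_dict'])
print('resumed from epoch', chk['epoch'])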
예제 #22
0
def train_task(args, train_loader, current_task, prototype={}, pre_index=0):
    num_class_per_task = (args.num_class - args.nb_cl_fg) // args.num_task
    task_range = list(
        range(args.nb_cl_fg + (current_task - 1) * num_class_per_task,
              args.nb_cl_fg + current_task * num_class_per_task))
    if num_class_per_task == 0:
        pass  # JT
    else:
        old_task_factor = args.nb_cl_fg // num_class_per_task + current_task - 1
    log_dir = os.path.join(args.ckpt_dir, args.log_dir)
    mkdir_if_missing(log_dir)

    sys.stdout = logging.Logger(
        os.path.join(log_dir, 'log_task{}.txt'.format(current_task)))
    tb_writer = SummaryWriter(log_dir)
    display(args)
    # One-hot encoding or attribute encoding
    if 'imagenet' in args.data:
        model = models.create('resnet18_imagenet',
                              pretrained=False,
                              feat_dim=args.feat_dim,
                              embed_dim=args.num_class)
    elif 'cifar' in args.data:
        model = models.create('resnet18_cifar',
                              pretrained=False,
                              feat_dim=args.feat_dim,
                              embed_dim=args.num_class)

    if current_task > 0:
        model = torch.load(
            os.path.join(
                log_dir, 'task_' + str(current_task - 1).zfill(2) +
                '_%d_model.pkl' % int(args.epochs - 1)))
        model_old = deepcopy(model)
        model_old.eval()
        model_old = freeze_model(model_old)

    model = model.cuda()

    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)
    scheduler = StepLR(optimizer,
                       step_size=args.lr_decay_step,
                       gamma=args.lr_decay)

    loss_mse = torch.nn.MSELoss(reduction='sum')

    # Loss weight for gradient penalty used in W-GAN
    lambda_gp = args.lambda_gp
    lambda_lwf = args.gan_tradeoff
    # Initialize generator and discriminator
    if current_task == 0:
        generator = Generator(feat_dim=args.feat_dim,
                              latent_dim=args.latent_dim,
                              hidden_dim=args.hidden_dim,
                              class_dim=args.num_class)
        discriminator = Discriminator(feat_dim=args.feat_dim,
                                      hidden_dim=args.hidden_dim,
                                      class_dim=args.num_class)
    else:
        generator = torch.load(
            os.path.join(
                log_dir, 'task_' + str(current_task - 1).zfill(2) +
                '_%d_model_generator.pkl' % int(args.epochs_gan - 1)))
        discriminator = torch.load(
            os.path.join(
                log_dir, 'task_' + str(current_task - 1).zfill(2) +
                '_%d_model_discriminator.pkl' % int(args.epochs_gan - 1)))
        generator_old = deepcopy(generator)
        generator_old.eval()
        generator_old = freeze_model(generator_old)

    generator = generator.cuda()
    discriminator = discriminator.cuda()

    optimizer_G = torch.optim.Adam(generator.parameters(),
                                   lr=args.gan_lr,
                                   betas=(0.5, 0.999))
    optimizer_D = torch.optim.Adam(discriminator.parameters(),
                                   lr=args.gan_lr,
                                   betas=(0.5, 0.999))
    scheduler_G = StepLR(optimizer_G, step_size=200, gamma=0.3)
    scheduler_D = StepLR(optimizer_D, step_size=200, gamma=0.3)

    # Y_onehot is used to generate one-hot encoding
    y_onehot = torch.FloatTensor(args.BatchSize, args.num_class)

    for p in generator.parameters():  # set requires_grad to False
        p.requires_grad = False

    ###############################################################Feature extractor training####################################################
    if current_task > 0:
        model = model.eval()

    for epoch in range(args.epochs):

        loss_log = {'C/loss': 0.0, 'C/loss_aug': 0.0, 'C/loss_cls': 0.0}
        scheduler.step()
        for i, data in enumerate(train_loader, 0):
            inputs1, labels1 = data
            inputs1, labels1 = inputs1.cuda(), labels1.cuda()

            loss = torch.zeros(1).cuda()
            loss_cls = torch.zeros(1).cuda()
            loss_aug = torch.zeros(1).cuda()
            optimizer.zero_grad()

            inputs, labels = inputs1, labels1  #!

            ### Classification loss
            embed_feat = model(inputs)
            if current_task == 0:
                soft_feat = model.embed(embed_feat)
                loss_cls = torch.nn.CrossEntropyLoss()(soft_feat, labels)
                loss += loss_cls
            else:
                embed_feat_old = model_old(inputs)

            ### Feature Extractor Loss
            if current_task > 0:
                loss_aug = torch.dist(embed_feat, embed_feat_old, 2)
                loss += args.tradeoff * loss_aug * old_task_factor

            ### Replay and Classification Loss
            if current_task > 0:
                embed_sythesis = []
                embed_label_sythesis = []
                ind = list(range(len(pre_index)))

                if args.mean_replay:
                    for _ in range(args.BatchSize):
                        np.random.shuffle(ind)
                        tmp = prototype['class_mean'][
                            ind[0]] + np.random.normal(
                            ) * prototype['class_std'][ind[0]]
                        embed_sythesis.append(tmp)
                        embed_label_sythesis.append(
                            prototype['class_label'][ind[0]])
                    embed_sythesis = np.asarray(embed_sythesis)
                    embed_label_sythesis = np.asarray(embed_label_sythesis)
                    embed_sythesis = torch.from_numpy(embed_sythesis).cuda()
                    embed_label_sythesis = torch.from_numpy(
                        embed_label_sythesis)
                else:
                    for _ in range(args.BatchSize):
                        np.random.shuffle(ind)
                        embed_label_sythesis.append(pre_index[ind[0]])
                    embed_label_sythesis = np.asarray(embed_label_sythesis)
                    embed_label_sythesis = torch.from_numpy(
                        embed_label_sythesis)
                    y_onehot.zero_()
                    y_onehot.scatter_(1, embed_label_sythesis[:, None], 1)
                    syn_label_pre = y_onehot.cuda()

                    z = torch.Tensor(
                        np.random.normal(
                            0, 1, (args.BatchSize, args.latent_dim))).cuda()

                    embed_sythesis = generator(z, syn_label_pre)

                embed_sythesis = torch.cat((embed_feat, embed_sythesis))
                embed_label_sythesis = torch.cat(
                    (labels, embed_label_sythesis.cuda()))
                soft_feat_syt = model.embed(embed_sythesis)
                # real samples,   exemplars,      synthetic samples
                #           batch_size1       batch_size2

                batch_size1 = inputs1.shape[0]
                batch_size2 = embed_feat.shape[0]

                loss_cls = torch.nn.CrossEntropyLoss()(
                    soft_feat_syt[:batch_size1],
                    embed_label_sythesis[:batch_size1])

                loss_cls_old = torch.nn.CrossEntropyLoss()(
                    soft_feat_syt[batch_size2:],
                    embed_label_sythesis[batch_size2:])

                loss_cls += loss_cls_old * old_task_factor
                loss_cls /= args.nb_cl_fg // num_class_per_task + current_task
                loss += loss_cls

            loss.backward()
            optimizer.step()

            loss_log['C/loss'] += loss.item()
            loss_log['C/loss_cls'] += loss_cls.item()
            loss_log['C/loss_aug'] += args.tradeoff * loss_aug.item(
            ) if args.tradeoff != 0 else 0
            del loss_cls
            if epoch == 0 and i == 0:
                print(50 * '#')

        print('[Metric Epoch %05d]\t Total Loss: %.3f \t LwF Loss: %.3f \t' %
              (epoch + 1, loss_log['C/loss'], loss_log['C/loss_aug']))
        for k, v in loss_log.items():
            if v != 0:
                tb_writer.add_scalar(
                    'Task {} - Classifier/{}'.format(current_task, k), v,
                    epoch + 1)

        if epoch == args.epochs - 1:
            torch.save(
                model,
                os.path.join(
                    log_dir, 'task_' + str(current_task).zfill(2) +
                    '_%d_model.pkl' % epoch))

    ################################################################## W-GAN Training stage####################################################
    model = model.eval()
    for p in model.parameters():  # set requires_grad to False
        p.requires_grad = False
    for p in generator.parameters():  # set requires_grad to False
        p.requires_grad = True
    criterion_softmax = torch.nn.CrossEntropyLoss().cuda()
    if current_task != args.num_task:
        for epoch in range(args.epochs_gan):
            loss_log = {
                'D/loss': 0.0,
                'D/new_rf': 0.0,
                'D/new_lbls': 0.0,
                'D/new_gp': 0.0,
                'D/prev_rf': 0.0,
                'D/prev_lbls': 0.0,
                'D/prev_gp': 0.0,
                'G/loss': 0.0,
                'G/new_rf': 0.0,
                'G/new_lbls': 0.0,
                'G/prev_rf': 0.0,
                'G/prev_mse': 0.0,
                'G/new_classifier': 0.0,
                'E/kld': 0.0,
                'E/mse': 0.0,
                'E/loss': 0.0
            }
            scheduler_D.step()
            scheduler_G.step()
            for i, data in enumerate(train_loader, 0):
                for p in discriminator.parameters():
                    p.requires_grad = True
                inputs, labels = data

                inputs = Variable(inputs.cuda())

                ############################# Train Discriminator ###########################
                optimizer_D.zero_grad()
                real_feat = model(inputs)
                z = torch.Tensor(
                    np.random.normal(
                        0, 1, (args.BatchSize, args.latent_dim))).cuda()

                y_onehot.zero_()
                y_onehot.scatter_(1, labels[:, None], 1)
                syn_label = y_onehot.cuda()
                fake_feat = generator(z, syn_label)
                fake_validity, _ = discriminator(fake_feat, syn_label)
                real_validity, disc_real_acgan = discriminator(
                    real_feat, syn_label)

                # Adversarial loss
                d_loss_rf = -torch.mean(real_validity) + torch.mean(
                    fake_validity)
                gradient_penalty = compute_gradient_penalty(
                    discriminator, real_feat, fake_feat, syn_label).mean()
                d_loss_lbls = criterion_softmax(disc_real_acgan, labels.cuda())
                d_loss = d_loss_rf + lambda_gp * gradient_penalty

                d_loss.backward()
                optimizer_D.step()
                loss_log['D/loss'] += d_loss.item()
                loss_log['D/new_rf'] += d_loss_rf.item()
                loss_log['D/new_lbls'] += 0  #!!!
                loss_log['D/new_gp'] += gradient_penalty.item(
                ) if lambda_gp != 0 else 0
                del d_loss_rf, d_loss_lbls
                ############################# Train Generator ###########################
                # Train the generator every n_critic steps
                if i % args.n_critic == 0:
                    for p in discriminator.parameters():
                        p.requires_grad = False
                    ############################# Train GAN###########################
                    optimizer_G.zero_grad()
                    # Generate a batch of images
                    fake_feat = generator(z, syn_label)

                    # Loss measures generator's ability to fool the discriminator
                    # Train on fake images
                    fake_validity, disc_fake_acgan = discriminator(
                        fake_feat, syn_label)
                    if current_task == 0:
                        loss_aug = 0 * torch.sum(fake_validity)
                    else:
                        ind = list(range(len(pre_index)))
                        embed_label_sythesis = []
                        for _ in range(args.BatchSize):
                            np.random.shuffle(ind)
                            embed_label_sythesis.append(pre_index[ind[0]])

                        embed_label_sythesis = np.asarray(embed_label_sythesis)
                        embed_label_sythesis = torch.from_numpy(
                            embed_label_sythesis)
                        y_onehot.zero_()
                        y_onehot.scatter_(1, embed_label_sythesis[:, None], 1)
                        syn_label_pre = y_onehot.cuda()

                        pre_feat = generator(z, syn_label_pre)
                        pre_feat_old = generator_old(z, syn_label_pre)
                        loss_aug = loss_mse(pre_feat, pre_feat_old)
                    g_loss_rf = -torch.mean(fake_validity)
                    g_loss_lbls = criterion_softmax(disc_fake_acgan,
                                                    labels.cuda())
                    g_loss = g_loss_rf \
                                + lambda_lwf*old_task_factor * loss_aug
                    loss_log['G/loss'] += g_loss.item()
                    loss_log['G/new_rf'] += g_loss_rf.item()
                    loss_log['G/new_lbls'] += 0  #!
                    loss_log['G/new_classifier'] += 0  #!
                    loss_log['G/prev_mse'] += loss_aug.item(
                    ) if lambda_lwf != 0 else 0
                    del g_loss_rf, g_loss_lbls
                    g_loss.backward()
                    optimizer_G.step()
            print(
                '[GAN Epoch %05d]\t D Loss: %.3f \t G Loss: %.3f \t LwF Loss: %.3f'
                % (epoch + 1, loss_log['D/loss'], loss_log['G/loss'],
                   loss_log['G/prev_rf']))
            for k, v in loss_log.items():
                if v != 0:
                    tb_writer.add_scalar(
                        'Task {} - GAN/{}'.format(current_task, k), v,
                        epoch + 1)

            if epoch == args.epochs_gan - 1:
                torch.save(
                    generator,
                    os.path.join(
                        log_dir, 'task_' + str(current_task).zfill(2) +
                        '_%d_model_generator.pkl' % epoch))
                torch.save(
                    discriminator,
                    os.path.join(
                        log_dir, 'task_' + str(current_task).zfill(2) +
                        '_%d_model_discriminator.pkl' % epoch))
    tb_writer.close()

    prototype = compute_prototype(model, train_loader)  #!
    return prototype
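The W-GAN stage above relies on compute_gradient_penalty, which is not included in the snippet. A sketch of the standard WGAN-GP penalty with the same (features, one-hot label) critic interface; the toy critic is illustrative only, not the repo's model:

import torch
import torch.nn as nn

class ToyCritic(nn.Module):
    # minimal conditional critic with the (features, one-hot) -> (validity, logits) interface
    def __init__(self, feat_dim=32, class_dim=10):
        super().__init__()
        self.validity = nn.Linear(feat_dim + class_dim, 1)
        self.logits = nn.Linear(feat_dim + class_dim, class_dim)

    def forward(self, feat, onehot):
        x = torch.cat([feat, onehot], dim=1)
        return self.validity(x), self.logits(x)

def compute_gradient_penalty(critic, real, fake, onehot):
    # penalize the critic's gradient norm on random interpolates (WGAN-GP)
    alpha = torch.rand(real.size(0), 1, device=real.device)
    inter = (alpha * real + (1 - alpha) * fake).requires_grad_(True)
    validity, _ = critic(inter, onehot)
    grads = torch.autograd.grad(validity, inter,
                                grad_outputs=torch.ones_like(validity),
                                create_graph=True)[0]
    return (grads.norm(2, dim=1) - 1) ** 2

critic = ToyCritic()
real, fake = torch.randn(8, 32), torch.randn(8, 32)
onehot = torch.eye(10)[torch.randint(0, 10, (8,))]
gp = compute_gradient_penalty(critic, real, fake, onehot).mean()
gp.backward()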
예제 #23
0
def train_task(args, train_loader, current_task, prototype={}, pre_index=0):
    num_class_per_task = (args.num_class - args.nb_cl_fg) // args.num_task
    task_range = list(
        range(args.nb_cl_fg + (current_task - 1) * num_class_per_task,
              args.nb_cl_fg + current_task * num_class_per_task))
    if num_class_per_task == 0:
        pass  # JT
    else:
        old_task_factor = args.nb_cl_fg // num_class_per_task + current_task - 1
        print(old_task_factor)
    log_dir = os.path.join(args.ckpt_dir, args.log_dir)
    mkdir_if_missing(log_dir)

    sys.stdout = logging.Logger(
        os.path.join(log_dir, 'log_task{}.txt'.format(current_task)))
    tb_writer = SummaryWriter(log_dir)
    display(args)

    if 'miniimagenet' in args.data:
        model = models.create('resnet18_imagenet',
                              pretrained=False,
                              feat_dim=args.feat_dim,
                              embed_dim=args.num_class,
                              hidden_dim=256,
                              norm=True)
    elif 'cifar100' in args.data:
        model = models.create('resnet18_cifar',
                              pretrained=False,
                              feat_dim=args.feat_dim,
                              hidden_dim=256,
                              embed_dim=args.num_class,
                              norm=True)

    if current_task > 0:
        if 'miniimagenet' in args.data:
            model = models.create('resnet18_imagenet',
                                  pretrained=False,
                                  feat_dim=args.feat_dim,
                                  embed_dim=args.num_class,
                                  hidden_dim=256,
                                  norm=True)
        elif 'cifar100' in args.data:
            model = models.create('resnet18_cifar',
                                  pretrained=False,
                                  feat_dim=args.feat_dim,
                                  hidden_dim=256,
                                  embed_dim=args.num_class,
                                  norm=True)
        model = torch.load(
            os.path.join(
                log_dir, 'task_' + str(current_task - 1).zfill(2) +
                '_%d_model.pkl' % int(args.epochs - 1)))
        model_old = deepcopy(model)
        model_old.eval()
        model_old = freeze_model(model_old)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # model = model.cuda()
    model = model.to(device)

    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)
    scheduler = StepLR(optimizer,
                       step_size=args.lr_decay_step,
                       gamma=args.lr_decay)

    loss_mse = torch.nn.MSELoss(reduction='sum')

    # # Loss weight for gradient penalty used in W-GAN
    lambda_gp = args.lambda_gp
    lambda_lwf = args.gan_tradeoff
    # Initialize generator and discriminator
    if current_task == 0:
        generator = Generator(feat_dim=args.feat_dim,
                              latent_dim=args.latent_dim,
                              hidden_dim=args.hidden_dim,
                              class_dim=args.num_class,
                              norm=True)
        discriminator = Discriminator(feat_dim=args.feat_dim,
                                      hidden_dim=args.hidden_dim,
                                      class_dim=args.num_class)
    else:
        generator = torch.load(
            os.path.join(
                log_dir, 'task_' + str(current_task - 1).zfill(2) +
                '_%d_model_generator.pkl' % int(args.epochs_gan - 1)))
        discriminator = torch.load(
            os.path.join(
                log_dir, 'task_' + str(current_task - 1).zfill(2) +
                '_%d_model_discriminator.pkl' % int(args.epochs_gan - 1)))
        generator_old = deepcopy(generator)
        generator_old.eval()
        generator_old = freeze_model(generator_old)

    cuda = torch.cuda.is_available()
    FloatTensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor
    g_len = 0
    d_len = 0
    for p in generator.parameters():
        g_len += 1
    for p in discriminator.parameters():
        d_len += 1
    learned_lrs = []
    params = []
    for i in range(args.update_step):
        # one learnable lr per parameter; the list comprehension avoids aliasing
        # a single shared Variable across all entries
        g_lrs = [
            Variable(FloatTensor(1).fill_(args.update_lr), requires_grad=True)
            for _ in range(g_len)  # len(generator.parameters())
        ]
        d_lrs = [
            Variable(FloatTensor(1).fill_(args.update_lr), requires_grad=True)
            for _ in range(d_len)  # len(discriminator.parameters())
        ]
        learned_lrs.append((g_lrs, d_lrs))
        for param_list in learned_lrs[i]:
            params += param_list

    generator = generator.to(device)
    discriminator = discriminator.to(device)

    optimizer_G = torch.optim.Adam(generator.parameters(),
                                   lr=args.gan_lr,
                                   betas=(0.5, 0.999))
    optimizer_D = torch.optim.Adam(discriminator.parameters(),
                                   lr=args.gan_lr,
                                   betas=(0.5, 0.999))
    optimizer_lr = torch.optim.Adam(params, lr=args.lr)

    scheduler_G = StepLR(optimizer_G, step_size=150, gamma=0.3)
    scheduler_D = StepLR(optimizer_D, step_size=150, gamma=0.3)

    for p in generator.parameters():  # set requires_grad to False
        p.requires_grad = False

    for epoch in range(args.epochs):

        loss_log = {
            'C/loss': 0.0,
            'C/loss_aug': 0.0,
            'C/loss_cls': 0.0,
            'C/loss_cls_q': 0.0
        }

        ##### MAML on feature extraction

        for step, (x_spt, y_spt, x_qry, y_qry) in enumerate(train_loader):
            x_spt, y_spt, x_qry, y_qry = x_spt.to(device), y_spt.to(
                device), x_qry.to(device), y_qry.to(device)

            loss = torch.zeros(1).to(device)
            loss_cls = torch.zeros(1).to(device)
            loss_aug = torch.zeros(1).to(device)
            loss_tmp = torch.zeros(1).to(device)

            BatchSize, setsz, c_, h, w = x_spt.size()
            querysz = x_qry.size(1)

            losses_q = [0 for _ in range(args.update_step + 1)
                        ]  # losses_q[i] is the loss on step i
            corrects = [0.0 for _ in range(args.update_step + 1)]
            correct_s = [0.0 for _ in range(args.update_step + 1)]

            y_onehot = torch.cuda.FloatTensor(setsz, args.num_class)
            y_onehot_q = torch.cuda.FloatTensor(querysz, args.num_class)

            for i in range(args.BatchSize):
                # 1. run the i-th task and compute loss for k=0
                embed_feat = model(x_spt[i])
                if current_task == 0:
                    soft_feat = model.embed(embed_feat)
                    loss_cls = torch.nn.CrossEntropyLoss()(soft_feat, y_spt[i])
                    loss = loss.clone() + loss_cls
                else:
                    embed_feat_old = model_old(x_spt[i])

                ### Feature Extractor Loss
                if current_task > 0:
                    loss_aug = torch.dist(embed_feat, embed_feat_old, 2)
                    loss = loss.clone(
                    ) + args.tradeoff * loss_aug * old_task_factor

                ### Replay and Classification Loss
                if current_task > 0:
                    embed_sythesis = []
                    embed_label_sythesis = []
                    ind = list(range(len(pre_index)))

                    if args.mean_replay:
                        for _ in range(setsz):
                            np.random.shuffle(ind)
                            tmp = prototype['class_mean'][
                                ind[0]] + np.random.normal(
                                ) * prototype['class_std'][ind[0]]
                            embed_sythesis.append(tmp)
                            embed_label_sythesis.append(
                                prototype['class_label'][ind[0]])
                        embed_sythesis = np.asarray(embed_sythesis)
                        embed_label_sythesis = np.asarray(embed_label_sythesis)
                        embed_sythesis = torch.from_numpy(embed_sythesis).to(
                            device)
                        embed_label_sythesis = torch.from_numpy(
                            embed_label_sythesis)
                    else:
                        for _ in range(setsz):
                            np.random.shuffle(ind)
                            embed_label_sythesis.append(pre_index[ind[0]])
                        embed_label_sythesis = np.asarray(embed_label_sythesis)
                        embed_label_sythesis = torch.from_numpy(
                            embed_label_sythesis).to(device)
                        y_onehot.zero_()
                        y_onehot.scatter_(1, embed_label_sythesis[:, None], 1)
                        syn_label_pre = y_onehot.to(device)

                        z = torch.Tensor(
                            np.random.normal(
                                0, 1, (setsz, args.latent_dim))).to(device)

                        embed_sythesis = generator(z, syn_label_pre)

                    embed_sythesis = torch.cat((embed_feat, embed_sythesis))
                    embed_label_sythesis = torch.cat(
                        (y_spt[i], embed_label_sythesis.to(device)))
                    soft_feat_syt = model.embed(embed_sythesis)

                    batch_size1 = x_spt[i].shape[0]
                    batch_size2 = embed_feat.shape[0]

                    loss_cls = torch.nn.CrossEntropyLoss()(
                        soft_feat_syt[:batch_size1],
                        embed_label_sythesis[:batch_size1])
                    loss_cls_old = torch.nn.CrossEntropyLoss()(
                        soft_feat_syt[batch_size2:],
                        embed_label_sythesis[batch_size2:])

                    loss_cls += loss_cls_old * old_task_factor
                    loss_cls /= args.nb_cl_fg // num_class_per_task + current_task
                    loss += loss_cls

                grad = torch.autograd.grad(loss,
                                           model.parameters(),
                                           create_graph=True,
                                           retain_graph=True)

                # this is the loss and accuracy before first update
                with torch.no_grad():
                    # [setsz, nway]
                    embed_feat_q = model(x_qry[i])
                    soft_feat_q = model.embed(embed_feat_q)

                    loss_q = torch.nn.CrossEntropyLoss()(soft_feat_q, y_qry[i])
                    losses_q[0] += loss_q

                    embed_feat = model(x_spt[i])
                    soft_feat = model.embed(embed_feat)

                    pred_s = F.softmax(soft_feat, dim=1).argmax(dim=1)
                    corr = torch.eq(pred_s,
                                    y_spt[i]).sum().item()  # convert to numpy
                    correct_s[0] = correct_s[0] + corr

                    pred_q = F.softmax(soft_feat_q, dim=1).argmax(dim=1)
                    correct = torch.eq(pred_q, y_qry[i]).sum().item()
                    corrects[0] = corrects[0] + correct

                # this is the loss and accuracy after the first update
                with torch.no_grad():
                    # [setsz, nway]
                    for e, param in enumerate(model.parameters(), 0):
                        param.data -= args.update_lr * grad[e]

                    embed_feat_q = model(x_qry[i])
                    soft_feat_q = model.embed(embed_feat_q)

                    loss_q = torch.nn.CrossEntropyLoss()(soft_feat_q, y_qry[i])
                    losses_q[1] += loss_q
                    # [setsz]
                    embed_feat = model(x_spt[i])
                    soft_feat = model.embed(embed_feat)

                    pred_s = F.softmax(soft_feat, dim=1).argmax(dim=1)
                    corr = torch.eq(pred_s,
                                    y_spt[i]).sum().item()  # convert to numpy
                    correct_s[1] = correct_s[1] + corr

                    pred_q = F.softmax(soft_feat_q, dim=1).argmax(dim=1)
                    correct = torch.eq(pred_q, y_qry[i]).sum().item()
                    corrects[1] = corrects[1] + correct

                for k in range(1, args.update_step):
                    # 1. run the i-th task and compute loss for k=1~K-1
                    embed_feat = model(x_spt[i])

                    loss = torch.zeros(1).to(device)
                    if current_task > 0:
                        embed_feat_old = model_old(x_spt[i])
                        loss_aug = torch.dist(embed_feat, embed_feat_old, 2)
                        loss += args.tradeoff * loss_aug * old_task_factor
                        embed_sythesis = []
                        embed_label_sythesis = []
                        ind = list(range(len(pre_index)))
                        if args.mean_replay:
                            for _ in range(setsz):
                                np.random.shuffle(ind)
                                tmp = prototype['class_mean'][
                                    ind[0]] + np.random.normal(
                                    ) * prototype['class_std'][ind[0]]
                                embed_sythesis.append(tmp)
                                embed_label_sythesis.append(
                                    prototype['class_label'][ind[0]])
                            embed_sythesis = np.asarray(embed_sythesis)
                            embed_label_sythesis = np.asarray(
                                embed_label_sythesis)
                            embed_sythesis = torch.from_numpy(
                                embed_sythesis).to(device)
                            embed_label_sythesis = torch.from_numpy(
                                embed_label_sythesis)
                        else:
                            for _ in range(setsz):
                                np.random.shuffle(ind)
                                embed_label_sythesis.append(pre_index[ind[0]])
                            embed_label_sythesis = np.asarray(
                                embed_label_sythesis)
                            embed_label_sythesis = torch.from_numpy(
                                embed_label_sythesis).to(device)
                            y_onehot.zero_()
                            y_onehot.scatter_(1, embed_label_sythesis[:, None], 1)
                            syn_label_pre = y_onehot.to(device)

                            z = torch.Tensor(
                                np.random.normal(
                                    0, 1, (setsz, args.latent_dim))).to(device)
                            embed_sythesis = generator(z, syn_label_pre)

                        embed_sythesis = torch.cat(
                            (embed_feat, embed_sythesis))
                        embed_label_sythesis = torch.cat(
                            (y_spt[i], embed_label_sythesis.to(device)))

                        soft_feat_syt = model.embed(embed_sythesis)

                        batch_size1 = x_spt[i].shape[0]
                        batch_size2 = embed_feat.shape[0]

                        loss_cls = torch.nn.CrossEntropyLoss()(
                            soft_feat_syt[:batch_size1],
                            embed_label_sythesis[:batch_size1])
                        loss_cls_old = torch.nn.CrossEntropyLoss()(
                            soft_feat_syt[batch_size2:],
                            embed_label_sythesis[batch_size2:])
                        loss_cls += loss_cls_old * old_task_factor
                        loss_cls /= args.nb_cl_fg // num_class_per_task + current_task
                        loss += loss_cls
                    else:
                        soft_feat = model.embed(embed_feat)
                        loss_cls = torch.nn.CrossEntropyLoss()(soft_feat,
                                                               y_spt[i])
                        loss += loss_cls
                    # 2. compute grad on theta_pi
                    grad = torch.autograd.grad(loss,
                                               model.parameters(),
                                               create_graph=True,
                                               retain_graph=True,
                                               allow_unused=True)
                    # 3. theta_pi = theta_pi - train_lr * grad
                    for e, param in enumerate(model.parameters(), 0):
                        param.data -= args.update_lr * grad[e]
                    embed_feat = model(x_spt[i])
                    soft_feat = model.embed(embed_feat)
                    # soft_feat = mlp(embed_feat)
                    embed_feat_q = model(x_qry[i])
                    soft_feat_q = model.embed(embed_feat_q)

                    # loss_q will be overwritten and just keep the loss_q on last update step.
                    loss_q = torch.nn.CrossEntropyLoss()(soft_feat_q, y_qry[i])
                    losses_q[k + 1] += loss_q

                    with torch.no_grad():
                        pred_s = F.softmax(soft_feat, dim=1).argmax(dim=1)
                        corr = torch.eq(
                            pred_s, y_spt[i]).sum().item()  # convert to numpy
                        correct_s[k + 1] = correct_s[k + 1] + corr

                        pred_q = F.softmax(soft_feat_q, dim=1).argmax(dim=1)
                        correct = torch.eq(
                            pred_q, y_qry[i]).sum().item()  # convert to numpy
                        corrects[k + 1] = corrects[k + 1] + correct

            # end of all tasks
            # sum over all losses on query set across all tasks
            loss_q = losses_q[-1] / BatchSize
            loss_q = Variable(loss_q, requires_grad=True)

            # optimize theta parameters
            optimizer.zero_grad()
            loss_q.backward()
            optimizer.step()
            scheduler.step()

            accs = np.array([float(c)
                             for c in corrects]) / float(querysz * BatchSize)
            accs_spt = np.array([float(c) for c in correct_s]) / float(
                setsz * BatchSize)
            loss_log['C/loss'] += loss.item()
            loss_log['C/loss_cls'] += loss_cls.item()
            loss_log['C/loss_aug'] += args.tradeoff * loss_aug.item(
            ) if args.tradeoff != 0 else 0
            loss_log['C/loss_cls_q'] += loss_q.item()

            del loss_cls
            del loss_q

        print(
            '[Metric Epoch %05d]\t Total Loss: %.3f \t LwF Loss: %.3f \t Spt Accuracy FeatureX: %.3f \t Query Loss: %.3f \t Query Accuracy FeatureX: %.3f \t'
            % (epoch + 1, loss_log['C/loss'], loss_log['C/loss_aug'],
               accs_spt[-1], loss_log['C/loss_cls_q'], accs[-1]))
        for k, v in loss_log.items():
            if v != 0:
                tb_writer.add_scalar(
                    'Task {} - Classifier/{}'.format(current_task, k), v,
                    epoch + 1)

        tb_writer.add_scalar('Task {}'.format(current_task), accs[-1],
                             epoch + 1)
        if epoch == args.epochs - 1:
            torch.save(
                model,
                os.path.join(
                    log_dir, 'task_' + str(current_task).zfill(2) +
                    '_%d_model.pkl' % epoch))


################# feature extraction training end ########################

############################################## GAN Training ####################################################
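    # Stage 2: train a class-conditional WGAN-GP on top of the frozen feature extractor.
    # The generator synthesizes class features conditioned on one-hot labels; for tasks > 0
    # it is also distilled towards generator_old (MSE on features generated for previously
    # seen classes) so old classes are not forgotten. The inner/outer structure mirrors the
    # meta-learning loop above: per-step learned learning rates adapt G and D on the support
    # set, and the query-set losses drive the outer update of G, D and the learned rates.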
    model = model.eval()

    for p in model.parameters():  # set requires_grad to False
        p.requires_grad = False
    for p in generator.parameters():  # set requires_grad to True
        p.requires_grad = True
    for p in discriminator.parameters():
        p.requires_grad = True
    criterion_softmax = torch.nn.CrossEntropyLoss().to(device)
    if current_task != args.num_task:
        for epoch in range(args.epochs_gan):
            loss_log = {
                'D/loss': 0.0,
                'D/new_rf': 0.0,
                'D/new_lbls': 0.0,
                'D/new_gp': 0.0,
                'D/prev_rf': 0.0,
                'D/prev_lbls': 0.0,
                'D/prev_gp': 0.0,
                'D/loss_q': 0.0,
                'D/new_rf_q': 0.0,
                'D/new_lbls_q': 0.0,
                'D/new_gp_q': 0.0,
                'G/loss': 0.0,
                'G/new_rf': 0.0,
                'G/new_lbls': 0.0,
                'G/prev_rf': 0.0,
                'G/prev_mse': 0.0,
                'G/new_classifier': 0.0,
                'G/loss_q': 0.0,
                'G/new_rf_q': 0.0,
                'G/new_lbls_q': 0.0,
                'G/new_gp_q': 0.0,
                'E/kld': 0.0,
                'E/mse': 0.0,
                'E/loss': 0.0
            }

            for step, (x_spt, y_spt, x_qry,
                       y_qry) in enumerate(train_loader, 0):
                x_spt, y_spt, x_qry, y_qry = x_spt.to(device), y_spt.to(
                    device), x_qry.to(device), y_qry.to(device)

                BatchSize, setsz, c_, h, w = x_spt.size()
                querysz = x_qry.size(1)

                d_losses_q = [0.0 for _ in range(args.update_step)]
                g_losses_q = [0.0 for _ in range(args.update_step)]

                y_onehot = torch.zeros(setsz, args.num_class, device=device)
                y_onehot_q = torch.zeros(querysz, args.num_class, device=device)
                y_onehot_pre = torch.zeros(setsz, args.num_class, device=device)

                for i in range(args.BatchSize):  # loop over the meta-batch; x_spt[i] is one task's support set
                    inputs = Variable(x_spt[i])
                    labels = y_spt[i]

                    real_feat = model(inputs)
                    z = torch.Tensor(
                        np.random.normal(0, 1,
                                         (setsz, args.latent_dim))).to(device)

                    labels_q = y_qry[i]
                    real_feat_q = model(x_qry[i])

                    z_q = torch.Tensor(
                        np.random.normal(
                            0, 1, (querysz, args.latent_dim))).to(device)

                    y_onehot.zero_()
                    y_onehot.scatter_(1, labels[:, None], 1)  # in-place, otherwise the one-hot result is discarded
                    syn_label = y_onehot.to(device)
                    y_onehot_q.zero_()
                    y_onehot_q.scatter_(1, labels_q[:, None], 1)
                    syn_label_q = y_onehot_q.to(device)

                    ############################# Train MetaGAN ###########################

                    for k in range(args.update_step):

                        fake_feat = generator(z, syn_label)

                        fake_validity, disc_fake_acgan = discriminator(
                            fake_feat, syn_label)
                        real_validity, disc_real_acgan = discriminator(
                            real_feat, syn_label)

                        if current_task == 0:
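                            # first task: nothing to replay, but 0 * sum(...) keeps loss_aug
                            # as a graph-connected tensor so g_loss has the same form on every task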
                            loss_aug = 0 * torch.sum(fake_validity)
                        else:
                            ind = list(range(len(pre_index)))
                            embed_label_sythesis = []
                            for _ in range(setsz):
                                np.random.shuffle(ind)
                                embed_label_sythesis.append(pre_index[ind[0]])

                            embed_label_sythesis = np.asarray(
                                embed_label_sythesis)
                            embed_label_sythesis = torch.from_numpy(
                                embed_label_sythesis)
                            y_onehot_pre.zero_()
                            y_onehot_pre.scatter_(
                                1, embed_label_sythesis[:, None].to(device), 1)
                            syn_label_pre = y_onehot_pre.to(device)

                            pre_feat = generator(z, syn_label_pre)
                            pre_feat_old = generator_old(z, syn_label_pre)
                            loss_aug = loss_mse(pre_feat, pre_feat_old)

                        # Adversarial loss (wasserstein)
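                        # WGAN-GP objectives built below:
                        #   D: E[D(fake)] - E[D(real)] + lambda_gp * gradient penalty
                        #      + 0.5 * (auxiliary-classifier CE on real and on fake)
                        #   G: -E[D(fake)] + CE on fake + lambda_lwf * distillation term (loss_aug)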

                        g_loss_lbls = criterion_softmax(
                            disc_fake_acgan, labels.to(device))
                        d_loss_rf = -torch.mean(real_validity) + torch.mean(
                            fake_validity)
                        d_gradient_penalty = compute_gradient_penalty(
                            discriminator, real_feat, fake_feat,
                            syn_label).mean()
                        d_loss_lbls = criterion_softmax(
                            disc_real_acgan, labels.to(device))
                        d_loss = d_loss_rf + lambda_gp * d_gradient_penalty + 0.5 * (
                            d_loss_lbls + g_loss_lbls)

                        g_loss_rf = -torch.mean(fake_validity)
                        g_loss = g_loss_rf + lambda_lwf * old_task_factor * loss_aug + g_loss_lbls

                        grad_d = torch.autograd.grad(
                            d_loss,
                            discriminator.parameters(),
                            create_graph=True,
                            retain_graph=True)
                        grad_g = torch.autograd.grad(g_loss,
                                                     generator.parameters(),
                                                     create_graph=True,
                                                     retain_graph=True)

                        grad_d = clip_grad_by_norm_(grad_d, max_norm=5.0)
                        grad_g = clip_grad_by_norm_(grad_g, max_norm=5)

                        g_lr, d_lr = learned_lrs[k]

                        for e, param in enumerate(discriminator.parameters(),
                                                  0):
                            param.data = param.data.clone() - d_lr[e] * grad_d[
                                e]  # args.update_lr * grad_d[e]
                        for e, param in enumerate(generator.parameters(), 0):
                            param.data = param.data.clone() - g_lr[e] * grad_g[
                                e]  # args.update_lr * grad_g[e]
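                        # meta-SGD style inner step: learned_lrs[k] is assumed to hold one learned
                        # learning rate per parameter tensor of D and G for step k. The assignment
                        # goes through .data, so the update itself is not recorded by autograd.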

                        fake_feat_q = generator(z_q, syn_label_q)
                        fake_validity_q, disc_fake_acgan_q = discriminator(
                            fake_feat_q, syn_label_q)
                        real_validity_q, disc_real_acgan_q = discriminator(
                            real_feat_q, syn_label_q)

                        # Adversarial loss query
                        d_loss_rf_q = -torch.mean(
                            real_validity_q) + torch.mean(fake_validity_q)
                        d_gradient_penalty_q = compute_gradient_penalty(
                            discriminator, real_feat_q, fake_feat_q,
                            syn_label_q).mean()
                        d_loss_lbls_q = criterion_softmax(
                            disc_real_acgan_q, labels_q.to(device))
                        d_loss_q = d_loss_rf_q + lambda_gp * d_gradient_penalty_q + d_loss_lbls_q
                        d_losses_q[
                            k] = d_losses_q[k] + d_loss_q  # + d_loss_lbls_q

                        g_loss_rf_q = -torch.mean(fake_validity_q)
                        g_loss_lbls_q = criterion_softmax(
                            disc_fake_acgan_q, labels_q.to(device))
                        g_loss_q = g_loss_rf_q + g_loss_lbls_q  # + lambda_lwf*old_task_factor * loss_aug_q
                        g_losses_q[k] = g_losses_q[k] + g_loss_q

                #with torch.autograd.detect_anomaly():
                optimizer_D.zero_grad()
                optimizer_G.zero_grad()
                optimizer_lr.zero_grad()
                d_loss_q_total = d_losses_q[-1].clone() / args.BatchSize
                g_loss_q_total = g_losses_q[-1].clone() / args.BatchSize
                d_loss_q_total.backward()
                g_loss_q_total.backward()
                torch.nn.utils.clip_grad_norm_(discriminator.parameters(), 5)
                torch.nn.utils.clip_grad_norm_(generator.parameters(), 5)
                optimizer_D.step()
                optimizer_G.step()
                optimizer_lr.step()
                scheduler_G.step()

                loss_log['D/loss'] += d_loss.item()
                loss_log['D/new_rf'] += d_loss_rf.item()
                loss_log['D/new_lbls'] += d_loss_lbls.item()  #!!!
                loss_log['D/new_gp'] += d_gradient_penalty.item(
                ) if lambda_gp != 0 else 0
                loss_log['D/loss_q'] += d_loss_q_total.item()
                #loss_log['D/new_rf_q'] += d_loss_rf_q.item()
                #loss_log['D/new_lbls_q'] += d_loss_lbls_q.item() #!!!
                #loss_log['D/new_gp_q'] += d_gradient_penalty_q.item() if lambda_gp != 0 else 0
                del d_loss_rf, d_loss_lbls

                loss_log['G/loss'] += g_loss.item()
                loss_log['G/new_rf'] += g_loss_rf.item()
                loss_log['G/new_lbls'] += g_loss_lbls.item()  #!
                loss_log['G/loss_q'] += g_loss_q_total.item()
                #loss_log['G/new_rf_q'] += g_loss_rf_q.item()
                #loss_log['G/new_lbls_q'] += g_loss_lbls_q.item() #!!!
                #loss_log['G/new_classifier'] += 0 #!
                loss_log['G/prev_mse'] += loss_aug.item(
                ) if lambda_lwf != 0 else 0

                del g_loss_rf, g_loss_lbls

            print(
                '[GAN Epoch %05d]\t D Total Loss: %.3f \t G Total Loss: %.3f \t LwF Loss: %.3f'
                % (epoch + 1, loss_log['D/loss'], loss_log['G/loss'],
                   loss_log['G/prev_mse']))
            print(
                '[GAN Epoch %05d]\t D Total Loss Query: %.3f \t G Total Loss Query: %.3f \t'
                % (epoch + 1, loss_log['D/loss_q'], loss_log['G/loss_q']))
            for k, v in loss_log.items():
                if v != 0:
                    tb_writer.add_scalar(
                        'Task {} - GAN/{}'.format(current_task, k), v,
                        epoch + 1)

            if epoch == args.epochs_gan - 1:
                torch.save(
                    generator,
                    os.path.join(
                        log_dir, 'task_' + str(current_task).zfill(2) +
                        '_%d_model_generator.pkl' % epoch))
                torch.save(
                    discriminator,
                    os.path.join(
                        log_dir, 'task_' + str(current_task).zfill(2) +
                        '_%d_model_discriminator.pkl' % epoch))
    tb_writer.close()

    prototype = compute_prototype(model,
                                  train_loader,
                                  batch_size=args.BatchSize)  #!
    return prototype
예제 #24
0
def __init__(self):
    self.logger = logging.Logger("metamapper.graphql")
예제 #25
0
scheduler = StepLR(optimizer, step_size=20, gamma=10)
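# StepLR multiplies the learning rate by gamma every step_size epochs, so gamma=10 grows it
# tenfold every 20 steps; a decaying schedule would use gamma < 1 (e.g. 0.1).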

# optimizer.step()
# scheduler.step()
global_step = 0
np.random.seed(2001)
num_classes = 100
nb_cl_fg = 60
random_perm = list(range(num_classes))
traindir = os.path.join('home/abhilash/trial/', 'miniimagenet')
trainfolder = miniimagenet('miniimagenet',
                           mode='train',
                           resize=84,
                           cls_index=random_perm)  #[:nb_cl_fg]
log_dir = 'checkpoints'
sys.stdout = logging.Logger(os.path.join(log_dir, 'pre_train.txt'))
tb_writer = SummaryWriter(log_dir)
batchsize = 16
train_loader = torch.utils.data.DataLoader(trainfolder,
                                           batch_size=batchsize,
                                           shuffle=True,
                                           drop_last=True,
                                           num_workers=1)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#device = torch.device("cpu")
epochs = 100
print("Training Base Classes")
print("Total No. of classes: ", num_classes)
print("Number of Base Classes: ", nb_cl_fg)
print("Batch Size: ", batchsize)
print("No. of Epochs: ", epochs)
예제 #26
0
def main(args):

    # save the training log
    log_dir = os.path.join(args.checkpoints, args.log_dir)
    mkdir_if_missing(log_dir)

    sys.stdout = logging.Logger(os.path.join(log_dir, 'log.txt'))
    display(args)

    if args.r is None:
        # build a pretrained backbone and replace its head with an embedding layer
        model = models.create(args.net, pretrained=True)
        model.features = torch.nn.Sequential(
            model.features,
            torch.nn.MaxPool2d(7),
            # torch.nn.BatchNorm2d(512),
            torch.nn.Dropout(p=0.01))
        model.classifier = torch.nn.Sequential(torch.nn.Linear(512, args.dim))

        # initialize the new FC layer in place
        w = model.classifier[0].weight
        if args.init == 'orth':
            torch.nn.init.orthogonal_(w)
        else:
            print('initialize the FC layer kaiming-ly')
            torch.nn.init.kaiming_normal_(w)

        # zero bias
        torch.nn.init.zeros_(model.classifier[0].bias)
    else:
        # resume model
        print('Resume from model at Epoch %d' % args.start)
        model = torch.load(args.r)

    model = model.cuda()

    torch.save(model, os.path.join(log_dir, 'model.pkl'))
    print('initial model is saved at %s' % log_dir)

    # fine tune the model: the learning rate for pre-trained parameter is 1/10
    new_param_ids = set(map(id, model.classifier.parameters()))

    new_params = [p for p in model.parameters() if id(p) in new_param_ids]

    base_params = [p for p in model.parameters() if id(p) not in new_param_ids]
    param_groups = [{
        'params': base_params,
        'lr_mult': 0.1
    }, {
        'params': new_params,
        'lr_mult': 1.0
    }]
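    # 'lr_mult' is an extra key on the param groups; torch.optim.Adam itself ignores it, so it
    # is presumably read by a custom LR adjustment routine elsewhere to scale each group's lr.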

    optimizer = torch.optim.Adam(param_groups,
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    if args.loss == 'center-nca':
        criterion = losses.create(args.loss, alpha=args.alpha).cuda()
    elif args.loss == 'cluster-nca':
        criterion = losses.create(args.loss, alpha=args.alpha,
                                  beta=args.beta).cuda()
    elif args.loss == 'neighbour':
        criterion = losses.create(args.loss, k=args.k,
                                  margin=args.margin).cuda()
    elif args.loss == 'nca':
        criterion = losses.create(args.loss, alpha=args.alpha, k=args.k).cuda()
    elif args.loss == 'triplet':
        criterion = losses.create(args.loss, alpha=args.alpha).cuda()
    elif args.loss == 'bin':
        criterion = losses.create(args.loss, margin=args.margin)
    else:
        criterion = losses.create(args.loss).cuda()

    if args.data == 'shop':
        data = DataSet.create(args.data, root=None, gallery=False, query=False)
    else:
        data = DataSet.create(args.data, root=None, test=False)

    train_loader = torch.utils.data.DataLoader(
        data.train,
        batch_size=args.BatchSize,
        sampler=RandomIdentitySampler(data.train,
                                      num_instances=args.num_instances),
        drop_last=True,
        num_workers=args.nThreads)
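    # RandomIdentitySampler presumably yields batches with num_instances samples per identity,
    # so the metric losses (triplet/NCA/...) always see positive pairs within a batch.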

    # save the train information
    epoch_list = list()
    loss_list = list()
    pos_list = list()
    neg_list = list()

    for epoch in range(args.start, args.epochs):
        epoch_list.append(epoch)

        running_loss = 0.0
        running_pos = 0.0
        running_neg = 0.0

        if epoch == 1500:
            optimizer = torch.optim.Adam(param_groups,
                                         lr=0.1 * args.lr,
                                         weight_decay=args.weight_decay)

        for i, data in enumerate(train_loader, 0):
            inputs, labels = data
            # wrap them in Variable
            inputs = Variable(inputs.cuda())

            # type of labels is Variable cuda.Longtensor
            labels = Variable(labels).cuda()

            optimizer.zero_grad()

            embed_feat = model(inputs)

            loss, inter_, dist_ap, dist_an = criterion(embed_feat, labels)
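            # the metric losses here presumably return (loss, a batch accuracy-style statistic,
            # mean positive-pair distance, mean negative-pair distance), as logged below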

            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            running_neg += dist_an
            running_pos += dist_ap

            if epoch == 0 and i == 0:
                print(50 * '#')
                print('Train Begin -- HA-HA-HA-HA-AH-AH-AH-AH --')

        loss_list.append(running_loss)
        pos_list.append(running_pos / (i + 1))  # average over the number of batches
        neg_list.append(running_neg / (i + 1))

        print(
            '[Epoch %05d]\t Loss: %.3f \t Accuracy: %.3f \t Pos-Dist: %.3f \t Neg-Dist: %.3f'
            % (epoch + 1, running_loss, inter_, dist_ap, dist_an))

        if epoch % args.save_step == 0:
            torch.save(model, os.path.join(log_dir, '%d_model.pkl' % epoch))
    np.savez(os.path.join(log_dir, "result.npz"),
             epoch=epoch_list,
             loss=loss_list,
             pos=pos_list,
             neg=neg_list)