Example #1
	def __init__(self, d_latent, device='cuda', log_dir=''):
		super().__init__()
		self.d_latent = d_latent
		self.device = device
			
		n_blocks = [1, 1, 1, 1]
		mult = 8
		n_output_planes = [16 * mult, 32 * mult, 64 * mult, 128 * mult]
		self.n_in_planes = n_output_planes[0]
		
		self.layer0 = nn.Sequential(
		  nn_ops.conv3x3(3, self.n_in_planes, 1),
		  nn.BatchNorm2d(self.n_in_planes),
		  nn.ReLU(inplace=True)
		)
		self.layer1 = self._make_layer(BasicBlock, n_blocks[0], n_output_planes[0], 2)
		self.layer2 = self._make_layer(BasicBlock, n_blocks[1], n_output_planes[1], 2)
		self.layer3 = self._make_layer(BasicBlock, n_blocks[2], n_output_planes[2], 2)
		self.layer4 = self._make_layer(BasicBlock, n_blocks[3], n_output_planes[3], 2)
		self.latent_mapping = nn.Sequential(
			nn.Linear(n_output_planes[3] * BasicBlock.expansion, d_latent, True),
			nn.BatchNorm1d(d_latent),
			nn.Tanh()
		)
		
		self.apply(nn_ops.variable_init)
		self.to(device)
		utils.model_info(self, 'celebA_encoder', log_dir)
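
# The encoder above relies on two nn_ops helpers that are not shown. A minimal
# sketch of what they might look like, assuming conv3x3 is a padded 3x3
# convolution and variable_init a standard initializer (both are assumptions,
# not the original code):
import torch.nn as nn

def conv3x3(in_planes, out_planes, stride=1, bias=False):
    # 3x3 convolution that preserves spatial size at stride 1
    return nn.Conv2d(in_planes, out_planes, kernel_size=3,
                     stride=stride, padding=1, bias=bias)

def variable_init(m):
    # He initialization for conv/linear layers, constant init for norm layers
    if isinstance(m, (nn.Conv2d, nn.Linear)):
        nn.init.kaiming_normal_(m.weight)
        if m.bias is not None:
            nn.init.zeros_(m.bias)
    elif isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d)):
        nn.init.ones_(m.weight)
        nn.init.zeros_(m.bias)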
Example #2
def load_checkpoint(filepath, device):

    if device == 'cpu':
        checkpoint = torch.load(filepath,
                                map_location=lambda storage, loc: storage)
    else:
        checkpoint = torch.load(filepath)

    model = models.__dict__[checkpoint['pretrained']](pretrained=True)

    _, last_layer_name = model_info(model)

    if last_layer_name == 'classifier':
        classifier = checkpoint['classifier']
        model.classifier = classifier

    elif last_layer_name == 'fc':
        fc = checkpoint['classifier']
        model.fc = fc

    model.load_state_dict(checkpoint['state_dict'])
    model.class_to_idx = checkpoint['dict']
    model.optimizer = checkpoint['optimizer']

    return model
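
# For load_checkpoint to restore a model, the checkpoint must contain the keys
# it reads: 'pretrained', 'classifier', 'state_dict', 'dict', and 'optimizer'.
# A minimal sketch of a compatible save call; the function and argument names
# are illustrative, not from the original code:
import torch

def save_checkpoint(model, arch, optimizer, class_to_idx, filepath):
    checkpoint = {
        'pretrained': arch,                # e.g. 'vgg16' or 'resnet18'
        'classifier': getattr(model, 'classifier', getattr(model, 'fc', None)),
        'state_dict': model.state_dict(),
        'dict': class_to_idx,              # class-name -> index mapping
        'optimizer': optimizer.state_dict(),
    }
    torch.save(checkpoint, filepath)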
Example #3
    def __init__(self, d_latent, device='cuda', log_dir=''):
        super().__init__()

        self.d_latent = d_latent
        self.device = device

        self.mult = 8
        self.latent_mapping = nn.Sequential(
            nn.Linear(self.d_latent, 4 * 4 * 128 * self.mult),
            nn.BatchNorm1d(4 * 4 * 128 * self.mult), nn.ReLU())
        self.block1 = DecoderBlock(128 * self.mult, 64 * self.mult)
        self.block2 = DecoderBlock(64 * self.mult, 32 * self.mult)
        self.block3 = DecoderBlock(32 * self.mult, 16 * self.mult)
        self.block4 = DecoderBlock(16 * self.mult, 8 * self.mult)
        self.block5 = DecoderBlock(8 * self.mult, 4 * self.mult)
        self.block6 = DecoderBlock(4 * self.mult, 2 * self.mult)
        self.output_conv = nn_ops.conv3x3(2 * self.mult, 3, 1, True)
        self.final_act = nn.Sigmoid()

        self.apply(nn_ops.variable_init)
        self.to(device)
        utils.model_info(self, 'celebA_decoder', log_dir)
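
# DecoderBlock is not shown above. A plausible minimal sketch, assuming each
# block doubles the spatial resolution with a transposed convolution (an
# assumption, not the original module):
import torch.nn as nn

class DecoderBlock(nn.Module):
    def __init__(self, in_planes, out_planes):
        super().__init__()
        self.block = nn.Sequential(
            # upsample 2x, then normalize and activate
            nn.ConvTranspose2d(in_planes, out_planes, kernel_size=4,
                               stride=2, padding=1, bias=False),
            nn.BatchNorm2d(out_planes),
            nn.ReLU(inplace=True),
        )

    def forward(self, x):
        return self.block(x)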
Example #4
def main():
    from utils import (init_torch_seeds, model_info, profile, profile_training)

    init_torch_seeds(seed=1234)

    # analyze backbone characteristics of different models
    model_builders = [
        models.resnet18,
        models.resnet50,
        models.vgg16,
        models.shufflenet_v2_x2_0,
        models.mobilenet_v2,
        Yolov5,
        ghostnet,
    ][-2:]

    for model_builder in model_builders:
        print(f'{10*"-"} {model_builder.__name__} {10*"-"}')
        model = get_backbone(model_builder, pretrained=False)
        model_info(model, verbose=False, img_size=512)
        profile(model, verbose=True, amp=True)
        profile_training(model, amp=True)
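
# init_torch_seeds is imported in main() above but not shown. A minimal sketch
# of what such a seeding helper usually does (the cuDNN settings here are an
# assumption):
import torch
import torch.backends.cudnn as cudnn

def init_torch_seeds(seed=0):
    # seed PyTorch on CPU and all GPUs and make cuDNN deterministic
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    cudnn.deterministic = True
    cudnn.benchmark = False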
Example #5
 def info(self, verbose=False, img_size=512):  # print model information
     model_info(self, verbose, img_size)
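
# Most snippets in this collection delegate to a model_info helper; the exact
# implementation differs between repositories. A minimal sketch of the common
# core, counting layers, parameters, and gradients (the output format is an
# assumption; img_size is kept only for interface compatibility):
def model_info(model, verbose=False, img_size=512):
    n_p = sum(p.numel() for p in model.parameters())                     # parameters
    n_g = sum(p.numel() for p in model.parameters() if p.requires_grad)  # gradients
    if verbose:
        for i, (name, p) in enumerate(model.named_parameters()):
            print('%5g %40s %9s %12g %20s' %
                  (i, name, p.requires_grad, p.numel(), list(p.shape)))
    print('Model summary: %g layers, %g parameters, %g gradients' %
          (len(list(model.modules())), n_p, n_g))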
Example #6
    def __init__(self, mode):
        # Define Saver
        self.saver = Saver(opt, mode)
        self.logger = self.saver.logger

        # Visualize
        self.summary = TensorboardSummary(self.saver.experiment_dir)
        self.writer = self.summary.create_summary()

        # Dataset dataloader
        self.train_dataset, self.train_loader = make_data_loader(opt)
        self.nbatch_train = len(self.train_loader)
        self.val_dataset, self.val_loader = make_data_loader(opt, mode="val")
        self.nbatch_val = len(self.val_loader)

        # Model
        if opt.sync_bn is None:
            # default: enable SyncBatchNorm only when training on multiple GPUs
            opt.sync_bn = len(opt.gpu_id) > 1
        # model = DeepLab(opt)
        # model = CSRNet()
        model = CRGNet(opt)
        model_info(model, self.logger)
        self.model = model.to(opt.device)

        # Loss
        if opt.use_balanced_weights:
            classes_weights_file = osp.join(opt.root_dir, 'train_classes_weights.npy')
            if os.path.isfile(classes_weights_file):
                weight = np.load(classes_weights_file)
            else:
                weight = calculate_weigths_labels(
                    self.train_loader, opt.root_dir)
            print(weight)
            opt.loss['weight'] = weight
        self.loss = build_loss(opt.loss)

        # Define Evaluator
        self.evaluator = Evaluator()  # use region to eval: class_num is 2

        # Resuming Checkpoint
        self.best_pred = 0.0
        self.start_epoch = 0
        if opt.resume:
            if os.path.isfile(opt.pre):
                print("=> loading checkpoint '{}'".format(opt.pre))
                checkpoint = torch.load(opt.pre)
                self.start_epoch = checkpoint['epoch']
                self.best_pred = checkpoint['best_pred']
                self.model.load_state_dict(checkpoint['state_dict'])
                print("=> loaded checkpoint '{}' (epoch {})"
                      .format(opt.pre, checkpoint['epoch']))
            else:
                print("=> no checkpoint found at '{}'".format(opt.pre))

        if len(opt.gpu_id) > 1:
            print("Using multiple gpu")
            self.model = torch.nn.DataParallel(self.model,
                                               device_ids=opt.gpu_id)

        # Define Optimizer
        # train_params = [{'params': model.get_1x_lr_params(), 'lr': opt.lr},
        #                 {'params': model.get_10x_lr_params(), 'lr': opt.lr * 10}]
        # self.optimizer = torch.optim.SGD(train_params,
        #                                  momentum=opt.momentum,
        #                                  weight_decay=opt.decay)
        self.optimizer = torch.optim.SGD(self.model.parameters(),
                                         lr=opt.lr,
                                         momentum=opt.momentum,
                                         weight_decay=opt.decay)

        # Define lr scheduler
        # self.scheduler = LR_Scheduler(mode=opt.lr_scheduler,
        #                               base_lr=opt.lr,
        #                               num_epochs=opt.epochs,
        #                               iters_per_epoch=self.nbatch_train,
        #                               lr_step=140)
        self.scheduler = optim.lr_scheduler.MultiStepLR(
            self.optimizer,
            milestones=[round(opt.epochs * x) for x in opt.steps],
            gamma=opt.gamma)

        # Time
        self.loss_hist = collections.deque(maxlen=500)
        self.timer = Timer(opt.epochs, self.nbatch_train, self.nbatch_val)
        self.step_time = collections.deque(maxlen=opt.print_freq)
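
# Concrete example of the MultiStepLR milestone computation above, assuming
# (for illustration only) opt.epochs = 150 and opt.steps = (0.7, 0.9):
epochs, steps = 150, (0.7, 0.9)
milestones = [round(epochs * x) for x in steps]
print(milestones)  # [105, 135] -> the LR is multiplied by gamma at these epochs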
Example #7
# args
path = results.path[0]
learning_rate, weight_decay, momentum = results.learning_rate, results.weight_decay, results.momentum
gpu, save_dir, hidden_units = results.gpu, results.save_dir, results.hidden_units
arch, epochs, dropout = results.arch, results.epochs, results.dropout

# load data from path
data_transforms, image_datasets, dataloaders, cat_to_name = utils.load_data(
    path)

# load pretrained model
model = models.__dict__[arch](pretrained=True)

# get pretrained model in_features number for the last layer
in_features, last_layer_name = utils.model_info(model)

# freeze pretrained model parameters
if hasattr(model, 'features'):
    for param in model.features.parameters():
        param.requires_grad = False
else:  # resnet
    for param in model.parameters():
        param.requires_grad = False

# create network with custom classifier
model = utils.create_network(model, in_features, last_layer_name, hidden_units,
                             dropout)
print(model)

# set loss
Example #8
        model_dict = self.state_dict()
        if by_name:
            pretrianed_dict_update = {}
            for k, v in pretrianed_dict.items():
                if k in model_dict:
                    vv = model_dict[k]
                    if v.size() == vv.size():
                        pretrianed_dict_update[k] = v
            model_dict.update(pretrianed_dict_update)
        else:
            model_dict.update(pretrianed_dict)
        self.load_state_dict(model_dict)


if __name__ == '__main__':
    # import os
    # os.environ['CUDA_VISIBLE_DEVICES'] = '3'
    #
    anchors = get_anchors(
        '/data1/chenww/my_research/Two-Stage-Defect-Detection/detector/config/small_8cls/anchors.txt'
    ).to('cuda')
    model = ResNet(anchors).to('cuda')
    model_info(model, verbose=True)
    # print(model)

    # input = torch.randn(1, 3, 224, 224)
    # map, outputs = model(input)
    # print([o.size() for o in map])
    # print()
    # torch.save(model.state_dict(), 'model.pth')
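
# The by-name partial loading shown at the top of Example #8 can also be
# written as a standalone helper; a minimal sketch, with an illustrative
# function name that is not from the original code:
import torch

def load_matching_weights(model, pretrained_dict, by_name=True):
    # copy only tensors whose names and shapes match the target model
    model_dict = model.state_dict()
    if by_name:
        matched = {k: v for k, v in pretrained_dict.items()
                   if k in model_dict and v.size() == model_dict[k].size()}
        model_dict.update(matched)
    else:
        model_dict.update(pretrained_dict)
    model.load_state_dict(model_dict)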
Example #9
for trial, seed in enumerate(seeds):
    logger.info('trial {} / {} ... '.format(trial + 1, n_trial))

    random.seed(seed)
    np.random.seed(seed)
    th.manual_seed(seed)

    exp_dir = join(args.exp_root, 'regg-model_{:03d}'.format(trial))
    utils.prepare_directory(exp_dir)
    cm_zsl_path = join(exp_dir, 'cm_zsl')
    cm_gzslu_path = join(exp_dir, 'cm_gzslu')
    cm_gzsls_path = join(exp_dir, 'cm_gzsls')

    logger.info('Initializing a regressor model ...')
    regg = classifiers.Regressor(args, dset.d_ft, dset.d_attr)
    utils.model_info(regg.net, 'regg', exp_dir)
    for epoch in range(args.n_epoch):

        train_loss, train_acc = regg.train_epoch(train_iter, _Sall)
        train_logs[trial, epoch, :] = train_loss, train_acc

        acc_zsl, _ = regg.test(unseen_test_iter, _Sall, _Cu, cm_zsl_path)
        acc_gzslu, _ = regg.test(unseen_test_iter,
                                 _Sall,
                                 confmat_path=cm_gzslu_path)
        acc_gzsls, _ = regg.test(seen_test_iter,
                                 _Sall,
                                 confmat_path=cm_gzsls_path)
        acc_gzslh = 2. * acc_gzslu * acc_gzsls / (acc_gzslu + acc_gzsls)
        accs[trial, epoch, :] = acc_zsl, acc_gzslu, acc_gzsls, acc_gzslh
Example #10
    train_iter = data.Iterator(
        [Xtr, Ytr],
        args.batch_size,
        shuffle=True,
        sampling_weights=sampling_weights,
        continuous=False)

    logger.info('Initializing {} model ...'.format(args.clf_type))
    clf = None
    if args.clf_type == 'bilinear-comp':
        clf = classifiers.BilinearCompatibility(dset.d_ft, dset.d_attr, args)
    elif args.clf_type == 'mlp':
        clf = classifiers.MLP(dset.d_ft, dset.n_Call, args)
    elif args.clf_type == 'multilayer-comp':
        clf = classifiers.MultiLayerCompatibility(dset.d_ft, dset.d_attr, args)
    utils.model_info(clf.net, 'clf', exp_dir)
    for epoch in range(args.n_epoch):

        if args.clf_type == 'bilinear-comp' or args.clf_type == 'multilayer-comp':
            clf.train_epoch(train_iter, Str)
            acc_zsl, _ = clf.test(unseen_test_iter, _Sall, _Cu, cm_zsl_path)
            acc_gzslu, _ = clf.test(unseen_test_iter, _Sall, confmat_path=cm_gzslu_path)
            acc_gzsls, _ = clf.test(seen_test_iter, _Sall, confmat_path=cm_gzsls_path)
            acc_gzslh = 2. * acc_gzslu * acc_gzsls / (acc_gzslu + acc_gzsls)
            accs[trial, epoch, :] = acc_zsl, acc_gzslu, acc_gzsls, acc_gzslh
        else:
            clf.train_epoch(train_iter)
            acc_zsl, _ = clf.test(unseen_test_iter, _Cu, cm_zsl_path)
            acc_gzslu, _ = clf.test(unseen_test_iter, confmat_path=cm_gzslu_path)
            acc_gzsls, _ = clf.test(seen_test_iter, confmat_path=cm_gzsls_path)
            acc_gzslh = 2. * acc_gzslu * acc_gzsls / (acc_gzslu + acc_gzsls)
Example #11
def main():

    utils.prepare_directory(args.exp_dir, force_delete=False)
    utils.init_logger(join(args.exp_dir, 'program.log'))
    utils.write_args(args)

    # **************************************** load dataset ****************************************
    dset = data.XianDataset(args.data_dir,
                            args.mode,
                            feature_norm=args.feature_norm)
    _X_s_tr = FN(dset.X_s_tr).to(args.device)
    _Y_s_tr_ix = FN(dil(dset.Y_s_tr,
                        dset.Cs)).to(args.device)  # indexed labels
    _Ss = FN(dset.Sall[dset.Cs]).to(args.device)
    _Su = FN(dset.Sall[dset.Cu]).to(args.device)
    if args.d_noise == 0: args.d_noise = dset.d_attr

    # **************************************** create data loaders ****************************************
    _sampling_weights = None
    if args.dataset != 'SUN':
        _sampling_weights = data.compute_sampling_weights(
            dil(dset.Y_s_tr, dset.Cs)).to(args.device)
    xy_iter = data.Iterator([_X_s_tr, _Y_s_tr_ix],
                            args.batch_size,
                            sampling_weights=_sampling_weights)
    label_iter = data.Iterator([torch.arange(dset.n_Cs, device=args.device)],
                               args.batch_size)
    class_iter = data.Iterator([torch.arange(dset.n_Cs)], 1)

    # **************************************** per-class means and stds ****************************************
    # per class samplers and first 2 class moments
    per_class_iters = []
    Xs_tr_mean, Xs_tr_std = [], []
    Xs_te_mean, Xs_te_std = [], []
    Xu_te_mean, Xu_te_std = [], []
    for c_ix, c in enumerate(dset.Cs):
        # training samples of seen classes
        _inds = np.where(dset.Y_s_tr == c)[0]
        assert _inds.shape[0] > 0
        _X = dset.X_s_tr[_inds]
        Xs_tr_mean.append(_X.mean(axis=0, keepdims=True))
        Xs_tr_std.append(_X.std(axis=0, keepdims=True))

        if args.n_gm_iter > 0:
            _y = np.ones([_inds.shape[0]], np.int64) * c_ix
            per_class_iters.append(
                data.Iterator([FN(_X).to(args.device),
                               FN(_y).to(args.device)],
                              args.per_class_batch_size))

        # test samples of seen classes
        _inds = np.where(dset.Y_s_te == c)[0]
        assert _inds.shape[0] > 0
        _X = dset.X_s_te[_inds]
        Xs_te_mean.append(_X.mean(axis=0, keepdims=True))
        Xs_te_std.append(_X.std(axis=0, keepdims=True))

    # test samples of unseen classes
    for c_ix, c in enumerate(dset.Cu):
        _inds = np.where(dset.Y_u_te == c)[0]
        assert _inds.shape[0] > 0
        _X = dset.X_u_te[_inds]
        Xu_te_mean.append(_X.mean(axis=0, keepdims=True))
        Xu_te_std.append(_X.std(axis=0, keepdims=True))
    del _X, _inds, c_ix, c

    Xs_tr_mean = FN(np.concatenate(Xs_tr_mean, axis=0)).to(args.device)
    Xs_tr_std = FN(np.concatenate(Xs_tr_std, axis=0)).to(args.device)
    Xs_te_mean = FN(np.concatenate(Xs_te_mean, axis=0)).to(args.device)
    Xs_te_std = FN(np.concatenate(Xs_te_std, axis=0)).to(args.device)
    Xu_te_mean = FN(np.concatenate(Xu_te_mean, axis=0)).to(args.device)
    Xu_te_std = FN(np.concatenate(Xu_te_std, axis=0)).to(args.device)

    # **************************************** create networks ****************************************
    g_net = modules.get_generator(args.gen_type)(
        dset.d_attr, args.d_noise, args.n_g_hlayer, args.n_g_hunit,
        args.normalize_noise, args.dp_g, args.leakiness_g).to(args.device)
    g_optim = optim.Adam(g_net.parameters(),
                         args.gan_optim_lr_g,
                         betas=(args.gan_optim_beta1, args.gan_optim_beta2),
                         weight_decay=args.gan_optim_wd)

    d_net = modules.ConditionalDiscriminator(dset.d_attr, args.n_d_hlayer,
                                             args.n_d_hunit,
                                             args.d_normalize_ft, args.dp_d,
                                             args.leakiness_d).to(args.device)
    d_optim = optim.Adam(d_net.parameters(),
                         args.gan_optim_lr_d,
                         betas=(args.gan_optim_beta1, args.gan_optim_beta2),
                         weight_decay=args.gan_optim_wd)
    start_it = 1

    utils.model_info(g_net, 'g_net', args.exp_dir)
    utils.model_info(d_net, 'd_net', args.exp_dir)

    if args.n_gm_iter > 0:
        if args.clf_type == 'bilinear-comp':
            clf = classifiers.BilinearCompatibility(dset.d_ft, dset.d_attr,
                                                    args)
        elif args.clf_type == 'mlp':
            clf = classifiers.MLP(dset.d_ft, dset.n_Cs, args)
        utils.model_info(clf.net, 'clf', args.exp_dir)

    pret_clf = None
    if os.path.isfile(args.pretrained_clf_ckpt):
        logger.info('Loading pre-trained {} checkpoint at {} ...'.format(
            args.clf_type, args.pretrained_clf_ckpt))
        ckpt = torch.load(args.pretrained_clf_ckpt, map_location=args.device)
        pret_clf = classifiers.BilinearCompatibility(dset.d_ft, dset.d_attr,
                                                     args)
        pret_clf.net.load_state_dict(ckpt[args.clf_type])
        pret_clf.net.eval()
        for p in pret_clf.net.parameters():
            p.requires_grad = False

    pret_regg = None
    if os.path.isfile(args.pretrained_regg_ckpt):
        logger.info(
            'Loading pre-trained regressor checkpoint at {} ...'.format(
                args.pretrained_regg_ckpt))
        ckpt = torch.load(args.pretrained_regg_ckpt, map_location=args.device)
        pret_regg = classifiers.Regressor(args, dset.d_ft, dset.d_attr)
        pret_regg.net.load_state_dict(ckpt['regressor'])
        pret_regg.net.eval()
        for p in pret_regg.net.parameters():
            p.requires_grad = False

    training_log_titles = [
        'd/loss',
        'd/real',
        'd/fake',
        'd/penalty',
        'gm/loss',
        'gm/real_loss',
        'gm/fake_loss',
        'g/fcls_loss',
        'g/cycle_loss',
        'clf/train_loss',
        'clf/train_acc',
        'mmad/X_s_tr',
        'mmad/X_s_te',
        'mmad/X_u_te',
        'smad/X_s_tr',
        'smad/X_s_te',
        'smad/X_u_te',
    ]
    if args.n_gm_iter > 0:
        training_log_titles.extend([
            'grad-cossim/{}'.format(n) for n, p in clf.net.named_parameters()
        ])
        training_log_titles.extend(
            ['grad-mse/{}'.format(n) for n, p in clf.net.named_parameters()])
    training_logger = utils.Logger(os.path.join(args.exp_dir, 'training-logs'),
                                   'logs', training_log_titles)

    t0 = time.time()

    logger.info('may the penguins not die')
    for it in range(start_it, args.n_iter + 1):

        # **************************************** Discriminator updates ****************************************
        for p in d_net.parameters():
            p.requires_grad = True
        for p in g_net.parameters():
            p.requires_grad = False
        for _ in range(args.n_d_iter):
            x_real, y_ix = next(xy_iter)
            s = _Ss[y_ix]
            x_fake = g_net(s)

            d_real = d_net(x_real, s).mean()
            d_fake = d_net(x_fake, s).mean()
            d_penalty = modules.gradient_penalty(d_net, x_real, x_fake, s)
            d_loss = d_fake - d_real + args.L * d_penalty

            d_optim.zero_grad()
            d_loss.backward()
            d_optim.step()

            training_logger.update_meters(
                ['d/real', 'd/fake', 'd/loss', 'd/penalty'], [
                    d_real.mean().item(),
                    d_fake.mean().item(),
                    d_loss.item(),
                    d_penalty.item()
                ], x_real.size(0))

        # **************************************** Generator updates ****************************************
        for p in d_net.parameters():
            p.requires_grad = False
        for p in g_net.parameters():
            p.requires_grad = True
        g_optim.zero_grad()

        [y_fake] = next(label_iter)
        s = _Ss[y_fake]
        x_fake = g_net(s)

        # wgan loss
        d_fake = d_net(x_fake, s).mean()
        g_wganloss = -d_fake

        # f-cls loss
        fcls_loss = 0.0
        if pret_clf is not None:
            fcls_loss = pret_clf.loss(x_fake, _Ss, y_fake)
            training_logger.update_meters(['g/fcls_loss'], [fcls_loss.item()],
                                          x_fake.size(0))

        # cycle-loss
        cycle_loss = 0.0
        if pret_regg is not None:
            cycle_loss = pret_regg.loss(x_fake, s)
            training_logger.update_meters(['g/cycle_loss'],
                                          [cycle_loss.item()], x_fake.size(0))

        g_loss = args.C * fcls_loss + args.R * cycle_loss + g_wganloss
        g_loss.backward()

        # gmn iterations
        for _ in range(args.n_gm_iter):
            c = next(class_iter)[0].item()
            x_real, y_real = next(per_class_iters[c])
            y_fake = y_real.detach().repeat(args.gm_fake_repeat)
            s = _Ss[y_fake]
            x_fake = g_net(s)

            # gm loss
            clf.net.zero_grad()
            if args.clf_type == 'bilinear-comp':
                real_loss = clf.loss(x_real, _Ss, y_real)
                fake_loss = clf.loss(x_fake, _Ss, y_fake)
            elif args.clf_type == 'mlp':
                real_loss = clf.loss(x_real, y_real)
                fake_loss = clf.loss(x_fake, y_fake)

            grad_cossim = []
            grad_mse = []
            for n, p in clf.net.named_parameters():
                # if len(p.shape) == 1: continue

                real_grad = grad([real_loss], [p],
                                 create_graph=True,
                                 only_inputs=True)[0]
                fake_grad = grad([fake_loss], [p],
                                 create_graph=True,
                                 only_inputs=True)[0]

                if len(p.shape) > 1:
                    _cossim = F.cosine_similarity(fake_grad, real_grad,
                                                  dim=1).mean()
                else:
                    _cossim = F.cosine_similarity(fake_grad, real_grad, dim=0)

                # _cossim = F.cosine_similarity(fake_grad, real_grad, dim=1).mean()
                _mse = F.mse_loss(fake_grad, real_grad)
                grad_cossim.append(_cossim)
                grad_mse.append(_mse)

                training_logger.update_meters(
                    ['grad-cossim/{}'.format(n), 'grad-mse/{}'.format(n)],
                    [_cossim.item(), _mse.item()], x_real.size(0))

            grad_cossim = torch.stack(grad_cossim)
            grad_mse = torch.stack(grad_mse)
            gm_loss = (1.0 -
                       grad_cossim).sum() * args.Q + grad_mse.sum() * args.Z
            gm_loss.backward()

            training_logger.update_meters(
                ['gm/real_loss', 'gm/fake_loss'],
                [real_loss.item(), fake_loss.item()], x_real.size(0))

        g_optim.step()

        # **************************************** Classifier update ****************************************
        if args.n_gm_iter > 0:
            if it % args.clf_reset_iter == 0:
                if args.clf_reset_iter == 1:
                    # no need to generate optimizer each time
                    clf.init_params()
                else:
                    clf.reset()
            else:
                x, y_ix = next(xy_iter)
                if args.clf_type == 'bilinear-comp':
                    clf_acc, clf_loss = clf.train_step(x, _Ss, y_ix)
                else:
                    clf_acc, clf_loss = clf.train_step(x, y_ix)
                training_logger.update_meters(
                    ['clf/train_loss', 'clf/train_acc'], [clf_loss, clf_acc],
                    x.size(0))

        # **************************************** Log ****************************************
        if it % 1000 == 0:
            g_net.eval()

            # synthesize samples for seen classes and compute their first 2 moments
            Xs_fake_mean, Xs_fake_std = [], []
            with torch.no_grad():
                for c in range(dset.n_Cs):
                    y = torch.ones(256, device=args.device,
                                   dtype=torch.long) * c
                    a = _Ss[y]
                    x_fake = g_net(a)
                    Xs_fake_mean.append(x_fake.mean(dim=0, keepdim=True))
                    Xs_fake_std.append(x_fake.std(dim=0, keepdim=True))
            Xs_fake_mean = torch.cat(Xs_fake_mean)
            Xs_fake_std = torch.cat(Xs_fake_std)

            # synthesize samples for unseen classes and compute their first 2 moments
            # helper equivalent to the loop below; it is never called in the
            # original script, and is fixed here to use its arguments and
            # return the computed moments
            def compute_firsttwo_moments(S, C):
                X_mean, X_std = [], []
                with torch.no_grad():
                    for c in range(len(C)):
                        y = torch.ones(
                            256, device=args.device, dtype=torch.long) * c
                        a = S[y]
                        x_fake = g_net(a)
                        X_mean.append(x_fake.mean(dim=0, keepdim=True))
                        X_std.append(x_fake.std(dim=0, keepdim=True))
                return torch.cat(X_mean), torch.cat(X_std)

            Xu_fake_mean, Xu_fake_std = [], []
            with torch.no_grad():
                for c in range(dset.n_Cu):
                    y = torch.ones(256, device=args.device,
                                   dtype=torch.long) * c
                    a = _Su[y]
                    x_fake = g_net(a)
                    Xu_fake_mean.append(x_fake.mean(dim=0, keepdim=True))
                    Xu_fake_std.append(x_fake.std(dim=0, keepdim=True))
            Xu_fake_mean = torch.cat(Xu_fake_mean)
            Xu_fake_std = torch.cat(Xu_fake_std)

            g_net.train()

            training_logger.update_meters([
                'mmad/X_s_tr', 'smad/X_s_tr', 'mmad/X_s_te', 'smad/X_s_te',
                'mmad/X_u_te', 'smad/X_u_te'
            ], [
                torch.abs(Xs_tr_mean - Xs_fake_mean).sum(dim=1).mean().item(),
                torch.abs(Xs_tr_std - Xs_fake_std).sum(dim=1).mean().item(),
                torch.abs(Xs_te_mean - Xs_fake_mean).sum(dim=1).mean().item(),
                torch.abs(Xs_te_std - Xs_fake_std).sum(dim=1).mean().item(),
                torch.abs(Xu_te_mean - Xu_fake_mean).sum(dim=1).mean().item(),
                torch.abs(Xu_te_std - Xu_fake_std).sum(dim=1).mean().item()
            ])

            training_logger.flush_meters(it)

            elapsed = time.time() - t0
            per_iter = elapsed / it
            apprx_rem = (args.n_iter - it) * per_iter
            logging.info('Iter:{:06d}/{:06d}, '\
                         '[ET:{:.1e}(min)], ' \
                         '[IT:{:.1f}(ms)], ' \
                         '[REM:{:.1e}(min)]'.format(
                            it, args.n_iter, elapsed / 60., per_iter * 1000., apprx_rem / 60))

        if it % 10000 == 0:
            utils.save_checkpoint(
                {
                    'g_net': g_net.state_dict(),
                    'd_net': d_net.state_dict(),
                    'g_optim': g_optim.state_dict(),
                    'd_optim': d_optim.state_dict(),
                    'iteration': it
                },
                args.exp_dir,
                None,
                it if it % (args.n_iter // args.n_ckpt) == 0 else None,
            )

    training_logger.close()
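
# modules.gradient_penalty is not shown. A minimal sketch of what a conditional
# WGAN-GP penalty with this call signature typically computes (an assumption
# about the helper, not its actual implementation):
import torch
from torch.autograd import grad

def gradient_penalty(d_net, x_real, x_fake, s):
    # interpolate between real and fake features and penalize critic gradients
    # whose L2 norm deviates from 1
    eps = torch.rand(x_real.size(0), 1, device=x_real.device)
    x_hat = (eps * x_real + (1.0 - eps) * x_fake.detach()).requires_grad_(True)
    d_hat = d_net(x_hat, s)
    grads = grad(d_hat.sum(), x_hat, create_graph=True)[0]
    return ((grads.norm(2, dim=1) - 1.0) ** 2).mean()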
Example #12
def train(**kwargs):
    """train the crnn model"""
    opt.parse(kwargs)
    opt.print_args()

    train_test_split(path=opt.data_path,
                     img_format=opt.img_format,
                     label_format=opt.label_format,
                     generating_again=opt.generating_again,
                     split_rate=opt.split_rate)

    device = t.device('cuda') if opt.use_gpu else t.device('cpu')
    #Step 0 Decide the structure of the model#
    #Step 1 Load the data set#
    dataset, dataloader = \
        GetDataLoader(path = opt.data_path,
                      train = True,
                      img_format = opt.img_format,
                      label_format = opt.label_format,
                      img_height = opt.img_height,
                      img_width = opt.img_width,
                      img_channels = opt.img_channels,
                      batch_size = opt.batch_size)
    #Step 2 Reshape the inputs#
    #Step 3 Normalize the inputs#
    #Step 4 Initialize parameters#
    #Step 5 Forward propagation(Vectorization/Activation functions)#
    crnn_model = CRNN_def(in_c=opt.img_channels,
                          feature_size=512,
                          lstm_hidden=opt.lstm_hidden,
                          output_size=opt.output_size,
                          multilines=opt.multilines,
                          multisteps=opt.multisteps,
                          num_rows=opt.num_rows)
    crnn_model.to(device)
    distilled_crnn_model = Distilled_CRNN_def(in_c=opt.img_channels,
                                              feature_size=512,
                                              lstm_hidden=opt.lstm_hidden,
                                              output_size=opt.output_size,
                                              multilines=opt.multilines,
                                              multisteps=opt.multisteps,
                                              num_rows=opt.num_rows)
    distilled_crnn_model.to(device)

    print('CRNN model : ')
    for name, parameters in crnn_model.named_parameters():
        print('\t', name, '...', parameters.requires_grad)

    print('Distilled CRNN model : ')
    for name, parameters in distilled_crnn_model.named_parameters():
        print('\t', name, '...', parameters.requires_grad)

    #Step 6 Compute cost#
    ctc_loss = t.nn.CTCLoss().to(
        device)  #use CTC to derive the whole loss function
    #Step 7 Backward propagation(Vectorization/Activation functions gradients)#
    if opt.optimizer == 'sgd' or opt.optimizer == 'momentum' or opt.optimizer == 'nesterov':
        crnn_optimizer = t.optim.SGD(
            filter(lambda p: p.requires_grad, crnn_model.parameters()),
            lr=opt.init_lr,
            momentum=0.9 if opt.optimizer == 'momentum'
            or opt.optimizer == 'nesterov' else 0.,
            nesterov=True if opt.optimizer == 'nesterov' else False,
            weight_decay=opt.weight_decay)
        distilled_crnn_optimizer = t.optim.SGD(
            filter(lambda p: p.requires_grad,
                   distilled_crnn_model.parameters()),
            lr=opt.init_lr,
            momentum=0.9 if opt.optimizer == 'momentum'
            or opt.optimizer == 'nesterov' else 0.,
            nesterov=True if opt.optimizer == 'nesterov' else False,
            weight_decay=opt.weight_decay)
    elif opt.optimizer == 'adam' or opt.optimizer == 'amsgrad':
        crnn_optimizer = t.optim.Adam(
            filter(lambda p: p.requires_grad, crnn_model.parameters()),
            lr=opt.init_lr,
            amsgrad=True if opt.optimizer == 'amsgrad' else False,
            weight_decay=opt.weight_decay)
        distilled_crnn_optimizer = t.optim.Adam(
            filter(lambda p: p.requires_grad,
                   distilled_crnn_model.parameters()),
            lr=opt.init_lr,
            amsgrad=True if opt.optimizer == 'amsgrad' else False,
            weight_decay=opt.weight_decay)

    else:
        raise Exception('No other optimizers!')

    crnn_lr_schedule = t.optim.lr_scheduler.MultiStepLR(
        crnn_optimizer,
        milestones=opt.lr_decay_epochs,
        gamma=opt.lr_decay_rate)
    distilled_lr_schedule = t.optim.lr_scheduler.MultiStepLR(
        distilled_crnn_optimizer,
        milestones=opt.lr_decay_epochs,
        gamma=opt.lr_decay_rate)

    _ = model_info(crnn_model)
    _ = model_info(distilled_crnn_model)

    train_crnn_loss = []
    train_crnn_acc = []
    best_crnn_acc = 0.5  #must have better accuracy than random guess of 0.5
    train_distilled_crnn_loss = []
    train_distilled_crnn_acc = []
    best_distilled_crnn_acc = 0.5  #must have better accuracy than random guess of 0.5

    cd_loss = []
    lstm_loss = []
    h_loss = []
    c_loss = []
    softloss = []
    #Step 8 Update parameters#
    for epoch in tqdm.tqdm(range(opt.epochs)):
        print('Epoch : %d / %d.' % (epoch + 1, opt.epochs))
        print('Current epoch learning rate for CRNN: ',
              crnn_optimizer.param_groups[0]['lr'])
        if opt.distilled:
            print('Current epoch learning rate for Distilled_CRNN: ',
                  distilled_crnn_optimizer.param_groups[0]['lr'])
        epoch_crnn_acc = 0.
        epoch_distilled_crnn_acc = 0.
        count = 0
        for i, (batch_x, index, path) in enumerate(dataloader):
            batch_x = batch_x.to(device)
            index = index.to(device)
            batch_x = batch_x.view(batch_x.size(0), opt.img_channels,
                                   opt.img_height, opt.img_width)

            crnn_optimizer.zero_grad()
            if not opt.multisteps:
                labels = get_batch_label(dataset, index)
                text, length = opt.converter.encode(labels)
                outputt, teachers, (hts, cts) = crnn_model(batch_x)
                #output has shape : [m, t, output_size]
                preds_size = [outputt.size(0)] * outputt.size(
                    1)  #batch_size * time_steps
                batch_crnn_cost = ctc_loss(
                    outputt,
                    text.to(t.long).to(device),
                    t.IntTensor(preds_size).to(t.long).to(device),
                    length.to(t.long).to(device))  #ctc loss
            else:
                outputts, teachers, (htss, ctss) = crnn_model(batch_x)
                preds_size = [outputts[0].size(0)] * outputts[0].size(
                    1)  #batch_size * time_steps
                batch_crnn_cost = 0.
                labels = get_batch_label(dataset,
                                         index,
                                         multisteps=opt.multisteps,
                                         num_rows=opt.num_rows)
                for step in range(len(outputts)):
                    outputt = outputts[step]
                    label = labels[step]
                    text, length = opt.converter.encode(label)
                    batch_crnn_cost += ctc_loss(
                        outputt,
                        text.to(t.long).to(device),
                        t.IntTensor(preds_size).to(t.long).to(device),
                        length.to(t.long).to(device))  #ctc loss

                batch_crnn_cost /= len(outputts)

            batch_crnn_cost.backward()
            crnn_optimizer.step()

            if opt.distilled:
                distilled_crnn_optimizer.zero_grad()
                if not opt.multisteps:
                    outputs, students, (hss,
                                        css) = distilled_crnn_model(batch_x)
                    #output has shape : [m, t, output_size]
                    preds_size = [outputs.size(0)] * outputs.size(
                        1)  #batch_size * time_steps
                else:
                    outputss, students, (hsss,
                                         csss) = distilled_crnn_model(batch_x)
                    preds_size = [outputss[0].size(0)] * outputss[0].size(
                        1)  #batch_size * time_steps

                #1. CTC loss
                if not opt.multisteps:
                    batch_distilled_crnn_cost = ctc_loss(
                        outputs,
                        text.to(t.long).to(device),
                        t.IntTensor(preds_size).to(t.long).to(device),
                        length.to(t.long).to(device))

                else:
                    batch_ctc_loss = 0.
                    for step in range(len(outputss)):
                        outputs = outputss[step]
                        label = labels[step]
                        text, length = opt.converter.encode(label)
                        batch_ctc_loss += ctc_loss(
                            outputs,
                            text.to(t.long).to(device),
                            t.IntTensor(preds_size).to(t.long).to(device),
                            length.to(t.long).to(device))
                    batch_distilled_crnn_cost = batch_ctc_loss / (
                        len(outputss) * 1.)

                #2. cd loss
                count_ = 0
                batch_cd_loss = 0.
                for teacher, student in zip(teachers, students):
                    batch_cd_loss += t.mean(t.pow(teacher - student,
                                                  2)).to(device)
                    count_ += 1
                batch_cd_loss /= count_

                batch_distilled_crnn_cost += opt.alpha * batch_cd_loss

                #3. lstm loss
                #3.1 H values
                count_ = 0
                cur_lossh = 0.
                if not opt.multisteps:
                    for ht, hs in zip(hts, hss):
                        cur_lossh += t.mean(t.pow(ht - hs, 2)).to(device)
                        count_ += 1
                else:
                    for hts, hss in zip(htss, hsss):
                        cur_loss = 0.
                        q = 0.
                        for ht, hs in zip(hts, hss):
                            cur_loss += t.mean(t.pow(ht - hs, 2)).to(device)
                            q += 1.

                        cur_lossh += cur_loss / q
                        count_ += 1
                cur_lossh /= count_
                #3.2 C values
                cur_lossc = 0.
                count_ = 0
                if not opt.multisteps:
                    for ct, cs in zip(cts, css):
                        cur_lossc += t.mean(t.pow(ct - cs, 2)).to(device)
                        count_ += 1
                else:
                    for cts, css in zip(ctss, csss):
                        cur_loss = 0.
                        q = 0.
                        for ct, cs in zip(cts, css):
                            cur_loss += t.mean(t.pow(ct - cs, 2)).to(device)
                            q += 1.

                        cur_lossc += cur_loss / q
                        count_ += 1
                cur_lossc /= count_
                batch_lstm_loss = (cur_lossc + cur_lossh) / 2.
                batch_distilled_crnn_cost += opt.beta * batch_lstm_loss

                #4. soft loss
                if not opt.multisteps:
                    batch_softloss = -t.mean(t.sum(F.softmax(outputt.detach() / opt.temperature, dim = 1) * \
                                                    t.log(F.softmax(outputs / opt.temperature, dim = 1) + 1e-10),
                                                    dim = 1)).to(device)
                else:
                    batch_softloss = 0.
                    for outputt, outputs in zip(outputts, outputss):
                        batch_softloss += -t.mean(t.sum(F.softmax(outputt.detach() / opt.temperature, dim = 1) * \
                                                        t.log(F.softmax(outputs / opt.temperature, dim = 1) + 1e-10),
                                                        dim = 1)).to(device)
                    batch_softloss /= len(outputts)

                batch_distilled_crnn_cost += opt.gamma * batch_softloss

                batch_distilled_crnn_cost.backward()
                distilled_crnn_optimizer.step()

            if i % opt.batch_size == 0:
                count += 1
                train_crnn_loss.append(batch_crnn_cost.item())
                crnn_model.eval()
                batch_crnn_acc, predictions = cal_batch_acc(crnn_model,
                                                            opt.converter,
                                                            batch_x,
                                                            labels,
                                                            level=opt.level)

                print('\nCRNN samples predictions: ')
                print('=' * 30)
                print('Labels : ', labels)
                print('*' * 20)
                print('Predictions : ', predictions)
                print('=' * 30)
                crnn_model.train()
                train_crnn_acc.append(batch_crnn_acc)

                if opt.distilled:
                    train_distilled_crnn_loss.append(
                        batch_distilled_crnn_cost.item())
                    cd_loss.append(opt.alpha * batch_cd_loss.item())
                    lstm_loss.append(opt.beta * batch_lstm_loss.item())
                    h_loss.append(opt.beta * cur_lossh.item())
                    c_loss.append(opt.beta * cur_lossc.item())
                    softloss.append(opt.gamma * batch_softloss.item())
                    distilled_crnn_model.eval()
                    batch_distilled_crnn_acc, predictions = cal_batch_acc(
                        distilled_crnn_model,
                        opt.converter,
                        batch_x,
                        labels,
                        level=opt.level)

                    print('=' * 50)
                    print('Distilled CRNN samples predictions : ')
                    print('=' * 30)
                    print('Labels : ', labels)
                    print('*' * 20)
                    print('Predictions : ', predictions)
                    print('=' * 30)

                    distilled_crnn_model.train()
                    train_distilled_crnn_acc.append(batch_distilled_crnn_acc)

                print('\tCRNN : ')
                print('\tBatch %d has crnn cost : %.3f.|| Accuracy : ' %
                      (i + 1, batch_crnn_cost.item()),
                      end='')
                if isinstance(batch_crnn_acc, tuple):
                    print(
                        'Character-level acc : %.2f%%; Image-level acc : %.2f%%.'
                        % (batch_crnn_acc[0] * 100., batch_crnn_acc[1] * 100.))
                    combined_acc = (
                        2. * batch_crnn_acc[0] * batch_crnn_acc[1]) / (
                            batch_crnn_acc[0] + batch_crnn_acc[1] + 1e-7)  #f1
                    epoch_crnn_acc += combined_acc
                else:
                    if opt.level == 'char':
                        print('Character-level acc : %.2f%%.' %
                              (batch_crnn_acc * 100.))
                    elif opt.level == 'whole':
                        print('Image-level acc : %.2f%%.' %
                              (batch_crnn_acc * 100.))
                    else:
                        raise Exception('No other levels!')

                    epoch_crnn_acc += batch_crnn_acc

                if opt.distilled:
                    print('\tDistilled : ')
                    print(
                        '\tBatch %d has distilled crnn cost : %.3f.[softloss %3f & cd loss %.3f & lstm loss %.3f & h_loss %.3f & c_loss %.3f]. --> \n\t\tAccuracy : '
                        % (i + 1, batch_distilled_crnn_cost.item(),
                           opt.gamma * batch_softloss.item(),
                           opt.alpha * batch_cd_loss.item(), opt.beta *
                           batch_lstm_loss.item(), opt.beta * cur_lossh.item(),
                           opt.beta * cur_lossc.item()),
                        end='')
                    if isinstance(batch_distilled_crnn_acc, tuple):
                        print(
                            'Character-level acc : %.2f%%; Image-level acc : %.2f%%.'
                            % (batch_distilled_crnn_acc[0] * 100.,
                               batch_distilled_crnn_acc[1] * 100.))
                        combined_acc = (2. * batch_distilled_crnn_acc[0] *
                                        batch_distilled_crnn_acc[1]) / (
                                            batch_distilled_crnn_acc[0] +
                                            batch_distilled_crnn_acc[1] + 1e-7
                                        )  # f1
                        epoch_distilled_crnn_acc += combined_acc
                    else:
                        if opt.level == 'char':
                            print('Character-level acc : %.2f%%.' %
                                  (batch_distilled_crnn_acc * 100.))
                        elif opt.level == 'whole':
                            print('Image-level acc : %.2f%%.' %
                                  (batch_distilled_crnn_acc * 100.))
                        else:
                            raise Exception('No other levels!')

                        epoch_distilled_crnn_acc += batch_distilled_crnn_acc

        epoch_crnn_acc /= count
        epoch_distilled_crnn_acc /= count

        print('This epoch has crnn acc : {:.2f}%.'.format(epoch_crnn_acc *
                                                          100.))
        if opt.save_best_model:
            if epoch % opt.save_best_model_iter == 0:
                if epoch_crnn_acc > best_crnn_acc:
                    best_crnn_acc = epoch_crnn_acc
                    t.save(
                        crnn_model,
                        './checkpoints/save_best_train_crnn_model_epoch_%d_%s.pkl'
                        % (epoch + 1, opt.model_config))
                else:
                    print(
                        'This epoch has no improvement on training accuracy on crnn model, skipping saving the model!'
                    )

        if opt.distilled:
            print('This epoch has distilled crnn acc : {:.2f}%.'.format(
                epoch_distilled_crnn_acc * 100.))
            if opt.save_best_model:
                if epoch % opt.save_best_model_iter == 0:
                    if epoch_distilled_crnn_acc > best_distilled_crnn_acc:
                        best_distilled_crnn_acc = epoch_distilled_crnn_acc
                        t.save(
                            distilled_crnn_model,
                            './checkpoints/save_best_train_distilled_crnn_model_epoch_%d_%s.pkl'
                            % (epoch + 1, opt.model_config))
                    else:
                        print(
                            'This epoch has no improvement on training accuracy on distilled crnn model, skipping saving the model!'
                        )

        crnn_lr_schedule.step()
        distilled_lr_schedule.step()

    t.save(crnn_model,
           './checkpoints/final_crnn_model_%s.pkl' % opt.model_config)

    f, ax = plt.subplots(1, 2)
    f.suptitle('Useful statistics for CRNN')
    ax[0].plot(range(len(train_crnn_loss)),
               train_crnn_loss,
               label='CRNN training loss')
    ax[0].grid(True)
    ax[0].set_title('CRNN training loss')
    ax[0].legend(loc='best')

    if isinstance(train_crnn_acc[0], tuple):
        char_acc = [c_acc[0] for c_acc in train_crnn_acc]
        whole_acc = [c_acc[1] for c_acc in train_crnn_acc]
        ax[1].plot(range(len(char_acc)), char_acc, label='Character-level acc')
        ax[1].plot(range(len(whole_acc)), whole_acc, label='Image-level acc')

    else:
        if opt.level == 'char':
            ax[1].plot(range(len(train_crnn_acc)),
                       train_crnn_acc,
                       label='Character-level acc')
        elif opt.level == 'whole':
            ax[1].plot(range(len(train_crnn_acc)),
                       train_crnn_acc,
                       label='Image-level acc')
        else:
            raise Exception('No other levels!')

    ax[1].grid(True)
    ax[1].set_title('CRNN training acc')
    ax[1].legend(loc='best')

    plt.savefig('./results/training_crnn_statistics_%s.png' % opt.model_config)
    plt.close()

    if opt.distilled:
        t.save(
            distilled_crnn_model,
            './checkpoints/final_distilled_crnn_model_%s.pkl' %
            opt.model_config)

        f, ax = plt.subplots(1, 5)
        f.suptitle('Useful statistics for Distilled CRNN')
        ax[0].plot(range(len(train_distilled_crnn_loss)),
                   train_distilled_crnn_loss,
                   label='Distilled CRNN training loss')
        ax[0].grid(True)
        ax[0].set_title('Distilled CRNN training loss')
        ax[0].legend(loc='best')

        if isinstance(train_distilled_crnn_acc[0], tuple):
            char_acc = [c_acc[0] for c_acc in train_distilled_crnn_acc]
            whole_acc = [c_acc[1] for c_acc in train_distilled_crnn_acc]
            ax[1].plot(range(len(char_acc)),
                       char_acc,
                       label=' Character-level acc')
            ax[1].plot(range(len(whole_acc)),
                       whole_acc,
                       label='Image-level acc')

        else:
            if opt.level == 'char':
                ax[1].plot(range(len(train_distilled_crnn_acc)),
                           train_distilled_crnn_acc,
                           label='Character-level acc')
            elif opt.level == 'whole':
                ax[1].plot(range(len(train_distilled_crnn_acc)),
                           train_distilled_crnn_acc,
                           label='Image-level acc')
            else:
                raise Exception('No other levels!')

        ax[1].grid(True)
        ax[1].set_title('Distilled training acc')
        ax[1].legend(loc='best')

        ax[2].plot(range(len(cd_loss)),
                   cd_loss,
                   label='Distilled CRNN training cd loss')
        ax[2].grid(True)
        ax[2].set_title('Distilled CRNN training cd loss')
        ax[2].legend(loc='best')

        ax[3].plot(range(len(softloss)),
                   softloss,
                   label='Distilled CRNN training soft loss')
        ax[3].grid(True)
        ax[3].set_title('Distilled CRNN training soft loss')
        ax[3].legend(loc='best')

        ax[4].plot(range(len(lstm_loss)),
                   lstm_loss,
                   label='Distilled CRNN training lstm loss')
        ax[4].plot(range(len(h_loss)),
                   h_loss,
                   label='Distilled CRNN training lstm hidden loss')
        ax[4].plot(range(len(c_loss)),
                   c_loss,
                   label='Distilled CRNN training lstm cell loss')
        ax[4].grid(True)
        ax[4].set_title('Distilled CRNN training lstm loss')
        ax[4].legend(loc='best')

        plt.savefig('./results/training_distilled_crnn_statistics_%s.png' %
                    opt.model_config)
        plt.close()

    print('Training is done!\n')
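
# The script above saves whole model objects with t.save(model, path). A minimal
# sketch (not part of the original script) of reloading one for inference; the
# path is illustrative and the original class definitions must be importable:
import torch as t

crnn_model = t.load('./checkpoints/final_crnn_model_example.pkl',
                    map_location='cpu')
crnn_model.eval()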