Example #1
def main():

    transformations = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    train_dataset = CifarDataset(TRAIN_CSV_PATH, TRAIN_IMG_PATH, transformations)
    train_loader = CifarDataloader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
    test_dataset = CifarDataset(TEST_CSV_PATH, TEST_IMG_PATH, transformations)
    test_loader = CifarDataloader(test_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)

    model = resnet50(pretrained=True, num_classes=10)
    criterion = nn.CrossEntropyLoss()

    if USE_GPU:
        model = model.cuda()
        criterion = criterion.cuda()

    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

    # load_checkpoint(os.path.join('checkpoint', 'last_checkpoint.pth.tar'), model, optimizer)

    for epoch in range(EPOCHS):
        train(train_loader, model, criterion, optimizer, epoch+1, USE_GPU)
        test(test_loader, model, USE_GPU)
        save_checkpoint({
            'epoch': epoch+1,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
        }, 'checkpoint')
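None of these examples define `save_checkpoint` itself, and its signature varies between projects (compare the positional directory here with the keyword-argument calls further down). As a point of reference, a minimal sketch consistent with the calls in this example — the key names and default filename are assumptions inferred from the call sites — might be:

import os
import torch

def save_checkpoint(state, checkpoint_dir, filename='last_checkpoint.pth.tar'):
    """Serialize the training state dict (epoch, model, optimizer) to disk."""
    os.makedirs(checkpoint_dir, exist_ok=True)
    torch.save(state, os.path.join(checkpoint_dir, filename))

def load_checkpoint(path, model, optimizer):
    """Restore model/optimizer state in place; return the stored epoch."""
    checkpoint = torch.load(path, map_location='cpu')
    model.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    return checkpoint.get('epoch', 0)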
Example #2
    def log_epoch_info(self, epoch, train_res, eval_res, epoch_elapsed):
        self.tb_sw.add_scalars(
            "epoch",
            {
                "train_loss": train_res["task_loss"],
                "train_acc": train_res["top1_acc"],
                "eval_loss": eval_res["task_loss"],
                "eval_acc": eval_res["top1_acc"],
                "lr": self.lr,
                "elapsed_time": epoch_elapsed,
            },
            global_step=epoch,
        )
        self.t_log.append(
            [
                epoch,
                train_res["task_loss"],
                train_res["top1_acc"],
                eval_res["task_loss"],
                eval_res["top1_acc"],
                self.lr
            ]
        )
        self.logger.info(
            "FIN Epoch %(epoch)d/%(epochs)d LR: %(lr)f | "
            + "Train Loss: %(tloss).4f Acc: %(tacc).2f | "
            + "Eval Loss: %(eloss).4f Acc: %(eacc).2f | "
            + "Took %(dt).1fs (%(tdt).1fs)",
            {
                "epoch": epoch,
                "epochs": self.epochs,
                "lr": self.lr,
                "tloss": train_res["task_loss"],
                "tacc": train_res["top1_acc"],
                "eloss": eval_res["task_loss"],
                "eacc": eval_res["top1_acc"],
                "dt": epoch_elapsed,
                "tdt": time() - self.exp_start,
            },
        )

        is_best = eval_res["top1_acc"] > self.best_acc1
        self.best_acc1 = max(eval_res["top1_acc"], self.best_acc1)
        state_dict = self.model.state_dict()
        if self.gpu_ids and len(self.gpu_ids) > 1:
            # unwrap the torch.nn.DataParallel
            state_dict = list(self.model.children())[0].state_dict()
        save_checkpoint(
            {
                "epoch": epoch + 1,
                "state_dict": state_dict,
                "acc": eval_res["top1_acc"],
                "best_acc1": self.best_acc1,
                "optim_state_dict": self.optimizer.state_dict(),
            },
            is_best,
            checkpoint_dir=self.config["chkpt_dir"],
        )
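Example #2 additionally passes an `is_best` flag and a `checkpoint_dir` keyword. A common implementation behind such a signature (a sketch following the widely used ImageNet-example convention, not necessarily this project's code) writes the latest state and copies it aside whenever it is the best seen so far:

import os
import shutil
import torch

def save_checkpoint(state, is_best, checkpoint_dir='checkpoint',
                    filename='checkpoint.pth.tar',
                    best_filename='model_best.pth.tar'):
    # Always persist the latest state; keep a separate copy of the best one
    # so later (worse) epochs cannot overwrite it.
    os.makedirs(checkpoint_dir, exist_ok=True)
    path = os.path.join(checkpoint_dir, filename)
    torch.save(state, path)
    if is_best:
        shutil.copyfile(path, os.path.join(checkpoint_dir, best_filename))

On the DataParallel unwrap above: `torch.nn.DataParallel` exposes the wrapped network through its documented `.module` attribute, so `self.model.module.state_dict()` is an equivalent, more explicit form of `list(self.model.children())[0].state_dict()`.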
Example #3
    def run(self):
        modules = list(self.model.modules())

        # Construct the model with smaller architecture
        if type(modules[0]) == VGG:
            binary_masks = [torch.Tensor([1, 1, 1])]  # VGG input RGB channels are not masked
            make_layers_config, pack_model = MaskablePackingAgent.gen_vgg_make_layers(
                modules, binary_masks)
            self.logger.info("Packed Model make_layers list: %s",
                             make_layers_config)
            MaskablePackingAgent.transfer_vgg_parameters(
                self.model, pack_model, binary_masks)
            self.logger.info("Packed model: %s", pack_model)

            num_params = sum([p.numel() for p in pack_model.parameters()])
            num_lrn_p = sum([
                p.numel() for p in pack_model.parameters() if p.requires_grad
            ])
            self.logger.info(
                "Num Parameters: %(params)d (%(lrn_params)d requires gradient)",
                {
                    "params": num_params,
                    "lrn_params": num_lrn_p
                },
            )
            save_checkpoint(
                {
                    "make_layers": make_layers_config,
                    "state_dict": pack_model.state_dict(),
                    "params": num_params,
                    "lrn_params": num_lrn_p,
                },
                False,
                checkpoint_dir=self.config["chkpt_dir"],
                filename="vgg-pack-{:.2e}.pth.tar".format(num_params),
            )
        else:
            # Exceptions do not apply logging-style %-formatting; interpolate explicitly
            raise NotImplementedError("Cannot pack sparse module: %s" %
                                      modules[0])
Example #4
def Train(Model, args):

    Nd = args.Nd
    beta1_Adam = args.beta1
    beta2_Adam = args.beta2

    if args.cuda:
        Model.cuda()

    optimizer = optim.Adam(Model.parameters(),
                           lr=args.lr,
                           betas=(beta1_Adam, beta2_Adam))
    #optimizer = optim.SGD(Model.parameters(), lr=args.lr)
    Model.train()
    steps = 0
    CUDNN.benchmark = True

    for epoch in range(args.start_epoch, args.epochs + 1):

        if args.step_learning:
            adjust_learning_rate(optimizer, epoch, args)

        transformed_dataset = FaceIdPoseDataset(args.train_csv_file,
                                                transform=transforms.Compose([
                                                    transforms.Resize(256),
                                                    transforms.RandomCrop(224),
                                                    transforms.ToTensor()
                                                ]))
        dataloader = DataLoader(transformed_dataset,
                                batch_size=args.Train_Batch,
                                shuffle=True)

        for i, batch_data in enumerate(dataloader):
            Model.zero_grad()
            batch_image = torch.FloatTensor(batch_data[0].float())
            batch_id_label = batch_data[2]
            if args.cuda:
                batch_image, batch_id_label = batch_image.cuda(
                ), batch_id_label.cuda()
            batch_image, batch_id_label = Variable(batch_image), Variable(
                batch_id_label)

            steps += 1

            Prediction = Model(batch_image)
            Loss = Model.ID_Loss(Prediction, batch_id_label)

            Loss.backward()
            optimizer.step()
            log_learning(epoch, steps, 'VGG16_Model', args.lr, Loss.data, args)
            writer.add_scalar('Train/Train_Loss', Loss, steps)
            # Validation_Process(Model, epoch, writer, args)
        Validation_Process(Model, epoch, writer, args)

        if epoch % args.save_freq == 0:
            if not os.path.isdir(args.snapshot_dir):
                os.makedirs(args.snapshot_dir)
            save_path = os.path.join(args.snapshot_dir,
                                     'epoch{}.pt'.format(epoch))
            torch.save(Model.state_dict(), save_path)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'Model': Model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                },
                save_dir=os.path.join(args.snapshot_dir,
                                      'epoch{}'.format(epoch)))

    # export scalar data to JSON for external processing
    writer.export_scalars_to_json("./all_scalars.json")
    writer.close()
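Example #4 gates an `adjust_learning_rate` call on `args.step_learning` but never shows the helper. A plausible step-decay version — the `lr_step` and `lr_decay` field names are assumptions taken from the comment in Example #9 ("Every args.lr_step epochs, scale the learning rate by args.lr_decay") — is:

def adjust_learning_rate(optimizer, epoch, args):
    # Multiply the base LR by args.lr_decay once every args.lr_step epochs.
    lr = args.lr * (args.lr_decay ** (epoch // args.lr_step))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr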
Example #5
def train_single_fnm(D_model, G_model, C_model, args):

    writer = SummaryWriter()

    D_lr = args.lr
    G_lr = args.lr
    beta1_Adam = args.beta1
    beta2_Adam = args.beta2

    # Define the device unconditionally: it is used below even when args.cuda is off
    device = torch.device("cuda:0" if args.cuda and torch.cuda.is_available() else "cpu")
    D_model.to(device)
    G_model.to(device)
    C_model.to(device)

    optimizer_D = optim.Adam(D_model.parameters(),
                             lr=D_lr,
                             betas=(beta1_Adam, beta2_Adam),
                             weight_decay=args.lambda_reg)
    optimizer_G = optim.Adam(G_model.parameters(),
                             lr=G_lr,
                             betas=(beta1_Adam, beta2_Adam),
                             weight_decay=args.lambda_reg)

    if args.resume:
        checkpoint = torch.load(args.resume)
        optimizer_D.load_state_dict(checkpoint['optimizer_D'])
        optimizer_G.load_state_dict(checkpoint['optimizer_G'])

    steps = 0
    CUDNN.benchmark = True

    for epoch in range(args.start_epoch, args.epochs + 1):

        D_model.train()
        G_model.train()
        C_model.eval()

        # Load augmented data
        profile_dataset = FaceIdPoseDataset(
            args.profile_list,
            args.data_place,
            transform=transforms.Compose([
                torchvision.transforms.Resize(250),
                transforms.RandomCrop(224),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
            ]))
        front_dataset = FaceIdPoseDataset(
            args.front_list,
            args.data_place,
            transform=transforms.Compose([
                torchvision.transforms.Resize(224),
                transforms.ToTensor(),
                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
            ]))
        profile_dataloader = DataLoader(profile_dataset,
                                        batch_size=args.batch_size,
                                        shuffle=True)  #, num_workers=6)
        front_dataloader = DataLoader(front_dataset,
                                      batch_size=args.batch_size,
                                      shuffle=True)  # , num_workers=6)

        front_iter = iter(front_dataloader)
        for batch_profile, imageName_profile in profile_dataloader:
            try:  # cycle the (possibly shorter) front set alongside the profiles
                batch_front, imageName_front = next(front_iter)
            except StopIteration:
                front_iter = iter(front_dataloader)
                batch_front, imageName_front = next(front_iter)

            batch_profile = ((batch_profile + 1) * 127.5).to(device)
            batch_front = ((batch_front + 1) * 127.5).to(device)
            steps += 1

            enable_gradients(D_model)
            disable_gradients(C_model)
            disable_gradients(G_model)

            if steps < 25 and epoch == 1:
                critic = 25
            else:
                critic = args.num_critic_D
            for _ in range(0, critic):
                D_model.zero_grad()
                # Create Encoder Feature Map / Get the Real images' Features

                _, Front_FeaMap = C_model(batch_front)
                _, Profile_FeaMap = C_model(batch_profile)
                gen_f = G_model(Front_FeaMap)
                gen_p = G_model(Profile_FeaMap)

                # Mapping to single unit by using Discriminator
                syn_f_gan = D_model(gen_f)
                syn_p_gan = D_model(gen_p)
                real_gan = D_model(batch_front)

                # Gradient Penalty
                gp_alpha = torch.FloatTensor(batch_front.size()[0], 1, 1,
                                             1).to(device)
                gp_alpha.uniform_()
                interpolates = gen_p.data * gp_alpha + (
                    1 - gp_alpha) * batch_front.data
                interpolates = interpolates.to(
                    device).requires_grad_()  # track gradients for the penalty term
                Loss, Wdis, GP = D_model.CriticWithGP_Loss(
                    syn_f_gan, syn_p_gan, real_gan, interpolates)

                L_D = Loss
                L_D.backward()
                optimizer_D.step()
            writer.add_scalar('Discriminator/Gradient-Penalty', GP, steps)
            writer.add_scalar('Discriminator/Wasserstein-Distance', Wdis,
                              steps)
            writer.add_scalar('Discriminator/D-LOSS', Loss, steps)
            log_learning(epoch, steps, 'D', D_lr, L_D.data, args)

            enable_gradients(G_model)
            disable_gradients(D_model)
            disable_gradients(C_model)
            for _ in range(0, args.num_critic_G):
                G_model.zero_grad()
                """Loss Functions
                    1. Pixel-Wise Loss: front-to-front reconstruct
                    2. Perceptual Loss: Feature distance on space of pretrined face model
                    3. Regulation Loss: L2 weight regulation (Aleady included in nn.Adam)
                    4. Adversarial Loss: Wasserstein Distance
                    5. Symmetric Loss: NOT APPLY
                    6. Drift Loss: NOT APPLY
                    7. Grade Penalty Loss: Grade penalty for Discriminator
                    """

                # Create Encoder Feature Map / Get the Real images' Features

                Front_Fea, Front_FeaMap = C_model(batch_front)
                Profile_Fea, Profile_FeaMap = C_model(batch_profile)

                # Synthesized image / Get the Fake images' Features
                gen_f = G_model(Front_FeaMap)
                gen_p = G_model(Profile_FeaMap)

                Front_Syn_Fea, _ = C_model(gen_f)
                Profile_Syn_Fea, _ = C_model(gen_p)

                # Mapping to single unit by using Discriminator
                syn_f_gan = D_model(gen_f)
                syn_p_gan = D_model(gen_p)

                # Frontalization Loss: L1-Norm
                L1 = G_model.L1Loss(gen_f, batch_front)  #(input, target)
                # Feature Loss: Cosine-Norm / L2-Norm
                L2 = G_model.L2Loss(Front_Syn_Fea, Front_Fea, Profile_Syn_Fea,
                                    Profile_Fea)
                # Adversarial Loss
                L_Gen = G_model.GLoss(syn_f_gan, syn_p_gan)
                # L2 regularization is already applied via the optimizer's weight_decay

                L_G = args.lambda_l1 * L1 + args.lambda_fea * L2 + args.lambda_gan * L_Gen
                L_G.backward()
                optimizer_G.step()
                writer.add_scalar('Generator/Pixel-Wise-Loss', L1, steps)
                writer.add_scalar('Generator/Perceptual-Loss', L2, steps)
                writer.add_scalar('Generator/Adversarial Loss', L_Gen, steps)
                writer.add_scalar('Generator/G-LOSS', L_G, steps)
                log_learning(epoch, steps, 'G', G_lr, L_G.data, args)

            if steps % 500 == 0:

                x_r = vutils.make_grid(batch_front,
                                       normalize=True,
                                       scale_each=True)
                y_r = vutils.make_grid(batch_profile,
                                       normalize=True,
                                       scale_each=True)
                x_f = vutils.make_grid(gen_f, normalize=True, scale_each=True)
                y_f = vutils.make_grid(gen_p, normalize=True, scale_each=True)
                writer.add_image('Image/Front-Real', x_r, steps)
                writer.add_image('Image/Front-Generated', x_f, steps)
                writer.add_image('Image/Profile-Real', y_r, steps)
                writer.add_image('Image/Profile-Generated', y_f, steps)

                save_path_image = os.path.join(
                    args.snapshot_dir, 'epoch{}_FrontInput.jpg'.format(epoch))
                torchvision.utils.save_image(batch_front,
                                             save_path_image,
                                             normalize=True,
                                             scale_each=True)
                save_path_image = os.path.join(
                    args.snapshot_dir,
                    'epoch{}_FrontSynthesized.jpg'.format(epoch))
                torchvision.utils.save_image(gen_f,
                                             save_path_image,
                                             normalize=True,
                                             scale_each=True)

                save_path_image = os.path.join(
                    args.snapshot_dir,
                    'epoch{}_ProfileInput.jpg'.format(epoch))
                torchvision.utils.save_image(batch_profile,
                                             save_path_image,
                                             normalize=True,
                                             scale_each=True)
                save_path_image = os.path.join(
                    args.snapshot_dir,
                    'epoch{}_ProfileSynthesized.jpg'.format(epoch))
                torchvision.utils.save_image(gen_p,
                                             save_path_image,
                                             normalize=True,
                                             scale_each=True)

        if epoch % args.save_freq == 0:
            if not os.path.isdir(args.snapshot_dir):
                os.makedirs(args.snapshot_dir)
            save_path_D = os.path.join(args.snapshot_dir,
                                       'epoch{}_D.pt'.format(epoch))
            torch.save(D_model.state_dict(), save_path_D)
            save_path_G = os.path.join(args.snapshot_dir,
                                       'epoch{}_G.pt'.format(epoch))
            torch.save(G_model.state_dict(), save_path_G)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'D_model': D_model.state_dict(),
                    'optimizer_D': optimizer_D.state_dict(),
                    'G_model': G_model.state_dict(),
                    'optimizer_G': optimizer_G.state_dict(),
                },
                save_dir=os.path.join(args.snapshot_dir,
                                      'epoch{}'.format(epoch)))

    # export scalar data to JSON for external processing
    writer.export_scalars_to_json("./all_scalars.json")
    writer.close()
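The interpolates built in the critic loop feed `D_model.CriticWithGP_Loss`, whose internals are not shown. The standard WGAN-GP penalty over those interpolates is computed roughly as follows (a generic sketch, not this project's actual method):

import torch

def gradient_penalty(critic, interpolates, lambda_gp=10.0):
    # Penalize the critic's gradient norm for deviating from 1 along the
    # random interpolations between real and generated images (WGAN-GP).
    scores = critic(interpolates)
    grads, = torch.autograd.grad(outputs=scores, inputs=interpolates,
                                 grad_outputs=torch.ones_like(scores),
                                 create_graph=True, retain_graph=True)
    grads = grads.view(grads.size(0), -1)
    return lambda_gp * ((grads.norm(2, dim=1) - 1) ** 2).mean()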
Example #6
File: train.py  Project: ercanburak/O2P2
def main():
    opt = BaseOptions().parse()   # get options
    exp_dir = osp.join(opt.checkpoints_dir, opt.name)
    log_file = osp.join(exp_dir, "trainlog.txt")
    logger = Logger(log_file)
    use_gpu = torch.cuda.is_available()
    torch.manual_seed(opt.seed)
    torch.cuda.manual_seed_all(opt.seed)

    # Read and initialize dataset
    phys_net_data = PhysNetReal(opt.dataroot)

    # Construct train and test transform operations
    transform_train = Compose([
        ToTensor(),
    ])
    transform_test = Compose([
        ToTensor(),
    ])

    # PyTorch Dataset classes for train, validation and test sets
    train_dataset = O2P2Dataset(phys_net_data.train, transform=transform_train)
    val_dataset = O2P2Dataset(phys_net_data.val, transform=transform_test)
    test_dataset = O2P2Dataset(phys_net_data.test, transform=transform_test)

    # PyTorch Dataloaders for train, validation and test sets
    train_loader = DataLoader(train_dataset, batch_size=opt.train_batch_size, shuffle=True, pin_memory=use_gpu)
    val_loader = DataLoader(val_dataset, batch_size=opt.test_batch_size, shuffle=False, pin_memory=use_gpu)
    test_loader = DataLoader(test_dataset, batch_size=opt.test_batch_size, shuffle=False, pin_memory=use_gpu)

    # Initialize model
    percept = Percept()
    physics = Physics()
    render = Render()
    if use_gpu:
        percept = percept.cuda()
        physics = physics.cuda()
        render = render.cuda()

    # Initialize pretrained vgg model for perceptual loss
    vgg = Vgg16(requires_grad=False)
    vgg.eval()
    if use_gpu:
        vgg = vgg.cuda()

    # VGG network expects images that are normalized with these mean and std
    vgg_normalization_mean = torch.tensor([0.485, 0.456, 0.406])
    vgg_normalization_std = torch.tensor([0.229, 0.224, 0.225])
    if use_gpu:
        vgg_normalization_mean = vgg_normalization_mean.cuda()
        vgg_normalization_std = vgg_normalization_std.cuda()

    # Initialize normalizer that is required by vgg model
    vgg_norm = Normalization(vgg_normalization_mean, vgg_normalization_std)
    if use_gpu:
        vgg_norm = vgg_norm.cuda()

    # Define loss and optimizers
    criterion = torch.nn.MSELoss()
    optim_percept = torch.optim.Adam(percept.parameters(), lr=1e-3)
    optim_physics = torch.optim.Adam(physics.parameters(), lr=1e-3)
    optim_render = torch.optim.Adam(render.parameters(), lr=1e-3)

    best_render_loss = np.inf
    best_epoch = 0
    print("==> Start training")

    # Start training
    for epoch in range(opt.max_epoch):
        start_time = time.time()

        # train for one epoch
        percept_loss, physics_loss, render_loss = train(epoch, train_loader, percept, physics, render, criterion,
                                                        vgg, vgg_norm, optim_percept, optim_physics, optim_render,
                                                        use_gpu, exp_dir, logger, opt)

        elapsed_time = time.time() - start_time

        # print training details
        print_train_stats(logger, epoch, elapsed_time, percept_loss, physics_loss, render_loss)

        if (epoch + 1) % opt.eval_freq == 0:
            percept_loss, physics_loss, render_loss = validate(epoch, val_loader, percept, physics, render,
                                                               criterion, vgg, vgg_norm, use_gpu, exp_dir,
                                                               logger, opt)

            is_best = render_loss < best_render_loss
            if is_best:
                best_render_loss = render_loss
                best_epoch = epoch + 1

            percept_state_dict = percept.state_dict()
            physics_state_dict = physics.state_dict()
            render_state_dict = render.state_dict()
            save_checkpoint({
                'percept_state_dict': percept_state_dict,
                'physics_state_dict': physics_state_dict,
                'render_state_dict': render_state_dict,
                'epoch': epoch,
            }, is_best, osp.join(exp_dir, 'checkpoint_ep' + str(epoch + 1) + '.pth.tar'))

    logger.log("==> Best Render Loss {:.4%}, achieved at epoch {}".format(best_render_loss, best_epoch))
    logger.log("Training completed.")
Example #7
    def log_epoch_info(self, epoch, train_res, eval_res, epoch_elapsed):
        param_usage = 0
        epoch_sparsity = {}
        for idx, mask_module in enumerate(self.mask_modules):
            mask, factor = mask_module.get_binary_mask()
            mask_sparsity = sum(mask.view(-1))
            param_usage += sum(mask.view(-1) * factor)
            epoch_sparsity["{:02d}".format(idx)] = mask_sparsity

        self.tb_sw.add_scalars("epoch_sparsity", epoch_sparsity, global_step=epoch)
        self.tb_sw.add_scalar("epoch_params", param_usage, global_step=epoch)

        self.tb_sw.add_scalars(
            "epoch",
            {
                "train_acc": train_res["top1_acc"],
                "train_task_loss": train_res["task_loss"],
                "train_kd_loss": train_res["kd_loss"],
                "train_mask_loss": train_res["mask_loss"],
                "eval_acc": eval_res["top1_acc"],
                "eval_task_loss": eval_res["task_loss"],
                "eval_kd_loss": eval_res["kd_loss"],
                "eval_mask_loss": eval_res["mask_loss"],
                "lr": self.lr,
                "elapsed_time": epoch_elapsed,
            },
            global_step=epoch,
        )
        self.t_log.append(
            [
                epoch,
                train_res["task_loss"],
                train_res["kd_loss"],
                train_res["mask_loss"],
                train_res["top1_acc"],
                eval_res["task_loss"],
                eval_res["kd_loss"],
                eval_res["mask_loss"],
                eval_res["top1_acc"],
                param_usage,
                self.lr
            ]
        )
        self.logger.info(
            "FIN Epoch %(epoch)d/%(epochs)d LR: %(lr)f | "
            + "Train Task Loss: %(ttl).4f KDL: %(tkl).4f Mask Loss: %(tml).4f Acc: %(tacc).2f | "
            + "Eval Acc: %(eacc).2f | Params: %(params).2e | "
            + "Took %(dt).1fs (%(tdt).1fs)",
            {
                "epoch": epoch,
                "epochs": self.epochs,
                "lr": self.lr,
                "ttl": train_res["task_loss"],
                "tkl": train_res["kd_loss"],
                "tml": train_res["mask_loss"],
                "tacc": train_res["top1_acc"],
                "eacc": eval_res["top1_acc"],
                "dt": epoch_elapsed,
                "params": param_usage,
                "tdt": time() - self.exp_start,
            },
        )

        is_best_key = "{:.1e}".format(param_usage)
        prev_usage_best_acc = self.best_acc_per_usage.get(is_best_key, 0)
        usage_best_acc = max(eval_res["top1_acc"], prev_usage_best_acc)
        self.best_acc_per_usage[is_best_key] = usage_best_acc
        is_best = eval_res["top1_acc"] > prev_usage_best_acc

        state_dict = self.model.state_dict()
        if self.gpu_ids and len(self.gpu_ids) > 1:
            # unwrap the torch.nn.DataParallel
            state_dict = list(self.model.children())[0].state_dict()
        save_checkpoint(
            {
                "epoch": epoch + 1,
                "state_dict": state_dict,
                "acc": eval_res["top1_acc"],
                "best_acc_per_usage": self.best_acc_per_usage,
                "optim_state_dict": self.optimizer.state_dict(),
                "param_usage": param_usage,
            },
            is_best,
            checkpoint_dir=self.config["chkpt_dir"],
            filename="checkpoint-{}.pth.tar".format(is_best_key),
            best_filename="checkpoint-{}.pth.tar".format(is_best_key),
        )
Example #8
def Train(Model, args):

    writer = SummaryWriter()
    beta1_Adam = args.beta1
    beta2_Adam = args.beta2


    if args.cuda:
        Model.cuda()

    #optimizer = optim.Adam(Model.parameters(), lr=args.lr, betas=(beta1_Adam, beta2_Adam))
    optimizer = optim.SGD(Model.parameters(), lr=args.lr)

    if args.resume:
        checkpoint = torch.load(args.resume)
        optimizer.load_state_dict(checkpoint['optimizer'])

    Model.train()


    steps = 0
    #loss_criterion_Angular = AngleLoss().cuda()
    CUDNN.benchmark = True
    if args.stepsize > 0:
        scheduler = lr_scheduler.StepLR(optimizer, step_size=args.stepsize, gamma=args.gamma)
    if args.dynamic_lr:
        scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=3000, verbose=False,
                                                   threshold=0.00001, threshold_mode='rel', cooldown=2000, min_lr=0,
                                                   eps=1e-08)
    for epoch in range(args.start_epoch, args.epochs + 1):

        # Load augmented data
        #transformed_dataset = FaceIdPoseDataset(args.train_csv_file, args.data_place,
                                        #transform = transforms.Compose([Resize((256,256)), RandomCrop((224,224))])) #for ResNet256x256->224x224 for VGG110x110->96x96
        # transformed_dataset = FaceIdPoseDataset(args.train_csv_file, args.data_place,
        #                                             transforms.Compose([transforms.Resize(256), transforms.RandomCrop(224),transforms.ToTensor()]))  # for ResNet256x256->224x224 for VGG110x110->96x96
        transformed_dataset = FaceIdPoseDataset(args.train_csv_file, args.data_place,transforms.Compose([transforms.Resize(256),
                                                                                                         transforms.RandomCrop(224),
                                                                                                         transforms.ToTensor()
                                                                                                         ]))  # for ResNet256x256->224x224 for VGG110x110->96x96

        dataloader = DataLoader(transformed_dataset, batch_size=args.Train_Batch, shuffle=True, num_workers=8)
        if args.stepsize > 0:
            scheduler.step()

        for i, batch_data in enumerate(dataloader):
            # backward() accumulates gradients, so clear them before each minibatch
            optimizer.zero_grad()
            batch_image = torch.FloatTensor(batch_data[0].float())

            batch_id_label = batch_data[2]

            if args.cuda:
                batch_image, batch_id_label = batch_image.cuda(), batch_id_label.cuda()

            batch_image, batch_id_label = Variable(batch_image), Variable(batch_id_label)

            steps += 1

            Prediction = Model(batch_image)
            Loss = Model.ID_Loss(Prediction, batch_id_label)
            #Loss = loss_criterion_Angular(Prediction, batch_id_label)

            Loss.backward()
            optimizer.step()
            if args.dynamic_lr:
                scheduler.step(Loss)
            log_learning(epoch, steps, 'ResNet50_Model', args.lr, Loss.item(), args)
            writer.add_scalar('Train/Train_Loss', Loss, steps)
            writer.add_scalar('Train/Model_Lr', optimizer.param_groups[0]['lr'], epoch)

            # Validation_Process(Model, epoch, writer, args)
        Validation_Process(Model, epoch, writer, args)

        if epoch % args.save_freq == 0:
            if not os.path.isdir(args.snapshot_dir):
                os.makedirs(args.snapshot_dir)
            save_path = os.path.join(args.snapshot_dir, 'epoch{}.pt'.format(epoch))
            torch.save(Model.state_dict(), save_path)
            save_checkpoint({
                'epoch': epoch + 1,
                'Model': Model.state_dict(),
                'optimizer': optimizer.state_dict(),
            }, save_dir=os.path.join(args.snapshot_dir, 'epoch{}'.format(epoch)))

    # export scalar data to JSON for external processing
    writer.export_scalars_to_json("./all_scalars.json")
    writer.close()
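One caveat on Example #8: it calls `scheduler.step()` at the top of each epoch, before any `optimizer.step()` has run. Recent PyTorch releases warn about this ordering; the conventional per-epoch step decay looks like this sketch (`compute_loss` is a placeholder, not a function from the example):

for epoch in range(args.start_epoch, args.epochs + 1):
    for batch_data in dataloader:
        optimizer.zero_grad()
        loss = compute_loss(batch_data)  # placeholder for the forward pass + loss
        loss.backward()
        optimizer.step()
    if args.stepsize > 0:
        scheduler.step()  # step the LR schedule after the epoch's updates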
Example #9
def Train(Model, args):
    #Define num of classes
    Nd = args.Nd
    beta1_Adam = args.beta1
    beta2_Adam = args.beta2

    #Define gpu mode
    if args.cuda:
        Model.cuda()
    #choose your optimizer
    optimizer = optim.Adam(Model.parameters(),
                           lr=args.lr,
                           betas=(beta1_Adam, beta2_Adam))

    if args.resume:
        checkpoint = torch.load(args.resume)
        optimizer.load_state_dict(checkpoint['optimizer'])

    Model.train()

    loss_criterion = nn.CrossEntropyLoss()
    if args.cuda:
        loss_criterion = loss_criterion.cuda()

    steps = 0

    CUDNN.benchmark = True

    for epoch in range(args.start_epoch, args.epochs + 1):

        # Every args.lr_step epochs, scale the learning rate by args.lr_decay
        if args.step_learning:
            adjust_learning_rate(optimizer, epoch, args)

        # Load augmented data
        transformed_dataset = FaceIdPoseDataset(args.train_csv_file,
                                                args.data_place,
                                                transform=transforms.Compose([
                                                    Resize((256, 256)),
                                                    RandomCrop((224, 224))
                                                ]))
        dataloader = DataLoader(transformed_dataset,
                                batch_size=args.Train_Batch,
                                shuffle=True)

        for i, batch_data in enumerate(dataloader):

            # backward() accumulates gradients, so clear them before each minibatch
            Model.zero_grad()
            batch_image = torch.FloatTensor(batch_data[0].float())
            batch_id_label = batch_data[2]

            if args.cuda:
                batch_image, batch_id_label = batch_image.cuda(
                ), batch_id_label.cuda()

            batch_image, batch_id_label = Variable(batch_image), Variable(
                batch_id_label)

            steps += 1

            Prediction = Model(batch_image)
            Loss = loss_criterion(Prediction[:, :Nd], batch_id_label)
            Loss.backward()
            optimizer.step()
            log_learning(epoch, steps, 'VGG16_Model', args.lr, Loss.item(),
                         args)
            writer.add_scalar('Train/Train_Loss', Loss, steps)

        Validation_Process(Model, epoch, writer, args)

        if epoch % args.save_freq == 0:
            if not os.path.isdir(args.snapshot_dir):
                os.makedirs(args.snapshot_dir)
            save_path = os.path.join(args.snapshot_dir,
                                     'epoch{}.pt'.format(epoch))
            torch.save(Model.state_dict(), save_path)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'Model': Model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                },
                save_dir=os.path.join(args.snapshot_dir,
                                      'epoch{}'.format(epoch)))

    # export scalar data to JSON for external processing
    writer.export_scalars_to_json("./all_scalars.json")
    writer.close()
Example #10
    def log_epoch_info(self, epoch, train_res, eval_res, epoch_type,
                       epoch_elapsed):
        param_usage = 0
        epoch_sparsity = {}

        mask_idx = 0
        for module in self.model.modules():
            if len(list(module.children())) > 0:
                # only count leaf node modules
                continue
            elif type(module) == MaskSTE:
                mask, factor = module.get_binary_mask()
                mask_sparsity = sum(mask.view(-1))
                param_usage += sum(mask.view(-1) * factor)
                epoch_sparsity["{:02d}".format(mask_idx)] = mask_sparsity
                mask_idx += 1
        if mask_idx == 0:
            param_usage = sum([p.numel() for p in self.model.parameters()])

        if len(epoch_sparsity) > 0:
            self.tb_sw.add_scalars("epoch_sparsity",
                                   epoch_sparsity,
                                   global_step=epoch)
        self.tb_sw.add_scalar("epoch_params", param_usage, global_step=epoch)

        epoch_scalars = {
            "train_acc": train_res["top1_acc"],
            "train_task_loss": train_res["task_loss"],
            "train_kd_loss": train_res["kd_loss"],
            "eval_acc": eval_res["top1_acc"],
            "eval_task_loss": eval_res["task_loss"],
            "eval_kd_loss": eval_res["kd_loss"],
            "lr": self.lr,
            "elapsed_time": epoch_elapsed,
        }
        if epoch_type == "Sparsity":
            epoch_scalars["train_mask_loss"] = train_res["mask_loss"]
            epoch_scalars["eval_mask_loss"] = eval_res["mask_loss"]
        self.tb_sw.add_scalars("epoch", epoch_scalars, global_step=epoch)
        self.t_log.append([
            epoch, train_res["task_loss"], train_res["kd_loss"],
            train_res["mask_loss"], train_res["top1_acc"],
            eval_res["task_loss"], eval_res["kd_loss"], eval_res["mask_loss"],
            eval_res["top1_acc"], self.lr, param_usage
        ])
        self.logger.info(
            "FIN %(epoch_type)s Epoch %(epoch)d/%(epochs)d LR: %(lr).1e | " +
            "Train Task Loss: %(ttl).4f KDL: %(tkl).4f Mask Loss: %(tml).4f Acc: %(tacc).2f | "
            + "Eval Acc: %(eacc).2f | Params: %(params).2e | " +
            "Took %(dt).1fs (%(tdt).1fs)",
            {
                "epoch_type": epoch_type,
                "epoch": epoch,
                "epochs": self.epochs,
                "lr": self.lr,
                "ttl": train_res["task_loss"],
                "tkl": train_res["kd_loss"],
                "tml": train_res["mask_loss"],
                "tacc": train_res["top1_acc"],
                "eacc": eval_res["top1_acc"],
                "dt": epoch_elapsed,
                "params": param_usage,
                "tdt": time() - self.exp_start,
            },
        )

        is_best_key = "{:.1e}".format(param_usage)
        prev_usage_best_acc = self.best_acc_per_usage.get(is_best_key, 0)
        usage_best_acc = max(eval_res["top1_acc"], prev_usage_best_acc)
        self.best_acc_per_usage[is_best_key] = usage_best_acc
        is_best = eval_res["top1_acc"] > prev_usage_best_acc

        state_dict = self.model.state_dict()
        if self.gpu_ids and len(self.gpu_ids) > 1:
            # unwrap the torch.nn.DataParallel
            state_dict = list(self.model.children())[0].state_dict()
        save_checkpoint(
            {
                "epoch": epoch + 1,
                "state_dict": state_dict,
                "acc": eval_res["top1_acc"],
                "best_acc_per_usage": self.best_acc_per_usage,
                "optim_state_dict": self.optimizer.state_dict(),
                "param_usage": param_usage,
            },
            is_best,
            checkpoint_dir=self.config["chkpt_dir"],
            filename="checkpoint-{}.pth.tar".format(is_best_key),
            best_filename="checkpoint-{}.pth.tar".format(is_best_key),
        )