Example #1
import argparse

import torch
import torch.nn as nn
from torch.utils import data

# EvalDataset, pad, Net, VOCAB and eval_para are project-local and assumed
# to be imported from the surrounding NER codebase.

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--ckpt_path',
                        type=str,
                        default='work_dir/bert_ner.pt')
    parser.add_argument('--input_path',
                        type=str,
                        default='work_dir/selected_paras.json')
    parser.add_argument('--output_path',
                        type=str,
                        default='work_dir/entities.json')
    parser.add_argument('--batch_size', type=int, default=32)
    args = parser.parse_args()

    eval_dataset = EvalDataset(args.input_path, debug=False)
    eval_iter = data.DataLoader(dataset=eval_dataset,
                                batch_size=args.batch_size,
                                shuffle=False,
                                collate_fn=pad)

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = Net(top_rnns=False,
                vocab_size=len(VOCAB),
                device=device,
                finetuning=True).to(device)
    model = nn.DataParallel(model)
    model.load_state_dict(torch.load(args.ckpt_path, map_location=device))
    model.eval()

    eval_para(model, eval_iter, eval_dataset.sent_id, args.output_path)
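
The checkpoint above was saved through the nn.DataParallel wrapper, so its state_dict keys carry a 'module.' prefix. A minimal sketch (not part of the original example) for loading such a checkpoint into a bare, single-device model instead:

import torch

def load_dataparallel_ckpt(model, ckpt_path, device='cpu'):
    # Strip the 'module.' prefix that nn.DataParallel prepends to parameter names.
    state = torch.load(ckpt_path, map_location=device)
    state = {k[len('module.'):] if k.startswith('module.') else k: v
             for k, v in state.items()}
    model.load_state_dict(state)
    return model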
Example #2
# Fragment of a training loop from a mixed-modality video-retrieval project;
# net, optimizer, max_margin, args, dataset and the loaders are defined elsewhere.
        optimizer.zero_grad()
        confusion_matrix = net(captions,
                               {'face': face, 'audio': audio, 'visual': video, 'motion': flow},
                               ind, True)
        loss = max_margin(confusion_matrix)
        loss.backward()

        optimizer.step()
        running_loss += loss.item()

        if (i_batch + 1) % n_display == 0:
            print('Epoch %d, Epoch status: %.2f, Training loss: %.4f' % (
                epoch + 1,
                args.batch_size * float(i_batch) / dataset_size,
                running_loss / n_display))
            running_loss = 0.0

    print('evaluating epoch %d ...' % (epoch + 1))
    net.eval()

    if args.MSRVTT:
        retrieval_samples = dataset.getRetrievalSamples()

        # Variable(..., volatile=True) is pre-0.4 PyTorch; the modern equivalent
        # is to run the subsequent retrieval forward pass under torch.no_grad().
        video = retrieval_samples['video'].cuda()
        captions = retrieval_samples['text'].cuda()
        audio = retrieval_samples['audio'].cuda()
        flow = retrieval_samples['flow'].cuda()
        face = retrieval_samples['face'].cuda()
        face_ind = retrieval_samples['face_ind']

        ind = {'face': face_ind,
               'visual': np.ones(len(face_ind)),
               'motion': np.ones(len(face_ind))}
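
max_margin above is project code; a hedged sketch of a bidirectional max-margin ranking loss over a similarity ("confusion") matrix S, where S[i, j] scores caption i against video j and matching pairs sit on the diagonal (the project's actual normalization may differ):

import torch

def max_margin(S, margin=0.2):
    n = S.size(0)
    diag = S.diag().view(n, 1)
    cost_c2v = (margin + S - diag).clamp(min=0)      # caption -> video direction
    cost_v2c = (margin + S - diag.t()).clamp(min=0)  # video -> caption direction
    mask = 1 - torch.eye(n, device=S.device)         # ignore the matching pairs
    return ((cost_c2v + cost_v2c) * mask).sum() / (2 * n * (n - 1))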
Example #3
# The snippet opens mid-statement: this is the tail of a learning-rate
# scheduler definition (eta_min is a CosineAnnealingLR-style argument).
                                              eta_min=0.005)

# Training
n_epoch = int(config["Train"]["epoch"])
save_every_epoch = int(config["Train"]["SaveEveryEpoch"])
if save_every_epoch == -1:
    save_every_epoch = n_epoch + 1
dst_dir = os.path.join(ABS_DIR, config["Train"]["RecordDestination"])

for t in range(n_epoch):
    result = []
    for phase in ['train', 'val']:
        if phase == 'train':
            net.train()
        else:
            net.eval()
        # keep track of training and validation loss
        running_loss = 0.0
        running_batch = 0
        running_confusion = np.zeros(shape=(n_class, n_class), dtype=np.float64)  # np.float was removed in NumPy 1.24

        for data, target in data_loader[phase]:
            data, target = data.to(device), target.to(device)
            # print(target.shape)
            with torch.set_grad_enabled(phase == 'train'):
                # feed the input
                preds = net(data)
                # calculate the loss
                loss = loss_func(preds, target)
                if phase == 'train':
                    optimizer.zero_grad()
                    # The original snippet breaks off after zero_grad();
                    # backward/step are the standard continuation.
                    loss.backward()
                    optimizer.step()
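
The snippet also never reaches the update of running_confusion declared above; a minimal sketch of the usual per-batch accumulation, assuming integer class targets:

def update_confusion(confusion, preds, target):
    # preds: (batch, n_class) logits; target: (batch,) integer labels.
    pred_labels = preds.argmax(dim=1).cpu().numpy()
    true_labels = target.cpu().numpy()
    for t, p in zip(true_labels, pred_labels):
        confusion[t, p] += 1
    return confusion

# inside the batch loop: running_confusion = update_confusion(running_confusion, preds, target)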
Example #4
File: run.py  Project: tangjiafu/SRMSD
# Assumed imports for this snippet: torch, torch.backends.cudnn as cudnn,
# math.log10, tensorboard_logger's configure/log_value, Net, and a local
# progress_bar helper.
class FSRCNNTrainer(object):
    def __init__(self, config, training_loader, testing_loader):
        super(FSRCNNTrainer, self).__init__()
        self.CUDA = torch.cuda.is_available()
        self.device = torch.device('cuda' if self.CUDA else 'cpu')
        self.model = None
        self.lr = config.lr
        self.nEpochs = config.nEpochs
        self.criterion = None
        self.optimizer = None
        self.scheduler = None
        self.seed = config.seed
        self.upscale_factor = config.upscale_factor
        self.training_loader = training_loader
        self.testing_loader = testing_loader
        configure('logs', flush_secs=5)

    def build_model(self):
        self.model = Net(num_channels=1, upscale_factor=self.upscale_factor).to(self.device)
        self.model.weight_init(mean=0.0, std=0.2)
        self.criterion = torch.nn.MSELoss()
        torch.manual_seed(self.seed)

        if self.CUDA:
            torch.cuda.manual_seed(self.seed)  # seed the CUDA RNG as well
            cudnn.benchmark = True
            self.criterion.cuda()

        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.lr)
        self.scheduler = torch.optim.lr_scheduler.MultiStepLR(self.optimizer, milestones=[50, 75, 100],
                                                              gamma=0.5)  # lr decay

    def save_model(self):
        model_out_path = "./checkpoints/model_path.pth"
        torch.save(self.model, model_out_path)  # saves the whole module; saving state_dict() is more portable
        print("Checkpoint saved to {}".format(model_out_path))

    def train(self, epoch):
        self.model.train()
        train_loss = 0
        for batch_num, (data, target) in enumerate(self.training_loader):
            data, target = data.to(self.device), target.to(self.device)
            self.optimizer.zero_grad()
            loss = self.criterion(self.model(data), target)
            train_loss += loss.item()
            loss.backward()
            self.optimizer.step()
            progress_bar(batch_num, len(self.training_loader), 'Loss: %.4f' % (train_loss / (batch_num + 1)))
        print("    Average Loss: {:.4f}".format(train_loss / len(self.training_loader)))
        log_value('train_loss', train_loss / (batch_num + 1), epoch)

    def test(self, epoch):
        self.model.eval()
        avg_psnr = 0

        with torch.no_grad():
            for batch_num, (data, target) in enumerate(self.testing_loader):
                data, target = data.to(self.device), target.to(self.device)
                prediction = self.model(data)
                mse = self.criterion(prediction, target)
                psnr = 10 * log10(1 / mse.item())
                avg_psnr += psnr
                progress_bar(batch_num, len(self.testing_loader), 'PSNR: %.4f' % (avg_psnr / (batch_num + 1)))

        print("    Average PSNR: {:.4f} dB".format(avg_psnr / len(self.testing_loader)))
        log_value('PSNR', avg_psnr / (batch_num + 1), epoch)

    def run(self):
        self.build_model()
        for epoch in range(1, self.nEpochs + 1):
            print("\n===> Epoch {} starts:".format(epoch))
            self.train(epoch)
            self.test(epoch)
            self.scheduler.step()  # passing epoch to step() is deprecated; call once per epoch
            if epoch == self.nEpochs:
                self.save_model()
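
The PSNR in test() assumes targets normalized to [0, 1] (peak signal 1), i.e. PSNR = 10 * log10(1 / MSE). A quick worked check of the formula:

from math import log10

mse = 1e-3                  # e.g. an average MSE of 0.001 on [0, 1] images
psnr = 10 * log10(1 / mse)
print(psnr)                 # 30.0 dB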
Example #5
# Assumed imports for this snippet: torch, torch.nn as nn, torch.nn.functional as F,
# torch.optim as optim, numpy as np, os, torch.autograd.Variable, termcolor.colored,
# plus project-local Net, Net_D, xavier_weights_init, pseudo_labeling and accuracy.
class Solver(object):
    def __init__(self, config, source_loader, source_val_loader, target_loader, target_val_loader):
        self.source_loader = source_loader
        self.source_val_loader = source_val_loader
        self.target_loader = target_loader
        self.target_val_loader = target_val_loader
        self.net = None
        self.net_optimizer = None
        self.net_d = None
        self.net_optimizer_d = None
        self.beta1 = config.beta1
        self.beta2 = config.beta2
        self.train_iters = config.train_iters
        self.pretrain_iters = config.pretrain_iters
        self.batch_size = config.batch_size
        self.lr = config.lr
        self.lr_d = config.lr_d
        self.alpha_s = config.alpha_s
        self.alpha_t = config.alpha_t
        self.beta_c = config.beta_c
        self.beta_sep = config.beta_sep
        self.beta_p = config.beta_p
        self.log_step = config.log_step
        self.model_path = config.model_path
        self.num_classes = config.num_classes
        self.build_model()

    def build_model(self):
        """Builds a generator and a discriminator."""
        self.net = Net()
        self.net_d = Net_D()

        net_params = list(self.net.parameters())
        net_d_params = list(self.net_d.parameters())
        self.net_optimizer = optim.Adam(net_params, self.lr, [self.beta1, self.beta2])
        self.net_optimizer_d = optim.Adam(net_d_params, self.lr_d, [self.beta1, self.beta2])

        if torch.cuda.is_available():
            self.net.cuda()
            self.net_d.cuda()

    def to_var(self, x):
        """Converts numpy to variable."""
        if torch.cuda.is_available():
            x = x.cuda()
        return Variable(x, requires_grad=False)

    def to_data(self, x):
        """Converts variable to numpy."""
        if torch.cuda.is_available():
            x = x.cpu()
        return x.data.numpy()

    def reset_grad(self):
        """Zeros the gradient buffers."""
        self.net_optimizer.zero_grad()
        self.net_optimizer_d.zero_grad()

    def separability_loss(self, labels, latents, imbalance_parameter=1):
        criteria = torch.nn.modules.loss.CosineEmbeddingLoss()
        loss_up = 0
        one_cuda = torch.ones(1).cuda()
        mean = torch.mean(latents, dim=0).cuda().view(1, -1)
        loss_down = 0
        for i in range(self.num_classes):
            indexes = labels.eq(i)
            mean_i = torch.mean(latents[indexes], dim=0).view(1, -1)
            if not torch.isnan(mean_i.norm()):  # classes absent from the batch give NaN means
                for latent in latents[indexes]:
                    loss_up += criteria(latent.view(1, -1), mean_i, one_cuda)
                loss_down += criteria(mean, mean_i, one_cuda)
        loss = (loss_up / loss_down) * imbalance_parameter
        return loss

    def initialisation(self):
        self.net.apply(xavier_weights_init)
        self.net_d.apply(xavier_weights_init)
        source_iter = iter(self.source_loader)
        target_iter = iter(self.target_loader)
        source_val_iter = iter(self.source_val_loader)
        target_val_iter = iter(self.target_val_loader)
        source_per_epoch = len(source_iter)
        target_per_epoch = len(target_iter)
        targetval_per_epoch = len(target_val_iter)
        print(source_per_epoch, target_per_epoch, targetval_per_epoch)

        criterion = nn.CrossEntropyLoss()

        f_labels = torch.LongTensor(128)
        f_labels[...] = 10

        t_labels = torch.LongTensor(128)
        t_labels[...] = 1

        # pretrain
        log_pre = 50
        source_iter = iter(self.source_loader)
        source_val_iter = iter(self.source_val_loader)
        target_iter = iter(self.target_loader)
        return criterion, source_per_epoch, target_per_epoch, target_iter, source_iter, log_pre, source_val_iter

    def train(self):
        criterion, source_per_epoch, target_per_epoch, target_iter, source_iter, log_pre, source_val_iter = self.initialisation()

        pre_train = not os.path.exists(os.path.join(self.model_path, 'pre_train.pth'))

        print("Pretrain:\n*********")
        if pre_train:
            for step in range(self.pretrain_iters + 1):
                # ============ Initialization ============#
                # refresh
                if (step + 1) % (source_per_epoch) == 0:
                    source_iter = iter(self.source_loader)
                if (step + 1) % (target_per_epoch) == 0:
                    target_iter = iter(self.target_loader)
                # load the data
                source, s_labels = next(source_iter)
                target, t_labels = next(target_iter)
                target, t_labels = self.to_var(target), self.to_var(t_labels).long().squeeze()
                source, s_labels = self.to_var(source), self.to_var(s_labels).long().squeeze()

                # ============ Training ============ #
                self.reset_grad()
                # forward
                latent, c = self.net(source)
                # loss
                loss_source_class = criterion(c, s_labels)

                # one step
                loss_source_class.backward()
                self.net_optimizer.step()
                self.reset_grad()
                # ============ Validation ============ #
                if (step + 1) % log_pre == 0:
                    _, c_source = self.net(source)
                    _, c_target = self.net(target)
                    print("[%d/20000] classification loss: %.4f" % (
                        step + 1, loss_source_class.item()))
                    print("source accuracy  %.4f;  target accuracy %.4f" % (
                        accuracy(s_labels, c_source),
                        accuracy(t_labels, c_target)))

            self.save_model()
        else:
            self.load_model()

        # ============ Initialization ============ #
        source_iter = iter(self.source_loader)
        target_iter = iter(self.target_loader)
        source_val_iter = iter(self.source_val_loader)
        maximum_acc = 0.0  # best target validation accuracy seen so far
        max_iter = 0
        net_params = list(self.net.parameters())
        net_d_params = list(self.net_d.parameters())

        self.net_optimizer = optim.Adam(net_params, self.lr, [self.beta1, self.beta2])
        self.net_optimizer_d = optim.Adam(net_d_params, self.lr_d, [self.beta1, self.beta2])
        print("Second:\n******")
        # self.validate_source()
        self.validate_target()


        for step in range(self.train_iters):
            # ============ Initialization ============#

            # refresh
            if (step + 1) % (target_per_epoch) == 0:
                target_iter = iter(self.target_loader)
            if (step + 1) % (source_per_epoch) == 0:
                source_iter = iter(self.source_loader)
                source_val_iter = iter(self.source_val_loader)
            # load the data
            source, s_labels = next(source_iter)
            source, s_labels = self.to_var(source), self.to_var(s_labels).long().squeeze()  # must squeeze

            target, t_labels = next(target_iter)
            target, t_labels = self.to_var(target), self.to_var(t_labels).long().squeeze()

            # ============ train D ============#
            self.reset_grad()

            latent_source, c = self.net(source)
            d = self.net_d(latent_source)
            loss_d_s1 = F.binary_cross_entropy(d, torch.ones_like(d, dtype=torch.float32))
            loss_d_s0 = F.binary_cross_entropy(d, torch.zeros_like(d, dtype=torch.float32))
            loss_c_source = criterion(c, s_labels)

            latent_target, c = self.net(target)
            d = self.net_d(latent_target)
            loss_d_t0 = F.binary_cross_entropy(d, torch.zeros_like(d, dtype=torch.float32))

            loss_p = loss_d_s0
            loss_d = loss_d_s1 + loss_d_t0
            # ============ train pseudo labeling ============#

            chosen_target, pseudo_labels, indexes, imbalance_parameter = pseudo_labeling(target, c)

            if chosen_target is not None:
                loss_c_target = criterion(c[indexes], pseudo_labels)

                latent_target = latent_target[indexes]
                # ============ class loss  ============#
                loss_sep = self.separability_loss(torch.cat((s_labels, pseudo_labels)),
                                                  torch.cat((latent_source, latent_target)),
                                                  imbalance_parameter=imbalance_parameter)
            else:
                loss_c_target = 0
                loss_sep = 0
            loss = self.beta_c * (self.alpha_s * loss_c_source + self.alpha_t * loss_c_target) + \
                   self.beta_p * loss_p + \
                   self.beta_sep * loss_sep

            loss.backward(retain_graph=True)  # the graph is reused below for the discriminator loss
            self.net_optimizer.step()

            loss_d.backward()
            self.net_optimizer_d.step()

            self.reset_grad()

            # ============ Validation ============ #
            if (step + 1) % self.log_step == 0:
                print("max accuracy:", colored(maximum_acc, "green"), "iteration", max_iter)
                _, c_source = self.net(source)
                _, c_target = self.net(target)
                print("source  accuracy (svhn_train) %.4f;  target accuracy %.4f" % (
                    accuracy(s_labels, c_source), accuracy(t_labels, c_target)))
                
                self.validate_source()
                acc = self.validate_target()

                if acc > maximum_acc:
                    maximum_acc = acc
                    max_iter = step
                    torch.save(self.net, "./model_c_" + str(step) + '_' + str(acc) + ".pth")
                    torch.save(self.net_d, "./model_d_" + str(step) + '_' + str(acc) + ".pth")
            self.reset_grad()
        # ============ Save the model ============ #
        torch.save(self.net, "./model_c_final.pth")
        torch.save(self.net_d, "./model_d_final.pth")

    def validate_target(self):
        class_correct = [0] * self.num_classes
        class_total = [0.] * self.num_classes
        classes = [str(i) for i in range(self.num_classes)]
        self.net.eval()  # prep model for evaluation

        for data, target in self.target_val_loader:

            # forward pass: compute predicted outputs by passing inputs to the model
            data, target = self.to_var(data), self.to_var(target).long().squeeze()

            data = data.cuda()
            target = target.cuda()

            latent, output = self.net(data)
            _, pred = torch.max(output, 1)
            correct = np.squeeze(pred.eq(target.data.view_as(pred)))
            # calculate test accuracy for each object class
            for i in range(len(target.data)):
                label = target.data[i]
                class_correct[label] += correct[i].item()
                class_total[label] += 1

        for i in range(self.num_classes):
            if class_total[i] > 0:
                print('Test Accuracy (mnist-test) of %5s: %2d%% (%2d/%2d)' % (
                    str(i), 100 * class_correct[i] / class_total[i],
                    np.sum(class_correct[i]), np.sum(class_total[i])))
            else:
                print('Test Accuracy (mnist-test) of %5s: N/A (no training examples)' % (classes[i]))

        print("\nTest Accuracy (mnist-test) (Overall): ", end="")
        print(colored('%2d%% ' % (100. * np.sum(class_correct) / np.sum(class_total)), "red"), end="")
        print("(", end="")
        print(colored(str(int(np.sum(class_correct))), "red"), end=" ")
        print('/%2d)' % (np.sum(class_total)))
        self.net.train()
        return 100. * np.sum(class_correct) / np.sum(class_total)


    def validate_source(self):
        class_correct = [0] * self.num_classes
        class_total = [0.] * self.num_classes
        classes = [str(i) for i in range(self.num_classes)]
        self.net.eval()  # prep model for evaluation

        for data, target in self.source_val_loader:

            # forward pass: compute predicted outputs by passing inputs to the model
            data, target = self.to_var(data), self.to_var(target).long().squeeze()

            data = data.cuda()
            target = target.cuda()

            latent, output = self.net(data)
            _, pred = torch.max(output, 1)
            correct = np.squeeze(pred.eq(target.data.view_as(pred)))
            # calculate test accuracy for each object class
            for i in range(len(target.data)):
                label = target.data[i]
                class_correct[label] += correct[i].item()
                class_total[label] += 1

        for i in range(self.num_classes):
            if class_total[i] > 0:
                print('Test Accuracy (svhn-test) of %5s: %2d%% (%2d/%2d)' % (
                    str(i), 100 * class_correct[i] / class_total[i],
                    np.sum(class_correct[i]), np.sum(class_total[i])))
            else:
                print('Test Accuracy (svhn-test) of %5s: N/A (no training examples)' % (classes[i]))

        print("\nTest Accuracy (svhn-test) (Overall): ", end="")
        print(colored('%2d%% ' % (100. * np.sum(class_correct) / np.sum(class_total)), "red"), end="")
        print("(", end="")
        print(colored(str(int(np.sum(class_correct))), "red"), end=" ")
        print('/%2d)' % (np.sum(class_total)))
        self.net.train()
        return 100. * np.sum(class_correct) / np.sum(class_total)


    def save_model(self):
        torch.save(self.net, os.path.join(self.model_path, 'pre_train.pth'))

    def load_model(self):
        self.net = torch.load(os.path.join(self.model_path, 'pre_train.pth'))
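
pseudo_labeling, accuracy and xavier_weights_init are project helpers. A hedged sketch of a confidence-threshold pseudo_labeling consistent with how its four return values are used in train() above (an assumption, not the project's actual implementation):

import torch
import torch.nn.functional as F

def pseudo_labeling(target, logits, threshold=0.95):
    probs = F.softmax(logits, dim=1)
    conf, labels = probs.max(dim=1)
    indexes = conf > threshold                  # boolean mask over the batch
    if indexes.sum().item() == 0:
        return None, None, None, 1.0
    # Assumption: the imbalance parameter is the kept fraction of the batch.
    imbalance = indexes.float().mean().item()
    return target[indexes], labels[indexes], indexes, imbalance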
Example #6
# Assumed imports for this snippet: pathlib.Path, torch, torch.nn.functional as F,
# torch.optim as optim, torchvision's datasets/transforms,
# torch.utils.tensorboard.SummaryWriter, plus project-local get_logger and Net.
class Trainer:

    def __init__(self, cfg):
        self.cfg = cfg

        self.init_env()
        self.init_device()
        self.init_data()
        self.init_model()
        self.init_optimizer()

    def init_env(self):
        self.exp_dir = Path(
            self.cfg.train_log_root).expanduser().joinpath(self.cfg.exp_id)
        self.exp_dir.mkdir(parents=True, exist_ok=True)
        self.log_dir = self.exp_dir.joinpath(self.cfg.log_subdir)
        self.tb_dir = self.exp_dir.joinpath(self.cfg.tb_subdir)
        self.ckpt_dir = self.exp_dir.joinpath(self.cfg.ckpt_subdir)

        self.logger = get_logger(__name__, self.log_dir)
        self.tb = SummaryWriter(self.tb_dir)
        torch.manual_seed(self.cfg.seed)

        self.epoch = 0
        self.acc = 0.

        self.logger.info('Train log location: {}'.format(self.exp_dir))

    def init_device(self):
        self.use_cuda = not self.cfg.no_cuda and torch.cuda.is_available()
        if self.use_cuda:
            self.device = torch.device('cuda')
            self.logger.info('Using gpu')
        else:
            self.device = torch.device('cpu')
            self.logger.info('Using cpu')

    def init_data(self):
        self.logger.info('Initializing data loader...')
        kwargs = {
            'num_workers': 1, 'pin_memory': True} if self.use_cuda else {}
        self.train_loader = torch.utils.data.DataLoader(
            datasets.MNIST(
                self.cfg.data_root, train=True, download=True,
                transform=transforms.Compose([
                    transforms.ToTensor(),
                    transforms.Normalize((0.1307,), (0.3081,))])),
            batch_size=self.cfg.batch_size, shuffle=True, **kwargs)
        self.logger.info('Train loader has been initialized.')
        self.val_loader = torch.utils.data.DataLoader(
            datasets.MNIST(
                self.cfg.data_root, train=False,
                transform=transforms.Compose([
                    transforms.ToTensor(),
                    transforms.Normalize((0.1307,), (0.3081,))])),
            batch_size=self.cfg.val_batch_size, shuffle=True, **kwargs)
        self.logger.info('Test loader has been initialized.')

    def init_model(self):
        self.model = Net()
        data, target = next(iter(self.train_loader))
        self.tb.add_graph(self.model, data)
        self.model = self.model.to(self.device)
        self.logger.info('Model has been initialized.')

    def init_optimizer(self):
        cfg_optim = self.cfg.optim
        optim_func = getattr(optim, cfg_optim.type)
        self.optimizer = optim_func(
            self.model.parameters(), **dict(self.cfg.optim.args))
        self.logger.info('Optimizer has been initialized.')

    def train(self):
        self.model.train()
        for batch_idx, (data, target) in enumerate(self.train_loader):
            data, target = data.to(self.device), target.to(self.device)
            self.optimizer.zero_grad()
            output = self.model(data)
            loss = F.nll_loss(output, target)
            loss.backward()
            self.optimizer.step()

            self.train_loss = loss.item()
            if batch_idx % self.cfg.log_interval == 0:
                self.logger.info(
                    '{:2d}, {}/{} loss: {:.6f}, test acc: {:.2f}%'.format(
                        self.epoch, batch_idx * len(data),
                        len(self.train_loader.dataset), loss.item(), self.acc))
                total_iter = self.epoch * len(self.train_loader) + batch_idx
                self.tb.add_scalar('train/loss', loss.item(), total_iter)

    def test(self):
        self.model.eval()
        test_loss = 0
        correct = 0
        with torch.no_grad():
            for data, target in self.val_loader:
                data, target = data.to(self.device), target.to(self.device)
                output = self.model(data)
                test_loss += F.nll_loss(output, target, reduction='sum').item()
                pred = output.argmax(dim=1, keepdim=True)
                correct += pred.eq(target.view_as(pred)).sum().item()
        test_loss /= len(self.val_loader.dataset)
        self.acc = 100. * correct / len(self.val_loader.dataset)
        self.logger.info(
            '{:2d}, test loss: {:.4f}, test acc: {}/{} ({:.2f}%)'.format(
                self.epoch, test_loss, correct, len(self.val_loader.dataset),
                self.acc))
        self.tb.add_scalar('test/acc', self.acc, self.epoch)
        self.tb.add_scalar('test/loss', test_loss, self.epoch)

    def load(self, for_resuming_training=True, label='latest'):
        ckpt_path = self.ckpt_dir.joinpath('{}.pt'.format(label))
        if ckpt_path.is_file():
            self.logger.info('Loading model from {}'.format(ckpt_path))
            ckpt = torch.load(ckpt_path, map_location=self.device)
            self.model.load_state_dict(ckpt['model_state_dict'])
            if for_resuming_training:
                self.optimizer.load_state_dict(ckpt['optimizer_state_dict'])
                self.epoch = ckpt['epoch'] + 1
                self.acc = ckpt['acc']
            self.logger.info(
                'Model of epoch {} loaded.'.format(ckpt['epoch']))
        else:
            self.logger.info('No checkpoint found.')

    def save(self, label='latest'):
        self.logger.info('Saving model...')
        self.ckpt_dir.mkdir(exist_ok=True, parents=True)
        ckpt_path = self.ckpt_dir.joinpath('{}.pt'.format(label))
        torch.save({
            'epoch': self.epoch,
            'model_state_dict': self.model.state_dict(),
            'optimizer_state_dict': self.optimizer.state_dict(),
            'train_loss': self.train_loss,
            'acc': self.acc
            }, ckpt_path)
        self.logger.info('Model saved to {}.'.format(ckpt_path))

    def start(self):
        self.load(for_resuming_training=True)
        if self.epoch > 0:
            self.logger.info('Resuming training from epoch {}'.format(self.epoch))
        try:
            for self.epoch in range(self.epoch, self.cfg.epochs):
                self.train()
                self.test()
            self.logger.info('Training is finished.')
        except KeyboardInterrupt:
            self.logger.warning('Keyboard Interrupted.')
        except Exception as e:
            self.logger.exception(repr(e))
        finally:
            if self.epoch > 0:
                self.save()
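
A hypothetical driver for the Trainer class above; every value below is a placeholder, and only the attribute names are taken from what the init_* methods actually read:

from types import SimpleNamespace

cfg = SimpleNamespace(
    train_log_root='~/runs', exp_id='mnist_baseline',
    log_subdir='log', tb_subdir='tb', ckpt_subdir='ckpt',
    seed=1, no_cuda=False, data_root='./data',
    batch_size=64, val_batch_size=1000,
    optim=SimpleNamespace(type='Adam', args={'lr': 1e-3}),
    log_interval=100, epochs=10)

Trainer(cfg).start()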
Example #7
# Assumed context for this script: standard imports (os, json, time, traceback,
# numpy, torch, torchvision, and the project's Net, Vgg16, StyleLoader, utils)
# plus module-level globals defined elsewhere: param_path, logger, output_path,
# model_path, temp_save_model_dir, final_model_filename.
def train(args):

    check_paths(args)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    try:
        with open(param_path, 'r') as tc:

            trainingParams = json.load(tc)
            ngf = int(trainingParams.get('ngf', args.ngf))
            epochs = int(trainingParams.get('epochs', args.epochs))
            batch_size = int(trainingParams.get('batch_size', args.batch_size))
            log_interval = int(
                trainingParams.get('log_interval', args.log_interval))
            learning_rate = float(
                trainingParams.get('learning_rate', args.learning_rate))
            cuda = int(trainingParams.get('cuda', args.cuda))

            if cuda:
                logger.info("Using CUDA")
                torch.cuda.manual_seed(args.seed)
                kwargs = {'num_workers': 8, 'pin_memory': True}
                logger.info("Using kwarguments: \n" + str(kwargs))
            else:
                kwargs = {}

            transform = transforms.Compose([
                transforms.Resize(args.image_size),  # transforms.Scale was renamed to Resize
                transforms.CenterCrop(args.image_size),
                transforms.ToTensor(),
                transforms.Lambda(lambda x: x.mul(255))
            ])
            train_dataset = datasets.ImageFolder(args.dataset, transform)
            train_loader = DataLoader(train_dataset,
                                      batch_size=args.batch_size,
                                      **kwargs)
            style_model = Net(ngf=ngf)

            print(style_model)

            optimizer = Adam(style_model.parameters(), learning_rate)
            mse_loss = torch.nn.MSELoss()

            vgg = Vgg16()

            utils.mod_utils.init_vgg16(args.vgg_model_dir)
            vgg.load_state_dict(
                torch.load(os.path.join(args.vgg_model_dir, "vgg16.weight")))

            if cuda:
                style_model.cuda()
                vgg.cuda()

            style_loader = StyleLoader(args.style_folder, args.style_size)

            for e in range(epochs):
                style_model.train()
                agg_content_loss = 0.
                agg_style_loss = 0.
                count = 0

                for batch_id, (x, _) in enumerate(train_loader):
                    n_batch = len(x)
                    count += n_batch
                    optimizer.zero_grad()
                    x = preprocess_batch(x)
                    if cuda:
                        x = x.cuda()  # .cuda() returns a copy and must be reassigned

                    style_v = style_loader.get(batch_id)
                    style_model.setTarget(style_v)

                    style_v = utils.img_utils.subtract_imagenet_mean_batch(
                        style_v)
                    features_style = vgg(style_v)
                    gram_style = [
                        utils.img_utils.gram_matrix(y) for y in features_style
                    ]

                    y = style_model(x)
                    xc = x.detach().clone()  # frozen content reference; no gradients needed

                    y = utils.img_utils.subtract_imagenet_mean_batch(y)
                    xc = utils.img_utils.subtract_imagenet_mean_batch(xc)

                    features_y = vgg(y)
                    with torch.no_grad():  # content features need no gradients
                        features_xc = vgg(xc)

                    f_xc_c = features_xc[1].detach()

                    content_loss = args.content_weight * \
                        mse_loss(features_y[1], f_xc_c)

                    style_loss = 0.
                    for m in range(len(features_y)):
                        gram_y = utils.img_utils.gram_matrix(features_y[m])
                        gram_s = gram_style[m].detach().repeat(
                            args.batch_size, 1, 1, 1)
                        style_loss += args.style_weight * mse_loss(
                            gram_y, gram_s[:n_batch, :, :])

                    total_loss = content_loss + style_loss
                    total_loss.backward()
                    optimizer.step()

                    agg_content_loss += content_loss.item()
                    agg_style_loss += style_loss.item()

                    if (batch_id + 1) % log_interval == 0:
                        msg = "{}\tEpoch {}:\t[{}/{}]\tcontent: {:.6f}\tstyle: {:.6f}\ttotal: {:.6f}".format(
                            time.ctime(), e + 1, count, len(train_dataset),
                            agg_content_loss / (batch_id + 1),
                            agg_style_loss / (batch_id + 1),
                            (agg_content_loss + agg_style_loss) /
                            (batch_id + 1))
                        print(msg)

                    if (batch_id + 1) % (20 * log_interval) == 0:
                        # save model
                        style_model.eval()
                        style_model.cpu()
                        save_model_filename = "Epoch_" + str(e) + "_" +\
                                              "iters_" + str(count) + \
                                              "_" + str(time.ctime()).replace(' ','_') + \
                                               "_" + str(args.content_weight) + "_" + \
                                               str(args.style_weight) + ".model"
                        save_model_path = os.path.join(temp_save_model_dir,
                                                       save_model_filename)

                        torch.save(style_model.state_dict(), save_model_path)
                        style_model.train()
                        style_model.cuda()
                        logger.info("Checkpoint, trained model saved at " +
                                    str(save_model_path))

            # save the final model

            style_model.eval()
            style_model.cpu()
            save_final_model_path = os.path.join(model_path,
                                                 final_model_filename)
            torch.save(style_model.state_dict(), save_final_model_path)

            logger.info("Done, trained model saved at " +
                        save_final_model_path)

            # Write out the success file
            with open(os.path.join(output_path, 'success'), 'w') as s:
                s.write('Done')

    except Exception as e:
        with open(os.path.join(output_path, 'failure'), 'w') as s:
            trc = traceback.format_exc()
            logger.info('Exception during training: ' + str(e) + '\n' + trc)
            s.write('Exception during training: ' + str(e) + '\n' + trc)
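
utils.img_utils.gram_matrix is project code; for reference, a sketch of the standard Gram-matrix computation such helpers usually perform (the size normalization is an assumption):

import torch

def gram_matrix(y):
    # y: (batch, channels, height, width) feature maps from VGG.
    b, ch, h, w = y.size()
    features = y.view(b, ch, h * w)
    gram = features.bmm(features.transpose(1, 2)) / (ch * h * w)
    return gram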