Пример #1
0
class Tester:
    """Run a trained encoder/decoder captioning model on the test set."""

    def __init__(self, _hparams):
        # Data loader and both sub-models live on the globally configured DEVICE.
        self.test_loader = get_test_loader(_hparams)
        self.encoder = CNN().to(DEVICE)
        self.decoder = RNN(fea_dim=_hparams.fea_dim,
                           embed_dim=_hparams.embed_dim,
                           hid_dim=_hparams.hid_dim,
                           max_sen_len=_hparams.max_sen_len,
                           vocab_pkl=_hparams.vocab_pkl).to(DEVICE)
        self.test_cap = _hparams.test_cap

    def testing(self, save_path, test_path):
        """
        Generate captions for the test set and report COCO metrics.

        :param save_path: path of the saved model checkpoint to load
        :param test_path: path where the generated sentences (JSON) are written
        :return: None; metric/score pairs are printed
        """
        print('*' * 20, 'test', '*' * 20)
        self.load_models(save_path)
        self.set_eval()

        sen_json = []
        with torch.no_grad():
            for val_step, (img, img_id) in tqdm(enumerate(self.test_loader)):
                img = img.to(DEVICE)
                # Call the module directly instead of .forward() so that
                # registered hooks are honored (same result otherwise).
                features = self.encoder(img)
                sens, _ = self.decoder.sample(features)
                sen_json.append({'image_id': int(img_id), 'caption': sens[0]})

        with open(test_path, 'w') as f:
            json.dump(sen_json, f)

        result = coco_eval(self.test_cap, test_path)
        for metric, score in result:
            print(metric, score)

    def load_models(self, save_path):
        """Load encoder/decoder weights from *save_path*.

        map_location remaps tensors saved on cuda:2 to cuda:0 so that a
        checkpoint saved on one GPU can be loaded on a different one.
        """
        ckpt = torch.load(save_path,
                          map_location={'cuda:2': 'cuda:0'})
        encoder_state_dict = ckpt['encoder_state_dict']
        self.encoder.load_state_dict(encoder_state_dict)
        decoder_state_dict = ckpt['decoder_state_dict']
        self.decoder.load_state_dict(decoder_state_dict)

    def set_eval(self):
        """Switch both sub-models to evaluation mode."""
        self.encoder.eval()
        self.decoder.eval()
Пример #2
0
                      n_layers=args.n_layers,
                      dropout=args.dropout,
                      bidirectional=args.bidirectional,
                      nonlinearity=args.nonlinearity)

# Restore the best checkpoint and switch the model to inference mode.
model.load_state_dict(torch.load(args.model_file))
model.eval()

# Build the output path; tag the filename with the sample index when given.
if args.sample_idx is None:
    output_filename = 'sampled-SMILES.smi'
else:
    output_filename = 'sampled-SMILES-{}.smi'.format(args.sample_idx)

output_file = os.path.join(args.output_dir, output_filename)

# Draw SMILES from the trained model in fixed-size batches until the
# requested number of molecules has been written.
batch_size = 512
sampled_count = 0
while sampled_count < args.mols_per_file:
    sampled_smiles, NLLs = model.sample(batch_size,
                                        return_smiles=True,
                                        return_nll=True)
    # Append each sampled SMILES with its rounded negative log-likelihood.
    with open(output_file, 'a+') as f:
        for sm, nll in zip(sampled_smiles, NLLs):
            f.write('{}\t{}\n'.format(sm, round(nll.detach().item(), 6)))

    sampled_count += batch_size
Пример #3
0
def main(args):
    """Train the CNN encoder / RNN decoder captioning model on MS-COCO.

    :param args: parsed CLI namespace; fields read here: batch_size,
        embed_size, num_hidden, log_step, checkpoint_dir, checkpoint_file,
        sample and rec_unit.
    """
    # hyperparameters
    batch_size = args.batch_size
    num_workers = 1

    # Image Preprocessing
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        # ImageNet channel mean/std statistics.
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ])

    # load COCOs dataset
    IMAGES_PATH = 'data/train2014'
    CAPTION_FILE_PATH = 'data/annotations/captions_train2014.json'

    vocab = load_vocab()
    train_loader = get_coco_data_loader(path=IMAGES_PATH,
                                        json=CAPTION_FILE_PATH,
                                        vocab=vocab,
                                        transform=transform,
                                        batch_size=batch_size,
                                        shuffle=True,
                                        num_workers=num_workers)

    IMAGES_PATH = 'data/val2014'
    CAPTION_FILE_PATH = 'data/annotations/captions_val2014.json'
    val_loader = get_coco_data_loader(path=IMAGES_PATH,
                                      json=CAPTION_FILE_PATH,
                                      vocab=vocab,
                                      transform=transform,
                                      batch_size=batch_size,
                                      shuffle=True,
                                      num_workers=num_workers)

    losses_val = []
    losses_train = []

    # Build the models
    ngpu = 1
    initial_step = initial_epoch = 0
    embed_size = args.embed_size
    num_hiddens = args.num_hidden
    learning_rate = 1e-3
    num_epochs = 3
    log_step = args.log_step
    save_step = 500
    checkpoint_dir = args.checkpoint_dir

    encoder = CNN(embed_size)
    decoder = RNN(embed_size,
                  num_hiddens,
                  len(vocab),
                  1,
                  rec_unit=args.rec_unit)

    # Loss
    criterion = nn.CrossEntropyLoss()

    if args.checkpoint_file:
        # Resume: restore weights, optimizer and loss history from checkpoint.
        encoder_state_dict, decoder_state_dict, optimizer, *meta = utils.load_models(
            args.checkpoint_file, args.sample)
        initial_step, initial_epoch, losses_train, losses_val = meta
        encoder.load_state_dict(encoder_state_dict)
        decoder.load_state_dict(decoder_state_dict)
    else:
        # Fresh run: optimize the decoder plus only the encoder's
        # linear/batchnorm heads (the CNN backbone is left out).
        params = list(decoder.parameters()) + list(
            encoder.linear.parameters()) + list(encoder.batchnorm.parameters())
        optimizer = torch.optim.Adam(params, lr=learning_rate)

    if torch.cuda.is_available():
        encoder.cuda()
        decoder.cuda()

    if args.sample:
        # Sampling-only mode: skip training entirely.
        return utils.sample(encoder, decoder, vocab, val_loader)

    # Train the Models
    total_step = len(train_loader)
    try:
        for epoch in range(initial_epoch, num_epochs):

            for step, (images, captions,
                       lengths) in enumerate(train_loader, start=initial_step):

                # Set mini-batch dataset
                # NOTE(review): volatile=True is the legacy (pre-0.4) PyTorch
                # no-grad flag; it propagates to outputs, which could break
                # backward() below — confirm utils.to_var's semantics.
                images = utils.to_var(images, volatile=True)
                captions = utils.to_var(captions)
                # [0] extracts the packed .data tensor (flattened targets).
                targets = pack_padded_sequence(captions,
                                               lengths,
                                               batch_first=True)[0]

                # Forward, Backward and Optimize
                decoder.zero_grad()
                encoder.zero_grad()

                if ngpu > 1:
                    # run on multiple GPU
                    features = nn.parallel.data_parallel(
                        encoder, images, range(ngpu))
                    outputs = nn.parallel.data_parallel(
                        decoder, features, range(ngpu))
                else:
                    # run on single GPU
                    features = encoder(images)
                    outputs = decoder(features, captions, lengths)

                train_loss = criterion(outputs, targets)
                # .data[0] is the legacy scalar access (>=0.4 uses .item()).
                losses_train.append(train_loss.data[0])
                train_loss.backward()
                optimizer.step()

                # Run validation set and predict
                if step % log_step == 0:
                    # Freeze batchnorm statistics while validating.
                    encoder.batchnorm.eval()
                    # run validation set
                    batch_loss_val = []
                    for val_step, (images, captions,
                                   lengths) in enumerate(val_loader):
                        images = utils.to_var(images, volatile=True)
                        captions = utils.to_var(captions, volatile=True)

                        targets = pack_padded_sequence(captions,
                                                       lengths,
                                                       batch_first=True)[0]
                        features = encoder(images)
                        outputs = decoder(features, captions, lengths)
                        val_loss = criterion(outputs, targets)
                        batch_loss_val.append(val_loss.data[0])

                    losses_val.append(np.mean(batch_loss_val))

                    # predict a caption for the first image of the last
                    # validation batch and print it next to the target
                    sampled_ids = decoder.sample(features)
                    sampled_ids = sampled_ids.cpu().data.numpy()[0]
                    sentence = utils.convert_back_to_text(sampled_ids, vocab)
                    print('Sample:', sentence)

                    true_ids = captions.cpu().data.numpy()[0]
                    sentence = utils.convert_back_to_text(true_ids, vocab)
                    print('Target:', sentence)

                    print(
                        'Epoch: {} - Step: {} - Train Loss: {} - Eval Loss: {}'
                        .format(epoch, step, losses_train[-1], losses_val[-1]))
                    encoder.batchnorm.train()

                # Save the models
                if (step + 1) % save_step == 0:
                    utils.save_models(encoder, decoder, optimizer, step, epoch,
                                      losses_train, losses_val, checkpoint_dir)
                    utils.dump_losses(
                        losses_train, losses_val,
                        os.path.join(checkpoint_dir, 'losses.pkl'))

    except KeyboardInterrupt:
        pass
    finally:
        # Do final save
        # NOTE(review): step/epoch are undefined here if execution stops
        # before the first training iteration (NameError) — confirm.
        utils.save_models(encoder, decoder, optimizer, step, epoch,
                          losses_train, losses_val, checkpoint_dir)
        utils.dump_losses(losses_train, losses_val,
                          os.path.join(checkpoint_dir, 'losses.pkl'))
Пример #4
0
class Trainer:
    """Train the encoder/decoder captioning model, with optional encoder
    fine-tuning, periodic validation and best-CIDEr checkpointing."""

    def __init__(self, _hparams):
        utils.set_seed(_hparams.fixed_seed)

        self.train_loader = get_train_loader(_hparams)
        self.val_loader = get_val_loader(_hparams)
        self.encoder = CNN().to(DEVICE)
        self.decoder = RNN(fea_dim=_hparams.fea_dim,
                           embed_dim=_hparams.embed_dim,
                           hid_dim=_hparams.hid_dim,
                           max_sen_len=_hparams.max_sen_len,
                           vocab_pkl=_hparams.vocab_pkl).to(DEVICE)
        self.loss_fn = nn.CrossEntropyLoss()
        self.optimizer = torch.optim.Adam(self.get_params(), lr=_hparams.lr)
        self.writer = SummaryWriter()

        self.max_sen_len = _hparams.max_sen_len
        self.val_cap = _hparams.val_cap
        self.ft_encoder_lr = _hparams.ft_encoder_lr
        self.ft_decoder_lr = _hparams.ft_decoder_lr
        # Best validation CIDEr seen so far; decides when to checkpoint.
        self.best_CIDEr = 0

    def fine_tune_encoder(self, fine_tune_epochs, val_interval, save_path,
                          val_path):
        """
        Unfreeze the encoder and train both models with separate LRs.

        :param fine_tune_epochs: number of fine-tuning epochs
        :param val_interval: validate every this many epochs
        :param save_path: path where the best model is saved
        :param val_path: path where validation-generated sentences are saved
        :return:
        """
        print('*' * 20, 'fine tune encoder for', fine_tune_epochs, 'epochs',
              '*' * 20)
        self.encoder.fine_tune()
        # Rebuild the optimizer with per-module learning rates.
        self.optimizer = torch.optim.Adam([
            {
                'params': self.encoder.parameters(),
                'lr': self.ft_encoder_lr
            },
            {
                'params': self.decoder.parameters(),
                'lr': self.ft_decoder_lr
            },
        ])
        self.training(fine_tune_epochs, val_interval, save_path, val_path)
        # Re-freeze the encoder once fine-tuning is done.
        self.encoder.froze()
        print('*' * 20, 'fine tune encoder complete', '*' * 20)

    def get_params(self):
        """
        Parameters to optimize. The encoder is deliberately frozen at this
        stage, so only decoder parameters are returned.

        :return: list of decoder parameters
        """
        return list(self.decoder.parameters())

    def training(self, max_epochs, val_interval, save_path, val_path):
        """
        Main training loop with periodic validation and checkpointing.

        :param val_path: path where validation-generated sentences are saved
        :param save_path: path where the best model is saved
        :param val_interval: validate every this many epochs
        :param max_epochs: maximum number of training epochs
        :return:
        """
        print('*' * 20, 'train', '*' * 20)
        for epoch in range(max_epochs):
            self.set_train()

            epoch_loss = 0
            epoch_steps = len(self.train_loader)
            for step, (img, cap,
                       cap_len) in tqdm(enumerate(self.train_loader)):
                # batch_size * 3 * 224 * 224
                img = img.to(DEVICE)
                cap = cap.to(DEVICE)

                self.optimizer.zero_grad()

                # Call the modules directly (not .forward) so hooks run.
                features = self.encoder(img)
                outputs = self.decoder(features, cap)

                # Teacher forcing: predictions are compared against the
                # caption shifted by one token, hence lengths shrink by 1.
                outputs = pack_padded_sequence(outputs,
                                               cap_len - 1,
                                               batch_first=True)[0]
                targets = pack_padded_sequence(cap[:, 1:],
                                               cap_len - 1,
                                               batch_first=True)[0]
                train_loss = self.loss_fn(outputs, targets)
                epoch_loss += train_loss.item()
                train_loss.backward()
                self.optimizer.step()

            epoch_loss /= epoch_steps
            self.writer.add_scalar('epoch_loss', epoch_loss, epoch)
            print('epoch_loss: {}, epoch: {}'.format(epoch_loss, epoch))
            if (epoch + 1) % val_interval == 0:
                CIDEr = self.validating(epoch, val_path)
                if self.best_CIDEr <= CIDEr:
                    self.best_CIDEr = CIDEr
                    self.save_model(save_path, epoch)

    def save_model(self, save_path, train_epoch):
        """
        Save the current best model.

        :param save_path: file path for the checkpoint
        :param train_epoch: current training epoch
        :return:
        """
        model_state_dict = {
            'encoder_state_dict': self.encoder.state_dict(),
            'decoder_state_dict': self.decoder.state_dict(),
            # Fixed key typo: was 'tran_epoch'.
            'train_epoch': train_epoch,
        }
        print('*' * 20, 'save model to: ', save_path, '*' * 20)
        torch.save(model_state_dict, save_path)

    def validating(self, train_epoch, val_path):
        """
        Validate: generate captions, dump them to JSON, run COCO eval.

        :param val_path: path where validation-generated sentences are saved
        :param train_epoch: current epoch (TensorBoard x-axis value)
        :return: the CIDEr score
        """
        print('*' * 20, 'validate', '*' * 20)
        self.set_eval()
        sen_json = []
        with torch.no_grad():
            for val_step, (img, img_id) in tqdm(enumerate(self.val_loader)):
                img = img.to(DEVICE)
                features = self.encoder(img)
                sens, _ = self.decoder.sample(features)
                sen_json.append({'image_id': int(img_id), 'caption': sens[0]})

        with open(val_path, 'w') as f:
            json.dump(sen_json, f)

        result = coco_eval(self.val_cap, val_path)
        scores = {}
        for metric, score in result:
            scores[metric] = score
            self.writer.add_scalar(metric, score, train_epoch)

        return scores['CIDEr']

    def set_train(self):
        """Put both sub-models in training mode."""
        self.encoder.train()
        self.decoder.train()

    def set_eval(self):
        """Put both sub-models in evaluation mode."""
        self.encoder.eval()
        self.decoder.eval()
Пример #5
0
def main(args):
    """Generate captions for a directory of images with a trained model.

    :param args: parsed CLI namespace; fields read here: batch_size,
        embed_size, num_hidden, rec_unit, image_path and checkpoint_file.
    """
    import json  # used in the finally-block below; imported up front

    # hyperparameters
    batch_size = args.batch_size
    num_workers = 2

    # Image Preprocessing (ImageNet-style resize/normalize).
    # NOTE(review): RandomHorizontalFlip at inference randomly mirrors
    # the input images — likely unintended for captioning; confirm.
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ])

    vocab = load_vocab()

    loader = get_basic_loader(dir_path=os.path.join(args.image_path),
                              transform=transform,
                              batch_size=batch_size,
                              shuffle=True,
                              num_workers=num_workers)

    # Build the models
    embed_size = args.embed_size
    num_hiddens = args.num_hidden

    encoder = CNN(embed_size)
    decoder = RNN(embed_size,
                  num_hiddens,
                  len(vocab),
                  1,
                  rec_unit=args.rec_unit)

    # Restore the trained weights from the checkpoint.
    encoder_state_dict, decoder_state_dict, optimizer, *meta = utils.load_models(
        args.checkpoint_file)
    encoder.load_state_dict(encoder_state_dict)
    decoder.load_state_dict(decoder_state_dict)

    if torch.cuda.is_available():
        encoder.cuda()
        decoder.cuda()

    # Caption every batch; partial results are still written on interrupt.
    try:
        results = []
        for step, (images, image_ids) in enumerate(loader):
            # volatile=True: legacy (pre-0.4) PyTorch no-grad flag —
            # inference only, no gradients needed.
            images = utils.to_var(images, volatile=True)

            features = encoder(images)
            captions = decoder.sample(features)
            captions = captions.cpu().data.numpy()
            captions = [
                utils.convert_back_to_text(cap, vocab) for cap in captions
            ]
            captions_formatted = [{
                'image_id': int(img_id),
                'caption': cap
            } for img_id, cap in zip(image_ids, captions)]
            results.extend(captions_formatted)
            print('Sample:', captions_formatted)
    except KeyboardInterrupt:
        print('Ok bye!')
    finally:
        # Dump whatever was captioned so far in COCO-results format.
        file_name = 'captions_model.json'
        with open(file_name, 'w') as f:
            json.dump(results, f)
Пример #6
0
        track_loss(sched_file, model, dataset, epoch,
                   counter, loss.item(), args.batch_size)

    # save SMILES?
    if args.sample_every_epochs is not None:
        sample_smiles(args.output_dir, args.sample_idx, model,
                      args.sample_size, epoch, counter)

    if early_stop.stop:
        break

# Record the final training outcome (best step and loss found by early
# stopping) as one extra row of the schedule CSV, if logging was enabled.
if args.log_every_epochs is not None or args.log_every_steps is not None:
    final_row = {'epoch': [None],
                 'step': [early_stop.step_at_best],
                 'outcome': ['training loss'],
                 'value': [early_stop.best_loss]}
    sched = pd.DataFrame(final_row)
    sched.to_csv(sched_file, index=False, mode='a', header=False)

# Restore the best checkpoint and put the model into evaluation mode.
model.load_state_dict(torch.load(model_file))
model.eval()

# Keep drawing batches until at least sample_size SMILES are collected.
sampled_smiles = []
while len(sampled_smiles) < args.sample_size:
    batch = model.sample(args.batch_size, return_smiles=True)
    sampled_smiles.extend(batch)

# Persist the sampled SMILES strings.
write_smiles(sampled_smiles, smiles_file)