Example #1
    def _init_models(self, params):
        # self.global_step = None
        with tf.variable_scope('encodervae'):
            encodervae_inputs = (self.x_enc_inp, self.x_dec_inp, self.x_dec_out, self.global_step)
            params['max_len'] = args.enc_max_len
            params['max_dec_len'] = args.enc_max_len + 1
            self.encoder_model = BaseVAE(params, encodervae_inputs, "encoder")
        with tf.variable_scope('decodervae'):
            decodervae_inputs = (self.y_enc_inp, self.y_dec_inp, self.y_dec_out, self.global_step)
            params['max_len'] = args.dec_max_len
            params['max_dec_len'] = args.dec_max_len + 1
            
            if args.isPointer:
                mask_oovs = self.encoder_model.dec_seq_len_mask
                self.decoder_model = BaseVAE(
                    params, decodervae_inputs, "decoder",
                    self.encoder_model.encoder_outputs,
                    self.x_enc_inp_oovs, self.max_oovs, mask_oovs)
            elif args.isContext:
                self.decoder_model = BaseVAE(params, decodervae_inputs, "decoder", self.encoder_model.encoder_outputs)
            else:
                self.decoder_model = BaseVAE(params, decodervae_inputs, "decoder")

        with tf.variable_scope('transformer'):
            self.transformer = Transformer(self.encoder_model, self.decoder_model, params['graph_type'], self.global_step)
        with tf.variable_scope('decodervae/decoding', reuse=True):
            self.training_logits = self.decoder_model._decoder_training(self.transformer.predition, reuse=True)
            self.predicted_ids_op, self.attens = self.decoder_model._decoder_inference(self.transformer.predition)
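Note that the shared `params` dict is mutated in place between the two variable scopes, so each `BaseVAE` sees the right `max_len` only because of construction order. A minimal defensive sketch, assuming `BaseVAE` only reads the dict at construction time (`args`, `BaseVAE`, and the `*_inputs` tuples are the names from the example above):

# Hypothetical variant: build per-model parameter copies instead of mutating the shared dict.
enc_params = dict(params, max_len=args.enc_max_len, max_dec_len=args.enc_max_len + 1)
dec_params = dict(params, max_len=args.dec_max_len, max_dec_len=args.dec_max_len + 1)
encoder_model = BaseVAE(enc_params, encodervae_inputs, "encoder")
decoder_model = BaseVAE(dec_params, decodervae_inputs, "decoder")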
Example #2
def get_model(input_size, output_size, config):
    model = Transformer(
            input_size, # Source vocabulary size
            config.hidden_size, # Transformer doesn't need word_vec_size.
            output_size, # Target vocabulary size
            n_splits=config.n_splits, # Number of heads in multi-head attention.
            n_enc_blocks=config.n_layers, # Number of encoder blocks
            n_dec_blocks=config.n_layers, # Number of decoder blocks
            dropout_p=config.dropout, # Dropout rate on each block
        )
    return model

# Inference-time variant: rebuilds the same architecture and loads trained weights.
def get_model(input_size, output_size, train_config):
    model = Transformer(
            input_size,
            train_config.hidden_size,
            output_size,
            n_splits=train_config.n_splits,
            n_enc_blocks=train_config.n_layers,
            n_dec_blocks=train_config.n_layers,
            dropout_p=train_config.dropout,
        )
    model.load_state_dict(saved_data['model'])  # saved_data is assumed to be a checkpoint dict loaded elsewhere (e.g. via torch.load)
    model.eval()
    return model
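The second `get_model` relies on a `saved_data` dict defined elsewhere. A minimal usage sketch, assuming the checkpoint was written with `torch.save` and stores the weights under the `'model'` key as above; the file name and the `'config'`, `'src_vocab_size'`, `'tgt_vocab_size'` keys are illustrative assumptions:

import torch

saved_data = torch.load("model.pth", map_location="cpu")  # illustrative path
train_config = saved_data["config"]                       # assumed to hold hidden_size, n_splits, n_layers, dropout
input_size = saved_data["src_vocab_size"]                 # assumed key
output_size = saved_data["tgt_vocab_size"]                # assumed key
model = get_model(input_size, output_size, train_config)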
Example #4
import torch

# Transformer, LabelSmoothing, scheduled_adam_optimizer, run_epoch and Batch are
# assumed to be provided by the accompanying model/training modules of this example.

def data_gen(n_vocab, batch_size, n_batch, device):
    for i in range(n_batch):
        data = torch.randint(2, n_vocab, [batch_size, 10])  # ids 0 and 1 are reserved
        data[:, 0] = 1    # token id 1 acts as the start symbol
        data[:, -2:] = 0  # last two positions padded with id 0
        data = data.to(device)
        yield Batch(data, data)


if __name__ == '__main__':
    n_vocab = 10

    model = Transformer(n_vocab)
    criterion = LabelSmoothing(n_vocab, 0.)
    optimizer = scheduled_adam_optimizer(model)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    #device = 'cpu'
    model.to(device)

    for epoch in range(10):
        print("Epoch: {}".format(epoch))
        data_iter = data_gen(n_vocab, 128, 10000, device)
        run_epoch(data_iter, model, criterion, optimizer)

    in_seq = torch.LongTensor([[1, 7, 5, 2, 3, 4, 5, 0]]).to(device)
    out_seq = torch.zeros([1, 20], dtype=torch.int64).to(device)
    out_seq[:, 0] = 1
    model.eval()
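The script ends right after switching to eval mode. A minimal greedy-decoding sketch that could follow it, assuming `model(src, tgt)` returns per-position logits of shape [batch, tgt_len, n_vocab] (the example does not show the forward signature, so adapt this to the actual interface):

    with torch.no_grad():
        for i in range(1, out_seq.size(1)):
            logits = model(in_seq, out_seq[:, :i])     # assumed forward signature
            next_token = logits[:, -1].argmax(dim=-1)  # greedy pick for the next position
            out_seq[:, i] = next_token
    print(out_seq)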
Example #5
def run(model_dir, max_len, source_train_path, target_train_path,
        source_val_path, target_val_path, enc_max_vocab, dec_max_vocab,
        encoder_emb_size, decoder_emb_size, encoder_units, decoder_units,
        batch_size, epochs, learning_rate, decay_step, decay_percent,
        log_interval, save_interval, compare_interval):

    train_iter, val_iter, source_vocab, target_vocab = create_dataset(
        batch_size, enc_max_vocab, dec_max_vocab, source_train_path,
        target_train_path, source_val_path, target_val_path)
    transformer = Transformer(max_length=max_len,
                              enc_vocab=source_vocab,
                              dec_vocab=target_vocab,
                              enc_emb_size=encoder_emb_size,
                              dec_emb_size=decoder_emb_size,
                              enc_units=encoder_units,
                              dec_units=decoder_units)
    loss_fn = nn.CrossEntropyLoss()
    opt = optim.Adam(transformer.parameters(), lr=learning_rate)
    lr_decay = StepLR(opt, step_size=decay_step, gamma=decay_percent)

    if torch.cuda.is_available():
        transformer.cuda()
        loss_fn.cuda()

    def training_update_function(batch):
        transformer.train()
        lr_decay.step()
        opt.zero_grad()

        softmaxed_predictions, predictions = transformer(batch.src, batch.trg)

        flattened_predictions = predictions.view(-1, len(target_vocab.itos))
        flattened_target = batch.trg.view(-1)

        loss = loss_fn(flattened_predictions, flattened_target)

        loss.backward()
        opt.step()

        return softmaxed_predictions.data, loss.data[0], batch.trg.data  # loss.data[0] is pre-0.4 PyTorch; newer versions use loss.item()

    def validation_inference_function(batch):
        transformer.eval()
        softmaxed_predictions, predictions = transformer(batch.src, batch.trg)

        flattened_predictions = predictions.view(-1, len(target_vocab.itos))
        flattened_target = batch.trg.view(-1)

        loss = loss_fn(flattened_predictions, flattened_target)

        return loss.data[0]  # pre-0.4 PyTorch; newer versions use loss.item()

    trainer = Trainer(train_iter, training_update_function, val_iter,
                      validation_inference_function)
    trainer.add_event_handler(TrainingEvents.TRAINING_STARTED,
                              restore_checkpoint_hook(transformer, model_dir))
    trainer.add_event_handler(TrainingEvents.TRAINING_ITERATION_COMPLETED,
                              log_training_simple_moving_average,
                              window_size=10,
                              metric_name="CrossEntropy",
                              should_log=lambda trainer:
                                  trainer.current_iteration % log_interval == 0,
                              history_transform=lambda history: history[1])
    trainer.add_event_handler(TrainingEvents.TRAINING_ITERATION_COMPLETED,
                              save_checkpoint_hook(transformer, model_dir),
                              should_save=lambda trainer:
                                  trainer.current_iteration % save_interval == 0)
    trainer.add_event_handler(TrainingEvents.TRAINING_ITERATION_COMPLETED,
                              print_current_prediction_hook(target_vocab),
                              should_print=lambda trainer:
                                  trainer.current_iteration % compare_interval == 0)
    trainer.add_event_handler(TrainingEvents.VALIDATION_COMPLETED,
                              log_validation_simple_moving_average,
                              window_size=10,
                              metric_name="CrossEntropy")
    trainer.add_event_handler(TrainingEvents.TRAINING_COMPLETED,
                              save_checkpoint_hook(transformer, model_dir),
                              should_save=lambda trainer: True)
    trainer.run(max_epochs=epochs, validate_every_epoch=True)
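A sketch of how `run` might be invoked; every path and hyperparameter below is illustrative, not taken from the example:

if __name__ == "__main__":
    run(model_dir="checkpoints", max_len=100,
        source_train_path="data/train.src", target_train_path="data/train.trg",
        source_val_path="data/val.src", target_val_path="data/val.trg",
        enc_max_vocab=30000, dec_max_vocab=30000,
        encoder_emb_size=512, decoder_emb_size=512,
        encoder_units=512, decoder_units=512,
        batch_size=64, epochs=10, learning_rate=1e-4,
        decay_step=10000, decay_percent=0.5,
        log_interval=100, save_interval=1000, compare_interval=1000)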
Example #6
    def test_optimizer(self):
        model = Transformer(6)
        optimizer = scheduled_adam_optimizer(model)

def run(model_dir, max_len, source_train_path, target_train_path,
        source_val_path, target_val_path, enc_max_vocab, dec_max_vocab,
        encoder_emb_size, decoder_emb_size, encoder_units, decoder_units,
        batch_size, epochs, learning_rate, decay_step, decay_percent,
        val_interval, save_interval, compare_interval):

    logging.basicConfig(filename="validation.log",
                        filemode="w",
                        level=logging.INFO)

    train_iter, val_iter, source_vocab, target_vocab = create_dataset(
        batch_size, enc_max_vocab, dec_max_vocab, source_train_path,
        target_train_path, source_val_path, target_val_path)
    transformer = Transformer(max_length=max_len,
                              enc_vocab=source_vocab,
                              dec_vocab=target_vocab,
                              enc_emb_size=encoder_emb_size,
                              dec_emb_size=decoder_emb_size,
                              enc_units=encoder_units,
                              dec_units=decoder_units)
    loss_fn = nn.CrossEntropyLoss()
    opt = optim.Adam(transformer.parameters(), lr=learning_rate)
    lr_decay = StepLR(opt, step_size=decay_step, gamma=decay_percent)

    if torch.cuda.is_available():
        transformer.cuda()
        loss_fn.cuda()

    def training_step(engine, batch):
        transformer.train()
        lr_decay.step()
        opt.zero_grad()

        _, predictions = transformer(batch.src, batch.trg)

        flattened_predictions = predictions.view(-1, len(target_vocab.itos))
        flattened_target = batch.trg.view(-1)

        loss = loss_fn(flattened_predictions, flattened_target)

        loss.backward()
        opt.step()

        return loss.cpu().item()

    def validation_step(engine, batch):
        transformer.eval()
        with torch.no_grad():
            softmaxed_predictions, predictions = transformer(
                batch.src, batch.trg)

            flattened_predictions = predictions.view(-1,
                                                     len(target_vocab.itos))
            flattened_target = batch.trg.view(-1)

            loss = loss_fn(flattened_predictions, flattened_target)

            if not engine.state.output:
                predictions = softmaxed_predictions.argmax(
                    -1).cpu().numpy().tolist()
                targets = batch.trg.cpu().numpy().tolist()
            else:
                predictions = engine.state.output[
                    "predictions"] + softmaxed_predictions.argmax(
                        -1).cpu().numpy().tolist()
                targets = engine.state.output["targets"] + batch.trg.cpu(
                ).numpy().tolist()

            return {
                "loss": loss.cpu().item(),
                "predictions": predictions,
                "targets": targets
            }

    trainer = Engine(training_step)
    evaluator = Engine(validation_step)
    checkpoint_handler = ModelCheckpoint(model_dir,
                                         "Transformer",
                                         save_interval=save_interval,
                                         n_saved=10,
                                         require_empty=False)

    timer = Timer(average=True)
    timer.attach(trainer,
                 start=Events.EPOCH_STARTED,
                 resume=Events.ITERATION_STARTED,
                 pause=Events.ITERATION_COMPLETED,
                 step=Events.ITERATION_COMPLETED)

    # Attach training metrics
    RunningAverage(output_transform=lambda x: x).attach(trainer, "train_loss")
    # Attach validation metrics
    RunningAverage(output_transform=lambda x: x["loss"]).attach(
        evaluator, "val_loss")

    pbar = ProgressBar()
    pbar.attach(trainer, ["train_loss"])

    # trainer.add_event_handler(Events.TRAINING_STARTED,
    #                           restore_checkpoint_hook(transformer, model_dir))
    trainer.add_event_handler(Events.ITERATION_COMPLETED,
                              handler=validation_result_hook(
                                  evaluator,
                                  val_iter,
                                  target_vocab,
                                  val_interval,
                                  logger=logging.info))

    trainer.add_event_handler(event_name=Events.ITERATION_COMPLETED,
                              handler=checkpoint_handler,
                              to_save={
                                  "nmt": {
                                      "transformer": transformer,
                                      "opt": opt,
                                      "lr_decay": lr_decay
                                  }
                              })

    # Run the prediction
    trainer.run(train_iter, max_epochs=epochs)
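The checkpoint-restore hook is commented out above. A minimal resume sketch that could be registered before `trainer.run(...)`, assuming a checkpoint file that stores state_dicts under the same keys used in `to_save` (the path and layout are illustrative):

    import os

    def maybe_resume(engine, checkpoint_path=os.path.join(model_dir, "nmt_latest.pth")):
        # Hypothetical resume step; assumes the file holds state_dicts, not live objects.
        if os.path.exists(checkpoint_path):
            state = torch.load(checkpoint_path, map_location="cpu")
            transformer.load_state_dict(state["transformer"])
            opt.load_state_dict(state["opt"])
            lr_decay.load_state_dict(state["lr_decay"])

    trainer.add_event_handler(Events.STARTED, maybe_resume)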
Example #8
from modules.transformer import Transformer, create_masks
import tensorflow as tf

if __name__ == '__main__':
    sample_transformer = Transformer(num_layers=2,
                                     d_model=512,
                                     num_heads=8,
                                     dff=2048,
                                     input_size=50,
                                     output_size=512,
                                     pe_input=10000,
                                     pe_target=6000)

    temp_input = tf.random.uniform((3, 62), maxval=20, dtype=tf.int32)  # token
    temp_target = tf.random.uniform((3, 90, 512))
    enc_padding_mask, combined_mask, dec_padding_mask = create_masks(
        temp_input, temp_target)

    prenet_output, stops, post_output, attention_weights = sample_transformer(
        temp_input,
        temp_target,
        training=True,
        enc_padding_mask=enc_padding_mask,
        look_ahead_mask=combined_mask,
        dec_padding_mask=dec_padding_mask)

    print(post_output.shape)  # (batch_size, tar_seq_len, output_size)
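A minimal sketch of wrapping this forward pass in a single training step, assuming the Transformer is a tf.keras.Model and that `post_output` has the same shape as `temp_target`, so a reconstruction loss applies directly (the loss and optimizer choices are illustrative):

    optimizer = tf.keras.optimizers.Adam(1e-4)
    mse = tf.keras.losses.MeanSquaredError()

    with tf.GradientTape() as tape:
        _, stops, post_output, _ = sample_transformer(
            temp_input, temp_target, training=True,
            enc_padding_mask=enc_padding_mask,
            look_ahead_mask=combined_mask,
            dec_padding_mask=dec_padding_mask)
        loss = mse(temp_target, post_output)  # reconstruction term only, for illustration

    grads = tape.gradient(loss, sample_transformer.trainable_variables)
    optimizer.apply_gradients(zip(grads, sample_transformer.trainable_variables))
    print(float(loss))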