def test_set_parameters(self):
        learning_rate = 1
        optim = Optimizer(torch.optim.SGD, lr=learning_rate)
        params = [torch.nn.Parameter(torch.randn(2, 3, 4))]
        optim.set_parameters(params)

        self.assertTrue(type(optim.optimizer) is torch.optim.SGD)
        self.assertEqual(optim.optimizer.param_groups[0]['lr'], learning_rate)
Example No. 2
    def test_update(self):
        params = [torch.nn.Parameter(torch.randn(2, 3, 4))]
        optimizer = Optimizer(torch.optim.Adam(params, lr=1), max_grad_norm=5)
        scheduler = StepLR(optimizer.optimizer, 1, gamma=0.1)
        optimizer.set_scheduler(scheduler)
        optimizer.step()
        optimizer.update(10, 1)
        self.assertEqual(0.1, optimizer.optimizer.param_groups[0]['lr'])
Example No. 3
    def train(self,
              model,
              data,
              num_epochs=5,
              resume=False,
              dev_data=None,
              optimizer=None,
              teacher_forcing_ratio=0):
        """ Run training for a given model.

        Args:
            model (seq2seq.models): model to run training on; if `resume=True`, it will be
               overwritten by the model loaded from the latest checkpoint.
            data (seq2seq.dataset.dataset.Dataset): dataset object to train on
            num_epochs (int, optional): number of epochs to run (default 5)
            resume (bool, optional): resume training from the latest checkpoint (default False)
            dev_data (seq2seq.dataset.dataset.Dataset, optional): dev Dataset (default None)
            optimizer (seq2seq.optim.Optimizer, optional): optimizer for training
               (default: Optimizer(torch.optim.Adam, max_grad_norm=5))
            teacher_forcing_ratio (float, optional): teacher forcing ratio (default 0)
        Returns:
            model (seq2seq.models): trained model.
        """
        # If training is set to resume
        if resume:
            latest_checkpoint_path = Checkpoint.get_latest_checkpoint(
                self.expt_dir)
            resume_checkpoint = Checkpoint.load(latest_checkpoint_path)
            model = resume_checkpoint.model
            self.optimizer = resume_checkpoint.optimizer

            # A workaround to set optimizing parameters properly
            resume_optim = self.optimizer.optimizer
            defaults = resume_optim.param_groups[0]
            defaults.pop('params', None)
            defaults.pop('initial_lr', None)
            self.optimizer.optimizer = resume_optim.__class__(
                model.parameters(), **defaults)

            start_epoch = resume_checkpoint.epoch
            step = resume_checkpoint.step
        else:
            start_epoch = 1
            step = 0
            if optimizer is None:
                optimizer = Optimizer(optim.Adam(model.parameters()),
                                      max_grad_norm=5)
            self.optimizer = optimizer

        self.logger.info("Optimizer: %s, Scheduler: %s" %
                         (self.optimizer.optimizer, self.optimizer.scheduler))

        self._train_epoches(data,
                            model,
                            num_epochs,
                            start_epoch,
                            step,
                            dev_data=dev_data,
                            teacher_forcing_ratio=teacher_forcing_ratio)
        return model
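
For reference, a minimal call into this train method, mirroring the usage shown in Example No. 18 below. The SupervisedTrainer class name and the loss, seq2seq, train_data and dev_data objects are assumptions here (they can be set up as in Example No. 15):

t = SupervisedTrainer(loss=loss, batch_size=32,
                      checkpoint_every=1000, print_every=100,
                      expt_dir='./experiment')
optimizer = Optimizer(torch.optim.Adam(seq2seq.parameters()), max_grad_norm=5)
seq2seq = t.train(seq2seq, train_data,
                  num_epochs=20, dev_data=dev_data,
                  optimizer=optimizer,
                  teacher_forcing_ratio=0.5,
                  resume=False)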
Example No. 4
    def train(self, model, data, num_epochs=5,
              resume=False, dev_data=None, 
              monitor_data={}, optimizer=None,
              teacher_forcing_ratio=0,
              learning_rate=0.001, checkpoint_path=None, top_k=5):
        """ Run training for a given model.

        Args:
            model (seq2seq.models): model to run training on; if `resume=True`, it will be
               overwritten by the model loaded from the latest checkpoint.
            data (seq2seq.dataset.dataset.Dataset): dataset object to train on
            num_epochs (int, optional): number of epochs to run (default 5)
            resume (bool, optional): resume training from the latest checkpoint (default False)
            dev_data (seq2seq.dataset.dataset.Dataset, optional): dev Dataset (default None)
            monitor_data (dict, optional): additional datasets to monitor during training (default {})
            optimizer (seq2seq.optim.Optimizer, optional): optimizer for training
               (default: Optimizer(torch.optim.Adam, max_grad_norm=5))
            teacher_forcing_ratio (float, optional): teacher forcing ratio (default 0)
            learning_rate (float, optional): learning rate used by the optimizer (default 0.001)
            checkpoint_path (str, optional): path to load a checkpoint from when training should be resumed
            top_k (int): how many models should be stored during training
        Returns:
            model (seq2seq.models): trained model.
        """
        # If training is set to resume
        if resume:
            resume_checkpoint = Checkpoint.load(checkpoint_path)
            model = resume_checkpoint.model
            self.optimizer = resume_checkpoint.optimizer

            # A workaround to set optimizing parameters properly
            resume_optim = self.optimizer.optimizer
            defaults = resume_optim.param_groups[0]
            defaults.pop('params', None)
            defaults.pop('initial_lr', None)
            self.optimizer.optimizer = resume_optim.__class__(model.parameters(), **defaults)

            start_epoch = resume_checkpoint.epoch
            step = resume_checkpoint.step
        else:
            start_epoch = 1
            step = 0

            def get_optim(optim_name):
                optims = {'adam': optim.Adam, 'adagrad': optim.Adagrad,
                          'adadelta': optim.Adadelta, 'adamax': optim.Adamax,
                          'rmsprop': optim.RMSprop, 'sgd': optim.SGD,
                          None: optim.Adam}
                return optims[optim_name]

            self.optimizer = Optimizer(get_optim(optimizer)(model.parameters(), lr=learning_rate),
                                       max_grad_norm=5)

        self.logger.info("Optimizer: %s, Scheduler: %s" % (self.optimizer.optimizer, self.optimizer.scheduler))

        logs = self._train_epoches(data, model, num_epochs,
                            start_epoch, step, dev_data=dev_data,
                            monitor_data=monitor_data,
                            teacher_forcing_ratio=teacher_forcing_ratio,
                            top_k=top_k)
        return model, logs
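
In this variant, unlike the one above, the optimizer argument is a lowercase name string resolved by get_optim ('adam', 'adagrad', 'adadelta', 'adamax', 'rmsprop', 'sgd', or None for Adam) rather than an Optimizer instance. A hypothetical call, with trainer, model, data and dev as placeholder names:

model, logs = trainer.train(model, data,
                            num_epochs=10, dev_data=dev,
                            optimizer='adam', learning_rate=0.001,
                            teacher_forcing_ratio=0.5, top_k=5)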
Example No. 5
    def __init__(self,
                 expt_dir='experiment',
                 loss=NLLLoss(),
                 batch_size=64,
                 random_seed=None,
                 checkpoint_every=100,
                 print_every=100,
                 optimizer=Optimizer(optim.Adam, max_grad_norm=5)):
        self._trainer = "Simple Trainer"
        self.random_seed = random_seed
        if random_seed is not None:
            random.seed(random_seed)
            torch.manual_seed(random_seed)
        self.loss = loss
        self.evaluator = Evaluator(loss=self.loss, batch_size=batch_size)
        self.optimizer = optimizer
        self.checkpoint_every = checkpoint_every
        self.print_every = print_every

        if not os.path.isabs(expt_dir):
            expt_dir = os.path.join(os.getcwd(), expt_dir)
        self.expt_dir = expt_dir
        if not os.path.exists(self.expt_dir):
            os.makedirs(self.expt_dir)
        self.batch_size = batch_size
        self.input_vocab_file = os.path.join(self.expt_dir, 'input_vocab')
        self.output_vocab_file = os.path.join(self.expt_dir, 'output_vocab')

        self.logger = logging.getLogger(__name__)
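
A hypothetical instantiation of this trainer; the class name is not visible in the snippet, so SupervisedTrainer is assumed, and the argument values simply echo the defaults above:

trainer = SupervisedTrainer(expt_dir='./experiment',
                            loss=NLLLoss(),
                            batch_size=64,
                            random_seed=42,
                            checkpoint_every=100,
                            print_every=100,
                            optimizer=Optimizer(optim.Adam, max_grad_norm=5))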
Example No. 6
    def test_init(self):
        params = [torch.nn.Parameter(torch.randn(2,3,4))]
        try:
            optimizer = Optimizer(torch.optim.Adam(params))
        except Exception:
            self.fail("__init__ failed.")

        self.assertEqual(optimizer.max_grad_norm, 0)
Example No. 7
def initialize_model(
    train,
    input_vocab,
    output_vocab,
    max_len=10,
    hidden_size=256,
    dropout_p=0.5,
    bidirectional=True,
    n_beam=5,
):
    # Initialize model
    encoder = EncoderRNN(
        len(input_vocab),
        max_len,
        hidden_size,
        bidirectional=bidirectional,
        variable_lengths=True,
    )

    decoder = DecoderRNN(
        len(output_vocab),
        max_len,
        hidden_size * (2 if bidirectional else 1),
        dropout_p=dropout_p,
        use_attention=True,
        bidirectional=bidirectional,
        eos_id=train.tgt_field.eos_id,
        sos_id=train.tgt_field.sos_id,
    )
    #     decoder = TopKDecoder(decoder, n_beam)
    seq2seq = Seq2seq(encoder, decoder)
    if torch.cuda.is_available():
        seq2seq = seq2seq.cuda()

    for param in seq2seq.parameters():
        param.data.uniform_(-0.08, 0.08)

    # Optimizer and learning rate scheduler can be customized by
    # explicitly constructing the objects and passing them to the trainer
    optimizer = Optimizer(torch.optim.Adam(seq2seq.parameters()),
                          max_grad_norm=5)
    scheduler = StepLR(optimizer.optimizer, 1)
    optimizer.set_scheduler(scheduler)

    return seq2seq, optimizer, scheduler
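
A hypothetical call to initialize_model, assuming train is the torchtext dataset whose tgt_field provides the SOS/EOS ids used above:

seq2seq, optimizer, scheduler = initialize_model(train, input_vocab, output_vocab,
                                                 max_len=10, hidden_size=256,
                                                 bidirectional=True)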
Example No. 8
    def test_init(self):
        try:
            optimizer = Optimizer(torch.optim.SGD)
        except Exception:
            self.fail("__init__ failed.")

        self.assertEqual(optimizer.max_grad_norm, 0)
        self.assertEqual(optimizer.lr_decay, 1)
        self.assertEqual(optimizer.decay_after_epoch, 0)
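
The tests collected on this page exercise two different Optimizer construction styles, likely from different versions of the wrapper. A side-by-side sketch, with params as a placeholder parameter list:

params = [torch.nn.Parameter(torch.randn(2, 3, 4))]

# Style 1: pass the optimizer class and bind parameters later
# (as in the set_parameters test at the top of this page and the test_init above)
optim_a = Optimizer(torch.optim.SGD, lr=1)
optim_a.set_parameters(params)

# Style 2: wrap an already constructed torch optimizer
# (as in Examples No. 2, 6 and 16)
optim_b = Optimizer(torch.optim.Adam(params, lr=1), max_grad_norm=5)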
Example No. 9
def pretrain_generator(model, train, dev):
    # pre-train generator
    weight = torch.ones(len(tgt.vocab))
    pad = tgt.vocab.stoi[tgt.pad_token]
    loss = Perplexity(weight, pad)
    if torch.cuda.is_available():
        loss.cuda()

    optimizer = Optimizer(torch.optim.Adam(gen.parameters()), max_grad_norm=5)
    scheduler = StepLR(optimizer.optimizer, 1)
    optimizer.set_scheduler(scheduler)

    supervised = SupervisedTrainer(loss=loss,
                                   batch_size=32,
                                   random_seed=random_seed,
                                   expt_dir=expt_gen_dir)
    supervised.train(model,
                     train,
                     num_epochs=20,
                     dev_data=dev,
                     optimizer=optimizer,
                     teacher_forcing_ratio=0,
                     resume=resume)
Example No. 10
    def test_update(self):
        optim = Optimizer(torch.optim.SGD,
                          lr=1,
                          decay_after_epoch=5,
                          lr_decay=0.5)
        params = [torch.nn.Parameter(torch.randn(2, 3, 4))]
        optim.set_parameters(params)
        # epoch 10 is past decay_after_epoch=5, so the learning rate is
        # multiplied by lr_decay: 1 * 0.5 = 0.5
        optim.update(0, 10)
        self.assertEqual(optim.optimizer.param_groups[0]['lr'], 0.5)
Example No. 11
    def train(self, model, data, n_epochs=5, resume=False,
            dev_data=None, optimizer=None, teacher_forcing_ratio=0):
        """Train a given model.

        Args:
            model (seq2seq.models): model to run training on. If resume=True,
                it will be overwritten by the model loaded from the latest
                checkpoint
            data (seq2seq.dataset.dataset.Dataset): dataset object to train on
            n_epochs (int): number of epochs to run
            resume(bool): resume training with the latest checkpoint
            dev_data (seq2seq.dataset.dataset.Dataset): dev Dataset
            optimizer (seq2seq.optim.Optimizer): optimizer for training
            teacher_forcing_ratio (float): teacher forcing ratio
        Returns:
            model (seq2seq.models): trained model.
        """
        if resume:
            latest_checkpoint_path = Checkpoint.get_latest_checkpoint(
                self.experiment_directory)
            resume_checkpoint = Checkpoint.load(latest_checkpoint_path)
            model = resume_checkpoint.model
            self.optimizer = resume_checkpoint.optimizer

            # A work-around to set optimizing parameters properly
            resume_optim = self.optimizer.optimizer
            defaults = resume_optim.param_groups[0]
            defaults.pop('params', None)
            defaults.pop('initial_lr', None)
            self.optimizer.optimizer = resume_optim.__class__(
                model.parameters(), **defaults)

            start_epoch = resume_checkpoint.epoch
            step = resume_checkpoint.step
        else:
            start_epoch = 1
            step = 0
            if optimizer is None:
                optimizer = Optimizer(
                    optim.Adam(model.parameters()), max_grad_norm=5)
            self.optimizer = optimizer

        logger.info('Optimizer: %s, Scheduler: %s',
                    self.optimizer.optimizer, self.optimizer.scheduler)

        self._train_epochs(data, model, n_epochs, 
                            start_epoch, step, dev_data=dev_data, 
                            teacher_forcing_ratio=teacher_forcing_ratio)
        return model
Example No. 12
    def train(self,
              encoder,
              decoder,
              data,
              num_epochs=5,
              resume=False,
              dev_data=None,
              optimizer=None,
              is_training=0):
        if resume:
            latest_checkpoint_path = Checkpoint.get_latest_checkpoint(
                self.expt_dir)
            resume_checkpoint = Checkpoint.load(latest_checkpoint_path)
            decoder = resume_checkpoint.model
            self.optimizer = resume_checkpoint.optimizer

            # A workaround to set optimizing parameters properly
            resume_optim = self.optimizer.optimizer
            defaults = resume_optim.param_groups[0]
            defaults.pop('params', None)
            defaults.pop('initial_lr', None)
            self.optimizer.optimizer = resume_optim.__class__(
                decoder.parameters(), **defaults)

            start_epoch = resume_checkpoint.epoch
            step = resume_checkpoint.step
        else:
            start_epoch = 1
            step = 0
            if optimizer is None:
                optimizer = Optimizer(optim.Adam(decoder.parameters()),
                                      max_grad_norm=5)
            self.optimizer = optimizer

        self.logger.info("Optimizer: %s, Scheduler: %s" %
                         (self.optimizer.optimizer, self.optimizer.scheduler))

        self._train_epoches(data,
                            encoder,
                            decoder,
                            num_epochs,
                            start_epoch,
                            step,
                            dev_data=dev_data,
                            is_training=is_training)
        return decoder
Example No. 13
    def train_model(m, poly, pretraining):
        m.train()

        optimizer = Optimizer(torch.optim.Adam(
            m.parameters(), amsgrad=True), max_grad_norm=5)

        t = MirrorTrainer(loss=loss, batch_size=args.batch_size,
                          checkpoint_every=100,
                          expt_dir="./experiments", pretraining=pretraining,
                          polyglot=poly, explosion_train=args.explosion_train, explosion_eval=args.explosion_eval)
        m = t.train(m, train_dataset,
                    n_epochs=args.n_epochs,
                    dev_data=(None if args.no_dev_eval == 1 else dev_dataset),
                    test_data=(None if args.no_test_eval ==
                               1 else test_dataset),
                    optimizer=optimizer,
                    teacher_forcing_ratio=args.teacher_forcing_ratio,
                    resume=False)
        return m
Example No. 14
    def train(self,
              model,
              data,
              start_step=0,
              dev_data=None,
              optimizer=None,
              teacher_forcing_ratio=0):
        """ Run training for a given model.

        Args:
            model (seq2seq.models): model to run training on
            data (seq2seq.dataset.dataset.Dataset): dataset object to train on
            start_step (int, optional): global step to start counting from (default 0)
            dev_data (seq2seq.dataset.dataset.Dataset, optional): dev Dataset (default None)
            optimizer (seq2seq.optim.Optimizer, optional): optimizer for training
               (default: Optimizer(torch.optim.Adam, max_grad_norm=5))
            teacher_forcing_ratio (float, optional): teacher forcing ratio (default 0)
        Returns:
            model (seq2seq.models): trained model.
        """

        if optimizer is None:
            optimizer = Optimizer(optim.Adam(model.parameters()),
                                  max_grad_norm=5)
        self.optimizer = optimizer

        if not self.multi_gpu or hvd.rank() == 0:
            self.logger.info(
                "Optimizer: %s, Scheduler: %s" %
                (self.optimizer.optimizer, self.optimizer.scheduler))

        self._train_epoches(data,
                            model,
                            start_step,
                            dev_data=dev_data,
                            teacher_forcing_ratio=teacher_forcing_ratio)
        return model
Example No. 15
def run_training(opt, default_data_dir, num_epochs=100):
    if opt.load_checkpoint is not None:
        logging.info("loading checkpoint from {}".format(
            os.path.join(opt.expt_dir, Checkpoint.CHECKPOINT_DIR_NAME, opt.load_checkpoint)))
        checkpoint_path = os.path.join(opt.expt_dir, Checkpoint.CHECKPOINT_DIR_NAME, opt.load_checkpoint)
        checkpoint = Checkpoint.load(checkpoint_path)
        seq2seq = checkpoint.model
        input_vocab = checkpoint.input_vocab
        output_vocab = checkpoint.output_vocab
    else:

        # Prepare dataset
        src = SourceField()
        tgt = TargetField()
        max_len = 50

        data_file = os.path.join(default_data_dir, opt.train_path, 'data.txt')

        logging.info("Starting new Training session on %s", data_file)

        def len_filter(example):
            return (len(example.src) <= max_len) and (len(example.tgt) <= max_len) \
                   and (len(example.src) > 0) and (len(example.tgt) > 0)

        train = torchtext.data.TabularDataset(
            path=data_file, format='json',
            fields={'src': ('src', src), 'tgt': ('tgt', tgt)},
            filter_pred=len_filter
        )

        dev = None
        if opt.no_dev is False:
            dev_data_file = os.path.join(default_data_dir, opt.train_path, 'dev-data.txt')
            dev = torchtext.data.TabularDataset(
                path=dev_data_file, format='json',
                fields={'src': ('src', src), 'tgt': ('tgt', tgt)},
                filter_pred=len_filter
            )

        src.build_vocab(train, max_size=50000)
        tgt.build_vocab(train, max_size=50000)
        input_vocab = src.vocab
        output_vocab = tgt.vocab

        # NOTE: If the source field name and the target field name
        # are different from 'src' and 'tgt' respectively, they have
        # to be set explicitly before any training or inference
        # seq2seq.src_field_name = 'src'
        # seq2seq.tgt_field_name = 'tgt'

        # Prepare loss
        weight = torch.ones(len(tgt.vocab))
        pad = tgt.vocab.stoi[tgt.pad_token]
        loss = Perplexity(weight, pad)
        if torch.cuda.is_available():
            logging.info("Yayyy We got CUDA!!!")
            loss.cuda()
        else:
            logging.info("No cuda available device found running on cpu")

        seq2seq = None
        optimizer = None
        if not opt.resume:
            hidden_size = 128
            decoder_hidden_size = hidden_size * 2
            logging.info("EncoderRNN Hidden Size: %s", hidden_size)
            logging.info("DecoderRNN Hidden Size: %s", decoder_hidden_size)
            bidirectional = True
            encoder = EncoderRNN(len(src.vocab), max_len, hidden_size,
                                 bidirectional=bidirectional,
                                 rnn_cell='lstm',
                                 variable_lengths=True)
            decoder = DecoderRNN(len(tgt.vocab), max_len, decoder_hidden_size,
                                 dropout_p=0, use_attention=True,
                                 bidirectional=bidirectional,
                                 rnn_cell='lstm',
                                 eos_id=tgt.eos_id, sos_id=tgt.sos_id)

            seq2seq = Seq2seq(encoder, decoder)
            if torch.cuda.is_available():
                seq2seq.cuda()

            for param in seq2seq.parameters():
                param.data.uniform_(-0.08, 0.08)

        # Optimizer and learning rate scheduler can be customized by
        # explicitly constructing the objects and passing them to the trainer.

        optimizer = Optimizer(torch.optim.Adam(seq2seq.parameters()), max_grad_norm=5)
        scheduler = StepLR(optimizer.optimizer, 1)
        optimizer.set_scheduler(scheduler)

        # train

        batch_size = 32
        checkpoint_every = num_epochs // 10
        print_every = num_epochs // 100

        properties = dict(batch_size=batch_size,
                          checkpoint_every=checkpoint_every,
                          print_every=print_every, expt_dir=opt.expt_dir,
                          num_epochs=num_epochs,
                          teacher_forcing_ratio=0.5,
                          resume=opt.resume)

        logging.info("Starting training with the following Properties %s", json.dumps(properties, indent=2))
        t = SupervisedTrainer(loss=loss, batch_size=batch_size,
                              checkpoint_every=checkpoint_every,
                              print_every=print_every, expt_dir=opt.expt_dir)

        seq2seq = t.train(seq2seq, train,
                          num_epochs=num_epochs, dev_data=dev,
                          optimizer=optimizer,
                          teacher_forcing_ratio=0.5,
                          resume=opt.resume)

        evaluator = Evaluator(loss=loss, batch_size=batch_size)

        if opt.no_dev is False:
            dev_loss, accuracy = evaluator.evaluate(seq2seq, dev)
            logging.info("Dev Loss: %s", dev_loss)
            logging.info("Accuracy: %s", dev_loss)

    beam_search = Seq2seq(seq2seq.encoder, TopKDecoder(seq2seq.decoder, 4))

    predictor = Predictor(beam_search, input_vocab, output_vocab)
    while True:
        try:
            seq_str = input("Type in a source sequence:")
            seq = seq_str.strip().split()
            results = predictor.predict_n(seq, n=3)
            for i, res in enumerate(results):
                print('option %s: %s\n' % (i + 1, res))
        except KeyboardInterrupt:
            logging.info("Bye Bye")
            exit(0)
Example No. 16
    def test_step(self, mock_clip_grad_norm):
        # mock_clip_grad_norm is presumably injected by a @mock.patch on the
        # gradient-clipping utility in the original test class
        params = [torch.nn.Parameter(torch.randn(2, 3, 4))]
        optim = Optimizer(torch.optim.Adam(params),
                          max_grad_norm=5)
        optim.step()
        mock_clip_grad_norm.assert_called_once()
Example No. 17
                            if multi_gpu else None
        dev = DataLoader(dev_set,
                         batch_size=opt.batch_size,
                         shuffle=False,
                         sampler=dev_sampler,
                         collate_fn=trans_data.collate_fn)

        # Prepare optimizer
        # optimizer = Optimizer(optim.Adam(seq2seq.parameters(), lr=opt.learning_rate), max_grad_norm=opt.clip_grad)
        optimizer = optim.Adam(seq2seq.parameters(), lr=opt.learning_rate)
        if multi_gpu:
            optimizer = hvd.DistributedOptimizer(
                optimizer, named_parameters=seq2seq.named_parameters())
            hvd.broadcast_optimizer_state(optimizer, root_rank=0)
            hvd.broadcast_parameters(seq2seq.state_dict(), root_rank=0)
        optimizer = Optimizer(optimizer, max_grad_norm=opt.clip_grad)
        if opt.decay_factor:
            optimizer.set_scheduler(
                torch.optim.lr_scheduler.ReduceLROnPlateau(
                    optimizer.optimizer,
                    'min',
                    factor=opt.decay_factor,
                    patience=1))

        # Prepare trainer and train
        t = SupervisedTrainer(loss=loss,
                              model_dir=opt.model_dir,
                              best_model_dir=opt.best_model_dir,
                              batch_size=opt.batch_size,
                              checkpoint_every=opt.checkpoint_every,
                              print_every=opt.print_every,
Example No. 18
            # Optimizer and learning rate scheduler can be customized by
            # explicitly constructing the objects and passing them to the trainer.
            #
            # optimizer = Optimizer(torch.optim.Adam(seq2seq.parameters()), max_grad_norm=5)
            # scheduler = StepLR(optimizer.optimizer, 1)
            # optimizer.set_scheduler(scheduler)

    # train
    t = SupervisedTrainer(loss=loss,
                          batch_size=32,
                          checkpoint_every=1000,
                          print_every=10,
                          expt_dir=opt.expt_dir)

    optimizer = Optimizer(torch.optim.Adam(seq2seq.parameters()),
                          max_grad_norm=5)
    seq2seq = t.train(seq2seq,
                      train,
                      num_epochs=20,
                      dev_data=dev,
                      optimizer=optimizer,
                      teacher_forcing_ratio=0.5,
                      resume=opt.resume)

predictor = Predictor(seq2seq, input_vocab, output_vocab)

while True:
    seq_str = input("Type in a source sequence:")
    seq = seq_str.strip().split()
    print(predictor.predict(seq))
Example No. 19
        print(f"\nLoad from {opt.load_checkpoint}\n")
    else:
        for param in seq2seq.parameters():
            param.data.uniform_(-opt.init_weight, opt.init_weight)

    if opt.beam_width > 1 and opt.phase == "infer":
        print(f"Beam Width {opt.beam_width}")
        seq2seq.decoder = TopKDecoder(seq2seq.decoder, opt.beam_width)

    if opt.phase == "train":
        # train
        # optimizer = Optimizer(optim.Adam(seq2seq.parameters(), lr=opt.learning_rate), max_grad_norm=opt.clip_grad)
        optimizer = optim.Adam(seq2seq.parameters(), lr=opt.learning_rate)
        optimizer = hvd.DistributedOptimizer(
            optimizer, named_parameters=seq2seq.named_parameters())
        optimizer = Optimizer(optimizer, max_grad_norm=opt.clip_grad)

        hvd.broadcast_optimizer_state(optimizer.optimizer, root_rank=0)
        hvd.broadcast_parameters(seq2seq.state_dict(), root_rank=0)
        t = SupervisedTrainer(loss=loss,
                              model_dir=opt.model_dir,
                              best_model_dir=opt.best_model_dir,
                              batch_size=opt.batch_size,
                              checkpoint_every=opt.checkpoint_every,
                              print_every=opt.print_every,
                              max_epochs=opt.max_epochs,
                              max_steps=opt.max_steps,
                              max_checkpoints_num=opt.max_checkpoints_num,
                              best_ppl=opt.best_ppl,
                              device=device)
Example No. 20
        if torch.cuda.is_available():
            seq2tree.cuda()

        for param in seq2tree.parameters():
            param.data.uniform_(-0.08, 0.08)
            # encoder.embedding.weight.data.set_(input_vocab.vectors)
            # encoder.embedding.weight.data.set_(output_vocab.vectors)

        # Optimizer and learning rate scheduler can be customized by
        # explicitly constructing the objects and passing them to the trainer.
        #
        # optimizer = Optimizer(torch.optim.Adam(seq2seq.parameters()), max_grad_norm=5)
        # scheduler = StepLR(optimizer.optimizer, 1)
        # optimizer.set_scheduler(scheduler)

    optimizer = Optimizer(optim.Adam(seq2tree.parameters(), lr=1e-4), max_grad_norm=5)
    # train
    t = SupervisedTrainer(loss=loss, batch_size=1,
                          checkpoint_every=50,
                          print_every=10, expt_dir=opt.expt_dir)

    seq2tree = t.train(seq2tree, train,
                      num_epochs=20, dev_data=dev,
                      optimizer=optimizer,
                      teacher_forcing_ratio=0.5,
                      resume=opt.resume)

predictor = Predictor(seq2tree, input_vocab, input_vocab)

while True:
    seq_str = input("Type in a source sequence:")
Example No. 21
                             eos_id=tgt.eos_id,
                             sos_id=tgt.sos_id)
        seq2seq = Seq2seq(encoder, decoder)
        if torch.cuda.is_available():
            seq2seq.cuda()

        for param in seq2seq.parameters():
            # param.data.uniform_(-0.08, 0.08)
            param.data.normal_(0.0, 0.1)

        # Optimizer and learning rate scheduler can be customized by
        # explicitly constructing the objects and passing them to the trainer.
        #
        # optimizer = Optimizer(torch.optim.Adam(seq2seq.parameters()), max_grad_norm=5)
        optimizer = Optimizer(torch.optim.SGD(seq2seq.parameters(),
                                              lr=0.05,
                                              momentum=0.9),
                              max_grad_norm=5)
        scheduler = StepLR(optimizer.optimizer, 1)
        optimizer.set_scheduler(scheduler)

    # train
    t = MultiLabelTrainer(loss=loss,
                          batch_size=64,
                          checkpoint_every=10000,
                          print_every=100,
                          ckpt_dir=opt.ckpt_dir)

    print('Start training')
    seq2seq = t.train(seq2seq,
                      train,
                      num_epochs=opt.epochs,
Example No. 22
                     rnn_cell='lstm',
                     dropout_p=0.25,
                     use_attention=True,
                     bidirectional=bidirectional,
                     n_layers=2,
                     eos_id=tgt.eos_id,
                     sos_id=tgt.sos_id)

seq2seq_model = Seq2seq(encoder, decoder)
if torch.cuda.is_available():
    seq2seq_model.cuda()

for param in seq2seq_model.parameters():
    param.data.uniform_(-0.1, 0.1)

optimizer = Optimizer(torch.optim.Adam(seq2seq_model.parameters()),
                      max_grad_norm=5)

# In[20]:

seq2seq_model = torch.nn.DataParallel(seq2seq_model)

# In[21]:

# train

t = SupervisedTrainer(loss=loss,
                      batch_size=8,
                      checkpoint_every=200,
                      print_every=10000,
                      expt_dir='./lstm_model/' + data_tuple[0] + '/Deepregex')
Example No. 23
    def train_iters(self,
                    pairs,
                    n_iters,
                    batch_size=64,
                    print_every=1000,
                    learning_rate=0.0002,
                    teacher_forcing_ratio=0.5):
        """Train for some number of iterations choosing randomly from the list of tensor pairs."""
        print("Initializing training.")
        if self.optimizer is None:
            adam = optim.Adam(self.decoder.parameters(), lr=learning_rate)
            self.optimizer = Optimizer(adam, max_grad_norm=5)
        else:
            print("Using existing optimizer.")
        random.shuffle(pairs)
        if (len(pairs) < batch_size):
            print("Not enough examples for one batch.")
            return

        # Turn the pairs into big tensors.
        # TODO: instead of saving pairs, save tensors directly. Otherwise this operation takes too much space.
        # Input: num_layers x num_examples x embedding_size
        # Target: num_examples x max_output_length+1
        input_tensors = [torch.reshape(i, (1, 1, -1)) for i, j in pairs]
        input_tensor = torch.cat(input_tensors, 1)
        input_tensor = self._create_init_hidden(input_tensor)
        target_tensors = [j for i, j in pairs]
        targets = []
        for target in target_tensors:
            target_tensor = torch.reshape(target, (1, -1))
            if target_tensor.size(1) >= self.max_output_length:
                target_tensor = target_tensor[0][0:self.max_output_length]
                target_tensor = torch.reshape(target_tensor, (1, -1))
            else:
                pad = torch.zeros(
                    1, self.max_output_length - target_tensor.size(1)).long()
                for i in range(self.max_output_length - target_tensor.size(1)):
                    pad[0][i] = self.mask_token
                target_tensor = torch.cat((target_tensor, pad), 1)
            # Add the start token.
            start_tensor = torch.zeros(1, 1).long()
            start_tensor[0][0] = self.SOS_token
            target_tensor = torch.cat((start_tensor, target_tensor), 1)
            targets.append(target_tensor)
        target_tensor = torch.cat(targets, 0)

        if torch.cuda.is_available(): target_tensor = target_tensor.cuda()
        if torch.cuda.is_available(): input_tensor = input_tensor.cuda()

        print("Starting training.")
        print_loss_total = 0  # Reset every print_every.
        batch = 0
        for iter in range(n_iters):
            # Create the batch.
            if (batch + 1) * batch_size > len(pairs):
                print("Finished an epoch!")
                batch = 0
            batch_input = input_tensor[:, batch * batch_size:(batch + 1) *
                                       batch_size, :].contiguous()
            batch_target = target_tensor[batch * batch_size:(batch + 1) *
                                         batch_size, :].contiguous()

            if self.rnn_cell == 'lstm':
                batch_input = (batch_input, batch_input)

            loss = self.train(batch_input,
                              batch_target,
                              teacher_forcing_ratio=teacher_forcing_ratio)
            print_loss_total += loss

            if iter % print_every == print_every - 1:
                print_loss_avg = print_loss_total / print_every
                print_loss_total = 0
                print('Steps: {0}\nAverage loss: {1}'.format(
                    iter, print_loss_avg))
            batch += 1
Example No. 24
def train(opt):
    LOG_FORMAT = '%(asctime)s %(levelname)-8s %(message)s'
    logging.basicConfig(format=LOG_FORMAT,
                        level=getattr(logging, opt.log_level.upper()))
    logging.info(opt)
    if int(opt.GPU) >= 0:
        torch.cuda.set_device(int(opt.GPU))
    if opt.load_checkpoint is not None:
        logging.info("loading checkpoint from {}".format(
            os.path.join(opt.expt_dir, Checkpoint.CHECKPOINT_DIR_NAME,
                         opt.load_checkpoint)))
        checkpoint_path = os.path.join(opt.expt_dir,
                                       Checkpoint.CHECKPOINT_DIR_NAME,
                                       opt.load_checkpoint)
        checkpoint = Checkpoint.load(checkpoint_path)
        seq2tree = checkpoint.model
        input_vocab = checkpoint.input_vocab

    else:
        # Prepare dataset
        src = SourceField()
        nt = NTField()
        pos = PosField()
        tgt_tree = TreeField()
        comp = CompField()
        max_len = opt.max_len

        def len_filter(example):
            return len(example.src) <= max_len

        train = torchtext.data.TabularDataset(path=opt.train_path,
                                              format='tsv',
                                              fields=[('src', src), ('nt', nt),
                                                      ('pos', pos),
                                                      ('tree', tgt_tree)],
                                              filter_pred=len_filter)
        dev = torchtext.data.TabularDataset(path=opt.dev_path,
                                            format='tsv',
                                            fields=[('src', src), ('nt', nt),
                                                    ('pos', pos),
                                                    ('tree', tgt_tree)],
                                            filter_pred=len_filter)
        src.build_vocab(train, max_size=50000)
        comp.build_vocab(train, max_size=50000)
        nt.build_vocab(train, max_size=50000)
        pos.build_vocab(train, max_size=50000)
        # src_tree.build_vocab(train, max_size=50000)
        pos_in_nt = set()
        for Pos in pos.vocab.stoi:
            if nt.vocab.stoi[Pos] > 1:
                pos_in_nt.add(nt.vocab.stoi[Pos])
        hidden_size = opt.hidden_size
        input_vocab = src.vocab
        nt_vocab = nt.vocab

        def tree_to_id(tree):
            tree.set_label(nt_vocab.stoi[tree.label()])
            if len(tree) == 1 and str(tree[0])[0] != '(':
                tree[0] = input_vocab.stoi[tree[0]]
                return
            else:
                for subtree in tree:
                    tree_to_id(subtree)
                tree.append(Tree(nt_vocab.stoi['<eos>'], []))
                return tree

        # train.examples = [str(tree_to_id(ex.tree)) for ex in train.examples]
        # dev.examples = [str(tree_to_id(ex.tree)) for ex in dev.examples]
        for ex in train.examples:
            ex.tree = str(tree_to_id(Tree.fromstring(ex.tree)))
        for ex in dev.examples:
            ex.tree = str(tree_to_id(Tree.fromstring(ex.tree)))
        # train.examples = [tree_to_id(Tree.fromstring(ex.tree)) for ex in train.examples]
        # dev.examples = [str(tree_to_id(Tree.fromstring(ex.tree))) for ex in dev.examples]
        if opt.word_embedding is not None:
            input_vocab.load_vectors([opt.word_embedding])

        loss = NLLLoss()
        if torch.cuda.is_available():
            loss.cuda()
        loss.reset()
        seq2tree = None
        optimizer = None
        if not opt.resume:
            # Initialize model
            bidirectional = opt.bidirectional_encoder
            encoder = EncoderRNN(len(src.vocab),
                                 opt.word_embedding_size,
                                 max_len,
                                 hidden_size,
                                 bidirectional=bidirectional,
                                 variable_lengths=True)
            decoder = DecoderTree(len(src.vocab),
                                  opt.word_embedding_size,
                                  opt.nt_embedding_size,
                                  len(nt.vocab),
                                  max_len,
                                  hidden_size *
                                  2 if bidirectional else hidden_size,
                                  sos_id=nt_vocab.stoi['<sos>'],
                                  eos_id=nt_vocab.stoi['<eos>'],
                                  dropout_p=0.2,
                                  use_attention=True,
                                  bidirectional=bidirectional,
                                  pos_in_nt=pos_in_nt)

            seq2tree = Seq2tree(encoder, decoder)
            if torch.cuda.is_available():
                seq2tree.cuda()

            for param in seq2tree.parameters():
                param.data.uniform_(-0.08, 0.08)
                # encoder.embedding.weight.data.set_(input_vocab.vectors)
                # encoder.embedding.weight.data.set_(output_vocab.vectors)

            # Optimizer and learning rate scheduler can be customized by
            # explicitly constructing the objects and passing them to the trainer.
            #
            # optimizer = Optimizer(torch.optim.Adam(seq2seq.parameters()), max_grad_norm=5)
            # scheduler = StepLR(optimizer.optimizer, 1)
            # optimizer.set_scheduler(scheduler)

            optimizer = Optimizer(optim.Adam(seq2tree.parameters(), lr=opt.lr),
                                  max_grad_norm=5)
        # train
        t = SupervisedTrainer(loss=loss,
                              batch_size=opt.batch_size,
                              checkpoint_every=opt.checkpoint_every,
                              print_every=10,
                              expt_dir=opt.expt_dir,
                              lr=opt.lr)

        seq2tree = t.train(seq2tree,
                           train,
                           num_epochs=opt.epoch,
                           dev_data=dev,
                           optimizer=optimizer,
                           teacher_forcing_ratio=0,
                           resume=opt.resume)

    predictor = Predictor(seq2tree, input_vocab, nt_vocab)
    return predictor, dev, train
Example No. 25
        encoder = EncoderRNN(len(src.vocab), max_len, hidden_size,
                             n_layers=2, bidirectional=bidirectional, variable_lengths=True)
        decoder = DecoderRNN(len(tgt.vocab), max_len, hidden_size * 2 if bidirectional else hidden_size,
                             n_layers=2, dropout_p=0.5, use_attention=True, bidirectional=bidirectional,
                             eos_id=tgt.eos_id, sos_id=tgt.sos_id)
        seq2seq = Seq2seq(encoder, decoder)
        if torch.cuda.is_available():
            seq2seq.cuda()

        for param in seq2seq.parameters():
            param.data.uniform_(-0.08, 0.08)

        # Optimizer and learning rate scheduler can be customized by
        # explicitly constructing the objects and passing them to the trainer.
        # optimizer = Optimizer(torch.optim.Adam(seq2seq.parameters(), amsgrad=True, weight_decay=0.0005), max_grad_norm=10)
        optimizer = Optimizer(torch.optim.Adam(seq2seq.parameters()), max_grad_norm=10)
        scheduler = StepLR(optimizer.optimizer, 1)
        optimizer.set_scheduler(scheduler)

    # train
    t = SupervisedTrainer(loss=loss, batch_size=128,
                          checkpoint_every=200,
                          print_every=200, expt_dir=opt.expt_dir)

    seq2seq = t.train(seq2seq, train,
                      num_epochs=5, dev_data=dev,
                      optimizer=optimizer,
                      teacher_forcing_ratio=0.5,
                      resume=opt.resume)

predictor = Predictor(seq2seq, input_vocab, output_vocab)
Example No. 26
def main(option):
    random.seed(option.random_seed)
    torch.manual_seed(option.random_seed)

    LOG_FORMAT = '%(asctime)s %(name)-12s %(levelname)-8s %(message)s'
    logging.basicConfig(format=LOG_FORMAT, level='INFO', stream=sys.stdout)

    glove = Glove(option.emb_file)
    logging.info('loaded embeddings from ' + option.emb_file)

    src_vocab = Vocab.build_from_glove(glove)
    tgt_vocab = Vocab.load(option.intent_vocab)

    train_dataset = load_intent_prediction_dataset(option.train_dataset,
                                                   src_vocab,
                                                   tgt_vocab,
                                                   device=option.device)
    dev_dataset = load_intent_prediction_dataset(option.dev_dataset,
                                                 src_vocab,
                                                 tgt_vocab,
                                                 device=option.device)

    train_data_loader = DataLoader(train_dataset,
                                   batch_size=option.batch_size,
                                   shuffle=True)
    dev_data_loader = DataLoader(dev_dataset,
                                 batch_size=len(dev_dataset),
                                 shuffle=False)

    src_vocab_size = len(src_vocab)
    tgt_vocab_size = len(tgt_vocab)

    # Prepare loss
    weight = torch.ones(tgt_vocab_size)
    pad = tgt_vocab.stoi[tgt_vocab.pad_token]
    loss = Perplexity(weight, pad)
    loss.criterion.to(option.device)

    # Initialize model
    encoder = NeuralTensorNetwork(nn.Embedding(src_vocab_size, option.emb_dim),
                                  option.em_k)
    decoder = DecoderRNN(tgt_vocab_size,
                         option.im_max_len,
                         option.im_hidden_size,
                         use_attention=False,
                         bidirectional=False,
                         eos_id=tgt_vocab.stoi[tgt_vocab.eos_token],
                         sos_id=tgt_vocab.stoi[tgt_vocab.bos_token])
    encoder.to(option.device)
    decoder.to(option.device)

    init_model(encoder)
    init_model(decoder)

    encoder.embeddings.weight.data.copy_(torch.from_numpy(glove.embd).float())

    optimizer_params = [{
        'params': encoder.parameters()
    }, {
        'params': decoder.parameters()
    }]
    optimizer = Optimizer(optim.Adam(optimizer_params, lr=option.lr),
                          max_grad_norm=5)
    trainer = NTNTrainer(loss,
                         print_every=option.report_every,
                         device=option.device)
    encoder, decoder = trainer.train(
        encoder,
        decoder,
        optimizer,
        train_data_loader,
        num_epochs=option.epochs,
        dev_data_loader=dev_data_loader,
        teacher_forcing_ratio=option.im_teacher_forcing_ratio)

    predictor = NTNPredictor(encoder, decoder, src_vocab, tgt_vocab,
                             option.device)
    samples = [
        ("PersonX", "eventually told", "___"),
        ("PersonX", "tells", "PersonY 's tale"),
        ("PersonX", "always played", " ___"),
        ("PersonX", "would teach", "PersonY"),
        ("PersonX", "gets", "a ride"),
    ]
    for sample in samples:
        subj, verb, obj = sample
        subj = subj.lower().split(' ')
        verb = verb.lower().split(' ')
        obj = obj.lower().split(' ')
        print(sample, predictor.predict(subj, verb, obj))
Example No. 27
SpeakerDataset.concat(args.num_sentence, (train, dev, test))

################  define model ##################

model, input_vocab, output_vocab = init_model()

# Define loss
weight = torch.ones(len(output_vocab))
pad = output_vocab.stoi[tgt.pad_token]
loss = Perplexity(weight, pad)
if torch.cuda.is_available():
    loss.cuda()

# Define Optimizer
optimizer = Optimizer(optim.Adam(model.parameters()),
                      max_grad_norm=args.max_grad_norm)

###############  train model ################

t = SpkTrainer(args=args,
               loss=loss,
               batch_size=args.batch_size,
               checkpoint_every=args.ckpt_every,
               random_seed=args.seed,
               print_every=args.verbose,
               expt_dir=args.expt_dir)

discrim = t.train(model=model,
                  data=train,
                  num_epochs=args.epochs,
                  dev_data=dev,
Example No. 28
            param.data.uniform_(-0.08, 0.08)
            print(param.data[0:3])
        _, _, norm_val = encoder.vectors_stats()
        encoder.init_vectors(src.vocab.vectors)
        # encoder.scale_vectors(0.08)
        encoder.normalize_vectors(norm_val)
        encoder.vectors_stats()
        for param in seq2seq.parameters():
            print(param.data[0:3])

        if torch.cuda.is_available():
            seq2seq.cuda()

        # Optimizer and learning rate scheduler can be customized by
        # explicitly constructing the objects and passing them to the trainer.
        optimizer = Optimizer(torch.optim.Adam(seq2seq.parameters(), lr=0.001),
                              max_grad_norm=5)
        # optimizer = Optimizer(torch.optim.SGD(seq2seq.parameters(), lr=0.01, momentum=0.9), max_grad_norm=5)
        # scheduler = torch.optim.lr_scheduler.StepLR(optimizer.optimizer, step_size=10, gamma=0.5)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer=optimizer.optimizer,
            mode='min',
            factor=0.5,
            patience=5,
            verbose=True,
            threshold=0.0001,
            threshold_mode='rel',
            cooldown=0,
            min_lr=0,
            eps=1e-08)
        optimizer.set_scheduler(scheduler)
Example No. 29
def main():
    ''' Main function '''
    parser = argparse.ArgumentParser()

    parser.add_argument('-data', required=True)

    parser.add_argument('-epoch', type=int, default=3)
    parser.add_argument('-batch_size', type=int, default=64)

    parser.add_argument('-d_model', type=int, default=1024)
    parser.add_argument('-n_layer', type=int, default=1)

    parser.add_argument('-dropout', type=float, default=0)

    parser.add_argument('-log', default=None)
    parser.add_argument('-save_model', default=None)
    parser.add_argument('-save_mode',
                        type=str,
                        choices=['all', 'best'],
                        default='best')

    parser.add_argument('-seed',
                        type=int,
                        default=42,
                        help="random seed for initialization")

    parser.add_argument('-no_cuda', action='store_true')
    parser.add_argument('-teacher_forcing_ratio', type=float, default=0.5)

    opt = parser.parse_args()
    opt.cuda = not opt.no_cuda
    opt.d_word_vec = opt.d_model
    opt.log = opt.save_model

    random.seed(opt.seed)
    np.random.seed(opt.seed)
    torch.manual_seed(opt.seed)
    if opt.cuda:
        torch.cuda.manual_seed_all(opt.seed)

    #========= Loading Dataset =========#
    data = torch.load(opt.data)
    opt.max_token_seq_len = data['settings'].max_token_seq_len

    training_data, validation_data = prepare_dataloaders(data, opt)

    opt.src_vocab_size = training_data.dataset.src_vocab_size
    opt.tgt_vocab_size = training_data.dataset.tgt_vocab_size

    #========= Preparing Model =========#
    print(opt)
    device = torch.device('cuda' if opt.cuda else 'cpu')

    # model
    opt.bidirectional = True
    encoder = EncoderRNN(opt.src_vocab_size,
                         opt.max_token_seq_len,
                         opt.d_model,
                         bidirectional=opt.bidirectional,
                         variable_lengths=True)
    decoder = DecoderRNN(opt.tgt_vocab_size,
                         opt.max_token_seq_len,
                         opt.d_model * 2 if opt.bidirectional else opt.d_model,
                         n_layers=opt.n_layer,
                         dropout_p=opt.dropout,
                         use_attention=True,
                         bidirectional=opt.bidirectional,
                         eos_id=Constants.EOS,
                         sos_id=Constants.BOS)
    seq2seq = Seq2seq(encoder, decoder).to(device)
    for param in seq2seq.parameters():
        param.data.uniform_(-0.08, 0.08)

    seq2seq = nn.DataParallel(seq2seq)

    # loss
    weight = torch.ones(opt.tgt_vocab_size)
    pad = Constants.PAD
    loss = Perplexity(weight, pad)
    if opt.cuda:
        loss.cuda()

    # optimizer
    optimizer = Optimizer(torch.optim.Adam(seq2seq.parameters()),
                          max_grad_norm=5)

    train(seq2seq, training_data, validation_data, loss, optimizer, device,
          opt)