示例#1
0
    def train(self,
              model,
              data,
              num_epochs=5,
              resume=False,
              dev_data=None,
              optimizer=None,
              teacher_forcing_ratio=0):
        """Train ``model`` on ``data``, optionally resuming from a checkpoint.

        Args:
            model (seq2seq.models): model to train. Ignored when
                ``resume=True``: the model stored in the latest checkpoint
                under ``self.expt_dir`` is trained instead.
            data (seq2seq.dataset.dataset.Dataset): training dataset.
            num_epochs (int, optional): number of epochs to run (default 5).
            resume (bool, optional): continue from the latest checkpoint
                (default False).
            dev_data (seq2seq.dataset.dataset.Dataset, optional): dev dataset
                forwarded to the epoch loop (default None).
            optimizer (seq2seq.optim.Optimizer, optional): optimizer wrapper
                used for a fresh run; when omitted,
                ``Optimizer(optim.Adam(model.parameters()), max_grad_norm=5)``
                is created. Ignored when resuming.
            teacher_forcing_ratio (float, optional): forwarded unchanged to
                the epoch loop (default 0).

        Returns:
            The model that was trained (the checkpointed one when resuming).
        """
        if not resume:
            # Fresh run: counters start from scratch.
            first_epoch, first_step = 1, 0
            if optimizer is None:
                optimizer = Optimizer(optim.Adam(model.parameters()),
                                      max_grad_norm=5)
            self.optimizer = optimizer
        else:
            ckpt = Checkpoint.load(
                Checkpoint.get_latest_checkpoint(self.expt_dir))
            model = ckpt.model
            self.optimizer = ckpt.optimizer

            # Workaround: rebuild the wrapped optimizer so that it tracks the
            # parameters of the reloaded model, reusing the hyper-parameters
            # of its first parameter group.
            saved_optim = self.optimizer.optimizer
            hyper_params = saved_optim.param_groups[0]
            for stale_key in ('params', 'initial_lr'):
                hyper_params.pop(stale_key, None)
            self.optimizer.optimizer = saved_optim.__class__(
                model.parameters(), **hyper_params)

            first_epoch, first_step = ckpt.epoch, ckpt.step

        summary = "Optimizer: %s, Scheduler: %s" % (
            self.optimizer.optimizer, self.optimizer.scheduler)
        self.logger.info(summary)

        self._train_epoches(data, model, num_epochs, first_epoch, first_step,
                            dev_data=dev_data,
                            teacher_forcing_ratio=teacher_forcing_ratio)
        return model
    def train(self,
              model,
              data,
              teacher_model=None,
              num_epochs=5,
              resume=False,
              dev_data=None,
              optimizer=None,
              teacher_forcing_ratio=0):
        """Run training for ``model``, passing ``teacher_model`` to the epoch loop.

        Args:
            model: model to train. When ``resume=True`` it is replaced by the
                model stored in the latest checkpoint under ``self.expt_dir``.
            data: training dataset, forwarded to ``_train_epoches``.
            teacher_model (optional): forwarded unchanged to
                ``_train_epoches`` (default None).
            num_epochs (int, optional): number of epochs to run (default 5).
            resume (bool, optional): continue from the latest checkpoint
                (default False).
            dev_data (optional): dev dataset forwarded to the epoch loop.
            optimizer (optional): optimizer wrapper for a fresh run; when
                omitted, ``Optimizer(optim.Adam(model.parameters()),
                max_grad_norm=5)`` is created. Ignored when resuming.
            teacher_forcing_ratio (float, optional): forwarded unchanged to
                the epoch loop (default 0).

        Returns:
            The model that was trained (the checkpointed one when resuming).
        """
        if resume:
            latest_checkpoint_path = Checkpoint.get_latest_checkpoint(
                self.expt_dir)
            resume_checkpoint = Checkpoint.load(latest_checkpoint_path)

            model = resume_checkpoint.model
            self.optimizer = resume_checkpoint.optimizer

            # Rebuild the wrapped optimizer around the reloaded model's
            # parameters, reusing the hyper-parameters of the first group.
            # 'params' must be dropped: it is passed positionally below, and
            # leaving it in the keyword arguments raises a TypeError.
            resume_optim = self.optimizer.optimizer
            defaults = {
                key: value
                for key, value in resume_optim.param_groups[0].items()
                if key not in ('params', 'initial_lr')
            }
            self.optimizer.optimizer = resume_optim.__class__(
                model.parameters(), **defaults)

            start_epoch = resume_checkpoint.epoch
            step = resume_checkpoint.step

        else:
            start_epoch = 1
            step = 0
            if optimizer is None:
                optimizer = Optimizer(optim.Adam(model.parameters()),
                                      max_grad_norm=5)
            self.optimizer = optimizer

        self.logger.info("Optimizer: %s, Scheduler: %s" %
                         (self.optimizer.optimizer, self.optimizer.scheduler))

        self._train_epoches(data,
                            model,
                            teacher_model,
                            num_epochs,
                            start_epoch,
                            step,
                            dev_data=dev_data,
                            teacher_forcing_ratio=teacher_forcing_ratio)
        return model
    validation_float_data = scaler.transform(validation_data)
    validation_array_list.append(validation_float_data)

# Windowing settings handed to WFDataset below.
# NOTE(review): "seqence_len" is a misspelling of "sequence_len"; the name is
# kept because the dataset construction below refers to it.
seqence_len = 72
# NOTE(review): output_dim is not used in the visible code; the WFDataset
# calls below pass outdim=3 as a literal instead.
output_dim = 3
delay = 36

# Either restore a saved model from the experiment directory, or build the
# train/dev datasets for a fresh run.
if opt.load_checkpoint is not None:
    logging.info("loading checkpoint from {}".format(
        os.path.join(opt.expt_dir, Checkpoint.CHECKPOINT_DIR_NAME,
                     opt.load_checkpoint)))
    checkpoint_path = os.path.join(opt.expt_dir,
                                   Checkpoint.CHECKPOINT_DIR_NAME,
                                   opt.load_checkpoint)
    checkpoint = Checkpoint.load(checkpoint_path)
    # Move the restored model onto the configured device.
    seq2seq = checkpoint.model.to(device)
else:
    # Prepare dataset
    train = WFDataset(train_array_list,
                      delay=delay,
                      seq_len=seqence_len,
                      outdim=3,
                      transform=transforms.Compose([ToTensor()]))
    # The dev set uses the same windowing but additionally passes
    # begin_index=100 (meaning defined by WFDataset — TODO confirm).
    dev = WFDataset(validation_array_list,
                    delay=delay,
                    seq_len=seqence_len,
                    outdim=3,
                    transform=transforms.Compose([ToTensor()]),
                    begin_index=100)
    if opt.use_custome_loss:
示例#4
0
    def _train_epoches(self,
                       data,
                       model,
                       n_epochs,
                       dev_data=None,
                       test_data=None):
        """Train ``model`` for up to ``n_epochs`` epochs with early stopping.

        Every epoch feeds all batches to ``self._train_batch`` and averages
        the loss. When ``dev_data`` is given, the model is evaluated on it,
        the dev loss is handed to an ``EarlyStopping`` tracker (patience 2),
        and a checkpoint is saved under ``<expt_dir>/best_accuracy`` whenever
        the dev accuracy improves. When the tracker signals a stop, the latest
        best-accuracy checkpoint is reloaded, the optimizer is rebuilt around
        the reloaded parameters, and training ends.

        The per-epoch summary is logged only when ``dev_data`` and/or
        ``test_data`` is provided (once per provided dataset).

        Args:
            data: examples wrapped in a ``torchtext.data.Dataset`` with the
                fields ``text`` and ``label``.
            model: model to train.
            n_epochs (int): maximum number of epochs.
            dev_data (optional): dataset for early stopping and checkpointing.
            test_data (optional): dataset evaluated after every epoch and
                after reloading the best checkpoint.

        Returns:
            The trained model; after an early stop, the model restored from
            the best-accuracy checkpoint.
        """
        labeled_dataset = torchtext.data.Dataset(data,
                                                 fields=[('text',
                                                          self.TEXT_field),
                                                         ('label',
                                                          self.LABEL_field)])
        label_batch_iter = torchtext.data.BucketIterator(
            dataset=labeled_dataset,
            batch_size=128,
            sort_key=lambda x: len(x.text),
            sort_within_batch=True,
            device=self.device,
            repeat=False,
            shuffle=True)
        log = self.logger

        early_stopping = EarlyStopping(patience=2, verbose=True)
        best_accuracy = 0

        for epoch in range(0, n_epochs):
            model.train()
            loss_total = 0
            step = 0
            for batch in label_batch_iter:
                input_variables, input_lengths = batch.text
                target_variables = batch.label
                loss = self._train_batch(input_variables,
                                         input_lengths.tolist(),
                                         target_variables, model)
                loss_total += loss.item()
                step += 1
                # Drop references early so the tensors can be freed.
                del loss, batch

            if step == 0:
                # No batches were produced: there is no loss to average and
                # nothing new to evaluate (avoids a division by zero).
                continue

            epoch_loss_avg = loss_total / step
            log_msg = "Finished epoch %d: SSL Train %s: %.4f" % (
                epoch, 'Cross_Entropy', epoch_loss_avg)
            with torch.no_grad():
                if dev_data is not None:
                    model.eval()
                    dev_loss, dev_acc = self.evaluator.evaluate(
                        model, dev_data)
                    self.dev_acc = dev_acc
                    log_msg += ", Dev %s: %.4f, Accuracy: %.4f" % (
                        'Cross_Entropy', dev_loss, dev_acc)
                    log.info(log_msg)
                    early_stopping(dev_loss, model, self.optimizer, epoch,
                                   step, self.input_vocab, self.expt_dir)
                    print('early stopping : ', early_stopping.counter)
                    # best_accuracy tracks the best dev accuracy seen so far
                    # across all epochs of this call.
                    if self.dev_acc > best_accuracy:
                        best_accuracy = self.dev_acc
                        Checkpoint(model=model,
                                   optimizer=self.optimizer,
                                   epoch=epoch,
                                   step=step,
                                   input_vocab=self.input_vocab).save(
                                       self.expt_dir + '/best_accuracy')
                        print('*' * 100)
                        print('SAVE MODEL (BEST DEV ACC)')

                if test_data is not None:
                    model.eval()
                    test_loss, accuracy = self.evaluator.evaluate(
                        model, test_data)
                    log_msg += ", Test %s: %.4f, Accuracy: %.4f" % (
                        'Cross_Entropy', test_loss, accuracy)
                    log.info(log_msg)

                if early_stopping.early_stop:
                    print(
                        "-------------------Early Stopping---------------------"
                    )
                    checkpoint = Checkpoint.get_latest_checkpoint(
                        self.expt_dir + '/best_accuracy')
                    checkpoint = Checkpoint.load(checkpoint)

                    # Continue with the model object restored from disk.
                    model = checkpoint.model
                    for param_tensor in model.state_dict():
                        print(param_tensor, '\t',
                              model.state_dict()[param_tensor].size())

                    # Rebuild the optimizer so it tracks the restored
                    # model's parameters with the saved hyper-parameters.
                    optimizer = checkpoint.optimizer
                    resume_optim = checkpoint.optimizer.optimizer

                    del checkpoint

                    defaults = resume_optim.param_groups[0]
                    defaults.pop('params', None)
                    defaults.pop('initial_lr', None)
                    optimizer.optimizer = resume_optim.__class__(
                        model.parameters(), **defaults)
                    self.optimizer = optimizer
                    # test_data is optional; only report the restored model's
                    # test metrics when a test set was actually supplied.
                    if test_data is not None:
                        loss, accuracy = self.evaluator.evaluate(
                            model, test_data)
                        print(
                            'LOAD BEST ACCURACY MODEL ::: loss > {} accuracy{}'
                            .format(loss, accuracy))
                    break
        return model
示例#5
0
    def _train_epoches(self, data, model, n_epochs,
                       start_epoch, start_step, dev_data=None, test_data=None):
        """Run the epoch loop with early stopping and best-accuracy checkpoints.

        Batches come from a ``torchtext`` ``BucketIterator`` over ``data``
        (fields ``text`` and ``label``). When resuming mid-epoch, batches
        already counted in ``start_step`` are skipped first. After every
        epoch the positive/negative lexicon lists on ``self`` are reset.

        Args:
            data: torchtext dataset to train on.
            model: model to train.
            n_epochs (int): last epoch number (inclusive).
            start_epoch (int): first epoch number to run.
            start_step (int): number of steps already done before this call.
            dev_data (optional): dataset for early stopping (patience 10) and
                for saving a checkpoint under ``<expt_dir>/best_accuracy``
                whenever dev accuracy improves.
            test_data (optional): dataset evaluated after each epoch; only
                consulted when ``dev_data`` is also given.

        Returns:
            None.
        """
        log = self.logger
        print_loss_total = 0  # Reset every print_every
        epoch_loss_total = 0  # Reset every epoch

        # NOTE(review): -1 looks like the CPU sentinel of older torchtext
        # releases — confirm against the installed torchtext version.
        device = torch.device('cuda:0') if torch.cuda.is_available() else -1
        batch_iterator = torchtext.data.BucketIterator(
            dataset=data, batch_size=self.batch_size,
            sort=False, sort_within_batch=True,
            sort_key=lambda x: len(x.text),
            device=device, repeat=False, shuffle=True)

        steps_per_epoch = len(batch_iterator)
        total_steps = steps_per_epoch * n_epochs

        step = start_step
        step_elapsed = 0
        best_accuracy = 0

        early_stopping = EarlyStopping(patience=10, verbose=True)

        for epoch in range(start_epoch, n_epochs + 1):
            log.debug("Epoch: %d, Step: %d" % (epoch, step))
            batch_generator = iter(batch_iterator)

            # consuming seen batches from previous training
            for _ in range((epoch - 1) * steps_per_epoch, step):
                next(batch_generator)

            model.train(True)
            for batch in batch_generator:
                step += 1
                step_elapsed += 1

                input_variables, input_lengths = getattr(batch, 'text')
                target_variables = getattr(batch, 'label')
                loss = self._train_batch(input_variables,
                                         input_lengths.tolist(),
                                         target_variables, model)

                # Record average loss
                print_loss_total += loss
                epoch_loss_total += loss

                if step % self.print_every == 0 and step_elapsed > self.print_every:
                    print_loss_avg = print_loss_total / self.print_every
                    print_loss_total = 0
                    log_msg = 'Progress: %d%%, Train %s: %.4f' % (
                        step / total_steps * 100,
                        self.loss.name,
                        print_loss_avg)
                    log.info(log_msg)

            # reset neg/pos lexicons collected during this epoch
            self.neg_lexicons = []
            self.pos_lexicons = []

            if step_elapsed == 0:
                continue

            epoch_loss_avg = epoch_loss_total / min(steps_per_epoch, step - start_step)
            epoch_loss_total = 0
            log_msg = "Finished epoch %d: Train %s: %.4f" % (epoch, self.loss.name, epoch_loss_avg)
            if dev_data is not None:
                model.eval()
                dev_loss, dev_accuracy = self.evaluator.evaluate(model, dev_data)
                # NOTE(review): the optimizer/scheduler is not updated in this
                # branch; only the no-dev branch below calls update().
                early_stopping(dev_loss, model, self.optimizer, epoch, step, self.input_vocab, self.expt_dir)
                if dev_accuracy > best_accuracy:
                    best_accuracy = dev_accuracy
                    Checkpoint(model=model,
                               optimizer=self.optimizer,
                               epoch=epoch, step=step,
                               input_vocab=data.fields['text'].vocab).save(self.expt_dir +'/best_accuracy')
                    print(self.expt_dir +'/best_accuracy')

                log_msg += ", Dev %s: %.4f, Accuracy: %.4f" % (self.loss.name, dev_loss, dev_accuracy)
                # test_data is optional (defaults to None), so evaluate and
                # report on it only when it was supplied.
                if test_data is not None:
                    test_loss, test_acc = self.evaluator.evaluate(model, test_data)
                    log_msg += ", test %s: %.4f, test Accuracy: %.4f" % (self.loss.name, test_loss, test_acc)
                model.train(mode=True)
            else:
                self.optimizer.update(epoch_loss_avg, epoch)

            log.info(log_msg)
            if early_stopping.early_stop:
                print("Early Stopping")
                break
示例#6
0
    def _train_epoches(self,
                       data,
                       model,
                       n_epochs,
                       start_epoch,
                       start_step,
                       dev_data=None,
                       teacher_forcing_ratio=0):
        """Run the epoch loop with periodic logging, TensorBoard and checkpoints.

        Batches come from a ``torchtext`` ``BucketIterator`` over ``data``
        (fields ``src`` and ``tgt``). When resuming mid-epoch, batches already
        counted in ``start_step`` are skipped first. At every ``print_every``
        boundary the running loss, parameter histograms and gradient
        histograms are written to ``self.writer`` and, if ``dev_data`` is
        given, the model is evaluated and the optimizer updated. A checkpoint
        is saved every ``checkpoint_every`` steps and at the final step.

        Args:
            data: torchtext dataset to train on.
            model: model to train.
            n_epochs (int): last epoch number (inclusive).
            start_epoch (int): first epoch number to run.
            start_step (int): number of steps already done before this call.
            dev_data (optional): dataset used for evaluation.
            teacher_forcing_ratio (float, optional): forwarded to
                ``self._train_batch`` (default 0).

        Returns:
            None.
        """
        log = self.logger

        print_loss_total = 0  # Reset every print_every
        epoch_loss_total = 0  # Reset every epoch

        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        batch_iterator = torchtext.data.BucketIterator(
            dataset=data,
            batch_size=self.batch_size,
            sort=False,
            sort_within_batch=True,
            sort_key=lambda x: len(x.src),
            device=device,
            repeat=False)

        steps_per_epoch = len(batch_iterator)
        total_steps = steps_per_epoch * n_epochs

        step = start_step
        step_elapsed = 0
        for epoch in range(start_epoch, n_epochs + 1):
            log.debug("Epoch: %d, Step: %d" % (epoch, step))

            batch_generator = iter(batch_iterator)
            # consuming seen batches from previous training
            for _ in range((epoch - 1) * steps_per_epoch, step):
                next(batch_generator)

            model.train(True)
            for batch in batch_generator:
                step += 1
                step_elapsed += 1

                input_variables, input_lengths = getattr(batch, "src")
                target_variables = getattr(batch, "tgt")

                loss = self._train_batch(input_variables,
                                         input_lengths.tolist(),
                                         target_variables, model,
                                         teacher_forcing_ratio)

                # Record average loss
                print_loss_total += loss
                epoch_loss_total += loss

                if step % self.print_every == 0 and step_elapsed > self.print_every:
                    print_loss_avg = print_loss_total / self.print_every
                    print_loss_total = 0
                    log_msg = 'Progress: %d%%, Train %s: %.4f' % (
                        step / total_steps * 100, self.loss.name,
                        print_loss_avg)
                    log.info(log_msg)

                    # step is a multiple of print_every here, so integer
                    # division is exact and yields an int step index.
                    n_iter = step // self.print_every

                    # tensor board
                    self.writer.add_scalar("train_loss", print_loss_avg,
                                           n_iter)

                    # log networks parameters
                    for name, param in model.named_parameters():
                        name = name.replace('.', '/')
                        self.writer.add_histogram(
                            name,
                            param.clone().cpu().data.numpy(), n_iter)
                        # Parameters that are frozen or did not take part in
                        # the last backward pass have no gradient to log.
                        if param.grad is not None:
                            self.writer.add_histogram(
                                name + '/grad',
                                param.grad.data.cpu().numpy(), n_iter)

                    if dev_data is not None:
                        dev_loss, accuracy = self.evaluator.evaluate(
                            model, dev_data, self.writer, n_iter)
                        self.optimizer.update(dev_loss, epoch)
                        model.train(mode=True)

                # Checkpoint
                if step % self.checkpoint_every == 0 or step == total_steps:
                    Checkpoint(model=model,
                               optimizer=self.optimizer,
                               epoch=epoch,
                               step=step,
                               input_vocab=data.fields["src"].vocab,
                               output_vocab=data.fields["tgt"].vocab).save(
                                   self.expt_dir)

            if step_elapsed == 0:
                continue

            epoch_loss_avg = epoch_loss_total / min(steps_per_epoch,
                                                    step - start_step)
            epoch_loss_total = 0
            log_msg = "Finished epoch %d: Train %s: %.4f" % (
                epoch, self.loss.name, epoch_loss_avg)
            if dev_data is not None:
                dev_loss, accuracy = self.evaluator.evaluate(model, dev_data)
                self.optimizer.update(dev_loss, epoch)
                log_msg += ", Dev %s: %.4f, Accuracy: %.4f" % (
                    self.loss.name, dev_loss, accuracy)
                model.train(mode=True)
            else:
                self.optimizer.update(epoch_loss_avg, epoch)

            log.info(log_msg)
    def _train_epoches(self,
                       data,
                       model,
                       n_epochs,
                       start_epoch,
                       start_step,
                       dev_data=None):
        log = self.logger

        print_loss_total = 0  # Reset every print_every
        epoch_loss_total = 0  # Reset every epoch

        dataloader = DataLoader(dataset=data,
                                batch_size=self.batch_size,
                                shuffle=True,
                                num_workers=0)
        steps_per_epoch = len(dataloader)
        total_steps = steps_per_epoch * n_epochs

        step = start_step
        step_elapsed = 0
        for epoch in range(start_epoch, n_epochs + 1):
            log.debug("Epoch: %d, Step: %d" % (epoch, step))

            model.train(True)
            for batch in dataloader:

                step += 1
                step_elapsed += 1

                input_variables = batch['X'].to(self.device)
                target_variables = batch['y'].to(self.device)
                day_ago_data = None
                #print(batch.keys())
                if model.use_day_ago_info:
                    day_ago_data = batch['one_day_ago'].to(self.device)
                loss = self._train_batch(input_variables, target_variables,
                                         model, day_ago_data)

                # Record average loss
                print_loss_total += loss
                epoch_loss_total += loss

                if step % self.print_every == 0 and step_elapsed > self.print_every:
                    print_loss_avg = print_loss_total / self.print_every
                    print_loss_total = 0
                    log_msg = 'Progress: %d%%, Train %s: %.4f' % (
                        step / total_steps * 100, self.loss, print_loss_avg)
                    log.info(log_msg)

                # Checkpoint
                if step % self.checkpoint_every == 0 or step == total_steps:
                    Checkpoint(model=model,
                               optimizer=self.optimizer,
                               epoch=epoch,
                               step=step).save(self.expt_dir)

            if step_elapsed == 0: continue

            epoch_loss_avg = epoch_loss_total / min(steps_per_epoch,
                                                    step - start_step)
            epoch_loss_total = 0
            log_msg = "Finished epoch %d: Train %s: %.4f" % (epoch, self.loss,
                                                             epoch_loss_avg)
            if dev_data is not None:
                dev_loss, rmse = self.evaluator.evaluate(
                    model, dev_data, self.device)
                train_loss, _ = self.evaluator.evaluate(
                    model, data, self.device)
                self.optimizer.update(train_loss, epoch)
                self.optimizer.update(dev_loss, epoch)
                log_msg += ", Dev %s: %.4f, Train: %.4f" % (
                    self.loss, dev_loss, train_loss)

                model.train(mode=True)
            else:
                self.optimizer.update(epoch_loss_avg, epoch)

            log.info(log_msg)
    def _train_epoches(self,
                       data,
                       model,
                       teacher_model,
                       n_epochs,
                       start_epoch,
                       start_step,
                       dev_data,
                       teacher_forcing_ratio=0):
        """Run the epoch loop, passing ``teacher_model`` to every training step.

        Batches come from a ``torchtext`` ``BucketIterator`` over ``data``
        (fields ``src`` and ``tgt``). When resuming mid-epoch, batches already
        counted in ``start_step`` are skipped first. Progress is logged at
        ``print_every`` boundaries and a checkpoint is saved to
        ``self.expt_dir`` every ``checkpoint_every`` steps and at the final
        step.

        Args:
            data: torchtext dataset to train on.
            model: model to train.
            teacher_model: forwarded to ``self._train_batch``.
            n_epochs (int): last epoch number (inclusive).
            start_epoch (int): first epoch number to run.
            start_step (int): number of steps already done before this call.
            dev_data: dataset evaluated after every epoch, or None to skip
                evaluation.
            teacher_forcing_ratio (float, optional): accepted for interface
                compatibility; it is not forwarded to ``self._train_batch``
                in this implementation.

        Returns:
            None.
        """
        log = self.logger
        print_loss_total = 0  # Reset every print_every
        epoch_loss_total = 0  # Reset every epoch

        # NOTE(review): None/-1 look like the GPU/CPU sentinels of older
        # torchtext releases — confirm against the installed version.
        device = None if torch.cuda.is_available() else -1
        batch_iterator = torchtext.data.BucketIterator(
            dataset=data,
            batch_size=self.batch_size,
            sort=False,
            sort_within_batch=True,
            sort_key=lambda x: len(x.src),
            device=device,
            repeat=False)

        step_per_epoch = len(batch_iterator)

        total_steps = step_per_epoch * n_epochs

        step = start_step
        step_elapsed = 0
        for epoch in range(start_epoch, n_epochs + 1):
            log.debug("Epoch: %d, Step: %d" % (epoch, step))

            batch_generator = iter(batch_iterator)
            # Skip the batches already consumed before a resume.
            for _ in range((epoch - 1) * step_per_epoch, step):
                next(batch_generator)

            model.train(True)
            for batch in batch_generator:
                step += 1
                step_elapsed += 1
                input_var, input_length = getattr(batch, 'src')
                target_var = getattr(batch, 'tgt')

                loss = self._train_batch(input_variable=input_var,
                                         input_lengths=input_length,
                                         target_variable=target_var,
                                         model=model,
                                         teacher_model=teacher_model)

                print_loss_total += loss
                epoch_loss_total += loss

                if step % self.print_every == 0 and step_elapsed > self.print_every:
                    print_loss_avg = print_loss_total / self.print_every
                    print_loss_total = 0
                    log_msg = 'Progress: %d%%, Train %s: %.4f' % (
                        step / total_steps * 100, self.loss.name,
                        print_loss_avg)
                    log.info(log_msg)
                # Checkpoint
                if step % self.checkpoint_every == 0 or step == total_steps:
                    # Save where train() looks for checkpoints when resuming
                    # (self.expt_dir), so a resumed run finds them.
                    Checkpoint(model=model,
                               optimizer=self.optimizer,
                               epoch=epoch,
                               step=step,
                               input_vocab=data.fields['src'].vocab,
                               output_vocab=data.fields['tgt'].vocab).save(
                                   self.expt_dir)

            if step_elapsed == 0:
                continue

            epoch_loss_avg = epoch_loss_total / \
                min(step_per_epoch, step - start_step)
            epoch_loss_total = 0
            log_msg = "Finished epoch %d: Train %s: %.4f" % (
                epoch, self.loss.name, epoch_loss_avg)
            if dev_data is not None:
                dev_loss, accuracy = self.evaluator.evaluate(model, dev_data)
                self.optimizer.update(dev_loss, epoch)
                log_msg += ", Dev %s: %.4f, Accuracy: %.4f" % (
                    self.loss.name, dev_loss, accuracy)
                model.train(mode=True)
            else:
                self.optimizer.update(epoch_loss_avg, epoch)

            log.info(log_msg)
# Directory that receives the generated result files.
dst_dir = '../result/seq2seq_feature48'
# makedirs also creates a missing parent directory ('../result') and, with
# exist_ok=True, does not fail when the directory already exists.
os.makedirs(dst_dir, exist_ok=True)

if __name__ == '__main__':
    seqence_len = 72
    output_dim = 3
    delay = 36
    t2m_checkpoint_path = os.path.join(
        '../checkpoints/seq72_feature48_global_t2m_best')
    rh2m_checkpoint_path = os.path.join(
        '../checkpoints/seq72_feature48_global_rh2m_best')
    w10m_checkpoint_path = os.path.join(
        '../checkpoints/seq72_feature48_global_w10m_best')

    t2m_checkpoint = Checkpoint.load(t2m_checkpoint_path)
    rh2m_checkpoint = Checkpoint.load(rh2m_checkpoint_path)
    w10m_checkpoint = Checkpoint.load(w10m_checkpoint_path)

    t2m_predictor = Predictor(t2m_checkpoint.model.to(device))
    rh2m_predictor = Predictor(rh2m_checkpoint.model.to(device))
    w10m_predictor = Predictor(w10m_checkpoint.model.to(device))

    foretimes = 37

    for begin_date, dst_date, end_date in zip(begin_dates, dst_dates,
                                              end_dates):
        submit_csv = None
        end_date = end_date + ' 12-00-00'
        for i in range(90001, 90011):
            df = pd.read_csv(os.path.join(data_dir,