Example #1
    def load(self, conf, problem, emb_matrix):
        # Load the dictionary only when not fine-tuning and the cached dictionary is valid.
        if not conf.pretrained_model_path and not self.dictionary_invalid:
            problem.load_problem(conf.problem_path)
            if not self.embedding_invalid:
                emb_matrix = np.array(load_from_pkl(conf.emb_pkl_path))
            logging.info('[Cache] loading dictionary successfully')

        if not self.encoding_invalid:
            pass
        return problem, emb_matrix
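
Several of these examples call load_from_pkl (and, later, dump_to_pkl) without showing them. A minimal sketch of what such pickle helpers are assumed to do; the function names follow the examples, but the bodies below are illustrative rather than the project's actual implementation:

import pickle

def load_from_pkl(pkl_path):
    # Deserialize an object from a pickle file (assumed helper).
    with open(pkl_path, 'rb') as fin:
        return pickle.load(fin)

def dump_to_pkl(obj, pkl_path):
    # Serialize an object to a pickle file (assumed helper).
    with open(pkl_path, 'wb') as fout:
        pickle.dump(obj, fout, protocol=pickle.HIGHEST_PROTOCOL)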
Example #2
    def load_problem(self, problem_path):
        info_dict = load_from_pkl(problem_path)
        for name in info_dict:
            if isinstance(getattr(self, name), CellDict):
                getattr(self, name).load_cell_dict(info_dict[name])
            else:
                setattr(self, name, info_dict[name])
            # the type of input_dicts is dict
            # elif name == 'input_dicts' and isinstance(getattr(self, name), type(info_dict[name])):
            #     setattr(self, name, info_dict[name])
        logging.debug("Problem loaded")
Example #3
    def test(self, loss_fn, test_data_path=None, predict_output_path=None):
        if test_data_path is None:
            # the test_data_path argument takes priority over self.conf.test_data_path
            test_data_path = self.conf.test_data_path

        if not test_data_path.endswith('.pkl'):
            test_data, test_length, test_target = self.problem.encode(test_data_path, self.conf.file_columns, self.conf.input_types,
                self.conf.file_with_col_header, self.conf.object_inputs, self.conf.answer_column_name, max_lengths=self.conf.max_lengths,
                min_sentence_len=self.conf.min_sentence_len, extra_feature=self.conf.extra_feature, fixed_lengths=self.conf.fixed_lengths, file_format='tsv',
                show_progress=(self.conf.mode == 'normal'), cpu_num_workers=self.conf.cpu_num_workers)
        else:
            test_pkl_data = load_from_pkl(test_data_path)
            test_data, test_length, test_target = test_pkl_data['data'], test_pkl_data['length'], test_pkl_data['target']

        if not predict_output_path:
            self.evaluate(test_data, test_length, test_target,
                self.conf.input_types, self.evaluator, loss_fn, pad_ids=None, phase="test")
        else:
            self.evaluate(test_data, test_length, test_target,
                self.conf.input_types, self.evaluator, loss_fn, pad_ids=None, phase="test",
                origin_data_path=test_data_path, predict_output_path=predict_output_path)
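
test() accepts either a raw .tsv file, which is encoded on the fly, or a pre-encoded .pkl dump with 'data', 'length' and 'target' keys; when predict_output_path is given, predictions are also written out. A hedged usage sketch below: the 'test' phase name, the paths, and the loss construction are assumptions modeled on Example #4, not verified API usage:

loss_conf = conf.loss
loss_conf['output_layer_id'] = conf.output_layer_id
loss_conf['answer_column_name'] = conf.answer_column_name
loss_fn = Loss(**loss_conf)

lm = LearningMachine('test', conf, problem, vocab_info=None, initialize=False, use_gpu=conf.use_gpu)
lm.load_model(conf.model_save_path)
lm.test(loss_fn, './data/test.tsv', predict_output_path='./output/predictions.tsv')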
Example #4
def main(params):
    conf = ModelConf("train", params.conf_path, version, params, mode=params.mode)

    shutil.copy(params.conf_path, conf.save_base_dir)
    logging.info('Configuration file is backed up to %s' % (conf.save_base_dir))

    if ProblemTypes[conf.problem_type] == ProblemTypes.sequence_tagging:
        problem = Problem(conf.problem_type, conf.input_types, conf.answer_column_name,
            source_with_start=True, source_with_end=True, source_with_unk=True, source_with_pad=True,
            target_with_start=True, target_with_end=True, target_with_unk=True, target_with_pad=True, same_length=True,
            with_bos_eos=conf.add_start_end_for_seq, tagging_scheme=conf.tagging_scheme,
            remove_stopwords=conf.remove_stopwords, DBC2SBC=conf.DBC2SBC, unicode_fix=conf.unicode_fix)
    elif ProblemTypes[conf.problem_type] == ProblemTypes.classification \
            or ProblemTypes[conf.problem_type] == ProblemTypes.regression:
        problem = Problem(conf.problem_type, conf.input_types, conf.answer_column_name,
            source_with_start=True, source_with_end=True, source_with_unk=True, source_with_pad=True,
            target_with_start=False, target_with_end=False, target_with_unk=False, target_with_pad=False,
            same_length=False, with_bos_eos=conf.add_start_end_for_seq, remove_stopwords=conf.remove_stopwords,
            DBC2SBC=conf.DBC2SBC, unicode_fix=conf.unicode_fix)
    elif ProblemTypes[conf.problem_type] == ProblemTypes.mrc:
        problem = Problem(conf.problem_type, conf.input_types, conf.answer_column_name,
                          source_with_start=True, source_with_end=True, source_with_unk=True, source_with_pad=True,
                          target_with_start=False, target_with_end=False, target_with_unk=False, target_with_pad=False,
                          same_length=False, with_bos_eos=False, remove_stopwords=conf.remove_stopwords,
                          DBC2SBC=conf.DBC2SBC, unicode_fix=conf.unicode_fix)

    cache_load_flag = False
    if not conf.pretrained_model_path:
        # first time training, load cache if applicable
        if conf.use_cache:
            cache_conf_path = os.path.join(conf.cache_dir, 'conf_cache.json')
            if os.path.isfile(cache_conf_path):
                params_cache = copy.deepcopy(params)
                '''
                for key in vars(params_cache):
                    setattr(params_cache, key, None)
                params_cache.mode = params.mode
                '''
                try:
                    cache_conf = ModelConf('cache', cache_conf_path, version, params_cache)
                except Exception as e:
                    cache_conf = None
                if cache_conf is None or verify_cache(cache_conf, conf) is not True:
                    logging.info('Found cache that is ineffective')
                    if params.mode == 'philly' or params.force is True:
                        renew_option = 'yes'
                    else:
                        renew_option = input('There exists an ineffective cache %s left by old models. Input "yes" to renew the cache and "no" to exit. (default: no): ' % os.path.abspath(conf.cache_dir))
                    if renew_option.lower() != 'yes':
                        exit(0)
                    else:
                        shutil.rmtree(conf.cache_dir)
                        time.sleep(2)  # sleep 2 seconds since the deletion is asynchronous
                        logging.info('Old cache is deleted')
                else:
                    logging.info('Found cache that is applicable to the current configuration...')

            elif os.path.isdir(conf.cache_dir):
                renew_option = input('There exists an ineffective cache %s left by old models. Input "yes" to renew the cache and "no" to exit. (default: no): ' % os.path.abspath(conf.cache_dir))
                if renew_option.lower() != 'yes':
                    exit(0)
                else:
                    shutil.rmtree(conf.cache_dir)
                    time.sleep(2)  # sleep 2 seconds since the deletion is asynchronous
                    logging.info('Old cache is deleted')

            if not os.path.exists(conf.cache_dir):
                os.makedirs(conf.cache_dir)
                shutil.copy(params.conf_path, os.path.join(conf.cache_dir, 'conf_cache.json'))

        # first time training: load problem from cache, then back up the cache to save_base_dir/necessary_cache/
        if conf.use_cache and os.path.isfile(conf.problem_path):
            problem.load_problem(conf.problem_path)
            if conf.emb_pkl_path is not None:
                if os.path.isfile(conf.emb_pkl_path):
                    emb_matrix = np.array(load_from_pkl(conf.emb_pkl_path))
                    cache_load_flag = True
                else:
                    if params.mode == 'normal':
                        renew_option = input('The cache is invalid because the embedding matrix does not exist in the cache directory. Input "yes" to renew cache and "no" to exit. (default:no): ')
                        if renew_option.lower() != 'yes':
                            exit(0)
                    else:
                        # by default, renew cache
                        renew_option = 'yes'
            else:
                emb_matrix = None
                cache_load_flag = True
            if cache_load_flag:
                logging.info("Cache loaded!")

        if cache_load_flag is False:
            logging.info("Preprocessing... Depending on your corpus size, this step may take a while.")
            if conf.pretrained_emb_path:
                emb_matrix = problem.build(conf.train_data_path, conf.file_columns, conf.input_types, conf.file_with_col_header,
                                           conf.answer_column_name, word2vec_path=conf.pretrained_emb_path,
                                           word_emb_dim=conf.pretrained_emb_dim, format=conf.pretrained_emb_type,
                                           file_type=conf.pretrained_emb_binary_or_text, involve_all_words=conf.involve_all_words_in_pretrained_emb,
                                           show_progress=True if params.mode == 'normal' else False, max_vocabulary=conf.max_vocabulary,
                                           word_frequency=conf.min_word_frequency)
            else:
                emb_matrix = problem.build(conf.train_data_path, conf.file_columns, conf.input_types, conf.file_with_col_header,
                                           conf.answer_column_name, word2vec_path=None, word_emb_dim=None, format=None,
                                           file_type=None, involve_all_words=conf.involve_all_words_in_pretrained_emb,
                                           show_progress=True if params.mode == 'normal' else False,
                                           max_vocabulary=conf.max_vocabulary, word_frequency=conf.min_word_frequency)

            if conf.mode == 'philly' and conf.emb_pkl_path.startswith('/hdfs/'):
                with HDFSDirectTransferer(conf.problem_path, with_hdfs_command=True) as transferer:
                    transferer.pkl_dump(problem.export_problem(conf.problem_path, ret_without_save=True))
            else:
                problem.export_problem(conf.problem_path)
            if conf.use_cache:
                logging.info("Cache saved to %s" % conf.problem_path)
                if emb_matrix is not None and conf.emb_pkl_path is not None:
                    if conf.mode == 'philly' and conf.emb_pkl_path.startswith('/hdfs/'):
                        with HDFSDirectTransferer(conf.emb_pkl_path, with_hdfs_command=True) as transferer:
                            transferer.pkl_dump(emb_matrix)
                    else:
                        dump_to_pkl(emb_matrix, conf.emb_pkl_path)
                    logging.info("Embedding matrix saved to %s" % conf.emb_pkl_path)
            else:
                logging.debug("Cache saved to %s" % conf.problem_path)

        # Back up problem.pkl to save_base_dir/necessary_cache/. During the test phase, the cache is loaded from save_base_dir/necessary_cache/problem.pkl
        cache_bakup_path = os.path.join(conf.save_base_dir, 'necessary_cache/')
        logging.debug('Prepare dir: %s' % cache_bakup_path)
        prepare_dir(cache_bakup_path, True, allow_overwrite=True, clear_dir_if_exist=True)

        shutil.copy(conf.problem_path, cache_bakup_path)
        logging.debug("Problem %s is backed up to %s" % (conf.problem_path, cache_bakup_path))
        if problem.output_dict:
            logging.debug("Problem target cell dict: %s" % (problem.output_dict.cell_id_map))

        if params.make_cache_only:
            logging.info("Finish building cache!")
            return

        vocab_info = dict()  # includes each input_type's vocab_size & init_emb_matrix
        vocab_sizes = problem.get_vocab_sizes()
        for input_cluster in vocab_sizes:
            vocab_info[input_cluster] = dict()
            vocab_info[input_cluster]['vocab_size'] = vocab_sizes[input_cluster]
            # add extra info for char_emb
            if input_cluster.lower() == 'char':
                for key, value in conf.input_types[input_cluster].items():
                    if key != 'cols':
                        vocab_info[input_cluster][key] = value
            if input_cluster == 'word' and emb_matrix is not None:
                vocab_info[input_cluster]['init_weights'] = emb_matrix
            else:
                vocab_info[input_cluster]['init_weights'] = None

        lm = LearningMachine('train', conf, problem, vocab_info=vocab_info, initialize=True, use_gpu=conf.use_gpu)
    else:
        # when finetuning, load previous saved problem
        problem.load_problem(conf.saved_problem_path)
        lm = LearningMachine('train', conf, problem, vocab_info=None, initialize=False, use_gpu=conf.use_gpu)

    if len(conf.metrics_post_check) > 0:
        for metric_to_chk in conf.metrics_post_check:
            metric, target = metric_to_chk.split('@')
            if not problem.output_dict.has_cell(target):
                raise Exception("The target %s of %s does not exist in the training data." % (target, metric_to_chk))

    if conf.pretrained_model_path:
        logging.info('Loading the pretrained model: %s...' % conf.pretrained_model_path)
        lm.load_model(conf.pretrained_model_path)

    loss_conf = conf.loss
    loss_conf['output_layer_id'] = conf.output_layer_id
    loss_conf['answer_column_name'] = conf.answer_column_name
    # loss_fn = eval(loss_conf['type'])(**loss_conf['conf'])
    loss_fn = Loss(**loss_conf)
    if conf.use_gpu is True:
        loss_fn.cuda()

    optimizer = eval(conf.optimizer_name)(lm.model.parameters(), **conf.optimizer_params)

    lm.train(optimizer, loss_fn)

    # test with the best saved model
    lm.load_model(conf.model_save_path)
    if conf.test_data_path is not None:
        test_path = conf.test_data_path
    elif conf.valid_data_path is not None:
        test_path = conf.valid_data_path
    logging.info('Testing the best model saved at %s, with %s' % (conf.model_save_path, test_path))
    if not test_path.endswith('pkl'):
        lm.test(loss_fn, test_path, predict_output_path=conf.predict_output_path)
    else:
        lm.test(loss_fn, test_path)
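
main() expects a params namespace carrying at least conf_path, mode, force and make_cache_only. A minimal command-line entry point under those assumptions; the argument names mirror the attributes used above, and the defaults are illustrative:

import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Training entry point (illustrative sketch)')
    parser.add_argument('--conf_path', type=str, required=True, help='path to the JSON configuration file')
    parser.add_argument('--mode', type=str, default='normal', help="'normal' or 'philly'")
    parser.add_argument('--force', action='store_true', help='renew an ineffective cache without prompting')
    parser.add_argument('--make_cache_only', action='store_true', help='stop after the cache is built')
    params, _ = parser.parse_known_args()
    main(params)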
Example #5
    @staticmethod
    def _load_encoding_cache_generator(cache_dir, file_index):
        for index in file_index:
            file_path = os.path.join(cache_dir, index[0])
            yield load_from_pkl(file_path)
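
_load_encoding_cache_generator lazily yields one pickled chunk per entry of file_index, so a large encoding cache never has to be loaded into memory at once. A small usage sketch; the index layout (file name as the first tuple element) is inferred from index[0], and the instance name and paths are placeholders:

# 'problem' is assumed to be the instance that owns this static helper.
file_index = [('encoding_part_0.pkl',), ('encoding_part_1.pkl',)]   # assumed layout: file name first
for chunk in problem._load_encoding_cache_generator('./cache/encoding', file_index):
    pass  # consume one decoded chunk at a time, keeping memory bounded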
Example #6
    def train(self, optimizer, loss_fn):
        self.model.train()
        if not self.conf.train_data_path.endswith('.pkl'):
            train_data, train_length, train_target = self.problem.encode(self.conf.train_data_path, self.conf.file_columns,
                self.conf.input_types, self.conf.file_with_col_header, self.conf.object_inputs, self.conf.answer_column_name, max_lengths=self.conf.max_lengths,
                min_sentence_len=self.conf.min_sentence_len, extra_feature=self.conf.extra_feature, fixed_lengths=self.conf.fixed_lengths, file_format='tsv',
                show_progress=(self.conf.mode == 'normal'), cpu_num_workers=self.conf.cpu_num_workers)
        else:
            train_pkl_data = load_from_pkl(self.conf.train_data_path)
            train_data, train_length, train_target = train_pkl_data['data'], train_pkl_data['length'], train_pkl_data['target']

        if not self.conf.valid_data_path.endswith('.pkl'):
            valid_data, valid_length, valid_target = self.problem.encode(self.conf.valid_data_path, self.conf.file_columns,
                self.conf.input_types, self.conf.file_with_col_header, self.conf.object_inputs, self.conf.answer_column_name, max_lengths=self.conf.max_lengths,
                min_sentence_len=self.conf.min_sentence_len, extra_feature=self.conf.extra_feature, fixed_lengths=self.conf.fixed_lengths, file_format='tsv',
                show_progress=(self.conf.mode == 'normal'), cpu_num_workers=self.conf.cpu_num_workers)
        else:
            valid_pkl_data = load_from_pkl(self.conf.valid_data_path)
            valid_data, valid_length, valid_target = valid_pkl_data['data'], valid_pkl_data['length'], valid_pkl_data['target']

        if self.conf.test_data_path is not None:
            if not self.conf.test_data_path.endswith('.pkl'):
                test_data, test_length, test_target = self.problem.encode(self.conf.test_data_path, self.conf.file_columns, self.conf.input_types,
                    self.conf.file_with_col_header, self.conf.object_inputs, self.conf.answer_column_name, max_lengths=self.conf.max_lengths,
                    min_sentence_len=self.conf.min_sentence_len, extra_feature=self.conf.extra_feature, fixed_lengths=self.conf.fixed_lengths,
                    file_format='tsv', show_progress=(self.conf.mode == 'normal'), cpu_num_workers=self.conf.cpu_num_workers)
            else:
                test_pkl_data = load_from_pkl(self.conf.test_data_path)
                test_data, test_length, test_target = test_pkl_data['data'], test_pkl_data['length'], test_pkl_data['target']

        stop_training = False
        epoch = 1
        best_result = None
        show_result_cnt = 0
        lr_scheduler = LRScheduler(optimizer, self.conf.lr_decay, self.conf.minimum_lr, self.conf.epoch_start_lr_decay)

        if ProblemTypes[self.problem.problem_type] == ProblemTypes.classification:
            streaming_recoder = StreamingRecorder(['prediction', 'pred_scores', 'pred_scores_all', 'target'])
        elif ProblemTypes[self.problem.problem_type] == ProblemTypes.sequence_tagging:
            streaming_recoder = StreamingRecorder(['prediction', 'pred_scores', 'target'])
        elif ProblemTypes[self.problem.problem_type] == ProblemTypes.regression:
            streaming_recoder = StreamingRecorder(['prediction', 'target'])
        elif ProblemTypes[self.problem.problem_type] == ProblemTypes.mrc:
            streaming_recoder = StreamingRecorder(['prediction', 'answer_text'])

        while not stop_training and epoch <= self.conf.max_epoch:
            logging.info('Training: Epoch ' + str(epoch))

            data_batches, length_batches, target_batches = \
                get_batches(self.problem, train_data, train_length, train_target, self.conf.batch_size_total,
                    self.conf.input_types, None, permutate=True, transform_tensor=True)

            whole_batch_num = len(target_batches)
            valid_batch_num = max(len(target_batches) // self.conf.valid_times_per_epoch, 1)
            if torch.cuda.device_count() > 1:
                small_batch_num = whole_batch_num * torch.cuda.device_count()       # total batch num over all the gpus
                valid_batch_num_show = valid_batch_num * torch.cuda.device_count()      # total batch num over all the gpus to do validation
            else:
                small_batch_num = whole_batch_num
                valid_batch_num_show = valid_batch_num

            streaming_recoder.clear_records()
            all_costs = []

            logging.info('There are %d batches in an epoch; validation is conducted every %d batches' % (small_batch_num, valid_batch_num_show))

            if self.conf.mode == 'normal':
                progress = tqdm(range(len(target_batches)))
            elif self.conf.mode == 'philly':
                progress = range(len(target_batches))
            for i in progress:
                # the result shape: for classification: [batch_size, # of classes]; for sequence tagging: [batch_size, seq_len, # of tags]
                param_list, inputs_desc, length_desc = transform_params2tensors(data_batches[i], length_batches[i])
                logits_softmax = self.model(inputs_desc, length_desc, *param_list)

                # check the output
                if ProblemTypes[self.problem.problem_type] == ProblemTypes.classification:
                    logits_softmax = list(logits_softmax.values())[0]
                    assert len(logits_softmax.shape) == 2, 'The dimension of your output is %s, but we need [batch_size*GPUs, class num]' % (str(list(logits_softmax.shape)))
                    assert logits_softmax.shape[1] == self.problem.output_target_num(), 'The dimension of your output layer %d is inconsistent with your type number %d!' % (logits_softmax.shape[1], self.problem.output_target_num())
                    # for auc metric
                    prediction_scores = logits_softmax[:, self.conf.pos_label].cpu().data.numpy()
                    if self.evaluator.has_auc_type_specific:
                        prediction_scores_all = logits_softmax.cpu().data.numpy()
                    else:
                        prediction_scores_all = None
                elif ProblemTypes[self.problem.problem_type] == ProblemTypes.sequence_tagging:
                    logits_softmax = list(logits_softmax.values())[0]
                    assert len(logits_softmax.shape) == 3, 'The dimension of your output is %s, but we need [batch_size*GPUs, sequence length, representation dim]' % (str(list(logits_softmax.shape)), )
                    prediction_scores = None
                    prediction_scores_all = None
                elif ProblemTypes[self.problem.problem_type] == ProblemTypes.regression:
                    logits_softmax = list(logits_softmax.values())[0]
                    assert len(logits_softmax.shape) == 2 and logits_softmax.shape[1] == 1, 'The dimension of your output is %s, but we need [batch_size*GPUs, 1]' % (str(list(logits_softmax.shape)))
                    prediction_scores = None
                    prediction_scores_all = None
                elif ProblemTypes[self.problem.problem_type] == ProblemTypes.mrc:
                    for single_value in logits_softmax.values():
                        assert len(single_value.shape) == 3, 'The dimension of your output is %s, but we need [batch_size*GPUs, sequence_len, 1]' % (str(list(single_value.shape)))
                    prediction_scores = None
                    prediction_scores_all = None

                logits_softmax_flat = dict()
                if ProblemTypes[self.problem.problem_type] == ProblemTypes.sequence_tagging:
                    # Transform output shapes for metric evaluation
                    # for seq_tag_f1 metric
                    prediction_indices = logits_softmax.data.max(2)[1].cpu().numpy()    # [batch_size, seq_len]
                    streaming_recoder.record_one_row([self.problem.decode(prediction_indices, length_batches[i]['target'][self.conf.answer_column_name[0]].numpy()),
                                                      prediction_scores, self.problem.decode(target_batches[i][self.conf.answer_column_name[0]],
                                                                                             length_batches[i]['target'][self.conf.answer_column_name[0]].numpy())], keep_dim=False)

                    # PyTorch's CrossEntropyLoss only supports this flattened shape
                    logits_softmax_flat[self.conf.output_layer_id[0]] = logits_softmax.view(-1, logits_softmax.size(2))    # [batch_size * seq_len, # of tags]
                    #target_batches[i] = target_batches[i].view(-1)                      # [batch_size * seq_len]
                    # [batch_size * seq_len]
                    target_batches[i][self.conf.answer_column_name[0]] = target_batches[i][self.conf.answer_column_name[0]].reshape(-1)

                elif ProblemTypes[self.problem.problem_type] == ProblemTypes.classification:
                    prediction_indices = logits_softmax.detach().max(1)[1].cpu().numpy()
                    # Should not decode!
                    streaming_recoder.record_one_row([prediction_indices, prediction_scores, prediction_scores_all, target_batches[i][self.conf.answer_column_name[0]].numpy()])
                    logits_softmax_flat[self.conf.output_layer_id[0]] = logits_softmax
                elif ProblemTypes[self.problem.problem_type] == ProblemTypes.regression:
                    temp_logits_softmax_flat = logits_softmax.squeeze(1)
                    prediction_scores = temp_logits_softmax_flat.detach().cpu().numpy()
                    streaming_recoder.record_one_row([prediction_scores, target_batches[i][self.conf.answer_column_name[0]].numpy()])
                    logits_softmax_flat[self.conf.output_layer_id[0]] = temp_logits_softmax_flat
                elif ProblemTypes[self.problem.problem_type] == ProblemTypes.mrc:
                    for key, value in logits_softmax.items():
                        logits_softmax[key] = value.squeeze()
                    passage_identify = None
                    for type_key in data_batches[i].keys():
                        if 'p' in type_key.lower():
                            passage_identify = type_key
                            break
                    if not passage_identify:
                        raise Exception('The MRC task needs passage information.')
                    prediction = self.problem.decode(logits_softmax, lengths=length_batches[i][passage_identify],
                                                     batch_data=data_batches[i][passage_identify])
                    logits_softmax_flat = logits_softmax
                    mrc_answer_target = None
                    for single_target in target_batches[i]:
                        if isinstance(target_batches[i][single_target][0], str):
                            mrc_answer_target = target_batches[i][single_target]
                    streaming_recoder.record_one_row([prediction, mrc_answer_target])

                if self.use_gpu:
                    for single_target in self.conf.answer_column_name:
                        if isinstance(target_batches[i][single_target], torch.Tensor):
                            target_batches[i][single_target] = transfer_to_gpu(target_batches[i][single_target])
                loss = loss_fn(logits_softmax_flat, target_batches[i])

                all_costs.append(loss.item())
                optimizer.zero_grad()
                loss.backward()
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.conf.clip_grad_norm_max_norm)
                optimizer.step()

                del loss, logits_softmax, logits_softmax_flat
                del prediction_scores
                if ProblemTypes[self.problem.problem_type] == ProblemTypes.sequence_tagging \
                        or ProblemTypes[self.problem.problem_type] == ProblemTypes.classification:
                    del prediction_indices

                if show_result_cnt == self.conf.batch_num_to_show_results:
                    if ProblemTypes[self.problem.problem_type] == ProblemTypes.classification:
                        result = self.evaluator.evaluate(streaming_recoder.get('target'),
                            streaming_recoder.get('prediction'), y_pred_pos_score=streaming_recoder.get('pred_scores'),
                            y_pred_scores_all=streaming_recoder.get('pred_scores_all'), formatting=True)
                    elif ProblemTypes[self.problem.problem_type] == ProblemTypes.sequence_tagging:
                        result = self.evaluator.evaluate(streaming_recoder.get('target'),
                            streaming_recoder.get('prediction'), y_pred_pos_score=streaming_recoder.get('pred_scores'),
                            formatting=True)
                    elif ProblemTypes[self.problem.problem_type] == ProblemTypes.regression:
                        result = self.evaluator.evaluate(streaming_recoder.get('target'),
                            streaming_recoder.get('prediction'), y_pred_pos_score=None, y_pred_scores_all=None, formatting=True)
                    elif ProblemTypes[self.problem.problem_type] == ProblemTypes.mrc:
                        result = self.evaluator.evaluate(streaming_recoder.get('answer_text'), streaming_recoder.get('prediction'),
                                                         y_pred_pos_score=None, y_pred_scores_all=None, formatting=True)

                    if torch.cuda.device_count() > 1:
                        logging.info("Epoch %d batch idx: %d; lr: %f; since last log, loss=%f; %s" % \
                            (epoch, i * torch.cuda.device_count(), lr_scheduler.get_lr(), np.mean(all_costs), result))
                    else:
                        logging.info("Epoch %d batch idx: %d; lr: %f; since last log, loss=%f; %s" % \
                            (epoch, i, lr_scheduler.get_lr(), np.mean(all_costs), result))
                    show_result_cnt = 0
                    # The loss and other metrics printed during a training epoch reflect only the batches seen since the last log, not the full training set.
                    all_costs = []
                    streaming_recoder.clear_records()

                if (i != 0 and i % valid_batch_num == 0) or i == len(target_batches) - 1:
                    torch.cuda.empty_cache()    # actually useless
                    logging.info('Valid & Test : Epoch ' + str(epoch))
                    new_result = self.evaluate(valid_data, valid_length, valid_target,
                        self.conf.input_types, self.evaluator, loss_fn, pad_ids=None, cur_best_result=best_result,
                        model_save_path=self.conf.model_save_path, phase="valid", epoch=epoch)
                    renew_flag = best_result != new_result
                    best_result = new_result

                    if renew_flag and self.conf.test_data_path is not None:
                        self.evaluate(test_data, test_length, test_target,
                            self.conf.input_types, self.evaluator, loss_fn, pad_ids=None, phase="test", epoch=epoch)
                    self.model.train()
                show_result_cnt += 1

            del data_batches, length_batches, target_batches
            lr_scheduler.step()
            epoch += 1
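
train() reads the learning rate through lr_scheduler.get_lr() and calls lr_scheduler.step() once per epoch; the LRScheduler class itself is not shown in these examples. Below is a minimal sketch of a scheduler exposing that interface, under the assumption that lr_decay is a multiplicative factor, minimum_lr a lower bound, and epoch_start_lr_decay the first epoch at which decay applies; the real implementation may differ:

class SimpleLRScheduler(object):
    # Multiplicative decay with a floor, applied from a given epoch onward (illustrative).
    def __init__(self, optimizer, lr_decay, minimum_lr, epoch_start_lr_decay):
        self.optimizer = optimizer
        self.lr_decay = lr_decay
        self.minimum_lr = minimum_lr
        self.epoch_start_lr_decay = epoch_start_lr_decay
        self.epoch = 1

    def get_lr(self):
        # Report the current learning rate of the first parameter group.
        return self.optimizer.param_groups[0]['lr']

    def step(self):
        # Call once per epoch: advance the epoch counter and decay, never below the floor.
        self.epoch += 1
        if self.epoch >= self.epoch_start_lr_decay:
            for group in self.optimizer.param_groups:
                group['lr'] = max(group['lr'] * self.lr_decay, self.minimum_lr)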