示例#1
0
    def per_epoch_summary_step1(self, ranker, train_data,
                                list_fold_k_train_eval_track, test_data,
                                list_fold_k_test_eval_track, vali_eval_v,
                                list_fold_k_vali_eval_track, cutoffs, do_vali):
        """
        Record the nDCG scores of one logging step in the per-fold tracking lists.

        nDCG at every cutoff is computed with ndcg_at_ks, first on the training data and then on
        the test data; each result is converted to a numpy array and appended to its list.
        :param ranker: ranker to be evaluated
        :param train_data: training data passed to ndcg_at_ks
        :param list_fold_k_train_eval_track: list extended in place with the train nDCG array
        :param test_data: test data passed to ndcg_at_ks
        :param list_fold_k_test_eval_track: list extended in place with the test nDCG array
        :param vali_eval_v: validation score of this step, only recorded if do_vali is set
        :param list_fold_k_vali_eval_track: list extended in place with vali_eval_v if do_vali is set
        :param cutoffs: cutoff values handed to ndcg_at_ks as ks
        :param do_vali: whether the validation score is tracked as well
        :return: None
        """
        # train is evaluated before test, each split feeding its own track
        splits_and_tracks = (
            (train_data, list_fold_k_train_eval_track),
            (test_data, list_fold_k_test_eval_track),
        )
        for split_data, eval_track in splits_and_tracks:
            torch_ndcg_ks = ndcg_at_ks(
                ranker=ranker,
                test_data=split_data,
                ks=cutoffs,
                gpu=self.gpu,
                device=self.device,
                label_type=self.data_setting.data_dict['label_type'])
            if self.gpu:
                np_ndcg_ks = torch_ndcg_ks.cpu().numpy()
            else:
                np_ndcg_ks = torch_ndcg_ks.data.numpy()
            eval_track.append(np_ndcg_ks)

        if do_vali:
            list_fold_k_vali_eval_track.append(vali_eval_v)
示例#2
0
    def per_epoch_summary_step1(self, ranker, train_data,
                                list_fold_k_train_eval_track, test_data,
                                list_fold_k_test_eval_track, vali_eval_v,
                                list_fold_k_vali_eval_track, cutoffs, do_vali):
        """
        Record the nDCG scores of one logging step in the per-fold tracking lists.

        nDCG at every cutoff is computed with ndcg_at_ks (batch mode), first on the training data
        and then on the test data; each result is converted to a numpy array and appended to its list.
        :param ranker: ranker to be evaluated
        :param train_data: training data passed to ndcg_at_ks
        :param list_fold_k_train_eval_track: list extended in place with the train nDCG array
        :param test_data: test data passed to ndcg_at_ks
        :param list_fold_k_test_eval_track: list extended in place with the test nDCG array
        :param vali_eval_v: validation score of this step, only recorded if do_vali is set
        :param list_fold_k_vali_eval_track: list extended in place with vali_eval_v if do_vali is set
        :param cutoffs: cutoff values handed to ndcg_at_ks as ks
        :param do_vali: whether the validation score is tracked as well
        :return: None
        """
        # train is evaluated before test, each split feeding its own track
        splits_and_tracks = (
            (train_data, list_fold_k_train_eval_track),
            (test_data, list_fold_k_test_eval_track),
        )
        for split_data, eval_track in splits_and_tracks:
            torch_ndcg_ks = ndcg_at_ks(
                ranker=ranker,
                test_data=split_data,
                ks=cutoffs,
                multi_level_rele=self.data_setting.data_dict['multi_level_rele'],
                batch_mode=True)
            # the gpu flag is resolved from the enclosing module, not from self
            if gpu:
                np_ndcg_ks = torch_ndcg_ks.cpu().numpy()
            else:
                np_ndcg_ks = torch_ndcg_ks.data.numpy()
            eval_track.append(np_ndcg_ks)

        if do_vali:
            list_fold_k_vali_eval_track.append(vali_eval_v)
示例#3
0
文件: ltr.py 项目: pipipiu/ptranking
    def naive_train(self,
                    ranker,
                    eval_dict,
                    train_data=None,
                    test_data=None,
                    vali_data=None):
        """
        Plain train-and-test run: train on the training data and, after every epoch,
        compute nDCG on the test data and on the training data.
        :param ranker: ranker to train; its parameters are reset before training starts
        :param eval_dict: evaluation settings, the entries 'epochs' and 'cutoffs' are read
        :param train_data: iterable yielding (qid, batch_rankings, batch_stds), must not be None
        :param test_data: data handed to ndcg_at_ks for testing, must not be None
        :param vali_data: not used by this method
        :return: tuple of (list with one loss array per epoch,
                 per-epoch train nDCG rows stacked with np.vstack,
                 per-epoch test nDCG rows stacked with np.vstack)
        """
        ranker.reset_parameters()  # reset with the same random initialization

        assert train_data is not None
        assert test_data is not None

        num_epochs = eval_dict['epochs']
        cutoffs = eval_dict['cutoffs']

        list_losses, train_ndcg_rows, test_ndcg_rows = [], [], []

        for _ in range(num_epochs):
            # accumulator for the loss of this epoch, placed on the device in gpu mode
            if gpu:
                running_loss = torch.zeros(1).to(device)
            else:
                running_loss = torch.zeros(1)

            for qid, batch_rankings, batch_stds in train_data:
                if gpu:
                    batch_rankings = batch_rankings.to(device)
                    batch_stds = batch_stds.to(device)
                # the stop flag returned by the ranker is not acted upon here
                batch_loss, _stop_training = ranker.train(batch_rankings,
                                                          batch_stds,
                                                          qid=qid)
                running_loss += batch_loss.item()

            if gpu:
                list_losses.append(running_loss.cpu().numpy())
            else:
                list_losses.append(running_loss.data.numpy())

            # test split is scored first, then the training split
            torch_test_ndcg_ks = ndcg_at_ks(ranker=ranker,
                                            test_data=test_data,
                                            ks=cutoffs,
                                            multi_level_rele=True)
            test_ndcg_rows.append(torch_test_ndcg_ks.data.numpy())

            torch_train_ndcg_ks = ndcg_at_ks(ranker=ranker,
                                             test_data=train_data,
                                             ks=cutoffs,
                                             multi_level_rele=True)
            train_ndcg_rows.append(torch_train_ndcg_ks.data.numpy())

        # one row per epoch
        test_ndcgs = np.vstack(test_ndcg_rows)
        train_ndcgs = np.vstack(train_ndcg_rows)

        return list_losses, train_ndcgs, test_ndcgs
示例#4
0
    def kfold_cv_eval(self,
                      data_dict=None,
                      eval_dict=None,
                      sf_para_dict=None,
                      model_para_dict=None):
        """
        Evaluation learning-to-rank methods via k-fold cross validation if there are k folds, otherwise one fold.

        For every fold the ranker is reset, trained for up to eval_dict['epochs'] epochs and then
        scored on the fold's test data with nDCG at each cutoff. With validation enabled, the
        checkpoint with the best validation nDCG@vali_k is buffered on disk and reloaded before
        testing; otherwise the model after the last executed epoch is saved and used.
        :param data_dict:       settings w.r.t. data ('fold_num' is read here)
        :param eval_dict:       settings w.r.t. evaluation ('epochs', 'loss_guided', 'vali_k',
                                'log_step', 'cutoffs', 'do_validation', 'do_summary' are read here)
        :param sf_para_dict:    settings w.r.t. scoring function
        :param model_para_dict: settings w.r.t. the ltr_adhoc model ('model_id' is read here)
        :return: numpy array holding, per cutoff, the test nDCG averaged over all folds
        """
        self.display_information(data_dict, model_para_dict)
        self.check_consistency(data_dict, eval_dict, sf_para_dict)
        self.setup_eval(data_dict, eval_dict, sf_para_dict, model_para_dict)

        model_id = model_para_dict['model_id']
        fold_num = data_dict['fold_num']
        # for quick access of common evaluation settings
        epochs, loss_guided = eval_dict['epochs'], eval_dict['loss_guided']
        vali_k, log_step, cutoffs = eval_dict['vali_k'], eval_dict[
            'log_step'], eval_dict['cutoffs']
        do_vali, do_summary = eval_dict['do_validation'], eval_dict[
            'do_summary']

        ranker = self.load_ranker(model_para_dict=model_para_dict,
                                  sf_para_dict=sf_para_dict)

        time_begin = datetime.datetime.now()  # timing
        l2r_cv_avg_scores = np.zeros(len(cutoffs))  # fold average

        for fold_k in range(1, fold_num + 1):  # evaluation over k-fold data
            ranker.reset_parameters(
            )  # reset with the same random initialization

            train_data, test_data, vali_data = self.load_data(
                eval_dict, data_dict, fold_k)

            # per-fold bookkeeping: best validation score, summary tracks, loss threshold
            if do_vali: fold_optimal_ndcgk = 0.0
            if do_summary:                list_epoch_loss, list_fold_k_train_eval_track, list_fold_k_test_eval_track, list_fold_k_vali_eval_track = [], [], [], []
            # NOTE(review): first_round / threshold_epoch_loss are only created when validation is
            # off, yet the loss-guided branch below can also be reached with do_vali set (on epochs
            # that are not a logging step) -- presumably check_consistency rules that combination
            # out; confirm.
            if not do_vali and loss_guided:
                first_round = True
                threshold_epoch_loss = torch.cuda.FloatTensor([
                    10000000.0
                ]) if self.gpu else torch.FloatTensor([10000000.0])

            for epoch_k in range(1, epochs + 1):
                torch_fold_k_epoch_k_loss, stop_training = self.train_ranker(
                    ranker=ranker,
                    train_data=train_data,
                    model_para_dict=model_para_dict,
                    epoch_k=epoch_k)

                ranker.scheduler.step(
                )  # adaptive learning rate with step_size=40, gamma=0.5

                # the training routine signalled a failure: give up on this fold's training
                if stop_training:
                    print('training is failed !')
                    break

                # logging steps are the first epoch and every log_step-th epoch
                if (do_summary
                        or do_vali) and (epoch_k % log_step == 0
                                         or epoch_k == 1):  # stepwise check
                    if do_vali:  # per-step validation score
                        vali_eval_tmp = ndcg_at_k(ranker=ranker,
                                                  test_data=vali_data,
                                                  k=vali_k,
                                                  gpu=self.gpu,
                                                  device=self.device,
                                                  label_type=self.data_setting.
                                                  data_dict['label_type'])
                        # NOTE(review): no .cpu() here, unlike the summary branch below --
                        # presumably ndcg_at_k already returns a CPU tensor; confirm.
                        vali_eval_v = vali_eval_tmp.data.numpy()
                        if epoch_k > 1:  # further validation comparison
                            curr_vali_ndcg = vali_eval_v
                            # buffer the model on a strict improvement, or on a tie at the final epoch
                            if (curr_vali_ndcg > fold_optimal_ndcgk) or (
                                    epoch_k == epochs
                                    and curr_vali_ndcg == fold_optimal_ndcgk
                            ):  # we need at least a reference, in case all zero
                                print('\t', epoch_k,
                                      '- nDCG@{} - '.format(vali_k),
                                      curr_vali_ndcg)
                                fold_optimal_ndcgk = curr_vali_ndcg
                                fold_optimal_checkpoint = '-'.join(
                                    ['Fold', str(fold_k)])
                                fold_optimal_epoch_val = epoch_k
                                ranker.save(
                                    dir=self.dir_run +
                                    fold_optimal_checkpoint + '/',
                                    name='_'.join(
                                        ['net_params_epoch',
                                         str(epoch_k)]) +
                                    '.pkl')  # buffer currently optimal model
                            else:
                                print('\t\t', epoch_k,
                                      '- nDCG@{} - '.format(vali_k),
                                      curr_vali_ndcg)

                    if do_summary:  # summarize per-step performance w.r.t. train, test
                        fold_k_epoch_k_train_ndcg_ks = ndcg_at_ks(
                            ranker=ranker,
                            test_data=train_data,
                            ks=cutoffs,
                            gpu=self.gpu,
                            device=self.device,
                            label_type=self.data_setting.
                            data_dict['label_type'])
                        np_fold_k_epoch_k_train_ndcg_ks = fold_k_epoch_k_train_ndcg_ks.cpu(
                        ).numpy(
                        ) if self.gpu else fold_k_epoch_k_train_ndcg_ks.data.numpy(
                        )
                        list_fold_k_train_eval_track.append(
                            np_fold_k_epoch_k_train_ndcg_ks)

                        fold_k_epoch_k_test_ndcg_ks = ndcg_at_ks(
                            ranker=ranker,
                            test_data=test_data,
                            ks=cutoffs,
                            gpu=self.gpu,
                            device=self.device,
                            label_type=self.data_setting.
                            data_dict['label_type'])
                        np_fold_k_epoch_k_test_ndcg_ks = fold_k_epoch_k_test_ndcg_ks.cpu(
                        ).numpy(
                        ) if self.gpu else fold_k_epoch_k_test_ndcg_ks.data.numpy(
                        )
                        list_fold_k_test_eval_track.append(
                            np_fold_k_epoch_k_test_ndcg_ks)

                        fold_k_epoch_k_loss = torch_fold_k_epoch_k_loss.cpu(
                        ).numpy(
                        ) if self.gpu else torch_fold_k_epoch_k_loss.data.numpy(
                        )
                        list_epoch_loss.append(fold_k_epoch_k_loss)

                        if do_vali:
                            list_fold_k_vali_eval_track.append(vali_eval_v)

                # only reached on epochs for which the stepwise check above did not run
                elif loss_guided:  # stopping check via epoch-loss
                    if first_round and torch_fold_k_epoch_k_loss >= threshold_epoch_loss:
                        print('Bad threshold: ', torch_fold_k_epoch_k_loss,
                              threshold_epoch_loss)

                    # keep training while the epoch loss decreases, stop at the first non-decrease
                    if torch_fold_k_epoch_k_loss < threshold_epoch_loss:
                        first_round = False
                        print('\tFold-', str(fold_k), ' Epoch-', str(epoch_k),
                              'Loss: ', torch_fold_k_epoch_k_loss)
                        threshold_epoch_loss = torch_fold_k_epoch_k_loss
                    else:
                        print('\tStopped according epoch-loss!',
                              torch_fold_k_epoch_k_loss, threshold_epoch_loss)
                        break

            # persist the per-step tracks of this fold with pickle_save
            if do_summary:  # track
                sy_prefix = '_'.join(['Fold', str(fold_k)])
                fold_k_train_eval = np.vstack(list_fold_k_train_eval_track)
                fold_k_test_eval = np.vstack(list_fold_k_test_eval_track)
                pickle_save(fold_k_train_eval,
                            file=self.dir_run +
                            '_'.join([sy_prefix, 'train_eval.np']))
                pickle_save(fold_k_test_eval,
                            file=self.dir_run +
                            '_'.join([sy_prefix, 'test_eval.np']))

                # the epoch losses are stored together with the length of the training data
                fold_k_epoch_loss = np.hstack(list_epoch_loss)
                pickle_save(
                    (fold_k_epoch_loss, train_data.__len__()),
                    file=self.dir_run + '_'.join([sy_prefix, 'epoch_loss.np']))
                if do_vali:
                    fold_k_vali_eval = np.hstack(list_fold_k_vali_eval_track)
                    pickle_save(fold_k_vali_eval,
                                file=self.dir_run +
                                '_'.join([sy_prefix, 'vali_eval.np']))

            # NOTE(review): fold_optimal_epoch_val / fold_optimal_checkpoint are only assigned when
            # a checkpoint was buffered at a logging step with epoch_k > 1; if that never happened
            # in this fold the load below would fail on an unbound name -- confirm the settings
            # guarantee at least one buffered checkpoint.
            if do_vali:  # using the fold-wise optimal model for later testing based on validation data
                buffered_model = '_'.join(
                    ['net_params_epoch',
                     str(fold_optimal_epoch_val)]) + '.pkl'
                ranker.load(self.dir_run + fold_optimal_checkpoint + '/' +
                            buffered_model)
                fold_optimal_ranker = ranker
            else:  # buffer the model after a fixed number of training-epoches if no validation is deployed
                fold_optimal_checkpoint = '-'.join(['Fold', str(fold_k)])
                ranker.save(dir=self.dir_run + fold_optimal_checkpoint + '/',
                            name='_'.join(['net_params_epoch',
                                           str(epoch_k)]) + '.pkl')
                fold_optimal_ranker = ranker

            # final test score of this fold
            torch_fold_ndcg_ks = ndcg_at_ks(
                ranker=fold_optimal_ranker,
                test_data=test_data,
                ks=cutoffs,
                gpu=self.gpu,
                device=self.device,
                label_type=self.data_setting.data_dict['label_type'])
            fold_ndcg_ks = torch_fold_ndcg_ks.data.numpy()

            performance_list = [model_id + ' Fold-' + str(fold_k)
                                ]  # fold-wise performance
            for i, co in enumerate(cutoffs):
                performance_list.append('nDCG@{}:{:.4f}'.format(
                    co, fold_ndcg_ks[i]))
            performance_str = '\t'.join(performance_list)
            print('\t', performance_str)

            l2r_cv_avg_scores = np.add(
                l2r_cv_avg_scores,
                fold_ndcg_ks)  # sum for later cv-performance

        time_end = datetime.datetime.now()  # overall timing
        elapsed_time_str = str(time_end - time_begin)
        print('Elapsed time:\t', elapsed_time_str + "\n\n")

        # turn the per-cutoff sums into per-cutoff averages over the folds
        l2r_cv_avg_scores = np.divide(l2r_cv_avg_scores, fold_num)
        eval_prefix = str(
            fold_num) + '-fold cross validation scores:' if do_vali else str(
                fold_num) + '-fold average scores:'
        print(model_id, eval_prefix,
              metric_results_to_string(list_scores=l2r_cv_avg_scores,
                                       list_cutoffs=cutoffs)
              )  # print either cv or average performance

        return l2r_cv_avg_scores
示例#5
0
    def ad_cv_eval(self,
                   data_dict=None,
                   eval_dict=None,
                   ad_para_dict=None,
                   sf_para_dict=None):
        """
        Adversarial training and evaluation

        For every fold the generator and discriminator are reset, burned in on the training data
        and trained with mini_max_train for up to eval_dict['epochs'] epochs. Both rankers are then
        scored on the fold's test data with nDCG at each cutoff; fold-wise and fold-averaged scores
        are printed.
        :param data_dict: settings w.r.t. data ('fold_num' and 'num_features' are read here)
        :param eval_dict: settings w.r.t. evaluation ('epochs', 'loss_guided', 'vali_k', 'log_step',
                          'cutoffs', 'do_validation', 'do_summary' are read here)
        :param ad_para_dict: settings w.r.t. the adversarial model ('model_id' is read here)
        :param sf_para_dict: settings w.r.t. the scoring function; only the id 'ffnns' is supported
        :return: None, the results are printed
        :raises NotImplementedError: if sf_para_dict['id'] is not 'ffnns'
        """
        self.check_consistency(data_dict, eval_dict)
        self.display_information(data_dict, model_para_dict=ad_para_dict)
        self.setup_eval(data_dict,
                        eval_dict,
                        sf_para_dict,
                        model_para_dict=ad_para_dict)

        model_id = ad_para_dict['model_id']
        fold_num = data_dict['fold_num']
        # for quick access of common evaluation settings
        epochs, loss_guided = eval_dict['epochs'], eval_dict['loss_guided']
        vali_k, log_step, cutoffs = eval_dict['vali_k'], eval_dict[
            'log_step'], eval_dict['cutoffs']
        do_vali, do_summary = eval_dict['do_validation'], eval_dict[
            'do_summary']

        # the number of input features is injected into the scoring-function settings
        if sf_para_dict['id'] == 'ffnns':
            sf_para_dict['ffnns'].update(
                dict(num_features=data_dict['num_features']))
        else:
            raise NotImplementedError

        ad_machine = self.get_ad_machine(eval_dict=eval_dict,
                                         data_dict=data_dict,
                                         sf_para_dict=sf_para_dict,
                                         ad_para_dict=ad_para_dict)

        time_begin = datetime.datetime.now()  # timing
        g_l2r_cv_avg_scores, d_l2r_cv_avg_scores = np.zeros(
            len(cutoffs)), np.zeros(len(cutoffs))  # fold average

        for fold_k in range(1, fold_num + 1):
            dict_buffer = dict()  # for buffering frequently used objs
            ad_machine.reset_generator_discriminator()

            fold_optimal_checkpoint = '-'.join(['Fold', str(fold_k)])

            train_data, test_data, vali_data = self.load_data(
                eval_dict, data_dict, fold_k)

            if do_vali: g_fold_optimal_ndcgk, d_fold_optimal_ndcgk = 0.0, 0.0
            if do_summary:
                list_epoch_loss = []  # not used yet
                g_list_fold_k_train_eval_track, g_list_fold_k_test_eval_track, g_list_fold_k_vali_eval_track = [], [], []
                d_list_fold_k_train_eval_track, d_list_fold_k_test_eval_track, d_list_fold_k_vali_eval_track = [], [], []

            # Placeholders so that the summary step can be called when validation is disabled:
            # without them the names below would be unbound for do_summary=True, do_vali=False.
            # With validation enabled they are overwritten at every logging step before use.
            g_vali_eval_v, d_vali_eval_v = None, None

            # warm-up passes over the training data before the adversarial epochs
            for _ in range(10):
                ad_machine.burn_in(train_data=train_data)

            for epoch_k in range(1, epochs + 1):

                if model_id == 'IR_GMAN_List':
                    stop_training = ad_machine.mini_max_train(
                        train_data=train_data,
                        generator=ad_machine.generator,
                        pool_discriminator=ad_machine.pool_discriminator,
                        dict_buffer=dict_buffer)

                    g_ranker = ad_machine.get_generator()
                    # with a pool of discriminators, the first one is the one evaluated
                    d_ranker = ad_machine.pool_discriminator[0]
                else:
                    stop_training = ad_machine.mini_max_train(
                        train_data=train_data,
                        generator=ad_machine.generator,
                        discriminator=ad_machine.discriminator,
                        dict_buffer=dict_buffer)

                    g_ranker = ad_machine.get_generator()
                    d_ranker = ad_machine.get_discriminator()

                if stop_training:
                    print('training is failed !')
                    break

                # logging steps are the first epoch and every log_step-th epoch
                if (do_summary
                        or do_vali) and (epoch_k % log_step == 0
                                         or epoch_k == 1):  # stepwise check
                    if do_vali:
                        g_vali_eval_tmp = ndcg_at_k(
                            ranker=g_ranker,
                            test_data=vali_data,
                            k=vali_k,
                            multi_level_rele=self.data_setting.
                            data_dict['multi_level_rele'],
                            batch_mode=True)
                        d_vali_eval_tmp = ndcg_at_k(
                            ranker=d_ranker,
                            test_data=vali_data,
                            k=vali_k,
                            multi_level_rele=self.data_setting.
                            data_dict['multi_level_rele'],
                            batch_mode=True)
                        g_vali_eval_v, d_vali_eval_v = g_vali_eval_tmp.data.numpy(
                        ), d_vali_eval_tmp.data.numpy()

                        if epoch_k > 1:
                            g_buffer, g_tmp_metric_val, g_tmp_epoch = \
                                self.per_epoch_validation(ranker=g_ranker, curr_metric_val=g_vali_eval_v,
                                                          fold_optimal_metric_val=g_fold_optimal_ndcgk, curr_epoch=epoch_k,
                                                          id_str='G', fold_optimal_checkpoint=fold_optimal_checkpoint, epochs=epochs)
                            # observe better performance
                            if g_buffer:
                                g_fold_optimal_ndcgk, g_fold_optimal_epoch_val = g_tmp_metric_val, g_tmp_epoch

                            d_buffer, d_tmp_metric_val, d_tmp_epoch = \
                                self.per_epoch_validation(ranker=d_ranker, curr_metric_val=d_vali_eval_v,
                                                          fold_optimal_metric_val=d_fold_optimal_ndcgk, curr_epoch=epoch_k,
                                                          id_str='D', fold_optimal_checkpoint=fold_optimal_checkpoint, epochs=epochs)
                            if d_buffer:
                                d_fold_optimal_ndcgk, d_fold_optimal_epoch_val = d_tmp_metric_val, d_tmp_epoch

                    if do_summary:  # summarize per-step performance w.r.t. train, test
                        self.per_epoch_summary_step1(
                            ranker=g_ranker,
                            train_data=train_data,
                            test_data=test_data,
                            list_fold_k_train_eval_track=
                            g_list_fold_k_train_eval_track,
                            list_fold_k_test_eval_track=
                            g_list_fold_k_test_eval_track,
                            vali_eval_v=g_vali_eval_v,
                            list_fold_k_vali_eval_track=
                            g_list_fold_k_vali_eval_track,
                            cutoffs=cutoffs,
                            do_vali=do_vali)

                        self.per_epoch_summary_step1(
                            ranker=d_ranker,
                            train_data=train_data,
                            test_data=test_data,
                            list_fold_k_train_eval_track=
                            d_list_fold_k_train_eval_track,
                            list_fold_k_test_eval_track=
                            d_list_fold_k_test_eval_track,
                            vali_eval_v=d_vali_eval_v,
                            list_fold_k_vali_eval_track=
                            d_list_fold_k_vali_eval_track,
                            cutoffs=cutoffs,
                            do_vali=do_vali)

            if do_summary:
                self.per_epoch_summary_step2(
                    id_str='G',
                    fold_k=fold_k,
                    list_fold_k_train_eval_track=g_list_fold_k_train_eval_track,
                    list_fold_k_test_eval_track=g_list_fold_k_test_eval_track,
                    do_vali=do_vali,
                    list_fold_k_vali_eval_track=g_list_fold_k_vali_eval_track)

                self.per_epoch_summary_step2(
                    id_str='D',
                    fold_k=fold_k,
                    list_fold_k_train_eval_track=d_list_fold_k_train_eval_track,
                    list_fold_k_test_eval_track=d_list_fold_k_test_eval_track,
                    do_vali=do_vali,
                    list_fold_k_vali_eval_track=d_list_fold_k_vali_eval_track)

            # NOTE(review): g_/d_fold_optimal_epoch_val are only assigned when per_epoch_validation
            # reported a checkpoint to buffer at a logging step with epoch_k > 1; if that never
            # happened in this fold the loads below fail on an unbound name -- confirm the settings
            # guarantee at least one buffered checkpoint per ranker.
            if do_vali:  # using the fold-wise optimal model for later testing based on validation data #
                g_buffered_model = '_'.join(
                    ['net_params_epoch',
                     str(g_fold_optimal_epoch_val), 'G']) + '.pkl'
                g_ranker.load(self.dir_run + fold_optimal_checkpoint + '/' +
                              g_buffered_model)
                g_fold_optimal_ranker = g_ranker

                d_buffered_model = '_'.join(
                    ['net_params_epoch',
                     str(d_fold_optimal_epoch_val), 'D']) + '.pkl'
                d_ranker.load(self.dir_run + fold_optimal_checkpoint + '/' +
                              d_buffered_model)
                d_fold_optimal_ranker = d_ranker

            else:  # using default G # buffer the model after a fixed number of training-epoches if no validation is deployed
                g_ranker.save(
                    dir=self.dir_run + fold_optimal_checkpoint + '/',
                    name='_'.join(['net_params_epoch',
                                   str(epoch_k), 'G']) + '.pkl')
                g_fold_optimal_ranker = g_ranker

                d_ranker.save(
                    dir=self.dir_run + fold_optimal_checkpoint + '/',
                    name='_'.join(['net_params_epoch',
                                   str(epoch_k), 'D']) + '.pkl')
                d_fold_optimal_ranker = d_ranker

            # final test scores of this fold for generator and discriminator
            g_torch_fold_ndcg_ks = ndcg_at_ks(
                ranker=g_fold_optimal_ranker,
                test_data=test_data,
                ks=cutoffs,
                multi_level_rele=self.data_setting.
                data_dict['multi_level_rele'],
                batch_mode=True)
            g_fold_ndcg_ks = g_torch_fold_ndcg_ks.data.numpy()

            d_torch_fold_ndcg_ks = ndcg_at_ks(
                ranker=d_fold_optimal_ranker,
                test_data=test_data,
                ks=cutoffs,
                multi_level_rele=self.data_setting.
                data_dict['multi_level_rele'],
                batch_mode=True)
            d_fold_ndcg_ks = d_torch_fold_ndcg_ks.data.numpy()

            performance_list = [' Fold-' + str(fold_k)
                                ]  # fold-wise performance
            performance_list.append('Generator')
            for i, co in enumerate(cutoffs):
                performance_list.append('nDCG@{}:{:.4f}'.format(
                    co, g_fold_ndcg_ks[i]))

            performance_list.append('\nDiscriminator')
            for i, co in enumerate(cutoffs):
                performance_list.append('nDCG@{}:{:.4f}'.format(
                    co, d_fold_ndcg_ks[i]))

            performance_str = '\t'.join(performance_list)
            print('\t', performance_str)

            g_l2r_cv_avg_scores = np.add(
                g_l2r_cv_avg_scores,
                g_fold_ndcg_ks)  # sum for later cv-performance
            d_l2r_cv_avg_scores = np.add(d_l2r_cv_avg_scores, d_fold_ndcg_ks)

        time_end = datetime.datetime.now()  # overall timing
        elapsed_time_str = str(time_end - time_begin)
        print('Elapsed time:\t', elapsed_time_str + "\n\n")

        # begin to print either cv or average performance
        g_l2r_cv_avg_scores = np.divide(g_l2r_cv_avg_scores, fold_num)
        d_l2r_cv_avg_scores = np.divide(d_l2r_cv_avg_scores, fold_num)

        if do_vali:
            eval_prefix = str(fold_num) + '-fold cross validation scores:'
        else:
            eval_prefix = str(fold_num) + '-fold average scores:'

        print(
            'Generator', eval_prefix,
            metric_results_to_string(list_scores=g_l2r_cv_avg_scores,
                                     list_cutoffs=cutoffs))
        print(
            'Discriminator', eval_prefix,
            metric_results_to_string(list_scores=d_l2r_cv_avg_scores,
                                     list_cutoffs=cutoffs))