Example #1
    def fold_evaluation(self, ranker, test_data, max_label, fold_k, model_id):
        avg_ndcg_at_ks, avg_nerr_at_ks, avg_ap_at_ks, avg_p_at_ks = \
            ranker.adhoc_performance_at_ks(test_data=test_data, ks=self.cutoffs, device='cpu', max_label=max_label)
        fold_ndcg_ks = avg_ndcg_at_ks.data.numpy()
        fold_nerr_ks = avg_nerr_at_ks.data.numpy()
        fold_ap_ks = avg_ap_at_ks.data.numpy()
        fold_p_ks = avg_p_at_ks.data.numpy()

        self.ndcg_cv_avg_scores = np.add(self.ndcg_cv_avg_scores, fold_ndcg_ks)
        self.nerr_cv_avg_scores = np.add(self.nerr_cv_avg_scores, fold_nerr_ks)
        self.ap_cv_avg_scores = np.add(self.ap_cv_avg_scores, fold_ap_ks)
        self.p_cv_avg_scores = np.add(self.p_cv_avg_scores, fold_p_ks)

        list_metric_strs = []
        list_metric_strs.append(
            metric_results_to_string(list_scores=fold_ndcg_ks,
                                     list_cutoffs=self.cutoffs,
                                     metric='nDCG'))
        list_metric_strs.append(
            metric_results_to_string(list_scores=fold_nerr_ks,
                                     list_cutoffs=self.cutoffs,
                                     metric='nERR'))
        list_metric_strs.append(
            metric_results_to_string(list_scores=fold_ap_ks,
                                     list_cutoffs=self.cutoffs,
                                     metric='AP'))
        list_metric_strs.append(
            metric_results_to_string(list_scores=fold_p_ks,
                                     list_cutoffs=self.cutoffs,
                                     metric='P'))
        metric_string = '\n\t'.join(list_metric_strs)
        print("\n{} on Fold - {}\n\t{}".format(model_id, str(fold_k),
                                               metric_string))
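
A note on the pattern: fold_evaluation only accumulates the per-fold score vectors into the *_cv_avg_scores buffers; the division by the number of folds happens later (see get_cv_performance in Example #5). Below is a minimal, self-contained sketch of that accumulate-then-average step, with toy scores and a hypothetical helper name:

    import numpy as np

    def average_fold_scores(per_fold_scores):
        # Stack one score vector per fold (one column per cutoff) and
        # average over folds -- the same accumulate-then-divide logic as above.
        return np.mean(np.vstack(per_fold_scores), axis=0)

    # Toy nDCG@{1,3,5} vectors from two folds.
    fold_1 = np.array([0.41, 0.45, 0.48])
    fold_2 = np.array([0.39, 0.44, 0.47])
    print(average_fold_scores([fold_1, fold_2]))  # [0.4   0.445 0.475]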
Example #2
    def fold_evaluation_reproduce(self,
                                  ranker,
                                  test_data,
                                  dir_run,
                                  max_label,
                                  fold_k,
                                  model_id,
                                  device='cpu'):
        self.dir_run = dir_run
        subdir = '-'.join(['Fold', str(fold_k)])
        run_fold_k_dir = os.path.join(dir_run, subdir)
        fold_k_buffered_model_names = os.listdir(run_fold_k_dir)
        fold_opt_model_name = get_opt_model(fold_k_buffered_model_names)
        fold_opt_model = os.path.join(run_fold_k_dir, fold_opt_model_name)
        ranker.load(file_model=fold_opt_model, device=device)

        avg_ndcg_at_ks, avg_nerr_at_ks, avg_ap_at_ks, avg_p_at_ks, \
            list_per_q_ndcg, list_per_q_nerr, list_per_q_ap, list_per_q_p = \
            ranker.adhoc_performance_at_ks(test_data=test_data, ks=self.cutoffs, device='cpu',
                                           max_label=max_label, need_per_q=True)
        fold_ndcg_ks = avg_ndcg_at_ks.data.numpy()
        fold_nerr_ks = avg_nerr_at_ks.data.numpy()
        fold_ap_ks = avg_ap_at_ks.data.numpy()
        fold_p_ks = avg_p_at_ks.data.numpy()

        self.list_per_q_p.extend(list_per_q_p)
        self.list_per_q_ap.extend(list_per_q_ap)
        self.list_per_q_nerr.extend(list_per_q_nerr)
        self.list_per_q_ndcg.extend(list_per_q_ndcg)

        self.ndcg_cv_avg_scores = np.add(self.ndcg_cv_avg_scores, fold_ndcg_ks)
        self.nerr_cv_avg_scores = np.add(self.nerr_cv_avg_scores, fold_nerr_ks)
        self.ap_cv_avg_scores = np.add(self.ap_cv_avg_scores, fold_ap_ks)
        self.p_cv_avg_scores = np.add(self.p_cv_avg_scores, fold_p_ks)

        list_metric_strs = []
        list_metric_strs.append(
            metric_results_to_string(list_scores=fold_ndcg_ks,
                                     list_cutoffs=self.cutoffs,
                                     metric='nDCG'))
        list_metric_strs.append(
            metric_results_to_string(list_scores=fold_nerr_ks,
                                     list_cutoffs=self.cutoffs,
                                     metric='nERR'))
        list_metric_strs.append(
            metric_results_to_string(list_scores=fold_ap_ks,
                                     list_cutoffs=self.cutoffs,
                                     metric='AP'))
        list_metric_strs.append(
            metric_results_to_string(list_scores=fold_p_ks,
                                     list_cutoffs=self.cutoffs,
                                     metric='P'))
        metric_string = '\n\t'.join(list_metric_strs)
        print("\n{} on Fold - {}\n\t{}".format(model_id, str(fold_k),
                                               metric_string))
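
For context, get_opt_model (not shown in this listing) picks one buffered checkpoint out of the fold directory. The stand-in below is a hedged sketch that assumes the 'net_params_epoch_<k>.pkl' naming used elsewhere in these examples and simply keeps the highest epoch; the real selection rule may differ:

    import re

    def pick_latest_checkpoint(file_names):
        # Hypothetical stand-in for get_opt_model: parse the epoch number out
        # of 'net_params_epoch_<k>.pkl' and keep the newest checkpoint.
        def epoch_of(name):
            m = re.search(r'epoch_(\d+)', name)
            return int(m.group(1)) if m else -1
        return max(file_names, key=epoch_of)

    print(pick_latest_checkpoint(['net_params_epoch_5.pkl', 'net_params_epoch_40.pkl']))
    # -> net_params_epoch_40.pkl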
Example #3
    def log_max(self,
                data_dict=None,
                max_cv_avg_scores=None,
                sf_para_dict=None,
                eval_dict=None,
                log_para_str=None):
        ''' Log the best performance across grid search and the corresponding setting '''
        dir_root, cutoffs = eval_dict['dir_root'], eval_dict['cutoffs']
        data_id = data_dict['data_id']

        sf_str = self.sf_parameter.to_para_string(log=True)

        data_eval_str = self.data_setting.to_data_setting_string(log=True) + '\n' + \
                        self.eval_setting.to_eval_setting_string(log=True)

        with open(file=dir_root + '/' + '_'.join([data_id, sf_para_dict['sf_id'], 'max.txt']),
                  mode='w') as max_writer:
            max_writer.write('\n\n'.join([
                data_eval_str, sf_str, log_para_str,
                metric_results_to_string(max_cv_avg_scores,
                                         cutoffs,
                                         metric='aNDCG')
            ]))
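
The summary file name is assembled from the dataset id and the scoring-function id. A small sketch with hypothetical values shows where the 'max.txt' record ends up:

    import os

    # Hypothetical values; only the '<data_id>_<sf_id>_max.txt' convention is
    # taken from log_max above.
    dir_root, data_id, sf_id = '/tmp/ltr_runs', 'MQ2008_Super', 'ffnns'
    print(os.path.join(dir_root, '_'.join([data_id, sf_id, 'max.txt'])))
    # -> /tmp/ltr_runs/MQ2008_Super_ffnns_max.txt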
Example #4
    def kfold_cv_eval(self,
                      data_dict=None,
                      eval_dict=None,
                      sf_para_dict=None,
                      model_para_dict=None):
        """
        Evaluate learning-to-rank methods via k-fold cross validation if there are k folds, otherwise a single fold.
        :param data_dict:       settings w.r.t. data
        :param eval_dict:       settings w.r.t. evaluation
        :param sf_para_dict:    settings w.r.t. scoring function
        :param model_para_dict: settings w.r.t. the ltr_adhoc model
        :return:
        """
        self.display_information(data_dict, model_para_dict)
        self.check_consistency(data_dict, eval_dict, sf_para_dict)
        self.setup_eval(data_dict, eval_dict, sf_para_dict, model_para_dict)

        model_id = model_para_dict['model_id']
        fold_num = data_dict['fold_num']
        # for quick access to common evaluation settings
        epochs, loss_guided = eval_dict['epochs'], eval_dict['loss_guided']
        vali_k, log_step, cutoffs = eval_dict['vali_k'], eval_dict['log_step'], eval_dict['cutoffs']
        do_vali, do_summary = eval_dict['do_validation'], eval_dict['do_summary']

        ranker = self.load_ranker(model_para_dict=model_para_dict,
                                  sf_para_dict=sf_para_dict)

        time_begin = datetime.datetime.now()  # timing
        l2r_cv_avg_scores = np.zeros(len(cutoffs))  # fold average

        for fold_k in range(1, fold_num + 1):  # evaluation over k-fold data
            ranker.reset_parameters()  # reset with the same random initialization

            train_data, test_data, vali_data = self.load_data(
                eval_dict, data_dict, fold_k)

            if do_vali: fold_optimal_ndcgk = 0.0
            if do_summary:
                list_epoch_loss, list_fold_k_train_eval_track, list_fold_k_test_eval_track, list_fold_k_vali_eval_track = [], [], [], []
            if not do_vali and loss_guided:
                first_round = True
                threshold_epoch_loss = torch.cuda.FloatTensor([10000000.0]) if self.gpu \
                    else torch.FloatTensor([10000000.0])

            for epoch_k in range(1, epochs + 1):
                torch_fold_k_epoch_k_loss, stop_training = self.train_ranker(
                    ranker=ranker,
                    train_data=train_data,
                    model_para_dict=model_para_dict,
                    epoch_k=epoch_k)

                ranker.scheduler.step()  # adaptive learning rate with step_size=40, gamma=0.5

                if stop_training:
                    print('training failed!')
                    break

                if (do_summary or do_vali) and (epoch_k % log_step == 0 or epoch_k == 1):  # stepwise check
                    if do_vali:  # per-step validation score
                        vali_eval_tmp = ndcg_at_k(ranker=ranker,
                                                  test_data=vali_data,
                                                  k=vali_k,
                                                  gpu=self.gpu,
                                                  device=self.device,
                                                  label_type=self.data_setting.data_dict['label_type'])
                        vali_eval_v = vali_eval_tmp.data.numpy()
                        if epoch_k > 1:  # further validation comparison
                            curr_vali_ndcg = vali_eval_v
                            if (curr_vali_ndcg > fold_optimal_ndcgk) or (
                                    epoch_k == epochs
                                    and curr_vali_ndcg == fold_optimal_ndcgk
                            ):  # we need at least a reference, in case all zero
                                print('\t', epoch_k,
                                      '- nDCG@{} - '.format(vali_k),
                                      curr_vali_ndcg)
                                fold_optimal_ndcgk = curr_vali_ndcg
                                fold_optimal_checkpoint = '-'.join(['Fold', str(fold_k)])
                                fold_optimal_epoch_val = epoch_k
                                ranker.save(dir=self.dir_run + fold_optimal_checkpoint + '/',
                                            name='_'.join(['net_params_epoch', str(epoch_k)]) + '.pkl')  # buffer the currently optimal model
                            else:
                                print('\t\t', epoch_k,
                                      '- nDCG@{} - '.format(vali_k),
                                      curr_vali_ndcg)

                    if do_summary:  # summarize per-step performance w.r.t. train, test
                        fold_k_epoch_k_train_ndcg_ks = ndcg_at_ks(
                            ranker=ranker,
                            test_data=train_data,
                            ks=cutoffs,
                            gpu=self.gpu,
                            device=self.device,
                            label_type=self.data_setting.data_dict['label_type'])
                        np_fold_k_epoch_k_train_ndcg_ks = fold_k_epoch_k_train_ndcg_ks.cpu().numpy() \
                            if self.gpu else fold_k_epoch_k_train_ndcg_ks.data.numpy()
                        list_fold_k_train_eval_track.append(np_fold_k_epoch_k_train_ndcg_ks)

                        fold_k_epoch_k_test_ndcg_ks = ndcg_at_ks(
                            ranker=ranker,
                            test_data=test_data,
                            ks=cutoffs,
                            gpu=self.gpu,
                            device=self.device,
                            label_type=self.data_setting.data_dict['label_type'])
                        np_fold_k_epoch_k_test_ndcg_ks = fold_k_epoch_k_test_ndcg_ks.cpu().numpy() \
                            if self.gpu else fold_k_epoch_k_test_ndcg_ks.data.numpy()
                        list_fold_k_test_eval_track.append(np_fold_k_epoch_k_test_ndcg_ks)

                        fold_k_epoch_k_loss = torch_fold_k_epoch_k_loss.cpu().numpy() \
                            if self.gpu else torch_fold_k_epoch_k_loss.data.numpy()
                        list_epoch_loss.append(fold_k_epoch_k_loss)

                        if do_vali:
                            list_fold_k_vali_eval_track.append(vali_eval_v)

                elif loss_guided:  # stopping check via epoch-loss
                    if first_round and torch_fold_k_epoch_k_loss >= threshold_epoch_loss:
                        print('Bad threshold: ', torch_fold_k_epoch_k_loss,
                              threshold_epoch_loss)

                    if torch_fold_k_epoch_k_loss < threshold_epoch_loss:
                        first_round = False
                        print('\tFold-', str(fold_k), ' Epoch-', str(epoch_k),
                              'Loss: ', torch_fold_k_epoch_k_loss)
                        threshold_epoch_loss = torch_fold_k_epoch_k_loss
                    else:
                        print('\tStopped according to epoch loss!',
                              torch_fold_k_epoch_k_loss, threshold_epoch_loss)
                        break

            if do_summary:  # track
                sy_prefix = '_'.join(['Fold', str(fold_k)])
                fold_k_train_eval = np.vstack(list_fold_k_train_eval_track)
                fold_k_test_eval = np.vstack(list_fold_k_test_eval_track)
                pickle_save(fold_k_train_eval, file=self.dir_run + '_'.join([sy_prefix, 'train_eval.np']))
                pickle_save(fold_k_test_eval, file=self.dir_run + '_'.join([sy_prefix, 'test_eval.np']))

                fold_k_epoch_loss = np.hstack(list_epoch_loss)
                pickle_save((fold_k_epoch_loss, len(train_data)),
                            file=self.dir_run + '_'.join([sy_prefix, 'epoch_loss.np']))
                if do_vali:
                    fold_k_vali_eval = np.hstack(list_fold_k_vali_eval_track)
                    pickle_save(fold_k_vali_eval, file=self.dir_run + '_'.join([sy_prefix, 'vali_eval.np']))

            if do_vali:  # using the fold-wise optimal model for later testing based on validation data
                buffered_model = '_'.join(['net_params_epoch', str(fold_optimal_epoch_val)]) + '.pkl'
                ranker.load(self.dir_run + fold_optimal_checkpoint + '/' + buffered_model)
                fold_optimal_ranker = ranker
            else:  # buffer the model after a fixed number of training epochs if no validation is deployed
                fold_optimal_checkpoint = '-'.join(['Fold', str(fold_k)])
                ranker.save(dir=self.dir_run + fold_optimal_checkpoint + '/',
                            name='_'.join(['net_params_epoch', str(epoch_k)]) + '.pkl')
                fold_optimal_ranker = ranker

            torch_fold_ndcg_ks = ndcg_at_ks(ranker=fold_optimal_ranker,
                                            test_data=test_data,
                                            ks=cutoffs,
                                            gpu=self.gpu,
                                            device=self.device,
                                            label_type=self.data_setting.data_dict['label_type'])
            fold_ndcg_ks = torch_fold_ndcg_ks.data.numpy()

            performance_list = [model_id + ' Fold-' + str(fold_k)]  # fold-wise performance
            for i, co in enumerate(cutoffs):
                performance_list.append('nDCG@{}:{:.4f}'.format(
                    co, fold_ndcg_ks[i]))
            performance_str = '\t'.join(performance_list)
            print('\t', performance_str)

            l2r_cv_avg_scores = np.add(l2r_cv_avg_scores, fold_ndcg_ks)  # sum for later cv-performance

        time_end = datetime.datetime.now()  # overall timing
        elapsed_time_str = str(time_end - time_begin)
        print('Elapsed time:\t', elapsed_time_str + "\n\n")

        l2r_cv_avg_scores = np.divide(l2r_cv_avg_scores, fold_num)
        eval_prefix = str(fold_num) + '-fold cross validation scores:' if do_vali \
            else str(fold_num) + '-fold average scores:'
        print(model_id, eval_prefix,
              metric_results_to_string(list_scores=l2r_cv_avg_scores,
                                       list_cutoffs=cutoffs)
              )  # print either cv or average performance

        return l2r_cv_avg_scores
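
To make the expected inputs concrete, here is a hedged sketch of the dictionary keys kfold_cv_eval reads above; the values are illustrative and the evaluator construction is elided:

    # The keys below are exactly those accessed in kfold_cv_eval; values are toy.
    eval_dict = dict(epochs=100, loss_guided=False, vali_k=5, log_step=2,
                     cutoffs=[1, 3, 5, 10], do_validation=True, do_summary=False)
    data_dict = dict(fold_num=5)                # plus the data settings used by load_data
    model_para_dict = dict(model_id='RankNet')  # plus model-specific hyper-parameters

    # avg_scores = evaluator.kfold_cv_eval(data_dict=data_dict, eval_dict=eval_dict,
    #                                      sf_para_dict=sf_para_dict,
    #                                      model_para_dict=model_para_dict)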
Example #5
    def get_cv_performance(self):
        time_end = datetime.datetime.now()  # overall timing
        elapsed_time_str = str(time_end - self.time_begin)

        ndcg_cv_avg_scores = np.divide(self.ndcg_cv_avg_scores, self.fold_num)
        nerr_cv_avg_scores = np.divide(self.nerr_cv_avg_scores, self.fold_num)
        ap_cv_avg_scores = np.divide(self.ap_cv_avg_scores, self.fold_num)
        p_cv_avg_scores = np.divide(self.p_cv_avg_scores, self.fold_num)

        eval_prefix = str(self.fold_num) + '-fold cross validation scores:' if self.do_validation \
                      else str(self.fold_num) + '-fold average scores:'

        list_metric_strs = []
        list_metric_strs.append(
            metric_results_to_string(list_scores=ndcg_cv_avg_scores,
                                     list_cutoffs=self.cutoffs,
                                     metric='nDCG'))
        list_metric_strs.append(
            metric_results_to_string(list_scores=nerr_cv_avg_scores,
                                     list_cutoffs=self.cutoffs,
                                     metric='nERR'))
        list_metric_strs.append(
            metric_results_to_string(list_scores=ap_cv_avg_scores,
                                     list_cutoffs=self.cutoffs,
                                     metric='AP'))
        list_metric_strs.append(
            metric_results_to_string(list_scores=p_cv_avg_scores,
                                     list_cutoffs=self.cutoffs,
                                     metric='P'))
        metric_string = '\n'.join(list_metric_strs)
        print("\n{} {}\n{}".format(self.model_id, eval_prefix, metric_string))
        print('Elapsed time:\t', elapsed_time_str + "\n\n")

        if self.reproduce:
            torch_mat_per_q_p = torch.cat(self.list_per_q_p, dim=0)
            torch_mat_per_q_ap = torch.cat(self.list_per_q_ap, dim=0)
            torch_mat_per_q_nerr = torch.cat(self.list_per_q_nerr, dim=0)
            torch_mat_per_q_ndcg = torch.cat(self.list_per_q_ndcg, dim=0)
            mat_per_q_p = torch_mat_per_q_p.data.numpy()
            mat_per_q_ap = torch_mat_per_q_ap.data.numpy()
            mat_per_q_nerr = torch_mat_per_q_nerr.data.numpy()
            mat_per_q_ndcg = torch_mat_per_q_ndcg.data.numpy()

            pickle_save(target=mat_per_q_p,
                        file=self.dir_run + '_'.join([self.model_id, 'all_fold_p_at_ks_per_q.np']))
            pickle_save(target=mat_per_q_ap,
                        file=self.dir_run + '_'.join([self.model_id, 'all_fold_ap_at_ks_per_q.np']))
            pickle_save(target=mat_per_q_nerr,
                        file=self.dir_run + '_'.join([self.model_id, 'all_fold_nerr_at_ks_per_q.np']))
            pickle_save(target=mat_per_q_ndcg,
                        file=self.dir_run + '_'.join([self.model_id, 'all_fold_ndcg_at_ks_per_q.np']))

        return ndcg_cv_avg_scores
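
When reproduce is on, per-fold [num_queries, num_cutoffs] score tensors are concatenated along dim=0 before saving, presumably so downstream per-query analysis (e.g., significance tests) can run over all folds at once. A minimal torch sketch of that step with toy shapes:

    import torch

    fold_1 = torch.rand(3, 4)  # 3 queries, 4 cutoffs
    fold_2 = torch.rand(5, 4)  # 5 queries from another fold
    mat_per_q = torch.cat([fold_1, fold_2], dim=0)
    print(mat_per_q.shape)        # torch.Size([8, 4])
    print(mat_per_q.mean(dim=0))  # per-cutoff mean over all queries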
Example #6
    def ad_cv_eval(self,
                   data_dict=None,
                   eval_dict=None,
                   ad_para_dict=None,
                   sf_para_dict=None):
        """
        Adversarial training and evaluation
        :param data_dict:
        :param eval_dict:
        :param ad_para_dict:
        :param sf_para_dict:
        :return:
        """
        self.check_consistency(data_dict, eval_dict)
        self.display_information(data_dict, model_para_dict=ad_para_dict)
        self.setup_eval(data_dict,
                        eval_dict,
                        sf_para_dict,
                        model_para_dict=ad_para_dict)

        model_id = ad_para_dict['model_id']
        fold_num = data_dict['fold_num']
        # for quick access to common evaluation settings
        epochs, loss_guided = eval_dict['epochs'], eval_dict['loss_guided']
        vali_k, log_step, cutoffs = eval_dict['vali_k'], eval_dict['log_step'], eval_dict['cutoffs']
        do_vali, do_summary = eval_dict['do_validation'], eval_dict['do_summary']

        if sf_para_dict['id'] == 'ffnns':
            sf_para_dict['ffnns'].update(
                dict(num_features=data_dict['num_features']))
        else:
            raise NotImplementedError

        ad_machine = self.get_ad_machine(eval_dict=eval_dict,
                                         data_dict=data_dict,
                                         sf_para_dict=sf_para_dict,
                                         ad_para_dict=ad_para_dict)

        time_begin = datetime.datetime.now()  # timing
        g_l2r_cv_avg_scores, d_l2r_cv_avg_scores = np.zeros(len(cutoffs)), np.zeros(len(cutoffs))  # fold average

        for fold_k in range(1, fold_num + 1):
            dict_buffer = dict()  # for buffering frequently used objs
            ad_machine.reset_generator_discriminator()

            fold_optimal_checkpoint = '-'.join(['Fold', str(fold_k)])

            train_data, test_data, vali_data = self.load_data(
                eval_dict, data_dict, fold_k)

            if do_vali: g_fold_optimal_ndcgk, d_fold_optimal_ndcgk = 0.0, 0.0
            if do_summary:
                list_epoch_loss = []  # not used yet
                g_list_fold_k_train_eval_track, g_list_fold_k_test_eval_track, g_list_fold_k_vali_eval_track = [], [], []
                d_list_fold_k_train_eval_track, d_list_fold_k_test_eval_track, d_list_fold_k_vali_eval_track = [], [], []

            for _ in range(10):
                ad_machine.burn_in(train_data=train_data)

            for epoch_k in range(1, epochs + 1):

                if model_id == 'IR_GMAN_List':
                    stop_training = ad_machine.mini_max_train(
                        train_data=train_data,
                        generator=ad_machine.generator,
                        pool_discriminator=ad_machine.pool_discriminator,
                        dict_buffer=dict_buffer)

                    g_ranker = ad_machine.get_generator()
                    d_ranker = ad_machine.pool_discriminator[0]
                else:
                    stop_training = ad_machine.mini_max_train(
                        train_data=train_data,
                        generator=ad_machine.generator,
                        discriminator=ad_machine.discriminator,
                        dict_buffer=dict_buffer)

                    g_ranker = ad_machine.get_generator()
                    d_ranker = ad_machine.get_discriminator()

                if stop_training:
                    print('training failed!')
                    break

                if (do_summary or do_vali) and (epoch_k % log_step == 0 or epoch_k == 1):  # stepwise check
                    if do_vali:
                        g_vali_eval_tmp = ndcg_at_k(
                            ranker=g_ranker,
                            test_data=vali_data,
                            k=vali_k,
                            multi_level_rele=self.data_setting.data_dict['multi_level_rele'],
                            batch_mode=True)
                        d_vali_eval_tmp = ndcg_at_k(
                            ranker=d_ranker,
                            test_data=vali_data,
                            k=vali_k,
                            multi_level_rele=self.data_setting.data_dict['multi_level_rele'],
                            batch_mode=True)
                        g_vali_eval_v, d_vali_eval_v = g_vali_eval_tmp.data.numpy(), d_vali_eval_tmp.data.numpy()

                        if epoch_k > 1:
                            g_buffer, g_tmp_metric_val, g_tmp_epoch = \
                                self.per_epoch_validation(ranker=g_ranker, curr_metric_val=g_vali_eval_v,
                                                          fold_optimal_metric_val=g_fold_optimal_ndcgk, curr_epoch=epoch_k,
                                                          id_str='G', fold_optimal_checkpoint=fold_optimal_checkpoint, epochs=epochs)
                            # observe better performance
                            if g_buffer:
                                g_fold_optimal_ndcgk, g_fold_optimal_epoch_val = g_tmp_metric_val, g_tmp_epoch

                            d_buffer, d_tmp_metric_val, d_tmp_epoch = \
                                self.per_epoch_validation(ranker=d_ranker, curr_metric_val=d_vali_eval_v,
                                                          fold_optimal_metric_val=d_fold_optimal_ndcgk, curr_epoch=epoch_k,
                                                          id_str='D', fold_optimal_checkpoint=fold_optimal_checkpoint, epochs=epochs)
                            if d_buffer:
                                d_fold_optimal_ndcgk, d_fold_optimal_epoch_val = d_tmp_metric_val, d_tmp_epoch

                    if do_summary:  # summarize per-step performance w.r.t. train, test
                        self.per_epoch_summary_step1(
                            ranker=g_ranker,
                            train_data=train_data,
                            test_data=test_data,
                            list_fold_k_train_eval_track=g_list_fold_k_train_eval_track,
                            list_fold_k_test_eval_track=g_list_fold_k_test_eval_track,
                            vali_eval_v=g_vali_eval_v,
                            list_fold_k_vali_eval_track=g_list_fold_k_vali_eval_track,
                            cutoffs=cutoffs,
                            do_vali=do_vali)

                        self.per_epoch_summary_step1(
                            ranker=d_ranker,
                            train_data=train_data,
                            test_data=test_data,
                            list_fold_k_train_eval_track=d_list_fold_k_train_eval_track,
                            list_fold_k_test_eval_track=d_list_fold_k_test_eval_track,
                            vali_eval_v=d_vali_eval_v,
                            list_fold_k_vali_eval_track=d_list_fold_k_vali_eval_track,
                            cutoffs=cutoffs,
                            do_vali=do_vali)

            if do_summary:
                self.per_epoch_summary_step2(
                    id_str='G',
                    fold_k=fold_k,
                    list_fold_k_train_eval_track=g_list_fold_k_train_eval_track,
                    list_fold_k_test_eval_track=g_list_fold_k_test_eval_track,
                    do_vali=do_vali,
                    list_fold_k_vali_eval_track=g_list_fold_k_vali_eval_track)

                self.per_epoch_summary_step2(
                    id_str='D',
                    fold_k=fold_k,
                    list_fold_k_train_eval_track=d_list_fold_k_train_eval_track,
                    list_fold_k_test_eval_track=d_list_fold_k_test_eval_track,
                    do_vali=do_vali,
                    list_fold_k_vali_eval_track=d_list_fold_k_vali_eval_track)

            if do_vali:  # use the fold-wise optimal model for later testing based on validation data
                g_buffered_model = '_'.join(['net_params_epoch', str(g_fold_optimal_epoch_val), 'G']) + '.pkl'
                g_ranker.load(self.dir_run + fold_optimal_checkpoint + '/' + g_buffered_model)
                g_fold_optimal_ranker = g_ranker

                d_buffered_model = '_'.join(['net_params_epoch', str(d_fold_optimal_epoch_val), 'D']) + '.pkl'
                d_ranker.load(self.dir_run + fold_optimal_checkpoint + '/' + d_buffered_model)
                d_fold_optimal_ranker = d_ranker

            else:  # use the default G: buffer the model after a fixed number of training epochs if no validation is deployed
                g_ranker.save(dir=self.dir_run + fold_optimal_checkpoint + '/',
                              name='_'.join(['net_params_epoch', str(epoch_k), 'G']) + '.pkl')
                g_fold_optimal_ranker = g_ranker

                d_ranker.save(dir=self.dir_run + fold_optimal_checkpoint + '/',
                              name='_'.join(['net_params_epoch', str(epoch_k), 'D']) + '.pkl')
                d_fold_optimal_ranker = d_ranker

            g_torch_fold_ndcg_ks = ndcg_at_ks(
                ranker=g_fold_optimal_ranker,
                test_data=test_data,
                ks=cutoffs,
                multi_level_rele=self.data_setting.data_dict['multi_level_rele'],
                batch_mode=True)
            g_fold_ndcg_ks = g_torch_fold_ndcg_ks.data.numpy()

            d_torch_fold_ndcg_ks = ndcg_at_ks(
                ranker=d_fold_optimal_ranker,
                test_data=test_data,
                ks=cutoffs,
                multi_level_rele=self.data_setting.data_dict['multi_level_rele'],
                batch_mode=True)
            d_fold_ndcg_ks = d_torch_fold_ndcg_ks.data.numpy()

            performance_list = [' Fold-' + str(fold_k)]  # fold-wise performance
            performance_list.append('Generator')
            for i, co in enumerate(cutoffs):
                performance_list.append('nDCG@{}:{:.4f}'.format(co, g_fold_ndcg_ks[i]))

            performance_list.append('\nDiscriminator')
            for i, co in enumerate(cutoffs):
                performance_list.append('nDCG@{}:{:.4f}'.format(co, d_fold_ndcg_ks[i]))

            performance_str = '\t'.join(performance_list)
            print('\t', performance_str)

            g_l2r_cv_avg_scores = np.add(g_l2r_cv_avg_scores, g_fold_ndcg_ks)  # sum for later cv-performance
            d_l2r_cv_avg_scores = np.add(d_l2r_cv_avg_scores, d_fold_ndcg_ks)

        time_end = datetime.datetime.now()  # overall timing
        elapsed_time_str = str(time_end - time_begin)
        print('Elapsed time:\t', elapsed_time_str + "\n\n")

        # begin to print either cv or average performance
        g_l2r_cv_avg_scores = np.divide(g_l2r_cv_avg_scores, fold_num)
        d_l2r_cv_avg_scores = np.divide(d_l2r_cv_avg_scores, fold_num)

        if do_vali:
            eval_prefix = str(fold_num) + '-fold cross validation scores:'
        else:
            eval_prefix = str(fold_num) + '-fold average scores:'

        print(
            'Generator', eval_prefix,
            metric_results_to_string(list_scores=g_l2r_cv_avg_scores,
                                     list_cutoffs=cutoffs))
        print(
            'Discriminator', eval_prefix,
            metric_results_to_string(list_scores=d_l2r_cv_avg_scores,
                                     list_cutoffs=cutoffs))
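
Structurally, ad_cv_eval is a burn-in phase followed by alternating generator/discriminator updates per epoch. The toy sketch below reproduces only that call pattern; _ToyAdMachine is a fake stand-in, not ptranking's ad_machine:

    class _ToyAdMachine:
        # Fake stand-in that mirrors only the calls made by ad_cv_eval above.
        generator, discriminator = 'G', 'D'

        def burn_in(self, train_data):
            pass  # would pre-train the generator/discriminator

        def mini_max_train(self, train_data, generator, discriminator):
            return False  # False means "keep training"

    machine = _ToyAdMachine()
    for _ in range(10):  # burn-in rounds, as above
        machine.burn_in(train_data=None)
    for epoch_k in range(1, 4):  # alternating mini-max updates per epoch
        if machine.mini_max_train(train_data=None, generator=machine.generator,
                                  discriminator=machine.discriminator):
            break  # stop_training signalled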
Example #7
    def fold_evaluation_reproduce(self, ranker, test_data, dir_run, max_label,
                                  fold_k, model_id):
        self.dir_run = dir_run
        subdir = '-'.join(['Fold', str(fold_k)])
        run_fold_k_dir = os.path.join(dir_run, subdir)
        fold_k_buffered_model_names = os.listdir(run_fold_k_dir)
        fold_opt_model_name = get_opt_model(fold_k_buffered_model_names)
        fold_opt_model = os.path.join(run_fold_k_dir, fold_opt_model_name)
        ranker.load(file_model=fold_opt_model)

        avg_andcg_at_ks, avg_err_ia_at_ks, avg_nerr_ia_at_ks, list_per_q_andcg = \
            ranker.srd_performance_at_ks(test_data=test_data, ks=self.cutoffs, device='cpu', max_label=max_label,
                                         generate_div_run=True, dir=run_fold_k_dir, fold_k=fold_k, need_per_q_andcg=True)

        fold_andcg_ks = avg_andcg_at_ks.data.numpy()
        fold_err_ia_ks = avg_err_ia_at_ks.data.numpy()
        fold_nerr_ia_ks = avg_nerr_ia_at_ks.data.numpy()
        self.list_per_q_andcg.extend(list_per_q_andcg)

        self.andcg_cv_avg_scores = np.add(self.andcg_cv_avg_scores,
                                          fold_andcg_ks)
        self.err_ia_cv_avg_scores = np.add(self.err_ia_cv_avg_scores,
                                           fold_err_ia_ks)
        self.nerr_ia_cv_avg_scores = np.add(self.nerr_ia_cv_avg_scores,
                                            fold_nerr_ia_ks)

        list_metric_strs = []
        list_metric_strs.append(
            metric_results_to_string(list_scores=fold_andcg_ks,
                                     list_cutoffs=self.cutoffs,
                                     metric='aNDCG'))
        list_metric_strs.append(
            metric_results_to_string(list_scores=fold_err_ia_ks,
                                     list_cutoffs=self.cutoffs,
                                     metric='ERR-IA'))
        list_metric_strs.append(
            metric_results_to_string(list_scores=fold_nerr_ia_ks,
                                     list_cutoffs=self.cutoffs,
                                     metric='nERR-IA'))
        metric_string = '\n\t'.join(list_metric_strs)
        print("\n{} on Fold - {}\n\t{}".format(model_id, str(fold_k),
                                               metric_string))

        p_ndeval = subprocess.Popen(['../../ptranking/metric/srd/ndeval',
                                     '../../ptranking/metric/srd/WT_Div_0912_Implicit_qrels.txt',
                                     run_fold_k_dir + '/fold_run.txt'],
                                    shell=False, stdout=subprocess.PIPE, bufsize=-1)
        output_eval_q = p_ndeval.communicate()
        output_eval_q = output_eval_q[-2].decode().split("\n")[-2]
        output_eval_q = output_eval_q.split(',')
        err_ia_5, err_ia_10, err_ia_20 = float(output_eval_q[2]), float(output_eval_q[3]), float(output_eval_q[4])
        nerr_ia_5, nerr_ia_10, nerr_ia_20 = float(output_eval_q[5]), float(output_eval_q[6]), float(output_eval_q[7])
        andcg_5, andcg_10, andcg_20 = float(output_eval_q[11]), float(output_eval_q[12]), float(output_eval_q[13])

        ndeval_err_ia_ks = np.asarray([err_ia_5, err_ia_10, err_ia_20])
        ndeval_nerr_ia_ks = np.asarray([nerr_ia_5, nerr_ia_10, nerr_ia_20])
        ndeval_andcg_ks = np.asarray([andcg_5, andcg_10, andcg_20])

        self.ndeval_err_ia_cv_avg_scores = np.add(
            self.ndeval_err_ia_cv_avg_scores, ndeval_err_ia_ks)
        self.ndeval_nerr_ia_cv_avg_scores = np.add(
            self.ndeval_nerr_ia_cv_avg_scores, ndeval_nerr_ia_ks)
        self.ndeval_andcg_cv_avg_scores = np.add(
            self.ndeval_andcg_cv_avg_scores, ndeval_andcg_ks)

        list_metric_strs = []
        list_metric_strs.append(
            metric_results_to_string(list_scores=ndeval_andcg_ks,
                                     list_cutoffs=self.ndeval_cutoffs,
                                     metric='aNDCG(ndeval)'))
        list_metric_strs.append(
            metric_results_to_string(list_scores=ndeval_err_ia_ks,
                                     list_cutoffs=self.ndeval_cutoffs,
                                     metric='ERR-IA(ndeval)'))
        list_metric_strs.append(
            metric_results_to_string(list_scores=ndeval_nerr_ia_ks,
                                     list_cutoffs=self.ndeval_cutoffs,
                                     metric='nERR-IA(ndeval)'))
        metric_string = '\n\t'.join(list_metric_strs)
        print("\n{} on Fold - {} (ndeval)\n\t{}".format(
            model_id, str(fold_k), metric_string))
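
The ndeval parsing above slices one comma-separated summary row by fixed column positions (indices 2-4 for ERR-IA, 5-7 for nERR-IA, 11-13 for alpha-nDCG at cutoffs 5/10/20). A self-contained sketch with a fabricated row, mirroring only those indices:

    # Fabricated CSV row; only the column indices match the parsing above.
    row = 'runid,amean,0.10,0.20,0.30,0.40,0.15,0.25,0.35,0.45,0.05,0.50,0.55,0.60'.split(',')
    err_ia = [float(v) for v in row[2:5]]    # ERR-IA@{5,10,20}
    nerr_ia = [float(v) for v in row[5:8]]   # nERR-IA@{5,10,20}
    andcg = [float(v) for v in row[11:14]]   # alpha-nDCG@{5,10,20}
    print(err_ia, nerr_ia, andcg)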