import os
import sys
import datetime

import numpy as np
import torch

# Project-internal names used below (e.g., LTRDataset, SPLIT_TYPE, LABEL_TYPE, LETORSampler,
# DataSetting, EvalSetting, ScoringFunctionParameter, ModelParameter, WassRank, CVTape,
# ValidationTape, SummaryTape, OptLossTape, ndcg_at_k, ndcg_at_ks, metric_results_to_string,
# pickle_save, MSLETOR_SEMI, MSLETOR_LIST, YAHOO_LTR, ISTELLA_LTR, LTRFRAME_TYPE) are assumed
# to be imported from the surrounding ptranking package; their exact module paths vary across
# versions and are therefore omitted here.


class LTREvaluator():
    """ The class for evaluating different ltr_adhoc methods. """

    def __init__(self, frame_id=LTRFRAME_TYPE.Adhoc, cuda=None):
        self.frame_id = frame_id
        if cuda is None:
            self.gpu, self.device = False, 'cpu'
        else:
            self.gpu, self.device = True, 'cuda:' + str(cuda)
            torch.cuda.set_device(cuda)

    def display_information(self, data_dict, model_para_dict):
        """
        Display some information.
        :param data_dict:
        :param model_para_dict:
        :return:
        """
        if self.gpu: print('-- GPU({}) is launched --'.format(self.device))
        print('\nStart {} on {} >>>'.format(model_para_dict['model_id'], data_dict['data_id']))

    def check_consistency(self, data_dict, eval_dict, sf_para_dict):
        """ Check whether the settings are reasonable in the context of adhoc learning-to-rank. """
        ''' Part-1: data loading '''
        if data_dict['data_id'] == 'Istella':
            assert eval_dict['do_validation'] is not True  # since there is no validation data

        if data_dict['data_id'] in MSLETOR_SEMI:
            assert data_dict['train_presort'] is not True  # due to the non-labeled documents
            if data_dict['binary_rele']:  # for unsupervised datasets, binarization is required due to '-1' labels
                assert data_dict['unknown_as_zero']
        else:
            assert data_dict['unknown_as_zero'] is not True  # since there are no non-labeled documents

        if data_dict['data_id'] in MSLETOR_LIST:  # for which the standard ranking of each query is unique
            assert 1 == data_dict['train_batch_size']

        if data_dict['scale_data']:
            scaler_level = data_dict['scaler_level'] if 'scaler_level' in data_dict else None
            assert not scaler_level == 'DATASET'  # not a supported setting

        assert data_dict['validation_presort']          # rule-of-thumb setting for adhoc learning-to-rank
        assert data_dict['test_presort']                # rule-of-thumb setting for adhoc learning-to-rank
        assert 1 == data_dict['validation_batch_size']  # rule-of-thumb setting for adhoc learning-to-rank
        assert 1 == data_dict['test_batch_size']        # rule-of-thumb setting for adhoc learning-to-rank

        ''' Part-2: evaluation setting '''
        if eval_dict['mask_label']:  # True aims to mimic semi-supervised data by masking labels of supervised data
            assert not data_dict['data_id'] in MSLETOR_SEMI

        ''' Part-3: network setting '''
        if data_dict['train_batch_size'] > 1:
            assert sf_para_dict['one_fits_all']['BN'] == False  # a kind of feature normalization

    def determine_files(self, data_dict, fold_k=None):
        """
        Determine the file paths correspondingly.
        :param data_dict:
        :param fold_k:
        :return:
        """
        if data_dict['data_id'] in YAHOO_LTR:
            file_train, file_vali, file_test = os.path.join(data_dict['dir_data'], data_dict['data_id'].lower() + '.train.txt'),\
                                               os.path.join(data_dict['dir_data'], data_dict['data_id'].lower() + '.valid.txt'),\
                                               os.path.join(data_dict['dir_data'], data_dict['data_id'].lower() + '.test.txt')
        elif data_dict['data_id'] in ISTELLA_LTR:
            if data_dict['data_id'] == 'Istella_X' or data_dict['data_id'] == 'Istella_S':
                file_train, file_vali, file_test = data_dict['dir_data'] + 'train.txt', \
                                                   data_dict['dir_data'] + 'vali.txt', \
                                                   data_dict['dir_data'] + 'test.txt'
            else:
                file_vali = None
                file_train, file_test = data_dict['dir_data'] + 'train.txt', data_dict['dir_data'] + 'test.txt'
        else:
            print('Fold-', fold_k)
            fold_k_dir = data_dict['dir_data'] + 'Fold' + str(fold_k) + '/'
            file_train, file_vali, file_test = fold_k_dir + 'train.txt', fold_k_dir + 'vali.txt', fold_k_dir + 'test.txt'

        return file_train, file_vali, file_test

    def load_data(self, eval_dict, data_dict, fold_k):
        """
        Load the dataset correspondingly.
        :param eval_dict:
        :param data_dict:
        :param fold_k:
        :return:
        """
        file_train, file_vali, file_test = self.determine_files(data_dict, fold_k=fold_k)

        train_batch_size, train_presort = data_dict['train_batch_size'], data_dict['train_presort']
        input_eval_dict = eval_dict if eval_dict['mask_label'] else None  # required when enabling masking data

        train_data = LTRDataset(file=file_train, split_type=SPLIT_TYPE.Train, batch_size=train_batch_size,
                                shuffle=True, presort=train_presort, data_dict=data_dict, eval_dict=input_eval_dict)
        test_data = LTRDataset(file=file_test, split_type=SPLIT_TYPE.Test, shuffle=False, data_dict=data_dict,
                               batch_size=data_dict['test_batch_size'])

        if eval_dict['do_validation'] or eval_dict['do_summary']:  # vali_data is required
            vali_data = LTRDataset(file=file_vali, split_type=SPLIT_TYPE.Validation, shuffle=False,
                                   batch_size=data_dict['validation_batch_size'], data_dict=data_dict)
        else:
            vali_data = None

        return train_data, test_data, vali_data

    def load_ranker(self, sf_para_dict, model_para_dict):
        """
        Load a ranker correspondingly.
        :param sf_para_dict:
        :param model_para_dict:
        :return:
        """
        model_id = model_para_dict['model_id']

        if model_id in ['RankMSE', 'ListNet', 'RankCosine']:
            ranker = globals()[model_id](sf_para_dict=sf_para_dict, gpu=self.gpu, device=self.device)
        elif model_id in ['RankNet', 'ListMLE', 'LambdaRank', 'STListNet', 'ApproxNDCG', 'DirectOpt',
                          'LambdaLoss', 'MarginLambdaLoss']:
            ranker = globals()[model_id](sf_para_dict=sf_para_dict, model_para_dict=model_para_dict,
                                         gpu=self.gpu, device=self.device)
        elif model_id == 'WassRank':
            ranker = WassRank(sf_para_dict=sf_para_dict, wass_para_dict=model_para_dict,
                              dict_cost_mats=self.dict_cost_mats, dict_std_dists=self.dict_std_dists,
                              gpu=self.gpu, device=self.device)
        else:
            raise NotImplementedError

        return ranker

    def setup_output(self, data_dict=None, eval_dict=None):
        """
        Update the output directory.
        :param data_dict:
        :param eval_dict:
        :return:
        """
        model_id = self.model_parameter.model_id
        grid_search, do_vali, dir_output = eval_dict['grid_search'], eval_dict['do_validation'], eval_dict['dir_output']
        mask_label = eval_dict['mask_label']

        if grid_search:
            dir_root = dir_output + '_'.join(['gpu', 'grid', model_id]) + '/' if self.gpu \
                else dir_output + '_'.join(['grid', model_id]) + '/'
        else:
            dir_root = dir_output

        eval_dict['dir_root'] = dir_root
        if not os.path.exists(dir_root): os.makedirs(dir_root)

        sf_str = self.sf_parameter.to_para_string()
        data_eval_str = '_'.join([self.data_setting.to_data_setting_string(),
                                  self.eval_setting.to_eval_setting_string()])
        if mask_label:
            data_eval_str = '_'.join([data_eval_str, 'MaskLabel', 'Ratio', '{:,g}'.format(eval_dict['mask_ratio'])])

        file_prefix = '_'.join([model_id, 'SF', sf_str, data_eval_str])

        if data_dict['scale_data']:
            if data_dict['scaler_level'] == 'QUERY':
                file_prefix = '_'.join([file_prefix, 'QS', data_dict['scaler_id']])
            else:
                file_prefix = '_'.join([file_prefix, 'DS', data_dict['scaler_id']])

        dir_run = dir_root + file_prefix + '/'  # run-specific outputs

        model_para_string = self.model_parameter.to_para_string()
        if len(model_para_string) > 0:
            dir_run = dir_run + model_para_string + '/'

        eval_dict['dir_run'] = dir_run
        if not os.path.exists(dir_run): os.makedirs(dir_run)

        return dir_run

    def setup_eval(self, data_dict, eval_dict, sf_para_dict, model_para_dict):
        """
        Finalize the evaluation setting correspondingly.
        :param data_dict:
        :param eval_dict:
        :param sf_para_dict:
        :param model_para_dict:
        :return:
        """
        # update the network's meta-information given the data
        if sf_para_dict['id'] == 'ffnns':
            sf_para_dict['ffnns'].update(dict(num_features=data_dict['num_features']))
        else:
            raise NotImplementedError

        self.dir_run = self.setup_output(data_dict, eval_dict)

        if eval_dict['do_log']:
            sys.stdout = open(self.dir_run + 'log.txt', "w")
        # if self.do_summary: self.summary_writer = SummaryWriter(self.dir_run + 'summary')

    def log_max(self, data_dict=None, max_cv_avg_scores=None, sf_para_dict=None, eval_dict=None, log_para_str=None):
        ''' Log the best performance across the grid search and the corresponding setting. '''
        dir_root, cutoffs = eval_dict['dir_root'], eval_dict['cutoffs']
        data_id = data_dict['data_id']

        sf_str = self.sf_parameter.to_para_string(log=True)
        data_eval_str = self.data_setting.to_data_setting_string(log=True) + '\n' + \
                        self.eval_setting.to_eval_setting_string(log=True)

        with open(file=dir_root + '/' + data_id + '_max.txt', mode='w') as max_writer:
            max_writer.write('\n\n'.join([data_eval_str, sf_str, log_para_str,
                                          metric_results_to_string(max_cv_avg_scores, cutoffs)]))

    def train_ranker(self, ranker, train_data, model_para_dict=None, epoch_k=None, reranking=False):
        ''' One epoch of training for the given ranker. '''
        presort, label_type = train_data.presort, train_data.label_type
        epoch_loss = torch.cuda.FloatTensor([0.0]) if self.gpu else torch.FloatTensor([0.0])

        for qid, batch_rankings, batch_stds in train_data:  # _, [batch, ranking_size, num_features], [batch, ranking_size]
            if self.gpu: batch_rankings, batch_stds = batch_rankings.to(self.device), batch_stds.to(self.device)

            batch_loss, stop_training = ranker.train(batch_rankings, batch_stds, qid=qid, epoch_k=epoch_k,
                                                     presort=presort, label_type=label_type)
            if stop_training:
                break
            else:
                epoch_loss += batch_loss.item()

        return epoch_loss, stop_training

    def kfold_cv_eval(self, data_dict=None, eval_dict=None, sf_para_dict=None, model_para_dict=None):
        """
        Evaluate learning-to-rank methods via k-fold cross validation if there are k folds, otherwise one fold.
        :param data_dict: settings w.r.t. data
        :param eval_dict: settings w.r.t. evaluation
        :param sf_para_dict: settings w.r.t. the scoring function
        :param model_para_dict: settings w.r.t. the ltr_adhoc model
        :return:
        """
        self.display_information(data_dict, model_para_dict)
        self.check_consistency(data_dict, eval_dict, sf_para_dict)
        self.setup_eval(data_dict, eval_dict, sf_para_dict, model_para_dict)

        model_id = model_para_dict['model_id']
        fold_num = data_dict['fold_num']
        # for quick access of common evaluation settings
        epochs, loss_guided = eval_dict['epochs'], eval_dict['loss_guided']
        vali_k, log_step, cutoffs = eval_dict['vali_k'], eval_dict['log_step'], eval_dict['cutoffs']
        do_vali, do_summary = eval_dict['do_validation'], eval_dict['do_summary']

        ranker = self.load_ranker(model_para_dict=model_para_dict, sf_para_dict=sf_para_dict)

        time_begin = datetime.datetime.now()        # timing
        l2r_cv_avg_scores = np.zeros(len(cutoffs))  # fold average

        for fold_k in range(1, fold_num + 1):  # evaluation over k-fold data
            ranker.reset_parameters()  # reset with the same random initialization

            train_data, test_data, vali_data = self.load_data(eval_dict, data_dict, fold_k)

            if do_vali: fold_optimal_ndcgk = 0.0
            if do_summary:
                list_epoch_loss, list_fold_k_train_eval_track, list_fold_k_test_eval_track, \
                    list_fold_k_vali_eval_track = [], [], [], []
            if not do_vali and loss_guided:
                first_round = True
                threshold_epoch_loss = torch.cuda.FloatTensor([10000000.0]) if self.gpu \
                    else torch.FloatTensor([10000000.0])

            for epoch_k in range(1, epochs + 1):
                torch_fold_k_epoch_k_loss, stop_training = self.train_ranker(
                    ranker=ranker, train_data=train_data, model_para_dict=model_para_dict, epoch_k=epoch_k)

                ranker.scheduler.step()  # adaptive learning rate with step_size=40, gamma=0.5

                if stop_training:
                    print('training failed!')
                    break

                if (do_summary or do_vali) and (epoch_k % log_step == 0 or epoch_k == 1):  # stepwise check
                    if do_vali:  # per-step validation score
                        vali_eval_tmp = ndcg_at_k(ranker=ranker, test_data=vali_data, k=vali_k, gpu=self.gpu,
                                                  device=self.device,
                                                  label_type=self.data_setting.data_dict['label_type'])
                        vali_eval_v = vali_eval_tmp.data.numpy()
                        if epoch_k > 1:  # further validation comparison
                            curr_vali_ndcg = vali_eval_v
                            if (curr_vali_ndcg > fold_optimal_ndcgk) or \
                                    (epoch_k == epochs and curr_vali_ndcg == fold_optimal_ndcgk):
                                # keep at least one reference model, in case all scores are zero
                                print('\t', epoch_k, '- nDCG@{} - '.format(vali_k), curr_vali_ndcg)
                                fold_optimal_ndcgk = curr_vali_ndcg
                                fold_optimal_checkpoint = '-'.join(['Fold', str(fold_k)])
                                fold_optimal_epoch_val = epoch_k
                                ranker.save(dir=self.dir_run + fold_optimal_checkpoint + '/',
                                            name='_'.join(['net_params_epoch', str(epoch_k)]) + '.pkl')
                                # buffer the currently optimal model
                            else:
                                print('\t\t', epoch_k, '- nDCG@{} - '.format(vali_k), curr_vali_ndcg)

                    if do_summary:  # summarize per-step performance w.r.t. train, test
                        fold_k_epoch_k_train_ndcg_ks = ndcg_at_ks(ranker=ranker, test_data=train_data, ks=cutoffs,
                                                                  gpu=self.gpu, device=self.device,
                                                                  label_type=self.data_setting.data_dict['label_type'])
                        np_fold_k_epoch_k_train_ndcg_ks = fold_k_epoch_k_train_ndcg_ks.cpu().numpy() if self.gpu \
                            else fold_k_epoch_k_train_ndcg_ks.data.numpy()
                        list_fold_k_train_eval_track.append(np_fold_k_epoch_k_train_ndcg_ks)

                        fold_k_epoch_k_test_ndcg_ks = ndcg_at_ks(ranker=ranker, test_data=test_data, ks=cutoffs,
                                                                 gpu=self.gpu, device=self.device,
                                                                 label_type=self.data_setting.data_dict['label_type'])
                        np_fold_k_epoch_k_test_ndcg_ks = fold_k_epoch_k_test_ndcg_ks.cpu().numpy() if self.gpu \
                            else fold_k_epoch_k_test_ndcg_ks.data.numpy()
                        list_fold_k_test_eval_track.append(np_fold_k_epoch_k_test_ndcg_ks)

                        fold_k_epoch_k_loss = torch_fold_k_epoch_k_loss.cpu().numpy() if self.gpu \
                            else torch_fold_k_epoch_k_loss.data.numpy()
                        list_epoch_loss.append(fold_k_epoch_k_loss)

                        if do_vali: list_fold_k_vali_eval_track.append(vali_eval_v)

                elif loss_guided:  # stopping check via epoch-loss
                    if first_round and torch_fold_k_epoch_k_loss >= threshold_epoch_loss:
                        print('Bad threshold: ', torch_fold_k_epoch_k_loss, threshold_epoch_loss)

                    if torch_fold_k_epoch_k_loss < threshold_epoch_loss:
                        first_round = False
                        print('\tFold-', str(fold_k), ' Epoch-', str(epoch_k), 'Loss: ', torch_fold_k_epoch_k_loss)
                        threshold_epoch_loss = torch_fold_k_epoch_k_loss
                    else:
                        print('\tStopped according to epoch-loss!', torch_fold_k_epoch_k_loss, threshold_epoch_loss)
                        break

            if do_summary:  # track
                sy_prefix = '_'.join(['Fold', str(fold_k)])
                fold_k_train_eval = np.vstack(list_fold_k_train_eval_track)
                fold_k_test_eval = np.vstack(list_fold_k_test_eval_track)
                pickle_save(fold_k_train_eval, file=self.dir_run + '_'.join([sy_prefix, 'train_eval.np']))
                pickle_save(fold_k_test_eval, file=self.dir_run + '_'.join([sy_prefix, 'test_eval.np']))

                fold_k_epoch_loss = np.hstack(list_epoch_loss)
                pickle_save((fold_k_epoch_loss, train_data.__len__()),
                            file=self.dir_run + '_'.join([sy_prefix, 'epoch_loss.np']))
                if do_vali:
                    fold_k_vali_eval = np.hstack(list_fold_k_vali_eval_track)
                    pickle_save(fold_k_vali_eval, file=self.dir_run + '_'.join([sy_prefix, 'vali_eval.np']))

            if do_vali:  # use the fold-wise optimal model (chosen on validation data) for later testing
                buffered_model = '_'.join(['net_params_epoch', str(fold_optimal_epoch_val)]) + '.pkl'
                ranker.load(self.dir_run + fold_optimal_checkpoint + '/' + buffered_model)
                fold_optimal_ranker = ranker
            else:  # buffer the model after a fixed number of training epochs if no validation is deployed
                fold_optimal_checkpoint = '-'.join(['Fold', str(fold_k)])
                ranker.save(dir=self.dir_run + fold_optimal_checkpoint + '/',
                            name='_'.join(['net_params_epoch', str(epoch_k)]) + '.pkl')
                fold_optimal_ranker = ranker

            torch_fold_ndcg_ks = ndcg_at_ks(ranker=fold_optimal_ranker, test_data=test_data, ks=cutoffs,
                                            gpu=self.gpu, device=self.device,
                                            label_type=self.data_setting.data_dict['label_type'])
            fold_ndcg_ks = torch_fold_ndcg_ks.data.numpy()

            performance_list = [model_id + ' Fold-' + str(fold_k)]  # fold-wise performance
            for i, co in enumerate(cutoffs):
                performance_list.append('nDCG@{}:{:.4f}'.format(co, fold_ndcg_ks[i]))
            performance_str = '\t'.join(performance_list)
            print('\t', performance_str)

            l2r_cv_avg_scores = np.add(l2r_cv_avg_scores, fold_ndcg_ks)  # sum for later cv-performance

        time_end = datetime.datetime.now()  # overall timing
        elapsed_time_str = str(time_end - time_begin)
        print('Elapsed time:\t', elapsed_time_str + "\n\n")

        l2r_cv_avg_scores = np.divide(l2r_cv_avg_scores, fold_num)
        eval_prefix = str(fold_num) + '-fold cross validation scores:' if do_vali \
            else str(fold_num) + '-fold average scores:'
        print(model_id, eval_prefix,
              metric_results_to_string(list_scores=l2r_cv_avg_scores, list_cutoffs=cutoffs))

        return l2r_cv_avg_scores

    def naive_train(self, ranker, eval_dict, train_data=None, test_data=None, vali_data=None):
        """
        A simple train-and-test routine: train on the training data & test on the testing data.
        :param ranker:
        :param eval_dict:
        :param train_data:
        :param test_data:
        :param vali_data:
        :return:
        """
        ranker.reset_parameters()  # reset with the same random initialization

        assert train_data is not None
        assert test_data is not None

        list_losses = []
        list_train_ndcgs = []
        list_test_ndcgs = []

        epochs, cutoffs = eval_dict['epochs'], eval_dict['cutoffs']

        for i in range(epochs):
            epoch_loss = torch.zeros(1).to(self.device) if self.gpu else torch.zeros(1)
            for qid, batch_rankings, batch_stds in train_data:
                if self.gpu: batch_rankings, batch_stds = batch_rankings.to(self.device), batch_stds.to(self.device)
                batch_loss, stop_training = ranker.train(batch_rankings, batch_stds, qid=qid)
                epoch_loss += batch_loss.item()

            np_epoch_loss = epoch_loss.cpu().numpy() if self.gpu else epoch_loss.data.numpy()
            list_losses.append(np_epoch_loss)

            test_ndcg_ks = ndcg_at_ks(ranker=ranker, test_data=test_data, ks=cutoffs,
                                      label_type=LABEL_TYPE.MultiLabel, gpu=self.gpu, device=self.device)
            np_test_ndcg_ks = test_ndcg_ks.data.numpy()
            list_test_ndcgs.append(np_test_ndcg_ks)

            train_ndcg_ks = ndcg_at_ks(ranker=ranker, test_data=train_data, ks=cutoffs,
                                       label_type=LABEL_TYPE.MultiLabel, gpu=self.gpu, device=self.device)
            np_train_ndcg_ks = train_ndcg_ks.data.numpy()
            list_train_ndcgs.append(np_train_ndcg_ks)

        test_ndcgs = np.vstack(list_test_ndcgs)
        train_ndcgs = np.vstack(list_train_ndcgs)

        return list_losses, train_ndcgs, test_ndcgs

    def set_data_setting(self, data_json=None, debug=False, data_id=None, dir_data=None):
        if data_json is not None:
            self.data_setting = DataSetting(data_json=data_json)
        else:
            self.data_setting = DataSetting(debug=debug, data_id=data_id, dir_data=dir_data)

    def get_default_data_setting(self):
        return self.data_setting.default_setting()

    def iterate_data_setting(self):
        return self.data_setting.grid_search()

    def set_eval_setting(self, eval_json=None, debug=False, dir_output=None):
        if eval_json is not None:
            self.eval_setting = EvalSetting(debug=debug, eval_json=eval_json)
        else:
            self.eval_setting = EvalSetting(debug=debug, dir_output=dir_output)

    def get_default_eval_setting(self):
        return self.eval_setting.default_setting()

    def iterate_eval_setting(self):
        return self.eval_setting.grid_search()

    def set_scoring_function_setting(self, sf_json=None, debug=None, data_dict=None):
        if sf_json is not None:
            self.sf_parameter = ScoringFunctionParameter(sf_json=sf_json)
        else:
            self.sf_parameter = ScoringFunctionParameter(debug=debug, data_dict=data_dict)

    def get_default_scoring_function_setting(self):
        return self.sf_parameter.default_para_dict()

    def iterate_scoring_function_setting(self, data_dict=None):
        return self.sf_parameter.grid_search(data_dict=data_dict)

    def set_model_setting(self, model_id=None, dir_json=None, debug=False):
        """
        Initialize the parameter class for the specified model.
        :param debug:
        :param model_id:
        :return:
        """
        if model_id in ['RankMSE', 'ListNet', 'RankCosine']:  # ModelParameter is sufficient
            self.model_parameter = ModelParameter(model_id=model_id)
        else:
            if dir_json is not None:
                para_json = dir_json + model_id + "Parameter.json"
                self.model_parameter = globals()[model_id + "Parameter"](para_json=para_json)
            else:  # the third type, where debug mode enables a quick test
                self.model_parameter = globals()[model_id + "Parameter"](debug=debug)

    def get_default_model_setting(self):
        return self.model_parameter.default_para_dict()

    def iterate_model_setting(self):
        return self.model_parameter.grid_search()

    def declare_global(self, model_id=None):
        """
        Declare global variables if required, e.g., for efficiency.
        :param model_id:
        :return:
        """
        if model_id == 'WassRank':  # global buffering across a number of runs with different model parameters
            self.dict_cost_mats, self.dict_std_dists = dict(), dict()

    def point_run(self, debug=False, model_id=None, data_id=None, dir_data=None, dir_output=None):
        """
        Perform a one-time run based on the given setting.
        :param debug:
        :param model_id:
        :param data_id:
        :param dir_data:
        :param dir_output:
        :return:
        """
        self.set_eval_setting(debug=debug, dir_output=dir_output)
        self.set_data_setting(debug=debug, data_id=data_id, dir_data=dir_data)
        data_dict = self.get_default_data_setting()
        eval_dict = self.get_default_eval_setting()

        self.set_scoring_function_setting(debug=debug, data_dict=data_dict)
        sf_para_dict = self.get_default_scoring_function_setting()

        self.set_model_setting(debug=debug, model_id=model_id)
        model_para_dict = self.get_default_model_setting()

        self.declare_global(model_id=model_id)

        self.kfold_cv_eval(data_dict=data_dict, eval_dict=eval_dict, model_para_dict=model_para_dict,
                           sf_para_dict=sf_para_dict)

    def grid_run(self, model_id=None, dir_json=None, debug=False, data_id=None, dir_data=None, dir_output=None):
        """
        Explore the effects of different hyper-parameters of a model via grid search.
        :param debug:
        :param model_id:
        :param data_id:
        :param dir_data:
        :param dir_output:
        :return:
        """
        if dir_json is not None:
            data_eval_sf_json = dir_json + 'Data_Eval_ScoringFunction.json'
            self.set_eval_setting(debug=debug, eval_json=data_eval_sf_json)
            self.set_data_setting(data_json=data_eval_sf_json)
            self.set_scoring_function_setting(sf_json=data_eval_sf_json)
            self.set_model_setting(model_id=model_id, dir_json=dir_json)
        else:
            self.set_eval_setting(debug=debug, dir_output=dir_output)
            self.set_data_setting(debug=debug, data_id=data_id, dir_data=dir_data)
            self.set_scoring_function_setting(debug=debug)
            self.set_model_setting(debug=debug, model_id=model_id)

        self.declare_global(model_id=model_id)

        ''' select the best setting through grid search '''
        vali_k, cutoffs = 5, [1, 3, 5, 10, 20, 50]
        max_cv_avg_scores = np.zeros(len(cutoffs))  # fold average
        k_index = cutoffs.index(vali_k)
        max_sf_para_dict, max_eval_dict, max_model_para_dict = None, None, None

        for data_dict in self.iterate_data_setting():
            for eval_dict in self.iterate_eval_setting():
                assert self.eval_setting.check_consistence(vali_k=vali_k, cutoffs=cutoffs)  # a necessary consistency check
                for sf_para_dict in self.iterate_scoring_function_setting(data_dict=data_dict):
                    for model_para_dict in self.iterate_model_setting():
                        curr_cv_avg_scores = self.kfold_cv_eval(data_dict=data_dict, eval_dict=eval_dict,
                                                                sf_para_dict=sf_para_dict,
                                                                model_para_dict=model_para_dict)
                        if curr_cv_avg_scores[k_index] > max_cv_avg_scores[k_index]:
                            max_cv_avg_scores, max_sf_para_dict, max_eval_dict, max_model_para_dict = \
                                curr_cv_avg_scores, sf_para_dict, eval_dict, model_para_dict

        # log the best setting
        self.log_max(data_dict=data_dict, eval_dict=max_eval_dict, max_cv_avg_scores=max_cv_avg_scores,
                     sf_para_dict=max_sf_para_dict,
                     log_para_str=self.model_parameter.to_para_string(log=True,
                                                                      given_para_dict=max_model_para_dict))

    def run(self, debug=False, model_id=None, config_with_json=None, dir_json=None, data_id=None, dir_data=None,
            dir_output=None, grid_search=False):
        if config_with_json:
            assert dir_json is not None
            self.grid_run(debug=debug, model_id=model_id, dir_json=dir_json)
        else:
            if grid_search:
                self.grid_run(debug=debug, model_id=model_id, data_id=data_id, dir_data=dir_data,
                              dir_output=dir_output)
            else:
                self.point_run(debug=debug, model_id=model_id, data_id=data_id, dir_data=dir_data,
                               dir_output=dir_output)
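# Note: a second, revised definition of LTREvaluator follows; when both definitions live in one
# module, the later one shadows this one at import time.
#
# Usage sketch for the evaluator above ('ListNet' and the paths are illustrative placeholders,
# not defaults of this class):
#
#     evaluator = LTREvaluator(cuda=None)  # CPU; pass an integer GPU index to use CUDA
#     evaluator.run(debug=True, model_id='ListNet', grid_search=False,
#                   data_id='MQ2008_Super', dir_data='/path/to/MQ2008/',
#                   dir_output='/path/to/output/')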
class LTREvaluator():
    """ The class for evaluating different ltr_adhoc methods. """

    def __init__(self, frame_id=LTRFRAME_TYPE.Adhoc, cuda=None):
        self.frame_id = frame_id
        if cuda is None:
            self.gpu, self.device = False, 'cpu'
        else:
            self.gpu, self.device = True, 'cuda:' + str(cuda)
            torch.cuda.set_device(cuda)

    def display_information(self, data_dict, model_para_dict, reproduce=False):
        """
        Display some information.
        :param data_dict:
        :param model_para_dict:
        :return:
        """
        if self.gpu:
            print('-- GPU({}) is launched --'.format(self.device))
        else:
            print('Only CPU is used.')

        if reproduce:
            print('\nReproducing results for {} on {} >>>'.format(model_para_dict['model_id'],
                                                                  data_dict['data_id']))
        else:
            print('\nStart {} on {} >>>'.format(model_para_dict['model_id'], data_dict['data_id']))

    def check_consistency(self, data_dict, eval_dict, sf_para_dict):
        """ Check whether the settings are reasonable in the context of adhoc learning-to-rank. """
        ''' Part-1: data loading '''
        if data_dict['data_id'] == 'Istella':
            assert eval_dict['do_validation'] is not True  # since there is no validation data

        if data_dict['data_id'] in MSLETOR_SEMI:
            assert data_dict['train_presort'] is not True  # due to the non-labeled documents
            if data_dict['binary_rele']:  # for unsupervised datasets, binarization is required due to '-1' labels
                assert data_dict['unknown_as_zero']
        else:
            assert data_dict['unknown_as_zero'] is not True  # since there are no non-labeled documents

        if data_dict['data_id'] in MSLETOR_LIST:  # for which the standard ranking of each query is unique
            assert 1 == data_dict['train_batch_size']

        if data_dict['scale_data']:
            scaler_level = data_dict['scaler_level'] if 'scaler_level' in data_dict else None
            assert not scaler_level == 'DATASET'  # not a supported setting

        assert data_dict['validation_presort']  # rule-of-thumb setting for adhoc learning-to-rank
        assert data_dict['test_presort']        # rule-of-thumb setting for adhoc learning-to-rank

        ''' Part-2: evaluation setting '''
        if eval_dict['mask_label']:  # True aims to mimic semi-supervised data by masking labels of supervised data
            assert not data_dict['data_id'] in MSLETOR_SEMI

    def determine_files(self, data_dict, fold_k=None):
        """
        Determine the file paths correspondingly.
        :param data_dict:
        :param fold_k:
        :return:
        """
        if data_dict['data_id'] in YAHOO_LTR:
            file_train, file_vali, file_test = os.path.join(data_dict['dir_data'], data_dict['data_id'].lower() + '.train.txt'),\
                                               os.path.join(data_dict['dir_data'], data_dict['data_id'].lower() + '.valid.txt'),\
                                               os.path.join(data_dict['dir_data'], data_dict['data_id'].lower() + '.test.txt')
        elif data_dict['data_id'] in ISTELLA_LTR:
            if data_dict['data_id'] == 'Istella_X' or data_dict['data_id'] == 'Istella_S':
                file_train, file_vali, file_test = data_dict['dir_data'] + 'train.txt', \
                                                   data_dict['dir_data'] + 'vali.txt', \
                                                   data_dict['dir_data'] + 'test.txt'
            else:
                file_vali = None
                file_train, file_test = data_dict['dir_data'] + 'train.txt', data_dict['dir_data'] + 'test.txt'
        else:
            print('Fold-', fold_k)
            fold_k_dir = data_dict['dir_data'] + 'Fold' + str(fold_k) + '/'
            file_train, file_vali, file_test = fold_k_dir + 'train.txt', fold_k_dir + 'vali.txt', fold_k_dir + 'test.txt'

        return file_train, file_vali, file_test

    def load_data(self, eval_dict, data_dict, fold_k):
        """
        Load the dataset correspondingly.
        :param eval_dict:
        :param data_dict:
        :param fold_k:
        :return:
        """
        file_train, file_vali, file_test = self.determine_files(data_dict, fold_k=fold_k)

        input_eval_dict = eval_dict if eval_dict['mask_label'] else None  # required when enabling masking data

        _train_data = LTRDataset(file=file_train, split_type=SPLIT_TYPE.Train, presort=data_dict['train_presort'],
                                 data_dict=data_dict, eval_dict=input_eval_dict)
        train_letor_sampler = LETORSampler(data_source=_train_data,
                                           rough_batch_size=data_dict['train_rough_batch_size'])
        train_loader = torch.utils.data.DataLoader(_train_data, batch_sampler=train_letor_sampler, num_workers=0)

        _test_data = LTRDataset(file=file_test, split_type=SPLIT_TYPE.Test, data_dict=data_dict,
                                presort=data_dict['test_presort'])
        test_letor_sampler = LETORSampler(data_source=_test_data,
                                          rough_batch_size=data_dict['test_rough_batch_size'])
        test_loader = torch.utils.data.DataLoader(_test_data, batch_sampler=test_letor_sampler, num_workers=0)

        if eval_dict['do_validation'] or eval_dict['do_summary']:  # vali_data is required
            _vali_data = LTRDataset(file=file_vali, split_type=SPLIT_TYPE.Validation, data_dict=data_dict,
                                    presort=data_dict['validation_presort'])
            vali_letor_sampler = LETORSampler(data_source=_vali_data,
                                              rough_batch_size=data_dict['validation_rough_batch_size'])
            vali_loader = torch.utils.data.DataLoader(_vali_data, batch_sampler=vali_letor_sampler, num_workers=0)
        else:
            vali_loader = None

        return train_loader, test_loader, vali_loader

    def load_ranker(self, sf_para_dict, model_para_dict):
        """
        Load a ranker correspondingly.
        :param sf_para_dict:
        :param model_para_dict:
        :return:
        """
        model_id = model_para_dict['model_id']

        if model_id in ['RankMSE', 'ListMLE', 'ListNet', 'RankCosine', 'DASALC', 'HistogramAP']:
            ranker = globals()[model_id](sf_para_dict=sf_para_dict, gpu=self.gpu, device=self.device)
        elif model_id in ['RankNet', 'LambdaRank', 'STListNet', 'ApproxNDCG', 'DirectOpt', 'LambdaLoss',
                          'MarginLambdaLoss', 'MDPRank', 'ExpectedUtility', 'MDNExpectedUtility', 'RankingMDN',
                          'SoftRank', 'TwinRank']:
            ranker = globals()[model_id](sf_para_dict=sf_para_dict, model_para_dict=model_para_dict,
                                         gpu=self.gpu, device=self.device)
        elif model_id == 'WassRank':
            ranker = WassRank(sf_para_dict=sf_para_dict, wass_para_dict=model_para_dict,
                              dict_cost_mats=self.dict_cost_mats, dict_std_dists=self.dict_std_dists,
                              gpu=self.gpu, device=self.device)
        else:
            raise NotImplementedError

        return ranker

    def setup_output(self, data_dict=None, eval_dict=None):
        """
        Update the output directory.
        :param data_dict:
        :param eval_dict:
        :return:
        """
        model_id = self.model_parameter.model_id
        grid_search, do_vali, dir_output = eval_dict['grid_search'], eval_dict['do_validation'], eval_dict['dir_output']
        mask_label = eval_dict['mask_label']

        if grid_search:
            dir_root = dir_output + '_'.join(['gpu', 'grid', model_id]) + '/' if self.gpu \
                else dir_output + '_'.join(['grid', model_id]) + '/'
        else:
            dir_root = dir_output

        eval_dict['dir_root'] = dir_root
        if not os.path.exists(dir_root): os.makedirs(dir_root)

        sf_str = self.sf_parameter.to_para_string()
        data_eval_str = '_'.join([self.data_setting.to_data_setting_string(),
                                  self.eval_setting.to_eval_setting_string()])
        if mask_label:
            data_eval_str = '_'.join([data_eval_str, 'MaskLabel', 'Ratio', '{:,g}'.format(eval_dict['mask_ratio'])])

        file_prefix = '_'.join([model_id, 'SF', sf_str, data_eval_str])

        if data_dict['scale_data']:
            if data_dict['scaler_level'] == 'QUERY':
                file_prefix = '_'.join([file_prefix, 'QS', data_dict['scaler_id']])
            else:
                file_prefix = '_'.join([file_prefix, 'DS', data_dict['scaler_id']])

        dir_run = dir_root + file_prefix + '/'  # run-specific outputs

        model_para_string = self.model_parameter.to_para_string()
        if len(model_para_string) > 0:
            dir_run = dir_run + model_para_string + '/'

        eval_dict['dir_run'] = dir_run
        if not os.path.exists(dir_run): os.makedirs(dir_run)

        return dir_run

    def setup_eval(self, data_dict, eval_dict, sf_para_dict, model_para_dict):
        """
        Finalize the evaluation setting correspondingly.
        :param data_dict:
        :param eval_dict:
        :param sf_para_dict:
        :param model_para_dict:
        :return:
        """
        sf_para_dict[sf_para_dict['sf_id']].update(dict(num_features=data_dict['num_features']))

        self.dir_run = self.setup_output(data_dict, eval_dict)

        if eval_dict['do_log'] and not self.eval_setting.debug:
            time_str = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M")
            sys.stdout = open(self.dir_run + '_'.join(['log', time_str]) + '.txt', "w")
        # if self.do_summary: self.summary_writer = SummaryWriter(self.dir_run + 'summary')

        if not model_para_dict['model_id'] in ['MDPRank', 'ExpectedUtility', 'WassRank']:
            """
            Aiming for efficient batch processing, please use a large batch size, e.g.,
            {train_rough_batch_size, validation_rough_batch_size, test_rough_batch_size} = {300, 300, 300}
            """
            # assert data_dict['train_rough_batch_size'] > 1

    def log_max(self, data_dict=None, max_cv_avg_scores=None, sf_para_dict=None, eval_dict=None, log_para_str=None):
        ''' Log the best performance across the grid search and the corresponding setting. '''
        dir_root, cutoffs = eval_dict['dir_root'], eval_dict['cutoffs']
        data_id = data_dict['data_id']

        sf_str = self.sf_parameter.to_para_string(log=True)
        data_eval_str = self.data_setting.to_data_setting_string(log=True) + '\n' + \
                        self.eval_setting.to_eval_setting_string(log=True)

        with open(file=dir_root + '/' + '_'.join([data_id, sf_para_dict['sf_id'], 'max.txt']), mode='w') as max_writer:
            max_writer.write('\n\n'.join([data_eval_str, sf_str, log_para_str,
                                          metric_results_to_string(max_cv_avg_scores, cutoffs, metric='nDCG')]))

    def kfold_cv_reproduce(self, data_dict=None, eval_dict=None, sf_para_dict=None, model_para_dict=None):
        self.display_information(data_dict, model_para_dict)
        self.check_consistency(data_dict, eval_dict, sf_para_dict)

        model_id = model_para_dict['model_id']
        fold_num, max_label = data_dict['fold_num'], data_dict['max_rele_level']
        cutoffs, do_vali = eval_dict['cutoffs'], eval_dict['do_validation']
        cv_tape = CVTape(model_id=model_id, fold_num=fold_num, cutoffs=cutoffs, do_validation=do_vali,
                         reproduce=True)

        sf_para_dict[sf_para_dict['sf_id']].update(dict(num_features=data_dict['num_features']))
        ranker = self.load_ranker(model_para_dict=model_para_dict, sf_para_dict=sf_para_dict)

        model_exp_dir = self.setup_output(data_dict, eval_dict)
        for fold_k in range(1, fold_num + 1):  # evaluation over k-fold data
            ranker.init()  # initialize or reset with the same random initialization
            _, test_data, _ = self.load_data(eval_dict, data_dict, fold_k)
            cv_tape.fold_evaluation_reproduce(ranker=ranker, test_data=test_data, dir_run=model_exp_dir,
                                              max_label=max_label, fold_k=fold_k, model_id=model_id,
                                              device=self.device)

        ndcg_cv_avg_scores = cv_tape.get_cv_performance()
        return ndcg_cv_avg_scores

    def kfold_cv_eval(self, data_dict=None, eval_dict=None, sf_para_dict=None, model_para_dict=None):
        """
        Evaluate learning-to-rank methods via k-fold cross validation if there are k folds, otherwise one fold.
        :param data_dict: settings w.r.t. data
        :param eval_dict: settings w.r.t. evaluation
        :param sf_para_dict: settings w.r.t. the scoring function
        :param model_para_dict: settings w.r.t. the ltr_adhoc model
        :return:
        """
        self.display_information(data_dict, model_para_dict)
        self.check_consistency(data_dict, eval_dict, sf_para_dict)

        ranker = self.load_ranker(model_para_dict=model_para_dict, sf_para_dict=sf_para_dict)
        ranker.uniform_eval_setting(eval_dict=eval_dict)

        self.setup_eval(data_dict, eval_dict, sf_para_dict, model_para_dict)

        model_id = model_para_dict['model_id']
        fold_num, label_type, max_label = data_dict['fold_num'], data_dict['label_type'], data_dict['max_rele_level']
        train_presort, validation_presort, test_presort = \
            data_dict['train_presort'], data_dict['validation_presort'], data_dict['test_presort']
        # for quick access of common evaluation settings
        epochs, loss_guided = eval_dict['epochs'], eval_dict['loss_guided']
        vali_k, log_step, cutoffs = eval_dict['vali_k'], eval_dict['log_step'], eval_dict['cutoffs']
        do_vali, vali_metric, do_summary = eval_dict['do_validation'], eval_dict['vali_metric'], eval_dict['do_summary']

        cv_tape = CVTape(model_id=model_id, fold_num=fold_num, cutoffs=cutoffs, do_validation=do_vali)
        for fold_k in range(1, fold_num + 1):  # evaluation over k-fold data
            ranker.init()  # initialize or reset with the same random initialization
            train_data, test_data, vali_data = self.load_data(eval_dict, data_dict, fold_k)

            if do_vali:
                vali_tape = ValidationTape(fold_k=fold_k, num_epochs=epochs, validation_metric=vali_metric,
                                           validation_at_k=vali_k, dir_run=self.dir_run)
            if do_summary:
                summary_tape = SummaryTape(do_validation=do_vali, cutoffs=cutoffs, label_type=label_type,
                                           train_presort=train_presort, test_presort=test_presort, gpu=self.gpu)
            if not do_vali and loss_guided:
                opt_loss_tape = OptLossTape(gpu=self.gpu)

            for epoch_k in range(1, epochs + 1):
                torch_fold_k_epoch_k_loss, stop_training = ranker.train(train_data=train_data, epoch_k=epoch_k,
                                                                        presort=train_presort, label_type=label_type)
                ranker.scheduler.step()  # adaptive learning rate with step_size=40, gamma=0.5
                if stop_training:
                    print('training failed!')
                    break

                if (do_summary or do_vali) and (epoch_k % log_step == 0 or epoch_k == 1):  # stepwise check
                    if do_vali:  # per-step validation score
                        torch_vali_metric_value = ranker.validation(vali_data=vali_data, k=vali_k, device='cpu',
                                                                    vali_metric=vali_metric, label_type=label_type,
                                                                    max_label=max_label, presort=validation_presort)
                        vali_metric_value = torch_vali_metric_value.squeeze(-1).data.numpy()
                        vali_tape.epoch_validation(ranker=ranker, epoch_k=epoch_k, metric_value=vali_metric_value)

                    if do_summary:  # summarize per-step performance w.r.t. train, test
                        summary_tape.epoch_summary(ranker=ranker, torch_epoch_k_loss=torch_fold_k_epoch_k_loss,
                                                   train_data=train_data, test_data=test_data,
                                                   vali_metric_value=vali_metric_value if do_vali else None)

                elif loss_guided:  # stopping check via epoch-loss
                    early_stopping = opt_loss_tape.epoch_cmp_loss(fold_k=fold_k, epoch_k=epoch_k,
                                                                  torch_epoch_k_loss=torch_fold_k_epoch_k_loss)
                    if early_stopping: break

            if do_summary:  # track
                summary_tape.fold_summary(fold_k=fold_k, dir_run=self.dir_run,
                                          train_data_length=train_data.__len__())

            if do_vali:  # use the fold-wise optimal model (chosen on validation data) for later testing
                ranker.load(vali_tape.get_optimal_path(), device=self.device)
                vali_tape.clear_fold_buffer(fold_k=fold_k)
            else:  # buffer the model after a fixed number of training epochs if no validation is deployed
                fold_optimal_checkpoint = '-'.join(['Fold', str(fold_k)])
                ranker.save(dir=self.dir_run + fold_optimal_checkpoint + '/',
                            name='_'.join(['net_params_epoch', str(epoch_k)]) + '.pkl')

            cv_tape.fold_evaluation(model_id=model_id, ranker=ranker, test_data=test_data, max_label=max_label,
                                    fold_k=fold_k)

        ndcg_cv_avg_scores = cv_tape.get_cv_performance()
        return ndcg_cv_avg_scores

    def naive_train(self, ranker, eval_dict, train_data=None, test_data=None):
        """
        A simple train-and-test routine: train on the training data & test on the testing data.
        :param ranker:
        :param eval_dict:
        :param train_data:
        :param test_data:
        :return:
        """
        ranker.reset_parameters()  # reset with the same random initialization

        assert train_data is not None
        assert test_data is not None

        list_losses = []
        list_train_ndcgs = []
        list_test_ndcgs = []

        epochs, cutoffs = eval_dict['epochs'], eval_dict['cutoffs']

        for i in range(epochs):
            epoch_loss = torch.zeros(1).to(self.device) if self.gpu else torch.zeros(1)
            for qid, batch_rankings, batch_stds in train_data:
                if self.gpu: batch_rankings, batch_stds = batch_rankings.to(self.device), batch_stds.to(self.device)
                batch_loss, stop_training = ranker.train(batch_rankings, batch_stds, qid=qid)
                epoch_loss += batch_loss.item()

            np_epoch_loss = epoch_loss.cpu().numpy() if self.gpu else epoch_loss.data.numpy()
            list_losses.append(np_epoch_loss)

            test_ndcg_ks = ranker.ndcg_at_ks(test_data=test_data, ks=cutoffs, label_type=LABEL_TYPE.MultiLabel,
                                             device='cpu')
            np_test_ndcg_ks = test_ndcg_ks.data.numpy()
            list_test_ndcgs.append(np_test_ndcg_ks)

            train_ndcg_ks = ranker.ndcg_at_ks(test_data=train_data, ks=cutoffs, label_type=LABEL_TYPE.MultiLabel,
                                              device='cpu')
            np_train_ndcg_ks = train_ndcg_ks.data.numpy()
            list_train_ndcgs.append(np_train_ndcg_ks)

        test_ndcgs = np.vstack(list_test_ndcgs)
        train_ndcgs = np.vstack(list_train_ndcgs)

        return list_losses, train_ndcgs, test_ndcgs

    def set_data_setting(self, data_json=None, debug=False, data_id=None, dir_data=None):
        if data_json is not None:
            self.data_setting = DataSetting(data_json=data_json)
        else:
            self.data_setting = DataSetting(debug=debug, data_id=data_id, dir_data=dir_data)

    def get_default_data_setting(self):
        return self.data_setting.default_setting()

    def iterate_data_setting(self):
        return self.data_setting.grid_search()

    def set_eval_setting(self, eval_json=None, debug=False, dir_output=None):
        if eval_json is not None:
            self.eval_setting = EvalSetting(debug=debug, eval_json=eval_json)
        else:
            self.eval_setting = EvalSetting(debug=debug, dir_output=dir_output)

    def get_default_eval_setting(self):
        return self.eval_setting.default_setting()

    def iterate_eval_setting(self):
        return self.eval_setting.grid_search()

    def set_scoring_function_setting(self, sf_json=None, debug=None, sf_id=None):
        if sf_json is not None:
            self.sf_parameter = ScoringFunctionParameter(sf_json=sf_json)
        else:
            self.sf_parameter = ScoringFunctionParameter(debug=debug, sf_id=sf_id)

    def get_default_scoring_function_setting(self):
        return self.sf_parameter.default_para_dict()

    def iterate_scoring_function_setting(self):
        return self.sf_parameter.grid_search()

    def set_model_setting(self, model_id=None, dir_json=None, debug=False):
        """
        Initialize the parameter class for the specified model.
        :param debug:
        :param model_id:
        :return:
        """
        if model_id in ['RankMSE', 'ListMLE', 'ListNet', 'RankCosine', 'DASALC', 'HistogramAP']:
            # ModelParameter is sufficient
            self.model_parameter = ModelParameter(model_id=model_id)
        else:
            if dir_json is not None:
                para_json = dir_json + model_id + "Parameter.json"
                self.model_parameter = globals()[model_id + "Parameter"](para_json=para_json)
            else:  # the third type, where debug mode enables a quick test
                self.model_parameter = globals()[model_id + "Parameter"](debug=debug)

    def get_default_model_setting(self):
        return self.model_parameter.default_para_dict()

    def iterate_model_setting(self):
        return self.model_parameter.grid_search()

    def declare_global(self, model_id=None):
        """
        Declare global variables if required, e.g., for efficiency.
        :param model_id:
        :return:
        """
        if model_id == 'WassRank':  # global buffering across a number of runs with different model parameters
            self.dict_cost_mats, self.dict_std_dists = dict(), dict()

    def point_run(self, debug=False, model_id=None, sf_id=None, data_id=None, dir_data=None, dir_output=None,
                  dir_json=None, reproduce=False):
        """
        Perform a one-time run based on the given setting.
        :param debug:
        :param model_id:
        :param data_id:
        :param dir_data:
        :param dir_output:
        :return:
        """
        if dir_json is None:
            self.set_eval_setting(debug=debug, dir_output=dir_output)
            self.set_data_setting(debug=debug, data_id=data_id, dir_data=dir_data)
            self.set_scoring_function_setting(debug=debug, sf_id=sf_id)
            self.set_model_setting(debug=debug, model_id=model_id)
        else:
            data_eval_sf_json = dir_json + 'Data_Eval_ScoringFunction.json'
            self.set_eval_setting(eval_json=data_eval_sf_json)
            self.set_data_setting(data_json=data_eval_sf_json)
            self.set_scoring_function_setting(sf_json=data_eval_sf_json)
            self.set_model_setting(model_id=model_id, dir_json=dir_json)

        data_dict = self.get_default_data_setting()
        eval_dict = self.get_default_eval_setting()
        sf_para_dict = self.get_default_scoring_function_setting()
        model_para_dict = self.get_default_model_setting()

        self.declare_global(model_id=model_id)

        if reproduce:
            self.kfold_cv_reproduce(data_dict=data_dict, eval_dict=eval_dict, model_para_dict=model_para_dict,
                                    sf_para_dict=sf_para_dict)
        else:
            self.kfold_cv_eval(data_dict=data_dict, eval_dict=eval_dict, model_para_dict=model_para_dict,
                               sf_para_dict=sf_para_dict)

    def grid_run(self, model_id=None, sf_id=None, dir_json=None, debug=False, data_id=None, dir_data=None,
                 dir_output=None):
        """
        Explore the effects of different hyper-parameters of a model via grid search.
        :param debug:
        :param model_id:
        :param data_id:
        :param dir_data:
        :param dir_output:
        :return:
        """
        if dir_json is not None:
            data_eval_sf_json = dir_json + 'Data_Eval_ScoringFunction.json'
            self.set_data_setting(data_json=data_eval_sf_json)
            self.set_scoring_function_setting(sf_json=data_eval_sf_json)
            self.set_eval_setting(debug=debug, eval_json=data_eval_sf_json)
            self.set_model_setting(model_id=model_id, dir_json=dir_json)
        else:
            self.set_eval_setting(debug=debug, dir_output=dir_output)
            self.set_data_setting(debug=debug, data_id=data_id, dir_data=dir_data)
            self.set_scoring_function_setting(debug=debug, sf_id=sf_id)
            self.set_model_setting(debug=debug, model_id=model_id)

        self.declare_global(model_id=model_id)

        ''' select the best setting through grid search '''
        vali_k, cutoffs = 5, [1, 3, 5, 10, 20, 50]
        max_cv_avg_scores = np.zeros(len(cutoffs))  # fold average
        k_index = cutoffs.index(vali_k)
        max_sf_para_dict, max_eval_dict, max_model_para_dict = None, None, None

        for data_dict in self.iterate_data_setting():
            for eval_dict in self.iterate_eval_setting():
                assert self.eval_setting.check_consistence(vali_k=vali_k, cutoffs=cutoffs)  # a necessary consistency check
                for sf_para_dict in self.iterate_scoring_function_setting():
                    for model_para_dict in self.iterate_model_setting():
                        curr_cv_avg_scores = self.kfold_cv_eval(data_dict=data_dict, eval_dict=eval_dict,
                                                                sf_para_dict=sf_para_dict,
                                                                model_para_dict=model_para_dict)
                        if curr_cv_avg_scores[k_index] > max_cv_avg_scores[k_index]:
                            max_cv_avg_scores, max_sf_para_dict, max_eval_dict, max_model_para_dict = \
                                curr_cv_avg_scores, sf_para_dict, eval_dict, model_para_dict

        # log the best setting
        self.log_max(data_dict=data_dict, eval_dict=max_eval_dict, max_cv_avg_scores=max_cv_avg_scores,
                     sf_para_dict=max_sf_para_dict,
                     log_para_str=self.model_parameter.to_para_string(log=True,
                                                                      given_para_dict=max_model_para_dict))

    def run(self, debug=False, model_id=None, sf_id=None, config_with_json=None, dir_json=None, data_id=None,
            dir_data=None, dir_output=None, grid_search=False, reproduce=False):
        if config_with_json:
            assert dir_json is not None
            if reproduce:
                self.point_run(debug=debug, model_id=model_id, dir_json=dir_json, reproduce=reproduce)
            else:
                self.grid_run(debug=debug, model_id=model_id, dir_json=dir_json)
        else:
            assert sf_id in ['pointsf', 'listsf']
            if grid_search:
                self.grid_run(debug=debug, model_id=model_id, sf_id=sf_id, data_id=data_id, dir_data=dir_data,
                              dir_output=dir_output)
            else:
                self.point_run(debug=debug, model_id=model_id, sf_id=sf_id, data_id=data_id, dir_data=dir_data,
                               dir_output=dir_output, reproduce=reproduce)
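# A minimal, runnable driver sketch for the revised evaluator above; the model id, sf_id, data id
# and paths are illustrative assumptions rather than defaults of this module.
if __name__ == '__main__':
    evaluator = LTREvaluator(cuda=None)  # CPU-only; pass an integer GPU index to use CUDA
    evaluator.run(debug=True, model_id='ListNet', sf_id='pointsf', grid_search=False,
                  data_id='MQ2008_Super', dir_data='/path/to/MQ2008/', dir_output='/path/to/output/')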