def execute_fold_parallel(participants_fold: pd.Series, fold: int, cuda_device: str,
                          hyper_parameters_tune_mode: bool=False, model_nums_list: list=None,
                          reversed_order: bool=False, bert_hc_exp: bool=False):
    """
    This function get a dict that split the participant to train-val-test (for this fold) and run all the models
    we want to compare --> it train them using the train data and evaluate them using the val data
    :param participants_fold: split the participant to train-val-test (for this fold)
    :param fold: the fold number
    :param cuda_device: the number of cuda device if using it
    :param hyper_parameters_tune_mode: after find good data - hyper parameter tuning
    :param model_nums_list: list of models to run
    :param reversed_order: if to run with reversed_order of the features in the causal graph
    :param bert_hc_exp: if we run the BERt_HC experiment (textual features are created by BERT fine tuning)
    :return:
    """
    # get the train, test, validation participant code for this fold
    os.environ["CUDA_VISIBLE_DEVICES"] = cuda_device
    fold_split_dict = dict()
    for data_set in ['train', 'test', 'validation']:
        fold_split_dict[data_set] = participants_fold.loc[participants_fold == data_set].index.tolist()

    # models_to_compare should have for each row:
    # model_num, model_type, model_name, function_to_run, data_file_name, hyper_parameters
    # (strings of all parameters for the running function as dict: {'parameter_name': parameter_value})
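    # An illustrative row (the values below are assumptions, not taken from the actual file):
    # model_num=1, model_type='LSTM_avg', model_name='LSTM_avg_features', function_to_run='ExecuteEvalLSTM',
    # data_file_name='all_data_bert_embedding.pkl', hyper_parameters='{"features_max_size": "500"}'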
    models_to_compare = pd.read_excel(os.path.join(base_directory, 'models_info.xlsx'),
                                      sheet_name='table_to_load', skiprows=[0])
    fold_dir = utils.set_folder(f'fold_{fold}', run_dir)
    excel_models_results = utils.set_folder(folder_name='excel_models_results', father_folder_path=fold_dir)
    # for test
    print(f'test_dir: {test_dir}')
    test_fold_dir = utils.set_folder(f'fold_{fold}', test_dir)
    excel_test_models_results = utils.set_folder(folder_name='excel_best_models_results',
                                                 father_folder_path=test_fold_dir)
    test_participants_fold = pd.read_csv(os.path.join(data_directory, pair_folds_file_name))
    test_participants_fold.index = test_participants_fold.pair_id
    test_table_writer = pd.ExcelWriter(os.path.join(excel_test_models_results, f'Results_test_data_best_models.xlsx'),
                                       engine='xlsxwriter')

    path = f"{REVIEWS_FEATURES_DATASETS_DIR}/experiment_manage.csv"
    experiment_manage_df = pd.read_csv(path)
    bert_models = experiment_manage_df.exp_name.values.tolist()

    table_writer = None
    log_file_name = os.path.join(fold_dir, f'LogFile_fold_{fold}.log')
    for handler in logging.root.handlers[:]:
        logging.root.removeHandler(handler)
    logging.basicConfig(filename=log_file_name,
                        level=logging.DEBUG,
                        format='%(asctime)s: %(levelname)s %(message)s',
                        datefmt='%H:%M:%S',
                        )

    if model_nums_list is not None:
        all_model_nums = model_nums_list
    else:
        all_model_nums = list(set(models_to_compare.model_num))

    all_models_results = pd.DataFrame()
    all_models_prediction_results = pd.DataFrame()
    if bert_hc_exp:
        if reversed_order:
            bert_models = reversed(list(enumerate(bert_models)))
        else:
            bert_models = enumerate(bert_models)
    else:
        bert_models = enumerate([''])
    for feature_num, bert_feature in bert_models:
        for model_num in all_model_nums:  # compare all versions of each model type
            num_iterates = 1
            model_type_versions = models_to_compare.loc[models_to_compare.model_num == model_num]
            model_num = f'{model_num}_{feature_num}'
            model_num_results_path = os.path.join(excel_models_results, f'model_num_results_{model_num}.pkl')
            if not os.path.isfile(model_num_results_path):
                model_num_results = pd.DataFrame(columns=['model_num', 'model_name', 'model_type',
                                                          'hyper_parameters_str', 'data_file_name',
                                                          'RMSE', 'Raisha', 'Round'])
                joblib.dump(model_num_results, model_num_results_path)

            for index, row in model_type_versions.iterrows():  # iterate over all the models to compare
                # get all model parameters
                model_type = row['model_type']
                model_name = row['model_name']

                function_to_run = row['function_to_run']
                data_file_name = row['data_file_name']
                test_data_file_name = row['test_data_file_name']

                if bert_hc_exp:
                    model_name = f'{model_name}_{bert_feature}'
                    data_file_name =\
                        data_file_name.replace('bert_embedding', f'bert_embedding_for_feature_{bert_feature}')
                    test_data_file_name =\
                        test_data_file_name.replace('bert_embedding', f'bert_embedding_for_feature_{bert_feature}')
                hyper_parameters_str = row['hyper_parameters']
                # get hyper parameters as dict
                if type(hyper_parameters_str) == str:
                    hyper_parameters_dict = json.loads(hyper_parameters_str)
                else:
                    hyper_parameters_dict = None

                if hyper_parameters_dict is not None and 'features_max_size' in hyper_parameters_dict.keys():
                    if int(hyper_parameters_dict['features_max_size']) > 1000:
                        continue

                if outer_is_debug:
                    hyper_parameters_dict['num_epochs'] = 2
                else:
                    hyper_parameters_dict['num_epochs'] = 100

                # if predict test already done:
                predict_folder = os.path.join(test_dir, f'fold_{fold}',
                                              f'{model_num}_{model_type}_{model_name}_'
                                              f'{hyper_parameters_dict["num_epochs"]}_epochs_fold_num_{fold}')
                if os.path.isdir(predict_folder):
                    continue

                # each function needs to get: model_num, fold, fold_dir, model_type, model_name, data_file_name,
                # fold_split_dict, table_writer, data_directory, hyper_parameters_dict.
                # While running it needs to write the predictions to the table_writer and save the trained model with
                # the name model_name_model_num to the fold_dir.
                # It needs to return a dict with the final results over the evaluation data: {measure_name: measure}
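                # e.g. an illustrative (assumed) final results dict: {'RMSE': 0.42}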
                if hyper_parameters_tune_mode:
                    if 'LSTM' in model_type or 'Transformer' in model_type:
                        if 'LSTM' in model_type and 'use_transformer' not in model_type:
                            gridsearch = lstm_gridsearch_params
                        else:  # for Transformer models and LSTM_use_transformer models
                            gridsearch = transformer_gridsearch_params
                        for i, parameters_dict in enumerate(gridsearch):
                            if outer_is_debug and i > 1:
                                continue
                            new_hyper_parameters_dict = copy.deepcopy(hyper_parameters_dict)
                            new_hyper_parameters_dict.update(parameters_dict)
                            if 'linear' in model_type and 'lstm_hidden_dim' in new_hyper_parameters_dict:
                                new_hyper_parameters_dict['linear_hidden_dim'] = \
                                    int(0.5 * int(new_hyper_parameters_dict['lstm_hidden_dim']))
                            if '_avg_turn' in model_type:
                                for inner_i, inner_parameters_dict in enumerate(avg_turn_gridsearch_params):
                                    if outer_is_debug and inner_i > 1:
                                        continue
                                    new_hyper_parameters_dict.update(inner_parameters_dict)
                                    new_model_name = f'{model_name}'
                                    new_model_num = f'{model_num}_{i}_{inner_i}'
                                    if os.path.isfile(os.path.join(excel_models_results,
                                                                   f'Results_fold_{fold}_model_{new_model_num}.xlsx')):
                                        continue
                                    all_models_results = execute_create_fit_predict_eval_model(
                                        function_to_run, new_model_num, fold, fold_dir, model_type, new_model_name,
                                        data_file_name, fold_split_dict, table_writer, new_hyper_parameters_dict,
                                        excel_models_results, all_models_results, model_num_results_path)
                            else:
                                new_model_name = f'{model_name}'
                                new_model_num = f'{model_num}_{i}'
                                if os.path.isfile(os.path.join(excel_models_results,
                                                               f'Results_fold_{fold}_model_{new_model_num}.xlsx')):
                                    continue
                                all_models_results = execute_create_fit_predict_eval_model(
                                    function_to_run, new_model_num, fold, fold_dir, model_type, new_model_name,
                                    data_file_name, fold_split_dict, table_writer, new_hyper_parameters_dict,
                                    excel_models_results, all_models_results, model_num_results_path)
                    elif ('SVM' in model_type and 'XGBoost' not in model_name) or 'Baseline' in model_type:
                        if 'baseline' in model_name or 'Baseline' in model_type:
                            svm_gridsearch_params_inner = [{}]
                        else:
                            svm_gridsearch_params_inner = svm_gridsearch_params
                        if 'EWG' in model_name:
                            num_iterates = 5
                        for i, parameters_dict in enumerate(svm_gridsearch_params_inner):
                            if outer_is_debug and i > 1:
                                continue
                            new_hyper_parameters_dict = copy.deepcopy(hyper_parameters_dict)
                            new_hyper_parameters_dict.update(parameters_dict)
                            new_model_name = f'{model_name}'
                            new_model_num = f'{model_num}_{i}'
                            if os.path.isfile(os.path.join(excel_models_results,
                                                           f'Results_fold_{fold}_model_{new_model_num}.xlsx')):
                                continue
                            all_models_results = execute_create_fit_predict_eval_model(
                                function_to_run, new_model_num, fold, fold_dir, model_type, new_model_name,
                                data_file_name, fold_split_dict, table_writer, new_hyper_parameters_dict,
                                excel_models_results, all_models_results, model_num_results_path,
                                num_iterates=num_iterates)

                    elif 'XGBoost' in model_name:
                        for i, parameters_dict in enumerate(xgboost_gridsearch_params):
                            if outer_is_debug and i > 1:
                                continue
                            new_hyper_parameters_dict = copy.deepcopy(hyper_parameters_dict)
                            new_hyper_parameters_dict.update(parameters_dict)
                            new_model_name = f'{model_name}'
                            new_model_num = f'{model_num}_{i}'
                            if os.path.isfile(os.path.join(excel_models_results,
                                                           f'Results_fold_{fold}_model_{new_model_num}.xlsx')):
                                continue
                            all_models_results = execute_create_fit_predict_eval_model(
                                function_to_run, new_model_num, fold, fold_dir, model_type, new_model_name,
                                data_file_name, fold_split_dict, table_writer, new_hyper_parameters_dict,
                                excel_models_results, all_models_results, model_num_results_path,
                                num_iterates=num_iterates)
                    else:
                        print('Model type must be LSTM-kind, Transformer-kind, SVM-kind, Baseline or XGBoost')

                    # select the best hyper-parameters set for this model based on the RMSE
                    model_num_results = joblib.load(model_num_results_path)
                    if model_num_results.empty:
                        continue
                    argmin_index = model_num_results.RMSE.argmin()
                    best_model = model_num_results.iloc[argmin_index]
                    best_model_version_num = best_model.model_num
                    logging.info(f'Best model version for model {model_num}-{model_name} in fold {fold} is: '
                                 f'{best_model_version_num}. Start predict over test data')
                    print(f'Best model version for model {model_num}-{model_name} in fold {fold} is: '
                          f'{best_model_version_num}. Start predict over test data')

                    # predict on test data using the best version of this model
                    test_fold_split_dict = dict()
                    test_pair_ids_in_fold = test_participants_fold[f'fold_{fold}']
                    for data_set in ['train', 'test', 'validation']:
                        test_fold_split_dict[data_set] = \
                            test_pair_ids_in_fold.loc[test_pair_ids_in_fold == data_set].index.tolist()
                    hyper_parameters_str = best_model.hyper_parameters_str
                    model_folder = run_dir
                    if not os.path.exists(os.path.join(base_directory, 'logs', model_folder, f'fold_{fold}')):
                        if not os.path.exists(
                                os.path.join(base_directory, 'logs', f'{model_folder}_best', f'fold_{fold}')):
                            # the folder we need does not exist
                            print(f'fold {fold} in folder {model_folder} does not exist')
                            continue
                        else:
                            model_folder = f'{model_folder}_best'
                    # get hyper parameters as dict
                    if type(hyper_parameters_str) == str:
                        hyper_parameters_dict = json.loads(hyper_parameters_str)
                    elif type(hyper_parameters_str) == dict:
                        hyper_parameters_dict = hyper_parameters_str
                    else:
                        hyper_parameters_dict = None
                        print('no hyper parameters dict')

                    num_epochs = hyper_parameters_dict['num_epochs']

                    model_file_name = f'{best_model_version_num}_{model_type}_{model_name}_fold_{fold}.pkl'
                    if function_to_run == 'ExecuteEvalLSTM':
                        inner_model_folder = \
                            f'{best_model_version_num}_{model_type}_{model_name}_{num_epochs}_epochs_fold_num_{fold}'
                    else:
                        inner_model_folder = ''
                    trained_model_dir = os.path.join(base_directory, 'logs', model_folder, f'fold_{fold}',
                                                     inner_model_folder)
                    trained_model = joblib.load(os.path.join(trained_model_dir, model_file_name))

                    metadata_dict = {'model_num': model_num, 'model_type': model_type, 'model_name': model_name,
                                     'data_file_name': data_file_name, 'test_data_file_name': test_data_file_name,
                                     'hyper_parameters_str': hyper_parameters_dict, 'fold': fold,
                                     'best_model_version_num': best_model_version_num}

                    metadata_df = pd.DataFrame.from_dict(metadata_dict, orient='index').T
                    model_class = getattr(execute_cv_models, function_to_run)(
                        model_num, fold, test_fold_dir, model_type, model_name, data_file_name, test_fold_split_dict,
                        test_table_writer, data_directory, hyper_parameters_dict, excel_test_models_results,
                        trained_model, trained_model_dir, model_file_name, test_data_file_name, 'test')

                    model_class.load_data_create_model()
                    results_df = pd.DataFrame()
                    for i in range(num_iterates):
                        print(f'Start Test Iteration number {i}')
                        logging.info(f'Start Test Iteration number {i}')
                        model_class.predict()
                        results_dict = model_class.eval_model()
                        current_results_df = pd.DataFrame.from_dict(results_dict).T
                        results_df = pd.concat([results_df, current_results_df], sort=False)

                    results_df['raisha_round'] = results_df.index
                    results_df[['Raisha', 'Round']] = results_df.raisha_round.str.split(expand=True)
                    results_df = results_df.drop('raisha_round', axis=1)
                    results_df = results_df.groupby(by=['Raisha', 'Round']).mean()
                    results_df = results_df.reset_index()
                    results_df.index = np.zeros(shape=(results_df.shape[0],))
                    results_df = metadata_df.join(results_df)
                    all_models_prediction_results = pd.concat([all_models_prediction_results, results_df], sort=False)
                    utils.write_to_excel(model_class.model_table_writer, 'Model results', ['Model results'],
                                         results_df)
                    model_class.model_table_writer.save()

                    model_num_results = model_num_results.reset_index()

                    for remove_index, remove_row in model_num_results.iterrows():
                        if remove_row.model_num == best_model_version_num:
                            continue
                        hyper_parameters_str = remove_row.hyper_parameters_str
                        # get hyper parameters as dict
                        if type(hyper_parameters_str) == str:
                            hyper_parameters_dict = json.loads(hyper_parameters_str)
                        elif type(hyper_parameters_str) == dict:
                            hyper_parameters_dict = hyper_parameters_str
                        else:
                            hyper_parameters_dict = None
                            print('no hyper parameters dict')
                        num_epochs = hyper_parameters_dict['num_epochs']
                        inner_model_folder = f'{remove_row.model_num}_{remove_row.model_type}_' \
                                             f'{remove_row.model_name}_{num_epochs}_epochs_fold_num_{fold}'
                        # use a separate variable so model_folder (the base logs folder) is not overwritten
                        folder_to_remove = os.path.join(base_directory, 'logs', model_folder, f'fold_{fold}',
                                                        inner_model_folder)
                        if os.path.exists(folder_to_remove):
                            print(f'remove {folder_to_remove}')
                            shutil.rmtree(folder_to_remove)
                        else:
                            print(f'Folder {folder_to_remove} does not exist')

                else:  # no hyper parameters
                    all_models_results = execute_create_fit_predict_eval_model(
                        function_to_run, model_num, fold, fold_dir, model_type, model_name, data_file_name, fold_split_dict,
                        table_writer, hyper_parameters_dict, excel_models_results, all_models_results,
                        model_num_results_path)

    utils.write_to_excel(table_writer, 'All models results', ['All models results'], all_models_results)
    if table_writer is not None:
        table_writer.save()
    if test_table_writer is not None:
        utils.write_to_excel(test_table_writer, 'All models results', ['All models results'],
                             all_models_prediction_results)
        test_table_writer.save()

    logging.info(f'fold {fold} finish compare models')
    print(f'fold {fold} finish compare models')

    return f'fold {fold} finish compare models'
    sys.argv[5] = reversed order of features: True/False
    sys.argv[6] = outer_cuda: int: 0/1
    sys.argv[7] = bert_hc experiment: True/False
    """

    # is_parallel
    is_parallel = sys.argv[1]
    if is_parallel == 'False':
        is_parallel = False

    run_dir_name = datetime.now().strftime(f'compare_prediction_models_%d_%m_%Y_%H_%M')
    test_dir_name = datetime.now().strftime(f'predict_best_models_%d_%m_%Y_%H_%M')
    if len(sys.argv) > 2:
        folder_date = sys.argv[2]
        if folder_date != 'False':
            run_dir = utils.set_folder(datetime.now().strftime(f'compare_prediction_models_{folder_date}'), 'logs')
            # for test
            test_dir = utils.set_folder(datetime.now().strftime(f'predict_best_models_{folder_date}'), 'logs')
        else:
            # folder dir
            run_dir = utils.set_folder(run_dir_name, 'logs')
            # for test
            test_dir = utils.set_folder(test_dir_name, 'logs')
    else:
        # folder dir
        run_dir = utils.set_folder(run_dir_name, 'logs')
        # for test
        test_dir = utils.set_folder(test_dir_name, 'logs')

    print(f'test_dir: {test_dir}')
    # is_debug
def execute_fold_parallel(participants_fold: pd.Series, fold: int, cuda_device: str, data_file_name: str,
                          features_families: list, hyper_parameters_tune_mode: bool=False,
                          test_data_file_name: str=None, id_column: str='pair_id', model_type: str='regression',
                          features_to_remove: Union[list, str] = None):
    """
    This function get a dict that split the participant to train-val-test (for this fold) and run all the models
    we want to compare --> it train them using the train data and evaluate them using the val data
    :param participants_fold: split the participant to train-val-test (for this fold)
    :param fold: the fold number
    :param cuda_device: the number of cuda device if using it
    :param hyper_parameters_tune_mode: after find good data - hyper parameter tuning
    :param data_file_name: the data file name
    :param features_families: the families of features to use
    :param id_column: the name of the ID column
    :param test_data_file_name: the test_data_file_name
    :param model_type: is this a regression model or a classification model
    :param features_to_remove: features we want to remove
    :return:
    """
    # get the train, test, validation participant code for this fold
    os.environ["CUDA_VISIBLE_DEVICES"] = cuda_device
    fold_split_dict = dict()
    for data_set in ['train', 'test', 'validation']:
        fold_split_dict[data_set] = participants_fold.loc[participants_fold == data_set].index.tolist()

    fold_dir = utils.set_folder(f'fold_{fold}', run_dir)
    excel_models_results = utils.set_folder(folder_name='excel_models_results', father_folder_path=fold_dir)
    # for test
    test_fold_dir = utils.set_folder(f'fold_{fold}', test_dir)
    excel_test_models_results = utils.set_folder(folder_name='excel_best_models_results',
                                                 father_folder_path=test_fold_dir)
    test_participants_fold = pd.read_csv(os.path.join(data_directory, pair_folds_file_name))
    test_participants_fold.index = test_participants_fold[id_column]
    test_table_writer = pd.ExcelWriter(os.path.join(excel_test_models_results, f'Results_test_data_best_models.xlsx'),
                                       engine='xlsxwriter')

    log_file_name = os.path.join(fold_dir, f'LogFile_fold_{fold}.log')
    for handler in logging.root.handlers[:]:
        logging.root.removeHandler(handler)
    logging.basicConfig(filename=log_file_name,
                        level=logging.DEBUG,
                        format='%(asctime)s: %(levelname)s %(message)s',
                        datefmt='%H:%M:%S',
                        )

    all_models_results = pd.DataFrame()
    all_results_table_writer = pd.ExcelWriter(os.path.join(excel_models_results,
                                                           f'Results_fold_{fold}_all_models.xlsx'), engine='xlsxwriter')
    all_models_test_data_results = pd.DataFrame()
    best_models_paths_dict = defaultdict(str)

    # load data
    data_path = os.path.join(base_directory, 'data', 'verbal', 'models_input', data_file_name)
    if test_data_file_name is None:
        test_data_path = data_path
    else:
        test_data_path = os.path.join(base_directory, 'data', 'verbal', 'models_input', test_data_file_name)
    train_pair_ids = participants_fold.loc[participants_fold == 'train'].index.tolist()
    validation_pair_ids = participants_fold.loc[participants_fold == 'validation'].index.tolist()
    test_pair_ids = participants_fold.loc[participants_fold == 'test'].index.tolist()

    train_x, train_y, validation_x, validation_y = utils.load_data(data_path=data_path, label_name='label',
                                                                   features_families=features_families,
                                                                   test_pair_ids=validation_pair_ids,
                                                                   train_pair_ids=train_pair_ids, id_column=id_column,
                                                                   features_to_remove=features_to_remove)
    _, _, test_x, test_y = utils.load_data(data_path=test_data_path, label_name='label', id_column=id_column,
                                           features_families=features_families, test_pair_ids=test_pair_ids,
                                           features_to_remove=features_to_remove)

    data_features = train_x.columns.tolist()

    model_names = ['SVM', 'mean', 'median', 'RandomForest', 'XGBoost', 'CatBoost']  # , 'lightGBM', '']

    for model_num, model_name in enumerate(model_names):
        model_num_results_path = os.path.join(excel_models_results, f'model_name_results_{model_name}.pkl')
        if not os.path.isfile(model_num_results_path):
            model_num_results = pd.DataFrame(columns=['model_name', 'hyper_parameters_str'] + measures[model_type][0])
            joblib.dump(model_num_results, model_num_results_path)

        # execute_create_fit_predict_eval_model gets: model_num, features, the train and validation data, fold,
        # fold_dir, model_name, excel_models_results_folder, hyper_parameters_dict, all_models_results,
        # model_num_results_path and model_type.
        # While running it writes the predictions to the excel results folder and saves the trained model with
        # the name model_name_model_num to the fold_dir.
        # It returns the updated all_models_results with the final results over the evaluation data.
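        # gridsearch_params is assumed to map each model name to a list of hyper-parameter dicts, e.g. (illustrative):
        # gridsearch_params = {'SVM': [{'kernel': 'rbf', 'C': 1.0}, {'kernel': 'linear', 'C': 0.1}],
        #                      'XGBoost': [{'max_depth': 3, 'n_estimators': 100}]}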
        if hyper_parameters_tune_mode:
            gridsearch = gridsearch_params[model_name]
            for i, parameters_dict in enumerate(gridsearch):
                # if i > 0:
                #     continue
                if os.path.isfile(os.path.join(excel_models_results, f'Results_fold_{fold}_model_{model_name}.xlsx')):
                    continue
                new_model_num = f'{model_num}_{i}'
                print(f'start model {model_name} with number {new_model_num} for fold {fold}')
                all_models_results = execute_create_fit_predict_eval_model(
                    model_num=new_model_num, features=data_features, train_x=train_x, train_y=train_y,
                    test_x=validation_x, test_y=validation_y, fold=fold, fold_dir=fold_dir, model_name=model_name,
                    excel_models_results_folder=excel_models_results, hyper_parameters_dict=parameters_dict,
                    all_models_results=all_models_results, model_num_results_path=model_num_results_path,
                    model_type=model_type)

        else:  # no hyper parameters
            parameters_dict = default_gridsearch_params[model_name]
            all_models_results = execute_create_fit_predict_eval_model(
                model_num=model_num, features=data_features, train_x=train_x, train_y=train_y,
                test_x=validation_x, test_y=validation_y, fold=fold, fold_dir=fold_dir, model_name=model_name,
                excel_models_results_folder=excel_models_results, hyper_parameters_dict=parameters_dict,
                all_models_results=all_models_results, model_num_results_path=model_num_results_path,
                model_type=model_type)

        # select the best hyper-parameters set for this model based on the main measure of this model_type
        model_num_results = joblib.load(model_num_results_path)
        if model_num_results.empty:
            continue
        # measures[model_type][0] is the measure to choose the best model
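        # measures is assumed to look like (illustrative):
        # measures = {'regression': (['RMSE', 'MAE'], 'calculate_continuous_measures'),
        #             'classification': (['Accuracy', 'F1'], 'calculate_binary_measures')}
        # so measures[model_type][0][0] is the column to optimize and measures[model_type][1] names the utils
        # function that computes the results dict from the predictions.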
        if model_type == 'regression':
            best_index = model_num_results[measures[model_type][0][0]].argmin()
        elif model_type == 'classification':
            best_index = model_num_results[measures[model_type][0][0]].argmax()
        else:
            raise ValueError('model_type must be regression or classification')
        best_model = model_num_results.iloc[best_index]
        model_version_num = best_model.model_num
        logging.info(f'Best model version for model {model_num}-{model_name} in fold {fold} is: '
                     f'{model_version_num}. Start predict over test data')
        print(f'Best model version for model {model_num}-{model_name} in fold {fold} is: '
              f'{model_version_num}. Start predict over test data')
        # predict on test data using the best version of this model
        hyper_parameters_str = best_model.hyper_parameters_str
        model_folder = run_dir
        if not os.path.exists(os.path.join(base_directory, 'logs', model_folder, f'fold_{fold}')):
            if not os.path.exists(
                    os.path.join(base_directory, 'logs', f'{model_folder}_best', f'fold_{fold}')):
                # the folder we need does not exist
                print(f'fold {fold} in folder {model_folder} does not exist')
                continue
            else:
                model_folder = f'{model_folder}_best'
        # get hyper parameters as dict
        if type(hyper_parameters_str) == str:
            hyper_parameters_dict = json.loads(hyper_parameters_str)
        elif type(hyper_parameters_str) == dict:
            hyper_parameters_dict = hyper_parameters_str
        else:
            hyper_parameters_dict = None
            print('no hyper parameters dict')

        model_file_name = f'{model_version_num}_{model_name}_fold_{fold}.pkl'
        trained_model_dir = os.path.join(base_directory, 'logs', model_folder, f'fold_{fold}')
        trained_model = joblib.load(os.path.join(trained_model_dir, model_file_name))
        best_models_paths_dict[model_name] = os.path.join(trained_model_dir, model_file_name)

        metadata_dict = {'model_num': model_num, 'model_name': model_name,
                         'hyper_parameters_str': hyper_parameters_dict, 'fold': fold,
                         'best_model_version_num': model_version_num}

        metadata_df = pd.DataFrame.from_dict(metadata_dict, orient='index').T

        # create model class with trained_model
        test_model_class = predictive_models.PredictiveModel(
            data_features, model_name, hyper_parameters_dict, model_num, fold, fold_dir,
            excel_test_models_results, trained_model=trained_model, model_type=model_type)

        test_predictions = test_model_class.predict(test_x, test_y)
        results_dict = getattr(utils, measures[model_type][1])(all_predictions=test_predictions)
        results_df = pd.DataFrame(results_dict, index=[0])
        results_df = metadata_df.join(results_df)
        all_models_test_data_results = pd.concat([all_models_test_data_results, results_df], sort=False)
        utils.write_to_excel(test_model_class.model_table_writer, 'Model results', ['Model results'],
                             results_df)
        test_model_class.model_table_writer.save()

    utils.write_to_excel(all_results_table_writer, 'All models results', ['All models results'], all_models_results)
    if all_results_table_writer is not None:
        all_results_table_writer.save()
    if test_table_writer is not None:
        utils.write_to_excel(test_table_writer, 'All models results', ['All models results'],
                             all_models_test_data_results)
        test_table_writer.save()

    logging.info(f'fold {fold} finish compare models')
    print(f'fold {fold} finish compare models')

    for best_model_name in best_models_paths_dict.keys():
        if best_model_name not in ['RandomForest', 'XGBoost', 'CatBoost']:
            continue
        print(f'\nComputing SHAP values of {best_model_name}')
        pkl_model_path = Path(best_models_paths_dict[best_model_name])
        model = joblib.load(pkl_model_path)
        # SHAP values are computed over the training data (both X_train and X_test are set to train_x)
        X_test = train_x
        X_train = train_x

        # create a folder for the SHAP results to be saved at
        save_shap_values_path = pkl_model_path.parent.joinpath('SHAP_values_results')
        save_shap_values_path.mkdir(exist_ok=True)

        shap_obj = XAI_Methods.XAIMethods(model, X_test, X_train, 'SHAP', best_model_name)
        shap_res = shap_obj.get_shap_feature_mean_values()
        shap_res_save_path = save_shap_values_path.joinpath(pkl_model_path.name.replace('pkl', 'csv'))
        shap_res.to_csv(shap_res_save_path)

    return f'fold {fold} finish compare models', best_models_paths_dict


def predict_best_models(best_model_file_name: str):
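    """
    Load the table of the best models (best_model_file_name in the logs folder), restore the trained model listed
    for each fold, predict over the test data of that fold and write the per-model and aggregated results to Excel.
    :param best_model_file_name: the Excel file that lists the best model version per fold
    """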
    all_models_results = pd.DataFrame()
    best_models = pd.read_excel(os.path.join(base_directory, 'logs', best_model_file_name), sheet_name='table_to_load')
    os.environ["CUDA_VISIBLE_DEVICES"] = '0'
    participants_fold = pd.read_csv(os.path.join(data_directory, 'pairs_folds_new_test_data.csv'))
    participants_fold.index = participants_fold.pair_id
    excel_models_results = utils.set_folder(folder_name='excel_best_models_results', father_folder_path=run_dir)
    table_writer = pd.ExcelWriter(os.path.join(excel_models_results, f'Results_test_data_best_models.xlsx'),
                                  engine='xlsxwriter')
    log_file_name = os.path.join(run_dir, f'LogFile.log')
    for handler in logging.root.handlers[:]:
        logging.root.removeHandler(handler)
    logging.basicConfig(filename=log_file_name,
                        level=logging.DEBUG,
                        format='%(asctime)s: %(levelname)s %(message)s',
                        datefmt='%H:%M:%S',
                        )
    for fold in range(6):
        pair_ids_in_fold = participants_fold[f'fold_{fold}']
        fold_split_dict = dict()
        for data_set in ['train', 'test', 'validation']:
            fold_split_dict[data_set] = pair_ids_in_fold.loc[pair_ids_in_fold == data_set].index.tolist()
        for index, row in best_models.iterrows():
            model_name = row['model_name']
            model_name_folder = row[f'model_name_folder_fold_{fold}']
            model_num = row['model_num']
            # if model_num not in [879]:
            #     continue

            model_type = row['model_type']
            model_type_folder = row[f'model_type_folder_fold_{fold}']
            if type(model_type_folder) == float and np.isnan(model_type_folder):
                continue
            function_to_run = row['function_to_run']
            data_file_name = row['data_file_name']
            test_data_file_name = row['test_data_file_name']
            hyper_parameters_str = row[f'hyper_parameters_fold_{fold}']
            model_folder = row[f'model_folder_fold_{fold}']
            if not os.path.exists(os.path.join(base_directory, 'logs', model_folder, f'fold_{fold}')):
                if not os.path.exists(os.path.join(base_directory, 'logs', f'{model_folder}_best', f'fold_{fold}')):
                    # the folder we need does not exist
                    print(f'fold {fold} in folder {model_folder} does not exist')
                    continue
                else:
                    model_folder = f'{model_folder}_best'
            model_version_num = row[f'model_version_num_fold_{fold}']
            model_file_name = f'{model_version_num}_{model_type_folder}_{model_name_folder}_fold_{fold}.pkl'
            if function_to_run == 'ExecuteEvalLSTM':
                inner_model_folder =\
                    f'{model_version_num}_{model_type_folder}_{model_name_folder}_100_epochs_fold_num_{fold}'
            else:
                inner_model_folder = ''
            trained_model_dir = os.path.join(base_directory, 'logs', model_folder, f'fold_{fold}', inner_model_folder)
            # if torch.cuda.is_available() or function_to_run != 'ExecuteEvalLSTM':
            trained_model = joblib.load(os.path.join(trained_model_dir, model_file_name))
            # else:
            #     trained_model = torch.load(os.path.join(trained_model_dir, model_file_name),
            #                                map_location=torch.device('cpu'))

            # get hyper parameters as dict
            if type(hyper_parameters_str) == str:
                hyper_parameters_dict = json.loads(hyper_parameters_str)
            else:
                hyper_parameters_dict = None

            metadata_dict = {'model_num': model_num, 'model_type': model_type, 'model_name': model_name,
                             'data_file_name': data_file_name, 'test_data_file_name': test_data_file_name,
                             'hyper_parameters_str': hyper_parameters_dict, 'fold': fold}

            metadata_df = pd.DataFrame.from_dict(metadata_dict, orient='index').T
            model_class = getattr(execute_cv_models, function_to_run)(
                model_num, fold, run_dir, model_type, model_name, data_file_name, fold_split_dict, table_writer,
                data_directory, hyper_parameters_dict, excel_models_results, trained_model_dir=trained_model_dir,
                trained_model=trained_model, model_file_name=model_file_name, test_data_file_name=test_data_file_name,
                predict_type='test')
            model_class.load_data_create_model()
            model_class.predict()
            results_dict = model_class.eval_model()
            results_df = pd.DataFrame.from_dict(results_dict).T
            results_df['raisha_round'] = results_df.index
            results_df[['Raisha', 'Round']] = results_df.raisha_round.str.split(expand=True)
            results_df = results_df.drop('raisha_round', axis=1)
            results_df.index = np.zeros(shape=(results_df.shape[0],))
            results_df = metadata_df.join(results_df)
            all_models_results = pd.concat([all_models_results, results_df], sort=False)
            utils.write_to_excel(model_class.model_table_writer, 'Model results', ['Model results'], results_df)
            model_class.model_table_writer.save()

    utils.write_to_excel(table_writer, 'All models results', ['All models results'], all_models_results)
    table_writer.save()

    logging.info(f'Finish predict best models')
    print(f'Finish predict best models')


import pandas as pd
import os
import utils
from datetime import datetime
import logging
import json
import execute_cv_models
import joblib
import numpy as np
import torch


base_directory = os.path.abspath(os.curdir)
condition = 'verbal'
data_directory = os.path.join(base_directory, 'data', condition, 'cv_framework')
run_dir = utils.set_folder(datetime.now().strftime(f'predict_best_models_%d_%m_%Y_%H_%M'), 'logs')


def execute_fold_parallel(participants_fold: pd.Series,
                          fold: int,
                          cuda_device: str,
                          hyper_parameters_tune_mode: bool = False,
                          three_losses: bool = False,
                          leaky_relu: bool = False):
    """
    This function get a dict that split the participant to train-val-test (for this fold) and run all the models
    we want to compare --> it train them using the train data and evaluate them using the val data
    :param participants_fold: split the participant to train-val-test (for this fold)
    :param fold: the fold number
    :param cuda_device: the number of cuda device if using it
    :param hyper_parameters_tune_mode: after find good data - hyper parameter tuning
    :param three_losses: if we want 3 losses for avg_turn models
    :param leaky_relu: if we wan to use leaky_relu in linear layers
    :return:
    """
    # get the train, test, validation participant code for this fold
    os.environ["CUDA_VISIBLE_DEVICES"] = cuda_device
    fold_split_dict = dict()
    for data_set in ['train', 'test', 'validation']:
        fold_split_dict[data_set] = participants_fold.loc[
            participants_fold == data_set].index.tolist()

    # models_to_compare should have for each row:
    # model_num, model_type, model_name, function_to_run, data_file_name, hyper_parameters
    # (strings of all parameters for the running function as dict: {'parameter_name': parameter_value})
    models_to_compare = pd.read_excel(os.path.join(
        base_directory, 'models_to_hyper_parameters.xlsx'),
                                      sheet_name='table_to_load',
                                      skiprows=[0])
    fold_dir = utils.set_folder(f'fold_{fold}', run_dir)
    excel_models_results = utils.set_folder(folder_name='excel_models_results',
                                            father_folder_path=fold_dir)
    # for test
    test_fold_dir = utils.set_folder(f'fold_{fold}', test_dir)
    excel_test_models_results = utils.set_folder(
        folder_name='excel_best_models_results',
        father_folder_path=test_fold_dir)
    test_participants_fold = pd.read_csv(
        os.path.join(data_directory, pair_folds_file_name))
    test_participants_fold.index = test_participants_fold.pair_id
    test_table_writer = pd.ExcelWriter(os.path.join(
        excel_test_models_results, f'Results_test_data_best_models.xlsx'),
                                       engine='xlsxwriter')
    # table_writer = pd.ExcelWriter(os.path.join(excel_models_results, f'Results_fold_{fold}_all_models.xlsx'),
    #                               engine='xlsxwriter')
    table_writer = None
    log_file_name = os.path.join(fold_dir, f'LogFile_fold_{fold}.log')
    for handler in logging.root.handlers[:]:
        logging.root.removeHandler(handler)
    logging.basicConfig(
        filename=log_file_name,
        level=logging.DEBUG,
        format='%(asctime)s: %(levelname)s %(message)s',
        datefmt='%H:%M:%S',
    )
    # all_model_types = models_to_compare.model_type.unique()
    # all_model_types = ['LSTM_avg', 'LSTM_avg_turn', 'Transformer_avg_turn', 'Transformer_avg',
    #                    'LSTM_avg_turn_linear', 'Attention_avg']
    # all_model_types = ['Attention_avg']
    all_model_nums = list(set(models_to_compare.model_num))
    # already_trained_models = list(range(15, 21)) + list(range(11))
    # all_model_nums = [x for x in all_model_nums if x not in already_trained_models]
    all_model_nums = [78, 79, 80] + list(range(84, 87))
    # all_model_nums = [23, 24, 30, 31] + list(range(54, 63)) + list(range(69, 78)) + list(range(81, 84)) +\
    #                  list(range(163, 166)) + list(range(178, 181))
    # all_model_nums = list(range(34, 38)) + [40] + list(range(192, 195)) + list(range(90, 94)) + list(range(100, 103))
    all_model_nums = [36]

    all_models_results = pd.DataFrame()
    all_models_prediction_results = pd.DataFrame()

    for model_num in all_model_nums:  # compare all versions of each model type
        # if model_num != 79:
        #     continue
        model_type_versions = models_to_compare.loc[models_to_compare.model_num
                                                    == model_num]
        model_num_results_path = os.path.join(
            excel_models_results, f'model_num_results_{model_num}.pkl')
        if not os.path.isfile(model_num_results_path):
            model_num_results = pd.DataFrame(columns=[
                'model_num', 'model_name', 'model_type',
                'hyper_parameters_str', 'data_file_name', 'RMSE', 'Raisha',
                'Round'
            ])
            joblib.dump(model_num_results, model_num_results_path)
        for index, row in model_type_versions.iterrows(
        ):  # iterate over all the models to compare
            # get all model parameters
            model_type = row['model_type']
            model_name = row['model_name']

            if leaky_relu:
                model_name = model_name + '_leaky'
                model_num += 600

            # for 3 losses:
            if '_avg_turn' in model_type and three_losses:
                model_num += 700
                model_name = row['model_name'] + '_3_losses'
                avg_turn_gridsearch_params_inner = [{
                    'avg_loss': 1.0,
                    'turn_loss': 1.0,
                    'avg_turn_loss': 1.0
                }, {
                    'avg_loss': 2.0,
                    'turn_loss': 2.0,
                    'avg_turn_loss': 1.0
                }, {
                    'avg_loss': 1.0,
                    'turn_loss': 1.0,
                    'avg_turn_loss': 2.0
                }]
            else:
                avg_turn_gridsearch_params_inner = avg_turn_gridsearch_params

            function_to_run = row['function_to_run']
            data_file_name = row['data_file_name']
            test_data_file_name = row['test_data_file_name']
            hyper_parameters_str = row['hyper_parameters']
            # get hyper parameters as dict
            if type(hyper_parameters_str) == str:
                hyper_parameters_dict = json.loads(hyper_parameters_str)
            else:
                hyper_parameters_dict = None

            if hyper_parameters_dict is not None and 'features_max_size' in hyper_parameters_dict.keys(
            ):
                if int(hyper_parameters_dict['features_max_size']) > 3000:
                    continue

            if outer_is_debug:
                hyper_parameters_dict['num_epochs'] = 2
            else:
                hyper_parameters_dict['num_epochs'] = 100

            # each function needs to get: model_num, fold, fold_dir, model_type, model_name, data_file_name,
            # fold_split_dict, table_writer, data_directory, hyper_parameters_dict.
            # While running it needs to write the predictions to the table_writer and save the trained model with
            # the name model_name_model_num to the fold_dir.
            # It needs to return a dict with the final results over the evaluation data: {measure_name: measure}
            if hyper_parameters_tune_mode:
                if 'LSTM' in model_type or 'Transformer' in model_type:
                    if 'LSTM' in model_type and 'use_transformer' not in model_type:
                        gridsearch = lstm_gridsearch_params
                    else:  # for Transformer models and LSTM_use_transformer models
                        gridsearch = transformer_gridsearch_params
                    for i, parameters_dict in enumerate(gridsearch):
                        # if i > 1:
                        #     continue

                        new_hyper_parameters_dict = copy.deepcopy(
                            hyper_parameters_dict)
                        new_hyper_parameters_dict.update(parameters_dict)
                        if 'linear' in model_type and 'lstm_hidden_dim' in new_hyper_parameters_dict:
                            new_hyper_parameters_dict['linear_hidden_dim'] = \
                                int(0.5 * int(new_hyper_parameters_dict['lstm_hidden_dim']))
                        if '_avg_turn' in model_type:
                            for inner_i, inner_parameters_dict in enumerate(
                                    avg_turn_gridsearch_params_inner):
                                # if inner_i > 0:
                                #     break
                                new_hyper_parameters_dict.update(
                                    inner_parameters_dict)
                                new_model_name = f'{model_name}'
                                new_model_num = f'{model_num}_{i}_{inner_i}'
                                if os.path.isfile(
                                        os.path.join(
                                            excel_models_results,
                                            f'Results_fold_{fold}_model_{new_model_num}.xlsx'
                                        )):
                                    continue
                                all_models_results = execute_create_fit_predict_eval_model(
                                    function_to_run, new_model_num, fold,
                                    fold_dir, model_type, new_model_name,
                                    data_file_name, fold_split_dict,
                                    table_writer, new_hyper_parameters_dict,
                                    excel_models_results, all_models_results,
                                    model_num_results_path)
                        else:
                            new_model_name = f'{model_name}'
                            new_model_num = f'{model_num}_{i}'
                            if os.path.isfile(
                                    os.path.join(
                                        excel_models_results,
                                        f'Results_fold_{fold}_model_{new_model_num}.xlsx'
                                    )):
                                continue
                            all_models_results = execute_create_fit_predict_eval_model(
                                function_to_run, new_model_num, fold, fold_dir,
                                model_type, new_model_name, data_file_name,
                                fold_split_dict, table_writer,
                                new_hyper_parameters_dict,
                                excel_models_results, all_models_results,
                                model_num_results_path)
                elif 'SVM' in model_type or 'Baseline' in model_type:
                    num_iterates = 1
                    if 'baseline' in model_name or 'Baseline' in model_type:
                        svm_gridsearch_params_inner = [{}]
                    else:
                        svm_gridsearch_params_inner = svm_gridsearch_params
                    if 'stratified' in model_name:
                        num_iterates = 5000
                    for i, parameters_dict in enumerate(
                            svm_gridsearch_params_inner):
                        # if i > 0:
                        #     continue
                        new_hyper_parameters_dict = copy.deepcopy(
                            hyper_parameters_dict)
                        new_hyper_parameters_dict.update(parameters_dict)
                        new_model_name = f'{model_name}'
                        new_model_num = f'{model_num}_{i}'
                        if os.path.isfile(
                                os.path.join(
                                    excel_models_results,
                                    f'Results_fold_{fold}_model_{new_model_num}.xlsx'
                                )):
                            continue
                        all_models_results = execute_create_fit_predict_eval_model(
                            function_to_run,
                            new_model_num,
                            fold,
                            fold_dir,
                            model_type,
                            new_model_name,
                            data_file_name,
                            fold_split_dict,
                            table_writer,
                            new_hyper_parameters_dict,
                            excel_models_results,
                            all_models_results,
                            model_num_results_path,
                            num_iterates=num_iterates)
                elif 'CRF' in model_type:
                    for i, parameters_dict in enumerate(crf_gridsearch_params):
                        # if i > 0:
                        #     continue
                        new_hyper_parameters_dict = copy.deepcopy(
                            hyper_parameters_dict)
                        new_hyper_parameters_dict.update(parameters_dict)
                        new_model_name = f'{model_name}'
                        new_model_num = f'{model_num}_{i}'
                        if os.path.isfile(
                                os.path.join(
                                    excel_models_results,
                                    f'Results_fold_{fold}_model_{new_model_num}.xlsx'
                                )):
                            continue
                        all_models_results = execute_create_fit_predict_eval_model(
                            function_to_run, new_model_num, fold, fold_dir,
                            model_type, new_model_name, data_file_name,
                            fold_split_dict, table_writer,
                            new_hyper_parameters_dict, excel_models_results,
                            all_models_results, model_num_results_path)
                else:
                    print(
                        'Model type must be LSTM-kind, Transformer-kind, CRF-kind, SVM-kind or Baseline'
                    )

                # select the best hyper-parameters set for this model based on the RMSE
                model_num_results = joblib.load(model_num_results_path)
                if model_num_results.empty:
                    continue
                argmin_index = model_num_results.RMSE.argmin()
                best_model = model_num_results.iloc[argmin_index]
                model_version_num = best_model.model_num
                logging.info(
                    f'Best model version for model {model_num}-{model_name} in fold {fold} is: '
                    f'{model_version_num}. Start predict over test data')
                print(
                    f'Best model version for model {model_num}-{model_name} in fold {fold} is: '
                    f'{model_version_num}. Start predict over test data')

                # predict on test data using the best version of this model
                test_fold_split_dict = dict()
                test_pair_ids_in_fold = test_participants_fold[f'fold_{fold}']
                for data_set in ['train', 'test', 'validation']:
                    test_fold_split_dict[data_set] = \
                        test_pair_ids_in_fold.loc[test_pair_ids_in_fold == data_set].index.tolist()
                hyper_parameters_str = best_model.hyper_parameters_str
                model_folder = run_dir
                if not os.path.exists(
                        os.path.join(base_directory, 'logs', model_folder,
                                     f'fold_{fold}')):
                    if not os.path.exists(
                            os.path.join(base_directory, 'logs',
                                         f'{model_folder}_best',
                                         f'fold_{fold}')):
                        # the folder we need does not exist
                        print(
                            f'fold {fold} in folder {model_folder} does not exist'
                        )
                        continue
                    else:
                        model_folder = f'{model_folder}_best'
                # get hyper parameters as dict
                if type(hyper_parameters_str) == str:
                    hyper_parameters_dict = json.loads(hyper_parameters_str)
                elif type(hyper_parameters_str) == dict:
                    hyper_parameters_dict = hyper_parameters_str
                else:
                    hyper_parameters_dict = None
                    print('no hyper parameters dict')

                num_epochs = hyper_parameters_dict['num_epochs']

                model_file_name = f'{model_version_num}_{model_type}_{model_name}_fold_{fold}.pkl'
                if function_to_run == 'ExecuteEvalLSTM':
                    inner_model_folder = \
                        f'{model_version_num}_{model_type}_{model_name}_{num_epochs}_epochs_fold_num_{fold}'
                else:
                    inner_model_folder = ''
                trained_model_dir = os.path.join(base_directory, 'logs',
                                                 model_folder, f'fold_{fold}',
                                                 inner_model_folder)
                # if torch.cuda.is_available() or function_to_run != 'ExecuteEvalLSTM':
                trained_model = joblib.load(
                    os.path.join(trained_model_dir, model_file_name))
                # else:
                #     trained_model = torch.load(os.path.join(trained_model_dir, model_file_name),
                #                                map_location=torch.device('cpu'))

                metadata_dict = {
                    'model_num': model_num,
                    'model_type': model_type,
                    'model_name': model_name,
                    'data_file_name': data_file_name,
                    'test_data_file_name': test_data_file_name,
                    'hyper_parameters_str': hyper_parameters_dict,
                    'fold': fold,
                    'best_model_version_num': model_version_num
                }

                metadata_df = pd.DataFrame.from_dict(metadata_dict,
                                                     orient='index').T
                model_class = getattr(execute_cv_models, function_to_run)(
                    model_num, fold, test_fold_dir, model_type, model_name,
                    data_file_name, test_fold_split_dict, test_table_writer,
                    data_directory, hyper_parameters_dict,
                    excel_test_models_results, trained_model,
                    trained_model_dir, model_file_name, test_data_file_name,
                    'test')

                model_class.load_data_create_model()
                model_class.predict()
                results_dict = model_class.eval_model()
                results_df = pd.DataFrame.from_dict(results_dict).T
                results_df['raisha_round'] = results_df.index
                results_df[['Raisha', 'Round'
                            ]] = results_df.raisha_round.str.split(expand=True)
                results_df = results_df.drop('raisha_round', axis=1)
                results_df.index = np.zeros(shape=(results_df.shape[0], ))
                results_df = metadata_df.join(results_df)
                all_models_prediction_results = pd.concat(
                    [all_models_prediction_results, results_df], sort=False)
                utils.write_to_excel(model_class.model_table_writer,
                                     'Model results', ['Model results'],
                                     results_df)
                model_class.model_table_writer.save()

            else:  # no hyper parameters
                all_models_results = execute_create_fit_predict_eval_model(
                    function_to_run, model_num, fold, fold_dir, model_type,
                    model_name, data_file_name, fold_split_dict, table_writer,
                    hyper_parameters_dict, excel_models_results,
                    all_models_results, model_num_results_path)

    utils.write_to_excel(table_writer, 'All models results',
                         ['All models results'], all_models_results)
    if table_writer is not None:
        table_writer.save()
    if test_table_writer is not None:
        utils.write_to_excel(test_table_writer, 'All models results',
                             ['All models results'],
                             all_models_prediction_results)
        test_table_writer.save()

    logging.info(f'fold {fold} finish compare models')
    print(f'fold {fold} finish compare models')

    return f'fold {fold} finish compare models'