Example #1
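All of these examples were extracted from the same project, so they rely on module-level imports that the page does not show. A representative set, with module paths assumed from the identifiers used below (the individual recommender classes are imported analogously from their own modules), would be:

import os
import multiprocessing
from functools import partial
from itertools import combinations

# Project-specific modules; the paths below are assumptions inferred from the code.
from DataParser import DataParser
from Data_manager.split_functions.split_train_validation_random_holdout import \
    split_train_in_two_percentage_global_sample
from ParameterTuning.run_parameter_search import runParameterSearch_Collaborative, runParameterSearch_Content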
def read_data_split_and_search():
    """
    This function provides a simple example of how to tune the parameters of a given algorithm

    The BayesianSearch object will save:
        - A .txt file with all the cases explored and the recommendation quality
        - A _best_model file which contains the trained model and can be loaded with recommender.load_model()
        - A _best_parameter file which contains a dictionary with all the fit parameters, it can be passed to recommender.fit(**_best_parameter)
        - A _best_result_validation file which contains a dictionary with the results of the best solution on the validation set
        - A _best_result_test file which contains a dictionary with the results, on the test set, of the best solution chosen using the validation set
    """

    parser = DataParser()

    URM_all = parser.get_URM_all()
    ICM_obj = parser.get_ICM_all()
    URM_train, URM_test = split_train_in_two_percentage_global_sample(
        URM_all, train_percentage=0.80)
    URM_train, URM_validation = split_train_in_two_percentage_global_sample(
        URM_train, train_percentage=0.85)
    """
    26-10-2020
    > OPTIMIZATION ON THE RANGE [200, +INF)
    
    Optimizations already done:
    >
    
    RECOMMENDERS I AM CONSIDERING (the fastest up to now)
    > PureSVD
    > ItemKNNCBF
    > ItemKNNCF
    > UserKNNCF
    > P3A
    > RP3beta
    """
    f_range = (200, -1)  # -1 marks an open upper bound, i.e. the range [200, +inf) noted above

    URM_validation = parser.filter_URM_test_by_range(URM_train, URM_validation,
                                                     f_range)
    URM_test = parser.filter_URM_test_by_range(URM_train, URM_test, f_range)
    output_folder_path = "result_experiments_v2/" + "range_" + str(
        f_range[0]) + "-" + str(f_range[1]) + "/"

    # If directory does not exist, create
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)
    """
    collaborative_algorithm_list = [
        #EASE_R_Recommender
        PipeHybrid001,
        #Random,
        #TopPop,
        #P3alphaRecommender,
        #RP3betaRecommender,
        #ItemKNNCFRecommender,
        #UserKNNCFRecommender,
        #MatrixFactorization_BPR_Cython,
        #MatrixFactorization_FunkSVD_Cython,
        #PureSVDRecommender,
        #NMFRecommender,
        #PureSVDItemRecommender
        #SLIM_BPR_Cython,
        #SLIMElasticNetRecommender
        #IALSRecommender
        #MF_MSE_PyTorch
        #MergedHybrid000
    ]

    content_algorithm_list= [
        #ItemKNNCBFRecommender
    ]
    """

    algorithm_in_sequence = [(ItemKNNCFRecommender, 'CF'),
                             (UserKNNCFRecommender, 'CF'),
                             (P3alphaRecommender, 'CF'),
                             (RP3betaRecommender, 'CF'),
                             (PureSVDRecommender, 'CF'),
                             (ItemKNNCBFRecommender, 'CBF')]

    from Base.Evaluation.Evaluator import EvaluatorHoldout

    evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[10])
    evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])

    for algo, algo_type in algorithm_in_sequence:
        print(f"OPTIMIZING {algo.RECOMMENDER_NAME} - {algo_type}")
        if algo_type == 'CF':
            collaborative_algorithm_list = [algo]

            runParameterSearch_Collaborative_partial = partial(
                runParameterSearch_Collaborative,
                URM_train=URM_train,
                ICM_train=ICM_obj,
                metric_to_optimize="MAP",
                n_cases=50,
                n_random_starts=int(50 * 0.3),
                evaluator_validation_earlystopping=evaluator_validation,
                evaluator_validation=evaluator_validation,
                evaluator_test=evaluator_test,
                output_folder_path=output_folder_path,
                allow_weighting=False,  # NOTE: feature weighting intentionally disabled here
                parallelizeKNN=False)
            pool = multiprocessing.Pool(processes=int(
                multiprocessing.cpu_count()),
                                        maxtasksperchild=1)
            pool.map(runParameterSearch_Collaborative_partial,
                     collaborative_algorithm_list)

        elif algo_type == 'CBF':
            content_algorithm_list = [algo]
            runParameterSearch_Content_partial = partial(
                runParameterSearch_Content,
                URM_train=URM_train,
                ICM_object=ICM_obj,
                ICM_name='BookFeatures',
                n_cases=50,
                n_random_starts=int(50 * 0.3),
                evaluator_validation=evaluator_validation,
                evaluator_test=evaluator_test,
                metric_to_optimize="MAP",
                parallelizeKNN=False,
                allow_weighting=True,
                #similarity_type_list=['cosine']
            )
            pool = multiprocessing.Pool(processes=int(
                multiprocessing.cpu_count()),
                                        maxtasksperchild=1)
            pool.map(runParameterSearch_Content_partial,
                     content_algorithm_list)
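The docstring above describes the artifacts that every search leaves behind; they are what makes a finished run reusable. A minimal sketch of restoring a tuned model, assuming (as the docstring states) that _best_parameter is a pickled dictionary of fit parameters and that the model file pairs with recommender.load_model():

import pickle

def restore_best_model(recommender_class, URM_train, folder, name):
    # Rebuild the recommender and load the weights saved by the search.
    recommender = recommender_class(URM_train)
    recommender.load_model(folder, name + "_best_model")
    return recommender

def refit_with_best_parameters(recommender_class, URM_train, folder, name):
    # Alternatively, re-fit from scratch using the stored best parameters.
    with open(folder + name + "_best_parameter", "rb") as file:
        best_parameters = pickle.load(file)
    recommender = recommender_class(URM_train)
    recommender.fit(**best_parameters)
    return recommender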
Example #2

def read_data_split_and_search():
    """
    This function provides a simple example of how to tune the parameters of a given algorithm

    The BayesianSearch object will save:
        - A .txt file with all the cases explored and the recommendation quality
        - A _best_model file which contains the trained model and can be loaded with recommender.load_model()
        - A _best_parameter file which contains a dictionary with all the fit parameters, it can be passed to recommender.fit(**_best_parameter)
        - A _best_result_validation file which contains a dictionary with the results of the best solution on the validation set
        - A _best_result_test file which contains a dictionary with the results, on the test set, of the best solution chosen using the validation set
    """

    seed = 1205
    parser = DataParser()

    URM_all = parser.get_URM_all()
    ICM_obj = parser.get_ICM_all()

    # SPLIT TO GET TEST PARTITION
    URM_train, URM_test = split_train_in_two_percentage_global_sample(
        URM_all, train_percentage=0.90, seed=seed)

    # SPLIT TO GET THE HYBRID VALID PARTITION
    URM_train, URM_valid_hybrid = split_train_in_two_percentage_global_sample(
        URM_train, train_percentage=0.85, seed=seed)

    # SPLIT TO GET THE sub_rec VALID PARTITION
    URM_train, URM_valid_sub = split_train_in_two_percentage_global_sample(
        URM_train, train_percentage=0.85, seed=seed)

    output_folder_path = "result_experiments_v3/seed_" + str(seed) + '/'

    # If directory does not exist, create
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    collaborative_algorithm_list = [
        #EASE_R_Recommender
        #PipeHybrid001,
        #Random,
        #TopPop,
        #P3alphaRecommender,
        #RP3betaRecommender,
        #ItemKNNCFRecommender,
        #UserKNNCFRecommender,
        #MatrixFactorization_BPR_Cython,
        #MatrixFactorization_FunkSVD_Cython,
        #PureSVDRecommender,
        #NMFRecommender,
        #PureSVDItemRecommender
        #SLIM_BPR_Cython,
        SLIMElasticNetRecommender
        #IALSRecommender
        #MF_MSE_PyTorch
        #MergedHybrid000
        #LinearHybrid002
    ]

    content_algorithm_list = [
        #ItemKNNCBFRecommender
    ]

    from Base.Evaluation.Evaluator import EvaluatorHoldout

    evaluator_valid_sub = EvaluatorHoldout(URM_valid_sub, cutoff_list=[10])
    evaluator_valid_hybrid = EvaluatorHoldout(URM_valid_hybrid,
                                              cutoff_list=[10])
    evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])
    """
        # TODO: set the correct evaluators here!
    runParameterSearch_Content_partial = partial(runParameterSearch_Content,
                                                 URM_train=URM_train,
                                                 ICM_object=ICM_obj,
                                                 ICM_name='1BookFeatures',
                                                 n_cases = 50,
                                                 n_random_starts = 20,
                                                 evaluator_validation= evaluator_valid_sub,
                                                 evaluator_test = evaluator_valid_hybrid,
                                                 metric_to_optimize = "MAP",
                                                 output_folder_path=output_folder_path,
                                                 parallelizeKNN = False,
                                                 allow_weighting = True,
                                                 #similarity_type_list = ['cosine']
                                                 )
    pool = multiprocessing.Pool(processes=int(multiprocessing.cpu_count()), maxtasksperchild=1)
    pool.map(runParameterSearch_Content_partial, content_algorithm_list)
    """

    # TODO: set the correct evaluators here!
    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train=URM_train,
        ICM_train=ICM_obj,
        metric_to_optimize="MAP",
        n_cases=50,
        n_random_starts=20,
        evaluator_validation_earlystopping=evaluator_valid_sub,
        evaluator_validation=evaluator_valid_sub,
        evaluator_test=evaluator_valid_hybrid,
        output_folder_path=output_folder_path,
        allow_weighting=False,
        #similarity_type_list = ["cosine", 'jaccard'],
        parallelizeKNN=False)
    pool = multiprocessing.Pool(processes=int(multiprocessing.cpu_count()),
                                maxtasksperchild=1)
    pool.map(runParameterSearch_Collaborative_partial,
             collaborative_algorithm_list)
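Note the dispatch mechanism shared by all of these examples: functools.partial freezes every shared keyword argument of the search function, and Pool.map then invokes it once per recommender class, each in a fresh worker process (maxtasksperchild=1). A stripped-down, self-contained illustration of that pattern (toy names, not project code):

import multiprocessing
from functools import partial

def run_search(recommender_class, n_cases, metric):
    # Stand-in for runParameterSearch_Collaborative: the positional argument
    # comes from pool.map, the keyword arguments were frozen by partial().
    print(f"searching {recommender_class.__name__}: {n_cases} cases on {metric}")

if __name__ == '__main__':
    run_search_partial = partial(run_search, n_cases=50, metric="MAP")
    with multiprocessing.Pool(processes=2, maxtasksperchild=1) as pool:
        pool.map(run_search_partial, [dict, list])  # any classes serve as payload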
Example #3
            user_id_array=user_id_array,
            cutoff=cutoff,
            remove_seen_flag=remove_seen_flag,
            items_to_compute=items_to_compute,
            remove_top_pop_flag=remove_top_pop_flag,
            remove_custom_items_flag=remove_custom_items_flag,
            return_scores=return_scores)

        return round_robin_merging([ranking_list1, ranking_list2])


if __name__ == '__main__':
    seed = 1205
    parser = DataParser('../data')
    URM_all = parser.get_URM_all()
    ICM_all = parser.get_ICM_all()
    URM_train, URM_test = split_train_in_two_percentage_global_sample(
        URM_all, train_percentage=0.85, seed=seed)

    recommender = ListMerged001(URM_train)
    recommender.fit()
    result_dict = evaluator(recommender, URM_test, cutoff=10)
    print("Round robin:")
    print(result_dict)

    result_dict = evaluator(recommender.ger_recommenders()[0],
                            URM_test,
                            cutoff=10)
    print("RP3beta:")
    print(result_dict)
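The snippet above calls round_robin_merging without showing it. A minimal sketch of the idea, assuming each ranking is a plain list of item ids already sorted by relevance: alternate across the lists, keep first occurrences only, stop at the cutoff.

def round_robin_merging(ranking_lists, cutoff=10):
    # Interleave the rankings one position at a time, skipping duplicates.
    merged, seen = [], set()
    for position in range(max(len(ranking) for ranking in ranking_lists)):
        for ranking in ranking_lists:
            if position < len(ranking) and ranking[position] not in seen:
                seen.add(ranking[position])
                merged.append(ranking[position])
                if len(merged) == cutoff:
                    return merged
    return merged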
Example #4

def read_data_split_and_search():
    """
    This function provides a simple example of how to tune the parameters of a given algorithm

    The BayesianSearch object will save:
        - A .txt file with all the cases explored and the recommendation quality
        - A _best_model file which contains the trained model and can be loaded with recommender.load_model()
        - A _best_parameter file which contains a dictionary with all the fit parameters, it can be passed to recommender.fit(**_best_parameter)
        - A _best_result_validation file which contains a dictionary with the results of the best solution on the validation set
        - A _best_result_test file which contains a dictionary with the results, on the test set, of the best solution chosen using the validation set
    """

    seed = 1205
    parser = DataParser()

    URM_all = parser.get_URM_all()
    ICM_obj = parser.get_ICM_all()

    # SPLIT TO GET TEST PARTITION
    URM_train, URM_test = split_train_in_two_percentage_global_sample(
        URM_all, train_percentage=0.90, seed=seed)

    # SPLIT TO GET THE HYBRID VALID PARTITION
    URM_train, URM_valid_hybrid = split_train_in_two_percentage_global_sample(
        URM_train, train_percentage=0.85, seed=seed)

    # SPLIT TO GET THE sub_rec VALID PARTITION
    URM_train_bis, URM_valid_sub = split_train_in_two_percentage_global_sample(
        URM_train, train_percentage=0.85, seed=seed)

    collaborative_algorithm_list = [
        #EASE_R_Recommender
        #PipeHybrid001,
        #Random,
        #TopPop,
        #P3alphaRecommender,
        #RP3betaRecommender,
        #ItemKNNCFRecommender,
        #UserKNNCFRecommender,
        #MatrixFactorization_BPR_Cython,
        #MatrixFactorization_FunkSVD_Cython,
        #PureSVDRecommender,
        #NMFRecommender,
        #PureSVDItemRecommender
        #SLIM_BPR_Cython,
        #SLIMElasticNetRecommender
        #IALSRecommender
        #MF_MSE_PyTorch
        #MergedHybrid000
        #LinearHybrid002ggg
        HybridCombinationSearch
    ]

    content_algorithm_list = [
        #ItemKNNCBFRecommender
    ]

    from Base.Evaluation.Evaluator import EvaluatorHoldout

    evaluator_valid_sub = EvaluatorHoldout(URM_valid_sub, cutoff_list=[10])
    evaluator_valid_hybrid = EvaluatorHoldout(URM_valid_hybrid,
                                              cutoff_list=[10])
    evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])
    """
        # TODO: set the correct evaluators here!
    runParameterSearch_Content_partial = partial(runParameterSearch_Content,
                                                 URM_train=URM_train,
                                                 ICM_object=ICM_obj,
                                                 ICM_name='1BookFeatures',
                                                 n_cases = 50,
                                                 n_random_starts = 20,
                                                 evaluator_validation= evaluator_valid_sub,
                                                 evaluator_test = evaluator_valid_hybrid,
                                                 metric_to_optimize = "MAP",
                                                 output_folder_path=output_folder_path,
                                                 parallelizeKNN = False,
                                                 allow_weighting = True,
                                                 #similarity_type_list = ['cosine']
                                                 )
    pool = multiprocessing.Pool(processes=int(multiprocessing.cpu_count()), maxtasksperchild=1)
    pool.map(runParameterSearch_Content_partial, content_algorithm_list)
    """
    print("Rp3beta training...")
    rp3b = RP3betaRecommender(URM_train, verbose=False)
    rp3b_params = {
        'topK': 1000,
        'alpha': 0.38192761611274967,
        'beta': 0.0,
        'normalize_similarity': False
    }
    rp3b.fit(**rp3b_params)
    print("Done")
    print("P3alpha training...")
    p3a = P3alphaRecommender(URM_train, verbose=False)
    p3a_params = {
        'topK': 131,
        'alpha': 0.33660811631883863,
        'normalize_similarity': False
    }
    p3a.fit(**p3a_params)
    print("Done")
    print("ItemKnnCF training...")
    icf = ItemKNNCFRecommender(URM_train, verbose=False)
    icf_params = {
        'topK': 100,
        'shrink': 1000,
        'similarity': 'asymmetric',
        'normalize': True,
        'asymmetric_alpha': 0.0
    }
    icf.fit(**icf_params)
    print("Done")
    print("UserKnnCF training...")
    ucf = UserKNNCFRecommender(URM_train, verbose=False)
    ucf_params = {
        'topK': 190,
        'shrink': 0,
        'similarity': 'cosine',
        'normalize': True
    }
    ucf.fit(**ucf_params)
    print("Done")
    print("ItemKnnCBF training...")
    icb = ItemKNNCBFRecommender(URM_train, ICM_obj, verbose=False)
    icb_params = {
        'topK': 205,
        'shrink': 1000,
        'similarity': 'cosine',
        'normalize': True,
        'feature_weighting': 'BM25'
    }
    icb.fit(**icb_params)
    print("Done")
    print("SlimBPR training...")
    sbpr = SLIM_BPR_Cython(URM_train, verbose=False)
    sbpr_params = {
        'topK': 979,
        'epochs': 130,
        'symmetric': False,
        'sgd_mode': 'adam',
        'lambda_i': 0.004947329669424629,
        'lambda_j': 1.1534760845071758e-05,
        'learning_rate': 0.0001
    }
    sbpr.fit(**sbpr_params)
    print("Done")
    print("SlimElasticNet training...")
    sen = SLIMElasticNetRecommender(URM_train, verbose=False)
    sen_params = {
        'topK': 992,
        'l1_ratio': 0.004065081925341167,
        'alpha': 0.003725005053334143
    }
    sen.fit(**sen_params)
    print("Done")

    list_recommender = [rp3b, p3a, icf, ucf, icb, sen, sbpr]
    list_already_seen = [rp3b, p3a, icf, ucf, icb]

    # skip the triples made only of recommenders explored in a previous run
    for rec_perm in combinations(list_recommender, 3):

        if rec_perm not in combinations(list_already_seen, 3):

            recommender_names = '_'.join(
                [r.RECOMMENDER_NAME for r in rec_perm])
            output_folder_path = "result_experiments_v3/seed_" + str(
                seed) + '/' + recommender_names + '/'

            # If directory does not exist, create
            if not os.path.exists(output_folder_path):
                os.makedirs(output_folder_path)

            # TODO: set the correct evaluators here!
            runParameterSearch_Collaborative_partial = partial(
                runParameterSearch_Collaborative,
                URM_train=URM_train,
                ICM_train=ICM_obj,
                metric_to_optimize="MAP",
                n_cases=50,
                n_random_starts=20,
                evaluator_validation_earlystopping=evaluator_valid_hybrid,
                evaluator_validation=evaluator_valid_hybrid,
                evaluator_test=evaluator_test,
                output_folder_path=output_folder_path,
                allow_weighting=False,
                #similarity_type_list = ["cosine", 'jaccard'],
                parallelizeKNN=False,
                list_rec=rec_perm)
            pool = multiprocessing.Pool(processes=int(
                multiprocessing.cpu_count()),
                                        maxtasksperchild=1)
            pool.map(runParameterSearch_Collaborative_partial,
                     collaborative_algorithm_list)
Example #5

def read_data_split_and_search():
    """
    This function provides a simple example of how to tune the parameters of a given algorithm

    The BayesianSearch object will save:
        - A .txt file with all the cases explored and the recommendation quality
        - A _best_model file which contains the trained model and can be loaded with recommender.load_model()
        - A _best_parameter file which contains a dictionary with all the fit parameters, it can be passed to recommender.fit(**_best_parameter)
        - A _best_result_validation file which contains a dictionary with the results of the best solution on the validation set
        - A _best_result_test file which contains a dictionary with the results, on the test set, of the best solution chosen using the validation set
    """

    parser = DataParser()
    seed = 1666
    URM_all = parser.get_URM_all()
    ICM_obj = parser.get_ICM_all()

    URM_train, URM_test = split_train_in_two_percentage_global_sample(
        URM_all, train_percentage=0.85, seed=seed)
    URM_train, URM_validation = split_train_in_two_percentage_global_sample(
        URM_train, train_percentage=0.85, seed=seed)

    k = 5

    output_folder_path = "result_experiments_CV/"

    # If directory does not exist, create
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    collaborative = True

    content_algorithm_list = [
        #ItemKNNCBFRecommender
    ]

    collaborative_algorithm_list = [
        #Random,
        #TopPop,
        #P3alphaRecommender,
        #RP3betaRecommender,
        ItemKNNCFRecommender,
        #UserKNNCFRecommender,
        #MatrixFactorization_BPR_Cython,
        #MatrixFactorization_FunkSVD_Cython,
        #PureSVDRecommender,
        #SLIM_BPR_Cython,
        #SLIMElasticNetRecommender,
        #IALSRecommender,
    ]

    from Base.Evaluation.Evaluator import EvaluatorHoldout

    evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[5])
    evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])

    if not collaborative:
        runParameterSearch_Content_partial = partial(
            runParameterSearch_Content,
            URM_train=URM_train,
            ICM_object=ICM_obj,
            ICM_name='1BookFeatures',
            n_cases=50,
            n_random_starts=20,
            metric_to_optimize="MAP",
            output_folder_path=output_folder_path,
            parallelizeKNN=False,
            allow_weighting=True,
            #similarity_type_list = ['cosine']
            k=k,
            seed=seed)

        pool = multiprocessing.Pool(processes=int(multiprocessing.cpu_count()),
                                    maxtasksperchild=1)
        pool.map(runParameterSearch_Content_partial, content_algorithm_list)

    else:
        runParameterSearch_Collaborative_partial = partial(
            runParameterSearch_Collaborative,
            URM_train=URM_train,
            metric_to_optimize="MAP",
            n_cases=50,
            n_random_starts=20,
            #evaluator_test = evaluator_test,
            output_folder_path=output_folder_path,
            similarity_type_list=["cosine"],
            parallelizeKNN=False,
            allow_weighting=False,
            k=k,
            seed=seed)

        pool = multiprocessing.Pool(processes=int(multiprocessing.cpu_count()),
                                    maxtasksperchild=1)
        pool.map(runParameterSearch_Collaborative_partial,
                 collaborative_algorithm_list)
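Unlike the holdout examples, this variant forwards k and seed to the search, which suggests a k-fold cross-validation inside the tuner. The project's own splitter is not shown; as an assumption-labeled sketch, k folds over the interactions of a sparse URM could be built like this:

import numpy as np
import scipy.sparse as sps

def split_k_folds(URM_all, k=5, seed=1666):
    # Shuffle all (user, item) interactions once, then deal them into k folds;
    # each fold serves as the validation split of one train/validation pair.
    URM_coo = URM_all.tocoo()
    rng = np.random.default_rng(seed)
    order = rng.permutation(URM_coo.nnz)
    for valid_idx in np.array_split(order, k):
        mask = np.zeros(URM_coo.nnz, dtype=bool)
        mask[valid_idx] = True
        URM_valid = sps.csr_matrix(
            (URM_coo.data[mask], (URM_coo.row[mask], URM_coo.col[mask])),
            shape=URM_all.shape)
        URM_train = sps.csr_matrix(
            (URM_coo.data[~mask], (URM_coo.row[~mask], URM_coo.col[~mask])),
            shape=URM_all.shape)
        yield URM_train, URM_valid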
Example #6

def read_data_split_and_search():
    """
    This function provides a simple example of how to tune the parameters of a given algorithm

    The BayesianSearch object will save:
        - A .txt file with all the cases explored and the recommendation quality
        - A _best_model file which contains the trained model and can be loaded with recommender.load_model()
        - A _best_parameter file which contains a dictionary with all the fit parameters, it can be passed to recommender.fit(**_best_parameter)
        - A _best_result_validation file which contains a dictionary with the results of the best solution on the validation set
        - A _best_result_test file which contains a dictionary with the results, on the test set, of the best solution chosen using the validation set
    """

    seed = 1205
    parser = DataParser()

    URM_all = parser.get_URM_all()
    ICM_obj = parser.get_ICM_all()

    # SPLIT TO GET TEST PARTITION
    URM_train, URM_test = split_train_in_two_percentage_global_sample(
        URM_all, train_percentage=0.90, seed=seed)

    # SPLIT TO GET THE HYBRID VALID PARTITION
    URM_train, URM_valid_hybrid = split_train_in_two_percentage_global_sample(
        URM_train, train_percentage=0.85, seed=seed)

    URM_valid_hybrid = parser.filter_URM_test_by_range(URM_train,
                                                       URM_valid_hybrid,
                                                       (3, -1))

    collaborative_algorithm_list = [
        # EASE_R_Recommender
        # PipeHybrid001,
        # Random,
        # TopPop,
        # P3alphaRecommender,
        # RP3betaRecommender,
        # ItemKNNCFRecommender,
        # UserKNNCFRecommender,
        # MatrixFactorization_BPR_Cython,
        # MatrixFactorization_FunkSVD_Cython,
        # PureSVDRecommender,
        # NMFRecommender,
        # PureSVDItemRecommender
        # SLIM_BPR_Cython,
        # SLIMElasticNetRecommender
        # IALSRecommender
        # MF_MSE_PyTorch
        # MergedHybrid000
        # LinearHybrid002ggg
        HybridCombinationSearch
    ]

    content_algorithm_list = [
        # ItemKNNCBFRecommender
    ]

    from Base.Evaluation.Evaluator import EvaluatorHoldout

    evaluator_valid_hybrid = EvaluatorHoldout(URM_valid_hybrid,
                                              cutoff_list=[10])
    evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])
    """
    earlystopping_keywargs = {"validation_every_n": 5,
                              "stop_on_validation": True,
                              "evaluator_object": evaluator_valid_hybrid,
                              "lower_validations_allowed": 5,
                              "validation_metric": 'MAP',
                              }
    
    print('IALS training...')
    ials = IALSRecommender(URM_train, verbose=False)
    ials_params = {'num_factors': 83, 'confidence_scaling': 'linear', 'alpha': 28.4278070726612,
                   'epsilon': 1.0234211788885077, 'reg': 0.0027328110246575004, 'epochs': 20}
    ials.fit(**ials_params, **earlystopping_keywargs)
    print("Done")
    
    
    print("PureSVD training...")
    psvd = PureSVDRecommender(URM_train, verbose=False)
    psvd_params = {'num_factors': 711}
    psvd.fit(**psvd_params)
    print("Done")
    """
    print("Rp3beta training...")
    rp3b = RP3betaRecommender(URM_train, verbose=False)
    rp3b_params = {
        'topK': 753,
        'alpha': 0.3873710051288722,
        'beta': 0.0,
        'normalize_similarity': False
    }
    rp3b.fit(**rp3b_params)
    print("Done")
    print("P3alpha training...")
    p3a = P3alphaRecommender(URM_train, verbose=False)
    p3a_params = {
        'topK': 438,
        'alpha': 0.41923120471415165,
        'normalize_similarity': False
    }
    p3a.fit(**p3a_params)
    print("Done")
    print("ItemKnnCF training...")
    icf = ItemKNNCFRecommender(URM_train, verbose=False)
    icf_params = {
        'topK': 565,
        'shrink': 554,
        'similarity': 'tversky',
        'normalize': True,
        'tversky_alpha': 1.9109121434662428,
        'tversky_beta': 1.7823834698905734
    }
    icf.fit(**icf_params)
    print("Done")
    print("UserKnnCF training...")
    ucf = UserKNNCFRecommender(URM_train, verbose=False)
    ucf_params = {
        'topK': 190,
        'shrink': 0,
        'similarity': 'cosine',
        'normalize': True
    }
    ucf.fit(**ucf_params)
    print("Done")
    print("ItemKnnCBF training...")
    icb = ItemKNNCBFRecommender(URM_train, ICM_obj, verbose=False)
    icb_params = {
        'topK': 205,
        'shrink': 1000,
        'similarity': 'cosine',
        'normalize': True,
        'feature_weighting': 'BM25'
    }
    icb.fit(**icb_params)
    print("Done")
    """
    print("SlimElasticNet training...")
    sen = SLIMElasticNetRecommender(URM_train, verbose=False)
    sen_params = {'topK': 954, 'l1_ratio': 3.87446082207643e-05, 'alpha': 0.07562657698792305}
    sen.fit(**sen_params)
    print("Done")
    """

    list_recommender = [icb, icf, ucf, p3a, rp3b]
    list_already_seen = []

    for rec_perm in combinations(list_recommender, 3):

        if rec_perm not in combinations(list_already_seen, 3):

            recommender_names = '_'.join(
                [r.RECOMMENDER_NAME for r in rec_perm])
            output_folder_path = "result_experiments_v3/seed_" + str(
                seed) + '_3--1' + '/' + recommender_names + '/'

            # If directory does not exist, create
            if not os.path.exists(output_folder_path):
                os.makedirs(output_folder_path)

            # TODO: set the correct evaluators here!
            runParameterSearch_Collaborative_partial = partial(
                runParameterSearch_Collaborative,
                URM_train=URM_train,
                ICM_train=ICM_obj,
                metric_to_optimize="MAP",
                n_cases=50,
                n_random_starts=20,
                evaluator_validation_earlystopping=evaluator_valid_hybrid,
                evaluator_validation=evaluator_valid_hybrid,
                evaluator_test=evaluator_test,
                output_folder_path=output_folder_path,
                allow_weighting=False,
                # similarity_type_list = ["cosine", 'jaccard'],
                parallelizeKNN=False,
                list_rec=rec_perm)
            pool = multiprocessing.Pool(processes=int(
                multiprocessing.cpu_count()),
                                        maxtasksperchild=1)
            pool.map(runParameterSearch_Collaborative_partial,
                     collaborative_algorithm_list)
Example #7

def read_data_split_and_search():
    """
    This function provides a simple example of how to tune the parameters of a given algorithm

    The BayesianSearch object will save:
        - A .txt file with all the cases explored and the recommendation quality
        - A _best_model file which contains the trained model and can be loaded with recommender.load_model()
        - A _best_parameter file which contains a dictionary with all the fit parameters, it can be passed to recommender.fit(**_best_parameter)
        - A _best_result_validation file which contains a dictionary with the results of the best solution on the validation set
        - A _best_result_test file which contains a dictionary with the results, on the test set, of the best solution chosen using the validation set
    """

    seed = 1205
    parser = DataParser()

    URM_all = parser.get_URM_all()
    ICM_obj = parser.get_ICM_all()

    # SPLIT TO GET TEST PARTITION
    URM_train, URM_test = split_train_in_two_percentage_global_sample(URM_all, train_percentage=0.90, seed=seed)

    # SPLIT TO GET THE HYBRID VALID PARTITION
    URM_train, URM_valid_hybrid = split_train_in_two_percentage_global_sample(URM_train, train_percentage=0.85,
                                                                              seed=seed)

    collaborative_algorithm_list = [
        # EASE_R_Recommender
        # PipeHybrid001,
        # Random,
        # TopPop,
        # P3alphaRecommender,
        # RP3betaRecommender,
        # ItemKNNCFRecommender,
        # UserKNNCFRecommender,
        # MatrixFactorization_BPR_Cython,
        # MatrixFactorization_FunkSVD_Cython,
        # PureSVDRecommender,
        # NMFRecommender,
        # PureSVDItemRecommender
        # SLIM_BPR_Cython,
        # SLIMElasticNetRecommender
        # IALSRecommender
        # MF_MSE_PyTorch
        # MergedHybrid000
        # LinearHybrid002ggg
        HybridCombinationSearch
    ]

    content_algorithm_list = [
        # ItemKNNCBFRecommender
    ]

    from Base.Evaluation.Evaluator import EvaluatorHoldout

    evaluator_valid_hybrid = EvaluatorHoldout(URM_valid_hybrid, cutoff_list=[10])
    evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])

    """
    earlystopping_keywargs = {"validation_every_n": 5,
                              "stop_on_validation": True,
                              "evaluator_object": evaluator_valid_hybrid,
                              "lower_validations_allowed": 5,
                              "validation_metric": 'MAP',
                              }

    print('IALS training...')
    ials = IALSRecommender(URM_train, verbose=False)
    ials_params = {'num_factors': 83, 'confidence_scaling': 'linear', 'alpha': 28.4278070726612,
                   'epsilon': 1.0234211788885077, 'reg': 0.0027328110246575004, 'epochs': 20}
    ials.fit(**ials_params, **earlystopping_keywargs)
    print("Done")


    print("PureSVD training...")
    psvd = PureSVDRecommender(URM_train, verbose=False)
    psvd_params = {'num_factors': 711}
    psvd.fit(**psvd_params)
    print("Done")
    """

    rp3b = RP3betaRecommender(URM_train, verbose=False)
    rp3b_params = {'topK': 1000, 'alpha': 0.38192761611274967, 'beta': 0.0, 'normalize_similarity': False}
    try:
        rp3b.load_model(f'stored_recommenders/seed_{str(seed)}_hybrid_search/',
                        f'{rp3b.RECOMMENDER_NAME}_for_second_search')
        print(f"{rp3b.RECOMMENDER_NAME} loaded.")
    except Exception:
        print(f"Fitting {rp3b.RECOMMENDER_NAME} ...")
        rp3b.fit(**rp3b_params)
        print(f"done.")
        rp3b.save_model(f'stored_recommenders/seed_{str(seed)}_hybrid_search/',
                        f'{rp3b.RECOMMENDER_NAME}_for_second_search')

    p3a = P3alphaRecommender(URM_train, verbose=False)
    p3a_params = {'topK': 131, 'alpha': 0.33660811631883863, 'normalize_similarity': False}
    try:
        p3a.load_model(f'stored_recommenders/seed_{str(seed)}_hybrid_search/',
                       f'{p3a.RECOMMENDER_NAME}_for_second_search')
        print(f"{p3a.RECOMMENDER_NAME} loaded.")
    except Exception:
        print(f"Fitting {p3a.RECOMMENDER_NAME} ...")
        p3a.fit(**p3a_params)
        print(f"done.")
        p3a.save_model(f'stored_recommenders/seed_{str(seed)}_hybrid_search/',
                       f'{p3a.RECOMMENDER_NAME}_for_second_search')

    icf = ItemKNNCFRecommender(URM_train, verbose=False)
    icf_params = {'topK': 55, 'shrink': 1000, 'similarity': 'asymmetric', 'normalize': True, 'asymmetric_alpha': 0.0}
    try:
        icf.load_model(f'stored_recommenders/seed_{str(seed)}_hybrid_search/',
                       f'{icf.RECOMMENDER_NAME}_for_second_search')
        print(f"{icf.RECOMMENDER_NAME} loaded.")
    except Exception:
        print(f"Fitting {icf.RECOMMENDER_NAME} ...")
        icf.fit(**icf_params)
        print(f"done.")
        icf.save_model(f'stored_recommenders/seed_{str(seed)}_hybrid_search/',
                       f'{icf.RECOMMENDER_NAME}_for_second_search')

    ucf = UserKNNCFRecommender(URM_train, verbose=False)
    ucf_params = {'topK': 190, 'shrink': 0, 'similarity': 'cosine', 'normalize': True}
    try:
        ucf.load_model(f'stored_recommenders/seed_{str(seed)}_hybrid_search/',
                       f'{ucf.RECOMMENDER_NAME}_for_second_search')
        print(f"{ucf.RECOMMENDER_NAME} loaded.")
    except Exception:
        print(f"Fitting {ucf.RECOMMENDER_NAME} ...")
        ucf.fit(**ucf_params)
        print(f"done.")
        ucf.save_model(f'stored_recommenders/seed_{str(seed)}_hybrid_search/',
                       f'{ucf.RECOMMENDER_NAME}_for_second_search')

    icb = ItemKNNCBFRecommender(URM_train, ICM_obj, verbose=False)
    icb_params = {'topK': 65, 'shrink': 0, 'similarity': 'dice', 'normalize': True}
    try:
        icb.load_model(f'stored_recommenders/seed_{str(seed)}_hybrid_search/',
                       f'{icb.RECOMMENDER_NAME}_for_second_search')
        print(f"{icb.RECOMMENDER_NAME} loaded.")
    except Exception:
        print(f"Fitting {icb.RECOMMENDER_NAME} ...")
        icb.fit(**icb_params)
        print(f"done.")
        icb.save_model(f'stored_recommenders/seed_{str(seed)}_hybrid_search/',
                       f'{icb.RECOMMENDER_NAME}_for_second_search')

    sen = SLIMElasticNetRecommender(URM_train, verbose=False)
    sen_params = {'topK': 992, 'l1_ratio': 0.004065081925341167, 'alpha': 0.003725005053334143}
    try:
        sen.load_model(f'stored_recommenders/seed_{str(seed)}_hybrid_search/',
                       f'{sen.RECOMMENDER_NAME}_for_second_search')
        print(f"{sen.RECOMMENDER_NAME} loaded.")
    except Exception:
        print(f"Fitting {sen.RECOMMENDER_NAME} ...")
        sen.fit(**sen_params)
        print(f"done.")
        sen.save_model(f'stored_recommenders/seed_{str(seed)}_hybrid_search/',
                       f'{sen.RECOMMENDER_NAME}_for_second_search')

    print("\nStart.")
    list_recommender = [icb, icf, ucf, p3a, rp3b, sen]
    list_already_seen = []
    combinations_already_seen = []
    """
    (icb, icf, p3a), (icb, icf, rp3b), (icb, icf, sen), (icb, p3a, rp3b), (icb, p3a, sen),
                                (icb, rp3b, sen), (icf, p3a, rp3b), (icf, p3a, sen)
    """

    for rec_perm in combinations(list_recommender, 3):

        if rec_perm not in combinations_already_seen:

            recommender_names = '_'.join([r.RECOMMENDER_NAME for r in rec_perm])
            output_folder_path = "result_experiments_v3/seed_" + str(
                seed) + '/linear_combination/' + recommender_names + '/'
            print(f"\nTESTING THE COMBO {recommender_names}")

            # If directory does not exist, create
            if not os.path.exists(output_folder_path):
                os.makedirs(output_folder_path)

            # TODO: set the correct evaluators here!
            runParameterSearch_Collaborative_partial = partial(runParameterSearch_Collaborative,
                                                               URM_train=URM_train,
                                                               ICM_train=ICM_obj,
                                                               metric_to_optimize="MAP",
                                                               n_cases=50,
                                                               n_random_starts=20,
                                                               evaluator_validation_earlystopping=evaluator_valid_hybrid,
                                                               evaluator_validation=evaluator_valid_hybrid,
                                                               #evaluator_test=evaluator_test,
                                                               output_folder_path=output_folder_path,
                                                               allow_weighting=False,
                                                               # similarity_type_list = ["cosine", 'jaccard'],
                                                               parallelizeKNN=False,
                                                               list_rec=rec_perm)
            pool = multiprocessing.Pool(processes=int(multiprocessing.cpu_count()), maxtasksperchild=1)
            pool.map(runParameterSearch_Collaborative_partial, collaborative_algorithm_list)
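The six try/except blocks above repeat one pattern: load a stored model if it exists, otherwise fit it and store it. A small helper (not part of the original code, but using exactly the load_model/save_model calls shown above) would fold the repetition into one function:

def load_or_fit(recommender, fit_params, seed):
    # Reuse a stored model when available; otherwise fit and store it.
    folder = f'stored_recommenders/seed_{seed}_hybrid_search/'
    name = f'{recommender.RECOMMENDER_NAME}_for_second_search'
    try:
        recommender.load_model(folder, name)
        print(f"{recommender.RECOMMENDER_NAME} loaded.")
    except Exception:
        print(f"Fitting {recommender.RECOMMENDER_NAME} ...")
        recommender.fit(**fit_params)
        recommender.save_model(folder, name)
        print("done.")
    return recommender

# e.g. rp3b = load_or_fit(RP3betaRecommender(URM_train, verbose=False), rp3b_params, seed)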
Example #8

def read_data_split_and_search():
    """
    This function provides a simple example of how to tune the parameters of a given algorithm

    The BayesianSearch object will save:
        - A .txt file with all the cases explored and the recommendation quality
        - A _best_model file which contains the trained model and can be loaded with recommender.load_model()
        - A _best_parameter file which contains a dictionary with all the fit parameters, it can be passed to recommender.fit(**_best_parameter)
        - A _best_result_validation file which contains a dictionary with the results of the best solution on the validation set
        - A _best_result_test file which contains a dictionary with the results, on the test set, of the best solution chosen using the validation set
    """

    parser = DataParser()
    seed = 1666
    URM_all = parser.get_URM_all()
    ICM_obj = parser.get_ICM_all()

    #URM_train, URM_test = split_train_in_two_percentage_global_sample(URM_all, train_percentage=0.85, seed=seed)
    #URM_train, URM_validation = split_train_in_two_percentage_global_sample(URM_train, train_percentage=0.85, seed=seed)
    k = 5

    output_folder_path = "result_experiments_CV/"

    collaborative_algorithm_list = [
        HybridCombinationSearchCV
        #HybridSuperLinear
    ]

    from Base.Evaluation.Evaluator import EvaluatorHoldout

    # Each entry is a (recommender_class, fit_parameters) pair
    icb = (ItemKNNCBFRecommender, {
        'topK': 164,
        'shrink': 8,
        'similarity': 'jaccard',
        'normalize': True
    })
    icbsup = (SpecialItemKNNCBFRecommender, {
        'topK': 1000,
        'shrink': 1000,
        'similarity': 'cosine',
        'normalize': True,
        'feature_weighting': 'BM25'
    })
    icbcf = (ItemKNN_CBF_CF, {
        'topK': 1000,
        'shrink': 1000,
        'similarity': 'asymmetric',
        'normalize': True,
        'asymmetric_alpha': 0.241892724784089,
        'feature_weighting': 'TF-IDF',
        'icm_weight': 1.0
    })
    icf = (ItemKNNCFRecommender, {
        'topK': 1000,
        'shrink': 1000,
        'similarity': 'cosine',
        'normalize': True,
        'feature_weighting': 'TF-IDF'
    })
    ucf = (UserKNNCFRecommender, {
        'topK': 163,
        'shrink': 846,
        'similarity': 'cosine',
        'normalize': True,
        'feature_weighting': 'TF-IDF'
    })
    rp3b = (RP3betaRecommender, {
        'topK': 926,
        'alpha': 0.4300109351916609,
        'beta': 0.01807360750913967,
        'normalize_similarity': False
    })
    p3a = (P3alphaRecommender, {
        'topK': 575,
        'alpha': 0.48009885897470206,
        'normalize_similarity': False
    })
    sbpr = (SLIM_BPR_Cython, {
        'topK': 1000,
        'epochs': 130,
        'symmetric': False,
        'sgd_mode': 'adam',
        'lambda_i': 1e-05,
        'lambda_j': 1e-05,
        'learning_rate': 0.0001
    })
    sslim = (SSLIMElasticNet, {
        'beta': 0.567288665094892,
        'topK': 1000,
        'l1_ratio': 1e-05,
        'alpha': 0.001
    })

    combo_algorithm_list = [
        icb, icbsup, icbcf, icf, ucf, p3a, rp3b, sbpr, sslim
    ]
    list_already_seen = []
    # list() materializes the combinations: a bare iterator would be exhausted
    # after the first membership test in the loop below
    combinations_already_seen = list(combinations(list_already_seen, 3))
    """
    (icb, icf, p3a), (icb, icf, rp3b), (icb, icf, sen), (icb, p3a, rp3b), (icb, p3a, sen),
                                (icb, rp3b, sen), (icf, p3a, rp3b), (icf, p3a, sen)
    """
    combination_to_be_done = list(combinations(combo_algorithm_list, 3))

    for rec_perm in combination_to_be_done:

        if rec_perm not in combinations_already_seen:
            recommender_names = '_'.join(
                [r[0].RECOMMENDER_NAME for r in rec_perm])
            output_folder_path = "result_experiments_CV2/seed_" + str(
                seed) + '/linear/' + recommender_names + '/'
            print(f"\nTESTING THE COMBO {recommender_names}")

            # keep only the triples that contain exactly one of the two
            # content-based variants (icb XOR icbsup)
            if ((icb in rec_perm) or
                (icbsup in rec_perm)) and not ((icb in rec_perm) and
                                               (icbsup in rec_perm)):
                # The search runs only if the output directory does not yet
                # exist, i.e. this combination has not been explored before
                if not os.path.exists(output_folder_path):
                    os.makedirs(output_folder_path)

                    runParameterSearch_Collaborative_partial = partial(
                        runParameterSearch_Collaborative,
                        URM_train=URM_all,
                        ICM_train=ICM_obj,
                        metric_to_optimize="MAP",
                        n_cases=50,
                        n_random_starts=20,
                        output_folder_path=output_folder_path,
                        parallelizeKNN=False,
                        allow_weighting=False,
                        k=k,
                        seed=seed,
                        list_rec=rec_perm,
                        level='hybrid_search')
                    pool = multiprocessing.Pool(processes=int(
                        multiprocessing.cpu_count()),
                                                maxtasksperchild=1)
                    pool.map(runParameterSearch_Collaborative_partial,
                             collaborative_algorithm_list)
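The nested membership condition above keeps the triples containing exactly one of the two content-based variants. The same constraint reads more directly as a boolean XOR; a hypothetical rewrite of just the filtering step:

from itertools import combinations

def constrained_triples(algorithms, variant_a, variant_b):
    # Yield the 3-combinations that contain exactly one of the two variants.
    for triple in combinations(algorithms, 3):
        if (variant_a in triple) != (variant_b in triple):  # XOR on membership
            yield triple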