Exemplo n.º 1
0
        # Return single list for one user, instead of list of lists
        if single_user:
            ranking_list = ranking_list[0]

        if return_scores:
            return ranking_list, scores_batch

        else:
            return ranking_list


if __name__ == '__main__':
    # Script entry point: fit PipeHybrid001 on a seeded train split and print
    # its hold-out evaluation at cutoff 10 on the filtered test matrix.
    seed = 1205
    f_range = (0, 2)

    # Load the interaction (URM) and item-content (ICM) matrices.
    parser = DataParser('../data')
    URM_all = parser.get_URM_all()
    ICM_all = parser.get_ICM_all()

    # Seeded 85/15 global-sample split so the run is repeatable.
    URM_train, URM_test = split_train_in_two_percentage_global_sample(
        URM_all,
        train_percentage=0.85,
        seed=seed,
    )

    # Restrict the test matrix with f_range before building the evaluator.
    URM_test = parser.filter_URM_test_by_range(URM_train, URM_test, f_range)
    evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])

    # Hyper-parameters handed to fit(), kept together for readability.
    fit_params = {
        'topK': 946,
        'alpha': 0.47193263239089045,
        'beta': 0.0316773658685341,
        'normalize_similarity': False,
    }
    recommender = PipeHybrid001(URM_train, ICM_all)
    recommender.fit(**fit_params)

    # evaluateRecommender returns a pair; only the first element is printed.
    result, _ = evaluator_test.evaluateRecommender(recommender)
    print(result)

Exemplo n.º 2
0
def read_data_split_and_search():
    """
    Tune a fixed sequence of recommenders, one after another, on a filtered split.

    The URM is split 80/20 into train/test, and the train part is split again
    85/15 into train/validation. Validation and test are then restricted with
    ``parser.filter_URM_test_by_range`` using ``f_range``. Every algorithm in
    ``algorithm_in_sequence`` is tuned in its own worker process through
    ``runParameterSearch_Collaborative`` (tag ``'CF'``) or
    ``runParameterSearch_Content`` (tag ``'CBF'``), optimizing MAP at cutoff 10.

    The BayesianSearch object will save:
        - A .txt file with all the cases explored and the recommendation quality
        - A _best_model file which contains the trained model and can be loaded with recommender.load_model()
        - A _best_parameter file which contains a dictionary with all the fit parameters, it can be passed to recommender.fit(**_best_parameter)
        - A _best_result_validation file which contains a dictionary with the results of the best solution on the validation
        - A _best_result_test file which contains a dictionary with the results, on the test set, of the best solution chosen using the validation set
    """

    parser = DataParser()

    URM_all = parser.get_URM_all()
    ICM_obj = parser.get_ICM_all()
    URM_train, URM_test = split_train_in_two_percentage_global_sample(
        URM_all, train_percentage=0.80)
    URM_train, URM_validation = split_train_in_two_percentage_global_sample(
        URM_train, train_percentage=0.85)

    # Author's note (26-10-2020): optimization on the range [200, +INF).
    # Recommenders being considered (the fastest up to now):
    # PureSVD, ItemKNNCBF, ItemKNNCF, UserKNNCF, P3A, RP3beta.
    # Presumably -1 encodes the open upper bound -- confirm in DataParser.
    f_range = (200, -1)

    URM_validation = parser.filter_URM_test_by_range(URM_train, URM_validation,
                                                     f_range)
    URM_test = parser.filter_URM_test_by_range(URM_train, URM_test, f_range)
    output_folder_path = f"result_experiments_v2/range_{f_range[0]}-{f_range[1]}/"

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_folder_path, exist_ok=True)

    # An older, disabled candidate list (PipeHybrid001 plus many commented-out
    # recommenders) used to live here; the algorithms now come from the
    # sequence below. Each entry is (recommender class, search type tag).
    algorithm_in_sequence = [(ItemKNNCFRecommender, 'CF'),
                             (UserKNNCFRecommender, 'CF'),
                             (P3alphaRecommender, 'CF'),
                             (RP3betaRecommender, 'CF'),
                             (PureSVDRecommender, 'CF'),
                             (ItemKNNCBFRecommender, 'CBF')]

    from Base.Evaluation.Evaluator import EvaluatorHoldout

    evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[10])
    evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])

    # Search budget: 30% of the cases are random starts. The value is rounded
    # to an int because it is a count (the bare product would be 15.0).
    n_cases = 50
    n_random_starts = round(n_cases * 0.3)

    for algo, algo_type in algorithm_in_sequence:
        print(f"OPTIMIZING {algo.RECOMMENDER_NAME} - {algo_type}")

        if algo_type == 'CF':
            run_search = partial(
                runParameterSearch_Collaborative,
                URM_train=URM_train,
                ICM_train=ICM_obj,
                metric_to_optimize="MAP",
                n_cases=n_cases,
                n_random_starts=n_random_starts,
                evaluator_validation_earlystopping=evaluator_validation,
                evaluator_validation=evaluator_validation,
                evaluator_test=evaluator_test,
                output_folder_path=output_folder_path,
                allow_weighting=False,  # NOTE: the author flagged this setting for attention
                parallelizeKNN=False)
        elif algo_type == 'CBF':
            # NOTE(review): unlike the CF search, no output_folder_path is
            # passed here, so results go wherever runParameterSearch_Content
            # defaults to -- confirm this is intended.
            run_search = partial(
                runParameterSearch_Content,
                URM_train=URM_train,
                ICM_object=ICM_obj,
                ICM_name='BookFeatures',
                n_cases=n_cases,
                n_random_starts=n_random_starts,
                evaluator_validation=evaluator_validation,
                evaluator_test=evaluator_test,
                metric_to_optimize="MAP",
                parallelizeKNN=False,
                allow_weighting=True,
            )
        else:
            # Unknown tags are only announced, never searched.
            continue

        # A fresh pool per algorithm, with one task per child, runs each search
        # in its own process. close()/join() in finally releases the workers
        # even if the search raises, instead of leaking one pool per iteration.
        pool = multiprocessing.Pool(processes=multiprocessing.cpu_count(),
                                    maxtasksperchild=1)
        try:
            pool.map(run_search, [algo])
        finally:
            pool.close()
            pool.join()
def read_data_split_and_search():
    """
    Run a parameter search for every 3-combination of five pre-trained recommenders.

    The URM is split with a fixed seed: 90/10 into train/test, then the train
    part 85/15 into train/hybrid-validation. The validation matrix is
    restricted with ``parser.filter_URM_test_by_range`` using ``f_range``.
    RP3beta, P3alpha, ItemKNNCF, UserKNNCF and ItemKNNCBF are fitted once with
    fixed hyper-parameters. For each combination of three of them,
    ``runParameterSearch_Collaborative`` is run on ``HybridCombinationSearch``
    in a worker process, receiving the fitted models as ``list_rec`` and
    optimizing MAP at cutoff 10.

    Each search writes into
    ``result_experiments_v3/seed_<seed>_<lo>-<hi>/<names joined by '_'>/``.
    """

    seed = 1205
    # Single source for the range used by the filter and by the output path.
    f_range = (3, -1)
    parser = DataParser()

    URM_all = parser.get_URM_all()
    ICM_obj = parser.get_ICM_all()

    # SPLIT TO GET TEST PARTITION
    URM_train, URM_test = split_train_in_two_percentage_global_sample(
        URM_all, train_percentage=0.90, seed=seed)

    # SPLIT TO GET THE HYBRID VALID PARTITION
    URM_train, URM_valid_hybrid = split_train_in_two_percentage_global_sample(
        URM_train, train_percentage=0.85, seed=seed)

    URM_valid_hybrid = parser.filter_URM_test_by_range(URM_train,
                                                       URM_valid_hybrid,
                                                       f_range)

    # Only the hybrid search class is tuned; it is mapped over the pool below.
    collaborative_algorithm_list = [HybridCombinationSearch]

    from Base.Evaluation.Evaluator import EvaluatorHoldout

    evaluator_valid_hybrid = EvaluatorHoldout(URM_valid_hybrid,
                                              cutoff_list=[10])
    evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])

    # Disabled base models, kept for reference with their hyper-parameters:
    #   IALSRecommender: {'num_factors': 83, 'confidence_scaling': 'linear',
    #       'alpha': 28.4278070726612, 'epsilon': 1.0234211788885077,
    #       'reg': 0.0027328110246575004, 'epochs': 20}, plus early stopping
    #       {"validation_every_n": 5, "stop_on_validation": True,
    #        "evaluator_object": evaluator_valid_hybrid,
    #        "lower_validations_allowed": 5, "validation_metric": 'MAP'}
    #   PureSVDRecommender: {'num_factors': 711}
    #   SLIMElasticNetRecommender: {'topK': 954,
    #       'l1_ratio': 3.87446082207643e-05, 'alpha': 0.07562657698792305}

    print("Rp3beta training...")
    rp3b = RP3betaRecommender(URM_train, verbose=False)
    rp3b_params = {
        'topK': 753,
        'alpha': 0.3873710051288722,
        'beta': 0.0,
        'normalize_similarity': False
    }
    rp3b.fit(**rp3b_params)
    print("Done")

    print("P3alpha training...")
    p3a = P3alphaRecommender(URM_train, verbose=False)
    p3a_params = {
        'topK': 438,
        'alpha': 0.41923120471415165,
        'normalize_similarity': False
    }
    p3a.fit(**p3a_params)
    print("Done")

    print("ItemKnnCF training...")
    icf = ItemKNNCFRecommender(URM_train, verbose=False)
    icf_params = {
        'topK': 565,
        'shrink': 554,
        'similarity': 'tversky',
        'normalize': True,
        'tversky_alpha': 1.9109121434662428,
        'tversky_beta': 1.7823834698905734
    }
    icf.fit(**icf_params)
    print("Done")

    print("UserKnnCF training...")
    ucf = UserKNNCFRecommender(URM_train, verbose=False)
    ucf_params = {
        'topK': 190,
        'shrink': 0,
        'similarity': 'cosine',
        'normalize': True
    }
    ucf.fit(**ucf_params)
    print("Done")

    print("ItemKnnCBF training...")
    icb = ItemKNNCBFRecommender(URM_train, ICM_obj, verbose=False)
    icb_params = {
        'topK': 205,
        'shrink': 1000,
        'similarity': 'cosine',
        'normalize': True,
        'feature_weighting': 'BM25'
    }
    icb.fit(**icb_params)
    print("Done")

    list_recommender = [icb, icf, ucf, p3a, rp3b]

    # combinations() already yields each unordered triple exactly once, so no
    # "already seen" bookkeeping is needed.
    for rec_combo in combinations(list_recommender, 3):
        recommender_names = '_'.join(r.RECOMMENDER_NAME for r in rec_combo)
        output_folder_path = (
            f"result_experiments_v3/seed_{seed}_{f_range[0]}-{f_range[1]}/"
            f"{recommender_names}/")

        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(output_folder_path, exist_ok=True)

        # TODO: set the correct evaluators here.
        run_search = partial(
            runParameterSearch_Collaborative,
            URM_train=URM_train,
            ICM_train=ICM_obj,
            metric_to_optimize="MAP",
            n_cases=50,
            n_random_starts=20,
            evaluator_validation_earlystopping=evaluator_valid_hybrid,
            evaluator_validation=evaluator_valid_hybrid,
            evaluator_test=evaluator_test,
            output_folder_path=output_folder_path,
            allow_weighting=False,
            # similarity_type_list = ["cosine", 'jaccard'],
            parallelizeKNN=False,
            list_rec=rec_combo)

        # One pool per combination; close()/join() in finally releases the
        # workers even if the search raises, instead of leaking a pool on
        # every iteration.
        pool = multiprocessing.Pool(processes=multiprocessing.cpu_count(),
                                    maxtasksperchild=1)
        try:
            pool.map(run_search, collaborative_algorithm_list)
        finally:
            pool.close()
            pool.join()