def run_parameter_search_p3alpha_wsparse(recommender_class, URM_train, item_W_sparse, user_W_sparse,
                                         evaluator_validation, metric_to_optimize="MAP", n_cases=60, n_random_starts=20,
                                         output_folder_path="result_experiments/"):
    """Run a ``SearchBayesianSkopt`` search over ``topK``, ``alpha`` and ``normalize_similarity``.

    The recommender is constructed with the positional arguments
    ``(URM_train, user_W_sparse, item_W_sparse)`` and fitted without any fixed
    fit arguments. Results are written to ``output_folder_path`` using
    ``recommender_class.RECOMMENDER_NAME`` as the file name root; no model is
    saved (``save_model="no"``) and no "last test" refit is requested.

    Args:
        recommender_class: Recommender class to tune; must expose
            ``RECOMMENDER_NAME``.
        URM_train: First positional constructor argument of the recommender.
        item_W_sparse: Passed as the third positional constructor argument.
        user_W_sparse: Passed as the second positional constructor argument.
        evaluator_validation: Evaluator handed to ``SearchBayesianSkopt``.
        metric_to_optimize: Name of the metric forwarded to the search.
        n_cases: Forwarded to the search as ``n_cases``.
        n_random_starts: Forwarded to the search as ``n_random_starts``.
        output_folder_path: Folder for the search output; created if missing.
    """
    parameterSearch = SearchBayesianSkopt(recommender_class,
                                          evaluator_validation=evaluator_validation)

    hyperparameters_range_dictionary = {
        "topK": Integer(5, 1000),
        "alpha": Real(low=0, high=2, prior='uniform'),
        "normalize_similarity": Categorical([True, False]),
    }

    # NOTE(review): the constructor receives the user matrix before the item
    # matrix, i.e. the reverse of this function's own parameter order.
    # Presumably that matches the recommender's signature -- confirm.
    recommender_input_args = SearchInputRecommenderArgs(
        CONSTRUCTOR_POSITIONAL_ARGS=[URM_train, user_W_sparse, item_W_sparse],
        CONSTRUCTOR_KEYWORD_ARGS={},
        FIT_POSITIONAL_ARGS=[],
        FIT_KEYWORD_ARGS={}
    )

    # exist_ok=True avoids the check-then-create race when several searches
    # targeting the same folder start at the same time.
    os.makedirs(output_folder_path, exist_ok=True)

    parameterSearch.search(recommender_input_args,
                           recommender_input_args_last_test=None,
                           parameter_search_space=hyperparameters_range_dictionary,
                           n_cases=n_cases,
                           n_random_starts=n_random_starts,
                           save_model="no",
                           output_folder_path=output_folder_path,
                           output_file_name_root=recommender_class.RECOMMENDER_NAME,
                           metric_to_optimize=metric_to_optimize
                           )
def run_parameter_search_field_ICM_weight(
        URM_train,
        ICM_train,
        base_recommender_class,
        base_recommender_parameter,
        item_feature_to_range_mapper,
        output_folder_path="result_experiments/",
        evaluator_validation=None,
        evaluator_test=None,
        n_cases=35,
        n_random_starts=5,
        metric_to_optimize="MAP"):
    """Search one weight in ``[0, 2]`` per item feature for ``SearchFieldWeightICMRecommender``.

    Every key of ``item_feature_to_range_mapper`` becomes one uniformly
    distributed real hyperparameter. The recommender is constructed with
    ``(URM_train, ICM_train, base_recommender_class,
    base_recommender_parameter, item_feature_to_range_mapper)``. Results are
    written to ``output_folder_path``; no model is saved.

    Args:
        URM_train: First positional constructor argument of the recommender.
        ICM_train: Second positional constructor argument of the recommender.
        base_recommender_class: Third positional constructor argument.
        base_recommender_parameter: Fourth positional constructor argument.
        item_feature_to_range_mapper: Mapping whose keys name the
            hyperparameters to search; also passed to the constructor.
        output_folder_path: Folder for the search output; created if missing.
        evaluator_validation: Evaluator handed to ``SearchBayesianSkopt``.
        evaluator_test: Accepted for interface compatibility; not used here.
        n_cases: Forwarded to the search as ``n_cases``.
        n_random_starts: Forwarded to the search as ``n_random_starts``.
        metric_to_optimize: Name of the metric forwarded to the search.
    """
    recommender_class = SearchFieldWeightICMRecommender

    # exist_ok=True avoids the check-then-create race between concurrent runs.
    os.makedirs(output_folder_path, exist_ok=True)

    output_file_name_root = recommender_class.RECOMMENDER_NAME

    # One weight per item feature; insertion order follows the mapper's keys.
    hyperparameters_range_dictionary = {
        item_feature_name: Real(low=0, high=2, prior="uniform")
        for item_feature_name in item_feature_to_range_mapper.keys()
    }

    # Set args for recommender
    recommender_input_args = SearchInputRecommenderArgs(
        CONSTRUCTOR_POSITIONAL_ARGS=[
            URM_train, ICM_train, base_recommender_class,
            base_recommender_parameter, item_feature_to_range_mapper
        ],
        CONSTRUCTOR_KEYWORD_ARGS={},
        FIT_POSITIONAL_ARGS=[],
        FIT_KEYWORD_ARGS={})

    parameterSearch = SearchBayesianSkopt(
        recommender_class, evaluator_validation=evaluator_validation)

    parameterSearch.search(
        recommender_input_args,
        parameter_search_space=hyperparameters_range_dictionary,
        n_cases=n_cases,
        n_random_starts=n_random_starts,
        output_folder_path=output_folder_path,
        output_file_name_root=output_file_name_root,
        metric_to_optimize=metric_to_optimize,
        save_model="no")
def run_parameter_search(URM_train,
                         ICM_all,
                         W_sparse_CF,
                         evaluator_test,
                         metric_to_optimize="MAP",
                         n_cases=10,
                         n_random_starts=3,
                         output_folder_path="result_experiments/"):
    """Search ``topK``, ``add_zeros_quota`` and ``normalize_similarity`` for ``CFW_D_Similarity_Linalg``.

    The recommender is constructed with ``(URM_train, ICM_all, W_sparse_CF)``.
    Results are written to ``output_folder_path`` using the recommender's
    ``RECOMMENDER_NAME`` as the file name root; no model is saved.

    Args:
        URM_train: First positional constructor argument of the recommender.
        ICM_all: Second positional constructor argument of the recommender.
        W_sparse_CF: Third positional constructor argument of the recommender.
        evaluator_test: Despite its name, this evaluator is handed to
            ``SearchBayesianSkopt`` as ``evaluator_validation``.
        metric_to_optimize: Name of the metric forwarded to the search.
        n_cases: Forwarded to the search as ``n_cases``.
        n_random_starts: Forwarded to the search as ``n_random_starts``.
        output_folder_path: Folder for the search output; created if missing.
    """
    recommender_class = CFW_D_Similarity_Linalg

    parameterSearch = SearchBayesianSkopt(recommender_class,
                                          evaluator_validation=evaluator_test)

    hyperparameters_range_dictionary = {
        "topK": Integer(1, 2000),
        "add_zeros_quota": Real(low=0, high=0.1, prior='uniform'),
        "normalize_similarity": Categorical([True, False])
    }

    recommender_input_args = SearchInputRecommenderArgs(
        CONSTRUCTOR_POSITIONAL_ARGS=[URM_train, ICM_all, W_sparse_CF],
        CONSTRUCTOR_KEYWORD_ARGS={},
        FIT_POSITIONAL_ARGS=[],
        FIT_KEYWORD_ARGS={})

    # exist_ok=True avoids the check-then-create race between concurrent runs.
    os.makedirs(output_folder_path, exist_ok=True)

    # Clone the argument structure used for the final fit with the best
    # hyperparameters.
    # NOTE(review): the clone is given the very same URM_train, so no separate
    # train + validation matrix is actually supplied here -- confirm intent.
    recommender_input_args_last_test = recommender_input_args.copy()
    recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[0] = URM_train

    parameterSearch.search(
        recommender_input_args,
        recommender_input_args_last_test=recommender_input_args_last_test,
        parameter_search_space=hyperparameters_range_dictionary,
        n_cases=n_cases,
        n_random_starts=n_random_starts,
        save_model="no",
        output_folder_path=output_folder_path,
        output_file_name_root=recommender_class.RECOMMENDER_NAME,
        metric_to_optimize=metric_to_optimize)
def run_parameter_search_VNN(recommender_class, URM_train,
                             metric_to_optimize="PRECISION",
                             evaluator_validation=None, evaluator_test=None,
                             output_folder_path="result_experiments/", parallelizeKNN=True,
                             n_cases=35, n_random_starts=5, resume_from_saved=False, save_model="best",
                             allow_weighting=True,
                             similarity_type_list=None):
    """Run ``run_VKNNRecommender_on_similarity_type`` once per similarity type.

    A single ``SearchBayesianSkopt`` object and a single set of recommender
    input arguments (``URM_train`` as the only constructor argument) are
    shared by all runs. The per-similarity search space is left to
    ``run_VKNNRecommender_on_similarity_type``; an empty dictionary is passed
    in as its starting point.

    Args:
        recommender_class: Recommender class to tune; must expose
            ``RECOMMENDER_NAME``, which is used as the output file name root.
        URM_train: Only positional constructor argument of the recommender.
        metric_to_optimize: Name of the metric forwarded to each run.
        evaluator_validation: Evaluator handed to ``SearchBayesianSkopt``.
        evaluator_test: Test evaluator handed to ``SearchBayesianSkopt``.
        output_folder_path: Folder for the search output; created if missing.
        parallelizeKNN: When true, the similarity types are processed by a
            ``multiprocessing.Pool``; otherwise sequentially in this process.
        n_cases: Forwarded to each run as ``n_cases``.
        n_random_starts: Forwarded to each run as ``n_random_starts``.
        resume_from_saved: Forwarded to each run.
        save_model: Forwarded to each run.
        allow_weighting: Forwarded to each run.
        similarity_type_list: Similarity names to iterate over; defaults to
            cosine, jaccard, asymmetric, dice and tversky.
    """
    # exist_ok=True avoids the check-then-create race between concurrent runs.
    os.makedirs(output_folder_path, exist_ok=True)

    output_file_name_root = recommender_class.RECOMMENDER_NAME

    parameterSearch = SearchBayesianSkopt(recommender_class, evaluator_validation=evaluator_validation,
                                          evaluator_test=evaluator_test)

    if similarity_type_list is None:
        similarity_type_list = ['cosine', 'jaccard', "asymmetric", "dice", "tversky"]

    recommender_input_args = SearchInputRecommenderArgs(
        CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
        CONSTRUCTOR_KEYWORD_ARGS={},
        FIT_POSITIONAL_ARGS=[],
        FIT_KEYWORD_ARGS={}
    )

    # Everything except the similarity type itself is fixed up front so the
    # resulting callable can be mapped over the list of similarity types.
    search_on_similarity_type = partial(run_VKNNRecommender_on_similarity_type,
                                        recommender_input_args=recommender_input_args,
                                        parameter_search_space={},
                                        parameterSearch=parameterSearch,
                                        n_cases=n_cases,
                                        n_random_starts=n_random_starts,
                                        resume_from_saved=resume_from_saved,
                                        save_model=save_model,
                                        output_folder_path=output_folder_path,
                                        output_file_name_root=output_file_name_root,
                                        metric_to_optimize=metric_to_optimize,
                                        allow_weighting=allow_weighting,
                                        recommender_input_args_last_test=None)

    if parallelizeKNN:
        pool = multiprocessing.Pool(processes=multiprocessing.cpu_count(), maxtasksperchild=1)
        try:
            pool.map(search_on_similarity_type, similarity_type_list)
        finally:
            # Always release the worker processes, even when a run raised.
            pool.close()
            pool.join()
    else:
        for similarity_type in similarity_type_list:
            search_on_similarity_type(similarity_type)
# Example #5
# 0
def run_parameter_search_bagging(recommender_class, URM_train, constructor_kwargs, fit_kwargs,
                                 URM_train_last_test=None,
                                 n_cases=30, n_random_starts=5, resume_from_saved=False, save_model="no",
                                 evaluator_validation=None, evaluator_test=None,
                                 metric_to_optimize="PRECISION",
                                 output_folder_path="result_experiments/", parallelizeKNN=False):
    """Search ``num_models`` (and ``topK`` for the similarity-merging variants) of a bagging recommender.

    The recommender is constructed with ``URM_train`` plus
    ``constructor_kwargs`` and fitted with ``fit_kwargs`` as fixed keyword
    arguments. The output file name root combines the ``RECOMMENDER_NAME`` of
    ``recommender_class`` and of ``constructor_kwargs['recommender_class']``.

    Args:
        recommender_class: Bagging recommender class to tune.
        URM_train: Only positional constructor argument of the recommender.
        constructor_kwargs: Keyword constructor arguments; must contain a
            ``'recommender_class'`` entry exposing ``RECOMMENDER_NAME``.
        fit_kwargs: Fixed keyword arguments for ``fit``.
        URM_train_last_test: Optional matrix that replaces ``URM_train`` in
            the "last test" copy of the input arguments. When ``None`` no
            last-test arguments are supplied.
        n_cases: Forwarded to the search as ``n_cases``.
        n_random_starts: Forwarded to the search as ``n_random_starts``.
        resume_from_saved: Forwarded to the search.
        save_model: Forwarded to the search.
        evaluator_validation: Evaluator handed to ``SearchBayesianSkopt``.
        evaluator_test: Test evaluator handed to ``SearchBayesianSkopt``.
        metric_to_optimize: Name of the metric forwarded to the search.
        output_folder_path: Folder for the search output; created if missing.
        parallelizeKNN: Accepted for interface compatibility; not used here.
    """
    # exist_ok=True avoids the check-then-create race between concurrent runs.
    os.makedirs(output_folder_path, exist_ok=True)

    output_file_name_root = recommender_class.RECOMMENDER_NAME + "_" + \
                            constructor_kwargs['recommender_class'].RECOMMENDER_NAME

    parameterSearch = SearchBayesianSkopt(recommender_class, evaluator_validation=evaluator_validation,
                                          evaluator_test=evaluator_test)

    recommender_input_args = SearchInputRecommenderArgs(
        CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
        CONSTRUCTOR_KEYWORD_ARGS=constructor_kwargs,
        FIT_POSITIONAL_ARGS=[],
        FIT_KEYWORD_ARGS=fit_kwargs
    )

    # Previously URM_train_last_test was accepted but silently dropped; build
    # the last-test arguments the same way the sibling search functions do.
    if URM_train_last_test is not None:
        recommender_input_args_last_test = recommender_input_args.copy()
        recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[0] = URM_train_last_test
    else:
        recommender_input_args_last_test = None

    hyperparameters_range = {'num_models': Integer(10, 100)}

    if recommender_class in [BaggingMergeItemSimilarityRecommender, BaggingMergeUserSimilarityRecommender]:
        hyperparameters_range['topK'] = Integer(low=1, high=3000)

    parameterSearch.search(recommender_input_args,
                           recommender_input_args_last_test=recommender_input_args_last_test,
                           parameter_search_space=hyperparameters_range,
                           n_cases=n_cases,
                           n_random_starts=n_random_starts,
                           output_folder_path=output_folder_path,
                           output_file_name_root=output_file_name_root,
                           metric_to_optimize=metric_to_optimize,
                           save_model=save_model,
                           resume_from_saved=resume_from_saved)
# Example #6
# 0
def _collaborative_search_input_args(URM_train, URM_train_last_test, fit_keyword_args):
    """Build the search input args for ``URM_train`` and, if requested, their last-test copy.

    Returns a ``(recommender_input_args, recommender_input_args_last_test)``
    pair; the second element is ``None`` when ``URM_train_last_test`` is
    ``None``.
    """
    recommender_input_args = SearchInputRecommenderArgs(
        CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
        CONSTRUCTOR_KEYWORD_ARGS={},
        FIT_POSITIONAL_ARGS=[],
        FIT_KEYWORD_ARGS=fit_keyword_args)

    if URM_train_last_test is None:
        return recommender_input_args, None

    recommender_input_args_last_test = recommender_input_args.copy()
    recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[0] = URM_train_last_test
    return recommender_input_args, recommender_input_args_last_test


def runParameterSearch_Collaborative(recommender_class,
                                     URM_train,
                                     URM_train_last_test=None,
                                     metric_to_optimize="PRECISION",
                                     evaluator_validation=None,
                                     evaluator_test=None,
                                     evaluator_validation_earlystopping=None,
                                     output_folder_path="result_experiments/",
                                     parallelizeKNN=True,
                                     n_cases=35,
                                     n_random_starts=5,
                                     resume_from_saved=False,
                                     save_model="best",
                                     allow_weighting=True,
                                     similarity_type_list=None):
    """Run the hyperparameter search configured for a collaborative recommender class.

    Three paths exist, selected by ``recommender_class``:

    * ``TopPop``, ``GlobalEffects``, ``Random``: a single evaluation through
      ``SearchSingleCase`` with no hyperparameters.
    * ``ItemKNNCFRecommender``, ``UserKNNCFRecommender``: one call of
      ``run_KNNRecommender_on_similarity_type`` per similarity type,
      optionally distributed over a ``multiprocessing.Pool``.
    * Every other supported class: a ``SearchBayesianSkopt`` search over the
      class-specific search space defined below.

    Any exception raised while setting up or running the search is printed
    together with its traceback, appended to ``ErrorLog.txt`` inside
    ``output_folder_path``, and then swallowed, so the function returns
    ``None`` in every case.

    Args:
        recommender_class: Recommender class to tune; must expose
            ``RECOMMENDER_NAME``, which is used as the output file name root.
        URM_train: Training matrix; a copy of it is the only positional
            constructor argument of the recommender.
        URM_train_last_test: Optional matrix that replaces ``URM_train`` in
            the "last test" copy of the input arguments; copied before use.
        metric_to_optimize: Metric forwarded to the search and used as the
            early-stopping ``validation_metric``.
        evaluator_validation: Evaluator handed to the search object.
        evaluator_test: Test evaluator handed to the search object.
        evaluator_validation_earlystopping: Evaluator placed in the
            early-stopping fit arguments as ``evaluator_object``.
        output_folder_path: Folder for the search output and the error log;
            created if missing.
        parallelizeKNN: Whether the KNN similarity types run in a process pool.
        n_cases: Forwarded to the search as ``n_cases``.
        n_random_starts: Forwarded to the search as ``n_random_starts``.
        resume_from_saved: Forwarded to the search.
        save_model: Forwarded to the search.
        allow_weighting: Forwarded to the KNN runs.
        similarity_type_list: Similarity names for the KNN path; defaults to
            cosine, jaccard, asymmetric, dice and tversky.
    """
    # exist_ok=True avoids the check-then-create race between concurrent runs.
    os.makedirs(output_folder_path, exist_ok=True)

    earlystopping_keywargs = {
        "validation_every_n": 5,
        "stop_on_validation": True,
        "evaluator_object": evaluator_validation_earlystopping,
        "lower_validations_allowed": 5,
        "validation_metric": metric_to_optimize,
    }

    URM_train = URM_train.copy()

    if URM_train_last_test is not None:
        URM_train_last_test = URM_train_last_test.copy()

    try:

        output_file_name_root = recommender_class.RECOMMENDER_NAME

        parameterSearch = SearchBayesianSkopt(
            recommender_class,
            evaluator_validation=evaluator_validation,
            evaluator_test=evaluator_test)

        if recommender_class in [TopPop, GlobalEffects, Random]:
            # TopPop, GlobalEffects and Random have no parameters, therefore
            # only one evaluation is needed.
            parameterSearch = SearchSingleCase(
                recommender_class,
                evaluator_validation=evaluator_validation,
                evaluator_test=evaluator_test)

            recommender_input_args, recommender_input_args_last_test = \
                _collaborative_search_input_args(URM_train, URM_train_last_test, {})

            parameterSearch.search(
                recommender_input_args,
                recommender_input_args_last_test=recommender_input_args_last_test,
                fit_hyperparameters_values={},
                output_folder_path=output_folder_path,
                output_file_name_root=output_file_name_root,
                resume_from_saved=resume_from_saved,
                save_model=save_model,
            )

            return

        ##########################################################################################################

        if recommender_class in [ItemKNNCFRecommender, UserKNNCFRecommender]:

            if similarity_type_list is None:
                similarity_type_list = [
                    'cosine', 'jaccard', "asymmetric", "dice", "tversky"
                ]

            recommender_input_args, recommender_input_args_last_test = \
                _collaborative_search_input_args(URM_train, URM_train_last_test, {})

            # Everything except the similarity type is fixed up front so the
            # callable can be mapped over the list of similarity types.
            search_on_similarity_type = partial(
                run_KNNRecommender_on_similarity_type,
                recommender_input_args=recommender_input_args,
                parameter_search_space={},
                parameterSearch=parameterSearch,
                n_cases=n_cases,
                n_random_starts=n_random_starts,
                resume_from_saved=resume_from_saved,
                save_model=save_model,
                output_folder_path=output_folder_path,
                output_file_name_root=output_file_name_root,
                metric_to_optimize=metric_to_optimize,
                allow_weighting=allow_weighting,
                recommender_input_args_last_test=recommender_input_args_last_test)

            if parallelizeKNN:
                pool = multiprocessing.Pool(
                    processes=multiprocessing.cpu_count(), maxtasksperchild=1)
                try:
                    pool.map(search_on_similarity_type, similarity_type_list)
                finally:
                    # Always release the workers, even when a run raised.
                    pool.close()
                    pool.join()
            else:
                for similarity_type in similarity_type_list:
                    search_on_similarity_type(similarity_type)

            return

        ##########################################################################################################
        # Class-specific search spaces. Key order is kept as originally
        # written because it defines the order of the search dimensions.

        fit_keyword_args = {}

        if recommender_class is P3alphaRecommender:
            hyperparameters_range_dictionary = {
                "topK": Integer(5, 1000),
                "alpha": Real(low=0, high=2, prior='uniform'),
                "normalize_similarity": Categorical([True, False]),
            }

        elif recommender_class is RP3betaRecommender:
            hyperparameters_range_dictionary = {
                "topK": Integer(5, 1000),
                "alpha": Real(low=0, high=2, prior='uniform'),
                "beta": Real(low=0, high=2, prior='uniform'),
                "normalize_similarity": Categorical([True, False]),
            }

        elif recommender_class is MatrixFactorization_FunkSVD_Cython:
            hyperparameters_range_dictionary = {
                "sgd_mode": Categorical(["sgd", "adagrad", "adam"]),
                "epochs": Categorical([500]),
                "use_bias": Categorical([True, False]),
                "batch_size": Categorical(
                    [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024]),
                "num_factors": Categorical([200]),
                "item_reg": Real(low=1e-5, high=1e-2, prior='log-uniform'),
                "user_reg": Real(low=1e-5, high=1e-2, prior='log-uniform'),
                "learning_rate": Real(low=1e-4, high=1e-1, prior='log-uniform'),
                "negative_interactions_quota": Real(low=0.0, high=0.5, prior='uniform'),
            }
            fit_keyword_args = earlystopping_keywargs

        elif recommender_class is MatrixFactorization_AsySVD_Cython:
            hyperparameters_range_dictionary = {
                "sgd_mode": Categorical(["sgd", "adagrad", "adam"]),
                "epochs": Categorical([500]),
                "use_bias": Categorical([True, False]),
                "batch_size": Categorical([1]),
                "num_factors": Integer(1, 200),
                "item_reg": Real(low=1e-5, high=1e-2, prior='log-uniform'),
                "user_reg": Real(low=1e-5, high=1e-2, prior='log-uniform'),
                "learning_rate": Real(low=1e-4, high=1e-1, prior='log-uniform'),
                "negative_interactions_quota": Real(low=0.0, high=0.5, prior='uniform'),
            }
            fit_keyword_args = earlystopping_keywargs

        elif recommender_class is MatrixFactorization_BPR_Cython:
            hyperparameters_range_dictionary = {
                "sgd_mode": Categorical(["sgd", "adagrad", "adam"]),
                "epochs": Categorical([1500]),
                "num_factors": Integer(1, 200),
                "batch_size": Categorical(
                    [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024]),
                "positive_reg": Real(low=1e-5, high=1e-2, prior='log-uniform'),
                "negative_reg": Real(low=1e-5, high=1e-2, prior='log-uniform'),
                "learning_rate": Real(low=1e-4, high=1e-1, prior='log-uniform'),
            }
            fit_keyword_args = {
                **earlystopping_keywargs, "positive_threshold_BPR": None
            }

        elif recommender_class is IALSRecommender:
            hyperparameters_range_dictionary = {
                "num_factors": Integer(1, 200),
                "confidence_scaling": Categorical(["linear", "log"]),
                "alpha": Real(low=1e-3, high=50.0, prior='log-uniform'),
                "epsilon": Real(low=1e-3, high=10.0, prior='log-uniform'),
                "reg": Real(low=1e-5, high=1e-2, prior='log-uniform'),
            }
            fit_keyword_args = earlystopping_keywargs

        elif recommender_class is PureSVDRecommender:
            hyperparameters_range_dictionary = {
                "num_factors": Integer(1, 1000),
            }

        elif recommender_class is NMFRecommender:
            hyperparameters_range_dictionary = {
                "num_factors": Integer(1, 350),
                "solver": Categorical(
                    ["coordinate_descent", "multiplicative_update"]),
                "init_type": Categorical(["random", "nndsvda"]),
                "beta_loss": Categorical(["frobenius", "kullback-leibler"]),
            }

        elif recommender_class is SLIM_BPR_Cython:
            hyperparameters_range_dictionary = {
                "topK": Integer(5, 2000),
                "epochs": Categorical([1200, 1500, 1700]),
                "symmetric": Categorical([True, False]),
                "sgd_mode": Categorical(["adagrad", "adam"]),
                "lambda_i": Real(low=1e-7, high=1e1, prior='log-uniform'),
                "lambda_j": Real(low=1e-7, high=1e1, prior='log-uniform'),
                "learning_rate": Real(low=1e-6, high=1e-3, prior='log-uniform'),
            }
            fit_keyword_args = {
                **earlystopping_keywargs, "positive_threshold_BPR": None,
                'train_with_sparse_weights': None
            }

        elif recommender_class is SLIMElasticNetRecommender:
            hyperparameters_range_dictionary = {
                "topK": Integer(5, 1000),
                "l1_ratio": Real(low=1e-5, high=1.0, prior='log-uniform'),
                "alpha": Real(low=1e-3, high=1.0, prior='uniform'),
            }

        else:
            # Without this guard an unknown class surfaced as an obscure
            # "referenced before assignment" error in the log.
            raise ValueError(
                "No hyperparameter search space defined for recommender {}".format(
                    recommender_class))

        #########################################################################################################

        recommender_input_args, recommender_input_args_last_test = \
            _collaborative_search_input_args(URM_train, URM_train_last_test, fit_keyword_args)

        ## Final step, after the hyperparameter range has been defined for each type of algorithm
        parameterSearch.search(
            recommender_input_args,
            parameter_search_space=hyperparameters_range_dictionary,
            n_cases=n_cases,
            n_random_starts=n_random_starts,
            resume_from_saved=resume_from_saved,
            save_model=save_model,
            output_folder_path=output_folder_path,
            output_file_name_root=output_file_name_root,
            metric_to_optimize=metric_to_optimize,
            recommender_input_args_last_test=recommender_input_args_last_test)

    except Exception as e:
        # Deliberate best-effort boundary: report the failure and keep the
        # caller (typically a loop over many recommenders) running.
        error_message = "On recommender {} Exception {}".format(
            recommender_class, str(e))
        print(error_message)
        traceback.print_exc()

        # os.path.join keeps the log inside the folder even when
        # output_folder_path has no trailing separator.
        with open(os.path.join(output_folder_path, "ErrorLog.txt"), "a") as error_file:
            error_file.write(error_message + "\n")
# Example #7
# 0
def run_parameter_search_mf_collaborative(
        recommender_class,
        URM_train,
        UCM_train=None,
        UCM_name="NO_UCM",
        ICM_train=None,
        ICM_name="NO_ICM",
        URM_train_last_test=None,
        metric_to_optimize="PRECISION",
        evaluator_validation=None,
        evaluator_test=None,
        evaluator_validation_earlystopping=None,
        output_folder_path="result_experiments/",
        parallelize_search=True,
        n_cases=35,
        n_random_starts=5,
        resume_from_saved=False,
        save_model="best",
        approximate_recommender=None):
    """Run the hyperparameter search configured for a matrix-factorization recommender class.

    A class-specific search space is defined for ``ImplicitALSRecommender``,
    ``MF_BPR_Recommender``, ``FunkSVDRecommender``, ``LogisticMFRecommender``
    and ``FieldAwareFMRecommender``, followed by a ``SearchBayesianSkopt``
    search. ``LightFMRecommender`` is delegated to ``run_light_fm_search``.
    For any other class the search runs with an empty search space.

    Any exception raised while setting up or running the search is printed
    together with its traceback, appended to ``ErrorLog.txt`` inside
    ``output_folder_path``, and then swallowed, so the function returns
    ``None`` in every case.

    Args:
        recommender_class: Recommender class to tune; must expose
            ``RECOMMENDER_NAME``.
        URM_train: Training matrix; a copy of it is the only positional
            constructor argument of the recommender.
        UCM_train: Passed to the constructor as ``UCM_train`` for the LightFM
            and field-aware FM recommenders only.
        UCM_name: Suffix of the output file name root.
        ICM_train: Passed to the constructor as ``ICM_train`` for the LightFM
            and field-aware FM recommenders only.
        ICM_name: Middle part of the output file name root.
        URM_train_last_test: Optional matrix that replaces ``URM_train`` in
            the "last test" copy of the input arguments; copied before use.
        metric_to_optimize: Name of the metric forwarded to the search.
        evaluator_validation: Evaluator handed to ``SearchBayesianSkopt``.
        evaluator_test: Test evaluator handed to ``SearchBayesianSkopt``.
        evaluator_validation_earlystopping: Stored in the early-stopping
            settings, which no branch of this function currently consumes.
        output_folder_path: Folder for the search output and the error log;
            created if missing.
        parallelize_search: Forwarded to ``run_light_fm_search`` only.
        n_cases: Forwarded to the search as ``n_cases``.
        n_random_starts: Forwarded to the search as ``n_random_starts``.
        resume_from_saved: Forwarded to the generic search.
        save_model: Forwarded to the search.
        approximate_recommender: Required for ``FieldAwareFMRecommender``;
            passed to its constructor.
    """
    # exist_ok=True avoids the check-then-create race between concurrent runs.
    os.makedirs(output_folder_path, exist_ok=True)

    # Kept for parity with the other search functions; not consumed below.
    earlystopping_keywargs = {
        "validation_every_n": 5,
        "stop_on_validation": True,
        "evaluator_object": evaluator_validation_earlystopping,
        "lower_validations_allowed": 5,
        "validation_metric": metric_to_optimize,
    }

    URM_train = URM_train.copy()

    if URM_train_last_test is not None:
        URM_train_last_test = URM_train_last_test.copy()

    try:

        output_file_name_root = recommender_class.RECOMMENDER_NAME + "_" + ICM_name + "_" + UCM_name

        parameterSearch = SearchBayesianSkopt(
            recommender_class,
            evaluator_validation=evaluator_validation,
            evaluator_test=evaluator_test)

        recommender_input_args = SearchInputRecommenderArgs(
            CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
            CONSTRUCTOR_KEYWORD_ARGS={},
            FIT_POSITIONAL_ARGS=[],
            FIT_KEYWORD_ARGS={})
        hyperparameters_range_dictionary = {}

        if recommender_class is ImplicitALSRecommender:
            hyperparameters_range_dictionary["num_factors"] = Integer(300, 550)
            hyperparameters_range_dictionary["regularization"] = Real(
                low=1e-2, high=200, prior='log-uniform')
            hyperparameters_range_dictionary["epochs"] = Categorical([50])
            hyperparameters_range_dictionary[
                "confidence_scaling"] = Categorical(["linear"])
            hyperparameters_range_dictionary["alpha"] = Real(
                low=1e-2, high=1e2, prior='log-uniform')

        elif recommender_class is MF_BPR_Recommender:
            hyperparameters_range_dictionary["num_factors"] = Categorical(
                [600])
            hyperparameters_range_dictionary["regularization"] = Real(
                low=1e-4, high=1e-1, prior='log-uniform')
            hyperparameters_range_dictionary["learning_rate"] = Real(
                low=1e-2, high=1e-1, prior='log-uniform')
            hyperparameters_range_dictionary["epochs"] = Categorical([300])

        elif recommender_class is FunkSVDRecommender:
            hyperparameters_range_dictionary["num_factors"] = Integer(50, 400)
            hyperparameters_range_dictionary["regularization"] = Real(
                low=1e-8, high=1e-1, prior='log-uniform')
            hyperparameters_range_dictionary["learning_rate"] = Real(
                low=1e-6, high=1e-1, prior='log-uniform')
            hyperparameters_range_dictionary["epochs"] = Categorical([300])

        elif recommender_class is LogisticMFRecommender:
            hyperparameters_range_dictionary["num_factors"] = Integer(20, 400)
            hyperparameters_range_dictionary["regularization"] = Real(
                low=1e-5, high=1e1, prior='log-uniform')
            hyperparameters_range_dictionary["learning_rate"] = Real(
                low=1e-2, high=1e-1, prior='log-uniform')
            hyperparameters_range_dictionary["epochs"] = Categorical([300])

        elif recommender_class is LightFMRecommender:
            recommender_input_args.CONSTRUCTOR_KEYWORD_ARGS[
                'UCM_train'] = UCM_train
            recommender_input_args.CONSTRUCTOR_KEYWORD_ARGS[
                'ICM_train'] = ICM_train

            hyperparameters_range_dictionary['no_components'] = Categorical(
                [100])
            hyperparameters_range_dictionary['epochs'] = Categorical([100])

            run_light_fm_search(parameterSearch,
                                recommender_input_args,
                                hyperparameters_range_dictionary,
                                URM_train_last_test=URM_train_last_test,
                                parallelize_search=parallelize_search,
                                n_cases=n_cases,
                                n_random_starts=n_random_starts,
                                output_folder_path=output_folder_path,
                                output_file_name_root=output_file_name_root,
                                metric_to_optimize=metric_to_optimize,
                                save_model=save_model)

            # NOTE(review): run_light_fm_search is handed the search object
            # and all search settings, so it presumably performs the search
            # itself. Return here so the generic search below does not run a
            # second time against the same output file root.
            return

        elif recommender_class is FieldAwareFMRecommender:
            if approximate_recommender is None:
                raise ValueError("approximate_recommender has to be set")
            root_path = get_project_root_path()
            train_svm_file_path = os.path.join(root_path, "resources",
                                               "fm_data",
                                               "URM_ICM_UCM_uncompressed.txt")
            recommender_input_args.CONSTRUCTOR_KEYWORD_ARGS[
                'train_svm_file_path'] = train_svm_file_path
            recommender_input_args.CONSTRUCTOR_KEYWORD_ARGS[
                'approximate_recommender'] = approximate_recommender
            recommender_input_args.CONSTRUCTOR_KEYWORD_ARGS[
                'UCM_train'] = UCM_train
            recommender_input_args.CONSTRUCTOR_KEYWORD_ARGS[
                'ICM_train'] = ICM_train

            hyperparameters_range_dictionary['epochs'] = Categorical([200])
            hyperparameters_range_dictionary['latent_factors'] = Integer(
                low=20, high=500)
            # 10e-7 == 1e-6 and 10e-1 == 1.0: the range is [1e-6, 1.0].
            hyperparameters_range_dictionary['regularization'] = Real(
                low=10e-7, high=10e-1, prior="log-uniform")
            # 10e-3 == 1e-2: the range is [1e-2, 1.0].
            hyperparameters_range_dictionary['learning_rate'] = Real(
                low=10e-3, high=10e-1, prior="log-uniform")

        if URM_train_last_test is not None:
            recommender_input_args_last_test = recommender_input_args.copy()
            recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[
                0] = URM_train_last_test
        else:
            recommender_input_args_last_test = None

        ## Final step, after the hyperparameter range has been defined for each type of algorithm
        parameterSearch.search(
            recommender_input_args,
            parameter_search_space=hyperparameters_range_dictionary,
            n_cases=n_cases,
            n_random_starts=n_random_starts,
            resume_from_saved=resume_from_saved,
            save_model=save_model,
            output_folder_path=output_folder_path,
            output_file_name_root=output_file_name_root,
            metric_to_optimize=metric_to_optimize,
            recommender_input_args_last_test=recommender_input_args_last_test)

    except Exception as e:
        # Deliberate best-effort boundary: report the failure and keep the
        # caller running.
        error_message = "On recommender {} Exception {}".format(
            recommender_class, str(e))
        print(error_message)
        traceback.print_exc()

        # os.path.join keeps the log inside the folder even when
        # output_folder_path has no trailing separator.
        with open(os.path.join(output_folder_path, "ErrorLog.txt"), "a") as error_file:
            error_file.write(error_message + "\n")
# Example #8
# 0
def run_parameter_search_lightgbm(URM_train,
                                  X_train,
                                  y_train,
                                  X_test,
                                  y_test,
                                  cutoff_test,
                                  categorical_features=None,
                                  num_iteration=10000,
                                  early_stopping_iteration=150,
                                  objective="lambdarank",
                                  verbose=True,
                                  output_folder_path="result_experiments/",
                                  evaluator_validation=None,
                                  n_cases=35,
                                  n_random_starts=5,
                                  metric_to_optimize="MAP"):
    """Run a SearchBayesianSkopt hyperparameter search for LightGBMRecommender.

    The search space covers learning rate, split gain, L1/L2 regularization,
    tree depth, leaf size, number of leaves and the bagging/feature sampling
    parameters. The search is always run with ``save_model="best"``.

    :param URM_train: first positional constructor argument of the recommender
    :param X_train: forwarded positionally to the recommender constructor
    :param y_train: forwarded positionally to the recommender constructor
    :param X_test: forwarded positionally to the recommender constructor
    :param y_test: forwarded positionally to the recommender constructor
    :param cutoff_test: forwarded positionally to the recommender constructor
    :param categorical_features: last positional constructor argument
    :param num_iteration: passed to fit as ``num_iteration``
    :param early_stopping_iteration: passed to fit as ``early_stopping_round``
    :param objective: passed to fit as ``objective``
    :param verbose: passed to fit as ``verbose``
    :param output_folder_path: folder handed to the search; created if missing
    :param evaluator_validation: evaluator given to SearchBayesianSkopt
    :param n_cases: forwarded to the search
    :param n_random_starts: forwarded to the search
    :param metric_to_optimize: forwarded to the search
    :return: None
    """
    model_class = LightGBMRecommender

    # The search writes into this folder, so make sure it is there first.
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    file_name_root = model_class.RECOMMENDER_NAME

    # Whole search space in one literal; entries keep the original order.
    search_space = {
        "learning_rate": Real(low=1e-6, high=1e-1, prior="log-uniform"),
        "min_gain_to_split": Real(low=1e-4, high=1e-1, prior="log-uniform"),
        "reg_l1": Real(low=1e-7, high=1e1, prior="log-uniform"),
        "reg_l2": Real(low=1e-7, high=1e1, prior="log-uniform"),
        "max_depth": Integer(low=4, high=100),
        "min_data_in_leaf": Integer(low=5, high=100),
        "bagging_freq": Integer(low=2, high=100),
        "num_leaves": Integer(low=16, high=400),
        "bagging_fraction": Real(low=0.1, high=0.9, prior="log-uniform"),
        "feature_fraction": Real(low=0.1, high=0.9, prior="log-uniform"),
    }

    # Arguments used to build and fit the recommender on every search case.
    constructor_args = [
        URM_train, X_train, y_train, X_test, y_test, cutoff_test,
        categorical_features
    ]
    fit_kwargs = {
        "num_iteration": num_iteration,
        "early_stopping_round": early_stopping_iteration,
        "verbose": verbose,
        "objective": objective
    }
    search_args = SearchInputRecommenderArgs(
        CONSTRUCTOR_POSITIONAL_ARGS=constructor_args,
        CONSTRUCTOR_KEYWORD_ARGS={},
        FIT_POSITIONAL_ARGS=[],
        FIT_KEYWORD_ARGS=fit_kwargs)

    bayesian_search = SearchBayesianSkopt(
        model_class, evaluator_validation=evaluator_validation)

    bayesian_search.search(search_args,
                           parameter_search_space=search_space,
                           n_cases=n_cases,
                           n_random_starts=n_random_starts,
                           output_folder_path=output_folder_path,
                           output_file_name_root=file_name_root,
                           metric_to_optimize=metric_to_optimize,
                           save_model="best")
def runParameterSearch_QSLIM(URM_train,
                             solver,
                             n_reads=50,
                             filter_items_n=100,
                             URM_train_last_test=None,
                             metric_to_optimize="MAP",
                             evaluator_validation=None,
                             evaluator_test=None,
                             output_folder_path="result_experiments/",
                             parallelizeKNN=True,
                             n_cases=35,
                             n_random_starts=5,
                             resume_from_saved=False,
                             save_model="best",
                             item_selection_list=None):
    """Run a SearchBayesianSkopt search for QuantumSLIM_MSE, once per item-selection method.

    For every entry of ``item_selection_list`` the helper
    ``run_QSLIM_on_item_selection`` is invoked with the shared search
    configuration, either in a multiprocessing pool or sequentially.

    :param URM_train: training URM; a copy is used as first constructor argument
    :param solver: second positional constructor argument of QuantumSLIM_MSE
    :param n_reads: passed to fit as ``num_reads``
    :param filter_items_n: passed to fit as ``filter_items_n``
    :param URM_train_last_test: if not None, a copy replaces the URM in the
        constructor arguments used for the last-test run
    :param metric_to_optimize: forwarded to the helper
    :param evaluator_validation: evaluator given to SearchBayesianSkopt
    :param evaluator_test: evaluator given to SearchBayesianSkopt
    :param output_folder_path: folder forwarded to the helper; created if missing
    :param parallelizeKNN: if True, item-selection methods are processed in a
        pool with one process per CPU; otherwise one after the other
    :param n_cases: forwarded to the helper
    :param n_random_starts: forwarded to the helper
    :param resume_from_saved: forwarded to the helper
    :param save_model: forwarded to the helper
    :param item_selection_list: item-selection methods to run; defaults to every
        method reported by QuantumSLIM_MSE except "NONE"
    :return: None
    """
    # exist_ok avoids the check-then-create race when several searches start
    # at the same time on the same folder.
    os.makedirs(output_folder_path, exist_ok=True)
    output_file_name_root = QuantumSLIM_MSE.RECOMMENDER_NAME

    # Work on copies so the caller's matrices are not shared with the search.
    URM_train = URM_train.copy()

    if URM_train_last_test is not None:
        URM_train_last_test = URM_train_last_test.copy()

    parameterSearch = SearchBayesianSkopt(
        QuantumSLIM_MSE,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test)

    if item_selection_list is None:
        item_selection_list = [
            x for x in QuantumSLIM_MSE.get_implemented_filter_item_methods()
            if x != "NONE"
        ]

    recommender_input_args = SearchInputRecommenderArgs(
        CONSTRUCTOR_POSITIONAL_ARGS=[URM_train, solver, "NORM_MSE"],
        CONSTRUCTOR_KEYWORD_ARGS={"verbose": False},
        FIT_POSITIONAL_ARGS=[],
        FIT_KEYWORD_ARGS={
            "filter_items_n": filter_items_n,
            "num_reads": n_reads
        })

    if URM_train_last_test is not None:
        # Same arguments, but with the last-test URM in first position.
        recommender_input_args_last_test = recommender_input_args.copy()
        recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[
            0] = URM_train_last_test
    else:
        recommender_input_args_last_test = None

    # Everything except the item-selection method is fixed up front.
    run_on_item_selection_partial = partial(
        run_QSLIM_on_item_selection,
        recommender_input_args=recommender_input_args,
        parameter_search_space={},
        parameterSearch=parameterSearch,
        n_cases=n_cases,
        n_random_starts=n_random_starts,
        resume_from_saved=resume_from_saved,
        save_model=save_model,
        output_folder_path=output_folder_path,
        output_file_name_root=output_file_name_root,
        metric_to_optimize=metric_to_optimize,
        recommender_input_args_last_test=recommender_input_args_last_test)

    if parallelizeKNN:
        pool = multiprocessing.Pool(processes=multiprocessing.cpu_count(),
                                    maxtasksperchild=1)
        try:
            pool.map(run_on_item_selection_partial, item_selection_list)
        finally:
            # Always release the workers, even when a task raised.
            pool.close()
            pool.join()
    else:
        for item_selection in item_selection_list:
            run_on_item_selection_partial(item_selection)
def run_parameter_search_user_item_all(
        recommender_class,
        URM_train,
        UCM_train,
        ICM_train,
        UCM_name,
        ICM_name,
        metric_to_optimize="PRECISION",
        evaluator_validation=None,
        output_folder_path="result_experiments/",
        parallelizeKNN=True,
        n_cases=60,
        n_random_starts=10,
        similarity_type_list=None):
    """Run a SearchBayesianSkopt search for every (user, item) similarity pair.

    Each ordered pair drawn from ``similarity_type_list`` x
    ``similarity_type_list`` is handed to
    ``run_user_item_all_on_combination_similarity_type`` together with the
    shared search configuration, with user and item weighting both allowed.

    :param recommender_class: recommender to tune; its RECOMMENDER_NAME is the
        prefix of the output file name root
    :param URM_train: first positional constructor argument
    :param UCM_train: second positional constructor argument
    :param ICM_train: third positional constructor argument
    :param UCM_name: appended to the output file name root
    :param ICM_name: appended to the output file name root after UCM_name
    :param metric_to_optimize: forwarded to the helper
    :param evaluator_validation: evaluator given to SearchBayesianSkopt
    :param output_folder_path: folder forwarded to the helper; created if missing
    :param parallelizeKNN: if True, pairs are processed in a pool with one
        process per CPU; otherwise one after the other
    :param n_cases: forwarded to the helper
    :param n_random_starts: forwarded to the helper
    :param similarity_type_list: similarity names to combine; defaults to
        ['jaccard', 'asymmetric', "cosine"]
    :return: None
    """
    # exist_ok avoids the check-then-create race when several searches start
    # at the same time on the same folder.
    os.makedirs(output_folder_path, exist_ok=True)

    output_file_name_root = "{}_{}_{}".format(
        recommender_class.RECOMMENDER_NAME, UCM_name, ICM_name)

    parameterSearch = SearchBayesianSkopt(
        recommender_class, evaluator_validation=evaluator_validation)

    if similarity_type_list is None:
        similarity_type_list = ['jaccard', 'asymmetric', "cosine"]

    # Set hyperparameters
    hyperparameters_range_dictionary = {
        "user_topK": Integer(5, 2000),
        "user_shrink": Integer(0, 2000),
        "item_topK": Integer(5, 2000),
        "item_shrink": Integer(0, 2000)
    }

    # Set args for recommender
    recommender_input_args = SearchInputRecommenderArgs(
        CONSTRUCTOR_POSITIONAL_ARGS=[URM_train, UCM_train, ICM_train],
        CONSTRUCTOR_KEYWORD_ARGS={},
        FIT_POSITIONAL_ARGS=[],
        FIT_KEYWORD_ARGS={})

    run_on_combination_partial = partial(
        run_user_item_all_on_combination_similarity_type,
        recommender_input_args=recommender_input_args,
        parameter_search_space=hyperparameters_range_dictionary,
        parameterSearch=parameterSearch,
        n_cases=n_cases,
        n_random_starts=n_random_starts,
        output_folder_path=output_folder_path,
        output_file_name_root=output_file_name_root,
        metric_to_optimize=metric_to_optimize,
        allow_user_weighting=True,
        allow_item_weighting=True)

    # Ordered (user_similarity, item_similarity) pairs, user type varying
    # slowest.
    similarity_combinations = list(
        itertools.product(similarity_type_list, similarity_type_list))

    if parallelizeKNN:
        pool = multiprocessing.Pool(processes=multiprocessing.cpu_count(),
                                    maxtasksperchild=1)
        try:
            pool.map(run_on_combination_partial, similarity_combinations)
        finally:
            # Always release the workers, even when a task raised.
            pool.close()
            pool.join()
    else:
        # NOTE(review): this path used to call the helper with two positional
        # arguments, while the default parallel path passes one (user, item)
        # tuple per call. Both cannot match the helper's signature, so the
        # sequential path now follows the parallel one. The helper is not
        # visible from here: confirm it expects a single tuple.
        for similarity_combination in similarity_combinations:
            run_on_combination_partial(similarity_combination)