示例#1
0
# NUOVA AGGIUNTA

reader = dataReader()
URM_train = reader.mat_complete
ICM_Art = matTrack_Artist
ICM_Alb = matTrack_Album

recommender = HybridRecommender(URM_train)
item = ItemKNNCFRecommender(URM_train)
user = UserKNNCFRecommender(URM_train)
SLIM = MultiThreadSLIM_ElasticNet(URM_train=URM_train)
item.fit(topK=800, shrink=10, similarity='cosine', normalize=True)
user.fit(topK=70, shrink=22, similarity='cosine', normalize=True)
SLIM.fit(l1_penalty=1e-05,
         l2_penalty=0,
         positive_only=True,
         topK=150,
         alpha=0.00415637376180466)
recommender.fit(ICM_Art,
                ICM_Alb,
                item=item,
                user=user,
                SLIM=SLIM,
                w_itemcf=1.1,
                w_usercf=0.6,
                w_cbart=0.3,
                w_cbalb=0.6,
                w_slim=0.8,
                w_svd=0.6)
file = open("SubmissionNEW2.csv", "w")
file.write("playlist_id,track_ids")
class Hybrid005AlphaRecommender(BaseRecommender):
    """Hybrid004AlphaRecommender recommender"""

    RECOMMENDER_NAME = "Hybrid005AlphaRecommender"

    def __init__(self, data: DataObject):
        super(Hybrid005AlphaRecommender, self).__init__(data.urm_train)
        self.data = data
        self.cold_recommender = Hybrid100AlphaRecommender(data)
        self.cold_recommender.fit()
        # rec1 = RP3betaRecommender(data.urm_train)
        # rec1.fit(topK=20, alpha=0.12, beta=0.24)
        # rec2 = ItemKNNCFRecommender(data.urm_train)
        # rec2.fit(topK=22, shrink=850, similarity='jaccard', feature_weighting='BM25')
        # self.warm_2_recommender = ItemKNNSimilarityHybridRecommender(data.urm_train, rec1.W_sparse, rec2.W_sparse)
        urm = data.urm_train
        urm = sps.vstack([data.urm_train, data.icm_all_augmented.T])
        urm = urm.tocsr()
        self.warm_recommender = MultiThreadSLIM_ElasticNet(data.urm_train)
        self.warm_2_3_recommender = MultiThreadSLIM_ElasticNet(data.urm_train)
        self.warm_1_recommender = Hybrid101AlphaRecommender(data)

    def fit(self):
        # self.warm_recommender.fit(topK=30, shrink=30, feature_weighting="none", similarity="jaccard")
        # self.warm_2_recommender.fit(alpha=0.9, topK=50)
        self.warm_1_recommender.fit()
        try:
            self.warm_recommender.load_model("SLIM_ElasticNet_ICM",
                                             "FULL_URM_topK=100_l1_ratio=0.04705_alpha=0.00115_positive_only=True_max_iter=35")
        except:
            self.warm_recommender.fit(topK=100, l1_ratio=0.04705, alpha=0.00115, positive_only=True, max_iter=35)
            self.warm_recommender.save_model("SLIM_ElasticNet_ICM",
                                             "FULL_URM_topK=100_l1_ratio=0.04705_alpha=0.00115_positive_only=True_max_iter=35")

    def recommend(self, user_id_array, cutoff=None, remove_seen_flag=True, items_to_compute=None,
                  remove_top_pop_flag=False, remove_CustomItems_flag=False, return_scores=False):
        if user_id_array in self.data.ids_ultra_cold_users:
            return self.cold_recommender.recommend(user_id_array, cutoff=cutoff)
        elif user_id_array in self.data.urm_train_users_by_type[0][1]:
            return self.cold_recommender.recommend(user_id_array, cutoff=cutoff)
        elif user_id_array in self.data.urm_train_users_by_type[1][1]:
            return self.warm_1_recommender.recommend(user_id_array, cutoff=cutoff)
        elif user_id_array in self.data.urm_train_users_by_type[2][1]:
            return self.warm_recommender.recommend(user_id_array, cutoff=cutoff)
        elif user_id_array in self.data.urm_train_users_by_type[3][1]:
            return self.warm_recommender.recommend(user_id_array, cutoff=cutoff)
        elif user_id_array in self.data.urm_train_users_by_type[4][1]:
            return self.warm_recommender.recommend(user_id_array, cutoff=cutoff)
        elif user_id_array in self.data.urm_train_users_by_type[5][1]:
            return self.warm_recommender.recommend(user_id_array, cutoff=cutoff)
        elif user_id_array in self.data.urm_train_users_by_type[6][1]:
            return self.warm_recommender.recommend(user_id_array, cutoff=cutoff)
        elif user_id_array in self.data.urm_train_users_by_type[7][1]:
            return self.warm_recommender.recommend(user_id_array, cutoff=cutoff)
        elif user_id_array in self.data.urm_train_users_by_type[8][1]:
            return self.warm_recommender.recommend(user_id_array, cutoff=cutoff)
        elif user_id_array in self.data.urm_train_users_by_type[9][1]:
            return self.warm_recommender.recommend(user_id_array, cutoff=cutoff)
        elif user_id_array in self.data.urm_train_users_by_type[10][1]:
            return self.warm_recommender.recommend(user_id_array, cutoff=cutoff)
        elif user_id_array in self.data.urm_train_users_by_type[11][1]:
            return self.warm_recommender.recommend(user_id_array, cutoff=cutoff)
        elif user_id_array in self.data.ids_cold_user:
            return self.cold_recommender.recommend(user_id_array, cutoff=cutoff)
    if not os.path.exists(output_root_path):
        os.makedirs(output_root_path)

    logFile = open(output_root_path + "result_all_algorithms.txt", "a")

    for recommender_class in recommender_list:

        try:

            print("Algorithm: {}".format(recommender_class))
            recommender = recommender_class(URM_train)

            SLIM = MultiThreadSLIM_ElasticNet(URM_train=URM_train)
            SLIM.fit(l1_penalty=1e-05,
                     l2_penalty=0,
                     positive_only=True,
                     topK=300,
                     alpha=4e-5)
            item = ItemKNNCFRecommender(URM_train)
            W_sparse_CF = item.fit(topK=800,
                                   shrink=8,
                                   similarity='cosine',
                                   normalize=True)
            CBAlb = ItemKNNCBFRecommender(ICM=ICM_Alb, URM_train=URM_train)
            CBAlb.fit(topK=10,
                      shrink=16,
                      similarity='cosine',
                      normalize=True,
                      feature_weighting="none")
            CBArt = ItemKNNCBFRecommender(ICM=ICM_Art, URM_train=URM_train)
            CBArt.fit(topK=12,
def runParameterSearch_Collaborative(recommender_class,
                                     URM_train,
                                     ICM_1,
                                     ICM_2,
                                     metric_to_optimize="PRECISION",
                                     evaluator_validation=None,
                                     evaluator_test=None,
                                     evaluator_validation_earlystopping=None,
                                     output_root_path="result_experiments/",
                                     parallelizeKNN=True,
                                     n_cases=100):
    from ParameterTuning.AbstractClassSearch import DictionaryKeys

    # If directory does not exist, create
    if not os.path.exists(output_root_path):
        os.makedirs(output_root_path)

    try:

        output_root_path_rec_name = output_root_path + recommender_class.RECOMMENDER_NAME

        parameterSearch = BayesianSearch(
            recommender_class,
            evaluator_validation=evaluator_validation,
            evaluator_test=evaluator_test)

        if recommender_class in [TopPop, Random]:
            recommender = recommender_class(URM_train)

            recommender.fit()

            output_file = open(
                output_root_path_rec_name + "_BayesianSearch.txt", "a")
            result_dict, result_baseline = evaluator_validation.evaluateRecommender(
                recommender)
            output_file.write(
                "ParameterSearch: Best result evaluated on URM_validation. Results: {}"
                .format(result_baseline))

            pickle.dump(
                result_dict.copy(),
                open(output_root_path_rec_name + "_best_result_validation",
                     "wb"),
                protocol=pickle.HIGHEST_PROTOCOL)

            result_dict, result_baseline = evaluator_test.evaluateRecommender(
                recommender)
            output_file.write(
                "ParameterSearch: Best result evaluated on URM_test. Results: {}"
                .format(result_baseline))

            pickle.dump(result_dict.copy(),
                        open(output_root_path_rec_name + "_best_result_test",
                             "wb"),
                        protocol=pickle.HIGHEST_PROTOCOL)

            output_file.close()

            return

        ##########################################################################################################

        if recommender_class is UserKNNCFRecommender:

            similarity_type_list = ['cosine']

            run_KNNCFRecommender_on_similarity_type_partial = partial(
                run_KNNCFRecommender_on_similarity_type,
                parameterSearch=parameterSearch,
                URM_train=URM_train,
                n_cases=n_cases,
                output_root_path=output_root_path_rec_name,
                metric_to_optimize=metric_to_optimize)

            if parallelizeKNN:
                pool = PoolWithSubprocess(processes=int(2), maxtasksperchild=1)
                resultList = pool.map(
                    run_KNNCFRecommender_on_similarity_type_partial,
                    similarity_type_list)

            else:

                for similarity_type in similarity_type_list:
                    run_KNNCFRecommender_on_similarity_type_partial(
                        similarity_type)

            return

        ##########################################################################################################

        if recommender_class is ItemKNNCFRecommender:

            similarity_type_list = ['cosine']

            run_KNNCFRecommender_on_similarity_type_partial = partial(
                run_KNNCFRecommender_on_similarity_type,
                parameterSearch=parameterSearch,
                URM_train=URM_train,
                n_cases=n_cases,
                output_root_path=output_root_path_rec_name,
                metric_to_optimize=metric_to_optimize)

            if parallelizeKNN:
                pool = PoolWithSubprocess(processes=int(2), maxtasksperchild=1)
                resultList = pool.map(
                    run_KNNCFRecommender_on_similarity_type_partial,
                    similarity_type_list)

            else:

                for similarity_type in similarity_type_list:
                    run_KNNCFRecommender_on_similarity_type_partial(
                        similarity_type)

            return

        ##########################################################################################################

        # if recommender_class is MultiThreadSLIM_RMSE:
        #
        #     hyperparamethers_range_dictionary = {}
        #     hyperparamethers_range_dictionary["topK"] = [50, 100]
        #     hyperparamethers_range_dictionary["l1_penalty"] = [1e-2, 1e-3, 1e-4]
        #     hyperparamethers_range_dictionary["l2_penalty"] = [1e-2, 1e-3, 1e-4]
        #
        #
        #     recommenderDictionary = {DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
        #                              DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {},
        #                              DictionaryKeys.FIT_POSITIONAL_ARGS: dict(),
        #                              DictionaryKeys.FIT_KEYWORD_ARGS: dict(),
        #                              DictionaryKeys.FIT_RANGE_KEYWORD_ARGS: hyperparamethers_range_dictionary}
        #
        #

        ##########################################################################################################

        if recommender_class is P3alphaRecommender:
            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["topK"] = [
                5, 10, 20, 50, 100, 150, 200, 300, 400, 500, 600, 700, 800
            ]
            hyperparamethers_range_dictionary["alpha"] = range(0, 2)
            hyperparamethers_range_dictionary["normalize_similarity"] = [
                True, False
            ]

            recommenderDictionary = {
                DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {},
                DictionaryKeys.FIT_POSITIONAL_ARGS:
                dict(),
                DictionaryKeys.FIT_KEYWORD_ARGS:
                dict(),
                DictionaryKeys.FIT_RANGE_KEYWORD_ARGS:
                hyperparamethers_range_dictionary
            }

        ##########################################################################################################

        if recommender_class is HybridRecommender:

            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["w_itemcf"] = [
                x * 0.1 + 1 for x in range(0, 10)
            ]
            hyperparamethers_range_dictionary["w_usercf"] = [
                x * 0.1 for x in range(0, 10)
            ]
            hyperparamethers_range_dictionary["w_cbart"] = [
                x * 0.1 for x in range(0, 10)
            ]
            hyperparamethers_range_dictionary["w_cbalb"] = [
                x * 0.1 for x in range(0, 10)
            ]
            hyperparamethers_range_dictionary["w_slim"] = [
                x * 0.1 for x in range(0, 10)
            ]
            #hyperparamethers_range_dictionary["w_svd"] = [x * 0.05 for x in range(0, 20)]
            #hyperparamethers_range_dictionary["w_rp3"] = [x * 0.05 for x in range(0, 20)]

            item = ItemKNNCFRecommender(URM_train)

            user = UserKNNCFRecommender(URM_train)

            SLIM = MultiThreadSLIM_ElasticNet(URM_train=URM_train)

            item.fit(topK=800, shrink=10, similarity='cosine', normalize=True)

            user.fit(topK=70, shrink=22, similarity='cosine', normalize=True)

            SLIM.fit(l1_penalty=1e-05,
                     l2_penalty=0,
                     positive_only=True,
                     topK=150,
                     alpha=0.00415637376180466)

            CBArt = ItemKNNCBFRecommender(ICM=ICM_1, URM_train=URM_train)
            CBArt.fit(topK=160,
                      shrink=5,
                      similarity='cosine',
                      normalize=True,
                      feature_weighting="none")

            CBAlb = ItemKNNCBFRecommender(ICM=ICM_2, URM_train=URM_train)
            CBAlb.fit(topK=160,
                      shrink=5,
                      similarity='cosine',
                      normalize=True,
                      feature_weighting="none")

            recommenderDictionary = {
                DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {},
                DictionaryKeys.FIT_POSITIONAL_ARGS:
                dict(),
                DictionaryKeys.FIT_KEYWORD_ARGS: {
                    "ICM_Art": ICM_1,
                    "ICM_Alb": ICM_2,
                    "item": item,
                    "user": user,
                    "SLIM": SLIM,
                    "CBArt": CBArt,
                    "CBAlb": CBAlb,
                },
                DictionaryKeys.FIT_RANGE_KEYWORD_ARGS:
                hyperparamethers_range_dictionary
            }

        ##########################################################################################################

        if recommender_class is RP3betaRecommender:
            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["topK"] = [
                5, 10, 20, 50, 100, 150, 200, 300, 400, 500, 600, 700, 800
            ]
            hyperparamethers_range_dictionary["alpha"] = range(0, 2)
            hyperparamethers_range_dictionary["beta"] = range(0, 2)
            hyperparamethers_range_dictionary["normalize_similarity"] = [True]
            hyperparamethers_range_dictionary["implicit"] = [True]

            recommenderDictionary = {
                DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {},
                DictionaryKeys.FIT_POSITIONAL_ARGS:
                dict(),
                DictionaryKeys.FIT_KEYWORD_ARGS:
                dict(),
                DictionaryKeys.FIT_RANGE_KEYWORD_ARGS:
                hyperparamethers_range_dictionary
            }

        ##########################################################################################################

        if recommender_class is MatrixFactorization_FunkSVD_Cython:
            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["sgd_mode"] = ["adagrad", "adam"]
            # hyperparamethers_range_dictionary["epochs"] = [1, 5, 10, 20, 30, 50, 70, 90, 110]
            hyperparamethers_range_dictionary["num_factors"] = range(
                100, 1000, 20)
            hyperparamethers_range_dictionary["reg"] = [0.0, 1e-3, 1e-6, 1e-9]
            hyperparamethers_range_dictionary["learning_rate"] = [
                1e-2, 1e-3, 1e-4, 1e-5
            ]

            recommenderDictionary = {
                DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {},
                DictionaryKeys.FIT_POSITIONAL_ARGS:
                dict(),
                DictionaryKeys.FIT_KEYWORD_ARGS: {
                    "validation_every_n": 5,
                    "stop_on_validation": True,
                    "evaluator_object": evaluator_validation_earlystopping,
                    "lower_validatons_allowed": 20,
                    "validation_metric": metric_to_optimize
                },
                DictionaryKeys.FIT_RANGE_KEYWORD_ARGS:
                hyperparamethers_range_dictionary
            }

        ##########################################################################################################

        if recommender_class is FunkSVD:
            hyperparamethers_range_dictionary = {}

            # hyperparamethers_range_dictionary["epochs"] = [1, 5, 10, 20, 30, 50, 70, 90, 110]
            hyperparamethers_range_dictionary["num_factors"] = range(
                100, 1000, 20)
            hyperparamethers_range_dictionary["reg"] = [
                0.0, 1e-03, 1e-06, 1e-09
            ]
            hyperparamethers_range_dictionary["learning_rate"] = [1e-02, 1e-03]

            recommenderDictionary = {
                DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {},
                DictionaryKeys.FIT_POSITIONAL_ARGS:
                dict(),
                DictionaryKeys.FIT_KEYWORD_ARGS:
                dict(),
                DictionaryKeys.FIT_RANGE_KEYWORD_ARGS:
                hyperparamethers_range_dictionary
            }

        ##########################################################################################################

        if recommender_class is MatrixFactorization_AsySVD_Cython:
            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["sgd_mode"] = ["adagrad", "adam"]
            # hyperparamethers_range_dictionary["epochs"] = [1, 5, 10, 20, 30, 50, 70, 90, 110]
            hyperparamethers_range_dictionary["num_factors"] = range(
                100, 500, 10)
            hyperparamethers_range_dictionary["batch_size"] = [
                100, 200, 300, 400
            ]
            hyperparamethers_range_dictionary["positive_reg"] = [
                0.0, 1e-3, 1e-6, 1e-9
            ]
            hyperparamethers_range_dictionary["negative_reg"] = [
                0.0, 1e-3, 1e-6, 1e-9
            ]
            hyperparamethers_range_dictionary["learning_rate"] = [
                1e-2, 1e-3, 1e-4, 1e-5
            ]
            hyperparamethers_range_dictionary["user_reg"] = [
                1e-3, 1e-4, 1e-5, 1e-6
            ]

            recommenderDictionary = {
                DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {
                    'positive_threshold': 1
                },
                DictionaryKeys.FIT_POSITIONAL_ARGS:
                dict(),
                DictionaryKeys.FIT_KEYWORD_ARGS: {
                    "validation_every_n": 5,
                    "stop_on_validation": True,
                    "evaluator_object": evaluator_validation_earlystopping,
                    "lower_validatons_allowed": 20,
                    "validation_metric": metric_to_optimize
                },
                DictionaryKeys.FIT_RANGE_KEYWORD_ARGS:
                hyperparamethers_range_dictionary
            }

        ##########################################################################################################

        if recommender_class is PureSVDRecommender:
            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["num_factors"] = list(
                range(0, 250, 5))

            recommenderDictionary = {
                DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {},
                DictionaryKeys.FIT_POSITIONAL_ARGS:
                dict(),
                DictionaryKeys.FIT_KEYWORD_ARGS: {},
                DictionaryKeys.FIT_RANGE_KEYWORD_ARGS:
                hyperparamethers_range_dictionary
            }

        #########################################################################################################

        if recommender_class is SLIM_BPR_Cython:
            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["topK"] = [800, 900, 1000, 1200]
            # hyperparamethers_range_dictionary["epochs"] = [1, 5, 10, 20, 30, 50, 70, 90, 110]
            hyperparamethers_range_dictionary["sgd_mode"] = ["adagrad"]
            hyperparamethers_range_dictionary["lambda_i"] = [1e-6]
            hyperparamethers_range_dictionary["lambda_j"] = [1e-9]
            hyperparamethers_range_dictionary["learning_rate"] = [
                0.01, 0.001, 1e-4, 1e-5, 0.1
            ]

            recommenderDictionary = {
                DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {
                    'train_with_sparse_weights': True,
                    'symmetric': True,
                    'positive_threshold': 1
                },
                DictionaryKeys.FIT_POSITIONAL_ARGS:
                dict(),
                DictionaryKeys.FIT_KEYWORD_ARGS: {
                    "validation_every_n": 10,
                    "stop_on_validation": True,
                    "evaluator_object": evaluator_validation_earlystopping,
                    "lower_validatons_allowed": 3,
                    "validation_metric": metric_to_optimize
                },
                DictionaryKeys.FIT_RANGE_KEYWORD_ARGS:
                hyperparamethers_range_dictionary
            }

        ##########################################################################################################

        if recommender_class is MultiThreadSLIM_ElasticNet:
            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["topK"] = [
                3300, 4300, 5300, 6300, 7300
            ]
            hyperparamethers_range_dictionary["l1_penalty"] = [
                1e-5, 1e-6, 1e-4, 1e-3
            ]
            hyperparamethers_range_dictionary["l2_penalty"] = [1e-4]
            hyperparamethers_range_dictionary["alpha"] = range(0, 1)
            recommenderDictionary = {
                DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {},
                DictionaryKeys.FIT_POSITIONAL_ARGS:
                dict(),
                DictionaryKeys.FIT_KEYWORD_ARGS:
                dict(),
                DictionaryKeys.FIT_RANGE_KEYWORD_ARGS:
                hyperparamethers_range_dictionary
            }

        #########################################################################################################

        ## Final step, after the hyperparameter range has been defined for each type of algorithm
        best_parameters = parameterSearch.search(
            recommenderDictionary,
            n_cases=n_cases,
            output_root_path=output_root_path_rec_name,
            metric=metric_to_optimize)

    except Exception as e:

        print("On recommender {} Exception {}".format(recommender_class,
                                                      str(e)))
        traceback.print_exc()

        error_file = open(output_root_path + "ErrorLog.txt", "a")
        error_file.write("On recommender {} Exception {}\n".format(
            recommender_class, str(e)))
        error_file.close()