# NOTE: imports reconstructed from usage below; module paths are assumed to
# match the rest of this repository (OwnUtils, Utils.Split, Hybrid, ...).
import csv

import numpy as np
import scipy.sparse as sps
from skopt import gp_minimize
from skopt.space import Categorical, Integer, Real

import Utils.Split.split_train_validation_leave_k_out as loo
from Hybrid.WeightedHybrid import WeightedHybrid
from OwnUtils.Evaluator import evaluate_algorithm
from OwnUtils.Extractor import Extractor
from OwnUtils.Writer import Writer


class Optimizer(object):
    def __init__(self):
        self.HYP = {}
        self.report_counter = 60
        self.writer = Writer()

        # Some parameters
        self.hyperparams = dict()
        self.hyperparams_names = list()
        self.hyperparams_values = list()
        self.hyperparams_single_value = dict()

        # Extractor for the matrices
        extractor = Extractor()
        urm = extractor.get_urm_all()
        self.icm = extractor.get_icm_all()

        # Split into post-validation & test sets for parameter tuning
        matrices = loo.split_train_leave_k_out_user_wise(urm, 1, False, True)

        self.urm_post_validation = matrices[0]
        self.urm_test = matrices[1]

        # Split the post-validation matrix into train & validation
        # (train and validation must be merged back at the end, hence
        # leave-one-out is applied twice)
        matrices_for_validation = loo.split_train_leave_k_out_user_wise(
            self.urm_post_validation, 1, False, True)
        self.urm_train = matrices_for_validation[0]
        self.urm_validation = matrices_for_validation[1]

    def optimize_weights(self):
        # weights = {'icfknn': 2, 'ucfknn': 0.2, 'cbfknn': 0.5, 'slimbpr': 1, 'puresvd': 1.5, 'als': 1, 'cfw': 3, 'p3a': 2, 'rp3b': 3}
        # "als" and "cfw" are left out of the search space; they match the
        # two None slots passed to WeightedHybrid in evaluate().
        weights = {}
        weights["icfknn"] = Real(low=0, high=5, prior='uniform')  # alternative: high=100000, prior='log-uniform'
        weights["ucfknn"] = Real(low=0, high=5, prior='uniform')
        weights["cbfknn"] = Real(low=0, high=5, prior='uniform')
        weights["slimbpr"] = Real(low=0, high=5, prior='uniform')
        weights["puresvd"] = Real(low=0, high=5, prior='uniform')
        #weights["als"] = Real(low=0, high=5, prior='uniform')
        weights["p3a"] = Real(low=0, high=5, prior='uniform')
        weights["rp3b"] = Real(low=0, high=5, prior='uniform')

        return weights

    def rebuild_weights(self, array):
        return {
            "icfknn": array[0],
            "ucfknn": array[1],
            "cbfknn": array[2],
            "slimbpr": array[3],
            "puresvd": array[4],
            "p3a": array[5],
            "rp3b": array[6]
        }

    def optimize_single_KNN(self):
        # Similarity options searched over (`similarity_type` was undefined
        # here; this list of commonly supported KNN similarities is assumed)
        similarity_types = ["cosine", "jaccard", "asymmetric", "dice", "tversky"]

        parameters = {
            "topK": Integer(5, 800),
            "shrink": Integer(0, 1000),
            "similarity": Categorical(similarity_types),
            "normalize": Categorical([True, False])
        }

        # A flat skopt space cannot express conditional constraints (e.g.
        # forcing normalize=True when similarity is "asymmetric" or
        # "tversky"), so the similarity-specific parameters are always added.
        parameters["asymmetric_alpha"] = Real(low=0, high=2, prior='uniform')
        parameters["tversky_alpha"] = Real(low=0, high=2, prior='uniform')
        parameters["tversky_beta"] = Real(low=0, high=2, prior='uniform')

        return parameters

    def rebuild_single_KNN(self, array):
        return {
            "topK": array[0],
            "shrink": array[1],
            "similarity": array[2],
            "normalize": array[3],
            "asymmetric_alpha": array[4],
            "tversky_alpha": array[5],
            "tversky_beta": array[6]
        }

    def optimize_all_KNN(self):
        ICFKNN = self.optimize_single_KNN()
        UCFKNN = self.optimize_single_KNN()
        CBFKNN = self.optimize_single_KNN()

        return (ICFKNN, UCFKNN, CBFKNN)

    def optimize_slim(self):
        return {
            "topK": Integer(5, 1000),
            "epochs": Integer(20, 1500),
            "symmetric": Categorical([True, False]),
            "sgd_mode": Categorical(["sgd", "adagrad", "adam"]),
            "lambda_i": Real(low=1e-5, high=1e-2, prior='log-uniform'),
            "lambda_j": Real(low=1e-5, high=1e-2, prior='log-uniform'),
            "learning_rate": Real(low=1e-4, high=1e-1, prior='log-uniform')
        }

    def rebuild_slim(self, array):
        return {
            "topK": array[0],
            "epochs": array[1],
            "symmetric": array[2],
            "sgd_mode": array[3],
            "lambda_i": array[4],
            "lambda_j": array[5],
            "learning_rate": array[6]
        }

    def optimize_puresvd(self):
        return {"num_factors": Integer(5, 1000)}

    def rebuild_puresvd(self, array):
        return {"num_factors": array[0]}

    def optimize_als(self):
        return {
            "alpha_val": Real(low=0, high=2, prior='uniform'),
            "n_factors": Integer(5, 1000),
            "regularization": Real(low=1e-4, high=10, prior='log-uniform'),
            "iterations": Integer(5, 50)
        }

    def rebuild_als(self, array):
        return {
            "alpha_val": array[0],
            "n_factors": array[1],
            "regularization": array[2],
            "iterations": array[3]
        }

    def optimize_p3a(self):
        return {
            "topK": Integer(5, 800),
            "alpha": Real(low=0, high=2, prior='uniform'),
            "normalize_similarity": Categorical([True, False])
        }

    def rebuild_p3a(self, array):
        return {
            "topK": array[0],
            "alpha": array[1],
            "normalize_similarity": array[2]
        }

    def optimize_rp3beta(self):
        return {
            "topK": Integer(5, 800),
            "alpha": Real(low=0, high=2, prior='uniform'),
            "beta": Real(low=0, high=2, prior='uniform'),
            "normalize_similarity": Categorical([True, False])
        }

    def rebuild_rp3beta(self, array):
        return {
            "topK": array[0],
            "alpha": array[1],
            "beta": array[2],
            "normalize_similarity": array[3]
        }

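    # Layout of the flat hyperparameter vector `hyp` sampled by gp_minimize
    # (dimension order follows the insertion order used in run()):
    #   hyp[0:7]   -> item-based CF KNN   (rebuild_single_KNN)
    #   hyp[7:14]  -> user-based CF KNN   (rebuild_single_KNN)
    #   hyp[14:21] -> content-based KNN   (rebuild_single_KNN)
    #   hyp[21:28] -> SLIM BPR            (rebuild_slim)
    #   hyp[28:29] -> PureSVD             (rebuild_puresvd)
    #   hyp[29:32] -> P3alpha             (rebuild_p3a)
    #   hyp[32:36] -> RP3beta             (rebuild_rp3beta)
    #   hyp[36:43] -> hybrid weights      (rebuild_weights)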
    def evaluate(self, hyp):
        # print("NUMBER OF PARAMETERS ON evaluate():" + str(len(hyp)))

        self.recommender = WeightedHybrid(self.urm_train, self.icm,
                                          self.rebuild_single_KNN(hyp[0:7]),
                                          self.rebuild_single_KNN(hyp[7:14]),
                                          self.rebuild_single_KNN(hyp[14:21]),
                                          self.rebuild_slim(hyp[21:28]),
                                          self.rebuild_puresvd(hyp[28:29]),
                                          None, None,
                                          self.rebuild_p3a(hyp[29:32]),
                                          self.rebuild_rp3beta(hyp[32:36]),
                                          self.rebuild_weights(hyp[36:]))
        self.recommender.fit()
        result = evaluate_algorithm(self.urm_validation,
                                    self.recommender,
                                    at=10)

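        # gp_minimize minimizes its objective, so the MAP@10 is negated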
        return float(result["MAP"] * (-1))

    def post_validation(self, hyp):

        self.recommender = WeightedHybrid(self.urm_post_validation, self.icm,
                                          self.rebuild_single_KNN(hyp[0:7]),
                                          self.rebuild_single_KNN(hyp[7:14]),
                                          self.rebuild_single_KNN(hyp[14:21]),
                                          self.rebuild_slim(hyp[21:28]),
                                          self.rebuild_puresvd(hyp[28:29]),
                                          None, None,
                                          self.rebuild_p3a(hyp[29:32]),
                                          self.rebuild_rp3beta(hyp[32:36]),
                                          self.rebuild_weights(hyp[36:]))
        self.recommender.fit()
        result = evaluate_algorithm(self.urm_test, self.recommender, at=10)
        self.writer.write_report("\n\n" + str(result), self.report_counter)

    def evaluate_single(self, hyp):
        self.recommender = WeightedHybrid(self.urm_train,
                                          self.icm,
                                          p_icfknn=None,
                                          p_ucfknn=None,
                                          p_cbfknn=None,
                                          p_slimbpr=None,
                                          p_puresvd=None,
                                          p_als=self.rebuild_als(hyp[0:]),
                                          p_cfw=None,
                                          p_p3a=None,
                                          p_rp3b=None,
                                          weights={"als": 1})
        self.recommender.fit()
        result = evaluate_algorithm(self.urm_test, self.recommender, at=10)

        return float(result["MAP"] * (-1))

    def run(self):
        self.HYP = {}
        self.HYP["p_icfknn"], self.HYP["p_ucfknn"], self.HYP[
            "p_cbfknn"] = self.optimize_all_KNN()
        self.HYP["p_slimbpr"] = self.optimize_slim()
        self.HYP["p_puresvd"] = self.optimize_puresvd()
        # self.HYP["p_als"] = self.optimize_als()
        self.HYP["p_p3a"] = self.optimize_p3a()
        self.HYP["p_rp3b"] = self.optimize_rp3beta()

        self.HYP["weight"] = self.optimeze_weights()

        self.iterator_to_create_dimension(self.HYP)

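        # Of the 70 calls, the first 20 are random exploration; noise=1e-5
        # tells the Gaussian process the objective is almost deterministic.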
        res = gp_minimize(
            self.evaluate,
            self.hyperparams_values,
            n_calls=70,
            n_random_starts=20,
            n_points=10000,
            # noise = 'gaussian',
            noise=1e-5,
            acq_func='gp_hedge',
            acq_optimizer='auto',
            random_state=None,
            verbose=True,
            n_restarts_optimizer=10,
            xi=0.01,
            kappa=1.96,
            x0=None,
            y0=None,
            n_jobs=-1)

        self.writer.write_report(str(res), self.report_counter)
        self.create_parameters(res["x"])
        self.post_validation(res["x"])

    def run_single(self):
        self.HYP["p_als"] = self.optimize_als()

        self.iterator_to_create_dimension(self.HYP)

        res = gp_minimize(
            self.evaluate_single,
            self.hyperparams_values,
            n_calls=70,
            n_random_starts=20,
            n_points=10000,
            # noise = 'gaussian',
            noise=1e-5,
            acq_func='gp_hedge',
            acq_optimizer='auto',
            random_state=None,
            verbose=True,
            n_restarts_optimizer=10,
            xi=0.01,
            kappa=1.96,
            x0=None,
            y0=None,
            n_jobs=-1)

        self.writer.write_report(str(res), self.report_counter)
        self.create_parameters(res["x"])

    def iterator_to_create_dimension(self, to_iterate):
        skopt_types = [Real, Integer, Categorical]
        for name, hyperparam in to_iterate.items():
            if any(
                    isinstance(hyperparam, sko_type)
                    for sko_type in skopt_types):
                self.hyperparams_names.append(name)
                self.hyperparams_values.append(hyperparam)
                self.hyperparams[name] = hyperparam

            elif isinstance(hyperparam, str) or isinstance(
                    hyperparam, int) or isinstance(hyperparam, bool):
                self.hyperparams_single_value[name] = hyperparam
            elif isinstance(hyperparam, dict):
                self.iterator_to_create_dimension(to_iterate[name])
            else:
                raise ValueError("Unexpected parameter type: {} - {}".format(
                    str(name), str(hyperparam)))
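
    # Example (illustrative): a nested space such as
    #   {"p_puresvd": {"num_factors": Integer(5, 1000)},
    #    "weight": {"icfknn": Real(low=0, high=5, prior='uniform'), ...}}
    # is flattened depth-first into hyperparams_values, so the flat vector
    # handed to evaluate() keeps run()'s insertion order and matches the
    # hyp[...] slices used by the rebuild_* helpers.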

    def create_parameters(self, hyp):
        self.report_counter = self.report_counter + 1

        # Only the block matching the space that was actually optimized should
        # be active: the commented lines report the full hybrid space used by
        # run(), while the p_als line below matches run_single().
        #self.writer.write_report("p_icfknn :" + str(self.rebuild_single_KNN(hyp[0:7])), self.report_counter)
        #self.writer.write_report("p_ucfknn :" + str(self.rebuild_single_KNN(hyp[7:14])), self.report_counter)
        #self.writer.write_report("p_cbfknn :" + str(self.rebuild_single_KNN(hyp[14:21])), self.report_counter)
        #self.writer.write_report("p_slimbpr :" + str(self.rebuild_slim(hyp[21:28])), self.report_counter)
        #self.writer.write_report("p_puresvd :" + str(self.rebuild_puresvd(hyp[28:29])), self.report_counter)
        #self.writer.write_report("p_p3a :" + str(self.rebuild_p3a(hyp[29:32])), self.report_counter)
        #self.writer.write_report("p_rp3b :" + str(self.rebuild_rp3beta(hyp[32:36])), self.report_counter)
        #self.writer.write_report("weight :" + str(self.rebuild_weights(hyp[36:])), self.report_counter)
        self.writer.write_report("p_als :" + str(self.rebuild_als(hyp[0:])),
                                 self.report_counter)
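
# Minimal usage sketch, assuming the module paths in the imports above resolve:
#   opt = Optimizer()
#   opt.run()           # tune the full weighted hybrid
#   # opt.run_single()  # or tune ALS on its own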


class CustomExtractor:
    def __init__(self):
        # Keep an Extractor instance so its methods can be called as regular
        # bound methods
        self.extractor = Extractor()
        self.writer = Writer()
        self.my_path = self.extractor.DATA_FILE_PATH + "xL_data/prova.txt"

        # Some useful variables
        self.urm = self.extractor.get_urm_all()

        self.ucm_age = self.extractor.get_ucm_age().tocsr()
        self.ucm_region = self.extractor.get_ucm_region().tocsr()

        self.icm_asset = self.get_icm_asset()
        self.icm_price = self.get_icm_price()
        self.icm_sub_cat = self.extractor.get_icm_subclass().tocsr()

    def create_validation_test_files(self, write_userf, write_itemf):

        import Utils.Split.split_train_validation_leave_k_out as loo
        urm = self.urm

        # Split into post-validation & test sets for parameter tuning
        matrices = loo.split_train_leave_k_out_user_wise(urm, 1, False, True)

        urm_post_validation = matrices[0]
        self.my_path = self.extractor.DATA_FILE_PATH + "xL_data/post_validation.txt"
        self.urm = urm_post_validation
        self.create_general_file(write_userf, write_itemf)

        urm_test = matrices[1]
        self.my_path = self.extractor.DATA_FILE_PATH + "xL_data/test.txt"
        self.urm = urm_test
        self.create_general_file(write_userf, write_itemf)

        # Split the post-validation matrix into train & validation
        # (train and validation must be merged back at the end, hence
        # leave-one-out is applied twice)
        matrices_for_validation = loo.split_train_leave_k_out_user_wise(
            urm_post_validation, 1, False, True)

        urm_train = matrices_for_validation[0]
        self.my_path = self.extractor.DATA_FILE_PATH + "xL_data/train.txt"
        self.urm = urm_train
        self.create_general_file(write_userf, write_itemf)

        urm_validation = matrices_for_validation[1]
        self.my_path = self.extractor.DATA_FILE_PATH + "xL_data/validation.txt"
        self.urm = urm_validation
        self.create_general_file(write_userf, write_itemf)

    def create_general_file(self, write_userf, write_itemf):

        n_users, n_items = self.urm.shape

        from tqdm import tqdm

        for user in tqdm(range(0, n_users)):
            # Getting positive items
            positive_interactions = list(self.urm[user].indices)

            # Checking
            to_create = len(positive_interactions)
            if len(positive_interactions) == 0:
                to_create = 1

            # Generating an equal number of negative interactions;
            # np.random.randint's `high` is exclusive, so valid item ids are
            # drawn from [0, n_items)
            negative_interactions = []
            for _ in range(0, to_create):
                value = np.random.randint(0, high=n_items, size=1)[0]
                while value in positive_interactions:
                    value = np.random.randint(0, high=n_items, size=1)[0]
                negative_interactions.append(value)
            '''
            File written:
            0/1 if is positive interaction or not
            0 --------- Interaction Layer
                0 - User
                1 - Item
            1 --------- User Content Layer
                0 - Age
                1 - Region 1
                2 - Region 2
            2 --------- Item Content Layer
                0 - Asset
                1 - Price
                2 - Sub Cat
            '''
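            # Example row (hypothetical ids/values): user 42 positively
            # interacted with item 7; with both feature layers enabled and
            # age=3, regions 5 and 7, asset=0.52, price=0.13, subclass=11:
            #   1 0:0:42 0:1:7 1:0:3 1:1:5 1:2:7 2:0:0.52 2:1:0.13 2:2:11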
            # Writing stage
            for item in positive_interactions:
                # 0 --------- Interaction Layer
                row_to_write = "1 0:0:" + str(user) + " 0:1:" + str(item)

                # 1 --------- User Content Layer
                if write_userf:
                    row_to_write += self.create_user_features(user)

                # 2 --------- Item Content Layer
                if write_itemf:
                    row_to_write += self.create_item_features(item)

                row_to_write += "\n"
                self.writer.write_generic(self.my_path, row_to_write)

            for item in negative_interactions:
                # 0 --------- Interaction Layer
                row_to_write = "0 0:0:" + str(user) + " 0:1:" + str(item)

                # 1 --------- User Content Layer
                if write_userf:
                    row_to_write += self.create_user_features(user)

                # 2 --------- Item Content Layer
                if write_itemf:
                    row_to_write += self.create_item_features(item)

                row_to_write += "\n"
                self.writer.write_generic(self.my_path, row_to_write)

    def create_user_features(self, user):
        to_write = ""

        if user < self.ucm_age.shape[0]:
            for i in range(0, len(self.ucm_age[user].indices)):
                age = self.ucm_age[user].indices[i]
                to_write += " 1:0:" + str(age)

        if user < self.ucm_region.shape[0]:
            for i in range(0, len(self.ucm_region[user].indices)):
                region = self.ucm_region[user].indices[i]
                to_write += " 1:" + str(i + 1) + ":" + str(region)

        return to_write

    def create_item_features(self, item):
        to_write = ""

        if item < self.icm_asset.shape[0]:
            for i in range(0, len(self.icm_asset[item].data)):
                data = self.icm_asset[item].data[i]
                to_write += " 2:0:" + str(data)

        if item < self.icm_price.shape[0]:
            for i in range(0, len(self.icm_price[item].data)):
                data = self.icm_price[item].data[i]
                to_write += " 2:1:" + str(data)

        if item < self.icm_sub_cat.shape[0]:
            for i in range(0, len(self.icm_sub_cat[item].indices)):
                data = self.icm_sub_cat[item].indices[i]
                to_write += " 2:2:" + str(data)

        return to_write

    def get_icm_asset(self):
        # Composing the name
        file_name = self.extractor.DATA_FILE_PATH + "data_ICM_asset.csv"

        with open(file_name) as csv_file:
            csv_reader = csv.reader(csv_file, delimiter=',')
            line_count = 0

            items = []
            assets = []
            for line in csv_reader:
                if line_count != 0:
                    items.append(int(line[0]))
                    assets.append(float(line[2]))
                line_count += 1

            cols = np.zeros(line_count - 1, dtype=int)

            return sps.coo_matrix((assets, (items, cols))).tocsr()
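
    # NOTE: get_icm_asset / get_icm_price each build a single-column
    # (n_items x 1) sparse matrix holding the raw feature value of each item.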

    def get_icm_price(self):
        # Composing the name
        file_name = self.extractor.DATA_FILE_PATH + "data_ICM_price.csv"

        with open(file_name) as csv_file:
            csv_reader = csv.reader(csv_file, delimiter=',')
            line_count = 0

            items = []
            prices = []
            for line in csv_reader:
                if line_count != 0:
                    items.append(int(line[0]))
                    prices.append(float(line[2]))
                line_count += 1

            cols = np.zeros(line_count - 1, dtype=int)

            return sps.coo_matrix((prices, (items, cols))).tocsr()
Example #5

    # NOTE: this snippet begins mid-class; the imports and the opening of the
    # XGBoostDataframe class were cut. The method head below is reconstructed
    # (assumed) from the call site `xgb.add_features(dataframe,
    # **ARGS_FEATURES)` and from the flags used in the body.
    def add_features(
        self,
        dataframe,
        user_profile_length=False,
        user_region1=False,
        user_age=False,
        top_pop=False,
        item_price=False,
        item_subclass=False,
    ):

        features_adder = FeatureAdder(dataframe, self.group_length)

        if user_profile_length:
            features_adder.add_user_profile_length()
        if user_region1:
            features_adder.add_user_region1()
        if user_age:
            features_adder.add_user_age()
        if top_pop:
            features_adder.add_item_popularity()

    def retrieve_test_dataframe(self):
        file_name = self.DATA_FILE_PATH + self.TEST_FILE + str(
            self.group_length) + ".csv"
        return pd.read_csv(file_name)


if __name__ == '__main__':
    xgb = XGBoostDataframe(20)
    users, items = xgb.get_user_and_item_lists()
    dataframe = xgb.build_base_dataframe(users, items)
    xgb.add_features(dataframe, **ARGS_FEATURES)

    writer = Writer()
    # writer.save_dataframe(dataframe, 20)
    print(dataframe)
    ratingList = np.ones(Extractor().get_numb_interactions())

    # Bind an Extractor instance for the calls below (`extractor` was
    # otherwise never defined in this snippet)
    extractor = Extractor()
    URM_all = extractor.get_interaction_matrix(False)
    warm_items_mask = np.ediff1d(URM_all.tocsc().indptr) > 0
    warm_items = np.arange(URM_all.shape[1])[warm_items_mask]

    URM_all = URM_all[:, warm_items]

    warm_users_mask = np.ediff1d(URM_all.tocsr().indptr) > 0
    warm_users = np.arange(URM_all.shape[0])[warm_users_mask]

    URM_all = URM_all[warm_users, :]

    URM_train, URM_test = train_test_holdout(URM_all, train_perc=0.8)

    recommender = UserCFKNNRecommender(URM_train)
    recommender.fit(shrink=0.0, topK=200)

    submissionID = 3

    userList_unique = extractor.get_user_to_make_rec()

    writer = Writer()
    fields = ['playlist_id', 'track_ids']
    writer.write_header(submissionID, fields)

    for user_id in userList_unique:
        writer.write(submissionID, user_id,
                     recommender.recommend(user_id, at=10))
    print("Done")
Example #7
from OwnUtils.Extractor import Extractor
from TopPopRecommender import TopPopRecommender
from OwnUtils.Writer import Writer
import OwnUtils.Evaluator as ev
import numpy as np

if __name__ == '__main__':
    # Entry point: fit a TopPop baseline, evaluate it, and write a submission

    SUBMISSION_NUMBER = 1  # TO BE CHANGED MANUALLY
    field_names = ['playlist_id', 'track_ids']
    # Use Extractor/Writer instances so methods are called on objects rather
    # than on the classes themselves
    extractor = Extractor()
    writer = Writer()

    users = extractor.get_user_to_make_rec()

    writer.write_header(SUBMISSION_NUMBER, field_names)

    matrices = extractor.get_train_test_matrix()
    URM_train = matrices[0]
    URM_test = matrices[1]

    topPopRecommender_removeSeen = TopPopRecommender()
    topPopRecommender_removeSeen.fit(URM_train)

    unique_users = list(set(extractor.get_interaction_users(False)))
    ev.evaluate_algorithm(URM_test,
                          topPopRecommender_removeSeen,
                          unique_users,
                          at=10)

    for user_id in users:
        recs = topPopRecommender_removeSeen.recommend(user_id, at=10)
        writer.write(SUBMISSION_NUMBER, user_id, recs)