Пример #1
0
    def __init__(self, urm, urm_t, icm, icm2, enable_dict, urm_test):
        """Build and fit the sub-recommenders selected by ``enable_dict``.

        The item-item hybrid and the user-based collaborative filter are
        always fitted. The SVD, P3alpha, SLIM-BPR and LightFM models are only
        built when the matching ``self.enable*`` attribute is truthy; those
        attributes are expected to be populated by ``self.setEnables``.

        :param urm: rating-matrix wrapper; must expose ``getCSR()``
        :param urm_t: matrix passed to the user-based collaborative filter
                      (presumably the transposed URM -- TODO confirm)
        :param icm: first item-content matrix, forwarded to the item-item
                    hybrid
        :param icm2: second item-content matrix, forwarded to the item-item
                     hybrid
        :param enable_dict: flags forwarded to ``self.setEnables``
        :param urm_test: validation-matrix wrapper; its ``getCSR()`` is passed
                         as ``URM_validation`` when SLIM is enabled
        """
        self.urm = urm
        self.n_users, self.n_items = urm.getCSR().shape
        self.setEnables(enable_dict)

        self.item_item = IIHybridRecommender(urm, icm, icm2)
        self.item_item.fit(item_weight=0.4, cbf1_weight=0.25, cbf2_weight=0.1)

        self.user = CollaborativeFiltering()
        self.user.fit(urm_t, k=100, h=0, mode='user')

        if self.enableSVD:
            self.svd = SVDRecommender(urm, nf=385)

        if self.enableP3A:
            self.p3a = P3alpha(urm.getCSR())
            self.p3a.fit(topK=80,
                         alpha=1,
                         min_rating=0,
                         implicit=True,
                         normalize_similarity=True)

        if self.enableSLIM:
            self.slim = SLIM_BPR_Cython(urm.getCSR(),
                                        recompile_cython=False,
                                        positive_threshold=0,
                                        URM_validation=urm_test.getCSR(),
                                        final_model_sparse_weights=True,
                                        train_with_sparse_weights=False)

            # The log is only needed while fitting; the context manager makes
            # sure the handle is flushed and closed even if fit() raises.
            with open("SLIM_BPR_Cython.txt", "a") as logFile:
                self.slim.fit(epochs=100,
                              validation_every_n=1,
                              logFile=logFile,
                              batch_size=5,
                              topK=200,
                              sgd_mode="adagrad",
                              learning_rate=0.075)

        if self.enableLFM:
            # LightFM (the progress messages used to be mislabelled "USER CF")
            print("starting LightFM")
            self.lfm = LightFMRecommender()
            self.lfm.fit(urm, epochs=100)
            print("LightFM finished")
Пример #2
0
    def __init__(self,
                 urm,
                 urm_t,
                 icm,
                 icm2,
                 enable_dict,
                 urm_test,
                 recalcSLIM=True):
        """Build the sub-recommenders selected by ``enable_dict``.

        The item-item hybrid is always fitted. The user-based CF, RP3beta,
        P3alpha and SLIM-BPR models are only built when the matching
        ``self.enable*`` attribute is truthy; those attributes are expected
        to be populated by ``self.setEnables``.

        :param urm: rating-matrix wrapper; must expose ``getCSR()``
        :param urm_t: matrix passed to the user-based collaborative filter
                      (presumably the transposed URM -- TODO confirm)
        :param icm: first item-content matrix, forwarded to the item-item
                    hybrid
        :param icm2: second item-content matrix, forwarded to the item-item
                     hybrid
        :param enable_dict: flags forwarded to ``self.setEnables``
        :param urm_test: validation-matrix wrapper; its ``getCSR()`` is passed
                         as ``URM_validation`` when SLIM is retrained
        :param recalcSLIM: when True (default) SLIM is trained from scratch;
                           otherwise a pickled model is read from
                           ``slim_test.pkl``
        """
        self.urm = urm
        self.setEnables(enable_dict)

        self.item_item = IIHybridRecommender(urm, icm, icm2)
        self.item_item.fit(item_weight=0.4, cbf1_weight=0.25, cbf2_weight=0.1)

        if self.enableUSER:
            self.cbu = CollaborativeFiltering()
            self.cbu.fit(urm_t, k=100, h=0, mode='user')

        if self.enableRP3B:
            # NOTE(review): the model is only constructed here, never fitted;
            # confirm whether RP3betaRecommender needs an explicit fit().
            self.rp3b = RP3betaRecommender(urm.getCSR())

        if self.enableP3A:
            self.p3a = P3alpha(urm.getCSR())
            self.p3a.fit(topK=80,
                         alpha=1,
                         min_rating=0,
                         implicit=True,
                         normalize_similarity=True)

        if self.enableSLIM:
            if recalcSLIM:
                self.slim = SLIM_BPR_Cython(urm.getCSR(),
                                            recompile_cython=False,
                                            positive_threshold=0,
                                            URM_validation=urm_test.getCSR(),
                                            final_model_sparse_weights=True,
                                            train_with_sparse_weights=False)

                # The log is only needed while fitting; the context manager
                # closes the handle even if fit() raises.
                with open("SLIM_BPR_Cython.txt", "a") as logFile:
                    self.slim.fit(epochs=100,
                                  validation_every_n=1,
                                  logFile=logFile,
                                  batch_size=5,
                                  topK=200,
                                  sgd_mode="adagrad",
                                  learning_rate=0.075)

                self.slim_sim = self.slim.get_similarity()

                # with open('slim_sub.pkl', 'wb') as output:
                #     pickle.dump(self.slim, output, pickle.HIGHEST_PROTOCOL)

            else:
                # NOTE(security): unpickling can execute arbitrary code; only
                # load 'slim_test.pkl' when it comes from a trusted source.
                # NOTE(review): self.slim_sim is not set on this path.
                with open('slim_test.pkl', 'rb') as model_file:
                    self.slim = pickle.load(model_file)
    def __init__(self, urm, urm_t, icm, icm2, enable_dict, urm_test=None):
        """Build and fit every sub-recommender used by this hybrid.

        User-based CF, item-based CF and the first content-based filter are
        always fitted. SVD, SLIM-BPR, LightFM and the second content-based
        filter are only built when the matching ``self.enable*`` attribute is
        truthy; those attributes are expected to be populated by
        ``self.setEnables``.

        :param urm: rating-matrix wrapper; must expose ``getCSR()``
        :param urm_t: matrix passed to the user-based collaborative filter
                      (presumably the transposed URM -- TODO confirm)
        :param icm: item-content matrix for the first content-based filter
        :param icm2: item-content matrix for the optional second
                     content-based filter
        :param enable_dict: flags forwarded to ``self.setEnables``
        :param urm_test: validation-matrix wrapper. Despite the ``None``
                         default it is required when SLIM is enabled, because
                         ``urm_test.getCSR()`` is called unconditionally on
                         that path.
        """
        self.urm = urm
        self.setEnables(enable_dict)

        if self.enableSVD:
            self.svd = SVDRecommender(urm, nf=385)

        if self.enableSLIM:
            self.slim = SLIM_BPR_Cython(urm.getCSR(),
                                        recompile_cython=False,
                                        positive_threshold=0,
                                        URM_validation=urm_test.getCSR(),
                                        final_model_sparse_weights=True,
                                        train_with_sparse_weights=False)

            # The log is only needed while fitting; the context manager
            # closes the handle even if fit() raises.
            with open("SLIM_BPR_Cython.txt", "a") as logFile:
                self.slim.fit(epochs=100,
                              validation_every_n=1,
                              logFile=logFile,
                              batch_size=5,
                              topK=200,
                              sgd_mode="adagrad",
                              learning_rate=0.075)

            self.slim_sim = self.slim.get_similarity()

        if self.enableLFM:
            # LightFM (the progress messages used to be mislabelled "USER CF")
            print("starting LightFM")
            self.lfm = LightFMRecommender()
            self.lfm.fit(urm, epochs=100)
            print("LightFM finished")

        # User based
        print("starting USER CF")
        self.cbu = CollaborativeFiltering()
        self.cbu.fit(urm_t, k=100, h=8, mode='user')
        print("USER CF finished")

        # Item based
        print("starting ITEM CF")
        self.cbi = CollaborativeFiltering()
        self.cbi.fit(urm, k=125, h=10, mode='item')
        print("ITEM CF finished")

        # Content based on the first item-content matrix
        print("starting CBF")
        self.cbf = ContentBasedFiltering(icm, urm, k=25, shrinkage=100)
        self.cbf.fit()
        print("CBF finished")

        if self.enableCBF2:
            print("starting CBF2")
            self.cbf2 = ContentBasedFiltering(icm2, urm, k=25, shrinkage=100)
            self.cbf2.fit()
            print("CBF2 finished")
Пример #4
0
    def buildmodel(self):
        """Instantiate and fit every sub-model enabled on this hybrid.

        Each model is built only when its ``self.enable*`` flag is truthy and
        is configured from the matching entry of ``self.param_dict``
        (``cbi_param_dict``, ``rp3b_param_dict``, ``cbf_param_dict``,
        ``cbu_param_dict``, ``slim_param_dict``). Those entries are unpacked
        with ``**``, so each one must be present as a mapping for an enabled
        model.

        For SLIM, ``param_dict['loadSLIM']`` selects between loading a stored
        model from ``param_dict['slimPath']`` and training a new one, which is
        then saved to that path. When ``param_dict['normalizeSLIM']`` is not
        ``None`` the similarity matrix is row-normalised with that norm before
        being stored in ``self.slim_sim``.
        """
        if self.enableCBI:
            print("Fitting Item CF...")
            self.cbi = CollaborativeFiltering()
            self.cbi.fit(self.urm, **self.param_dict.get('cbi_param_dict'))
            print("Item CF finished")

        if self.enableRP3B:
            print("Fitting RP3B...")
            self.rp3b = RP3betaRecommender(self.urm.getCSR())
            self.rp3b.fit(**self.param_dict.get('rp3b_param_dict'))
            print("RP3B finished")

        if self.enableCBF:
            self.cbf = ImprovedCBF(self.icm1, self.icm2, self.urm,
                                   **self.param_dict.get('cbf_param_dict'))
            self.cbf.fit(self.param_dict.get('CBFNorm'))
            print("CBF finished")

        if self.enableCBU:
            self.cbu = CollaborativeFiltering()
            self.cbu.fit(self.urm_t, **self.param_dict.get('cbu_param_dict'))
            print("USER CF finished")

        if self.enableSLIM:
            self.loadSLIM = self.param_dict.get('loadSLIM')
            self.slimPath = self.param_dict.get('slimPath')

            self.slim = SLIM_BPR_Cython(self.urm.getCSR(),
                                        recompile_cython=False,
                                        positive_threshold=0,
                                        final_model_sparse_weights=True,
                                        train_with_sparse_weights=False)

            if self.loadSLIM:
                print("Loading matrix")
                self.slim.loadModel('', self.slimPath)
            else:
                print("Calculating similarity matrix")
                # A log file used to be opened here but was never passed to
                # fit() nor closed, so the leaked handle has been removed.
                self.slim.fit(**self.param_dict.get('slim_param_dict'))
                self.slim.saveModel('', self.slimPath)

            self.normalizeSLIM = self.param_dict.get('normalizeSLIM')

            similarity = self.slim.get_similarity()
            if self.normalizeSLIM is not None:
                similarity = normalize(similarity,
                                       norm=self.normalizeSLIM,
                                       axis=1)
            self.slim_sim = similarity

        print("Fitting Hybrid done ")
Пример #5
0
class PopulationHybrid():
    """Hybrid recommender that picks its weights from the user's profile size.

    Users whose profile (``urm.extractTracksFromPlaylist(user)``) holds more
    than ``group_1_2_TH`` items are scored with ``group_2_params``; all other
    users are scored with ``group_1_params``. Both parameter sets are weight
    dictionaries in the format accepted by :meth:`fit`.
    """

    def __init__(self,
                 urm,
                 urm_t,
                 icm,
                 icm2,
                 enable_dict,
                 param_dict,
                 urm_test=None):
        """Build and fit every sub-recommender used by the hybrid.

        User-based CF, item-based CF and the first content-based filter are
        always fitted; SVD, SLIM-BPR and the second content-based filter only
        when enabled through ``enable_dict``.

        :param urm: rating-matrix wrapper; must expose ``getCSR()`` and
                    ``extractTracksFromPlaylist(user)``
        :param urm_t: matrix passed to the user-based collaborative filter
                      (presumably the transposed URM -- TODO confirm)
        :param icm: item-content matrix for the first content-based filter
        :param icm2: item-content matrix for the optional second filter
        :param enable_dict: flags, see :meth:`setEnables`
        :param param_dict: group parameters, see :meth:`changeParams`
        :param urm_test: validation-matrix wrapper. Despite the ``None``
                         default it is required when SLIM is enabled, because
                         ``urm_test.getCSR()`` is called on that path.
        """
        self.urm = urm
        self.setEnables(enable_dict)
        self.changeParams(param_dict)

        if self.enableSVD:
            self.svd = SVDRecommender(urm, nf=385)

        if self.enableSLIM:
            self.slim = SLIM_BPR_Cython(urm.getCSR(),
                                        recompile_cython=False,
                                        positive_threshold=0,
                                        URM_validation=urm_test.getCSR(),
                                        final_model_sparse_weights=True,
                                        train_with_sparse_weights=False)

            # The log is only needed while fitting; the context manager
            # closes the handle even if fit() raises.
            with open("SLIM_BPR_Cython.txt", "a") as logFile:
                self.slim.fit(epochs=200,
                              validation_every_n=1,
                              logFile=logFile,
                              batch_size=5,
                              topK=200,
                              sgd_mode="adagrad",
                              learning_rate=0.075)

            self.slim_sim = self.slim.get_similarity()

        # User based
        print("starting USER CF")
        self.cbu = CollaborativeFiltering()
        self.cbu.fit(urm_t, k=100, h=8, mode='user')
        print("USER CF finished")

        # Item based
        print("starting ITEM CF")
        self.cbi = CollaborativeFiltering()
        self.cbi.fit(urm, k=125, h=10, mode='item')
        print("ITEM CF finished")

        # Content based on the first item-content matrix
        print("starting CBF")
        self.cbf = ContentBasedFiltering(icm, urm, k=25, shrinkage=0)
        self.cbf.fit()
        print("CBF finished")

        if self.enableCBF2:
            print("starting CBF2")
            self.cbf2 = ContentBasedFiltering(icm2, urm, k=25, shrinkage=0)
            self.cbf2.fit()
            print("CBF2 finished")

    def changeParams(self, param_dict):
        """Replace the two per-group weight sets and the group threshold.

        :param param_dict: mapping with the keys ``group_1_params``,
                           ``group_2_params`` and ``group_1_2_TH``; missing
                           keys are stored as ``None``
        """
        self.group_1_params = param_dict.get('group_1_params')
        self.group_2_params = param_dict.get('group_2_params')
        self.group_1_2_TH = param_dict.get('group_1_2_TH')

    def fit(self, weights_dict, method='rating_weight'):
        """Store the combination weights and the combination method.

        :param weights_dict: mapping of weight names to values; every missing
                             weight defaults to 0. Recognised keys are
                             ``user_weight``, ``item_weight``, ``cbf_weight``,
                             ``cbf2_weight``, ``svd_weight``, ``slim_weight``,
                             ``hybrid_ensemble_weight`` and
                             ``hybrid_slim_weight``.
        :param method: one of ``'rating_weight'``, ``'item_weight'``,
                       ``'hybrid'`` or ``'switch'``; checked by
                       :meth:`s_recommend`, not here
        """
        self.user_weight = weights_dict.get('user_weight', 0)
        self.item_weight = weights_dict.get('item_weight', 0)
        self.cbf_weight = weights_dict.get('cbf_weight', 0)
        self.cbf2_weight = weights_dict.get('cbf2_weight', 0)
        self.svd_weight = weights_dict.get('svd_weight', 0)
        self.slim_weight = weights_dict.get('slim_weight', 0)

        # Read by the 'hybrid' method; they were never initialised before,
        # which made that method fail with AttributeError.
        self.hybrid_ensemble_weight = weights_dict.get(
            'hybrid_ensemble_weight', 0)
        self.hybrid_slim_weight = weights_dict.get('hybrid_slim_weight', 0)

        self.method = method

    def s_recommend(self, user, nRec=10):
        """Recommend ``nRec`` items for a single user.

        The weight set is chosen from the size of the user's profile, then
        the sub-recommenders are combined with the method selected by the
        last explicit :meth:`fit` call (``'rating_weight'`` if there was
        none).

        :param user: user identifier understood by the sub-recommenders
        :param nRec: number of items to return
        :return: the recommended item ids
        :raises ValueError: if the stored method is not a known one
        """
        number_items = len(self.urm.extractTracksFromPlaylist(user))
        if number_items > self.group_1_2_TH:
            group_weights = self.group_2_params
        else:
            group_weights = self.group_1_params

        # Re-fitting must not reset the method: without forwarding it, every
        # call fell back to 'rating_weight' and the other branches were dead.
        self.fit(group_weights, method=getattr(self, 'method', 'rating_weight'))

        if self.method == 'item_weight':
            extra = 1
            depth = nRec + extra

            weighting_dict = {
                'user': (self.cbu.s_recommend(user, depth), self.user_weight),
                'item': (self.cbi.s_recommend(user, depth), self.item_weight),
                'cbf': (self.cbf.s_recommend(user, depth), self.cbf_weight),
            }

            if self.enableCBF2:
                weighting_dict['cbf2'] = (self.cbf2.s_recommend(user, depth),
                                          self.cbf2_weight)

            if self.enableSVD:
                weighting_dict['svd'] = (self.svd.s_recommend(user, depth),
                                         self.svd_weight)

            if self.enableSLIM:
                weighting_dict['slim'] = (self.slim.s_recommend(user, depth),
                                          self.slim_weight)

            return self.item_weighter(weighting_dict, nRec, extra)

        elif self.method == 'rating_weight':
            rows = self._normalized_pred_rows(user, 'max')
            return self.predWeightRatingRows(user, nRec, *rows)

        elif self.method == "hybrid":
            extra = 1
            depth = nRec + extra

            rows = self._normalized_pred_rows(user, 'max')
            ensemble_items = self.predWeightRatingRows(user, depth, *rows)

            # NOTE(review): self.slim only exists when SLIM is enabled, so
            # this method requires enableSLIM.
            slim_items = self.slim.s_recommend(user, depth)

            weighting_dict = {
                'hybrid': (ensemble_items, self.hybrid_ensemble_weight),
                'slim': (slim_items, self.hybrid_slim_weight),
            }
            return self.item_weighter(weighting_dict, nRec, extra)

        elif self.method == 'switch':
            # NOTE(review): switchTH is not defined in this class; it has to
            # be supplied as a module-level name -- confirm where it is set.
            if len(self.urm.extractTracksFromPlaylist(user)) < switchTH:
                # profile below the threshold: use the user-based model
                return self.cbu.s_recommend(user, nRec=nRec)
            else:
                # profile at or above the threshold: use the item-based model
                return self.cbi.s_recommend(user, nRec=nRec)

        else:
            raise ValueError('Not a valid hybrid method')

    def m_recommend(self, user_ids, nRec=10):
        """Return the :meth:`s_recommend` result for each id in ``user_ids``."""
        return [self.s_recommend(uid, nRec) for uid in user_ids]

    def item_weighter(self, tupleDict, nRec, extra):
        """Merge ranked item lists into one list with a positional score.

        An item at 0-based position ``i`` of a list earns
        ``(nRec + extra - i) * weight``; the scores of an item are summed
        over all lists. Only the first ``nRec + extra`` entries of each list
        are considered, and shorter lists are accepted as they are.

        :param tupleDict: mapping ``name -> (ranked_items, weight)``, with
                          ``ranked_items`` ordered from best to worst
        :param nRec: number of items to return
        :param extra: number of additional positions to score per list
        :return: integer array with at most ``nRec`` item ids, best first
        """
        depth = nRec + extra

        scores = {}
        for entry in tupleDict.values():
            ranked_items, weight = entry[0], entry[1]
            # slicing (instead of indexing up to depth) tolerates short lists
            for position, item in enumerate(ranked_items[:depth]):
                key = str(item)
                scores[key] = scores.get(key, 0) + (depth - position) * weight

        # ascending sort followed by a flip keeps the original tie order
        ranked = sorted(scores.items(), key=itemgetter(1))[::-1]

        return np.array([key for key, _ in ranked[:nRec]]).astype(int)

    def predWeightRatingRows(self, user, nRec, recommended_items_user,
                             recommended_items_item, recommended_items_cbf,
                             recommended_items_cbf2, recommended_items_svd,
                             recommended_items_slim):
        """Rank items by the weighted sum of the per-model score rows.

        The user, item and cbf rows are always combined; the cbf2, svd and
        slim rows only when the matching model is enabled (the slim row is
        skipped for the ``'hybrid'`` method). Items already in the user's
        profile are removed from the ranking.

        :param user: user identifier, used to filter seen items
        :param nRec: number of items to return
        :return: the first ``nRec`` unseen item indices, best first
        """
        pred_row_sparse = (recommended_items_user * self.user_weight
                           + recommended_items_item * self.item_weight
                           + recommended_items_cbf * self.cbf_weight)

        if self.enableSLIM and self.method != "hybrid":
            pred_row_sparse = pred_row_sparse + self.slim_weight * recommended_items_slim

        if self.enableCBF2:
            pred_row_sparse = pred_row_sparse + self.cbf2_weight * recommended_items_cbf2

        # needs to be before svd because svd output is dense
        pred_row = np.array(pred_row_sparse.todense()).squeeze()

        if self.enableSVD:
            pred_row = pred_row + self.svd_weight * recommended_items_svd

        ranking = np.argsort(-pred_row)
        return self._filter_seen(user, ranking)[0:nRec]

    def _normalized_pred_rows(self, user, norm_method):
        """Return the normalised score rows (user, item, cbf, cbf2, svd, slim).

        Rows of models that are not enabled are returned as ``None``.
        """
        def scaled(row):
            return self.normalize_row(row, method=norm_method)

        row_user = scaled(self.cbu.get_pred_row(user))
        row_item = scaled(self.cbi.get_pred_row(user))
        row_cbf = scaled(self.cbf.get_pred_row(user))

        row_cbf2 = None
        if self.enableCBF2:
            row_cbf2 = scaled(self.cbf2.get_pred_row(user))

        row_svd = None
        if self.enableSVD:
            row_svd = scaled(self.svd.get_pred_row(user))

        row_slim = None
        if self.enableSLIM:
            row_slim = scaled(self.getSlimRow(user))

        return row_user, row_item, row_cbf, row_cbf2, row_svd, row_slim

    def _filter_seen(self, user_id, ranking):
        """Drop from ``ranking`` the items already in the user's profile."""
        seen = self.urm.extractTracksFromPlaylist(user_id)
        # np.isin replaces np.in1d, which is deprecated in NumPy 2.0
        unseen_mask = np.isin(ranking, seen, assume_unique=True, invert=True)
        return ranking[unseen_mask]

    def getSlimRow(self, user):
        """Return the user's URM row multiplied by the SLIM similarity."""
        return self.urm.getCSR().getrow(user) * self.slim_sim

    def setEnables(self, enable_dict):
        """Read the optional-model flags; a missing flag is stored as None.

        :param enable_dict: mapping with the keys ``enableSVD``,
                            ``enableSLIM`` and ``enableCBF2``
        """
        self.enableSVD = enable_dict.get('enableSVD')
        self.enableSLIM = enable_dict.get('enableSLIM')
        self.enableCBF2 = enable_dict.get('enableCBF2')

    def normalize_row(self, recommended_items, method):
        """Scale a score row by its maximum or by its sum.

        A zero normalisation factor is replaced by 1, so an all-zero row is
        returned unscaled.

        :param recommended_items: score row exposing ``max()`` / ``sum()``
                                  and supporting division by a scalar
        :param method: ``'max'`` or ``'sum'``
        :raises ValueError: for any other ``method``
        """
        if method == 'max':
            norm_factor = recommended_items.max()
        elif method == 'sum':
            norm_factor = recommended_items.sum()
        else:
            raise ValueError('Not a valid normalization method')

        if norm_factor == 0:
            norm_factor = 1
        return recommended_items / norm_factor
Пример #6
0
class ListHybridRecommender():
    """Hybrid recommender that merges the ranked lists of several models.

    Each enabled sub-recommender produces a ranked list of items; the lists
    are combined by :meth:`list_weighter` using the per-model weights set
    through :meth:`fit`.
    """

    def __init__(self,
                 urm,
                 urm_t,
                 icm,
                 icm2,
                 enable_dict,
                 urm_test,
                 recalcSLIM=True):
        """Build the sub-recommenders selected by ``enable_dict``.

        The item-item hybrid is always fitted; user-based CF, RP3beta,
        P3alpha and SLIM-BPR only when enabled (see :meth:`setEnables`).

        :param urm: rating-matrix wrapper; must expose ``getCSR()``
        :param urm_t: matrix passed to the user-based collaborative filter
                      (presumably the transposed URM -- TODO confirm)
        :param icm: first item-content matrix, forwarded to the item-item
                    hybrid
        :param icm2: second item-content matrix, forwarded to the item-item
                     hybrid
        :param enable_dict: flags, see :meth:`setEnables`
        :param urm_test: validation-matrix wrapper; its ``getCSR()`` is passed
                         as ``URM_validation`` when SLIM is retrained
        :param recalcSLIM: when True (default) SLIM is trained from scratch;
                           otherwise a pickled model is read from
                           ``slim_test.pkl``
        """
        self.urm = urm
        self.setEnables(enable_dict)

        self.item_item = IIHybridRecommender(urm, icm, icm2)
        self.item_item.fit(item_weight=0.4, cbf1_weight=0.25, cbf2_weight=0.1)

        if self.enableUSER:
            self.cbu = CollaborativeFiltering()
            self.cbu.fit(urm_t, k=100, h=0, mode='user')

        if self.enableRP3B:
            # NOTE(review): the model is only constructed here, never fitted;
            # confirm whether RP3betaRecommender needs an explicit fit().
            self.rp3b = RP3betaRecommender(urm.getCSR())

        if self.enableP3A:
            self.p3a = P3alpha(urm.getCSR())
            self.p3a.fit(topK=80,
                         alpha=1,
                         min_rating=0,
                         implicit=True,
                         normalize_similarity=True)

        if self.enableSLIM:
            if recalcSLIM:
                self.slim = SLIM_BPR_Cython(urm.getCSR(),
                                            recompile_cython=False,
                                            positive_threshold=0,
                                            URM_validation=urm_test.getCSR(),
                                            final_model_sparse_weights=True,
                                            train_with_sparse_weights=False)

                # The log is only needed while fitting; the context manager
                # closes the handle even if fit() raises.
                with open("SLIM_BPR_Cython.txt", "a") as logFile:
                    self.slim.fit(epochs=100,
                                  validation_every_n=1,
                                  logFile=logFile,
                                  batch_size=5,
                                  topK=200,
                                  sgd_mode="adagrad",
                                  learning_rate=0.075)

                self.slim_sim = self.slim.get_similarity()

                # with open('slim_sub.pkl', 'wb') as output:
                #     pickle.dump(self.slim, output, pickle.HIGHEST_PROTOCOL)

            else:
                # NOTE(security): unpickling can execute arbitrary code; only
                # load 'slim_test.pkl' when it comes from a trusted source.
                # NOTE(review): self.slim_sim is not set on this path, so
                # getSlimRow() is unavailable for a loaded model.
                with open('slim_test.pkl', 'rb') as model_file:
                    self.slim = pickle.load(model_file)

    def fit(self, weights_dict=None, norm="none", w_method="count"):
        """Store the per-model weights and the merging options.

        :param weights_dict: mapping of weight names to values; every missing
                             weight defaults to 0. Recognised keys are
                             ``item_item_weight``, ``rp3b_weight``,
                             ``slim_weight``, ``user_weight`` and
                             ``p3a_weight``. ``None`` means no weights.
        :param norm: normalisation method name, stored in ``norm_method``
        :param w_method: weighting logic passed to :meth:`list_weighter` by
                         :meth:`s_recommend`.
                         NOTE(review): the default ``"count"`` is not one of
                         the modes list_weighter accepts, so callers must pass
                         a supported value -- confirm the intended default.
        """
        # The None default used to crash on weights_dict.get(...)
        if weights_dict is None:
            weights_dict = {}

        self.norm_method = norm
        self.weights_dict = weights_dict
        self.w_method = w_method

        self.item_item_weight = weights_dict.get('item_item_weight', 0)
        self.rp3b_weight = weights_dict.get('rp3b_weight', 0)
        self.slim_weight = weights_dict.get('slim_weight', 0)
        self.user_weight = weights_dict.get('user_weight', 0)
        self.p3a_weight = weights_dict.get('p3a_weight', 0)

    def s_recommend(self, user, nRec=10):
        """Recommend ``nRec`` items for one user by merging ranked lists.

        :param user: user identifier understood by the sub-recommenders
        :param nRec: number of items requested from each model and returned
        :return: integer array of item ids, see :meth:`list_weighter`
        """
        weighting_dict = {
            'ii': (self.item_item.s_recommend(user, nRec).tolist(),
                   self.item_item_weight),
        }

        # This used to test enableSVD, a flag unrelated to self.rp3b.
        if self.enableRP3B:
            weighting_dict['rp3b'] = (
                self.rp3b.s_recommend(user, nRec).tolist(), self.rp3b_weight)

        if self.enableP3A:
            weighting_dict['p3a'] = (self.p3a.s_recommend(user, nRec),
                                     self.p3a_weight)

        if self.enableUSER:
            weighting_dict['user'] = (
                self.cbu.s_recommend(user, nRec).tolist(), self.user_weight)

        if self.enableSLIM:
            weighting_dict['slim'] = (self.slim.s_recommend(user, nRec),
                                      self.slim_weight)

        return self.list_weighter(weighting_dict, nRec, 0, self.w_method)

    def m_recommend(self, user_ids, nRec=10):
        """Return the :meth:`s_recommend` result for each id in ``user_ids``."""
        return [self.s_recommend(uid, nRec) for uid in user_ids]

    def list_weighter(self, tupleDict, nRec, extra, weighting='parab'):
        """
            Merge ranked item lists into a single ranked list.

            Only the first ``nRec + extra`` entries of each list are scored;
            shorter lists are accepted as they are. With ``i`` the 0-based
            position of an item in a list, each list adds to the item score:

            :param tupleDict : dict{(list_of_items, weight)}
                                assumes list_of_items is ordered from best rec
                                to worst rec

            :param nRec      : number of items to recommend

            :param extra     : number of extra_items to consider
                               in the lists

            :param weighting : - "linear"    (nRec + extra - i) * weight
                               - "parab"     (0.1*i**2 - 1.92*i + nRec) * weight
                               - "avg"       (nRec - i) / 3, the list weight
                                             is not used
                               - "count_par" the "parab" score plus 4 times
                                             the number of lists processed so
                                             far that contain the item

            :return array of at most nRec item ids, best first

            :raises ValueError for an unknown weighting (only detected when
                               tupleDict is not empty)
        """
        depth = nRec + extra

        # initialize the score and occurrence counters of every item
        result = {}
        count_dict = {}
        for entry in tupleDict.values():
            # slicing (instead of indexing up to depth) tolerates short lists
            for item in entry[0][:depth]:
                key = str(item)
                result[key] = 0
                count_dict[key] = 0

        # assign a score based on position
        for entry in tupleDict.values():
            keys = [str(item) for item in entry[0][:depth]]
            weight = entry[1]

            if weighting == 'linear':
                for i, key in enumerate(keys):
                    result[key] += (depth - i) * weight

            elif weighting == 'parab':
                for i, key in enumerate(keys):
                    result[key] += (0.1 * i**2 - 1.92 * i + nRec) * weight

            elif weighting == 'avg':
                for i, key in enumerate(keys):
                    result[key] += (nRec - i) / 3

            elif weighting == 'count_par':
                # counts are updated list by list, before scoring the list
                for key in keys:
                    count_dict[key] += 1

                for i, key in enumerate(keys):
                    result[key] += (0.1 * i ** 2 - 1.92 * i + nRec) * weight \
                                   + 4 * count_dict.get(key)

            else:
                raise ValueError('Not a valid weighting logic')

        # ascending sort followed by a flip keeps the original tie order
        ranked = sorted(result.items(), key=itemgetter(1))[::-1]

        # return only the topN recommendations
        return np.array([key for key, _ in ranked[:nRec]]).astype(int)

    def setEnables(self, enable_dict):
        """Read the optional-model flags from ``enable_dict``.

        ``enableSVD`` and ``enableSLIM`` default to ``None`` when missing;
        ``enableUSER``, ``enableP3A`` and ``enableRP3B`` default to False.
        """
        self.enableSVD = enable_dict.get('enableSVD')
        self.enableSLIM = enable_dict.get('enableSLIM')
        self.enableUSER = enable_dict.get('enableUSER', False)
        self.enableP3A = enable_dict.get('enableP3A', False)
        # Read by __init__ and s_recommend; it was never set before, so the
        # constructor always failed with AttributeError.
        self.enableRP3B = enable_dict.get('enableRP3B', False)

    def _filter_seen(self, user_id, ranking):
        """Drop from ``ranking`` the items stored in the user's URM row."""
        user_profile = self.urm.getCSR()[user_id]
        seen = user_profile.indices
        # np.isin replaces np.in1d, which is deprecated in NumPy 2.0
        unseen_mask = np.isin(ranking, seen, assume_unique=True, invert=True)
        return ranking[unseen_mask]

    def normalize_row(self, recommended_items, method):
        """Scale a score row by its maximum or its sum, or leave it as is.

        A zero normalisation factor is replaced by 1, so an all-zero row is
        returned unscaled.

        :param recommended_items: score row exposing ``max()`` / ``sum()``
                                  and supporting division by a scalar
        :param method: ``'max'``, ``'sum'`` or ``"none"``
        :raises ValueError: for any other ``method``
        """
        if method == "none":
            return recommended_items

        if method == 'max':
            norm_factor = recommended_items.max()
        elif method == 'sum':
            norm_factor = recommended_items.sum()
        else:
            raise ValueError('Not a valid normalization method')

        if norm_factor == 0:
            norm_factor = 1
        return recommended_items / norm_factor

    def getSlimRow(self, user):
        """Return the user's URM row multiplied by the SLIM similarity."""
        return self.urm.getCSR().getrow(user) * self.slim_sim

    def remove_duplicates(self, ordered_list):
        """
        :param ordered_list
        :return: the ordered_list still ordered removed of duplicates
        """
        seen = set()
        unique_items = []
        for item in ordered_list:
            if item not in seen:
                seen.add(item)
                unique_items.append(item)
        return unique_items
    def __init__(self, urm, urm_t, icm, icm2, enable_dict, urm_test=None):
        """Build and fit every sub-recommender used by this hybrid.

        The user-based collaborative filter and the item-item hybrid are
        always fitted. RP3beta, SLIM-BPR, P3alpha, LightFM and PureSVD are
        only built when the matching ``self.enable*`` attribute is truthy;
        those attributes are expected to be populated by ``self.setEnables``.

        :param urm: rating-matrix wrapper; must expose ``getCSR()``
        :param urm_t: matrix passed to the user-based collaborative filter
                      (presumably the transposed URM -- TODO confirm)
        :param icm: first item-content matrix, forwarded to the item-item
                    hybrid
        :param icm2: second item-content matrix, forwarded to the item-item
                     hybrid
        :param enable_dict: flags forwarded to ``self.setEnables``
        :param urm_test: validation-matrix wrapper. Despite the ``None``
                         default it is required when SLIM is enabled, because
                         ``urm_test.getCSR()`` is called unconditionally on
                         that path.
        """
        self.urm = urm
        self.setEnables(enable_dict)

        if self.enableRP3B:
            self.rp3b = RP3betaRecommender(urm.getCSR())
            self.rp3b.fit(topK=100,
                          alpha=0.7,
                          beta=0.3,
                          normalize_similarity=True,
                          implicit=True)

        if self.enableSLIM:
            self.slim = SLIM_BPR_Cython(urm.getCSR(),
                                        recompile_cython=False,
                                        positive_threshold=0,
                                        URM_validation=urm_test.getCSR(),
                                        final_model_sparse_weights=True,
                                        train_with_sparse_weights=False)

            # The log is only needed while fitting; the context manager
            # closes the handle even if fit() raises.
            with open("SLIM_BPR_Cython.txt", "a") as logFile:
                self.slim.fit(epochs=100,
                              validation_every_n=1,
                              logFile=logFile,
                              batch_size=5,
                              topK=200,
                              sgd_mode="adagrad",
                              learning_rate=0.075)

            self.slim_sim = self.slim.get_similarity()

        if self.enableP3A:
            self.p3a = P3alpha(urm.getCSR())
            self.p3a.fit(topK=80,
                         alpha=1,
                         min_rating=0,
                         implicit=True,
                         normalize_similarity=True)

        # if self.enableCBF2:
        #     print("starting CBF2")
        #     self.cbf2 = ContentBasedFiltering(icm2, urm, k=25, shrinkage=0)
        #     self.cbf2.fit()
        #     print("CBF2 finished")

        if self.enableLFM:
            # LightFM (the progress messages used to be mislabelled "USER CF")
            print("starting LightFM")
            self.lfm = LightFMRecommender()
            self.lfm.fit(urm, epochs=100)
            print("LightFM finished")

        if self.enableSVD:
            self.svd = PureSVDRecommender(urm.getCSR())
            self.svd.fit(num_factors=225)
            # this message used to read "USER CF finished"
            print("SVD finished")

        # User based
        print("starting USER CF")
        self.cbu = CollaborativeFiltering()
        self.cbu.fit(urm_t, k=100, h=0, mode='user')
        print("USER CF finished")

        self.item_item = IIHybridRecommender(urm, icm, icm2)
        self.item_item.fit(item_weight=0.4, cbf1_weight=0.25, cbf2_weight=0.1)
class UserItemHybridRecommender():
    def __init__(self, urm, urm_t, icm, icm2, enable_dict, urm_test=None):
        self.urm = urm
        self.setEnables(enable_dict)

        if self.enableRP3B:
            self.rp3b = RP3betaRecommender(urm.getCSR())
            self.rp3b.fit(topK=100,
                          alpha=0.7,
                          beta=0.3,
                          normalize_similarity=True,
                          implicit=True)

        if self.enableSLIM:
            choice = 2
            logFile = open("SLIM_BPR_Cython.txt", "a")

            self.slim = SLIM_BPR_Cython(urm.getCSR(),
                                        recompile_cython=False,
                                        positive_threshold=0,
                                        URM_validation=urm_test.getCSR(),
                                        final_model_sparse_weights=True,
                                        train_with_sparse_weights=False)

            self.slim.fit(epochs=100,
                          validation_every_n=1,
                          logFile=logFile,
                          batch_size=5,
                          topK=200,
                          sgd_mode="adagrad",
                          learning_rate=0.075)

            self.slim_sim = self.slim.get_similarity()

        if self.enableP3A:
            self.p3a = P3alpha(urm.getCSR())
            self.p3a.fit(topK=80,
                         alpha=1,
                         min_rating=0,
                         implicit=True,
                         normalize_similarity=True)

        # if self.enableCBF2:
        #     print("starting CBF2")
        #     self.cbf2 = ContentBasedFiltering(icm2, urm, k=25, shrinkage=0)
        #     self.cbf2.fit()
        #     print("CBF2 finished")

        if self.enableLFM:
            # LightFM
            print("starting USER CF")
            self.lfm = LightFMRecommender()
            self.lfm.fit(urm, epochs=100)
            print("USER CF finished")

        if self.enableSVD:
            self.svd = PureSVDRecommender(urm.getCSR())
            self.svd.fit(num_factors=225)
            print("USER CF finished")

        # User based
        print("starting USER CF")
        self.cbu = CollaborativeFiltering()
        self.cbu.fit(urm_t, k=100, h=0, mode='user')
        print("USER CF finished")

        self.item_item = IIHybridRecommender(urm, icm, icm2)
        self.item_item.fit(item_weight=0.4, cbf1_weight=0.25, cbf2_weight=0.1)

        # # Item based
        # print("starting ITEM CF")
        # self.cbi = CollaborativeFiltering()
        # self.cbi.fit(urm, k=125, h=0, mode='item')
        # print("ITEM CF finished")
        #
        # # Content based artist
        # print("starting CBF")
        # self.cbf = ContentBasedFiltering(icm, urm, k=25, shrinkage=0)
        # self.cbf.fit()
        # print("CBF finished")

    def fit(self, weights_dict, method='rating_weight', norm='max'):

        self.svd_weight = weights_dict.get('svd_weight', 0)
        self.user_weight = weights_dict.get('user_weight', 0)
        self.item_weight = weights_dict.get('item_weight', 0)
        self.cbf_weight = weights_dict.get('cbf_weight', 0)
        self.cbf2_weight = weights_dict.get('cbf2_weight', 0)
        self.rp3b_weight = weights_dict.get('rp3b_weight', 0)
        self.slim_weight = weights_dict.get('slim_weight', 0)
        self.p3a_weight = weights_dict.get('p3a_weight', 0)
        self.lfm_weight = weights_dict.get('lfm_weight', 0)

        self.method = method
        self.norm = norm

    def s_recommend(self, user, nRec=10, switchTH="15"):

        if self.method == 'item_weight':
            extra = 1

            recommended_items_user = self.cbu.s_recommend(user, nRec + extra)
            recommended_items_item = self.cbi.s_recommend(user, nRec + extra)
            recommended_items_cbf = self.cbf.s_recommend(user, nRec + extra)

            weighting_dict = {
                'user': (recommended_items_user, self.user_weight),
                'item': (recommended_items_item, self.item_weight),
                'cbf': (recommended_items_cbf, self.cbf_weight)
            }

            if (self.enableCBF2):
                recommended_items_cbf2 = self.cbf2.s_recommend(
                    user, nRec + extra)
                weighting_dict['cbf2'] = (recommended_items_cbf2,
                                          self.cbf2_weight)

            if (self.enableLFM):
                recommended_items_lfm = self.lfm.s_recommend(
                    user, nRec + extra)
                weighting_dict['lfm'] = (recommended_items_lfm,
                                         self.lfm_weight)

            if (self.enableSVD):
                recommended_items_svd = self.svd.s_recommend(
                    user, nRec + extra)
                weighting_dict['svd'] = (recommended_items_svd,
                                         self.svd_weight)

            if (self.enableSLIM):
                recommended_items_slim = self.slim.s_recommend(
                    user, nRec + extra)
                weighting_dict['slim'] = (recommended_items_slim,
                                          self.slim_weight)

            if (self.enableP3A):
                recommended_items_p3a = self.p3a.s_recommend(
                    user, nRec + extra)
                weighting_dict['p3a'] = (recommended_items_p3a,
                                         self.p3a_weight)

            return self.item_weighter(weighting_dict, nRec, extra)

        elif self.method == 'rating_weight':

            norm_method = self.norm

            recommended_items_user = self.normalize_row(
                self.cbu.get_pred_row(user), method=norm_method)
            recommended_items_item = self.normalize_row(
                self.cbi.get_pred_row(user), method=norm_method)
            recommended_items_cbf = self.normalize_row(
                self.cbf.get_pred_row(user), method=norm_method)

            pred_row_sparse = recommended_items_user * self.user_weight + recommended_items_item * self.item_weight \
                              + recommended_items_cbf * self.cbf_weight

            if self.enableSLIM:
                recommended_items_slim = self.normalize_row(
                    self.getSlimRow(user), method=norm_method)
                pred_row_sparse = pred_row_sparse + self.slim_weight * recommended_items_slim

            if self.enableCBF2:
                recommended_items_cbf2 = self.normalize_row(
                    self.cbf2.get_pred_row(user), method=norm_method)
                pred_row_sparse = pred_row_sparse + self.cbf2_weight * recommended_items_cbf2

            if self.enableP3A:
                row = self.p3a.get_pred_row(user)
                pred_row_sparse = pred_row_sparse + self.p3a_weight * row

            if self.enableRP3B:
                row = self.rp3b.get_pred_row(user)
                pred_row_sparse = pred_row_sparse + self.rp3b_weight * row

            pred_row = np.array(pred_row_sparse.todense()).squeeze()

            if self.enableLFM:
                recommended_items_lfm = self.normalize_row(
                    self.lfm.get_pred_row(user), method=norm_method)
                pred_row = pred_row + self.lfm_weight * recommended_items_lfm

            if self.enableSVD:
                recommended_items_svd = self.normalize_row(
                    self.svd.get_pred_row(user), method=norm_method)
                pred_row = pred_row + self.svd_weight * recommended_items_svd

            ranking = np.argsort(-pred_row)
            recommended_items = self._filter_seen(user, ranking)

            return recommended_items[0:nRec]

        elif self.method == "hybrid":

            norm_method = 'max'
            extra = 1

            recommended_items_user = self.normalize_row(
                self.cbu.get_pred_row(user), method=norm_method)
            recommended_items_item = self.normalize_row(
                self.cbi.get_pred_row(user), method=norm_method)
            recommended_items_cbf = self.normalize_row(
                self.cbf.get_pred_row(user), method=norm_method)

            recommended_items_cbf2 = None
            if (self.enableCBF2):
                recommended_items_cbf2 = self.normalize_row(
                    self.cbf2.get_pred_row(user), method=norm_method)

            recommended_items_rp3b = None
            if (self.enableRP3B):
                recommended_items_rp3b = self.normalize_row(
                    self.rp3b.get_pred_row(user), method=norm_method)

            recommended_items_slim = None
            if (self.enableSLIM):
                recommended_items_slim = self.normalize_row(
                    self.getSlimRow(user), method=norm_method)

            weighting_dict = {}

            return self.item_weighter(weighting_dict, nRec, extra)

        elif self.method == 'switch':

            if len(self.urm.extractTracksFromPlaylist(user)) < switchTH:
                # enough recommendations, use user
                return self.cbu.s_recommend(user, nRec=nRec)
            else:
                # not enough recommendations, use item
                return self.cbi.s_recommend(user, nRec=nRec)

        else:
            raise ValueError('Not a valid hybrid method')

    def m_recommend(self, user_ids, nRec=10):
        results = []
        for uid in user_ids:
            results.append(self.s_recommend(uid, nRec))
        return results

    def item_weighter(self, tupleDict, nRec, extra):

        # initialize a dict with recommended items as keys and value zero
        result = {}
        for tuple in tupleDict.values():

            items = tuple[0]

            for i in range(nRec + extra):
                result[str(items[i])] = 0

        # assign a score based on position

        for tuple in tupleDict.values():

            items = tuple[0]
            weight = tuple[1]

            for i in range(nRec + extra):
                result[str(items[i])] += (nRec + extra - i) * weight

        # sort the dict
        sorted_results = sorted(result.items(), key=itemgetter(1))
        rec_items = [x[0] for x in sorted_results]

        # flip to order by decreasing order
        rec_items = rec_items[::-1]

        # return only the topN recommendations
        return np.array(rec_items[0:nRec]).astype(int)

    def predWeightRatingRows(self, user, nRec, recommended_items_user,
                             recommended_items_item, recommended_items_cbf,
                             recommended_items_cbf2, recommended_items_rp3b,
                             recommended_items_slim):


        pred_row_sparse = recommended_items_user * self.user_weight + recommended_items_item * self.item_weight \
                    + recommended_items_cbf * self.cbf_weight

        if self.enableSLIM and self.method != "hybrid":
            pred_row_sparse = pred_row_sparse + self.slim_weight * recommended_items_slim

        if self.enableCBF2:
            pred_row_sparse = pred_row_sparse + self.cbf2_weight * recommended_items_cbf2

        # needs to be before rp3b because rp3b output is dense
        pred_row = np.array(pred_row_sparse.todense()).squeeze()

        if self.enableRP3B:
            pred_row = pred_row + self.rp3b_weight * recommended_items_rp3b

        ranking = np.argsort(-pred_row)
        recommended_items = self._filter_seen(user, ranking)

        return recommended_items[0:nRec]

    def _filter_seen(self, user_id, ranking):
        seen = self.urm.extractTracksFromPlaylist(user_id)
        unseen_mask = np.in1d(ranking, seen, assume_unique=True, invert=True)
        return ranking[unseen_mask]

    def getSlimRow(self, user):
        return self.urm.getCSR().getrow(user) * self.slim_sim

    def setEnables(self, enable_dict):
        self.enableSVD = enable_dict.get('enableSVD')
        self.enableRP3B = enable_dict.get('enableRP3B')
        self.enableSLIM = enable_dict.get('enableSLIM')
        self.enableCBF2 = enable_dict.get('enableCBF2')
        self.enableP3A = enable_dict.get('enableP3A')
        self.enableLFM = enable_dict.get('enableLFM')

    def normalize_row(self, recommended_items, method):
        if method == 'max':
            norm_factor = recommended_items.max()
            if norm_factor == 0: norm_factor = 1
            return recommended_items / norm_factor

        elif method == 'sum':
            norm_factor = recommended_items.sum()
            if norm_factor == 0: norm_factor = 1
            return recommended_items / norm_factor

        elif method == 'l1':

            return normalize(recommended_items, norm='l1')

        elif method == 'l2':
            return normalize(recommended_items, norm='l2')
        else:
            raise ValueError('Not a valid normalization method')
Пример #9
0
class NewHybrid(object):

        def __init__(self, matrices, param_dict, enable_dict):

            print("Fitting Hybrid...")

            self.urm = matrices.get('URM')
            self.urm_t = matrices.get('URM_T')
            self.icm1 = matrices.get('ICM_1')
            self.icm2 = matrices.get('ICM_2')

            self.param_dict = param_dict
            self.weight_dict = param_dict.get('weight_dict')
            self.setEnables(enable_dict)
            self.setWeights(self.weight_dict)

            self.buildmodel()


        def buildmodel(self):
            if self.enableCBI:
                print("Fitting Item CF...")
                self.cbi = CollaborativeFiltering()
                self.cbi.fit(self.urm, **self.param_dict.get('cbi_param_dict'))
                print("Item CF finished")

            if self.enableRP3B:
                print("Fitting RP3B...")
                self.rp3b = RP3betaRecommender(self.urm.getCSR())
                self.rp3b.fit(**self.param_dict.get('rp3b_param_dict'))
                print("RP3B finished")

            if self.enableCBF:
                self.cbf = ImprovedCBF(self.icm1, self.icm2, self.urm, **self.param_dict.get('cbf_param_dict'))
                self.cbf.fit(self.param_dict.get('CBFNorm'))
                print("CBF finished")

            if self.enableCBU:
                self.cbu = CollaborativeFiltering()
                self.cbu.fit(self.urm_t, **self.param_dict.get('cbu_param_dict'))
                print("USER CF finished")

            if self.enableSLIM:
                self.loadSLIM = self.param_dict.get('loadSLIM')
                self.slimPath = self.param_dict.get('slimPath')

                self.slim = SLIM_BPR_Cython(self.urm.getCSR(), recompile_cython=False, positive_threshold=0,
                                            final_model_sparse_weights=True,
                                            train_with_sparse_weights=False)

                if self.loadSLIM :
                    print("Loading matrix")
                    self.slim.loadModel('',self.slimPath)

                else:
                    print("Calculating similarity matrix")
                    logFile = open("SLIM_BPR_Cython.txt", "a")
                    self.slim.fit(**self.param_dict.get('slim_param_dict'))
                    self.slim.saveModel('',self.slimPath)

                self.normalizeSLIM = self.param_dict.get('normalizeSLIM')

                if self.normalizeSLIM != None :
                    self.slim_sim = normalize(self.slim.get_similarity(), norm=self.normalizeSLIM, axis=1)
                else:
                    self.slim_sim = self.slim.get_similarity()

            if self.enableSVD:
                self.loadSVD = self.param_dict.get('loadSVD')
                self.svdPath = self.param_dict.get('svdPath')

                self.svd = IALS_numpy(**self.param_dict.get('svd_param_dict'))

                if self.loadSVD:
                     print("Loading svd")
                     self.svd.loadModel(self.svdPath)
                     self.svd.set_dataset(self.urm.getCSR())
                else:
                     print("Calculating svd")
                     self.svd.fit(self.urm.getCSR())
                     self.svd.saveModel(self.svdPath)

            print("Fitting Hybrid done ")

        def setEnables(self, enable_dict):
            self.enableCBF = enable_dict.get('enableCBF')
            self.enableCBI = enable_dict.get('enableCBI')
            self.enableRP3B = enable_dict.get('enableRP3B')
            self.enableCBU = enable_dict.get('enableCBU')
            self.enableSLIM = enable_dict.get('enableSLIM')
            self.enableSVD = enable_dict.get('enableSVD')
            self.enableSLEN = enable_dict.get('enableSLEN')


        def s_recommend(self, user, nRec=10):

            pred_row_sparse = 0
            norm_method = "max"

            if self.enableCBI:
                pred_row_sparse = pred_row_sparse + self.normalize_row(self.cbi.get_pred_row(user), norm_method) * self.cbi_weight

            if self.enableCBU:
                pred_row_sparse = pred_row_sparse + self.normalize_row(self.cbu.get_pred_row(user), norm_method) * self.cbu_weight

            if self.enableCBF:
                pred_row_sparse = pred_row_sparse + self.normalize_row(self.cbf.get_pred_row(user), norm_method) * self.cbf_weight

            if self.enableRP3B:
                pred_row_sparse = pred_row_sparse + self.normalize_row(self.rp3b.get_pred_row(user),norm_method) * self.rp3b_weight

            if self.enableSLIM:
                pred_row_sparse = pred_row_sparse + self.normalize_row(self.get_pred_row_slim(user),norm_method) * self.slim_weight

            if self.enableSLEN:
                pred_row_sparse = pred_row_sparse + self.normalize_row(self.slim_en.get_pred_row(user),norm_method) * self.slen_weight

            pred_row = np.array(pred_row_sparse.todense()).squeeze()

            #After it is dense add svd
            if self.enableSVD:
                pred_row = pred_row + self.normalize_row(self.svd.get_pred_row(user),norm_method) * self.svd_weight

            ranking = np.argsort(-pred_row)

            recommended_items = self._filter_seen(user, ranking)

            return recommended_items[0:nRec]


        def _filter_seen(self, user_id, ranking):
            seen = self.urm.extractTracksFromPlaylist(user_id)
            unseen_mask = np.in1d(ranking, seen, assume_unique=True, invert=True)
            return ranking[unseen_mask]

        def normalize_row(self, recommended_items, method):
            if method == 'max':
                norm_factor = recommended_items.max()
                if norm_factor == 0: norm_factor = 1
                return recommended_items / norm_factor

            elif method == 'sum':
                norm_factor = recommended_items.sum()
                if norm_factor == 0: norm_factor = 1
                return recommended_items / norm_factor

            elif method == 'l1':
                return normalize(recommended_items, norm='l1')

            elif method == 'l2':
                return normalize(recommended_items, norm='l2')

            elif method == 'none':
                return recommended_items

            else:
                raise ValueError('Not a valid normalization method')

        def setWeights(self, weight_dict):
            self.cbi_weight = weight_dict.get('cbi_weight')
            self.rp3b_weight = weight_dict.get('rp3b_weight')
            self.cbf_weight = weight_dict.get('cbf_weight')
            self.cbu_weight = weight_dict.get('cbu_weight')
            self.slim_weight = weight_dict.get('slim_weight')
            self.svd_weight = weight_dict.get('svd_weight')
            self.slen_weight = weight_dict.get('slen_weight')

        def get_pred_row_slim(self, user):
            return self.urm.getCSR().getrow(user).dot(self.slim_sim)

        def m_recommend(self, target_ids, nRec=10):
            """Return one ``s_recommend`` result per target id, in order.

            The ids are iterated through ``tqdm``.
            """
            return [self.s_recommend(tid, nRec) for tid in tqdm(target_ids)]
Пример #10
0
class XGBoostRecommender():

    def __init__(self, urm, urm_t, icm, icm2, enable_dict, urm_test):
        """Train the recommenders whose outputs feed the XGBoost matrix.

        The item-item hybrid and the user-based CF are always trained; SVD,
        P3alpha, SLIM and LightFM only when their flag is set.

        Args:
            urm: project URM wrapper; ``getCSR()`` is called on it and its
                shape gives ``n_users`` and ``n_items``.
            urm_t: matrix passed to the user-based ``CollaborativeFiltering``
                model (presumably the transposed URM -- confirm with caller).
            icm: first item-content matrix for ``IIHybridRecommender``.
            icm2: second item-content matrix for ``IIHybridRecommender``.
            enable_dict: mapping holding the ``enable*`` flags read by
                ``setEnables``.
            urm_test: validation URM wrapper, passed to SLIM as
                ``URM_validation``.
        """
        self.urm = urm
        self.n_users, self.n_items = urm.getCSR().shape
        self.setEnables(enable_dict)

        self.item_item = IIHybridRecommender(urm, icm, icm2)
        self.item_item.fit(item_weight=0.4, cbf1_weight=0.25, cbf2_weight=0.1)

        self.user = CollaborativeFiltering()
        self.user.fit(urm_t, k=100, h=0, mode='user')

        if self.enableSVD:
            self.svd = SVDRecommender(urm, nf=385)

        if self.enableP3A:
            self.p3a = P3alpha(urm.getCSR())
            self.p3a.fit(topK=80, alpha=1, min_rating=0, implicit=True, normalize_similarity=True)

        if self.enableSLIM:
            self.slim = SLIM_BPR_Cython(urm.getCSR(), recompile_cython=False, positive_threshold=0,
                                        URM_validation=urm_test.getCSR(), final_model_sparse_weights=True,
                                        train_with_sparse_weights=False)

            # The log file is only handed to fit(); the context manager makes
            # sure the handle is released even if training raises.
            with open("SLIM_BPR_Cython.txt", "a") as logFile:
                self.slim.fit(epochs=100, validation_every_n=1, logFile=logFile, batch_size=5, topK=200,
                              sgd_mode="adagrad", learning_rate=0.075)

        if self.enableLFM:
            # LightFM
            print("starting LightFM")
            self.lfm = LightFMRecommender()
            self.lfm.fit(urm, epochs=100)
            print("LightFM finished")


    def buildXGBoostMatrix(self, recommenders, n):
        """Collect each recommender's top-``n`` list per user into columns.

        For every user index in ``range(self.n_users)`` the enabled
        recommenders are queried for ``n`` items and the results are
        appended to per-recommender column lists, which are then handed to
        ``buildDataFrame``.

        Args:
            recommenders: not read anywhere in this method.
            n: number of items requested from each recommender.

        NOTE(review): several things here look unfinished -- confirm intent
        before relying on the output:
          * the recommended items are appended to ``user_id_col`` while the
            user index is repeated into the ``*_rec_col`` lists, i.e. the
            names appear swapped;
          * ``user_id_col`` grows by every recommender's list, whereas
            ``itit_rec_col`` / ``user_rec_col`` grow by one list each, so the
            columns passed to pandas presumably have different lengths;
          * the dict and the ``buildDataFrame`` call sit inside the user
            loop, so the frame is rebuilt on every iteration;
          * ``self.user.g`` is called where the other recommenders use
            ``s_recommend`` -- verify that ``g`` exists.
        """

        print("building XGBoost Matrix")
        user_id_col     = []
        slim_rec_col    = []
        itit_rec_col    = []
        p3a_rec_col     = []
        svd_rec_col     = []
        user_rec_col    = []
        lfm_rec_col     = []
        prof_len_col    = []

        for user in range(self.n_users):

            # Item Item
            itit_rec = self.item_item.s_recommend(user, n).tolist()
            user_id_col.extend(itit_rec)
            itit_rec_col.extend([user] * len(itit_rec))

            # User
            user_rec = self.user.g(user, n)
            user_id_col.extend(user_rec)
            user_rec_col.extend([user] * len(user_rec))

            # P3A
            if self.enableP3A:
                p3a_rec = self.p3a.s_recommend(user, n)
                user_id_col.extend(p3a_rec)
                p3a_rec_col.extend([user] * len(p3a_rec))

            # SVD
            if self.enableSVD:
                svd_rec = self.svd.s_recommend(user, n)
                user_id_col.extend(svd_rec)
                svd_rec_col.extend([user] * len(svd_rec))

            # LFM
            if self.enableLFM:
                lfm_rec = self.lfm.s_recommend(user, n)
                user_id_col.extend(lfm_rec)
                lfm_rec_col.extend([user] * len(lfm_rec))

            # SLIM
            if self.enableSLIM:
                slim_rec = self.slim.s_recommend(user, n)
                user_id_col.extend(slim_rec)
                slim_rec_col.extend([user] * len(slim_rec))

            # Profile Len
            # one entry per user-CF recommendation; not part of the dict below
            profileLen = len(self.urm.extractTracksFromPlaylist(user))
            prof_len_col.extend([profileLen] * len(user_rec))

            # Only three of the collected columns are exported for now; the
            # remaining ones are still commented out.
            dict = {"user_id": user_id_col,
                    "itit_rec_id": itit_rec_col,
                    "user_rec_id": user_rec_col}
                    # "slim_rec_id": slim_rec_col,
                    # "p3a_rec_id": p3a_rec_col,
                    # "lfm_rec_id": lfm_rec_col,
                    # "svd_rec_id": svd_rec_col,
                    # "profile_len": prof_len_col}

            self.buildDataFrame(dict)

    def setEnables(self, enable_dict):
        self.enableSVD  = enable_dict.get('enableSVD')
        self.enableSLIM = enable_dict.get('enableSLIM')
        self.enableCBF2 = enable_dict.get('enableCBF2')
        self.enableP3A  = enable_dict.get('enableP3A')
        self.enableLFM  = enable_dict.get('enableLFM')

    def buildDataFrame(self, dict):
        print("building dataframe")
        self.df = pd.DataFrame(dict)
        self.df.describe()
        print("built df")
class UserItemHybridRecommender_v2():
    def __init__(self, urm, urm_t, icm, icm2, enable_dict, urm_test=None):
        self.urm = urm
        self.setEnables(enable_dict)

        if self.enableSVD:
            self.svd = SVDRecommender(urm, nf=385)

        if self.enableSLIM:
            logFile = open("SLIM_BPR_Cython.txt", "a")

            self.slim = SLIM_BPR_Cython(urm.getCSR(),
                                        recompile_cython=False,
                                        positive_threshold=0,
                                        URM_validation=urm_test.getCSR(),
                                        final_model_sparse_weights=True,
                                        train_with_sparse_weights=False)

            self.slim.fit(epochs=100,
                          validation_every_n=1,
                          logFile=logFile,
                          batch_size=5,
                          topK=200,
                          sgd_mode="adagrad",
                          learning_rate=0.075)

            self.slim_sim = self.slim.get_similarity()

        if self.enableLFM:
            # LightFM
            print("starting USER CF")
            self.lfm = LightFMRecommender()
            self.lfm.fit(urm, epochs=100)
            print("USER CF finished")

        # User based
        print("starting USER CF")
        self.cbu = CollaborativeFiltering()
        self.cbu.fit(urm_t, k=100, h=8, mode='user')
        print("USER CF finished")

        # Item based
        print("starting ITEM CF")
        self.cbi = CollaborativeFiltering()
        self.cbi.fit(urm, k=125, h=10, mode='item')
        print("ITEM CF finished")

        # Content based artist
        print("starting CBF")
        self.cbf = ContentBasedFiltering(icm, urm, k=25, shrinkage=100)
        self.cbf.fit()
        print("CBF finished")

        if self.enableCBF2:
            print("starting CBF2")
            self.cbf2 = ContentBasedFiltering(icm2, urm, k=25, shrinkage=100)
            self.cbf2.fit()
            print("CBF2 finished")

    def fit(self, weights_dict, method='weight_norm'):

        self.user_weight = weights_dict.get('user_weight', 0)
        self.item_weight = weights_dict.get('item_weight', 0)
        self.cbf_weight = weights_dict.get('cbf_weight', 0)
        self.cbf2_weight = weights_dict.get('cbf2_weight', 0)
        self.svd_weight = weights_dict.get('svd_weight', 0)
        self.slim_weight = weights_dict.get('slim_weight', 0)
        self.lfm_weight = weights_dict.get('lfm_weight', 0)
        self.method = method

    def s_recommend(self, user, nRec=10, switchTH="15"):

        if self.method == 'weight_norm':

            norm_method = 'max'

            recommended_items_user = self.normalize_row(
                self.cbu.get_pred_row(user), method=norm_method)
            recommended_items_item = self.normalize_row(
                self.cbi.get_pred_row(user), method=norm_method)
            recommended_items_cbf = self.normalize_row(
                self.cbf.get_pred_row(user), method=norm_method)

            recommended_items_cbf2 = None
            if (self.enableCBF2):
                recommended_items_cbf2 = self.normalize_row(
                    self.cbf2.get_pred_row(user), method=norm_method)

            recommended_items_lfm = None
            if (self.enableLFM):
                recommended_items_lfm = self.normalize_row(
                    self.lfm.get_pred_row(user), method=norm_method)

            recommended_items_svd = None
            if (self.enableSVD):
                recommended_items_svd = self.normalize_row(
                    self.svd.get_pred_row(user), method=norm_method)

            recommended_items_slim = None
            if (self.enableSLIM):
                recommended_items_slim = self.normalize_row(
                    self.getSlimRow(user), method=norm_method)

            return self.predWeightRatingRows(
                user, nRec, recommended_items_user, recommended_items_item,
                recommended_items_cbf, recommended_items_cbf2,
                recommended_items_svd, recommended_items_slim)

        elif self.method == 'switch':

            if len(self.urm.extractTracksFromPlaylist(user)) < switchTH:
                # enough recommendations, use user
                return self.cbu.s_recommend(user, nRec=nRec)
            else:
                # not enough recommendations, use item
                return self.cbi.s_recommend(user, nRec=nRec)

        else:
            raise ValueError('Not a valid hybrid method')

    def m_recommend(self, user_ids, nRec=10):
        results = []
        for uid in user_ids:
            results.append(self.s_recommend(uid, nRec))
        return results

    def mixRecommendersRow(self, recommended_items_user,
                           recommended_items_item_item, nRec):

        # assign a score based on position

        # initialize
        result = {}
        for i in range(nRec + 3):
            result[str(recommended_items_user[i])] = 0
            result[str(recommended_items_item_item[i])] = 0

        # weight user based cf items
        for i in range(nRec + 3):
            result[str(
                recommended_items_user[i])] += (nRec - i) * self.user_weight

        # weight item based cf items
        for j in range(nRec + 3):
            result[str(
                recommended_items_item_item[j])] += (nRec -
                                                     j) * self.item_weight

        # sort the dict
        sorted_results = sorted(result.items(), key=itemgetter(1))
        rec_items = [x[0] for x in sorted_results]

        # flip to order by decreasing order
        rec_items = rec_items[::-1]

        # return only the topN recommendations
        return np.array(rec_items[0:nRec]).astype(int)

    def predWeightRatingRows(self, user, nRec, recommended_items_user,
                             recommended_items_item, recommended_items_cbf,
                             recommended_items_cbf2, recommended_items_svd,
                             recommended_items_slim):

        playlist_tracks = self.urm.extractTracksFromPlaylist(user)
        num_tracks = playlist_tracks.size
        extra_weight = num_tracks / 1000

        if (num_tracks > 8):
            extra_weight += 0.03
            if (num_tracks > 15):
                extra_weight += 0.03
                if (num_tracks > 20):
                    extra_weight += 0.03
                    if (num_tracks > 33):
                        extra_weight += 0.04

        pred_row_sparse = recommended_items_user * (self.user_weight + extra_weight) + recommended_items_item * self.item_weight \
                    + recommended_items_cbf * self.cbf_weight

        if self.enableSLIM:
            pred_row_sparse = pred_row_sparse + self.slim_weight * recommended_items_slim

        if self.enableCBF2:
            pred_row_sparse = pred_row_sparse + self.cbf2_weight * recommended_items_cbf2

        # needs to be before svd because svd output is dense
        pred_row = np.array(pred_row_sparse.todense()).squeeze()

        if self.enableSVD:
            pred_row = pred_row + self.svd_weight * recommended_items_svd

        ranking = np.argsort(-pred_row)
        recommended_items = self._filter_seen(user, ranking)

        return recommended_items[0:nRec]

    def _filter_seen(self, user_id, ranking):
        seen = self.urm.extractTracksFromPlaylist(user_id)
        unseen_mask = np.in1d(ranking, seen, assume_unique=True, invert=True)
        return ranking[unseen_mask]

    def getSlimRow(self, user):
        return self.urm.getCSR().getrow(user) * self.slim_sim

    def setEnables(self, enable_dict):
        self.enableSVD = enable_dict.get('enableSVD')
        self.enableSLIM = enable_dict.get('enableSLIM')
        self.enableCBF2 = enable_dict.get('enableCBF2')
        self.enableLFM = enable_dict.get('enableLFM')

    def normalize_row(self, recommended_items, method):
        if method == 'max':
            norm_factor = recommended_items.max()
            if norm_factor == 0: norm_factor = 1
            return recommended_items / norm_factor

        elif method == 'sum':
            norm_factor = recommended_items.sum()
            if norm_factor == 0: norm_factor = 1
            return recommended_items / norm_factor

        else:
            raise ValueError('Not a valid normalization method')
Пример #12
0
    cbf.fit()

    if submission:
        recommended_items = cbf.m_recommend(targetList, nRec=10)
        generate_output(targetList, recommended_items)
    else:
        cumulative_precision, cumulative_recall, cumulative_MAP = evaluate_algorithm(
            urm_test, cbf)
        print(
            "Recommender, performance is: Precision = {:.4f}, Recall = {:.4f}, MAP = {:.6f}"
            .format(cumulative_precision, cumulative_recall, cumulative_MAP))

elif htype == "slim":
    slim = SLIM_BPR_Cython(urm.getCSR(),
                           recompile_cython=False,
                           positive_threshold=0,
                           URM_validation=urm_test.getCSR(),
                           final_model_sparse_weights=True,
                           train_with_sparse_weights=False)
    logFile = open("SLIM_BPR_Cython.txt", "a")
    parameters = {
        'epochs': 10,
        'validation_every_n': 99,
        'logFile': logFile,
        'batch_size': 1,
        'topK': 200,
        'sgd_mode': "rmsprop",
        'learning_rate': 0.1,
        'gamma': 0.995,
        'beta_1': 0.,
        'beta_2': 0.0
    }