Python filter_seen示例，utils.auxUtils.filter_seen Python示例

示例#1

0

显示文件

    def recommend(self, playlist_ids):
        """Build a table of recommended track ids for each requested playlist.

        For every playlist id the SVD score vector is ranked in descending
        order, ``self.top_pop_songs`` is appended after the ranked indices,
        the result is passed through ``filter_seen`` and cut to ``self.at``
        entries.

        Args:
            playlist_ids: iterable of playlist identifiers used to index
                ``self.URM_train`` and passed to ``compute_score_SVD``.

        Returns:
            A ``pandas.DataFrame`` with columns ``playlist_id`` and
            ``track_ids``; ``track_ids`` is a space-separated string.
            Playlists that raised ``IndexError`` are absent.
        """
        predictions = {}

        for position, playlist_id in enumerate(playlist_ids):
            try:
                scores = self.compute_score_SVD(playlist_id)
                # Track indices ordered from highest to lowest score.
                ranked = scores.argsort()[::-1]
                seen = self.URM_train[playlist_id]

                # top_pop_songs goes after the ranked indices -- presumably a
                # fallback so enough candidates survive filtering.
                candidates = np.concatenate((ranked, self.top_pop_songs), axis=None)
                kept = filter_seen(candidates, seen)[:self.at]

                predictions[playlist_id] = ' '.join(map(str, kept))
            except IndexError:
                print("I don't have a value in the test_data")

            # Progress is reported for every 1000th id, failed ones included.
            if position % 1000 == 0:
                print("Playlist num", position, "/10000")

        return pd.DataFrame(list(predictions.items()),
                            columns=['playlist_id', 'track_ids'])

示例#2

0

显示文件

    def recommend(self, playlist_ids):
        """Build a table of recommended track ids for each requested playlist.

        The estimated-rating matrix is fetched once via
        ``self.get_estimated_ratings()``. For each playlist its stored
        entries are ranked by descending value, ``self.top_pop_songs`` is
        appended, the result is passed through ``filter_seen`` and cut to
        ``self.at`` entries.

        Args:
            playlist_ids: iterable of row indices into the estimated-rating
                matrix and ``self.urm``.

        Returns:
            A ``pandas.DataFrame`` with columns ``playlist_id`` and
            ``track_ids``; ``track_ids`` is a space-separated string.
        """
        print("Recommending...")

        predictions = {}

        print("STARTING ESTIMATION")
        ratings = self.get_estimated_ratings()

        for position, playlist_id in enumerate(playlist_ids):
            rating_row = ratings.getrow(playlist_id)

            # Column indices of the stored entries, highest value first.
            descending = rating_row.data.argsort()[::-1]
            ranked = rating_row.indices[descending]
            seen = self.urm[playlist_id]

            candidates = np.concatenate((ranked, self.top_pop_songs), axis=None)
            kept = filter_seen(candidates, seen)[:self.at]

            predictions[playlist_id] = ' '.join(map(str, kept))

            if position % 5000 == 0:
                print("Playlist num", position, "/10000")

        return pd.DataFrame(list(predictions.items()),
                            columns=['playlist_id', 'track_ids'])

示例#3

0

显示文件

    def recommend(self, playlist_ids):
        """Build a table of the first 10 filtered tracks for each playlist.

        Estimated ratings are ``self.URM_train.dot(self.W_sparse)`` converted
        to CSR. For each playlist its stored entries are ranked by descending
        value, ``self.top_pop_songs`` is appended, the result is passed
        through ``filter_seen`` and cut to 10 entries.

        Args:
            playlist_ids: iterable of row indices into the rating matrix and
                ``self.URM_train``.

        Returns:
            A ``pandas.DataFrame`` with columns ``playlist_id`` and
            ``track_ids``; ``track_ids`` is a space-separated string.
            Playlists that raised ``IndexError`` are skipped silently.
        """
        print("Recommending...")

        predictions = {}
        ratings = csr_matrix(self.URM_train.dot(self.W_sparse))
        # Counts only the playlists that were processed successfully.
        processed = 0

        for playlist_id in playlist_ids:
            try:
                rating_row = ratings[playlist_id]
                # Column indices of the stored entries, highest value first.
                descending = rating_row.data.argsort()[::-1]
                ranked = rating_row.indices[descending]
                seen = self.URM_train[playlist_id]

                candidates = np.concatenate((ranked, self.top_pop_songs), axis=None)
                kept = filter_seen(candidates, seen)[:10]

                predictions[playlist_id] = ' '.join(map(str, kept))
            except IndexError:
                continue

            if processed % 1000 == 0:
                print("Playlist num", processed, "/10000")
            processed += 1

        return pd.DataFrame(list(predictions.items()),
                            columns=['playlist_id', 'track_ids'])

示例#4

0

显示文件

    def recommend_single(self, k):
        """Return up to ``self.at`` filtered entries for playlist ``k``.

        The playlist's row of ``self.urm`` is multiplied by ``self.sym``; the
        stored entries of the product are ranked by descending value and the
        ranking is passed through ``filter_seen``.

        Args:
            k: row index of the playlist in ``self.urm``.

        Returns:
            The first ``self.at`` elements of the ``filter_seen`` result.
        """
        playlist_row = self.urm[k]
        scores = playlist_row.dot(self.sym)

        # Column indices of the stored entries, highest value first.
        descending = scores.data.argsort()[::-1]
        ranked = scores.indices[descending]

        # NOTE(review): filter_seen is defined elsewhere; confirm it expects
        # (playlist_row, ranked_indices) in this order.
        return filter_seen(playlist_row, ranked)[:self.at]

示例#5

0

显示文件

    def recommend(self, playlist_ids, beta=5, gamma=7, delta=10, filter_top_pop=False):
        """Recommend tracks from a weighted sum of three estimated-rating matrices.

        The combined matrix is
        ``col_i_i * beta + cbf * gamma + slim * delta``. For each playlist
        its stored entries are ranked by descending value,
        ``self.top_pop_songs`` is appended, the result is passed through
        ``filter_seen`` and cut to ``self.at`` entries.

        Args:
            playlist_ids: iterable of row indices into the combined matrix
                and ``self.urm``.
            beta: multiplier for the ``col_i_i_recommender`` ratings.
            gamma: multiplier for the ``cbf_recommender`` ratings.
            delta: multiplier for the ``slim_recommender`` ratings.
            filter_top_pop: only echoed in the start-up message; it does not
                affect the result in this method.

        Returns:
            A ``pandas.DataFrame`` with columns ``playlist_id`` and
            ``track_ids``; ``track_ids`` is a space-separated string.
            Playlists that raised ``IndexError`` are absent.
        """
        print("Recommending... Am I filtering top_top songs?", filter_top_pop)

        predictions = {}

        # One estimated-rating matrix per underlying recommender.
        cbf_ratings = self.cbf_recommender.get_estimated_ratings()
        item_cf_ratings = self.col_i_i_recommender.get_estimated_ratings()
        slim_ratings = self.slim_recommender.get_estimated_ratings()

        combined = (item_cf_ratings.multiply(beta)
                    + cbf_ratings.multiply(gamma)
                    + slim_ratings.multiply(delta))

        for position, playlist_id in enumerate(playlist_ids):
            try:
                rating_row = combined[playlist_id]
                # Column indices of the stored entries, highest value first.
                descending = rating_row.data.argsort()[::-1]
                ranked = rating_row.indices[descending]
                seen = self.urm[playlist_id]

                candidates = np.concatenate((ranked, self.top_pop_songs), axis=None)
                kept = filter_seen(candidates, seen)[:self.at]

                predictions[playlist_id] = ' '.join(map(str, kept))
            except IndexError:
                print("I don't have a value in the test_data")

            if position % 1000 == 0:
                print("Playlist num", position, "/10000")

        return pd.DataFrame(list(predictions.items()), columns=['playlist_id', 'track_ids'])

示例#6

0

显示文件

    def recommend(self, playlist_ids, alpha=0.2, beta=10, gamma=1, delta=2, omega=30, eta=10, theta=30, sigma=1,
                  filter_top_pop=False):
        """Recommend tracks from summed similarity matrices plus user-CF and SVD scores.

        Six matrices obtained from ``get_sym_matrix`` are summed and
        multiplied by ``self.urm``; the user-user ratings scaled by ``alpha``
        are added. Per playlist, the dense row of that sum is added to the
        pureSVD score scaled by ``eta``, ranked in descending order, extended
        with ``self.top_pop_songs``, passed through ``filter_seen`` and cut
        to ``self.at`` entries.

        Args:
            playlist_ids: iterable of row indices into the rating matrix and
                ``self.urm``.
            alpha: multiplier for the ``col_u_u_recommender`` ratings.
            beta: argument passed to ``col_i_i_recommender.get_sym_matrix``.
            gamma: argument passed to ``cbf_recommender.get_sym_matrix``.
            delta: argument passed to ``slim_recommender.get_sym_matrix``.
            omega: argument passed to
                ``slim_elasticNet_recommender.get_sym_matrix``.
            eta: multiplier for the ``pureSVD.compute_score_SVD`` scores.
            theta: argument passed to ``p3alpha.get_sym_matrix``.
            sigma: argument passed to ``rp3beta.get_sym_matrix``.
            filter_top_pop: only echoed in the start-up message; it does not
                affect the result in this method.

        Returns:
            A ``pandas.DataFrame`` with columns ``playlist_id`` and
            ``track_ids``; ``track_ids`` is a space-separated string.
            Playlists that raised ``IndexError`` are absent.
        """
        print("Recommending... Am I filtering top_top songs?", filter_top_pop)

        predictions = {}

        content_sim = self.cbf_recommender.get_sym_matrix(gamma)
        item_cf_sim = self.col_i_i_recommender.get_sym_matrix(beta)
        p3alpha_sim = self.p3alpha.get_sym_matrix(theta)
        rp3beta_sim = self.rp3beta.get_sym_matrix(sigma)
        slim_sim = self.slim_recommender.get_sym_matrix(delta)
        elastic_sim = self.slim_elasticNet_recommender.get_sym_matrix(omega)
        merged_sim = content_sim + item_cf_sim + p3alpha_sim + slim_sim + elastic_sim + rp3beta_sim

        hybrid_ratings = self.urm * merged_sim
        user_cf_ratings = self.col_u_u_recommender.get_estimated_ratings()
        combined = user_cf_ratings.multiply(alpha) + hybrid_ratings

        for playlist_id in tqdm(playlist_ids):
            try:
                # The SVD contribution is computed per playlist and added to
                # the densified row of the combined matrix.
                dense_scores = (combined[playlist_id].toarray()[0]
                                + (self.pureSVD.compute_score_SVD(playlist_id) * eta))
                # Track indices ordered from highest to lowest score.
                ranked = dense_scores.argsort()[::-1]

                seen = self.urm[playlist_id]

                candidates = np.concatenate((ranked, self.top_pop_songs), axis=None)
                kept = filter_seen(candidates, seen)[:self.at]

                predictions[playlist_id] = ' '.join(map(str, kept))
            except IndexError:
                print("I don't have a value in the test_data")

        return pd.DataFrame(list(predictions.items()), columns=['playlist_id', 'track_ids'])

示例#7

0

显示文件

    def recommend_single(self, k):
        """Return up to ``self.at`` filtered entries for playlist ``k``.

        Row ``k`` of ``self.sym`` is multiplied by ``self.urm``; the stored
        entries of the product are ranked by descending value and the ranking
        is passed through ``filter_seen``.

        Args:
            k: row index into ``self.sym`` and ``self.urm``.

        Returns:
            The first ``self.at`` elements of the ``filter_seen`` result.
        """
        print("Playlist num: ", k, "/50440")

        similarity_row = self.sym.getrow(k)
        scores = csr_matrix(similarity_row.dot(self.urm))

        # Column indices of the stored entries, highest value first.
        descending = scores.data.argsort()[::-1]
        ranked = scores.indices[descending]

        seen = self.urm[k]
        # NOTE(review): filter_seen is defined elsewhere; confirm it expects
        # (playlist_row, ranked_indices) in this order.
        return filter_seen(seen, ranked)[:self.at]

示例#8

0

显示文件

    def recommend(self, playlist_ids, omega=60, filter_top_pop=False):
        """Recommend tracks from SLIM BPR ratings plus weighted SLIM ElasticNet ratings.

        The combined matrix is ``slim_bpr + slim_elasticNet * omega``. For
        each playlist its stored entries are ranked by descending value,
        ``self.top_pop_songs`` is appended, the result is passed through
        ``filter_seen`` and cut to 10 entries.

        Args:
            playlist_ids: iterable of row indices into the combined matrix
                and ``self.urm``.
            omega: multiplier for the ``slim_elasticNet_recommender`` ratings.
            filter_top_pop: only echoed in the start-up message; it does not
                affect the result in this method.

        Returns:
            A ``pandas.DataFrame`` with columns ``playlist_id`` and
            ``track_ids``; ``track_ids`` is a space-separated string.
            Playlists that raised ``IndexError`` are absent.
        """
        print("Recommending... Am I filtering top_top songs?", filter_top_pop)

        predictions = {}

        bpr_ratings = self.slim_bpr_recommender.get_estimated_ratings()
        elastic_ratings = self.slim_elasticNet_recommender.get_estimated_ratings()

        combined = bpr_ratings + elastic_ratings.multiply(omega)

        for position, playlist_id in enumerate(playlist_ids):
            try:
                rating_row = combined[playlist_id]
                # Column indices of the stored entries, highest value first.
                descending = rating_row.data.argsort()[::-1]
                ranked = rating_row.indices[descending]
                seen = self.urm[playlist_id]

                candidates = np.concatenate((ranked, self.top_pop_songs), axis=None)
                kept = filter_seen(candidates, seen)[:10]

                predictions[playlist_id] = ' '.join(map(str, kept))
            except IndexError:
                print("I don't have a value in the test_data")

            if position % 1000 == 0:
                print("Playlist num", position, "/10000")

        return pd.DataFrame(list(predictions.items()),
                            columns=['playlist_id', 'track_ids'])

示例#9

0

显示文件

    def recommend(self, playlist_ids):
        """Build a table of the first 10 filtered tracks for each playlist.

        A weight matrix is selected (``self.W_sparse`` when either
        ``self.train_with_sparse_weights`` or ``self.sparse_weights`` is
        truthy, otherwise ``self.W``), converted to an
        ``n_items x n_items`` CSR matrix and stored back on ``self.W``.
        Estimated ratings are ``self.URM_train.dot(self.W)``. For each
        playlist the stored entries are ranked by descending value,
        ``self.top_pop_songs`` is appended, the result is passed through
        ``filter_seen`` and cut to 10 entries.

        Args:
            playlist_ids: iterable of row indices into the rating matrix and
                ``self.URM_train``.

        Returns:
            A ``pandas.DataFrame`` with columns ``playlist_id`` and
            ``track_ids``; ``track_ids`` is a space-separated string.
        """
        print("Recommending...")

        predictions = {}

        if self.train_with_sparse_weights or self.sparse_weights:
            weights = self.W_sparse
        else:
            weights = self.W

        # Side effect: self.W is overwritten with the CSR form of the
        # selected weights.
        self.W = csr_matrix(weights, shape=(self.n_items, self.n_items))
        ratings = check_matrix(self.URM_train.dot(self.W), 'csr')

        for position, playlist_id in enumerate(playlist_ids):
            rating_row = ratings[playlist_id]
            # Column indices of the stored entries, highest value first.
            descending = rating_row.data.argsort()[::-1]
            ranked = rating_row.indices[descending]
            seen = self.URM_train[playlist_id]

            candidates = np.concatenate((ranked, self.top_pop_songs), axis=None)
            kept = filter_seen(candidates, seen)[:10]

            predictions[playlist_id] = ' '.join(map(str, kept))

            if position % 1000 == 0:
                print("Playlist num", position, "/10000")

        return pd.DataFrame(list(predictions.items()),
                            columns=['playlist_id', 'track_ids'])

示例#10

0

显示文件

    def recommend(self, playlist_ids):
        """Build a table of the first 10 filtered tracks for each playlist.

        Estimated ratings are the dense product ``np.dot(self.X, self.Y.T)``.
        For each playlist the row is ranked in descending order, passed
        through ``filter_seen`` against ``self.dataset[k]`` and cut to 10
        entries. No popularity fallback is appended here.

        Args:
            playlist_ids: iterable of row indices into the rating matrix and
                ``self.dataset``.

        Returns:
            A ``pandas.DataFrame`` with columns ``playlist_id`` and
            ``track_ids``; ``track_ids`` is a space-separated string.
            Playlists that raised ``IndexError`` are skipped silently.
        """
        print("Recommending...")
        predictions = {}
        ratings = np.dot(self.X, self.Y.T)

        # Counts only the playlists that were processed successfully.
        processed = 0

        for playlist_id in playlist_ids:
            try:
                scores = ratings[playlist_id]
                # Track indices ordered from highest to lowest score.
                ranked = scores.argsort()[::-1]
                seen = self.dataset[playlist_id]

                kept = filter_seen(ranked, seen)[:10]

                predictions[playlist_id] = ' '.join(map(str, kept))
            except IndexError:
                continue

            if processed % 1000 == 0:
                print("Playlist num", processed, "/10000")
            processed += 1

        return pd.DataFrame(list(predictions.items()),
                            columns=['playlist_id', 'track_ids'])

示例#11

0

显示文件

    def recommend(self, playlist_ids, alpha=1, beta=5, gamma=7):
        """Recommend tracks from a weighted sum of three estimated-rating matrices.

        The combined matrix is
        ``col_u_u * alpha + col_i_i * beta + cbf * gamma``. For each playlist
        its stored entries are ranked by descending value,
        ``self.top_pop_songs`` is appended, the result is passed through
        ``filter_seen`` and cut to ``self.at`` entries.

        Args:
            playlist_ids: iterable of row indices into the combined matrix
                and ``self.urm``.
            alpha: multiplier for the ``col_u_u_recommender`` ratings.
            beta: multiplier for the ``col_i_i_recommender`` ratings.
            gamma: multiplier for the ``cbf_recommender`` ratings.

        Returns:
            A ``pandas.DataFrame`` with columns ``playlist_id`` and
            ``track_ids``; ``track_ids`` is a space-separated string.
            Playlists that raised ``IndexError`` are absent.
        """
        print("Recommending...")

        predictions = {}

        cbf_ratings = self.cbf_recommender.get_estimated_ratings()
        item_cf_ratings = self.col_i_i_recommender.get_estimated_ratings()
        user_cf_ratings = self.col_u_u_recommender.get_estimated_ratings()
        combined = (user_cf_ratings.multiply(alpha)
                    + item_cf_ratings.multiply(beta)
                    + cbf_ratings.multiply(gamma))

        for position, playlist_id in enumerate(playlist_ids):
            try:
                rating_row = combined[playlist_id]
                # Column indices of the stored entries, highest value first.
                descending = rating_row.data.argsort()[::-1]
                ranked = rating_row.indices[descending]
                seen = self.urm[playlist_id]

                candidates = np.concatenate((ranked, self.top_pop_songs), axis=None)
                kept = filter_seen(candidates, seen)[:self.at]

                predictions[playlist_id] = ' '.join(map(str, kept))
            except IndexError:
                print("I don't have a value in the test_data")

            if position % 1000 == 0:
                print("Playlist num", position, "/10000")

        return pd.DataFrame(list(predictions.items()), columns=['playlist_id', 'track_ids'])

示例#12

0

显示文件

    def recommend(self, playlist_ids, alpha=0.1, beta=1, gamma=1, delta=2, omega=30, eta=0.8, filter_top_pop=False):
        """Recommend tracks from five weighted rating matrices plus per-playlist SVD scores.

        The combined matrix is ``col_u_u * alpha + col_i_i * beta +
        cbf * gamma + slim * delta + slim_elasticNet * omega``. Per playlist,
        the pureSVD score (as a CSR row scaled by ``eta``) is added to the
        combined row; stored entries are ranked by descending value,
        ``self.top_pop_songs`` is appended and the result is passed through
        ``filter_seen``. When ``filter_top_pop`` is true the survivors are
        additionally passed through ``filter_seen_array`` with
        ``self.top_pop_songs``. The list is then cut to ``self.at`` entries.

        Args:
            playlist_ids: iterable of row indices into the combined matrix
                and ``self.urm``.
            alpha: multiplier for the ``col_u_u_recommender`` ratings.
            beta: multiplier for the ``col_i_i_recommender`` ratings.
            gamma: multiplier for the ``cbf_recommender`` ratings.
            delta: multiplier for the ``slim_recommender`` ratings.
            omega: multiplier for the ``slim_elasticNet_recommender`` ratings.
            eta: multiplier for the ``pureSVD.compute_score_SVD`` scores.
            filter_top_pop: when true, apply ``filter_seen_array`` against
                ``self.top_pop_songs`` before truncating.

        Returns:
            A ``pandas.DataFrame`` with columns ``playlist_id`` and
            ``track_ids``; ``track_ids`` is a space-separated string.
            Playlists that raised ``IndexError`` are absent.
        """
        print("Recommending... Am I filtering top_top songs?", filter_top_pop)

        predictions = {}

        # One estimated-rating matrix per underlying recommender.
        cbf_ratings = self.cbf_recommender.get_estimated_ratings()
        item_cf_ratings = self.col_i_i_recommender.get_estimated_ratings()
        user_cf_ratings = self.col_u_u_recommender.get_estimated_ratings()
        slim_ratings = self.slim_recommender.get_estimated_ratings()
        elastic_ratings = self.slim_elasticNet_recommender.get_estimated_ratings()

        combined = (user_cf_ratings.multiply(alpha)
                    + item_cf_ratings.multiply(beta)
                    + cbf_ratings.multiply(gamma)
                    + slim_ratings.multiply(delta)
                    + elastic_ratings.multiply(omega))

        print('after sum..')
        for position, playlist_id in enumerate(playlist_ids):
            try:
                rating_row = combined[playlist_id]
                # SVD scores for this playlist, wrapped as a sparse row so
                # they can be added to the combined row.
                svd_row = sparse.csr_matrix(self.pureSVD.compute_score_SVD(playlist_id)).multiply(eta)
                rating_row += svd_row

                # Column indices of the stored entries, highest value first.
                descending = rating_row.data.argsort()[::-1]
                ranked = rating_row.indices[descending]
                seen = self.urm[playlist_id]

                candidates = np.concatenate((ranked, self.top_pop_songs), axis=None)
                unseen = filter_seen(candidates, seen)

                if filter_top_pop:
                    unseen = filter_seen_array(unseen, self.top_pop_songs)
                kept = unseen[:self.at]

                # Diagnostic: fewer than 10 recommendations survived.
                if len(kept) < 10:
                    print("Francisco was right once")

                predictions[playlist_id] = ' '.join(map(str, kept))
            except IndexError:
                print("I don't have a value in the test_data")

            if position % 1000 == 0:
                print("Playlist num", position, "/10000")

        return pd.DataFrame(list(predictions.items()), columns=['playlist_id', 'track_ids'])