Example #1
from parameterTunning.AbstractClassSearch import DictionaryKeys
from loader.loader import save_dataframe, train_data, target_data, test_data, tracks_data
from utils.auxUtils import Evaluator, buildURMMatrix, filter_seen
import pandas as pd

from slimRS.Cython.SLIM_BPR_Cython import SLIM_BPR_Cython
import matplotlib.pyplot as plt
from parameterTunning.GridSearch import GridSearch
from sklearn.model_selection import GridSearchCV

URM_train = buildURMMatrix(train_data)
URM_test = buildURMMatrix(test_data)

rs = SLIM_BPR_Cython(train_data)

grid_param = {
    'lambda_i': [1e-1, 1e-2, 1e-3, 1e-4],
    'lambda_j': [1e-1, 1e-2, 1e-3, 1e-4],
    'topK': [300, 400, 500]
}

evaluator = Evaluator()

# `scoring` must be a callable scorer (or a string), not a score computed up
# front; wrap the evaluation so GridSearchCV can invoke it per candidate.
# NOTE: GridSearchCV also expects a scikit-learn-compatible estimator
# (get_params/set_params), which SLIM_BPR_Cython must provide for this to run.
def map_scorer(estimator, X, y=None):
    predictions = estimator.recommend(target_data["playlist_id"])
    return evaluator.evaluate(predictions, test_data)

gd_sr = GridSearchCV(estimator=rs,
                     param_grid=grid_param,
                     scoring=map_scorer,
                     n_jobs=2)

gd_sr.fit(URM_train)
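# After the search, the selected configuration can be read back through the
# standard scikit-learn attributes (sketch):
print(gd_sr.best_params_)
print(gd_sr.best_score_)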
Example #2
from cbfRS.cbfRS import CbfRS
from loader.loader import save_dataframe, train_data, target_data, full_data, test_data, tracks_data
from utils.auxUtils import Evaluator
import pandas as pd
import matplotlib.pyplot as plt

evaluator = Evaluator()

df = pd.DataFrame([[0, 0, 0]], columns=['knn', 'map', 'shr'])
top_50 = pd.DataFrame([[0, 0, 0]], columns=['knn', 'map', 'shr'])
shrinkage = 0

plot_graph = False

while shrinkage < 50:
    map_list = []
    knn_list = []
    k = 10
    while k < 100:
        rs = CbfRS(tracks_data, 10, k, shrinkage, tf_idf=False, bm25=True)
        rs.fit(train_data)
        print('knn: ', k, ' shrinkage: ', shrinkage)
        predictions = rs.recommend(target_data['playlist_id'])
        map_ = (evaluator.evaluate(predictions, test_data))
        map_list.append(map_)
        # DataFrame.append was removed in pandas 2.x; use pd.concat instead
        df = pd.concat([
            df,
            pd.DataFrame([[k, map_, shrinkage]], columns=['knn', 'map', 'shr'])
        ], ignore_index=True)
        top_50 = df.sort_values(by=['map']).tail(50)
        knn_list.append(k)
        k += 10
    # advance the outer loop; the original snippet never incremented
    # shrinkage, so `while shrinkage < 50` would spin forever (step assumed)
    shrinkage += 10
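# Plotting sketch: matplotlib is imported and `plot_graph` is declared above
# but never used; this plots the last (knn, MAP) sweep if the flag is enabled.
if plot_graph:
    plt.plot(knn_list, map_list)
    plt.xlabel('knn')
    plt.ylabel('MAP')
    plt.title('MAP vs knn (last shrinkage sweep)')
    plt.show()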
Example #3
from loader.loader import train_data, test_data, tracks_data, target_data, full_data, save_dataframe
from utils.auxUtils import Evaluator
from graphBased.rp3betaRS import RP3betaRecommender

r = RP3betaRecommender(train_data)
r.fit()
pred = r.recommend(target_data['playlist_id'])
e = Evaluator()
e.evaluate(pred, test_data)
Example #4
from loader.loader import train_data, test_data, tracks_data, full_data, target_data, save_dataframe
from utils.auxUtils import buildURMMatrix, Evaluator
import numpy as np
from svdRS.pureSVD import PureSVDRecommender
from collaborative_filtering_RS.col_user_userRS import ColBfUURS
from collaborative_filtering_RS.col_item_itemRS import ColBfIIRS
from MatrixFactorization.mf_skl import MfNnz
from cbfRS.cbfRS import CbfRS
import matplotlib.pyplot as pyplot
from slimRS.slimElasticNet import SLIMElasticNetRecommender
from slimRS.Cython.SLIM_BPR_Cython import SLIM_BPR_Cython
from utils.auxUtils import buildICMMatrix
from FW_boosting.CFW_D_Similarity import CFW_D_Similarity_Linalg

URM_train = buildURMMatrix(train_data)
evaluator = Evaluator()
profile_length = np.ediff1d(URM_train.indptr)
block_size = int(len(profile_length) * 0.05)
sorted_users = np.argsort(profile_length)

rs_i_i_cf = ColBfIIRS(10, 750, 50, tf_idf=True)
rs_i_i_cf.fit(train_data)
# predictions_item_item = rs_i_i_cf.recommend(target_data['playlist_id'])
map_item_item = []
'''
rs_u_u_cf = ColBfUURS(10, 200, 50, tf_idf=True)
rs_u_u_cf.fit(train_data)
predictions_user_user = rs_u_u_cf.recommend(target_data['playlist_id'])
map_user_user = []
'''
rs_content = CbfRS(tracks_data, 10, 10, 10, tf_idf=True)
Example #5
from hybrid_similarities.new_hybrid import HybridRS
from loader.loader import train_data, test_data, tracks_data, target_data, full_data, save_dataframe
from utils.auxUtils import Evaluator, submit_dataframe_to_kaggle
import pandas as pd
from mail_notification.notify import NotifyMail

# so far best hybrid with pureSVD = alpha=0.3 beta=10 gamma=1 eta=10

r = HybridRS(tracks_data)
e = Evaluator()

r.fit(train_data)
# content filter
gammas = [0, 0.8, 1]
# collaborative user user
alphas = [0, 0.2, 0.3]
# collaborative item item
betas = [0, 10]
# pureSVD
etas = [0, 10]
# graph based
thetas = [0, 20, 30]
# slim BPR
deltas = [0, 0.8, 1]
# slim EN
omegas = [0, 10, 30]
list_res = []
# 0.2 10 1.0 10 1 40.0 30
sigmas = [0, 20]
for gamma in gammas:
    for alpha in alphas:
Example #6
from hybrid_col_cbf_RS.hybridRS import HybridRS
from loader.loader import save_dataframe, train_data, target_data, full_data, test_data, tracks_data
from utils.auxUtils import Evaluator
import pandas as pd
import matplotlib.pyplot as plt

df = pd.DataFrame([[0, 0, 0, 0]], columns=['alpha', 'beta', 'gamma', 'map'])
top_50 = pd.DataFrame([[0, 0, 0, 0]],
                      columns=['alpha', 'beta', 'gamma', 'map'])
top_50_p = pd.DataFrame([[0, 0, 0, 0]],
                        columns=['alpha', 'beta', 'gamma', 'map'])

# Hybrid (cbf - colf)
rs = HybridRS(tracks_data, 10, tf_idf=True)
evaluator = Evaluator()
rs.fit(train_data)

alpha = 1

while alpha <= 10:
    beta = 1
    while beta <= 10:
        gamma = 1
        while gamma <= 19:

            hybrid = rs.recommend(target_data['playlist_id'], alpha, beta,
                                  gamma)
            print("Alpha: ", alpha, " Beta: ", beta, "Gamma: ", gamma)
            temp_map = evaluator.evaluate(hybrid, test_data)

            # completed from the column layout defined above (the source
            # snippet was cut off here); DataFrame.append is removed in
            # pandas 2.x, so pd.concat is used
            df = pd.concat([
                df,
                pd.DataFrame([[alpha, beta, gamma, temp_map]],
                             columns=['alpha', 'beta', 'gamma', 'map'])
            ], ignore_index=True)
            gamma += 1  # step sizes below are assumed
        beta += 1
    alpha += 1
Example #7
from loader.loader import train_data, test_data, tracks_data, full_data, target_data
from utils.auxUtils import buildURMMatrix, Evaluator
import numpy as np
from svdRS.pureSVD import PureSVDRecommender
from collaborative_filtering_RS.col_user_userRS import ColBfUURS
from collaborative_filtering_RS.col_item_itemRS import ColBfIIRS
from MatrixFactorization.mf_skl import MfNnz
from cbfRS.cbfRS import CbfRS
import matplotlib.pyplot as pyplot
from slimRS.slimElasticNet import SLIMElasticNetRecommender
from slimRS.Cython.SLIM_BPR_Cython import SLIM_BPR_Cython

URM_train = buildURMMatrix(full_data)
evaluator = Evaluator()
profile_length = np.ediff1d(URM_train.indptr)
block_size = int(len(profile_length) * 0.05)
sorted_users = np.argsort(profile_length)

rs_i_i_cf = ColBfIIRS(10, 750, 50, tf_idf=True)
rs_i_i_cf.fit(train_data)
predictions_item_item = rs_i_i_cf.recommend(target_data['playlist_id'])
map_item_item = []

rs_u_u_cf = ColBfUURS(10, 200, 50, tf_idf=True)
rs_u_u_cf.fit(train_data)
predictions_user_user = rs_u_u_cf.recommend(target_data['playlist_id'])
map_user_user = []

rs_content = CbfRS(tracks_data, 10, 10, 10, tf_idf=True)
rs_content.fit(train_data)
predictions_content = rs_content.recommend(target_data['playlist_id'])
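# Sketch of what `block_size` / `sorted_users` above are typically used for:
# MAP per user-activity bucket (5% of users each, sorted by profile length).
# Assumes `recommend` returns a DataFrame with a 'playlist_id' column, as in
# the other examples.
for group_id in range(20):
    start = group_id * block_size
    end = min((group_id + 1) * block_size, len(profile_length))
    users_in_group = sorted_users[start:end]
    preds = predictions_item_item[
        predictions_item_item['playlist_id'].isin(users_in_group)]
    map_item_item.append(evaluator.evaluate(preds, test_data))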
Example #8
    def evaluateRecommendations(self,
                                URM_test,
                                at=10,
                                minRatingsPerUser=1,
                                exclude_seen=True,
                                mode='parallel',
                                filterTopPop=False,
                                filterCustomItems=np.array([], dtype=int),
                                filterCustomUsers=np.array([], dtype=int)):
        """
        Speed info:
        - Sparse weights: batch mode is 2x faster than sequential
        - Dense weights: batch and sequential speed are equivalent
        :param URM_test:                URM to be used for testing
        :param at: 10                   Length of the recommended item list
        :param minRatingsPerUser: 1     Users with fewer than this many interactions are not evaluated
        :param exclude_seen: True       Whether to remove already-seen items from the recommendations
        :param mode: 'sequential', 'parallel', 'batch', or 'cython'
        :param filterTopPop: False or decimal number        Percentage of items to be removed from recommended list and testing interactions
        :param filterCustomItems: Array, default empty           Items ID to NOT take into account when recommending
        :param filterCustomUsers: Array, default empty           Users ID to NOT take into account when recommending
        :return:
        """

        if len(filterCustomItems) == 0:
            self.filterCustomItems = False
        else:
            self.filterCustomItems = True
            self.filterCustomItems_ItemsID = np.array(filterCustomItems)
        '''
        if filterTopPop != False:
            self.filterTopPop = True
            _,_, self.filterTopPop_ItemsID = removeTopPop(self.URM_train, URM_2 = URM_test_new, percentageToRemove=filterTopPop)
            print("Filtering {}% TopPop items, count is: {}".format(filterTopPop*100, len(self.filterTopPop_ItemsID)))
            # Zero-out the items in order to be considered irrelevant
            URM_test_new = check_matrix(URM_test_new, format='lil')
            URM_test_new[:,self.filterTopPop_ItemsID] = 0
            URM_test_new = check_matrix(URM_test_new, format='csr')
        '''

        # During testing CSR is faster
        self.URM_test = check_matrix(URM_test, format='csr')
        self.evaluator = Evaluator()
        self.URM_train = check_matrix(self.URM_train, format='csr')
        self.at = at
        self.minRatingsPerUser = minRatingsPerUser
        self.exclude_seen = exclude_seen

        nusers = self.URM_test.shape[0]

        # Prune users with an insufficient number of ratings
        rows = self.URM_test.indptr
        numRatings = np.ediff1d(rows)
        mask = numRatings >= minRatingsPerUser
        usersToEvaluate = np.arange(nusers)[mask]

        if len(filterCustomUsers) != 0:
            print("Filtering {} Users".format(len(filterCustomUsers)))
            usersToEvaluate = set(usersToEvaluate) - set(filterCustomUsers)

        usersToEvaluate = list(usersToEvaluate)

        if mode == 'sequential':
            return self.evaluateRecommendationsSequential(usersToEvaluate)
        elif mode == 'parallel':
            return self.evaluateRecommendationsParallel(usersToEvaluate)
        elif mode == 'batch':
            return self.evaluateRecommendationsBatch(usersToEvaluate)
        elif mode == 'cython':
            return self.evaluateRecommendationsCython(usersToEvaluate)
        # elif mode=='random-equivalent':
        #     return self.evaluateRecommendationsRandomEquivalent(usersToEvaluate)
        else:
            raise ValueError("Mode '{}' not available".format(mode))
Example #9
    def default_validation_function(self, playlist_ids):
        e = Evaluator()
        return e.evaluate_tuning(self.recommend(playlist_ids),
                                 self.URM_validation)
Example #10
# Imports needed by this class (module locations assumed from the other
# examples; check_matrix / similarityMatrixTopK locations are a best guess)
import multiprocessing
import os
import platform
import subprocess
import sys
import time

import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix

from utils.auxUtils import (Evaluator, buildURMMatrix, check_matrix,
                            filter_seen, similarityMatrixTopK)


class SLIM_BPR_Cython():
    def __init__(self,
                 train_data,
                 URM_validation=None,
                 recompile_cython=True,
                 final_model_sparse_weights=True,
                 train_with_sparse_weights=False,
                 symmetric=True):

        # super(SLIM_BPR_Cython, self).__init__()

        self.URM_train = check_matrix(buildURMMatrix(train_data), 'csr')
        self.top_pop_songs = train_data['track_id'].value_counts().head(
            20).index.values

        self.n_users = self.URM_train.shape[0]
        self.n_items = self.URM_train.shape[1]
        self.normalize = False

        self.train_with_sparse_weights = train_with_sparse_weights
        self.sparse_weights = final_model_sparse_weights

        if URM_validation is not None:
            self.URM_validation = URM_validation.copy()
        else:
            self.URM_validation = None

        if self.train_with_sparse_weights:
            self.sparse_weights = True

        self.URM_mask = self.URM_train.copy()

        self.URM_mask.eliminate_zeros()

        self.symmetric = symmetric

        if not self.train_with_sparse_weights:

            n_items = self.URM_train.shape[1]
            # 8 bytes per float64 entry, divided by 1e6 -> megabytes
            requiredMB = 8 * n_items**2 / 1e+06

            if symmetric:
                requiredMB /= 2

            print(
                "SLIM_BPR_Cython: Estimated memory required for similarity matrix of {} items is {:.2f} MB"
                .format(n_items, requiredMB))

        if recompile_cython:
            print("Compiling in Cython")
            self.runCompilationScript()
            print("Compilation Complete")

    def fit(self,
            epochs=160,
            logFile=None,
            playlist_ids=None,
            filterTopPop=False,
            batch_size=1000,
            lambda_i=0.001,
            lambda_j=0.001,
            learning_rate=0.001,
            topK=200,
            sgd_mode='sgd',
            gamma=0.995,
            beta_1=0.9,
            beta_2=0.999,
            stop_on_validation=False,
            lower_validations_allowed=10,
            validation_metric="map",
            validation_function=None,
            validation_every_n=10):
        '''
        :param epochs:
        :param filterTopPop:
        :param batch_size:
        :param lambda_i: parameter for weighting the SLIM, proposed by paper: 0.0025
        :param lambda_j: parameter for weighting the SLIM, proposed by paper: 0.00025
        :param learning_rate: step size applied at each SGD update
        :param topK: number of neighbours kept per item in the similarity matrix
        :param sgd_mode: adagrad, rmsprop, adam, sgd
        :param gamma: rmsprop value
        :param beta_1: adam value proposed by paper: 0.9
        :param beta_2: adam value proposed by paper: 0.999
        '''

        print('Fitting..')
        # Import compiled module
        from slimRS.Cython.SLIM_BPR_Cython_Epoch import SLIM_BPR_Cython_Epoch
        print('Cython module imported')
        # Select only positive interactions
        URM_train_positive = self.URM_train.copy()
        URM_train_positive.eliminate_zeros()

        self.sgd_mode = sgd_mode
        self.epochs = epochs

        # Validate topK before handing it to the compiled module
        if topK is not False and topK < 1:
            raise ValueError(
                "TopK not valid. Acceptable values are either False or a positive integer value. Provided value was '{}'"
                .format(topK))
        self.topK = topK

        self.cythonEpoch = SLIM_BPR_Cython_Epoch(
            self.URM_mask,
            train_with_sparse_weights=self.train_with_sparse_weights,
            final_model_sparse_weights=self.sparse_weights,
            topK=topK,
            learning_rate=learning_rate,
            li_reg=lambda_i,
            lj_reg=lambda_j,
            batch_size=1,
            symmetric=self.symmetric,
            sgd_mode=sgd_mode,
            gamma=gamma,
            beta_1=beta_1,
            beta_2=beta_2)

        self.logFile = logFile

        if validation_every_n is not None:
            self.validation_every_n = validation_every_n
        else:
            self.validation_every_n = np.inf

        if validation_function is None:
            validation_function = self.default_validation_function

        print('Validation setup complete')

        self.batch_size = batch_size
        self.lambda_i = lambda_i
        self.lambda_j = lambda_j
        self.learning_rate = learning_rate

        start_time = time.time()

        print('Training started')
        best_validation_metric = None
        lower_validations_count = 0
        convergence = False

        self.S_incremental = self.cythonEpoch.get_S()
        self.S_best = self.S_incremental.copy()
        self.epochs_best = 0

        currentEpoch = 0

        while currentEpoch < self.epochs and not convergence:

            if self.batch_size > 0:
                self.cythonEpoch.epochIteration_Cython()
            else:
                print("No batch not available")

            # Determine whether a validation step is required
            if self.URM_validation is not None and (
                    currentEpoch + 1) % self.validation_every_n == 0:

                print("SLIM_BPR_Cython: Validation begins...")

                self.get_S_incremental_and_set_W()

                results_run = validation_function(playlist_ids)

                print("SLIM_BPR_Cython: {}".format(results_run))

                # If early stopping is enabled, check whether this result
                # improves on the best one found so far
                if stop_on_validation:

                    current_metric_value = results_run  # results_run[validation_metric]

                    if best_validation_metric is None or best_validation_metric < current_metric_value:

                        best_validation_metric = current_metric_value
                        self.S_best = self.S_incremental.copy()
                        self.epochs_best = currentEpoch + 1
                        lower_validations_count = 0

                    else:
                        lower_validations_count += 1

                    if lower_validations_count >= lower_validations_allowed:
                        convergence = True
                        print(
                            "SLIM_BPR_Cython: Convergence reached! Terminating at epoch {}. Best value for '{}' at epoch {} is {:.4f}. Elapsed time {:.2f} min"
                            .format(currentEpoch + 1, validation_metric,
                                    self.epochs_best, best_validation_metric,
                                    (time.time() - start_time) / 60))

            # If no validation required, always keep the latest
            if not stop_on_validation:
                self.S_best = self.S_incremental.copy()

            print("SLIM_BPR_Cython: Epoch {} of {}. Elapsed time {:.2f} min".
                  format(currentEpoch + 1, self.epochs,
                         (time.time() - start_time) / 60))

            currentEpoch += 1

        self.get_S_incremental_and_set_W()
        print('Finishing...')
        sys.stdout.flush()

    def writeCurrentConfig(self, currentEpoch, results_run, logFile):

        current_config = {
            'lambda_i': self.lambda_i,
            'lambda_j': self.lambda_j,
            'batch_size': self.batch_size,
            'learn_rate': self.learning_rate,
            'topK_similarity': self.topK,
            'epoch': currentEpoch
        }

        print("Test case: {}\nResults {}\n".format(current_config,
                                                   results_run))
        # print("Weights: {}\n".format(str(list(self.weights))))

        sys.stdout.flush()

        if logFile is not None:
            logFile.write("Test case: {}, Results {}\n".format(
                current_config, results_run))
            # logFile.write("Weights: {}\n".format(str(list(self.weights))))
            logFile.flush()

    def runCompilationScript(self):

        # Run the compile script with the working directory set so that the
        # compiled files end up in the appropriate subfolder, not the project root

        compiledModuleSubfolder = "/slimRS/Cython"
        #fileToCompile_list = ['Sparse_Matrix_CSR.pyx', 'SLIM_BPR_Cython_Epoch.pyx']
        fileToCompile_list = ['SLIM_BPR_Cython_Epoch.pyx']

        for fileToCompile in fileToCompile_list:
            if platform.system() == 'Windows':
                cmd = 'python'
            else:
                cmd = 'python3'

            command = [
                cmd, 'compileCython.py', fileToCompile, 'build_ext',
                '--inplace'
            ]

            output = subprocess.check_output(' '.join(command),
                                             shell=True,
                                             cwd=os.getcwd() +
                                             compiledModuleSubfolder)

            try:

                command = ['cython', fileToCompile, '-a']

                output = subprocess.check_output(' '.join(command),
                                                 shell=True,
                                                 cwd=os.getcwd() +
                                                 compiledModuleSubfolder)

            except Exception:
                # the annotated HTML report is optional; ignore failures
                pass

        print("Compiled module saved in subfolder: {}".format(
            compiledModuleSubfolder))

        # Command to run compilation script
        # python compileCython.py SLIM_BPR_Cython_Epoch.pyx build_ext --inplace

        # Command to generate html report
        # cython -a SLIM_BPR_Cython_Epoch.pyx

    def get_S_incremental_and_set_W(self):

        self.S_incremental = self.cythonEpoch.get_S()

        if self.train_with_sparse_weights:
            self.W_sparse = self.S_incremental
        else:
            if self.sparse_weights:
                self.W_sparse = similarityMatrixTopK(self.S_incremental,
                                                     k=self.topK)
            else:
                self.W = self.S_incremental

    def get_weight_matrix(self):
        if self.train_with_sparse_weights:
            matrix_w = self.W_sparse
        else:
            if self.sparse_weights:
                matrix_w = self.W_sparse
            else:
                matrix_w = self.W
        return csr_matrix(matrix_w, shape=(self.n_items, self.n_items))

    def get_estimated_ratings(self):
        matrix_W = self.get_weight_matrix()
        return check_matrix(self.URM_train.dot(matrix_W), 'csr')

    def get_sym_matrix(self, weight):
        return check_matrix(self.get_weight_matrix() * weight, 'csr')

    def recommend(self, playlist_ids):

        print("Recommending...")

        final_prediction = {}

        if self.train_with_sparse_weights:
            matrix_W = self.W_sparse
        else:
            if self.sparse_weights:
                matrix_W = self.W_sparse
            else:
                matrix_W = self.W

        # W is (n_items x n_items); ensure CSR format and the expected shape
        self.W = csr_matrix(matrix_W, shape=(self.n_items, self.n_items))
        estimated_ratings = check_matrix(self.URM_train.dot(self.W), 'csr')

        counter = 0

        for k in playlist_ids:

            row = estimated_ratings[k]
            # aux contains the indices (track_id) of the most similar songs
            indx = row.data.argsort()[::-1]
            aux = row.indices[indx]
            user_playlist = self.URM_train[k]

            aux = np.concatenate((aux, self.top_pop_songs), axis=None)
            top_songs = filter_seen(aux, user_playlist)[:10]

            string = ' '.join(str(e) for e in top_songs)
            final_prediction.update({k: string})

            if (counter % 1000) == 0:
                print("Playlist num", counter, "/", len(playlist_ids))

            counter += 1

        df = pd.DataFrame(list(final_prediction.items()),
                          columns=['playlist_id', 'track_ids'])
        # print(df)
        return df

    def default_validation_function(self, playlist_ids):
        e = Evaluator()
        return e.evaluate_tuning(self.recommend(playlist_ids),
                                 self.URM_validation)

    def get_URM_train(self):
        return self.URM_train

    def evaluateRecommendations(self,
                                URM_test,
                                at=10,
                                minRatingsPerUser=1,
                                exclude_seen=True,
                                mode='parallel',
                                filterTopPop=False,
                                filterCustomItems=np.array([], dtype=int),
                                filterCustomUsers=np.array([], dtype=int)):
        """
        Speed info:
        - Sparse weights: batch mode is 2x faster than sequential
        - Dense weights: batch and sequential speed are equivalent
        :param URM_test:                URM to be used for testing
        :param at: 10                   Length of the recommended item list
        :param minRatingsPerUser: 1     Users with fewer than this many interactions are not evaluated
        :param exclude_seen: True       Whether to remove already-seen items from the recommendations
        :param mode: 'sequential', 'parallel', 'batch', or 'cython'
        :param filterTopPop: False or decimal number        Percentage of items to be removed from recommended list and testing interactions
        :param filterCustomItems: Array, default empty           Items ID to NOT take into account when recommending
        :param filterCustomUsers: Array, default empty           Users ID to NOT take into account when recommending
        :return:
        """

        if len(filterCustomItems) == 0:
            self.filterCustomItems = False
        else:
            self.filterCustomItems = True
            self.filterCustomItems_ItemsID = np.array(filterCustomItems)
        '''
        if filterTopPop != False:
            self.filterTopPop = True
            _,_, self.filterTopPop_ItemsID = removeTopPop(self.URM_train, URM_2 = URM_test_new, percentageToRemove=filterTopPop)
            print("Filtering {}% TopPop items, count is: {}".format(filterTopPop*100, len(self.filterTopPop_ItemsID)))
            # Zero-out the items in order to be considered irrelevant
            URM_test_new = check_matrix(URM_test_new, format='lil')
            URM_test_new[:,self.filterTopPop_ItemsID] = 0
            URM_test_new = check_matrix(URM_test_new, format='csr')
        '''

        # During testing CSR is faster
        self.URM_test = check_matrix(URM_test, format='csr')
        self.evaluator = Evaluator()
        self.URM_train = check_matrix(self.URM_train, format='csr')
        self.at = at
        self.minRatingsPerUser = minRatingsPerUser
        self.exclude_seen = exclude_seen

        nusers = self.URM_test.shape[0]

        # Prune users with an insufficient number of ratings
        rows = self.URM_test.indptr
        numRatings = np.ediff1d(rows)
        mask = numRatings >= minRatingsPerUser
        usersToEvaluate = np.arange(nusers)[mask]

        if len(filterCustomUsers) != 0:
            print("Filtering {} Users".format(len(filterCustomUsers)))
            usersToEvaluate = set(usersToEvaluate) - set(filterCustomUsers)

        usersToEvaluate = list(usersToEvaluate)

        if mode == 'sequential':
            return self.evaluateRecommendationsSequential(usersToEvaluate)
        elif mode == 'parallel':
            return self.evaluateRecommendationsParallel(usersToEvaluate)
        elif mode == 'batch':
            return self.evaluateRecommendationsBatch(usersToEvaluate)
        elif mode == 'cython':
            return self.evaluateRecommendationsCython(usersToEvaluate)
        # elif mode=='random-equivalent':
        #     return self.evaluateRecommendationsRandomEquivalent(usersToEvaluate)
        else:
            raise ValueError("Mode '{}' not available".format(mode))

    def evaluateOneUser(self, test_user):

        # Being the URM CSR, the indices are the non-zero column indexes
        # relevant_items = self.URM_test_relevantItems[test_user]
        relevant_items = self.URM_test[test_user].indices

        # this will rank top n items
        recommended_items = self.recommend(test_user)

        is_relevant = np.in1d(recommended_items,
                              relevant_items,
                              assume_unique=True)

        # evaluate the recommendation list with ranking metrics ONLY

        map_ = self.evaluator.map(is_relevant, relevant_items)

        return map_

    def evaluateRecommendationsParallel(self, usersToEvaluate):

        print("Evaluation of {} users begins".format(len(usersToEvaluate)))

        pool = multiprocessing.Pool(processes=multiprocessing.cpu_count(),
                                    maxtasksperchild=1)
        resultList = pool.map(self.evaluateOneUser, usersToEvaluate)

        # for i, _ in enumerate(pool.imap_unordered(self.evaluateOneUser, usersToEvaluate), 1):
        #    if(i%1000 == 0):
        #        sys.stderr.write('\rEvaluated {} users ({0:%})'.format(i , i / usersToEvaluate))

        # Close the pool to avoid memory leaks
        pool.close()

        n_eval = len(usersToEvaluate)
        map_ = 0.0

        # Looping is slightly faster than the numpy vectorized approach: less data transformation
        for result in resultList:
            map_ += result  # evaluateOneUser returns a scalar MAP value

        if (n_eval > 0):

            map_ /= n_eval

        else:
            print(
                "WARNING: No users had a sufficient number of relevant items")

        results_run = {}

        results_run["map"] = map_

        return results_run
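# Usage sketch, mirroring the examples above (assumes the same loader splits;
# pass recompile_cython=False if the Cython extension is already built):
from loader.loader import train_data, test_data, target_data

rs_usage = SLIM_BPR_Cython(train_data, recompile_cython=False)
rs_usage.fit(epochs=160, lambda_i=0.001, lambda_j=0.001, topK=200)
predictions = rs_usage.recommend(target_data['playlist_id'])
print(Evaluator().evaluate(predictions, test_data))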
Example #11
from lightfm import LightFM
from lightfm.data import Dataset
from loader.loader import test_data, train_data, target_data, tracks_data
from utils.auxUtils import buildFMMatrix, buildURMMatrix, Evaluator
from scipy.sparse import coo_matrix
import numpy as np
import pandas as pd
from tqdm import tqdm

e = Evaluator()
# Instantiate and train the model
alpha = 1e-3
model = LightFM(no_components=30, loss='warp', learning_rate=0.01)
# todo add latent factors weights
# todo force the dimension of the data matrix
urm = coo_matrix(buildURMMatrix(train_data))
print('Fitting...')
# todo: item features. `item_feature` was undefined in the original snippet;
# the imported (but unused) buildFMMatrix helper is assumed to build it.
item_feature = buildFMMatrix(tracks_data)
model.fit(urm, epochs=30, num_threads=4, item_features=item_feature)
final_prediction = {}
tracks = np.array(tracks_data['track_id'], dtype='int32')
for k in tqdm(target_data['playlist_id']):
    # user_index = np.full(len(tracks), k, dtype='int32')
    predictions = model.predict(k, tracks)
    # argsort returns positions into `tracks`, not track ids; map them back
    top_tracks = tracks[(np.argsort(predictions)[::-1])[:10]]
    string = ' '.join(str(t) for t in top_tracks)
    final_prediction.update({k: string})

df = pd.DataFrame(list(final_prediction.items()),
                  columns=['playlist_id', 'track_ids'])
print(df)
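# Score the LightFM output with the same MAP evaluator used by the other
# examples (sketch; assumes this DataFrame format matches their output):
print(e.evaluate(df, test_data))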