Пример #1
0
def test_LeaveOneOut():
    """Check LeaveOneOut on a degenerate dataset, then its seeding behaviour
    and its one-rating-per-user property on the ml-100k test file."""

    here = os.path.dirname(os.path.realpath(__file__))

    def testsets_of(cv, dataset):
        # One full pass over the folds, keeping only the test halves.
        return [held_out for (_, held_out) in cv.split(dataset)]

    # Custom dataset: each user only has 1 item, so no trainset can be built
    # and asking for the first fold must raise.
    tiny_reader = Reader(line_format='user item rating', sep=' ', skip_lines=3,
                         rating_scale=(1, 5))
    tiny_data = Dataset.load_from_file(file_path=here + '/custom_dataset',
                                       reader=tiny_reader)
    loo = LeaveOneOut()
    with pytest.raises(ValueError):
        next(loo.split(tiny_data))

    data = Dataset.load_from_file(file_path=here + '/u1_ml100k_test',
                                  reader=Reader('ml-100k'))

    # random_state=None: two passes over the same iterator are expected to
    # differ (conditioned by the rng, of course).
    unseeded = LeaveOneOut(random_state=None)
    first_pass = testsets_of(unseeded, data)
    second_pass = testsets_of(unseeded, data)
    assert first_pass != second_pass

    # Fixed random_state: repeated calls to split() give the same folds.
    seeded = LeaveOneOut(random_state=1)
    first_pass = testsets_of(seeded, data)
    second_pass = testsets_of(seeded, data)
    assert first_pass == second_pass

    # Every user must appear exactly once in each testset.
    for _, testset in LeaveOneOut().split(data):
        ratings_per_user = Counter(uid for (uid, _, _) in testset)
        assert all(n == 1 for n in itervalues(ratings_per_user))
Пример #2
0
    def __init__(self, data, popularity_ranks, diversity=False, leave_one_out=False, anti_test=False):
        """Prepare the dataset views used during evaluation.

        The full trainset and a seeded 75/25 split are always built. The full
        anti-testset, the leave-one-out split (with its anti-testset) and the
        fitted item-similarity model are only built when ``anti_test``,
        ``leave_one_out`` and ``diversity`` respectively are truthy; otherwise
        the corresponding attributes are not set at all.
        """
        seed = 100
        self.rankings = popularity_ranks
        self.random_state = seed

        # Trainset over every rating, for evaluating overall properties.
        self.full_train = data.build_full_trainset()

        if anti_test:
            self.full_test = self.full_train.build_anti_testset()

        # Accuracy is measured on a 75/25 train/test split.
        self.train, self.test = train_test_split(data, random_state=seed, test_size=0.25)

        if leave_one_out:
            # Top-N evaluation: n_splits=1 yields a single leave-one-out fold;
            # the anti-testset of its training half is what gets predicted on.
            splitter = LeaveOneOut(random_state=seed, n_splits=1)
            self.LOOCV_train, self.LOOCV_test = next(iter(splitter.split(data)))
            self.LOOCV_anti_test = self.LOOCV_train.build_anti_testset()

        if diversity:
            # Item-based cosine similarities, fitted on the full trainset, back
            # the diversity measurement.
            self.similarites = KNNBaseline(sim_options=dict(name='cosine', user_based=False))
            self.similarites.fit(self.full_train)
Пример #3
0
    def __init__(self, data, popularityRankings):
        """Build every train/test view of ``data`` needed for evaluation and
        fit the item-similarity model used for diversity."""

        self.rankings = popularityRankings

        # Build the training set over all of the data, and its anti-testset.
        full_trainset = data.build_full_trainset()
        self.fullTrainSet = full_trainset
        self.fullAntiTestSet = full_trainset.build_anti_testset()

        # Split the data 75/25 into train and test sets to measure accuracy.
        split = train_test_split(data, test_size=.25, random_state=1)
        self.trainSet, self.testSet = split

        # Leave-one-out split for top-N evaluation (n_splits=1 yields a single
        # fold), plus an anti-testset of its training half to predict on.
        splitter = LeaveOneOut(n_splits=1, random_state=1)
        self.LOOCVTrain, self.LOOCVTest = next(iter(splitter.split(data)))
        self.LOOCVAntiTestSet = self.LOOCVTrain.build_anti_testset()

        # Fit an item-based cosine similarity model so that diversity can be
        # measured.
        self.simsAlgo = KNNBaseline(sim_options=dict(name='cosine', user_based=False))
        self.simsAlgo.fit(full_trainset)
Пример #4
0
    def __init__(self, data, popularity_rankings):
        """Derive the evaluation datasets from ``data``.

        Parameters
        ----------
        data: DatasetAutoFolds
            The data the model is built from; should come from the surprise
            ``Dataset`` class.
        popularity_rankings: defaultdict
            Ranking of the items, stored unchanged as ``self.rankings``.
        """
        self.rankings = popularity_rankings

        # Whole-dataset trainset and its anti-testset, for overall properties.
        full = data.build_full_trainset()
        self.full_trainset = full
        self.full_antiset = full.build_anti_testset()

        # 75/25 train/test split for measuring accuracy.
        self.trainset, self.testset = train_test_split(
            data, test_size=.25, random_state=1)

        # Single leave-one-out fold (n_splits=1) for evaluating top-N
        # recommenders, and the anti-testset used to build predictions.
        splitter = LeaveOneOut(n_splits=1, random_state=1)
        self.loocv_train, self.loocv_test = next(iter(splitter.split(data)))
        self.loocv_anti_testset = self.loocv_train.build_anti_testset()

        # Item-item cosine similarity model, fitted on the full trainset, so
        # diversity can be measured.
        item_cosine = {'name': 'cosine', 'user_based': False}
        self.sims_algo = KNNBaseline(sim_options=item_cosine)
        self.sims_algo.fit(full)
Пример #5
0
    def __init__(self, data, popularityRankings):
        """Prepare the full, 75/25 and leave-one-out views of ``data`` and fit
        the item-similarity model used to measure diversity."""

        self.rankings = popularityRankings

        # Full training set, for evaluating overall properties.
        self.fullTrainSet = data.build_full_trainset()
        # Its anti-testset, for building predictions: a list of
        # ``(uid, iid, fill)`` tuples with raw ids, covering all the ratings
        # that are **not** in the trainset.
        self.fullAntiTestSet = self.fullTrainSet.build_anti_testset()

        # 75/25 train/test split for measuring accuracy.
        # TODO: k-fold cross validation
        holdout = train_test_split(data, test_size=.25, random_state=1)
        self.trainSet, self.testSet = holdout

        # "Leave one out" split for evaluating top-N recommenders: one rated
        # movie is extracted from each user. n_splits=1 yields a single fold.
        one_out = LeaveOneOut(n_splits=1, random_state=1)
        self.LOOCVTrain, self.LOOCVTest = next(iter(one_out.split(data)))
        self.LOOCVAntiTestSet = self.LOOCVTrain.build_anti_testset()

        # Similarity model between items, so diversity can be measured.
        self.simsAlgo = KNNBaseline(
            sim_options={'name': 'cosine', 'user_based': False})
        self.simsAlgo.fit(self.fullTrainSet)
Пример #6
0
    def __init__(self, data, popularityRankings):
        """Store the popularity rankings and build all evaluation datasets."""

        self.rankings = popularityRankings

        # --- whole dataset: trainset + anti-testset (overall properties) ---
        everything = data.build_full_trainset()
        self.fullTrainSet = everything
        self.fullAntiTestSet = everything.build_anti_testset()

        # --- 75/25 split (accuracy) ---
        self.trainSet, self.testSet = train_test_split(
            data,
            test_size=.25,
            random_state=1,
        )

        # --- leave-one-out (top-N recommenders) ---
        # A single fold is requested, so the first fold is the only one; its
        # anti-testset is what predictions are built on.
        loo_train, loo_test = next(
            iter(LeaveOneOut(n_splits=1, random_state=1).split(data)))
        self.LOOCVTrain = loo_train
        self.LOOCVTest = loo_test
        self.LOOCVAntiTestSet = loo_train.build_anti_testset()

        # --- item-item cosine similarities (diversity) ---
        options = {'name': 'cosine', 'user_based': False}
        self.simsAlgo = KNNBaseline(sim_options=options)
        self.simsAlgo.fit(everything)
Пример #7
0
    def __init__(self, data):
        """Build the train/test views of ``data`` used for evaluation.

        Sets, in order: a seeded 75/25 split (``train_set`` / ``test_set``),
        one leave-one-out fold with the anti-testset of its training half
        (``LOOX_trainSet`` / ``LOOX_testSet`` / ``LOOX_anti_testSet``), and
        the full trainset with its anti-testset (``full_trainSet`` /
        ``full_anti_testSet``).

        Parameters
        ----------
        data
            Dataset object accepted by ``train_test_split`` and
            ``LeaveOneOut.split`` and providing ``build_full_trainset()``.
        """
        self.train_set, self.test_set = train_test_split(data,
                                                         test_size=0.25,
                                                         random_state=1)

        # The first positional argument is n_splits: exactly one fold is
        # produced, so take it directly instead of looping.
        LOOX = LeaveOneOut(1, random_state=1)
        self.LOOX_trainSet, self.LOOX_testSet = next(iter(LOOX.split(data)))
        self.LOOX_anti_testSet = self.LOOX_trainSet.build_anti_testset()

        # Fixed: this was spelled `buid_full_trainset`, which raised
        # AttributeError every time the object was constructed.
        self.full_trainSet = data.build_full_trainset()
        self.full_anti_testSet = self.full_trainSet.build_anti_testset()
Пример #8
0
    def __init__(self, data):
        """Build the full, 75/25 and leave-one-out datasets from ``data`` and
        fit an item-based cosine KNNBaseline on the full trainset."""

        # Trainset over all ratings and the matching anti-testset.
        full = data.build_full_trainset()
        self.fullTrainSet = full
        self.fullAntiTestSet = full.build_anti_testset()

        # Seeded 75/25 split.
        holdout = train_test_split(data, test_size=0.25, random_state=1)
        self.trainSet, self.testSet = holdout

        # n_splits=1: the iterator yields exactly one (trainset, testset) pair.
        cv = LeaveOneOut(n_splits=1, random_state=1)
        self.LOOCVTrain, self.LOOCVTest = next(iter(cv.split(data)))
        self.LOOCVAntiTestSet = self.LOOCVTrain.build_anti_testset()

        # Item-item (user_based=False) cosine similarity model.
        self.simsAlgo = KNNBaseline(
            sim_options={'name': 'cosine', 'user_based': False})
        self.simsAlgo.fit(full)
Пример #9
0
    def __init__(self,data,withSim=False):
        """Build the 75/25, leave-one-out and full datasets from ``data``.

        When ``withSim`` is truthy, an item-based cosine KNNBaseline is also
        fitted on the full trainset and stored as ``self.simAlgo``; otherwise
        that attribute is not set.
        """
        # Seeded 75/25 split (note: seed 0 here, seed 1 for leave-one-out).
        holdout = train_test_split(data, test_size=0.25, random_state=0)
        self.trainSet, self.testSet = holdout

        # The first positional argument is n_splits, so a single fold is
        # produced; keep it and the anti-testset of its training half.
        splitter = LeaveOneOut(1, random_state=1)
        self.LOOX_trainSet, self.LOOX_testSet = next(iter(splitter.split(data)))
        self.LOOX_antitestSet = self.LOOX_trainSet.build_anti_testset()

        # Trainset over all ratings and its anti-testset.
        self.full_trainSet = data.build_full_trainset()
        self.full_antitestSet = self.full_trainSet.build_anti_testset()

        if not withSim:
            return

        self.simAlgo = KNNBaseline(
            sim_options={'name': 'cosine', 'user_based': False})
        self.simAlgo.fit(self.full_trainSet)
Пример #10
0
def test_LeaveOneOut(toy_data):
    """Exercise LeaveOneOut: failure on toy data, random_state handling, one
    rating per user in each testset, and the min_n_ratings parameter."""

    def testsets_of(cv, dataset):
        # One full pass over the folds, keeping only the test halves.
        return [held_out for (_, held_out) in cv.split(dataset)]

    # In the toy data each user only has 1 item, so trainsets fail.
    loo = LeaveOneOut()
    with pytest.raises(ValueError):
        next(loo.split(toy_data))

    here = os.path.dirname(os.path.realpath(__file__))
    data = Dataset.load_from_file(file_path=here + '/u1_ml100k_test',
                                  reader=Reader('ml-100k'),
                                  rating_scale=(1, 5))

    # random_state=None: a different split is expected on each pass
    # (conditioned by the rng, of course).
    unseeded = LeaveOneOut(random_state=None)
    first_pass = testsets_of(unseeded, data)
    second_pass = testsets_of(unseeded, data)
    assert first_pass != second_pass

    # random_state set: repeated calls to split() lead to the same folds.
    seeded = LeaveOneOut(random_state=1)
    first_pass = testsets_of(seeded, data)
    second_pass = testsets_of(seeded, data)
    assert first_pass == second_pass

    # Only one rating per user may be present in a testset.
    for _, testset in LeaveOneOut().split(data):
        ratings_per_user = Counter(uid for (uid, _, _) in testset)
        assert all(n == 1 for n in itervalues(ratings_per_user))

    # min_n_ratings: every user left in a trainset has at least that many
    # ratings there.
    for threshold in (5, 10):
        for trainset, _ in LeaveOneOut(min_n_ratings=threshold).split(data):
            assert all(len(ratings) >= threshold
                       for ratings in itervalues(trainset.ur))

    # A threshold that is too high leaves nothing to split.
    too_strict = LeaveOneOut(min_n_ratings=10000)
    with pytest.raises(ValueError):
        next(too_strict.split(data))
Пример #11
0
    def __init__(self, ratingsFilePath, moviesFilePath, verbose=True):
        """Load the MovieLens ratings and derive every dataset used later.

        Builds, in order: the full trainset and its anti-testset, a seeded and
        shuffled 75/25 split, one leave-one-out fold with its anti-testset,
        and an item-based cosine KNNBaseline fitted on the full trainset.
        Progress messages are printed when ``verbose`` is truthy.
        """
        self.ratingsPath = ratingsFilePath
        self.moviesPath = moviesFilePath

        if verbose:
            print("\nLoading Movies and Ratings...")

        # Load the data and the popularity ranks through the MovieLens helper.
        loader = MovieLens(self.ratingsPath, self.moviesPath)
        self.movielens = loader
        self.ratings = loader.loadMovieLensLatestSmall()
        self.popularity_rankings = loader.getPopularityRanks()

        # Full input dataset: trainset plus the anti-testset derived from it.
        self.trainset_full = self.ratings.build_full_trainset()
        self.antitestset_full = self.trainset_full.build_anti_testset()

        # 75/25 train/test split.
        percent_split = train_test_split(
            self.ratings, test_size=0.25, random_state=1, shuffle=True)
        self.trainset_percent_split, self.testset_percent_split = percent_split

        # "Leave one out" split for evaluating top-N recommenders. n_splits=1
        # yields exactly one fold.
        splitter = LeaveOneOut(n_splits=1, random_state=1)
        fold_train, fold_test = next(iter(splitter.split(self.ratings)))
        self.trainset_loocv = fold_train
        self.testset_loocv = fold_test
        self.antitestset_loocv = fold_train.build_anti_testset()

        # Similarity model between items, so diversity can be measured.
        self.similarity_algorithm = KNNBaseline(
            sim_options={'name': 'cosine', 'user_based': False})
        self.similarity_algorithm.fit(self.trainset_full)

        if verbose:
            print("\nMovies and Ratings loaded\n")
Пример #12
0
def test_LeaveOneOut(toy_data):
    """LeaveOneOut must fail on the toy data, honour random_state, keep one
    rating per user in each testset and respect min_n_ratings."""

    # Each toy-data user only has 1 item, so trainsets fail.
    cv = LeaveOneOut()
    with pytest.raises(ValueError):
        next(cv.split(toy_data))

    ml100k_reader = Reader('ml-100k')
    test_dir = os.path.dirname(os.path.realpath(__file__))
    data_path = test_dir + '/u1_ml100k_test'
    data = Dataset.load_from_file(file_path=data_path, reader=ml100k_reader,
                                  rating_scale=(1, 5))

    # Without a seed, two passes are expected to give different splits
    # (conditioned by the rng, of course).
    cv = LeaveOneOut(random_state=None)
    run_1 = [fold[1] for fold in cv.split(data)]
    run_2 = [fold[1] for fold in cv.split(data)]
    assert run_1 != run_2

    # With a seed, repeated calls to split() lead to the same folds.
    cv = LeaveOneOut(random_state=1)
    run_1 = [fold[1] for fold in cv.split(data)]
    run_2 = [fold[1] for fold in cv.split(data)]
    assert run_1 == run_2

    # Make sure only one rating per user is present in the testset.
    cv = LeaveOneOut()
    for _, testset in cv.split(data):
        seen = Counter(rating[0] for rating in testset)
        assert all(count == 1 for count in itervalues(seen))

    # min_n_ratings: users kept in the trainset have at least that many
    # ratings there.
    for floor in (5, 10):
        cv = LeaveOneOut(min_n_ratings=floor)
        for trainset, _ in cv.split(data):
            assert all(len(user_ratings) >= floor
                       for user_ratings in itervalues(trainset.ur))

    # Too high a floor: split() must raise.
    cv = LeaveOneOut(min_n_ratings=10000)
    with pytest.raises(ValueError):
        next(cv.split(data))
Пример #13
0
    def build_train_test(self, test_size=.25):
        """Build the train/test, anti-test and leave-one-out sets and fit the
        item-similarity model.

        ``test_size`` is forwarded to ``train_test_split``; every split is
        seeded with ``random_state=1``.
        """
        # Train set / test set used to test results.
        split = train_test_split(self.dataset, test_size=test_size, random_state=1)
        self.train_set, self.test_set = split

        # https://surprise.readthedocs.io/en/stable/trainset.html#surprise.Trainset.build_anti_testset
        # Covers the situation where the user u is known and the item is
        # known, but the rating is not in the trainset.
        self.anti_test_set = self.full_dataset.build_anti_testset()

        # Cross-validation iterator where each user has exactly one rating in
        # the testset. n_splits=1, so the first fold is the only one.
        splitter = LeaveOneOut(n_splits=1, random_state=1)
        first_fold = next(iter(splitter.split(self.dataset)))
        self.leave_one_out_train_set, self.leave_one_out_test_set = first_fold
        self.leave_one_out_anti_test_set = (
            self.leave_one_out_train_set.build_anti_testset())

        # Similarity model between items, so diversity can be measured.
        self.similarity_algorithm = KNNBaseline(
            sim_options={'name': 'cosine', 'user_based': False})
        self.similarity_algorithm.fit(self.full_dataset)
    def __init__(self, data, popularityRanking):
        """Keep the popularity ranking and build the datasets used for
        accuracy, top-N and diversity evaluation."""
        self.ranking = popularityRanking

        # Full trainset and its anti-testset, used for prediction with the
        # KNNBaseline algorithm.
        full = data.build_full_trainset()
        self.fullTrainingSet = full
        self.fullAntiTestSet = full.build_anti_testset()

        # Training (75%) / test (25%) split; random_state is the seed of the
        # random number generator.
        self.trainset, self.testset = train_test_split(
            data, test_size=0.25, random_state=1)

        # Leave-one-out cross-validation for top-N recommenders; n_splits=1
        # yields a single fold.
        splitter = LeaveOneOut(n_splits=1, random_state=1)
        self.LOOCVTrain, self.LOOCVTest = next(iter(splitter.split(data)))
        # Anti-testset of the leave-one-out trainset, for predictions.
        self.LOOCVAntiTestSet = self.LOOCVTrain.build_anti_testset()

        # Item-based cosine similarity model, used to measure diversity.
        cosine_items = {'name': 'cosine', 'user_based': False}
        self.simsAlgo = KNNBaseline(sim_options=cosine_items)
        self.simsAlgo.fit(full)
Пример #15
0
    def __init__(self, df, popRankings):
        """Convert ``df`` through ``self._convertToSurprise()`` and build the
        full, 75/25 and leave-one-out datasets from the result."""
        self.df = df
        # _convertToSurprise() takes no arguments; `self.df` is assigned first
        # so it is available to it.
        self.data = self._convertToSurprise()
        self.rankings = popRankings
        dataset = self.data

        # Training set for the entire data and its anti-testset, for
        # evaluating overall properties.
        self.fullTrainSet = dataset.build_full_trainset()
        self.fullAntiTestSet = self.fullTrainSet.build_anti_testset()

        # 75/25 train/test split for measuring accuracy.
        self.trainSet, self.testSet = train_test_split(
            dataset, test_size=.25, random_state=1)

        # "Leave one out" split for evaluating top-N recommenders (a single
        # fold, since n_splits=1) and the anti-testset for building
        # predictions.
        splitter = LeaveOneOut(n_splits=1, random_state=1)
        self.LOOCVTrain, self.LOOCVTest = next(iter(splitter.split(dataset)))
        self.LOOCVAntiTestSet = self.LOOCVTrain.build_anti_testset()
Пример #16
0
# Script fragment: fit a seeded SVD, report RMSE/MAE on an existing split,
# then re-fit on a leave-one-out split to prepare top-10 evaluation.
# `trainSet`, `testSet` and `data` must already be defined before this point.
algo = SVD(random_state=10)
algo.fit(trainSet)

print("\nComputing recommendations...")
predictions = algo.test(testSet)

print("\nEvaluating accuracy of model...")
print("RMSE: ", RecommenderMetrics.RMSE(predictions))
print("MAE: ", RecommenderMetrics.MAE(predictions))

print("\nEvaluating top-10 recommendations...")

# Set aside one rating per user for testing
LOOCV = LeaveOneOut(n_splits=1, random_state=1)

# NOTE: the loop targets rebind the module-level `trainSet` / `testSet` used
# above, and `algo` is re-fitted in place on the leave-one-out trainset.
for trainSet, testSet in LOOCV.split(data):
    print("Computing recommendations with leave-one-out...")

    # Train model without left-out ratings
    algo.fit(trainSet)

    # Predicts ratings for left-out ratings only
    print("Predict ratings for left-out set...")
    leftOutPredictions = algo.test(testSet)

    # Build predictions for all ratings not in the training set
    print("Predict all missing ratings...")
    bigTestSet = trainSet.build_anti_testset()
    allPredictions = algo.test(bigTestSet)

    # Compute top 10 recs for each user
    # (the code for this step is not part of this fragment)
# Script fragment: evaluate `algo` with 5-fold and leave-one-out cross
# validation, then set up a grid search over SVD hyper-parameters.
# `algo` and `data` must already be defined before this point.

# define a cross-validation iterator
kf = KFold(n_splits=5, random_state=22)
print("\nKFold Cross Validation")
for trainset, testset in kf.split(data):
    # train and test algorithm.
    algo.fit(trainset)
    predictions = algo.test(testset)

    # Compute and print Root Mean Squared Error
    accuracy.rmse(
        predictions,
        verbose=True)  # author's note: "around 87% accuracy" -- NOTE(review): RMSE is an error measure, not an accuracy; confirm what was meant

# Same evaluation loop, but each of the 5 folds is a leave-one-out split.
loo = LeaveOneOut(n_splits=5, random_state=22)
print("\nLeave One Out Cross Validation")
for trainset, testset in loo.split(data):
    # train and test algorithm.
    algo.fit(trainset)
    predictions = algo.test(testset)

    # Compute and print Root Mean Squared Error
    accuracy.rmse(predictions, verbose=True)
# To find out which parameter combination yields the best results, use the
# GridSearchCV scheme: every combination of the values below is a candidate.

param_grid = {
    'n_epochs': [5, 10],
    'lr_all': [0.002, 0.005],
    'reg_all': [0.4, 0.6]
}
# 3-fold cross validation, scored on RMSE and MAE. The search object is only
# constructed here; fitting it is not part of this fragment.
gs = GridSearchCV(SVD, param_grid, measures=['rmse', 'mae'], cv=3)
Пример #18
0
class DataHandler:
    """Load the ratings file and build the datasets used to fit and evaluate
    recommenders.

    Constructing an instance reads ``rating``, builds the full, 80/20 and
    leave-one-out datasets, computes the popularity rankings and fits an
    item-based cosine ``KNNBaseline`` on the full trainset. The ``Get*`` /
    ``get*`` methods return the objects built by ``__init__``.
    """

    # Paths of the input files, relative to the working directory.
    rating = './ml-latest-small/ratings.csv'
    movies = './ml-latest-small/movies.csv'

    # for testing purpose
    # rating = './test-data/ratings.csv'
    # movies = './test-data/movies.csv'

    def LoadRating(self):
        """Load the rating data -- the main dataset.

        Return: the dataset read from ``self.rating`` (comma separated, first
        line skipped).
        """
        reader = Reader(line_format='user item rating timestamp', sep=',', skip_lines=1)
        return Dataset.load_from_file(self.rating, reader=reader)

    def loadPopularityData(self):
        """Load the popularity data.

        Counts how many rows of ``self.rating`` mention each movie id (second
        column, first row skipped as the header) and ranks the movies by that
        count: rank 1 is the most rated movie, ties keep file order.

        Return: a ``defaultdict(int)`` mapping movie id to rank; movies that
        are not in the file read as 0. An empty file gives an empty mapping.
        """
        # defaultdict is similar to getOrDefault in Java
        ratingTimes = defaultdict(int)

        with open(self.rating, newline='') as csvfile:
            reader = csv.reader(csvfile)
            # Skip the header row; the default keeps an empty file from
            # raising StopIteration.
            next(reader, None)
            for row in reader:
                ratingTimes[int(row[1])] += 1

        rankings = defaultdict(int)
        by_count = sorted(ratingTimes.items(), key=lambda x: x[1], reverse=True)
        for rank, (movieID, _count) in enumerate(by_count, start=1):
            rankings[movieID] = rank
        return rankings

    def getEvaluation(self):
        """
            Getter for evaluation data
            Return: the full dataset
        """
        return self.fulldata

    def getRank(self):
        """
            Getter for the rank data
            Return: the popularity data set
        """
        return self.popularitydata

    def __init__(self):
        """
            The constructor that builds the different types of dataset
            prepared for fitting the model.
        """
        # build the full data
        self.fulldata = self.LoadRating()
        self.popularitydata = self.loadPopularityData()
        self.fullTrainData = self.fulldata.build_full_trainset()
        # build the full anti data test set
        self.fullAntiTestData = self.fullTrainData.build_anti_testset()
        self.fullTestData = self.fullTrainData.build_testset()

        # get 80% train data and 20% test data
        # NOTE: no random_state is passed, so this split is not seeded.
        self.traindata, self.testdata = train_test_split(self.fulldata, test_size=0.2)

        # build leave-one-out cross validation
        # LeaveOneOut() produces several folds by default but only one is
        # kept, so take the first fold and stop instead of building them all
        # and keeping the last. The splitter is unseeded either way.
        self.LOO_Data = LeaveOneOut()
        self.LOO_Train, self.LOO_Test = next(iter(self.LOO_Data.split(self.fulldata)))
        self.LOOAntiTest = self.LOO_Train.build_anti_testset()

        # pass the popularitydata
        self.rank = self.popularitydata

        # similarity used for diversity
        sim_options = {'name': 'cosine', 'user_based': False}  # compute similarities between items
        self.sim_matrix = KNNBaseline(sim_options=sim_options)
        self.sim_matrix.fit(self.fullTrainData)

    # Getters for the different datasets.

    def GetFullTrainData(self):
        """Return the trainset built from the full dataset."""
        return self.fullTrainData

    def GetAntiTestData(self):
        """Return the anti-testset of the full trainset."""
        return self.fullAntiTestData

    def GetAntiUserTestData(self, userId):
        """Build the anti-testset of the full trainset for one user only.

        Same logic as ``build_anti_testset`` but restricted to ``userId``:
        every item of the trainset that is not among the user's ratings
        becomes a ``(raw user id, raw item id, global mean)`` tuple. Raw ids
        are recorded because the results are looked up in pandas later.

        ``userId`` is converted with ``str()`` before the lookup; an id the
        trainset cannot resolve makes ``to_inner_uid`` raise.
        """
        trainset = self.fullTrainData
        fill = trainset.global_mean

        # The trainset works on inner ids, so resolve the user once.
        uidint = trainset.to_inner_uid(str(userId))
        raw_uid = trainset.to_raw_uid(uidint)
        user_watched_movies = {iid for (iid, _) in trainset.ur[uidint]}

        return [(raw_uid, trainset.to_raw_iid(i), fill)
                for i in trainset.all_items()
                if i not in user_watched_movies]

    def GetFullTestData(self):
        """Return the testset built from the full trainset."""
        return self.fullTestData

    def GetTrainData(self):
        """Return the 80% training part of the train/test split."""
        return self.traindata

    def GetTestData(self):
        """Return the 20% test part of the train/test split."""
        return self.testdata

    def GetLOOTrain(self):
        """Return the leave-one-out trainset."""
        return self.LOO_Train

    def GetLOOTest(self):
        """Return the leave-one-out testset."""
        return self.LOO_Test

    def GetLOOAntiTestSet(self):
        """Return the anti-testset of the leave-one-out trainset."""
        return self.LOOAntiTest

    def GetPopularRankings(self):
        """Return the popularity rankings."""
        return self.rank

    def GetSimilarities(self):
        """Return the fitted KNNBaseline used for item similarities."""
        return self.sim_matrix
Пример #19
0


# Script fragment: load a ratings dataframe from Excel, hold out one rating
# per user, and compute user-based similarity matrices (cosine, then msd).
# The input file holds the dataframe created by user_base_rs_v03.
file_nm =   "C:\\SYUE\\RecSys\\rs_df_sum_qty_final_case1.xlsx"
df_sum_qty_final = pd.read_excel(file_nm)


reader = Reader(rating_scale=(1, 10))  # Reader object; rating_scale is required 
# The three columns are passed in (user, item, rating) order.
data = Dataset.load_from_df(df_sum_qty_final[['HH_SK', 'PROD_SK', 'UNIT_QTY']], reader) # type:  surprise.dataset.DatasetAutoFolds

ft = data.build_full_trainset()
print("The total number of users in data:", ft.n_users,  "The total number of items in data:", ft.n_items )
# Set aside one rating per user for testing
LOOCV = LeaveOneOut(n_splits=1, random_state=1)
# n_splits=1, so this loop body runs once and simply captures the single fold.
for train, test in LOOCV.split(data):
    trainSet = train
    testSet  =  test
print("The total number of users in trainSet:", trainSet.n_users,  "The total number of items in trainSet:", trainSet.n_items ) 
print("The total length of testSet:", len(testSet),"\n Example of testSet:", testSet[0:2]) 
# Raw user ids listed in inner-id order (inner ids run from 0 to n_users - 1).
_idx_user =  [trainSet.to_raw_uid(uiid) for uiid in range(trainSet.n_users)]
mtx_measure = 'cosine'; user_base = True
dir_loc = "C:\\SYUE\\RecSys\\"
xlsx_file = dir_loc + "u_sim_cosine_case1.xlsx"
# NOTE(review): f_rs_cr_sim_matrix is defined elsewhere; presumably it computes
# the similarity matrix and writes it to xlsx_file -- confirm.
df = f_rs_cr_sim_matrix(trainSet, xlsx_file, user_base, mtx_measure,  _idx_user)  
 

# Same call with the 'msd' measure and a separate output file.
mtx_measure = 'msd'; user_base = True
xlsx_file = dir_loc + "u_sim_msd_case1.xlsx"
df_msd = f_rs_cr_sim_matrix(trainSet, xlsx_file, user_base, mtx_measure,  _idx_user)  
    already_watched = {movie_id: 1 for movie_id, _ in trainset.ur[user]}

    n_recommendations = 0
    for movie, final_score in sorted(candidates.items(),
                                     key=itemgetter(1),
                                     reverse=True):
        if movie not in already_watched:
            n_recommendations += 1
            movie_id = trainset.to_raw_iid(movie)
            top_n[int(trainset.to_raw_uid(user))].append((int(movie_id), 0.0))
            if n_recommendations >= recs:
                break

    # COLD START PROBLEM: RANDOM EXPLORATION
    random_movie = ml.get_random_movie()
    top_n[int(trainset.to_raw_uid(user))].append((random_movie.iloc[0], 0.0))


# Script fragment: hold out one rating per user, build top-N lists for every
# user of the training fold, and print the hit rate against the held-out set.
# `data`, `top_n_user_based_cf` and `hit_rate` must be defined elsewhere.
LOOCV = LeaveOneOut(n_splits=1, random_state=1)

for trainset, testset in LOOCV.split(data):

    # User-based cosine KNN fitted on the training fold.
    # NOTE(review): `algo` is not passed to top_n_user_based_cf below;
    # presumably that function reads it as a global -- confirm.
    algo = KNNBasic(sim_options={'name': 'cosine', 'user_based': True})
    algo.fit(trainset)

    # NOTE(review): `top_n` is not passed either; presumably
    # top_n_user_based_cf fills this module-level mapping -- confirm.
    top_n = defaultdict(list)
    # Iterate over the users by inner id (0 .. n_users - 1).
    for uiid in range(trainset.n_users):
        top_n_user_based_cf(trainset, uiid, recs=40, quality_threshold=0.95)

# Uses `top_n` and `testset` as left by the loop above (n_splits=1, so that is
# the only fold).
print("Hit Rate: ", hit_rate(top_n, testset))