Example #1
import io

from surprise import Dataset, KNNBasic, Reader


def user_based_cf(co_pe):
    # INITIALIZE REQUIRED PARAMETERS
    # path = 'ml-100k/u.user'
    prnt = "USER"
    sim_op = {'name': co_pe, 'user_based': True}
    algo = KNNBasic(sim_options=sim_op)

    reader = Reader(line_format="user item rating",
                    sep='\t',
                    rating_scale=(1, 5))
    df = Dataset.load_from_file('ml-100k/u.data', reader=reader)

    # START TRAINING
    trainset = df.build_full_trainset()

    # APPLYING ALGORITHM KNN Basic
    algo.fit(trainset)
    print("ALGORITHM USED", co_pe)

    # --------------------------------------------- MARKERS

    f = io.open("_AlgoHist_ub.txt", "w")
    f.write(repr(co_pe))
    f.close()

    # --------------------------------------------- MARKERS END

    print "CF Type:", prnt, "BASED"

    # PEEKING PREDICTED VALUES
    search_key = input("Enter User ID:")
    item_id = input("Enter Item ID:")
    actual_rating = float(input("Enter actual Rating:"))

    print(algo.predict(str(search_key), item_id, actual_rating))

    testset = trainset.build_anti_testset()
    predictions = algo.test(testset=testset)

    top_n = get_top_n(predictions, 5)
    result_u = True

    k = int(input("Enter size of Neighborhood (Min:1, Max:40)"))

    inner_id = algo.trainset.to_inner_iid(search_key)
    neighbors = algo.get_neighbors(inner_id, k=k)
    print "Nearest Matching users are:"
    for i in neighbors:
        print "\t " * 6, i
    return top_n, result_u
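# Both this example and Example #30 call a get_top_n helper that the snippets
# never define; a minimal sketch along the lines of the Surprise FAQ's top-N recipe:
from collections import defaultdict

def get_top_n(predictions, n=10):
    """Return the n highest-estimated (item, rating) pairs for each user."""
    top_n = defaultdict(list)
    for uid, iid, true_r, est, _ in predictions:
        top_n[uid].append((iid, est))

    # sort each user's candidates by estimated rating and keep the n best
    for uid, user_ratings in top_n.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[uid] = user_ratings[:n]

    return top_n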
Example #2
    def Basic_CF(self):
        u_id = []
        I_id = []
        r_ui_ = np.array([])
        _est = np.array([])

        sim_options = {'name': 'cosine', 'user_based': True}
        algo = KNNBasic(k=40, min_k=1, sim_options=sim_options)
        algo.fit(self.trainset)

        for uid in self.list:
            lids = self.data[self.data.uid == uid]

            for i in range(1, len(lids)):
                lid = lids[i - 1:i].lid.values[0]
                r_ui = lids[i - 1:i].rate.values[0]
                pred = algo.predict(uid, lid, r_ui, verbose=True)
                u_id.append(int(pred.uid))
                I_id.append(int(pred.iid))
                r_ui_ = np.append(r_ui_, pred.r_ui)
                _est = np.append(_est, pred.est)

        self.df_est = pd.DataFrame({
            'uid': u_id,
            'Iid': I_id,
            'r_ui': r_ui_,
            'est': _est
        })
        self.arr = self.df_est['uid'].unique()

        self.CF_ndcg_ = self.Calculate_NDCG()
Example #3
def detail(request, post_id):
    # plug the predicted-rating algorithm in here
    file_path = os.path.expanduser('stars.csv')
    reader = Reader(line_format='user item rating', sep=',')
    data = Dataset.load_from_file(file_path, reader=reader)
    trainset = data.build_full_trainset()
    algo = KNNBasic()
    algo.fit(trainset)
    uid = str(request.user.id)  # the user id goes here
    iid = str(post_id)  # raw item id (as in the ratings file). They are **strings**!

    pred = algo.predict(uid, iid, r_ui=4, verbose=True)   # predicted rating

    group = Matzip_list.objects.get(id=post_id)
    if not request.user.is_anonymous:
        my_star = request.user.star_set.filter(matzip_id=post_id).first()
        if my_star:
            my_rate = my_star.rate
            is_rated = 1
        else:
            my_rate = pred.est  # show the estimated rating, not the Prediction object
            is_rated = 0
    else:
        my_rate = "Please log in"
        is_rated = 2
    images = re.sub(r"[\[\]']", "", group.images_url_preprocess).strip().split(',')

    context = {
        'group': group,
        'images': images,
        'my_rate': my_rate,
        'is_rated': is_rated,
        'pred': pred,
    }
    return render(request, 'posts/detail.html', context)
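# As written, detail() reloads stars.csv and refits KNNBasic on every request.
# A possible refactor (an assumption, not part of the original app) trains once
# and reuses the fitted model across requests:
_algo = None

def get_algo():
    global _algo
    if _algo is None:
        reader = Reader(line_format='user item rating', sep=',')
        data = Dataset.load_from_file(os.path.expanduser('stars.csv'), reader=reader)
        _algo = KNNBasic()
        _algo.fit(data.build_full_trainset())
    return _algo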
Example #4
def collaborative_filtering():
    history_list = History.objects.all()
    with open('recommend/dataset_cf.csv', 'w', encoding='utf-8', newline='') as csv_file:
        header = ['history_id', 'user_id', 'alco_name', 'data_joined', 'review']
        writer = csv.writer(csv_file, quoting=csv.QUOTE_ALL)
        writer.writerow(header)
        for history in history_list:
            row = []
            row += [history.history_id,
                    history.user_id,
                    history.alco_name,
                    history.data_joined,
                    history.review]
            writer.writerow(row)

    alco = pandas.read_csv("recommend/alcohol_cf.csv", encoding='utf-8')
    alco = alco.set_index('alco_name')

    data = pandas.read_csv("recommend/dataset_cf.csv", encoding='utf-8').fillna(0)
    data = data.drop('history_id', axis=1)
    data = data.drop('data_joined', axis=1)
    alcohol_id_list = []
    for i in range(len(data.index)):
        alcohol_id_list.append(alco.at[data['alco_name'][i], 'alcohol_id'])

    data = data.drop('alco_name', axis=1)
    data['alcohol_id'] = alcohol_id_list
    data = data.loc[:, ["user_id", "alcohol_id", "review"]]
    data.to_csv("recommend/dataset_cf.score", sep=' ', header=None, index=False, encoding='utf-8')

    reader = Reader(line_format='user item rating', sep=' ')
    dataset = Dataset.load_from_file("recommend/dataset_cf.score", reader=reader)
    trainset = dataset.build_full_trainset()
    sim_options = {
        'name': 'pearson',  # similarity measure (cosine, msd, pearson, pearson_baseline)
        'user_based': True  # False switches to item-based
    }
    algo = KNNBasic(k=5, min_k=1, sim_options=sim_options)
    algo.fit(trainset)
    # algo = SVD()
    # algo.train(trainset)
    # print(algo.sim)

    alcohol_num = Alcohol.objects.latest('alcohol_id').alcohol_id
    user_num = History.objects.latest('user_id').user_id

    with open('recommend/answer_cf.csv', 'w', encoding='utf-8', newline='') as csv_file:
        header = ['user_id', 'alcohol_id', 'predicted_value']
        writer = csv.writer(csv_file, quoting=csv.QUOTE_ALL)
        writer.writerow(header)
        for j in range(1, user_num + 1):
            user_id = j
            for i in range(1, alcohol_num + 1):
                item_id = i
                pred = algo.predict(uid=str(user_id), iid=str(item_id))
                row = []
                row += [pred.uid,
                        pred.iid,
                        pred.est]
                writer.writerow(row)
Example #5
def func3():
    from surprise import KNNBasic
    from surprise import Dataset

    data = Dataset.load_builtin('ml-100k')
    trainset = data.build_full_trainset()
    algo = KNNBasic()
    algo.fit(trainset)
    uid = str(196)  # raw user id (as in the ratings file). They are **strings**!
    iid = str(302)  # raw item id (as in the ratings file). They are **strings**!
    pred = algo.predict(uid, iid, r_ui=4, verbose=True)
    def Basic_CF(self):
        sim_options = {'name': 'cosine', 'user_based': True}
        algo = KNNBasic(k=40, min_k=1, sim_options=sim_options)
        algo.fit(self.trainset)

        for uid in self.list:
            lids = self.data[self.data.uid == uid]

            for i in range(1, len(lids)):
                lid = lids[i - 1:i].lid.values[0]
                r_ui = lids[i - 1:i].rate.values[0]
                pred = algo.predict(uid, lid, r_ui, verbose=True)

        return pred
def recommendation_base_on_itemCF(train_data, user_item_matrix, user_ID, N):
    # reader
    reader = Reader(line_format='user item rating', sep=',')
    # load the data
    raw_data = Dataset.load_from_df(user_item_matrix, reader=reader)

    # build the model (Dataset.split was removed; use model_selection.KFold)
    from surprise.model_selection import KFold
    kf = KFold(n_splits=5)
    knn_item = KNNBasic(k=40, sim_options={'user_based': False})
    # fit on each fold and report the RMSE
    for train_set, test_set in kf.split(raw_data):
        knn_item.fit(train_set)
        predictions = knn_item.test(test_set)
        accuracy.rmse(predictions, verbose=True)

    # songs each user has already listened to
    user_songs = {}
    for user, group in user_item_matrix.groupby('user'):
        user_songs[user] = group['item'].values.tolist()
    # the full set of songs
    songs = user_item_matrix['item'].unique().tolist()
    # mapping from song ID to song title
    songID_titles = {}
    for index in train_data.index:
        songID_titles[train_data.loc[index, 'song']] = train_data.loc[index, 'title']

    # itemCF
    # songs this user has already heard
    user_items = user_songs[user_ID]

    # predicted ratings for songs the user has not heard yet
    item_rating = {}
    for item in songs:
        if item not in user_items:
            item_rating[item] = knn_item.predict(user_ID, item).est

    # pick the N highest-scoring songs
    song_id = dict(
        sorted(item_rating.items(), key=lambda x: x[1], reverse=True)[:N])
    song_topN = [songID_titles[s] for s in song_id.keys()]

    return song_topN
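# A hypothetical call, assuming user_item_matrix is a ('user', 'item', 'rating')
# dataframe and train_data maps 'song' ids to 'title' values as the function expects:
top_songs = recommendation_base_on_itemCF(train_data, user_item_matrix,
                                          user_ID='some_user', N=10)
print(top_songs)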
Example #8
class FactPrediction:
    """FactPrediction definition."""
    def train(self):
        """Trains the model."""
        from surprise import Reader, Dataset, KNNBasic

        directory = path.dirname(path.realpath(__file__))

        ratings = read_csv(path.join(directory, 'fact_ratings.csv'))
        ratings = Dataset.load_from_df(ratings[['userId', 'factId', 'rating']],
                                       Reader())

        trainset = ratings.build_full_trainset()
        self.model = KNNBasic()
        self.model.fit(trainset)

    def predict(self, u_id, f_id):
        """Performs a prediction."""
        return self.model.predict(u_id, f_id)
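# Hypothetical usage of the class above; the raw ids passed to predict() must
# match the dtypes pandas gives the userId/factId columns in fact_ratings.csv:
fp = FactPrediction()
fp.train()
print(fp.predict(1, 10).est)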
def knn_basic_movie(train, test, ids, Xtest, Xids):
    """
    kNN basic approach on movies
    Argument : train, the trainset
               test, the testset
               ids, unknown ratings
               Xtest, predicted ratings for testset, to be used for final blending
               Xids, predicted ratings for unknown ratings, to be used for final blending
    """

    print('kNN Basic Movie')
    # similarity settings must go inside sim_options, or KNNBasic ignores them
    algo = KNNBasic(k=21,
                    sim_options={'name': 'msd',
                                 'min_support': 2,
                                 'user_based': False},
                    verbose=False)

    #Train algorithm on training set
    algo.fit(train)

    #Predict on train and compute RMSE
    predictions = algo.test(train.build_testset())
    print('   Training RMSE: ', accuracy.rmse(predictions, verbose=False))

    #Predict on test and compute RMSE
    predictions = algo.test(test)
    rmse = accuracy.rmse(predictions, verbose=False)
    print('   Test RMSE: ', rmse)

    preds_test = np.zeros(len(predictions))
    for j, pred in enumerate(predictions):
        preds_test[j] = pred.est

    #Predict unknown ratings
    preds_ids = []
    for i in range(len(ids[0])):
        pred = algo.predict(str(ids[0][i]), str(ids[1][i]))
        preds_ids.append(pred.est)

    Xtest.append(preds_test)
    Xids.append(preds_ids)
    return rmse, Xtest, Xids, preds_test, preds_ids
Example #10
 def alknnbasic(self, namefile, uid, iid, rati, value_uid, value_iid):
     test_data = pd.read_csv('./container/' + namefile)
     dt = pd.DataFrame(test_data)
     # Retrieve the trainset.
     reader = Reader(rating_scale=(0, 100))
     data = Dataset.load_from_df(dt[[uid, iid, rati]], reader)
     trainset = data.build_full_trainset()
     algo = KNNBasic()
     algo.fit(trainset)
     pred = algo.predict(float(value_uid),
                         float(value_iid),
                         r_ui=1,
                         verbose=True)
     # return the result as JSON
     jsondata = {}
     jsondata["uid"] = pred.uid
     jsondata["iid"] = pred.iid
     jsondata["rati"] = round(pred.est, 2)
     return jsondata
Example #11
def computeKNNBasicMovie(data, test_np):
    """Compute the k-NN basic item based method and return the predictions on the test into a file
     The method is on all the data and got the following settings:
         - Similarity function : MSD, item based
         - Number of closest neighbors : 23
         
         data : data frame which represent the train set
         test_np : data frame on which the prediction will be returned
         
         return : test_np with a column of prediction named 'knnbasic_item_rating'"""
    
    trainset, test = dataTrainSurprise(data, test_np)
    
    sim_options = {'name': 'msd', 'user_based': False}
    knnbasic_algo = KNNBasic(k=23, sim_options=sim_options).fit(trainset)

    test['knnbasic_item_rating'] = test[['user_id', 'movie_id']] \
        .apply(lambda row: knnbasic_algo.predict(row['user_id'], row['movie_id']).est, axis=1)
    
    return test
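# dataTrainSurprise is not shown in the snippet; a plausible sketch, assuming a
# 1-5 rating scale and a 'rating' column alongside the id columns used above:
from surprise import Dataset, Reader

def dataTrainSurprise(data, test_np):
    # build a full trainset from the train dataframe; the test frame passes through
    reader = Reader(rating_scale=(1, 5))
    trainset = (Dataset
                .load_from_df(data[['user_id', 'movie_id', 'rating']], reader)
                .build_full_trainset())
    return trainset, test_np.copy()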
Example #12
    def from_to(self, namefile, uid, iid, rati, from_uid, to_uid, from_iid,
                to_iid):
        test_data = pd.read_csv('./container/' + namefile)
        dt = pd.DataFrame(test_data)
        # Retrieve the trainset.
        reader = Reader(rating_scale=(0, 100))
        data = Dataset.load_from_df(dt[[uid, iid, rati]], reader)
        trainset = data.build_full_trainset()
        algo = KNNBasic()
        algo.fit(trainset)

        arr = []
        for value_uid in range(from_uid, to_uid):
            for value_iid in range(from_iid, to_iid):
                pred = algo.predict(value_uid, value_iid, r_ui=1, verbose=True)
                tempdata = []
                tempdata.append(pred.uid)
                tempdata.append(pred.iid)
                tempdata.append(round(pred.est, 2))
                arr.append(tempdata)
        # return the result as a list of [uid, iid, est] rows
        return arr
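# A hypothetical call (the owning class and CSV name are assumptions): predict
# ratings for users 1-9 against items 1-9 and print the first few rows:
engine = RecommenderService()  # hypothetical class exposing from_to()
rows = engine.from_to('ratings.csv', 'user_id', 'item_id', 'rating',
                      from_uid=1, to_uid=10, from_iid=1, to_iid=10)
for uid, iid, est in rows[:5]:
    print(uid, iid, est)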
Example #13
    def __recommend_movies(self, username):
        reader = Reader(rating_scale=(1, 10))
        df = pd.DataFrame(self.ratings_dict)
        data = Dataset.load_from_df(df[["user", "item", "rating"]], reader)
        sim_options = {
            "name": "cosine",
            'user_based': True,
            # 'min_support': 2
        }
        algo = KNNBasic(sim_options=sim_options)
        # algo = SVD()

        algo.fit(data.build_full_trainset())

        self.__get_all_movies()

        for movies in self.movies:
            prediction = algo.predict(username, movies)
            self.predictions[movies] = prediction.est

        for user_rated_movies in self.__get_user_rated_movies(
                self.__get_username_id(username)):
            del self.predictions[user_rated_movies]
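# A follow-on sketch (assumes `predictions` is the dict built above, written
# outside the class for illustration): rank what remains to get the top-10
# recommendations for the user.
top_10 = sorted(predictions.items(), key=lambda kv: kv[1], reverse=True)[:10]
for movie, est in top_10:
    print(movie, round(est, 2))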
Example #14
r_ui1 = 4
r_ui2 = 4
r_ui3 = 1
r_ui4 = 3

verboseFlag = True

# get a prediction for specific users and items.
print("KNNBaseLine:")
predBaseLine1 = algoBaseLine.predict(uid1, iid1, r_ui=r_ui1, verbose=verboseFlag)
predBaseLine2 = algoBaseLine.predict(uid2, iid2, r_ui=r_ui2, verbose=verboseFlag)
predBaseLine3 = algoBaseLine.predict(uid3, iid3, r_ui=r_ui3, verbose=verboseFlag)
predBaseLine4 = algoBaseLine.predict(uid4, iid4, r_ui=r_ui4, verbose=verboseFlag)

print("\nKNNBasic:")
predBasic1 = algoBasic.predict(uid1, iid1, r_ui=r_ui1, verbose=verboseFlag)
predBasic2 = algoBasic.predict(uid2, iid2, r_ui=r_ui2, verbose=verboseFlag)
predBasic3 = algoBasic.predict(uid3, iid3, r_ui=r_ui3, verbose=verboseFlag)
predBasic4 = algoBasic.predict(uid4, iid4, r_ui=r_ui4, verbose=verboseFlag)

print("\nKNNWithMeans:")
predWithMeans1 = algoWithMeans.predict(uid1, iid1, r_ui=r_ui1, verbose=verboseFlag)
predWithMeans2 = algoWithMeans.predict(uid2, iid2, r_ui=r_ui2, verbose=verboseFlag)
predWithMeans3 = algoWithMeans.predict(uid3, iid3, r_ui=r_ui3, verbose=verboseFlag)
predWithMeans4 = algoWithMeans.predict(uid4, iid4, r_ui=r_ui4, verbose=verboseFlag)

print("\nKNNWithZScore:")
predWithZScore1 = algoWithZScore.predict(uid1, iid1, r_ui=r_ui1, verbose=verboseFlag)
predWithZScore2 = algoWithZScore.predict(uid2, iid2, r_ui=r_ui2, verbose=verboseFlag)
predWithZScore3 = algoWithZScore.predict(uid3, iid3, r_ui=r_ui3, verbose=verboseFlag)
predWithZScore4 = algoWithZScore.predict(uid4, iid4, r_ui=r_ui4, verbose=verboseFlag)
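# The four fitted models and the uid/iid pairs above are assumed to be defined
# earlier; a minimal setup sketch on ml-100k (the id pairs are placeholders):
from surprise import Dataset, KNNBaseline, KNNBasic, KNNWithMeans, KNNWithZScore

data = Dataset.load_builtin('ml-100k')
trainset = data.build_full_trainset()

algoBaseLine = KNNBaseline().fit(trainset)
algoBasic = KNNBasic().fit(trainset)
algoWithMeans = KNNWithMeans().fit(trainset)
algoWithZScore = KNNWithZScore().fit(trainset)

uid1, iid1 = str(196), str(302)  # ml-100k raw ids are strings
uid2, iid2 = str(186), str(377)
uid3, iid3 = str(22), str(51)
uid4, iid4 = str(244), str(25)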
Example #15
prediction_mf

# Test the recommendation
recom_svd = algo_svd.predict(uid='Jays', iid='AWMjT0WguC1rwyj_rFh3')
recom_svd

sim_options = {'name': 'pearson_baseline', 'shrinkage': 0}
algo_knn = KNNBasic(k=50, sim_options=sim_options)
prediction_knn = algo_knn.fit(trainset).test(testset)

# Predictions
prediction_knn

# Test the recommendation
recom_knn = algo_knn.predict(uid='Jays', iid='AWMjT0WguC1rwyj_rFh3')
recom_knn


accuracy.mae(prediction_mf)
accuracy.fcp(prediction_mf)
accuracy.rmse(prediction_mf)


accuracy.mae(prediction_knn)
accuracy.fcp(prediction_knn)
accuracy.rmse(prediction_knn)


# Dataset to be used for the train/test split with the surprise framework
rating[['reviews.username', 'id', 'reviewsRating']]
Example #16
from surprise import Reader, Dataset, KNNBasic

# break the data file down into an array of strings
with open('./data.txt') as f:
    all_lines = f.readlines()
# load information from the file into a dataset using the reader
reader = Reader(line_format='item user rating', sep=',', rating_scale=(1, 5))
data = Dataset.load_from_file('./data.txt', reader=reader)
# (the old data.split(n_folds=5) API was removed; cross-validation now lives
# in surprise.model_selection)
# mean squared difference similarity; min_support=1 keeps only item pairs
# rated by at least one common user
sim_options = {'name': 'msd', 'user_based': False, 'min_support': 1}
trainingset = data.build_full_trainset()
# fit the basic KNN algorithm on the full trainset
algorithm = KNNBasic(sim_options=sim_options)
algorithm.fit(trainingset)

# predict rating using item and user ID as input
userid = str(input("Please enter user ID: "))
itemid = str(input("Please enter movie ID: "))
print(algorithm.predict(userid, itemid))
from surprise import KNNBasic
from surprise import Dataset
from surprise.model_selection import cross_validate

# Load the movielens-100k dataset.
data = Dataset.load_builtin('ml-100k')

# Retrieve the trainset.
trainset = data.build_full_trainset()

# Build an algorithm, and train it.
algo = KNNBasic()
algo.fit(trainset)


##########################################
# we can now query for specific predictions

uid = str(196)  # raw user id (as in the ratings file). They are **strings**!
iid = str(302)  # raw item id (as in the ratings file). They are **strings**!

# get a prediction for specific users and items.
pred = algo.predict(uid, iid, r_ui=4, verbose=True)


##########################################
# Tired? You can still cross-validate; the old split/evaluate API was removed,
# so use cross_validate with 3 folds instead.
cross_validate(algo, data, cv=3, verbose=True)
Example #18
class BaselineMF:
    def __init__(self, cf_algo=None, logit=False):
        """Collaborative-filtering baseline that wraps a Surprise algorithm.

        Parameters
        ----------

        cf_algo: a Surprise algorithm instance; defaults to KNNBasic(k=2)

        logit (bool): if True, ratings are logit-transformed before fitting
        and predictions are mapped back through a sigmoid

        """
        self.logit = logit
        self.question_truth_dict = {}
        self.average_true_rating = 0.5
        self.average_false_rating = 0.5
        self.loss_fn = nn.MSELoss(reduction='mean')  # size_average is deprecated

        if cf_algo is None:
            self.cf_algo = KNNBasic(k=2)
        else:
            self.cf_algo = cf_algo

        #self.svd = SVD(n_epochs=500, verbose=True, lr_all=0.001, n_factors=50)

    def dataloader_extract(self, sample):
        ratings = pd.Series(np.array(list(sample['rating'])))
        user_ids = pd.Series(sample['user_id']).astype(str)
        item_ids = pd.Series(sample['item_id']).astype(str)

        return ratings, user_ids, item_ids

    def logit_fn(self, p, epsilon=1e-3):
        # clip to (epsilon, 1 - epsilon) so the log stays finite at 0 and 1
        p = np.clip(p, epsilon, 1 - epsilon)
        return np.log(p / (1 - p))

    def sigmoid_fn(self, x):
        return 1 / (1 + np.exp(-x))

    def fit(self, dataset, train_sampler):
        """Runs the fit method which simply works out the average response
        for 'true' and 'false' questions, where 'true' questions are those
        where the average rating is greater than 0.5"""
        t0 = time.time()
        data_loader = DataLoader(dataset,
                                 batch_size=len(train_sampler),
                                 sampler=train_sampler)
        sample = next(iter(data_loader))
        ratings, user_ids, item_ids = self.dataloader_extract(sample)
        if self.logit:
            ratings = self.logit_fn(ratings)
        possible_ratings = ratings.unique()

        ratings_dict = {
            'itemID': item_ids,
            'userID': user_ids,
            'rating': ratings
        }
        df = pd.DataFrame(ratings_dict)
        reader = Reader(rating_scale=(0, 1))
        data = Dataset.load_from_df(df[['userID', 'itemID', 'rating']], reader)
        trainset = data.build_full_trainset()
        self.cf_algo.fit(trainset)

    def predict(self, dataset, sampler, batch_size=64):
        # I'm not entirely sure that the build_full_testset
        # function works as I'd expect, so instead we loop
        # through all the test ids and predict one-at-a-time
        preds = []
        data_loader = DataLoader(dataset,
                                 batch_size=len(dataset),
                                 sampler=sampler)
        sample = next(iter(data_loader))
        ratings, user_ids, item_ids = self.dataloader_extract(sample)
        for user_id, item_id in zip(user_ids, item_ids):
            pred = self.cf_algo.predict(str(user_id), str(item_id))[3]
            if self.logit:
                pred = self.sigmoid_fn(pred)
            preds.append(pred)

        return (preds)

    def score(self, dataset, sampler, batch_size=64, only_slow=True):
        """Scores the baseline on predictions made on the dataset provided,
        sampled with the given sampler. If `only_slow` is true, then only
        the slow judgments in the sampled part of the dataset are scored"""
        predictions = self.predict(dataset, sampler, batch_size)
        data_loader = DataLoader(dataset,
                                 batch_size=len(dataset),
                                 sampler=sampler)
        testset = next(iter(data_loader))
        ratings, user_ids, item_ids = self.dataloader_extract(testset)
        user_ids = user_ids.astype(int)
        ratings = torch.Tensor(ratings)
        predictions = torch.Tensor(predictions)

        #Note that all baselines are passed flattened datasets, so we
        # have to work out which of the users correspond to the latest
        # times
        if only_slow:
            long_time_uids = [i for i in np.unique(user_ids) if i % 3 == 2]
            new_ratings = []
            new_preds = []
            for index, rating in enumerate(ratings):
                if user_ids[index] in long_time_uids:
                    new_ratings.append(rating)
            for index, pred in enumerate(predictions):
                if user_ids[index] in long_time_uids: new_preds.append(pred)
            loss = self.loss_fn(torch.Tensor(new_preds),
                                torch.Tensor(new_ratings).cpu())
            return loss.cpu().data.item()

        else:
            loss = self.loss_fn(predictions, ratings.cpu())
            return loss.cpu().data.item()
print("Usando o algoritmo KNNBasic com 50 vizinhos")
print("Algoritmo de similiraridade: Pearson")
algoritmo = KNNBasic(k=50,
                     sim_options={
                         'name': 'pearson',
                         'user_based': True,
                         'verbose': True
                     })

algoritmo.fit(trainset)

# Selecionamos o usuário e o filme que será analisado
# User 49. Tem entre 18 e 24 anos. É programador e mora em Huston, Texas
uid = str(49)
# Filme visto e avaliado: Negotiator, The (1998)::Action|Thriller. Avaliação 4
iid = str(2058)  # raw item id

# get a prediction for specific users and items.
print("Predição de avaliação: ")
pred = algoritmo.predict(uid, iid, r_ui=4, verbose=True)

# run the trained model against the testset
test_pred = algoritmo.test(testset)

# Avalia RMSE
print("Avaliação RMSE: ")
accuracy.rmse(test_pred, verbose=True)

# Avalia MAE
print("Avaliação MAE: ")
accuracy.mae(test_pred, verbose=True)
Example #20
# -*- coding: utf-8 -*-
"""
Created on Mon Sep  3 22:37:15 2018

@author: soug9
"""

from surprise import KNNBasic
from surprise import Dataset

data = Dataset.load_builtin('ml-100k')
trainset = data.build_full_trainset()

algo = KNNBasic()

algo.fit(trainset)

algo.predict('197', '223', 8)

algo.predict('afsdf', 'gggegw')

# estimate() expects inner ids, so convert the raw ids first
algo.estimate(trainset.to_inner_uid('197'), trainset.to_inner_iid('223'))
Example #21
from surprise import KNNBasic
from surprise import Dataset

data = Dataset.load_builtin('ml-100k')
trainset = data.build_full_trainset()

algo = KNNBasic()
algo.fit(trainset)

uid = str(196)
iid = str(302)
algo.predict(uid, iid, r_ui=4, verbose=True)
# %% Loading Test Data
file_path = "Data/sample_submission.csv"
data_test = utils.load_data_desired(file_path)

# %% Test Prediction
Pred_Test_SVD = []
Pred_Test_NMF = []
Pred_Test_SL1 = []
Pred_Test_KNN = []
Pred_Test_BSL = []

start = time.time()
for line in data_test:
    Pred_Test_KNN.append(
        alg_KNN.predict(str(line[1]), str(line[0]), clip=False).est)

    Pred_Test_SVD.append(
        alg_SVD.predict(str(line[1]), str(line[0]), clip=False).est)

    Pred_Test_NMF.append(
        alg_NMF.predict(str(line[1]), str(line[0]), clip=False).est)

    Pred_Test_SL1.append(
        alg_SL1.predict(str(line[1]), str(line[0]), clip=False).est)

    Pred_Test_BSL.append(
        alg_BSL.predict(str(line[1]), str(line[0]), clip=False).est)

end = time.time()
print("***********************************************")
Example #23
algo.fit(trainset)

# test algorithm on testset
predictions = algo.test(testset)

# Retrieve top N predictions for each item in predictions (in the test set)


# make prediction
recipe = random.choice(df_users['item'].unique())

# pick a recipe that has multiple users rating it
uid = 613
iid = 2
print(users[uid], ':', titles[iid])
prediction = algo.predict(users[uid], titles[iid], 1)
prediction.est


# In[ ]:





# In[ ]:




Example #24
del df

print(time.asctime(), 'loaded training data, now building trainset')
trainset = data.build_full_trainset()
del data

print(time.asctime(), 'training set built, now training')
k, min_k = 20, 5
title = 'KNN_k'+str(k)+'_mink'+str(min_k)
algo = KNNBasic(k=k, min_k=min_k)
algo.fit(trainset)

print(time.asctime(), 'training complete, now loading prediction data')
to_predict = pd.read_csv(file_path_test, delimiter=' ', header=None)
to_predict = to_predict.values.T[0:2].T
predicted = np.zeros(len(to_predict))

print(time.asctime(), 'prediction data loaded, now predicting')
for i in range(len(predicted)):
    user = to_predict[i][0]
    item = to_predict[i][1]
    predicted[i] = algo.predict(uid=user, iid=item, verbose=0).est
    if i % 500000 == 0:
        print(i, 'of', len(predicted), 'predicted')

print(time.asctime(), 'now saving predictions')

# CHECK THE PATHS FOR YOUR OWN COMPUTER
np.savetxt('../custom_data/'+title+'.dta', predicted, fmt='%.3f')

print(time.asctime(), 'done')
Example #25
trainset, testset = train_test_split(data, test_size=.25)
length = len(testset)

algo = KNNBasic()

algo.fit(trainset)
predictions = algo.test(testset)


accuracy.rmse(predictions)
accuracy.mae(predictions)
acc = 0
ActualTrue = 0
ActPredTrue = 0
PredTrue = 0
for i in range(length):
    predic = algo.predict(testset[i][0], testset[i][1], testset[i][2])
    if -0.75 < predic[3] - predic[2] < 0.75:
        acc += 1
    if predic[2] >= 4:
        ActualTrue += 1
        if predic[3] >= 3.75:
            ActPredTrue += 1
    if predic[3] >= 3.5:
        PredTrue += 1
precision = (1.0 * ActPredTrue) / PredTrue
recall = (1.0 * ActPredTrue) / ActualTrue
hit_rate = acc / length  # renamed so it does not shadow the accuracy module
print("\nrecall :", recall)
print("\nFinal Accuracy Values:", hit_rate)
print("\nPrecision :", precision)
Example #26
"""
Created on Mon Feb  4 00:08:44 2019

@author: abhijithneilabraham
"""
from surprise import KNNBasic
from surprise import SVD
from surprise import Dataset
from surprise.model_selection import cross_validate
from surprise import Reader
from surprise.model_selection import train_test_split
import pandas as pd
customer = pd.read_csv('names.csv')

reader = Reader(line_format='user item rating', rating_scale=(1, 5), sep=',')
fieldnames = ['id', 'male_or_female']

for i in range(25):
    fieldnames.insert(2, 'question' + str(i + 1))

    data = Dataset.load_from_df(customer[fieldnames], reader)
    del fieldnames[2]
    trainset = data.build_full_trainset()

    algo = KNNBasic()
    algo.fit(trainset)
    uid = str(12)
    iid = str(0)
    pred = algo.predict(uid, iid, r_ui=None, verbose=True)
rawiid = 'NightListener'  # was not rated by Toby
rawiid = 'LadyinWater'  # was not rated by Toby
rawiid = 'JustMyLuck'  # was not rated by Toby

# convert user and item names (raw ids) into indexes (inner ids)
# (raw ids are the user & item names as given in the datafile; they can be ints
# or strings, while inner ids are indexes into the sorted raw ids)
uid = trainset.to_inner_uid(rawuid)
uid
iid = trainset.to_inner_iid(rawiid)
iid

# if the actual rating is known it can be passed as an argument
realrating = dict(trainset.ur[uid])[iid]
realrating
pred = algo.predict(rawuid, rawiid, r_ui=realrating, verbose=True)

# if the actual rating is unknown use the below
pred = algo.predict(rawuid, rawiid)
pred

# FYI: can compare with prediction made using demolib (the library used in workshop1)
usersA, umap, imap = makeratingsmatrix(trans)
targetuser = usersA[umap[rawuid], ]
targetuser
predictrating_UU(targetuser, usersA, imap[rawiid], simfun=pearsonsim)

# FYI: to help understand how predictions are made when using matrix factorisation we can
# compute the prediction ourselves from the factorised matrices and the biases: pu,qi,bu,bi

# examine top-left part of the User and Item preference matrix
Example #28
    #algo = KNNWithMeans(k=40,min_k=5,sim_options=sim_options)
    #algo = KNNWithZScore(k=40,min_k=5,sim_options=sim_options)
    algo.fit(trainset)

    # get a prediction for specific users and items.
    uid = my_username
    iid = str(19)  # Monster
    #
    #pred = algo.predict(uid, iid, verbose=True)
    #
    #for x in range (1, 1001):
    #	pred = algo.predict(uid, str(x), verbose=True)

    my_dict = {}
    for id in anime_list:
        pred = algo.predict(uid, id, verbose=False)
        if id not in user_ratings:
            if not pred[4]['was_impossible']:
                #print('id:', pred[1], ':', pred[3], ', k:', pred[4]['actual_k'], pred[4])
                my_dict[id] = pred[3]

    top_list = sorted(my_dict, key=my_dict.get, reverse=True)[:50]
    for item in top_list:
        print('Rank:\t' + anime_list[item][1], '\t',
              anime_list[item][0] + ' (' + item + ')', '\t', my_dict[item])
    '''	this should print a list of your worst recommendations
	print('\n WORST')
	worst_list = sorted(my_dict, key=my_dict.get, reverse=False)[:50]
	for item in worst_list:
		if(int(item) < 1000):
			print('Rank:\t' + anime_list[item][1], '\t', anime_list[item][0] + ' (' + item + ')', '\t', my_dict[item])
	'''
normal = NormalPredictor()
normal.fit(trainset)
normal_prediction_seq = []
for r_u, r_i, r in tqdm(testset,
                        desc="get prediction sequence of random algorithm"):
    res = normal.predict(r_u, r_i)
    normal_prediction_seq.append(res.est)

if not os.path.exists("prediction_seqs/knn.json"):

    knn = KNNBasic(k=90)
    knn.fit(trainset)
    KNN_prediction_seq = []
    for r_u, r_i, r in tqdm(testset, desc="get prediction sequence of knn"):
        res = knn.predict(r_u, r_i)
        KNN_prediction_seq.append(res.est)
    with open("prediction_seqs/knn.json", "w") as f:
        f.write(json.dumps(KNN_prediction_seq))
else:
    KNN_prediction_seq = json.loads(open("prediction_seqs/knn.json").read())

if not os.path.exists("prediction_seqs/svd.json"):

    svd = SVD(n_factors=40)
    svd.fit(trainset)
    svd_prediction_seq = []
    for r_u, r_i, r in tqdm(testset, desc="get prediction sequence of svd"):
        res = svd.predict(r_u, r_i)
        svd_prediction_seq.append(res.est)
    with open("prediction_seqs/svd.json", "w") as f:
Example #30
# APPLYING ALGORITHM KNN Basic
algo.fit(trainset)
print("ALGORITHM USED: \n", algo)

testset = trainset.build_anti_testset()
predictions = algo.test(testset=testset)

top_n = get_top_n(predictions, 5)

# ---------------------------------------------------- PREDICTION VERIFICATION - CL0 (945)
print("\t\tINITIATING IN CLUSTER 0 (945)\n")
search_key = input("Enter User ID:")
item_id = input("Enter Item ID:")
actual_rating = float(input("Enter actual Rating:"))

print(algo.predict(str(search_key), item_id, actual_rating))

# ---------------------------------------------------- PREDICTION VERIFICATION - CL1 (944)
print("\t\tINITIATING IN CLUSTER 1 (944)\n")
search_key = input("Enter User ID:")
item_id = input("Enter Item ID:")
actual_rating = float(input("Enter actual Rating:"))

print(algo.predict(str(search_key), item_id, actual_rating))

# --------------------- GENERATE FULL PREDICTION
csvfile = 'pred_matrix-Cluster0.csv'
with open(csvfile, "w") as output:
    writer = csv.writer(output, delimiter=',', lineterminator='\n')
    writer.writerow(['uid', 'iid', 'rat'])
    for uid, user_ratings in top_n.items():