def user_based_cf(co_pe):
    """Run user-based collaborative filtering (KNNBasic) on ml-100k.

    co_pe -- similarity measure name forwarded to surprise ('cosine', 'msd', ...).
    Returns (top_n, result_u): top-5 recommendations per user and a success flag.

    NOTE(review): Python 2 code (print statements, raw_input) and the pre-1.1
    surprise API (`algo.train`) -- confirm the pinned interpreter/surprise
    versions before modernising.
    """
    # INITIALIZE REQUIRED PARAMETERS
    # path = 'ml-100k/u.user'
    prnt = "USER"
    sim_op = {'name': co_pe, 'user_based': True}
    algo = KNNBasic(sim_options=sim_op)

    reader = Reader(line_format="user item rating", sep='\t', rating_scale=(1, 5))
    df = Dataset.load_from_file('ml-100k/u.data', reader=reader)

    # START TRAINING
    trainset = df.build_full_trainset()

    # APPLYING ALGORITHM KNN Basic
    algo.train(trainset)
    print "ALGORITHM USED", co_pe

    # -------------------------------`-------------- MARKERS
    # Persist the similarity name so a later run can tell which measure was used.
    f = io.open("_AlgoHist_ub.txt", "wb")
    f.write(repr(co_pe))
    f.close()
    # --------------------------------------------- MARKERS END

    print "CF Type:", prnt, "BASED"

    # PEEKING PREDICTED VALUES
    search_key = raw_input("Enter User ID:")
    item_id = raw_input("Enter Item ID:")
    # NOTE(review): Py2 input() evaluates the typed text -- expects a number here.
    actual_rating = input("Enter actual Rating:")
    print algo.predict(str(search_key), item_id, actual_rating)

    # Predict every (user, item) pair absent from the trainset; keep top 5 per user.
    testset = trainset.build_anti_testset()
    predictions = algo.test(testset=testset)
    top_n = get_top_n(predictions, 5)
    result_u = True

    # Nearest-neighbour peek for the user entered above.
    k = input("Enter size of Neighborhood (Min:1, Max:40)")
    # NOTE(review): to_inner_iid (item mapping) is called with a *user* id --
    # looks like it should be to_inner_uid; confirm before relying on output.
    inner_id = algo.trainset.to_inner_iid(search_key)
    neighbors = algo.get_neighbors(inner_id, k=k)
    print "Nearest Matching users are:"
    for i in neighbors:
        print "\t " * 6, i
    return top_n, result_u
def Basic_CF(self):
    """Fit a user-based cosine KNNBasic on self.trainset and predict every
    stored (user, item, rating) triple, accumulating results into self.df_est
    and the NDCG score into self.CF_ndcg_.

    Reads:  self.trainset, self.list (user ids), self.data (columns uid/lid/rate)
    Writes: self.df_est, self.arr, self.CF_ndcg_
    """
    u_id = []               # raw user id of each prediction
    I_id = []               # raw item id of each prediction
    r_ui_ = np.array([])    # true ratings
    _est = np.array([])     # estimated ratings
    sim_options = {'name': 'cosine', 'user_based': True}
    algo = KNNBasic(k=40, min_k=1, sim_options=sim_options)
    algo.fit(self.trainset)
    for uid in (self.list):
        lids = self.data[self.data.uid == uid]
        a = self.data[self.data.uid == uid]
        # NOTE(review): range(1, len(a)) visits rows 0..len(a)-2, skipping the
        # user's last rating (and users with a single rating entirely) --
        # confirm whether that hold-out is intentional.
        for i in range(1, len(a)):
            lid = lids[i - 1:i].lid.values[0]
            r_ui = lids[i - 1:i].rate.values[0]
            pred = algo.predict(uid, lid, r_ui, verbose=True)
            u_id.append(int(pred.uid))
            I_id.append(int(pred.iid))
            r_ui_ = np.append(r_ui_, pred.r_ui)
            _est = np.append(_est, pred.est)
    self.df_est = pd.DataFrame({
        'uid': u_id,
        'Iid': I_id,
        'r_ui': r_ui_,
        'est': _est
    })
    self.arr = self.df_est['uid'].unique()
    self.CF_ndcg_ = self.Calculate_NDCG()
def detail(request, post_id):
    """Render the post detail page with the user's rating or a KNN-predicted one.

    context['my_rate'] is the stored rating when the user already rated this
    post (is_rated=1), the surprise Prediction when authenticated but unrated
    (is_rated=0), or a login prompt for anonymous visitors (is_rated=2).
    """
    # Predicted-rating model: user-based KNN trained on the full stars.csv dump.
    file_path = os.path.expanduser('stars.csv')
    reader = Reader(line_format='user item rating', sep=',')
    data = Dataset.load_from_file(file_path, reader=reader)
    trainset = data.build_full_trainset()
    algo = KNNBasic()
    algo.fit(trainset)
    # FIX: use the real user id. The original passed request.user.is_authenticated,
    # i.e. the string 'True'/'False', which can never match a raw uid from
    # stars.csv, so every prediction fell back to the default estimate.
    uid = str(request.user.id)
    iid = str(post_id)  # raw item id (as in the ratings file). They are **strings**!
    pred = algo.predict(uid, iid, r_ui=4, verbose=True)  # predicted rating
    group = Matzip_list.objects.get(id=post_id)
    if not request.user.is_anonymous:
        # Query once instead of twice (original called .filter().first() twice).
        existing = request.user.star_set.all().filter(matzip_id=post_id).first()
        if existing:
            my_rate = existing.rate
            is_rated = 1
        else:
            my_rate = pred
            is_rated = 0
    else:
        my_rate = "로그인을 해주세요"
        is_rated = 2
    # images_url_preprocess looks like "['u1', 'u2', ...]" -- strip brackets/quotes.
    images = re.sub("]|\[|'", "", group.images_url_preprocess).strip().split(',')
    context = {
        'group': group,
        'images': images,
        'my_rate': my_rate,
        'is_rated': is_rated,
        'pred': pred,
    }
    return render(request, 'posts/detail.html', context)
def collaborative_filtering():
    """Dump History rows to CSV, train a user-based Pearson KNN on the
    (user_id, alcohol_id, review) triples, then write a predicted rating for
    every (user, alcohol) pair to recommend/answer_cf.csv.
    """
    # --- 1. Export the History table to recommend/dataset_cf.csv -------------
    history_list = History.objects.all()
    with open('recommend/dataset_cf.csv', 'w', encoding='utf-8', newline='') as csv_file:
        header = ['history_id', 'user_id', 'alco_name', 'data_joined', 'review']
        writer = csv.writer(csv_file, quoting=csv.QUOTE_ALL)
        writer.writerow(header)
        for history in history_list:
            row = []
            row += [history.history_id, history.user_id, history.alco_name, history.data_joined, history.review]
            writer.writerow(row)

    # --- 2. Map alcohol names to ids; build the (user, item, rating) file ----
    alco = pandas.read_csv("recommend/alcohol_cf.csv", encoding='utf-8')
    alco = alco.set_index('alco_name')
    data = pandas.read_csv("recommend/dataset_cf.csv", encoding='utf-8').fillna(0)
    data = data.drop('history_id', axis=1)
    data = data.drop('data_joined', axis=1)
    alcohol_id_list = []
    for i in range(len(data.index)):
        alcohol_id_list.append(alco.at[data['alco_name'][i], 'alcohol_id'])
    data = data.drop('alco_name', axis=1)
    data['alcohol_id'] = alcohol_id_list
    data = data.loc[:, ["user_id", "alcohol_id", "review"]]
    data.to_csv("recommend/dataset_cf.score", sep=' ', header=None, index=False, encoding='utf-8')

    # --- 3. Train the KNN model ---------------------------------------------
    reader = Reader(line_format='user item rating', sep=' ')
    dataset = Dataset.load_from_file("recommend/dataset_cf.score", reader=reader)
    trainset = dataset.build_full_trainset()
    sim_options = {
        'name': 'pearson',   # similarity measure (cosine, msd, pearson, pearson_baseline)
        'user_based': True   # set to False for item-based CF
    }
    algo = KNNBasic(k=5, min_k=1, sim_options=sim_options)
    algo.fit(trainset)
    # algo = SVD()
    # algo.train(trainset)
    # print(algo.sim)

    # --- 4. Predict every (user, alcohol) pair ------------------------------
    alcohol_num = Alcohol.objects.latest('alcohol_id').alcohol_id
    # NOTE(review): assumes user ids are dense in 1..user_num (and likewise
    # alcohol ids) -- any gap produces predictions for nonexistent ids and any
    # id above the latest History row is missed. Confirm against the schema.
    user_num = History.objects.latest('user_id').user_id
    with open('recommend/answer_cf.csv', 'w', encoding='utf-8', newline='') as csv_file:
        header = ['user_id', 'alcohol_id', 'predicted_value']
        writer = csv.writer(csv_file, quoting=csv.QUOTE_ALL)
        writer.writerow(header)
        for j in range(1, user_num + 1):
            user_id = j
            for i in range(1, alcohol_num + 1):
                item_id = i
                pred = algo.predict(uid=str(user_id), iid=str(item_id))
                row = []
                row += [pred.uid, pred.iid, pred.est]
                writer.writerow(row)
def func3():
    """Fit KNNBasic on the full ml-100k trainset and print one prediction."""
    from surprise import KNNBasic
    from surprise import Dataset

    full_train = Dataset.load_builtin('ml-100k').build_full_trainset()
    model = KNNBasic()
    model.fit(full_train)

    # Raw ids (as in the ratings file) are **strings**!
    user_raw, item_raw = str(196), str(302)
    model.predict(user_raw, item_raw, r_ui=4, verbose=True)
def Basic_CF(self):
    """Fit a user-based cosine KNNBasic on self.trainset, predict each stored
    (uid, lid, rate) triple in turn, and return the last Prediction made."""
    algo = KNNBasic(k=40, min_k=1,
                    sim_options={'name': 'cosine', 'user_based': True})
    algo.fit(self.trainset)
    for uid in self.list:
        # All rows belonging to this user (columns: uid, lid, rate).
        user_rows = self.data[self.data.uid == uid]
        for idx in range(1, len(user_rows)):
            window = user_rows[idx - 1:idx]
            pred = algo.predict(uid,
                                window.lid.values[0],
                                window.rate.values[0],
                                verbose=True)
    return pred
def recommendation_base_on_itemCF(train_data, user_item_matrix, user_ID, N):
    """Item-based CF: return the titles of the top-N songs for user_ID.

    train_data       -- frame with 'song' and 'title' columns (id -> title map)
    user_item_matrix -- frame with user / item / rating columns
    user_ID          -- raw user id to recommend for
    N                -- number of titles to return
    """
    # FIX: Dataset.split()/.folds() were removed from surprise (>= 1.1);
    # use model_selection.KFold for the 5-fold RMSE evaluation instead
    # (this was already hinted at by the commented-out KFold line).
    from surprise.model_selection import KFold

    # Reader / load the ratings frame
    reader = Reader(line_format='user item rating', sep=',')
    raw_data = Dataset.load_from_df(user_item_matrix, reader=reader)

    # Build the model: item-based KNN
    knn_item = KNNBasic(k=40, sim_options={'user_based': False})

    # Train on each fold and report RMSE (the model keeps the last fold's fit,
    # matching the original loop's behaviour).
    kf = KFold(n_splits=5)
    for train_set, test_set in kf.split(raw_data):
        knn_item.fit(train_set)
        predictions = knn_item.test(test_set)
        accuracy.rmse(predictions, verbose=True)

    # Songs each user has listened to
    user_songs = {}
    for user, group in user_item_matrix.groupby('user'):
        user_songs[user] = group['item'].values.tolist()

    # All distinct songs
    songs = user_item_matrix['item'].unique().tolist()

    # Song id -> title mapping
    songID_titles = {}
    for index in train_data.index:
        songID_titles[train_data.loc[index, 'song']] = train_data.loc[index, 'title']

    # itemCF: score every song the user has not heard yet
    user_items = user_songs[user_ID]
    item_rating = {}
    for item in songs:
        if item not in user_items:
            item_rating[item] = knn_item.predict(user_ID, item).est

    # Keep the N songs with the highest predicted rating
    song_id = dict(
        sorted(item_rating.items(), key=lambda x: x[1], reverse=True)[:N])
    song_topN = [songID_titles[s] for s in song_id.keys()]

    return song_topN
class FactPrediction:
    """FactPrediction definition: KNN-based rating predictor for facts."""

    def train(self):
        """Trains a KNNBasic model on fact_ratings.csv located next to this module."""
        from surprise import Reader, Dataset, KNNBasic
        directory = path.dirname(path.realpath(__file__))
        ratings = read_csv(path.join(directory, 'fact_ratings.csv'))
        ratings = Dataset.load_from_df(ratings[['userId', 'factId', 'rating']],
                                       Reader())
        trainset = ratings.build_full_trainset()
        self.model = KNNBasic()
        # FIX: AlgoBase.train() was removed from surprise (>= 1.1); fit() is the
        # supported API and has been available since 1.0.5.
        self.model.fit(trainset)

    def predict(self, u_id, f_id):
        """Performs a prediction for user u_id on fact f_id (raw ids)."""
        return self.model.predict(u_id, f_id)
def knn_basic_movie(train, test, ids, Xtest, Xids):
    """
    kNN basic approach on movies (item-based)
    Argument : train, the trainset
               test, the testset
               ids, unknown ratings
               Xtest, predicted ratings for testset, to be used for final blending
               Xids, predicted ratings for unknown ratings, to be used for final blending
    """
    print('kNN Basic Movie')
    # FIX: 'name', 'min_support' and 'user_based' are similarity options and
    # must be passed inside sim_options. As top-level kwargs KNNBasic silently
    # ignores them, so the original model was actually user-based MSD with the
    # default support -- not the item-based model this function advertises.
    algo = KNNBasic(k=21,
                    sim_options={'name': 'msd',
                                 'min_support': 2,
                                 'user_based': False},
                    verbose=False)

    # Train algorithm on training set
    algo.fit(train)

    # Predict on train and compute RMSE
    predictions = algo.test(train.build_testset())
    print(' Training RMSE: ', accuracy.rmse(predictions, verbose=False))

    # Predict on test and compute RMSE
    predictions = algo.test(test)
    rmse = accuracy.rmse(predictions, verbose=False)
    print(' Test RMSE: ', rmse)

    # Estimates for the testset, for final blending
    preds_test = np.zeros(len(predictions))
    for j, pred in enumerate(predictions):
        preds_test[j] = pred.est

    # Predict unknown ratings (raw ids must be strings)
    preds_ids = []
    for i in range(len(ids[0])):
        pred = algo.predict(str(ids[0][i]), str(ids[1][i]))
        preds_ids.append(pred.est)

    Xtest.append(preds_test)
    Xids.append(preds_ids)
    return rmse, Xtest, Xids, preds_test, preds_ids
def alknnbasic(self, namefile, uid, iid, rati, value_uid, value_iid):
    """Train KNNBasic on the named CSV (columns uid/iid/rati, rating scale
    0-100) and return one (user, item) prediction as a JSON-able dict with
    keys 'uid', 'idd' and 'rati' (estimate rounded to 2 decimals).
    """
    test_data = pd.read_csv('./container/' + namefile)
    dt = pd.DataFrame(test_data)
    # Retrieve the trainset.
    reader = Reader(rating_scale=(0, 100))
    data = Dataset.load_from_df(dt[[uid, iid, rati]], reader)
    trainset = data.build_full_trainset()
    algo = KNNBasic()
    algo.fit(trainset)
    # Raw ids are cast to float to match the numeric dtype pandas loads from
    # the CSV -- presumably the id columns are numeric; confirm for new files.
    pred = algo.predict(float(value_uid), float(value_iid), r_ui=1, verbose=True)
    # Return result as JSON (FIX: dropped the duplicated dict initialisation).
    jsondata = {}
    jsondata["uid"] = pred.uid
    jsondata["idd"] = pred.iid
    jsondata["rati"] = round(pred.est, 2)
    return jsondata
def computeKNNBasicMovie(data, test_np):
    """Compute the k-NN basic item based method and return the predictions on the test into a file
    The method is on all the data and got the following settings:
        - Similarity function : MSD, item based
        - Number of closest neighbors : 23
    data : data frame which represent the train set
    test_np : data frame on which the prediction will be returned
    return : test_np with a column of prediction named 'knnbasic_item_rating'"""
    trainset, test = dataTrainSurprise(data, test_np)

    model = KNNBasic(k=23,
                     sim_options={'name': 'msd', 'user_based': False}).fit(trainset)

    def estimate(row):
        # Prediction tuple index 3 is the estimated rating (.est).
        return model.predict(row['user_id'], row['movie_id'])[3]

    test['knnbasic_item_rating'] = test[['user_id', 'movie_id']].apply(estimate, axis=1)
    return test
def from_to(self, namefile, uid, iid, rati, from_uid, to_uid, from_iid, to_iid):
    """Train KNNBasic on the named CSV (columns uid/iid/rati, rating scale
    0-100) and predict every (user, item) pair in the half-open ranges
    [from_uid, to_uid) x [from_iid, to_iid). Returns a list of
    [uid, iid, rounded estimate] rows, JSON-ready."""
    frame = pd.DataFrame(pd.read_csv('./container/' + namefile))
    # Retrieve the trainset.
    dataset = Dataset.load_from_df(frame[[uid, iid, rati]],
                                   Reader(rating_scale=(0, 100)))
    algo = KNNBasic()
    algo.fit(dataset.build_full_trainset())

    results = []
    for user in range(from_uid, to_uid):
        for item in range(from_iid, to_iid):
            pred = algo.predict(user, item, r_ui=1, verbose=True)
            results.append([pred.uid, pred.iid, round(pred.est, 2)])
    # Return result as JSON
    return results
def __recommend_movies(self, username):
    """Fill self.predictions with an estimated rating for every movie, then
    drop the ones the user has already rated."""
    frame = pd.DataFrame(self.ratings_dict)
    dataset = Dataset.load_from_df(frame[["user", "item", "rating"]],
                                   Reader(rating_scale=(1, 10)))
    model = KNNBasic(sim_options={"name": "cosine", 'user_based': True})
    model.fit(dataset.build_full_trainset())

    self.__get_all_movies()
    for movie in self.movies:
        self.predictions[movie] = model.predict(username, movie).est

    # Remove movies the user has already rated from the recommendations.
    for rated_movie in self.__get_user_rated_movies(
            self.__get_username_id(username)):
        del self.predictions[rated_movie]
# Known true ratings (r_ui) for the four probe (user, item) pairs defined above,
# so each printed prediction also shows its error.
r_ui1 = 4
r_ui2 = 4
r_ui3 = 1
r_ui4 = 3
verboseFlag = True  # print every prediction as it is made

# get a prediction for specific users and items.
print("KNNBaseLine:")
predBaseLine1 = algoBaseLine.predict(uid1, iid1, r_ui = r_ui1, verbose = verboseFlag)
predBaseLine2 = algoBaseLine.predict(uid2, iid2, r_ui = r_ui2, verbose = verboseFlag)
predBaseLine3 = algoBaseLine.predict(uid3, iid3, r_ui = r_ui3, verbose = verboseFlag)
predBaseLine4 = algoBaseLine.predict(uid4, iid4, r_ui = r_ui4, verbose = verboseFlag)

print("\nKNNBasic:")
predBasic1 = algoBasic.predict(uid1, iid1, r_ui = r_ui1, verbose = verboseFlag)
predBasic2 = algoBasic.predict(uid2, iid2, r_ui = r_ui2, verbose = verboseFlag)
predBasic3 = algoBasic.predict(uid3, iid3, r_ui = r_ui3, verbose = verboseFlag)
predBasic4 = algoBasic.predict(uid4, iid4, r_ui = r_ui4, verbose = verboseFlag)

print("\nKNNWithMeans:")
predWithMeans1 = algoWithMeans.predict(uid1, iid1, r_ui = r_ui1, verbose = verboseFlag)
predWithMeans2 = algoWithMeans.predict(uid2, iid2, r_ui = r_ui2, verbose = verboseFlag)
predWithMeans3 = algoWithMeans.predict(uid3, iid3, r_ui = r_ui3, verbose = verboseFlag)
predWithMeans4 = algoWithMeans.predict(uid4, iid4, r_ui = r_ui4, verbose = verboseFlag)

print("\nKNNWithZScore:")
predWithZScore1 = algoWithZScore.predict(uid1, iid1, r_ui = r_ui1, verbose = verboseFlag)
predWithZScore2 = algoWithZScore.predict(uid2, iid2, r_ui = r_ui2, verbose = verboseFlag)
predWithZScore3 = algoWithZScore.predict(uid3, iid3, r_ui = r_ui3, verbose = verboseFlag)
predWithZScore4 = algoWithZScore.predict(uid4, iid4, r_ui = r_ui4, verbose = verboseFlag)
prediction_mf # Tes rekomendasinya recom_svd = algo_svd.predict(uid='Jays',iid='AWMjT0WguC1rwyj_rFh3') recom_svd sim_options = {'name': 'pearson_baseline','shrinkage': 0} algo = KNNBasic(sim_options=sim_options) algo_knn = KNNBasic(k=50, sim_options=sim_options) prediction_knn = algo_knn.fit(trainset).test(testset) # Prediksi prediction_knn # Tes rekomendasinya recom_knn = algo_knn.predict(uid='Jays',iid='AWMjT0WguC1rwyj_rFh3') recom_knn accuracy.mae(prediction_mf) accuracy.fcp(prediction_mf) accuracy.rmse(prediction_mf) accuracy.mae(prediction_knn) accuracy.fcp(prediction_knn) accuracy.rmse(prediction_knn) # Dataset yang akan dipakai untuk train test split dengan framework surprise rating[['reviews.username','id','reviewsRating']]
from surprise import Reader, Dataset, KNNBasic

# break data file down into an array full of strings
# FIX: use a context manager so the file handle is closed deterministically
# (the original open() was never closed).
with open('./data.txt') as f:
    all_lines = f.readlines()

# load information from file into dataset using reader
reader = Reader(line_format='item user rating', sep=',', rating_scale=(1, 5))
data = Dataset.load_from_file('./data.txt', reader=reader)

# split dataset into n folds, can be changed
# NOTE(review): the folds are never iterated -- training below uses the full
# trainset -- and Dataset.split() only exists on old surprise versions.
data.split(n_folds=5)

# using mean squared difference similarity measure here, with min_support set
# to 1 to consider only users who have at least 1 movie in common
sim_options = {'name': 'msd', 'user_based': False, 'min_support': 1}

trainingset = data.build_full_trainset()

# uses basic KNN algorithm, trained on the full trainset
# (train() is the legacy API matching the data.split() call above).
algorithm = KNNBasic(sim_options=sim_options)
algorithm.train(trainingset)

# predict rating using item and user ID as input
userid = str(input("Please enter user ID: "))
itemid = str(input("Please enter movie ID: "))
print(algorithm.predict(userid, itemid))
from surprise import KNNBasic
from surprise import Dataset
from surprise import evaluate

# Load the movielens-100k dataset and split it into 3 folds for
# cross-validation.
data = Dataset.load_builtin('ml-100k')

# Retrieve the trainset.
trainset = data.build_full_trainset()

# Build an algorithm, and train it.
algo = KNNBasic()
algo.train(trainset)

##########################################
# we can now query for specific predicions
uid = str(196)  # raw user id (as in the ratings file). They are **strings**!
iid = str(302)  # raw item id (as in the ratings file). They are **strings**!

# get a prediction for specific users and items.
# FIX: the keyword is r_ui, not r -- AlgoBase.predict() has no parameter named
# 'r', so the original call raised TypeError.
pred = algo.predict(uid, iid, r_ui=4, verbose=True)
##########################################

# Tired? You can still call the 'split' method!
data.split(n_folds=3)

evaluate(algo, data)
class BaselineMF:
    """Collaborative-filtering baseline backed by a surprise algorithm
    (KNNBasic(k=2) by default), with optional logit-space ratings."""

    def __init__(self, cf_algo=None, logit=False):
        """
        fit method takes a ContentDataset and fits it

        Parameters
        ----------
        cf_algo (surprise.AlgoBase): CF algorithm; defaults to KNNBasic(k=2)
        logit (bool): if True, fit in logit space and sigmoid the predictions
        """
        self.logit = logit
        self.question_truth_dict = {}
        self.average_true_rating = 0.5
        self.average_false_rating = 0.5
        # NOTE(review): size_average is deprecated in modern torch
        # (reduction='mean' is the equivalent) -- confirm the pinned version.
        self.loss_fn = nn.MSELoss(size_average=True)
        if cf_algo is None:
            self.cf_algo = KNNBasic(k=2)
        else:
            self.cf_algo = cf_algo
        #self.svd = SVD(n_epochs=500, verbose=True, lr_all=0.001, n_factors=50)

    def dataloader_extract(self, sample):
        """Split a dataloader batch dict into (ratings, user_ids, item_ids)
        pandas Series; ids are stringified for surprise raw ids."""
        ratings = pd.Series(np.array(list(sample['rating'])))
        user_ids = pd.Series(sample['user_id']).astype(str)
        item_ids = pd.Series(sample['item_id']).astype(str)
        return ratings, user_ids, item_ids

    def logit_fn(self, p, epsilon=1e-3):
        """Return log(p / (1 - p)) with p clamped to [epsilon, 1 - epsilon].

        FIX: the original loop rebound its loop variable (`item = epsilon`),
        which never modified p, so p == 0 or p == 1 produced +/-inf logits.
        np.clip applies the intended clamping to the whole array/Series.
        """
        p = np.clip(p, epsilon, 1 - epsilon)
        return np.log(p / (1 - p))

    def sigmoid_fn(self, x):
        """Inverse of logit_fn (without the clamping)."""
        return 1 / (1 + np.exp(-x))

    def fit(self, dataset, train_sampler):
        """Runs the fit method which simply works out the average response for
        'true' and 'false' questions, where 'true' questions are those where
        the average rating is greater than 0.5"""
        t0 = time.time()
        data_loader = DataLoader(dataset,
                                 batch_size=len(train_sampler),
                                 sampler=train_sampler)
        sample = iter(data_loader).next()
        ratings, user_ids, item_ids = self.dataloader_extract(sample)
        if self.logit:
            ratings = self.logit_fn(ratings)
        possible_ratings = ratings.unique()
        ratings_dict = {
            'itemID': item_ids,
            'userID': user_ids,
            'rating': ratings
        }
        df = pd.DataFrame(ratings_dict)
        reader = Reader(rating_scale=(0, 1))
        data = Dataset.load_from_df(df[['userID', 'itemID', 'rating']], reader)
        trainset = data.build_full_trainset()
        # NOTE(review): .train() is the pre-1.1 surprise API (fit() on modern
        # versions) and iterator .next() is legacy -- this targets the pinned
        # dependency versions; confirm before upgrading.
        self.cf_algo.train(trainset)

    def predict(self, dataset, sampler, batch_size=64):
        """Return a list of predicted ratings for every (user, item) pair in
        the sampled part of the dataset (sigmoid-ed when self.logit)."""
        # I'm not entirely sure that the build_full_testset
        # function works as I'd expect, so instead we loop
        # through all the test ids and predict one-at-a-time
        preds = []
        data_loader = DataLoader(dataset, batch_size=len(dataset), sampler=sampler)
        sample = iter(data_loader).next()
        ratings, user_ids, item_ids = self.dataloader_extract(sample)
        for user_id, item_id in zip(user_ids, item_ids):
            # Prediction tuple index 3 is the estimated rating (.est).
            pred = self.cf_algo.predict(str(user_id), str(item_id))[3]
            if self.logit:
                pred = self.sigmoid_fn(pred)
            preds.append(pred)
        return (preds)

    def score(self, dataset, sampler, batch_size=64, only_slow=True):
        """Scores the baseline on predictions made on the dataset provided,
        sampled with the given sampler. If `only_slow` is true, then only the
        slow judgments in the sampled part of the dataset are scored"""
        predictions = self.predict(dataset, sampler, batch_size)
        data_loader = DataLoader(dataset, batch_size=len(dataset), sampler=sampler)
        testset = iter(data_loader).next()
        ratings, user_ids, item_ids, = self.dataloader_extract(testset)
        user_ids = user_ids.astype(int)
        ratings = torch.Tensor(ratings)
        predictions = torch.Tensor(predictions)
        # Note that all baselines are passed flattened datasets, so we
        # have to work out which of the users correspond to the latest
        # times
        if only_slow:
            # Users with uid % 3 == 2 are the 'slow judgment' entries.
            long_time_uids = [i for i in np.unique(user_ids) if i % 3 == 2]
            new_ratings = []
            new_preds = []
            for index, rating in enumerate(ratings):
                if user_ids[index] in long_time_uids:
                    new_ratings.append(rating)
            for index, pred in enumerate(predictions):
                if user_ids[index] in long_time_uids:
                    new_preds.append(pred)
            loss = self.loss_fn(torch.Tensor(new_preds),
                                torch.Tensor(new_ratings).cpu())
            return loss.cpu().data.item()
        else:
            loss = self.loss_fn(predictions, ratings.cpu())
            return loss.cpu().data.item()
# KNNBasic with 50 neighbours and Pearson similarity (user-based).
print("Usando o algoritmo KNNBasic com 50 vizinhos")
print("Algoritmo de similiraridade: Pearson")
# FIX: 'verbose' is not a similarity option -- inside sim_options it was
# silently ignored. It belongs as a KNNBasic keyword argument (True is also
# the default, so behaviour is unchanged).
algoritmo = KNNBasic(k=50,
                     sim_options={
                         'name': 'pearson',
                         'user_based': True,
                     },
                     verbose=True)
algoritmo.fit(trainset)

# Select the user and the movie to analyse:
# user 49 -- between 18 and 24 years old, a programmer living in Houston, Texas.
uid = str(49)
# Movie watched and rated: Negotiator, The (1998)::Action|Thriller. Rating 4.
iid = str(2058)  # raw item id

# get a prediction for specific users and items.
print("Predição de avaliação: ")
pred = algoritmo.predict(uid, iid, r_ui=4, verbose=True)

# run the trained model against the testset
test_pred = algoritmo.test(testset)

# Evaluate RMSE
print("Avaliação RMSE: ")
accuracy.rmse(test_pred, verbose=True)

# Evaluate MAE
print("Avaliação MAE: ")
accuracy.mae(test_pred, verbose=True)
# -*- coding: utf-8 -*-
"""
Created on Mon Sep 3 22:37:15 2018

@author: soug9
"""
from surprise import KNNBasic
from surprise import Dataset

data = Dataset.load_builtin('ml-100k')
trainset = data.build_full_trainset()

algo = KNNBasic()
algo.fit(trainset)

# Prediction with a supplied true rating (r_ui); note 8 lies outside ml-100k's
# 1-5 scale, so the printed error is not meaningful.
algo.predict('197', '223', 8)
# Unknown user/item: surprise falls back to the default prediction
# (details['was_impossible'] is True).
algo.predict('afsdf', 'gggegw')
# FIX: estimate() takes *inner* ids (integers assigned by the trainset), not
# raw string ids -- the original call raised PredictionImpossible because the
# raw strings are unknown as inner ids. Convert first (predict() does this,
# plus clipping and error handling, automatically).
algo.estimate(trainset.to_inner_uid('197'), trainset.to_inner_iid('223'))
from surprise import KNNBasic
from surprise import Dataset

# Train KNNBasic on the full ml-100k ratings.
trainset = Dataset.load_builtin('ml-100k').build_full_trainset()
model = KNNBasic()
model.fit(trainset)

# Raw ids (as they appear in the ratings file) are strings; r_ui is the known
# true rating so the printed prediction also shows its error.
model.predict(str(196), str(302), r_ui=4, verbose=True)
# %% Loading Test Data
file_path = "Data/sample_submission.csv"
data_test = utils.load_data_desired(file_path)

# %% Test Prediction
# One list of unclipped estimates per trained algorithm; order follows data_test.
Pred_Test_SVD = []
Pred_Test_NMF = []
Pred_Test_SL1 = []
Pred_Test_KNN = []
Pred_Test_BSL = []

start = time.time()
for line in data_test:
    # line[1] holds the raw user id, line[0] the raw item id; surprise wants strings.
    u_raw = str(line[1])
    i_raw = str(line[0])
    Pred_Test_KNN.append(alg_KNN.predict(u_raw, i_raw, clip=False).est)
    Pred_Test_SVD.append(alg_SVD.predict(u_raw, i_raw, clip=False).est)
    Pred_Test_NMF.append(alg_NMF.predict(u_raw, i_raw, clip=False).est)
    Pred_Test_SL1.append(alg_SL1.predict(u_raw, i_raw, clip=False).est)
    Pred_Test_BSL.append(alg_BSL.predict(u_raw, i_raw, clip=False).est)
end = time.time()
print("***********************************************")
algo.fit(trainset) # test algorithm on testset predictions = algo.test(testset) # Retrieve top N predictions for each item in predictions (in the test set) # make prediction recipe = random.choice(df_users['item'].unique()) # pick a recipe that has multiple users rating it uid = 613 iid = 2 print(users[uid], ':', titles[iid]) prediction = algo.predict(users[uid], titles[iid], 1) prediction.est # In[ ]: # In[ ]:
del df
print(time.asctime(), 'loaded training data, now building trainset')
trainset = data.build_full_trainset()
del data
print(time.asctime(), 'training set built, now training')

# Hyper-parameters; the title doubles as the output file name.
k, min_k = 20, 5
title = 'KNN_k' + str(k) + '_mink' + str(min_k)
algo = KNNBasic(k=k, min_k=min_k)
algo.fit(trainset)
print(time.asctime(), 'training complete, now loading prediction data')

# Keep only the first two columns (user, item) of the prediction file.
to_predict = pd.read_csv(file_path_test, delimiter=' ', header=None)
to_predict = to_predict.values.T[0:2].T
predicted = np.zeros(len(to_predict))
print(time.asctime(), 'prediction data loaded, now predicting')

for i, (user, item) in enumerate(to_predict):
    predicted[i] = algo.predict(uid=user, iid=item, verbose=0).est
    if i % 500000 == 0:
        print(i, 'of', len(predicted), 'predicted')

print(time.asctime(), 'now saving predictions')
# CHECK THE PATHS FOR YOUR OWN COMPUTER
np.savetxt('../custom_data/' + title + '.dta', predicted, fmt='%.3f')
print(time.asctime(), 'done')
trainset, testset = train_test_split(data, test_size=.25)
length = len(testset)

algo = KNNBasic()
algo.fit(trainset)
predictions = algo.test(testset)
accuracy.rmse(predictions)
accuracy.mae(predictions)

acc = 0          # predictions within +/-0.75 of the true rating
ActualTrue = 0   # items the user actually liked (true rating >= 4)
ActPredTrue = 0  # liked items also predicted liked (est >= 3.75)
PredTrue = 0     # items predicted liked (est >= 3.5)
for i in range(length):
    predic = algo.predict(testset[i][0], testset[i][1], testset[i][2])
    # predic[2] is the true rating (r_ui); predic[3] is the estimate (est).
    if predic[3] - predic[2] < 0.75 and predic[3] - predic[2] > -0.75:
        acc += 1
    # NOTE(review): nesting reconstructed from flattened source -- ActPredTrue
    # is counted among actually-liked items (needed for a meaningful
    # precision/recall); confirm against the original indentation.
    if predic[2] >= 4:
        ActualTrue += 1
        if predic[3] >= 3.75:
            ActPredTrue += 1
    if predic[3] >= 3.5:
        PredTrue += 1

precition = ((1.0 * ActPredTrue) / PredTrue)
recall = ((1.0 * ActPredTrue) / ActualTrue)
# FIX: renamed the hit-rate variable -- it previously shadowed the imported
# surprise `accuracy` module, breaking any later accuracy.* call.
hit_rate = acc / length
print("\nrecall :", recall)
print("\nFinal Accuracy Values:", hit_rate)
print("\nPrecision :", precition)
""" Created on Mon Feb 4 00:08:44 2019 @author: abhijithneilabraham """ from surprise import KNNBasic from surprise import SVD from surprise import Dataset from surprise.model_selection import cross_validate from surprise import Reader from surprise.model_selection import train_test_split import pandas as pd customer=pd.read_csv('names.csv') reader = Reader(line_format='user item rating',rating_scale=(1, 5),sep=',') fieldnames = ['id', 'male_or_female'] for i in range(25): fieldnames.insert(2,'question'+str(i+1)) data = Dataset.load_from_df(customer[fieldnames], reader) del fieldnames[2] trainset = data.build_full_trainset() algo = KNNBasic() algo.fit(trainset) uid=str(12) iid=str(0) pred=algo.predict(uid,iid,r_ui=None,verbose=True)
rawiid = 'NightListener' # was not rated by Toby rawiid = 'LadyinWater' # was not rated by Toby rawiid = 'JustMyLuck' # was not rated by Toby # convert user and items names (raw ids) into indexes (inner ids) # (raw ids are the user & item names as given in the datafile, they can be ints or strings # inner ids are indexes into the sorted rawids) uid = trainset.to_inner_uid(rawuid) uid iid = trainset.to_inner_iid(rawiid) iid # if the actual rating is known it can be passed as an argument realrating = dict(trainset.ur[uid])[iid] realrating pred = algo.predict(rawuid, rawiid, r_ui=realrating, verbose=True) # if the actual rating is unknown use the below pred = algo.predict(rawuid, rawiid) pred # FYI: can compare with prediction made using demolib (the library used in workshop1) usersA, umap, imap = makeratingsmatrix(trans) targetuser = usersA[umap[rawuid], ] targetuser predictrating_UU(targetuser, usersA, imap[rawiid], simfun=pearsonsim) # FYI: to help understand how predictions are made when using matrix factorisation we can # compute the prediction ourselves from the factorised matrices and the biases: pu,qi,bu,bi # examine top-left part of the User and Item preference matrix
#algo = KNNWithMeans(k=40,min_k=5,sim_options=sim_options) #algo = KNNWithZScore(k=40,min_k=5,sim_options=sim_options) algo.fit(trainset) # get a prediction for specific users and items. uid = my_username iid = str(19) # Monster # #pred = algo.predict(uid, iid, verbose=True) # #for x in range (1, 1001): # pred = algo.predict(uid, str(x), verbose=True) my_dict = {} for id in anime_list: pred = algo.predict(uid, id, verbose=False) if (id not in user_ratings): if (pred[4]['was_impossible'] == False): #print('id:', pred[1], ':', pred[3], ', k:', pred[4]['actual_k'], pred[4]) my_dict[id] = pred[3] top_list = sorted(my_dict, key=my_dict.get, reverse=True)[:50] for item in top_list: print('Rank:\t' + anime_list[item][1], '\t', anime_list[item][0] + ' (' + item + ')', '\t', my_dict[item]) ''' this should print a list of your worst recommendations? print('\n WORST') worst_list = sorted(my_dict, key=my_dict.get, reverse=False)[:50] for item in worst_list: if(int(item) < 1000): print('Rank:\t' + anime_list[item][1], '\t', anime_list[item][0] + ' (' + item + ')', '\t', my_dict[item])
normal = NormalPredictor() normal.fit(trainset) normal_prediction_seq = [] for r_u, r_i, r in tqdm(testset, desc="get prediction sequence of random algorithm"): res = estimate_rs = normal.predict(r_u, r_i) normal_prediction_seq.append(res.est) if not os.path.exists("prediction_seqs/knn.json"): knn = KNNBasic(k=90) knn.fit(trainset) KNN_prediction_seq = [] for r_u, r_i, r in tqdm(testset, desc="get prediction sequence of knn"): res = estimate_rs = knn.predict(r_u, r_i) KNN_prediction_seq.append(res.est) with open("prediction_seqs/knn.json", "w") as f: f.write(json.dumps(KNN_prediction_seq)) else: KNN_prediction_seq = json.loads(open("prediction_seqs/knn.json").read()) if not os.path.exists("prediction_seqs/svd.json"): svd = SVD(n_factors=40) svd.fit(trainset) svd_prediction_seq = [] for r_u, r_i, r in tqdm(testset, desc="get prediction sequence of svd"): res = estimate_rs = svd.predict(r_u, r_i) svd_prediction_seq.append(res.est) with open("prediction_seqs/svd.json", "w") as f:
# APPLYING ALGORITHM KNN Basic algo.train(trainset) print "ALGORITHM USED: \n", algo testset = trainset.build_anti_testset() predictions = algo.test(testset=testset) top_n = get_top_n(predictions, 5) # ---------------------------------------------------- PREDICTION VERIFICATION - CL0 (945) print "\t\tINITIATING IN CLUSTER 0 (945)\n" search_key = raw_input("Enter User ID:") item_id = raw_input("Enter Item ID:") actual_rating = input("Enter actual Rating:") print algo.predict(str(search_key), item_id, actual_rating) # ---------------------------------------------------- PREDICTION VERIFICATION - CL1 (944) print "\t\tINITIATING IN CLUSTER 1 (944)\n" search_key = raw_input("Enter User ID:") item_id = raw_input("Enter Item ID:") actual_rating = input("Enter actual Rating:") print algo.predict(str(search_key), item_id, actual_rating) # --------------------- GENERATE FULL PREDICTION csvfile = 'pred_matrix-Cluster0.csv' with open(csvfile, "w") as output: writer = csv.writer(output, delimiter=',', lineterminator='\n') writer.writerow(['uid', 'iid', 'rat']) for uid, user_ratings in top_n.items():