def SVDloadData(dat_file='/home/commons/RecSys/MOVIEDATA/MOVIEDATA/ml-1m/ratings.dat'):
    """Load a '::'-separated ratings file into a fresh SVD model.

    Turns on recsys verbose output, loads ``dat_file`` into a new ``SVD``
    instance, prints the loaded matrix and returns the model. No
    factorization is computed here; callers run ``compute`` themselves.

    Args:
        dat_file: Path of the ratings file to load. Defaults to the
            location that used to be hard-coded, so existing zero-argument
            callers behave exactly as before.

    Returns:
        The ``SVD`` instance with the data loaded.
    """
    svd = SVD()
    # Module-wide switch: affects every recsys algorithm, not only this model.
    recsys.algorithm.VERBOSE = True

    # Field 0 is used as the matrix column, field 1 as the row and field 2
    # as the value; ids are converted with int.
    svd.load_data(filename=dat_file, sep='::',
                  format={'col': 0, 'row': 1, 'value': 2, 'ids': int})

    # Single-argument print(...) gives the same output on Python 2 and 3.
    print(svd.get_matrix())
    return svd
def quickstart():
    """Run the MovieLens SVD walkthrough end to end, printing each result.

    Loads ``DATA_DIR + 'ml-1m-ratings.dat'``, computes an SVD with k=100,
    then prints: the items similar to item 1, the predicted and stored
    rating for (item 1, user 1), and the recommendation list for item 1.

    Runs straight through without stopping in a debugger, so it can be
    used from non-interactive scripts. Returns None.
    """
    svd = SVD()
    # Module-wide switch: affects every recsys algorithm, not only this model.
    recsys.algorithm.VERBOSE = True

    # load movielens data
    dat_file = DATA_DIR + 'ml-1m-ratings.dat'
    svd.load_data(filename=dat_file, sep='::',
                  format={'col': 0, 'row': 1, 'value': 2, 'ids': int})

    # compute svd
    k = 100
    svd.compute(k=k, min_values=10, pre_normalize=None,
                mean_center=True, post_normalize=True)

    # movie id
    ITEMID1 = 1  # toy story

    # get movies similar to toy story
    # (single-argument print(...) gives the same output on Python 2 and 3)
    print(svd.similar(ITEMID1))

    # get predicted rating for given user & movie
    MIN_RATING = 0.0
    MAX_RATING = 5.0
    USERID = 1
    ITEMID = 1

    # get predicted rating for user1 and item1, mapped onto min max
    pred = svd.predict(ITEMID, USERID, MIN_RATING, MAX_RATING)
    # value currently stored in the loaded matrix for the same pair
    actual = svd.get_matrix().value(ITEMID, USERID)
    print('predicted rating = {0}'.format(pred))
    print('actual rating = {0}'.format(actual))

    print('which users should see Toy Story?:')
    print(svd.recommend(ITEMID))
def loadSVD():
    """Build an SVD model from 'favRate.dat' and print items similar to 897346.

    Steps, in order:
      1. Load the '::'-separated file 'favRate.dat' into a new ``SVD``.
      2. Save the loaded data to 'svd.dat'.
      3. Compute the factorization with k=20 (rows pre-normalized, no mean
         centering, post-normalized), passing ``savefile='.'``.
      4. Print the loaded matrix.
      5. Look up the 10 entries most similar to id 897346 and print each as
         ``<id> <title> <similarity>``, where the title comes from the
         mapping stored in 'swoffering.yaml' (keyed by the id as a string).

    Returns None.

    Raises:
        KeyError: if a similar id has no entry in the YAML title mapping.
    """
    filename = 'favRate.dat'
    svd = SVD()
    svd.load_data(filename=filename, sep='::',
                  format={'col': 0, 'row': 1, 'value': 2})
    svd.save_data("svd.dat", False)

    K = 20
    svd.compute(k=K, min_values=1, pre_normalize="rows", mean_center=False,
                post_normalize=True, savefile='.')

    sparse_matrix = svd.get_matrix()
    # The result is not used below; the call is kept in case the model
    # caches the similarity matrix for later similar() lookups.
    # NOTE(review): confirm, then drop it if it has no side effect.
    sim_matrix = svd.get_matrix_similarity()
    print(sparse_matrix)

    # other ids left in the original notes: 1173893, 1396943
    sim = svd.similar(897346, 10)

    # Context manager guarantees the YAML file handle is closed.
    with open('swoffering.yaml', 'r') as titleStream:
        # NOTE(security): yaml.load without an explicit safe loader can
        # construct arbitrary objects; switch to yaml.safe_load if this file
        # can ever come from an untrusted source.
        titleList = yaml.load(titleStream)

    for offid, similar in sim:
        # %s formatting reproduces "print a, b, c" output (str() of each
        # value, space-separated) on both Python 2 and 3.
        print('%s %s %s' % (offid, titleList[str(offid)], similar))
def quickstart():
    """Run the MovieLens SVD walkthrough on 'ml-1m/ratings.dat'.

    Loads the ratings file, computes an SVD with k=100, queries the items
    similar to item 1, prints the predicted and stored rating for
    (item 1, user 1), and finally queries the recommendation list for
    item 1. The similarity and recommendation results are computed but not
    printed or returned.

    Runs straight through without stopping in a debugger, so it can be
    used from non-interactive scripts. Returns None.
    """
    svd = SVD()
    # Module-wide switch: affects every recsys algorithm, not only this model.
    recsys.algorithm.VERBOSE = True

    # load movielens data
    dat_file = 'ml-1m/ratings.dat'
    svd.load_data(filename=dat_file, sep='::',
                  format={'col': 0, 'row': 1, 'value': 2, 'ids': int})

    # compute svd
    k = 100
    svd.compute(k=k, min_values=10, pre_normalize=None,
                mean_center=True, post_normalize=True)

    # movie id
    ITEMID1 = 1  # toy story

    # get movies similar to toy story (return value is not used here)
    svd.similar(ITEMID1)

    # get predicted rating for given user & movie
    MIN_RATING = 0.0
    MAX_RATING = 5.0
    USERID = 1
    ITEMID = 1

    # get predicted rating
    pred = svd.predict(ITEMID, USERID, MIN_RATING, MAX_RATING)
    # value currently stored in the loaded matrix for the same pair
    actual = svd.get_matrix().value(ITEMID, USERID)
    # single-argument print(...) gives the same output on Python 2 and 3
    print('predicted rating = {0}'.format(pred))
    print('actual rating = {0}'.format(actual))

    # which users should see Toy Story? (return value is not used here)
    svd.recommend(ITEMID)
'col': 0, 'row': 1, 'value': 2, 'ids': float }) k = 30 svd.compute(k=k, min_values=10, pre_normalize=None, mean_center=True, post_normalize=True, savefile='/tmp/movielens') # ITEMID1 = 1 # Toy Story (1995) # ITEMID2 = 2355 # A bug's life (1998) # print svd.similarity(ITEMID1, ITEMID2) MIN_RATING = 1.0 MAX_RATING = 5.0 USERID = 1 ITEMID = 1129 print svd.predict(ITEMID, USERID, MIN_RATING, MAX_RATING) print svd.predict(1953, 1, MIN_RATING, MAX_RATING) # Predicted value 5.0 print svd.get_matrix().value(1953, 1) # Real value 5.0
print "similaridad entre items sin usar la matrix que ya se genero " print svd2.similarity(ITEMID1, ITEMID2) print "similaridad entre items usando la matrix guardada" print svd.similarity(ITEMID1, ITEMID2) print "Recomendaciones para el itemid1" print svd.similar(ITEMID1) #Haciendo las predicciones MIN_RATING = 0 MAX_RATING = 5 ITEMID = 1 USERID = 1 print svd.predict(ITEMID, USERID, MIN_RATING, MAX_RATING) print svd.get_matrix().value(ITEMID, USERID) #HACUIENDO RECOMENDACIONES AL USUARIO Y POR TITEM print svd.recommend( USERID, is_row=False) #cols are users and rows are items, thus we set is_row=False print svd.recommend(ITEMID) print "se deben mostrar 5 recomendaciones para el item 1" print svd.recommend(USERID, n=5, only_unknowns=True, is_row=False) #usando la matriz que ya esta generada from recsys.utils.svdlibc import SVDLIBC svdlibc = SVDLIBC('./data/ratings.dat') svdlibc.to_sparse_matrix(sep='::', format={
pre_normalize=None, mean_center=True, post_normalize=True) # predicted_rating = svd.predict(int(5), 'A1', 1, 10) # predicted_rating2 = svd.predict(int(1), 'A1', 1, 10) # print('Predicted rating', predicted_rating) # print('Predicted rating', predicted_rating2) records = ETLUtils.load_csv_file(file_name_header, '|') errors = [] for record in records: try: # print(record['user'], record['item'], record['rating']) user = record['user'] item = int(record['item']) predicted_rating = svd.predict(item, user, 1, 5) print(record['user'], record['item'], predicted_rating) # predicted_rating = round(predicted_rating) actual_rating = svd.get_matrix().value(item, user) error = abs(predicted_rating - actual_rating) errors.append(error) except KeyError: continue mean_absolute_error = MeanAbsoluteError.compute_list(errors) root_mean_square_error = RootMeanSquareError.compute_list(errors) print('Mean Absolute error: %f' % mean_absolute_error) print('Root mean square error: %f' % root_mean_square_error)
print(json.dumps(similaries, ensure_ascii=False)) # import pdb;pdb.set_trace() import sys sys.exit(0) print(svd.similar(ITEMID1)) # Returns: <ITEMID, Cosine Similarity Value> MIN_RATING = 0.0 MAX_RATING = 1.0 ITEMID = 109 USERID = 3837663637323963363639393565373833613237396534393132376338386362 print('testing..') print(svd.predict(ITEMID, USERID, MIN_RATING, MAX_RATING)) # Predicted value 5.0 print(svd.get_matrix().value(ITEMID, USERID)) # Real value 5.0 # Recommend (non-rated) movies to a user: print('recommend to user') print(svd.recommend(USERID, is_row=False)) #cols are users and rows are items, thus we set is_row=False print(svd.recommend(ITEMID)) import pdb;pdb.set_trace()
(595, 0.46031829709743477), # Beauty and the Beast (1907, 0.44589398718134365), # Mulan (364, 0.42908159895574161), # The Lion King (2081, 0.42566581277820803), # The Little Mermaid (3396, 0.42474056361935913), # The Muppet Movie (2761, 0.40439361857585354)] # The Iron Giant MIN_RATING = 0.0 MAX_RATING = 5.0 ITEMID = 1 USERID = 1 svd.predict(ITEMID, USERID, MIN_RATING, MAX_RATING) # Predicted value 5.0 svd.get_matrix().value(ITEMID, USERID) # Real value 5.0 svd.recommend(USERID, is_row=False) #cols are users and rows are items, thus we set is_row=False # Returns: <ITEMID, Predicted Rating> [(2905, 5.2133848204673416), # Shaggy D.A., The (318, 5.2052108435956033), # Shawshank Redemption, The (2019, 5.1037438278755474), # Seven Samurai (The Magnificent Seven) (1178, 5.0962756861447023), # Paths of Glory (1957) (904, 5.0771405690055724), # Rear Window (1954) (1250, 5.0744156653222436), # Bridge on the River Kwai, The (858, 5.0650911066862907), # Godfather, The (922, 5.0605327279819408), # Sunset Blvd. (1198, 5.0554543765500419), # Raiders of the Lost Ark (1148, 5.0548789542105332)] # Wrong Trousers, The
# Factorize the already-loaded `svd` model with k=100 and pass
# './data/MERGED6_svd' as the savefile.
k = 100
svd.compute(k=k, min_values=10, pre_normalize=None, mean_center=True, post_normalize=True, savefile='./data/MERGED6_svd')

# to load a saved svd
# NOTE(review): the path below is 'MERGED_svd' while compute() above saves to
# 'MERGED6_svd' -- confirm which file is intended before enabling this line.
# svd = SVD(filename='./data/MERGED_svd') # Loading already computed SVD model

# get the item_id with available results (n<10 rows & columns were cut out)
m = svd.get_matrix()
# Reaches into the wrapped matrix's private `_matrix` attribute for its row
# labels.
rowlabl = m._matrix.row_labels
ids = np.array(rowlabl)
# ==== can further reduce the tables using this list of id.

# calculate cosine similarity score between 2 items:
# svd.similarity(ids[0], ids[100]) # cosine similarity

# For each movie:
# 1. get the top 50 books
# 2. eliminating duplicates by comparing titles
# 3. save id and scores for the final 10 books & movies
# Factorize the already-loaded `svd` model with k=100, then measure how far
# its predictions are from the values stored in its own matrix.
k = 100
svd.compute(k=k, min_values=10, pre_normalize=None, mean_center=True, post_normalize=True)

# predicted_rating = svd.predict(int(5), 'A1', 1, 10)
# predicted_rating2 = svd.predict(int(1), 'A1', 1, 10)
# print('Predicted rating', predicted_rating)
# print('Predicted rating', predicted_rating2)

# Records are read from the '|'-separated file named by `file_name_header`;
# each record is indexed by 'user' and 'item' below.
records = ETLUtils.load_csv_file(file_name_header, '|')
# Absolute prediction errors, one per record that could be evaluated.
errors = []

for record in records:
    try:
        # print(record['user'], record['item'], record['rating'])
        user = record['user']
        item = int(record['item'])
        # Prediction is requested with 1 and 5 as the rating bounds.
        predicted_rating = svd.predict(item, user, 1, 5)
        print(record['user'], record['item'], predicted_rating)
        # predicted_rating = round(predicted_rating)
        # The reference value is looked up in the model's matrix, not taken
        # from the record itself.
        actual_rating = svd.get_matrix().value(item, user)
        error = abs(predicted_rating - actual_rating)
        errors.append(error)
    except KeyError:
        # Records that raise KeyError are skipped and do not contribute to
        # the metrics.
        continue

# Both metrics are computed from the same list of absolute errors.
mean_absolute_error = MeanAbsoluteError.compute_list(errors)
root_mean_square_error = RootMeanSquareError.compute_list(errors)
print('Mean Absolute error: %f' % mean_absolute_error)
print('Root mean square error: %f' % root_mean_square_error)
# Minimal example: build an SVD model from four in-memory tuples instead of a
# ratings file, then print one similarity query and one prediction.
from recsys.algorithm.factorize import SVD
from recsys.datamodel.data import Data

# Each tuple is (number, 'userN', 'itemN').
# NOTE(review): presumably (rating value, row id, column id) -- confirm
# against what Data.set expects.
data = [(4.0, 'user1', 'item1'), (2.0, 'user1', 'item3'), (1.0, 'user2', 'item1'), (5.0, 'user2', 'item4')]

d = Data()
d.set(data)

svd = SVD()
svd.set_data(d)
# `m` is not used again in this snippet.
m = svd.get_matrix()
svd.compute(k=2)

# Python 2 print statements.
print svd.similar('user1')
print svd.predict('user1', 'item1')
#svd.compute(k=K, pre_normalize=None, mean_center=True, post_normalize=True) print '' print 'COMPUTING SIMILARITY' print svd.similarity(1, 2) # similarity between items print svd.similar(1, 5) # show 5 similar items print '' print 'GENERATING PREDICTION' MIN_RATING = 0.0 MAX_RATING = 5.0 ITEMID = 1 USERID = 1 print svd.predict(ITEMID, USERID, MIN_RATING, MAX_RATING) # predicted rating value print svd.get_matrix().value(ITEMID, USERID) # real rating value print '' print 'GENERATING RECOMMENDATION' print svd.recommend(USERID, n=5, only_unknowns=True, is_row=False) #Evaluation using prediction-based metrics rmse = RMSE() mae = MAE() spearman = SpearmanRho() kendall = KendallTau() #decision = PrecisionRecallF1() for rating, item_id, user_id in test.get(): try: pred_rating = svd.predict(item_id, user_id) rmse.add(rating, pred_rating)