def get_preference(user_List):
    """Download Steam play-time data for each user and save it as ratings.

    For every id in ``user_List`` the Steam ``GetOwnedGames`` endpoint is
    queried.  Each game with a positive ``playtime_forever`` contributes one
    tuple ``(log10(playtime), appid, user_index)`` to a ``Data`` set, which
    is written to ``rating.dat`` after all users have been processed.

    Users whose response lacks the expected fields are skipped; they do not
    consume a user index and contribute no tuples.

    :param user_List: iterable of Steam ids (each is converted with ``str``
        before being placed in the request URL).
    :return: ``(preference_dict, user_map)`` where ``preference_dict`` maps
        each processed user to ``{appid: ln(playtime)}`` and ``user_map``
        maps the 1-based user index used in ``rating.dat`` back to the user.
        (Earlier versions built these dicts but returned ``None``.)
    """
    # NOTE(review): the API key is a hard-coded credential; it should be
    # moved to configuration and rotated.  Left unchanged to keep behaviour.
    base_url = ("http://api.steampowered.com/IPlayerService/GetOwnedGames/v0001/?"
                "key=147CBF377C6B648EC3DC73499CE73D32&steamid=")

    preference_dict = {}
    user_map = {}
    data = Data()  # rating tuples that end up in rating.dat
    index = 1
    for user in user_List:
        # Use the string form so non-string ids do not break concatenation.
        user_id = str(user)
        url = base_url + user_id + "&format=json"
        response = urllib2.urlopen(url)
        try:
            owned_game_data = json.loads(response.read().decode('utf-8-sig'))
        finally:
            response.close()

        user_pref = {}
        pending = []
        try:
            payload = owned_game_data['response']
            if payload['game_count'] != 0:
                for game in payload['games']:
                    playtime = game['playtime_forever']
                    if playtime > 0:
                        appid = game['appid']
                        # Preferences use the natural log, ratings use log10.
                        user_pref[appid] = math.log(playtime)
                        pending.append((math.log(playtime, 10), appid, index))
        except (KeyError, TypeError):
            # Response without the expected fields: skip this user entirely.
            continue

        # Commit only after the whole response parsed, so a failure half-way
        # through cannot leave tuples behind under an index that is reused.
        for rating in pending:
            data.add_tuple(rating)
        user_map[index] = user
        preference_dict[user] = user_pref
        index += 1

    data.save('rating.dat')
    return preference_dict, user_map
def build_model(self, uids, kn):
    """Fit an SVD model on the user/song pairs in ``uids``.

    Each ``(uid, song)`` pair found in ``uids`` is stored with value ``1`` as
    the tuple ``(1, song, uid)``.  The computed model is kept in
    ``self.model``.

    :param uids: mapping of user id to an iterable of songs.
    :param kn: value passed to ``SVD.compute`` as ``k``.
    """
    plays = Data()
    for uid, songs in uids.items():
        for song in songs:
            entry = (1, song, uid)
            plays.add_tuple(entry)

    model = SVD()
    model.set_data(plays)
    model.compute(k=kn, min_values=1)
    self.model = model
def _tth_name(tth, cache):
    """Return the stored name for ``tth``, querying ``db.tths`` once per hash."""
    if tth not in cache:
        cache[tth] = db.tths.find_one({'tth': tth})['name']
    return cache[tth]


def _magnet_links(entries, cache):
    """Build one magnet URI per entry, adding a display name when available."""
    links = []
    for entry in entries:
        base = 'magnet:?xt=urn:tree:tiger:' + entry[0]
        try:
            link = base + "&dn=" + unidecode.unidecode(_tth_name(entry[0], cache))
        except Exception:
            # Best effort: without a usable name, fall back to the bare hash.
            link = base
        links.append(link)
    return links


def recommended_files(user):
    """Return magnet links for files recommended to ``user``.

    Steps:

    1. If ``db.done_users`` marks the user as not yet processed, append the
       user's ``user::tth`` pairs to ``./dc_recom.dat`` and set the flag.
    2. Compute an SVD over ``./dc_recom.dat`` and take the 10 users most
       similar to ``user``.
    3. Compute a second SVD over only those users' files and ask it for up
       to 100 recommendations.
    4. Drop recommendations whose name is ``similar`` to one already kept,
       stopping as soon as 11 have been collected.

    :param user: user name; anything that is not a ``str`` is passed through
        ``unidecode.unidecode`` first.
    :return: list of magnet URI strings.
    """
    if not isinstance(user, str):
        user = unidecode.unidecode(user)

    # ``== False`` is kept on purpose: only an explicit False triggers the dump.
    if db.done_users.find_one({'user': user})['recommended'] == False:  # noqa: E712
        with open('./dc_recom.dat', 'a') as dump:
            for record in db.user_list.find({'user': user}):
                dump.write(record['user'] + '::' + record['tth'] + '\n')
        db.done_users.update({'user': user}, {'user': user, 'recommended': True})

    data = Data()
    data.load('./dc_recom.dat', sep='::', format={'col': 1, 'row': 0})
    svd = SVD()
    svd.set_data(data)
    svd.compute(k=1000, min_values=0, pre_normalize=None, mean_center=False,
                post_normalize=True)
    similar_users = [match[0] for match in svd.similar(user, n=10)]

    # Second model restricted to the files of the most similar users.
    newdata = Data()
    for neighbour in similar_users:
        for record in db.user_list.find({'user': neighbour}):
            newdata.add_tuple((1.0, neighbour, record['tth']))
    svd.set_data(newdata)
    svd.compute(k=1000, min_values=0, pre_normalize=None, mean_center=False,
                post_normalize=True)
    recoms = svd.recommend(user, is_row=True, only_unknowns=True, n=100)

    names = {}  # tth -> name, so each hash is looked up at most once
    res = []
    for candidate in recoms:
        duplicate = any(
            similar(_tth_name(candidate[0], names), _tth_name(kept[0], names))
            for kept in res
        )
        if duplicate:
            continue
        res.append(candidate)
        if len(res) > 10:
            break
    return _magnet_links(res, names)
def build_model(self, uids, kn):
    """Fit an SVD model on the user/song pairs in ``uids``.

    Each ``(uid, song)`` pair found in ``uids`` is stored with value ``1`` as
    the tuple ``(1, song, uid)``.  The computed model is kept in
    ``self.model``.

    :param uids: mapping of user id to an iterable of songs.
    :param kn: value passed to ``SVD.compute`` as ``k``.
    """
    plays = Data()
    for uid, songs in uids.items():
        for song in songs:
            entry = (1, song, uid)
            plays.add_tuple(entry)

    model = SVD()
    model.set_data(plays)
    model.compute(k=kn, min_values=1)
    self.model = model
def get_data_model_matrix(data):
    """
    Convert nested ``{user: {movie: rating}}`` data into a recsys data set.

    Every rating becomes one ``<value, row, column>`` tuple, stored as
    ``(rating, user, movie)``.

    :param data: mapping of user to a mapping of movie to rating.
    :return: data object (recsys.datamodel.Data())
    """
    matrix = Data()
    for user, reviews in data.items():
        for movie in reviews:
            rating = reviews[movie]
            matrix.add_tuple((rating, user, movie))
    return matrix
def get_data_model_matrix(data):
    """
    Convert nested ``{user: {movie: rating}}`` data into a recsys data set.

    Every rating becomes one ``<value, row, column>`` tuple, stored as
    ``(rating, user, movie)``.

    :param data: mapping of user to a mapping of movie to rating.
    :return: data object (recsys.datamodel.Data())
    """
    matrix = Data()
    for user, reviews in data.items():
        for movie in reviews:
            rating = reviews[movie]
            matrix.add_tuple((rating, user, movie))
    return matrix
def prepare_data(raw_data):
    """Flatten ``raw_data`` into a ``Data`` set of positional rating tuples.

    ``raw_data`` maps a user id to a mapping of item id to a 2-element value
    whose first element is the rate.  Each entry is stored as
    ``(float(rate), user_position, item_position)`` with both positions
    1-based.

    NOTE(review): the item position restarts at 1 for every user, so it is
    the item's position inside that user's mapping, not a global item id.
    Confirm this is what callers expect.

    :return: the populated ``Data`` object.
    """
    dataset = Data()
    for user_pos, user_key in enumerate(raw_data.keys(), 1):
        user_items = raw_data[user_key]
        for item_pos, item_key in enumerate(user_items.keys(), 1):
            rate, _ = user_items[item_key]
            dataset.add_tuple((float(rate), user_pos, item_pos))
    return dataset
def prepare_data(raw_data):
    """Flatten ``raw_data`` into a ``Data`` set of positional rating tuples.

    ``raw_data`` maps a user id to a mapping of item id to a 2-element value
    whose first element is the rate.  Each entry is stored as
    ``(float(rate), user_position, item_position)`` with both positions
    1-based.

    NOTE(review): the item position restarts at 1 for every user, so it is
    the item's position inside that user's mapping, not a global item id.
    Confirm this is what callers expect.

    :return: the populated ``Data`` object.
    """
    dataset = Data()
    for user_pos, user_key in enumerate(raw_data.keys(), 1):
        user_items = raw_data[user_key]
        for item_pos, item_key in enumerate(user_items.keys(), 1):
            rate, _ = user_items[item_key]
            dataset.add_tuple((float(rate), user_pos, item_pos))
    return dataset
def setup_svd(self, vote_list):
    """Return the SVD model, building it from ``vote_list`` if needed.

    When ``self.svd`` is already set it is returned untouched.  Otherwise a
    new ``SVD`` is stored under ``self.cache['svd']``, fed one tuple per vote
    and computed with ``k=self.k``.

    :param vote_list: iterable of votes; ``vote[0].id`` is the user id,
        ``vote[1]`` the item id and ``vote[2]`` the value (cast to float).
    :return: ``self.svd`` (presumably backed by ``self.cache['svd']`` —
        confirm against the class definition).
    """
    if self.svd is not None:
        return self.svd

    self.cache['svd'] = SVD()
    votes = Data()
    for vote in vote_list:
        voter_id = vote[0].id
        target_id = vote[1]
        score = float(vote[2])
        # Tuple format is: <value, row, column>
        votes.add_tuple((score, target_id, voter_id))
    self.cache['svd'].set_data(votes)
    self.cache['svd'].compute(k=self.k, min_values=1)
    return self.svd
def get_friend_matrix(u_ids, raw_data):
    """Build a ``Data`` set of positional rating tuples for ``u_ids``.

    For each user id in ``u_ids`` (in order), every item in
    ``raw_data[u_id]`` is stored as
    ``(float(rate), user_position, item_position)``; both positions are
    1-based and the second element of each stored value is ignored.

    NOTE(review): the item position restarts at 1 for every user, so it is
    the item's position inside that user's mapping, not a global item id.

    :return: the populated ``Data`` object.
    """
    matrix = Data()
    for user_pos, user_key in enumerate(u_ids, 1):
        item_keys = raw_data[user_key].keys()
        for item_pos, item_key in enumerate(item_keys, 1):
            rate, _timestamp = raw_data[user_key][item_key]
            matrix.add_tuple((float(rate), user_pos, item_pos))
    return matrix
def setUp(self):
    """Prepare the fixtures shared by the tests.

    ``self.ratings`` is a ``Data`` set holding one
    ``(stars, item_id, user_id)`` tuple per entry of the module-level
    ``ratings``; ``self.movies`` maps each movie id from ``movie_genres`` to
    an ``Item`` carrying its name and genres.
    """
    rating_set = Data()
    for row in ratings:
        stars, item_id, user_id = row
        rating_set.add_tuple((stars, item_id, user_id))

    catalogue = {}
    for mid, name, genres in movie_genres:
        entry = Item(mid)
        entry.add_data({'name': name, 'genres': genres})
        catalogue[mid] = entry

    self.ratings = rating_set
    self.movies = catalogue
def setUp(self):
    """Prepare the fixtures shared by the tests.

    ``self.ratings`` is a ``Data`` set holding one
    ``(stars, item_id, user_id)`` tuple per entry of the module-level
    ``ratings``; ``self.movies`` maps each movie id from ``movie_genres`` to
    an ``Item`` carrying its name and genres.
    """
    rating_set = Data()
    for row in ratings:
        stars, item_id, user_id = row
        rating_set.add_tuple((stars, item_id, user_id))

    catalogue = {}
    for mid, name, genres in movie_genres:
        entry = Item(mid)
        entry.add_data({'name': name, 'genres': genres})
        catalogue[mid] = entry

    self.ratings = rating_set
    self.movies = catalogue
def setup_svd(self, vote_list):
    """Return the SVD model, building it from ``vote_list`` if needed.

    When ``self.svd`` is already set it is returned untouched.  Otherwise a
    new ``SVD`` is stored under ``self.cache['svd']``, fed one tuple per vote
    and computed with ``k=self.k``.

    :param vote_list: iterable of votes; ``vote[0].id`` is the user id,
        ``vote[1]`` the item id and ``vote[2]`` the value (cast to float).
    :return: ``self.svd`` (presumably backed by ``self.cache['svd']`` —
        confirm against the class definition).
    """
    if self.svd is not None:
        return self.svd

    self.cache['svd'] = SVD()
    votes = Data()
    for vote in vote_list:
        voter_id = vote[0].id
        target_id = vote[1]
        score = float(vote[2])
        # Tuple format is: <value, row, column>
        votes.add_tuple((score, target_id, voter_id))
    self.cache['svd'].set_data(votes)
    self.cache['svd'].compute(k=self.k, min_values=1)
    return self.svd
def set_rating(rating, userID=45, probCode='GSS1', compute=False, SVDNeighbourhood=False):
    """Append one rating to the pickled ratings file and optionally recompute.

    The row index of the new tuple is the position of ``probCode`` among the
    keys of the pickled ``problems_recsys`` object; the stored tuple is
    ``(rating, row_index, userID)``.

    :param rating: value to store.
    :param userID: column id of the rating.
    :param probCode: problem key whose position becomes the row id.
    :param compute: when true, recompute the model after saving.
    :param SVDNeighbourhood: with ``compute``, selects
        ``compute_SVDNeighbourhood()`` instead of ``compute_SVD()``.
    :raises ValueError: if ``probCode`` is not a key of ``problems_recsys``.
    """
    add_dir = utils.get_add_dir()
    ratings_path = os.path.join(add_dir, 'ratings')

    # Close the handle deterministically instead of leaking it.
    # NOTE: pickle must only be used on trusted, locally produced files.
    with open(os.path.join(add_dir, 'problems_recsys'), "rb") as handle:
        problems_recsys = pickle.load(handle)
    # NOTE(review): the row index depends on the iteration order of
    # problems_recsys — confirm that order is stable between runs.
    problem_keys = list(problems_recsys)

    data = Data()
    data.load(ratings_path, pickle=True)
    data.add_tuple((rating, problem_keys.index(probCode), userID))
    data.save(ratings_path, pickle=True)

    if compute:
        if SVDNeighbourhood:
            compute_SVDNeighbourhood()
        else:
            compute_SVD()
def test_utf8_data():
    """Saving and reloading data with non-ASCII item ids keeps its length.

    Two tuples are added whose item id is the same name written once as a
    unicode escape and once as a literal, the set is saved under
    ``MOVIELENS_DATA_PATH`` and loaded into a fresh ``Data`` object.
    """
    saved_path = os.path.join(MOVIELENS_DATA_PATH, 'ratings.matrix.saved.utf8')

    data_in = Data()
    data_in.add_tuple([69, u'Bj\xf6rk', USERID1])
    data_in.add_tuple([34, 'Björk', USERID2])
    data_in.save(saved_path)

    data_saved = Data()
    data_saved.load(saved_path)

    assert_equal(len(data_in), len(data_saved))
def build_svd_item_based(user_op_item_cnt, item_op_users, user_idx, item_idx, min_nonzero):
    """Create an SVD loaded with filtered user/item counts.

    A ``(count, item_idx[item], user_idx[user])`` tuple is added for every
    user/item pair that passes all of these filters:

    * the user has at least ``min_nonzero`` items,
    * ``item_op_users[item]`` is not below ``min_nonzero``,
    * the count (as a float) is at least 1.

    The SVD only receives the data; it is not computed here.

    :return: ``(svd, item_lst)`` where ``item_lst`` holds each accepted item
        once.
    """
    model = SVD()
    matrix = Data()
    accepted = []
    for user, item_counts in user_op_item_cnt.items():
        if len(item_counts) < min_nonzero:
            continue
        for item in item_counts:
            if item_op_users[item] < min_nonzero:
                continue
            weight = 1.0 * item_counts[item]
            if weight < 1:
                continue
            accepted.append(item)
            matrix.add_tuple((weight, item_idx[item], user_idx[user]))
    unique_items = list(set(accepted))
    model.set_data(matrix)
    return model, unique_items
def build_svd_cat_based(user_op_cat_cnt, cat_op_users, user_idx, cat_idx, min_nonzero):
    """Create an SVD loaded with filtered user/category counts.

    A ``(count, cat_idx[cat], user_idx[user])`` tuple is added for every
    user/category pair that passes all of these filters:

    * the user has at least ``min_nonzero`` categories,
    * ``cat_op_users[cat]`` is not below ``min_nonzero``,
    * the count (as a float) is at least 1.

    The number of distinct accepted categories is printed, and the SVD only
    receives the data; it is not computed here.

    :return: ``(svd, cat_lst)`` where ``cat_lst`` holds each accepted
        category once.
    """
    model = SVD()
    matrix = Data()
    accepted = []
    for user, cat_counts in user_op_cat_cnt.items():
        if len(cat_counts) < min_nonzero:
            continue
        for cat in cat_counts:
            if cat_op_users[cat] < min_nonzero:
                continue
            weight = 1.0 * cat_counts[cat]
            if weight < 1:
                continue
            accepted.append(cat)
            matrix.add_tuple((weight, cat_idx[cat], user_idx[user]))
    unique_cats = list(set(accepted))
    # Formatted so the emitted text is the same under Python 2 and 3.
    print('%s %d' % ('cat =', len(unique_cats)))
    model.set_data(matrix)
    return model, unique_cats
def test_utf8_data():
    """Saving and reloading data with non-ASCII item ids keeps its length.

    Two tuples are added whose item id is the same name written once as a
    unicode escape and once as a literal, the set is saved under
    ``MOVIELENS_DATA_PATH`` and loaded into a fresh ``Data`` object.
    """
    saved_path = os.path.join(MOVIELENS_DATA_PATH, 'ratings.matrix.saved.utf8')

    data_in = Data()
    data_in.add_tuple([69, u'Bj\xf6rk', USERID1])
    data_in.add_tuple([34, 'Björk', USERID2])
    data_in.save(saved_path)

    data_saved = Data()
    data_saved.load(saved_path)

    assert_equal(len(data_in), len(data_saved))
def color_user(input_file, output_file, data_file):
    """Write the ids of users similar to the queried user(s) to a file.

    ``data_file`` holds comma-separated lines whose first two fields are
    used as row id and column id; each becomes a ``(1.0, row, column)``
    tuple.  An SVD (``k=5``) is computed over them.  For each id read from
    ``input_file`` the similar users are looked up, the first result is
    dropped, and every remaining user with a similarity above 0.5 is
    written to ``output_file``, one id per line.

    Line endings are stripped from both files before the ids are used, and
    blank lines are ignored.

    NOTE(review): ``output_file`` is opened in ``'w'`` mode for every queried
    id, so with several ids only the last one's results remain — confirm
    whether ``input_file`` is expected to hold a single id.

    :param input_file: path of the file listing the user id(s) to query.
    :param output_file: path of the file receiving the similar user ids.
    :param data_file: path of the comma-separated history file.
    """
    data = Data()
    # Read every user's history and turn it into the matrix the SVD runs on.
    with open(data_file, 'r') as history:
        for line in history:
            line = line.rstrip('\r\n')
            if not line:
                continue
            info = line.split(',')
            # Tuple format is: <value, row, column>
            data.add_tuple((1.0, info[0], info[1]))

    svd = SVD()
    svd.set_data(data)
    k = 5  # Usually, in a real dataset, you should set a higher number, e.g. 100
    svd.compute(k=k, min_values=3, pre_normalize=None, mean_center=False,
                post_normalize=True)

    # Read the user id(s) to produce recommendations for.
    with open(input_file, 'r') as questions:
        for line in questions:
            # Without stripping, the trailing newline becomes part of the id.
            userid = line.rstrip('\r\n')
            if not userid:
                continue
            user_list = svd.similar(userid)
            # Save every user whose similarity exceeds 50% to the answer file.
            with open(output_file, 'w') as answers:
                # The first entry is dropped, as the original did to remove
                # the queried user's own id.
                for user in user_list[1:]:
                    if user[1] > 0.5:
                        answers.write(user[0] + '\n')
def _tth_name(tth, cache):
    """Return the stored name for ``tth``, querying ``db.tths`` once per hash."""
    if tth not in cache:
        cache[tth] = db.tths.find_one({'tth': tth})['name']
    return cache[tth]


def _magnet_links(entries, cache):
    """Build one magnet URI per entry, adding a display name when available."""
    links = []
    for entry in entries:
        base = 'magnet:?xt=urn:tree:tiger:' + entry[0]
        try:
            link = base + "&dn=" + unidecode.unidecode(_tth_name(entry[0], cache))
        except Exception:
            # Best effort: without a usable name, fall back to the bare hash.
            link = base
        links.append(link)
    return links


def recommended_files(user):
    """Return magnet links for files recommended to ``user``.

    Steps:

    1. If ``db.done_users`` marks the user as not yet processed, append the
       user's ``user::tth`` pairs to ``./dc_recom.dat`` and set the flag.
    2. Compute an SVD over ``./dc_recom.dat`` and take the 10 users most
       similar to ``user``.
    3. Compute a second SVD over only those users' files and ask it for up
       to 100 recommendations.
    4. Drop recommendations whose name is ``similar`` to one already kept,
       stopping as soon as 11 have been collected.

    :param user: user name; anything that is not a ``str`` is passed through
        ``unidecode.unidecode`` first.
    :return: list of magnet URI strings.
    """
    if not isinstance(user, str):
        user = unidecode.unidecode(user)

    # ``== False`` is kept on purpose: only an explicit False triggers the dump.
    if db.done_users.find_one({'user': user})['recommended'] == False:  # noqa: E712
        with open('./dc_recom.dat', 'a') as dump:
            for record in db.user_list.find({'user': user}):
                dump.write(record['user'] + '::' + record['tth'] + '\n')
        db.done_users.update({'user': user}, {'user': user, 'recommended': True})

    data = Data()
    data.load('./dc_recom.dat', sep='::', format={'col': 1, 'row': 0})
    svd = SVD()
    svd.set_data(data)
    svd.compute(k=1000, min_values=0, pre_normalize=None, mean_center=False,
                post_normalize=True)
    similar_users = [match[0] for match in svd.similar(user, n=10)]

    # Second model restricted to the files of the most similar users.
    newdata = Data()
    for neighbour in similar_users:
        for record in db.user_list.find({'user': neighbour}):
            newdata.add_tuple((1.0, neighbour, record['tth']))
    svd.set_data(newdata)
    svd.compute(k=1000, min_values=0, pre_normalize=None, mean_center=False,
                post_normalize=True)
    recoms = svd.recommend(user, is_row=True, only_unknowns=True, n=100)

    names = {}  # tth -> name, so each hash is looked up at most once
    res = []
    for candidate in recoms:
        duplicate = any(
            similar(_tth_name(candidate[0], names), _tth_name(kept[0], names))
            for kept in res
        )
        if duplicate:
            continue
        res.append(candidate)
        if len(res) > 10:
            break
    return _magnet_links(res, names)
month -= 1 day += 31 band = user[brandID] band.append((month, day, actionType)) users = [] for (userID, info) in userInfo.iteritems(): users.append(User(userID, info)) data = Data() for user in users: print user.id for brand in user.train_label: if user.data[brand][1] > 0: data.add_tuple((10, brand, user.id)) else: data.add_tuple((1, brand, user.id)) train, test = data.split_train_test(percent=0.8) svd = SVD() svd.set_data(data) svd.compute(k=100, min_values=1) # rmse = RMSE() # mae = MAE() # for rating, item_id, user_id in test.get(): # try: # pred_rating = svd.predict(item_id, user_id) # rmse.add(rating, pred_rating)
def test_data_add_tuple():
    """A tuple added to an empty ``Data`` is retrievable at index 0."""
    expected_value = 4.0
    data = Data()
    data.add_tuple((expected_value, 'row_id', 'col_id'))
    stored = data[0]
    assert_equal(stored[0], expected_value)
}, "priya": { "heart", "mountaineering", "sky diving", "sony", "apple", "pop", "perfumes", "luxury", "eminem", "lil wayne" }, "brenda": { "cute guys", "xbox", "shower", "beach", "summer", "english", "french", "country music", "office", "birds" } } data = Data() VALUE = 1.0 for username in likes: for user_likes in likes[username]: data.add_tuple((VALUE, username, user_likes)) # Tuple format is: <value, row, column> svd = SVD() svd.set_data(data) k = 5 # Usually, in a real dataset, you should set a higher number, e.g. 100 svd.compute(k=k, min_values=3, pre_normalize=None, mean_center=False, post_normalize=True) print(svd.similar('sheila')) print("######################") import difflib for key in likes: rajat = likes['rajat']
def test_data_add_tuple():
    """A tuple added to an empty ``Data`` is retrievable at index 0."""
    expected_value = 4.0
    data = Data()
    data.add_tuple((expected_value, 'row_id', 'col_id'))
    stored = data[0]
    assert_equal(stored[0], expected_value)
}) item2 = Item(1) item2.add_data({'name': 'project1', 'popularity': 0.5, 'tags': [0, 0, 1] }) # create a user userId = 0 user = User(userId) # link an item with a user rating = 1 user.add_item(itemId, rating) data = Data() data.add_tuple((rating, itemId, userId)) data.add_tuple((10, 1, 2)) svd = SVD() svd.set_data(data) svd.compute(k=100, min_values=0, pre_normalize=None, mean_center=True, post_normalize=True) svd.similarity(0, 0) l1 = ['a', 0, 1, 1] l2 = ['b', 0, 1, 1] print 1- spatial.distance.cosine(l1, l2) cosine_similarity(l1, l2)