def get_preference(user_List):
    """Fetch owned-game playtimes from the Steam Web API and save them as ratings.

    For every Steam id in ``user_List`` the ``GetOwnedGames`` endpoint is
    queried. Each game with a non-zero ``playtime_forever`` becomes one rating
    tuple ``(log10(playtime), appid, user_index)`` in a ``Data`` object, which
    is written to ``rating.dat`` after all users have been processed.

    :param user_List: iterable of Steam ids (strings or integers).
    :return: None; the only output is the ``rating.dat`` file.
    """
    # NOTE(review): the API key is hard-coded in the URL below; it should be
    # moved to configuration or an environment variable.
    # NOTE(review): preference_dict and user_map are filled but never returned
    # or read in this block -- confirm whether callers were meant to get them.
    preference_dict = {}
    user_map = {}
    data = Data()  # saving rating data
    i = 1  # 1-based index used as the user's id in the rating tuples
    for user in user_List:
        # Build the URL from the string form so integer ids do not raise
        # TypeError during concatenation.
        user_id = str(user)
        url = ("http://api.steampowered.com/IPlayerService/GetOwnedGames/v0001/?"
               "key=147CBF377C6B648EC3DC73499CE73D32&steamid=" + user_id + "&format=json")
        response = urllib2.urlopen(url)
        try:
            owned_gameData = json.loads(response.read().decode('utf-8-sig'))
        finally:
            # Always release the HTTP connection, even if decoding fails.
            response.close()
        user_Pref = {}
        try:
            if owned_gameData['response']['game_count'] != 0:
                for games in owned_gameData['response']['games']:
                    if games['playtime_forever'] > 0:
                        user_Pref[games['appid']] = math.log(games['playtime_forever'])
                        data.add_tuple((math.log(games['playtime_forever'], 10), games['appid'], i))
                        user_map[i] = user
        except (KeyError, TypeError, ValueError):
            # Responses lacking the expected fields are skipped; the user
            # index is not advanced for them.
            continue
        i = i + 1
        preference_dict[user] = user_Pref
    data.save('rating.dat')
Пример #2
0
	def build_model(self,uids,kn):
		"""Fit an SVD model from a ``{uid: songs}`` mapping and keep it on ``self.model``.

		Every (song, uid) pair is stored with value 1 (tuple order is
		value, song, uid). The decomposition is computed with ``k=kn`` and
		``min_values=1``.
		"""
		ratings = Data()
		pairs = ((track, listener) for listener, tracks in uids.items() for track in tracks)
		for track, listener in pairs:
			ratings.add_tuple((1, track, listener))

		model = SVD()
		model.set_data(ratings)
		model.compute(k=kn, min_values=1)
		self.model = model
Пример #3
0
def recommended_files(user):
    """Return magnet links for files recommended to ``user``.

    On the first call for a user (the ``recommended`` flag in
    ``db.done_users`` is False) the user's files are appended to
    ``./dc_recom.dat`` and the flag is set. An SVD over that file finds the
    10 most similar users; a second SVD over those users' files yields up to
    100 candidate recommendations. Candidates whose name is ``similar`` to an
    already accepted one are dropped.

    :param user: user name; values that are not ``str`` are passed through
        ``unidecode.unidecode``.
    :return: list of ``magnet:?xt=urn:tree:tiger:<tth>`` links, with
        ``&dn=<name>`` appended when the name lookup succeeds. At most 11
        links are returned (collection stops once more than 10 are accepted).
    """
    if not isinstance(user, str):
        user = unidecode.unidecode(user)
    # Kept as an equality test on purpose: only an explicit False (or 0)
    # triggers the export, exactly as before.
    if db.done_users.find_one({'user': user})['recommended'] == False:
        user_files = db.user_list.find({'user': user})
        # The context manager closes the file even if a write fails.
        with open('./dc_recom.dat', 'a') as out:
            for u in user_files:
                out.write(u['user'] + '::' + u['tth'])
                out.write('\n')
        db.done_users.update({'user': user}, {'user': user, 'recommended': True})

    data = Data()
    data.load('./dc_recom.dat', sep='::', format={'col': 1, 'row': 0})
    svd = SVD()
    svd.set_data(data)
    svd.compute(k=1000, min_values=0, pre_normalize=None, mean_center=False, post_normalize=True)
    similar_users = [entry[0] for entry in svd.similar(user, n=10)]

    # Second model: only the files owned by the nearest neighbours.
    newdata = Data()
    for neighbour in similar_users:
        for owned in db.user_list.find({'user': neighbour}):
            newdata.add_tuple((1.0, neighbour, owned['tth']))
    svd.set_data(newdata)
    svd.compute(k=1000, min_values=0, pre_normalize=None, mean_center=False, post_normalize=True)
    recoms = svd.recommend(user, is_row=True, only_unknowns=True, n=100)

    res = []
    for p in recoms:
        # Skip candidates whose name resembles one that is already accepted.
        duplicate = any(
            similar(db.tths.find_one({'tth': p[0]})['name'],
                    db.tths.find_one({'tth': r[0]})['name'])
            for r in res)
        if duplicate:
            continue
        res.append(p)
        if len(res) > 10:
            break

    links = []
    for item in res:
        link = 'magnet:?xt=urn:tree:tiger:' + item[0]
        try:
            link = link + "&dn=" + unidecode.unidecode(db.tths.find_one({'tth': item[0]})['name'])
        except Exception:
            # Best effort: fall back to the bare link when the name is
            # missing or cannot be transliterated.
            pass
        links.append(link)

    return links
Пример #4
0
 def build_model(self, uids, kn):
     """Fit an SVD model from a ``{uid: songs}`` mapping and keep it on ``self.model``.

     Every (song, uid) pair is stored with value 1 (tuple order is
     value, song, uid). The decomposition is computed with ``k=kn`` and
     ``min_values=1``.
     """
     ratings = Data()
     pairs = ((track, listener) for listener, tracks in uids.items() for track in tracks)
     for track, listener in pairs:
         ratings.add_tuple((1, track, listener))

     model = SVD()
     model.set_data(ratings)
     model.compute(k=kn, min_values=1)
     self.model = model
def get_data_model_matrix(data):
    """
    Flatten a nested ``{user: {movie: rating}}`` mapping into a recsys data
    object, one ``(rating, user, movie)`` tuple per entry.
    :return: the populated ``Data`` instance
    """
    matrix = Data()
    triples = (
        (rating, user, movie)
        for user, reviews in data.items()
        for movie, rating in reviews.items()
    )
    for triple in triples:
        matrix.add_tuple(triple)
    return matrix
def get_data_model_matrix(data):
    """
    Convert raw ratings, given as ``{user: {movie: rating}}``, into a recsys
    data object holding ``(rating, user, movie)`` tuples.
    :return: the populated ``Data`` instance
    """
    result = Data()
    for user in data.items():
        owner, reviews = user
        for entry in reviews.items():
            movie, score = entry
            result.add_tuple((score, owner, movie))
    return result
Пример #7
0
def prepare_data(raw_data):
    """Build a ``Data`` object of ``(rate, user_index, item_index)`` tuples.

    ``raw_data`` maps user ids to mappings of item ids to 2-element values
    whose first element is the rate. Both indices are 1-based positions: the
    user index follows iteration order over ``raw_data``, and the item index
    restarts at 1 for every user.
    """
    # NOTE(review): the item index is positional per user, so the same index
    # may refer to different items for different users -- confirm intended.
    matrix = Data()
    for row, u_id in enumerate(raw_data.keys(), 1):
        per_user = raw_data[u_id]
        for col, i_id in enumerate(per_user.keys(), 1):
            rate, _unused = per_user[i_id]
            matrix.add_tuple((float(rate), row, col))
    return matrix
Пример #8
0
def prepare_data(raw_data):
    """Build a ``Data`` object of ``(rate, user_index, item_index)`` tuples.

    ``raw_data`` maps user ids to mappings of item ids to 2-element values
    whose first element is the rate. Both indices are 1-based positions: the
    user index follows iteration order over ``raw_data``, and the item index
    restarts at 1 for every user.
    """
    # NOTE(review): the item index is positional per user, so the same index
    # may refer to different items for different users -- confirm intended.
    matrix = Data()
    for row, u_id in enumerate(raw_data.keys(), 1):
        per_user = raw_data[u_id]
        for col, i_id in enumerate(per_user.keys(), 1):
            rate, _unused = per_user[i_id]
            matrix.add_tuple((float(rate), row, col))
    return matrix
Пример #9
0
    def setup_svd(self, vote_list):
        """Return ``self.svd``, first building and caching it when it is None.

        Each vote is read as ``(user_object, item_id, value)``; the user's
        ``id`` attribute and the float of the value are stored in the data.
        The model is computed with ``k=self.k`` and ``min_values=1``.
        """
        if self.svd is not None:
            return self.svd

        self.cache['svd'] = SVD()
        ratings = Data()
        for vote in vote_list:
            voter = vote[0].id
            target = vote[1]
            score = float(vote[2])
            # Tuple format is: <value, row, column>
            ratings.add_tuple((score, target, voter))

        self.cache['svd'].set_data(ratings)
        self.cache['svd'].compute(k=self.k, min_values=1)
        return self.svd
Пример #10
0
def get_friend_matrix(u_ids, raw_data):
    """Build a ``Data`` object for the given users only.

    For every id in ``u_ids`` the items in ``raw_data[u_id]`` are added as
    ``(rate, user_index, item_index)`` tuples. Both indices are 1-based
    positions, and the item index restarts at 1 for every user. Each stored
    value is a 2-element pair whose first element is the rate.
    """
    matrix = Data()
    for row, u_id in enumerate(u_ids, 1):
        for col, i_id in enumerate(raw_data[u_id].keys(), 1):
            rate, _ts = raw_data[u_id][i_id]
            matrix.add_tuple((float(rate), row, col))
    return matrix
Пример #11
0
 def setUp(self):
     """Prepare fixtures: a ``Data`` object of ratings and a dict of ``Item`` objects.

     ``self.ratings`` holds one ``(stars, item_id, user_id)`` tuple per entry
     of the module-level ``ratings``. ``self.movies`` maps each movie id from
     ``movie_genres`` to an ``Item`` carrying its name and genres.
     """
     rating_matrix = Data()
     for row in ratings:
         stars, item_id, user_id = row
         rating_matrix.add_tuple((stars, item_id, user_id))

     catalogue = {}
     for mid, name, genres in movie_genres:
         entry = Item(mid)
         entry.add_data({'name': name, 'genres': genres})
         catalogue[mid] = entry

     self.ratings = rating_matrix
     self.movies = catalogue
Пример #12
0
    def setUp(self):
        """Prepare fixtures: a ``Data`` object of ratings and a dict of ``Item`` objects.

        ``self.ratings`` holds one ``(stars, item_id, user_id)`` tuple per
        entry of the module-level ``ratings``. ``self.movies`` maps each movie
        id from ``movie_genres`` to an ``Item`` carrying its name and genres.
        """
        rating_matrix = Data()
        for row in ratings:
            stars, item_id, user_id = row
            rating_matrix.add_tuple((stars, item_id, user_id))

        catalogue = {}
        for mid, name, genres in movie_genres:
            entry = Item(mid)
            entry.add_data({'name': name, 'genres': genres})
            catalogue[mid] = entry

        self.ratings = rating_matrix
        self.movies = catalogue
Пример #13
0
    def setup_svd(self, vote_list):
        """Return ``self.svd``, first building and caching it when it is None.

        Each vote is read as ``(user_object, item_id, value)``; the user's
        ``id`` attribute and the float of the value are stored in the data.
        The model is computed with ``k=self.k`` and ``min_values=1``.
        """
        if self.svd is not None:
            return self.svd

        self.cache['svd'] = SVD()
        ratings = Data()
        for vote in vote_list:
            voter = vote[0].id
            target = vote[1]
            score = float(vote[2])
            # Tuple format is: <value, row, column>
            ratings.add_tuple((score, target, voter))

        self.cache['svd'].set_data(ratings)
        self.cache['svd'].compute(k=self.k, min_values=1)
        return self.svd
Пример #14
0
def set_rating(rating, userID=45, probCode='GSS1', compute=False, SVDNeighbourhood=False):
	"""Append one rating to the persisted ratings data and optionally recompute the model.

	The rating is stored as ``(rating, problem_index, userID)``, where
	``problem_index`` is the position of ``probCode`` among the keys of the
	pickled ``problems_recsys`` object.

	:param rating: rating value to store.
	:param userID: id of the rating user.
	:param probCode: problem code; must be a key of ``problems_recsys``,
		otherwise ``list.index`` raises ValueError.
	:param compute: when true, recompute the model after saving.
	:param SVDNeighbourhood: selects ``compute_SVDNeighbourhood()`` instead of
		``compute_SVD()`` when ``compute`` is true.
	"""
	add_dir = utils.get_add_dir()
	ratings_path = os.path.join(add_dir, 'ratings')

	# NOTE(review): pickle.load runs arbitrary code from the file; only safe
	# while 'problems_recsys' is produced by this application itself.
	# The context manager closes the handle that was previously leaked.
	with open(os.path.join(add_dir, 'problems_recsys'), "rb") as fh:
		problems_recsys = pickle.load(fh)
	problem_keys = list(problems_recsys)

	data = Data()
	data.load(ratings_path, pickle=True)

	data.add_tuple((rating, problem_keys.index(probCode), userID))

	data.save(ratings_path, pickle=True)

	if compute:
		if SVDNeighbourhood:
			compute_SVDNeighbourhood()
		else:
			compute_SVD()
Пример #15
0
def test_utf8_data():
    """Saving and reloading data with non-ASCII item ids keeps the tuple count."""
    data_in = Data()
    # Same artist name, once as an escaped unicode literal and once as a
    # plain literal with the raw character.
    data_in.add_tuple([69, u'Bj\xf6rk', USERID1])
    data_in.add_tuple([34, 'Björk', USERID2])

    saved_path = os.path.join(MOVIELENS_DATA_PATH, 'ratings.matrix.saved.utf8')
    data_in.save(saved_path)

    data_saved = Data()
    data_saved.load(saved_path)

    assert_equal(len(data_in), len(data_saved))
def build_svd_item_based(user_op_item_cnt, item_op_users, user_idx, item_idx, min_nonzero):
    """Load filtered user/item counts into an SVD object (not yet computed).

    A user is skipped when it has fewer than ``min_nonzero`` items. An item is
    skipped when ``item_op_users[item]`` is below ``min_nonzero`` or its count
    for that user is below 1. Kept entries are stored as
    ``(count, item_idx[item], user_idx[user])``.

    :return: ``(svd, item_lst)``: the SVD with its data set, and the distinct
        kept item keys.
    """
    model = SVD()
    matrix = Data()
    kept = []
    for user, per_item in ((u, user_op_item_cnt[u]) for u in user_op_item_cnt):
        if len(per_item) >= min_nonzero:
            for item in per_item:
                if item_op_users[item] < min_nonzero or 1.0 * user_op_item_cnt[user][item] < 1:
                    continue
                kept.append(item)
                matrix.add_tuple(((1.0 * user_op_item_cnt[user][item]), item_idx[item], user_idx[user]))
    kept = list(set(kept))
    model.set_data(matrix)
    return model, kept
def build_svd_cat_based(user_op_cat_cnt, cat_op_users, user_idx, cat_idx, min_nonzero):
    """Load filtered user/category counts into an SVD object (not yet computed).

    A user is skipped when it has fewer than ``min_nonzero`` categories. A
    category is skipped when ``cat_op_users[cat]`` is below ``min_nonzero`` or
    its count for that user is below 1. Kept entries are stored as
    ``(count, cat_idx[cat], user_idx[user])``. The number of distinct kept
    categories is printed.

    :return: ``(svd, cat_lst)``: the SVD with its data set, and the distinct
        kept category keys.
    """
    svd = SVD()
    data = Data()
    cat_lst = []
    for ui in user_op_cat_cnt:
        if len(user_op_cat_cnt[ui]) < min_nonzero:
            continue
        for ci in user_op_cat_cnt[ui]:
            if cat_op_users[ci] < min_nonzero:
                continue
            count = 1.0 * user_op_cat_cnt[ui][ci]
            if count < 1:
                continue
            cat_lst.append(ci)
            data.add_tuple((count, cat_idx[ci], user_idx[ui]))
    cat_lst = list(set(cat_lst))
    # Single pre-formatted argument: prints the same text on Python 2 and 3,
    # unlike the former print statement, which is a syntax error on Python 3.
    print('cat = %d' % len(cat_lst))
    svd.set_data(data)
    return svd, cat_lst
Пример #18
0
def test_utf8_data():
    """Saving and reloading data with non-ASCII item ids keeps the tuple count."""
    data_in = Data()
    # Same artist name, once as an escaped unicode literal and once as a
    # plain literal with the raw character.
    data_in.add_tuple([69, u'Bj\xf6rk', USERID1])
    data_in.add_tuple([34, 'Björk', USERID2])

    saved_path = os.path.join(MOVIELENS_DATA_PATH, 'ratings.matrix.saved.utf8')
    data_in.save(saved_path)

    data_saved = Data()
    data_saved.load(saved_path)

    assert_equal(len(data_in), len(data_saved))
Пример #19
0
def color_user(input_file, output_file, data_file):
    """Write the ids of users similar to the queried user to ``output_file``.

    ``data_file`` holds comma-separated lines whose first two fields are used
    as (row id, column id) with value 1.0. ``input_file`` holds the user id to
    query, one per line; when several are present, only the result for the
    last one is written. Users whose similarity is above 0.5 are written one
    per line.

    :param input_file: path of the file containing the user id(s) to query.
    :param output_file: path of the answer file (overwritten).
    :param data_file: path of the comma-separated history file.
    """
    data = Data()

    # Read every user's history and turn it into the matrix the SVD runs on.
    with open(data_file, 'r') as f_r:
        for line in f_r:
            line = line.rstrip('\r\n')
            if not line:
                continue  # tolerate blank lines
            info = line.split(',')
            # Tuple format is: <value, row, column>. The newline is stripped
            # above so that ids are consistent whether or not the last line
            # ends with one.
            data.add_tuple((1.0, info[0], info[1]))

    svd = SVD()
    svd.set_data(data)
    k = 5  # Usually, in a real dataset, you should set a higher number, e.g. 100
    svd.compute(k=k, min_values=3, pre_normalize=None, mean_center=False, post_normalize=True)

    # Read the user id(s) to recommend for from the question file.
    user_list = []  # stays empty when the question file has no ids
    with open(input_file, 'r') as fr:
        for line in fr:
            userid = line.strip()  # the row ids in the matrix carry no newline
            if not userid:
                continue
            user_list = svd.similar(userid)

    # Save the ids of all users with similarity above 50% to the answer file.
    with open(output_file, 'w') as fw:
        # The first entry is the queried user itself, so it is skipped.
        for user in user_list[1:]:
            if user[1] > 0.5:
                fw.write(user[0] + '\n')
Пример #20
0
def recommended_files(user):
    """Return magnet links for files recommended to ``user``.

    On the first call for a user (the ``recommended`` flag in
    ``db.done_users`` is False) the user's files are appended to
    ``./dc_recom.dat`` and the flag is set. An SVD over that file finds the
    10 most similar users; a second SVD over those users' files yields up to
    100 candidate recommendations. Candidates whose name is ``similar`` to an
    already accepted one are dropped.

    :param user: user name; values that are not ``str`` are passed through
        ``unidecode.unidecode``.
    :return: list of ``magnet:?xt=urn:tree:tiger:<tth>`` links, with
        ``&dn=<name>`` appended when the name lookup succeeds. At most 11
        links are returned (collection stops once more than 10 are accepted).
    """
    if not isinstance(user, str):
        user = unidecode.unidecode(user)
    # Kept as an equality test on purpose: only an explicit False (or 0)
    # triggers the export, exactly as before.
    if db.done_users.find_one({'user': user})['recommended'] == False:
        user_files = db.user_list.find({'user': user})
        # The context manager closes the file even if a write fails.
        with open('./dc_recom.dat', 'a') as out:
            for u in user_files:
                out.write(u['user'] + '::' + u['tth'])
                out.write('\n')
        db.done_users.update({'user': user}, {
            'user': user,
            'recommended': True
        })

    data = Data()
    data.load('./dc_recom.dat', sep='::', format={'col': 1, 'row': 0})
    svd = SVD()
    svd.set_data(data)
    svd.compute(k=1000,
                min_values=0,
                pre_normalize=None,
                mean_center=False,
                post_normalize=True)
    similar_users = [entry[0] for entry in svd.similar(user, n=10)]

    # Second model: only the files owned by the nearest neighbours.
    newdata = Data()
    for neighbour in similar_users:
        for owned in db.user_list.find({'user': neighbour}):
            newdata.add_tuple((1.0, neighbour, owned['tth']))
    svd.set_data(newdata)
    svd.compute(k=1000,
                min_values=0,
                pre_normalize=None,
                mean_center=False,
                post_normalize=True)
    recoms = svd.recommend(user, is_row=True, only_unknowns=True, n=100)

    res = []
    for p in recoms:
        # Skip candidates whose name resembles one that is already accepted.
        duplicate = any(
            similar(db.tths.find_one({'tth': p[0]})['name'],
                    db.tths.find_one({'tth': r[0]})['name'])
            for r in res)
        if duplicate:
            continue
        res.append(p)
        if len(res) > 10:
            break

    links = []
    for item in res:
        link = 'magnet:?xt=urn:tree:tiger:' + item[0]
        try:
            link = link + "&dn=" + unidecode.unidecode(
                db.tths.find_one({'tth': item[0]})['name'])
        except Exception:
            # Best effort: fall back to the bare link when the name is
            # missing or cannot be transliterated.
            pass
        links.append(link)

    return links
Пример #21
0
                month -= 1
                day += 31

            band = user[brandID]
            band.append((month, day, actionType))

    users = []
    for (userID, info) in userInfo.iteritems():
        users.append(User(userID, info))

    data = Data()
    for user in users:
        print user.id
        for brand in user.train_label:
            if user.data[brand][1] > 0:
                data.add_tuple((10, brand, user.id))
            else:
                data.add_tuple((1, brand, user.id))

    train, test = data.split_train_test(percent=0.8)

    svd = SVD()
    svd.set_data(data)
    svd.compute(k=100, min_values=1)

    # rmse = RMSE()
    # mae = MAE()
    # for rating, item_id, user_id in test.get():
    #     try:
    #         pred_rating = svd.predict(item_id, user_id)
    #         rmse.add(rating, pred_rating)
Пример #22
0
def test_data_add_tuple():
    """A tuple added to an empty ``Data`` is retrievable at index 0 with its value first."""
    expected = 4.0
    data = Data()
    data.add_tuple((expected, 'row_id', 'col_id'))
    stored_value = data[0][0]
    assert_equal(stored_value, expected)
    },
    "priya": {
        "heart", "mountaineering", "sky diving", "sony", "apple", "pop",
        "perfumes", "luxury", "eminem", "lil wayne"
    },
    "brenda": {
        "cute guys", "xbox", "shower", "beach", "summer", "english", "french",
        "country music", "office", "birds"
    }
}

# Build the rating matrix: every (username, liked thing) pair in `likes`
# gets the same constant value.
data = Data()
VALUE = 1.0
for username in likes:
    for user_likes in likes[username]:
        data.add_tuple((VALUE, username,
                        user_likes))  # Tuple format is: <value, row, column>

# Factorize the matrix; usernames are the rows (see tuple format above).
svd = SVD()
svd.set_data(data)
k = 5  # Usually, in a real dataset, you should set a higher number, e.g. 100
svd.compute(k=k,
            min_values=3,
            pre_normalize=None,
            mean_center=False,
            post_normalize=True)

# Print whatever the model reports as similar to the row id 'sheila'.
print(svd.similar('sheila'))
print("######################")
import difflib
for key in likes:
    rajat = likes['rajat']
Пример #24
0
def test_data_add_tuple():
    """A tuple added to an empty ``Data`` is retrievable at index 0 with its value first."""
    expected = 4.0
    data = Data()
    data.add_tuple((expected, 'row_id', 'col_id'))
    stored_value = data[0][0]
    assert_equal(stored_value, expected)
Пример #25
0
               })

# Second item, with id 1 and some descriptive attributes.
item2 = Item(1)
item2.add_data({'name': 'project1',
               'popularity': 0.5,
               'tags': [0, 0, 1]
               })

# create a user
userId = 0
user = User(userId)

# link an item with a user
# NOTE(review): itemId is defined outside the visible part of this script.
rating = 1
user.add_item(itemId, rating)

# Rating matrix with two entries: the link created above plus one
# hard-coded tuple.
data = Data()
data.add_tuple((rating, itemId, userId))
data.add_tuple((10, 1, 2))


svd = SVD()
svd.set_data(data)
svd.compute(k=100, min_values=0, pre_normalize=None, mean_center=True, post_normalize=True)

# The result of this call is discarded.
svd.similarity(0, 0)

# NOTE(review): both lists start with a string element; confirm the cosine
# helpers below accept non-numeric entries.
l1 = ['a', 0, 1, 1]
l2 = ['b', 0, 1, 1]
# Python 2 print statement: cosine similarity as 1 - cosine distance.
print 1- spatial.distance.cosine(l1, l2)
cosine_similarity(l1, l2)