def test_pa3(self):
    """Compare user-based kNN predictions with the stored reference rows.

    Loads the ratings and movie titles from ``testdata/``, builds a
    normalized ``UserModel`` and, for every ``(user, item)`` pair, formats
    the prediction made with 30 neighbours, cosine similarity,
    ``promote_users=True`` and ``normalize='centered'`` as
    ``user,item,score,title`` (score to four decimals).
    """
    queries = [
        (1024, 77), (1024, 268), (1024, 462), (1024, 393), (1024, 36955),
        (2048, 77), (2048, 36955), (2048, 788),
    ]
    expected_rows = [
        "1024,77,4.3848,Memento (2000)",
        "1024,268,2.8646,Batman (1989)",
        "1024,462,3.1082,Erin Brockovich (2000)",
        "1024,393,3.8722,Kill Bill: Vol. 2 (2004)",
        "1024,36955,2.3524,True Lies (1994)",
        "2048,77,4.8493,Memento (2000)",
        "2048,36955,3.9698,True Lies (1994)",
        "2048,788,3.8509,Mrs. Doubtfire (1993)",
    ]

    data = DataIO(verbose=False)
    data.load('testdata/ratings.csv', items_file='testdata/movie-titles.csv')
    model = UserModel(verbose=False, normalize=True)
    model.build(data)

    for (u, i), expected in zip(queries, expected_rows):
        score = user_based_knn(
            model, 30,
            [data.new_user_idx(u)], [data.new_item_idx(i)],
            cosine, promote_users=True, normalize='centered')
        actual = '%d,%d,%.4f,%s' % (u, i, score, data.title(i))
        # assertEqual reports both strings on a mismatch, whereas
        # assertTrue(a == b) only says "False is not true".
        self.assertEqual(expected, actual)
def test_unnormalized(self):
    """Check the top-3 recommendations for user 3712 against reference values.

    Scores every item with ``user_based_knn`` (5 neighbours, Pearson
    similarity, ``promote_users=False``, no ``normalize`` argument), takes
    the three best via ``top_ns`` and compares ``item,score`` pairs with
    scores formatted to three decimals.
    """
    u = 3712
    expected = [(641, 5.000), (603, 4.856), (105, 4.739)]

    R = user_based_knn(
        self.model, 5,
        [self.data.new_user_idx(u)], range(self.data.num_items()),
        pearson, promote_users=False)
    recs = top_ns([R], 3, keep_order=True)

    # Item indices coming back from top_ns are mapped to the original ids
    # with old_item_idx before being compared with the reference list.
    actual = ','.join(
        '%d,%.3f' % (self.data.old_item_idx(a), b) for (a, b) in recs[0])
    wanted = ','.join('%d,%.3f' % pair for pair in expected)

    # assertEqual shows both strings when they differ; assertTrue(a == b)
    # would only report "False is not true".
    self.assertEqual(actual, wanted)
def test_unnormalized(self):
    """Verify the three highest-scored items for user 3712.

    The scores come from ``user_based_knn`` with 5 neighbours, Pearson
    similarity and ``promote_users=False`` (no ``normalize`` argument is
    passed) over all items; ``top_ns`` then selects three of them. Both
    sides are rendered as comma-separated ``item,score`` text with
    three-decimal scores.
    """
    u = 3712
    expected = [(641, 5.000), (603, 4.856), (105, 4.739)]

    all_items = range(self.data.num_items())
    R = user_based_knn(self.model, 5, [self.data.new_user_idx(u)],
                       all_items, pearson, promote_users=False)
    recs = top_ns([R], 3, keep_order=True)

    rendered = ','.join(
        '%d,%.3f' % (self.data.old_item_idx(item), score)
        for (item, score) in recs[0])
    reference = ','.join('%d,%.3f' % pair for pair in expected)

    # Use assertEqual so a failing run prints the differing strings instead
    # of the uninformative "False is not true" from assertTrue(a == b).
    self.assertEqual(rendered, reference)
def test_pa3(self):
    """Check centered cosine kNN predictions against known answer rows.

    A ``UserModel`` (``normalize=True``) is built from the ratings and
    movie titles under ``testdata/``. For each ``(user, item)`` pair the
    prediction from ``user_based_knn`` (30 neighbours, cosine,
    ``promote_users=True``, ``normalize='centered'``) is rendered as
    ``user,item,score,title`` with a four-decimal score and compared with
    the matching reference row.
    """
    pairs = [(1024, 77), (1024, 268), (1024, 462), (1024, 393),
             (1024, 36955), (2048, 77), (2048, 36955), (2048, 788)]
    answers = [
        "1024,77,4.3848,Memento (2000)",
        "1024,268,2.8646,Batman (1989)",
        "1024,462,3.1082,Erin Brockovich (2000)",
        "1024,393,3.8722,Kill Bill: Vol. 2 (2004)",
        "1024,36955,2.3524,True Lies (1994)",
        "2048,77,4.8493,Memento (2000)",
        "2048,36955,3.9698,True Lies (1994)",
        "2048,788,3.8509,Mrs. Doubtfire (1993)",
    ]

    data = DataIO(verbose=False)
    data.load('testdata/ratings.csv', items_file='testdata/movie-titles.csv')
    model = UserModel(verbose=False, normalize=True)
    model.build(data)

    for (u, i), answer in zip(pairs, answers):
        prediction = user_based_knn(model, 30,
                                    [data.new_user_idx(u)],
                                    [data.new_item_idx(i)],
                                    cosine, promote_users=True,
                                    normalize='centered')
        row = '%d,%d,%.4f,%s' % (u, i, prediction, data.title(i))
        # assertEqual prints expected vs. actual on failure; the previous
        # assertTrue(a == b) form gave no hint about which row diverged.
        self.assertEqual(answer, row)
NN = 5  # neighbourhood size passed to user_based_knn
n = 3   # number of recommendations kept per user by top_ns
part_1_file = "part_1.csv"
part_2_file = "part_2.csv"

# part 1: Pearson kNN with promote_users=False and no normalize argument
data = DataIO()
data.load(ratings_file)
model = UserModel(normalize=False)
model.build(data)
given_users = data.translate_users(given_users)
given_items = range(data.num_items())
R = user_based_knn(model, NN, given_users, given_items, pearson,
                   promote_users=False)
recs = top_ns(R, n, keep_order=True)
# One "item score" line per recommendation; item indices are mapped back
# with old_item_idx. The `with` block closes the file even if the write
# fails. The handle keeps the name `file` so that any later statement still
# referring to it continues to work.
with open(part_1_file, "w") as file:
    file.write("\n".join(["%d %.3f" % (data.old_item_idx(i), s)
                          for u in recs for (i, s) in u]))

# part 2: same query with exclude_seen=False and normalize=True
R = user_based_knn(
    model, NN, given_users, given_items, pearson,
    promote_users=False, exclude_seen=False, normalize=True
)
recs = top_ns(R, n, keep_order=True)
# Previously this handle was opened and written but never closed in the
# visible code; the context manager guarantees the data is flushed.
with open(part_2_file, "w") as file:
    file.write("\n".join(["%d %.3f" % (data.old_item_idx(i), s)
                          for u in recs for (i, s) in u]))
(5399,14), (5399,187), (5399,602), (5399,629), (3613,329), (3613,604), (3613,134), (3613,1637), (3613,278), (1873,786), (1873,2502), (1873,550), (1873,1894), (1873,1422), (4914,268), (4914,36658), (4914,786), (4914,161), (4914,854)] file = open(answer_file,'w') file.write('\n'.join( ['%d,%d,%.4f,%s' % ( u, i, user_based_knn(model, NN, [data.new_user_idx(u)], [data.new_item_idx(i)], cosine, promote_users = True, normalize = 'centered')[0], data.title(i)) for (u,i) in inputs])) file.close()
ratings_file = '../data/ratings.csv'
items_file = '../data/movie-titles.csv'
NN = 30  # neighbourhood size passed to user_based_knn
answer_file = 'part_1.csv'

# part 1: predict a score for each (user, item) pair below using cosine
# similarity with promote_users=True and normalize='centered'
data = DataIO()
data.load(ratings_file, items_file=items_file)
model = UserModel(normalize=True)
model.build(data)

inputs = [
    (4169, 161), (4169, 36955), (4169, 453), (4169, 857), (4169, 238),
    (5399, 1891), (5399, 14), (5399, 187), (5399, 602), (5399, 629),
    (3613, 329), (3613, 604), (3613, 134), (3613, 1637), (3613, 278),
    (1873, 786), (1873, 2502), (1873, 550), (1873, 1894), (1873, 1422),
    (4914, 268), (4914, 36658), (4914, 786), (4914, 161), (4914, 854),
]

# Build every "user,item,score,title" row before touching the output file,
# so a failure during prediction cannot leave a truncated answer file.
rows = []
for (u, i) in inputs:
    score = user_based_knn(model, NN,
                           [data.new_user_idx(u)], [data.new_item_idx(i)],
                           cosine, promote_users=True,
                           normalize='centered')[0]
    rows.append('%d,%d,%.4f,%s' % (u, i, score, data.title(i)))

# The context manager closes the handle even if the write raises.
with open(answer_file, 'w') as file:
    file.write('\n'.join(rows))
part_1_file = 'part_1.csv' part_2_file = 'part_2.csv' # part 1 data = DataIO() data.load(ratings_file) model = UserModel(normalize=False) model.build(data) given_users = data.translate_users(given_users) given_items = range(data.num_items()) R = user_based_knn(model, NN, given_users, given_items, pearson, promote_users=False) recs = top_ns(R, n, keep_order=True) file = open(part_1_file, 'w') file.write('\n'.join( ['%d %.3f' % (data.old_item_idx(i), s) for u in recs for (i, s) in u])) file.close() # part 2 R = user_based_knn(model, NN, given_users, given_items,
def test_user_knn(self):
    """Compare kNN predictions for users [0, 2] and items [2, 3] with a fixed matrix.

    Predictions use 30 neighbours and cosine similarity on ``self.model``.
    Both the result and the reference go through ``stringify_matrix``
    before being compared.
    """
    expected = np.matrix([[2.47335263, 2.72],
                          [3.20666667, 5.34]])
    actual = user_based_knn(self.model, 30, [0, 2], [2, 3], cosine)
    # assertEqual reports both stringified matrices on a mismatch, unlike
    # assertTrue(a == b), which only says "False is not true".
    self.assertEqual(stringify_matrix(actual), stringify_matrix(expected))
def test_user_knn(self):
    """Check the 2x2 block of predictions for users [0, 2] on items [2, 3].

    ``user_based_knn`` is run on ``self.model`` with 30 neighbours and
    cosine similarity; the outcome and the reference matrix are both passed
    through ``stringify_matrix`` and the results compared.
    """
    expected = np.matrix([[2.47335263, 2.72], [3.20666667, 5.34]])
    predicted = user_based_knn(self.model, 30, [0, 2], [2, 3], cosine)
    got = stringify_matrix(predicted)
    want = stringify_matrix(expected)
    # Use assertEqual so a failure shows the two values side by side instead
    # of the opaque "False is not true" produced by assertTrue(a == b).
    self.assertEqual(got, want)