def main(ratings_components=300, features_components=300, print_scores=False):
    np.random.seed(42)
    tf.set_random_seed(1984)

    data_path = '../data/goodbooks-10k/'
    book_features = get_book_features(get_book_dataframe(data_path))
    reduced_item_features, _, _ = reduce_matrix(
        book_features, n_components=features_components)

    goodreads_path = '../data/goodbooks-10k/ratings.csv'
    amazon_path = '../data/amazon/ratings_amazon.csv'
    spr = get_ratings(goodreads_path, amazon_path, min_amazon_items=6)

    n_folds = 5
    scores = np.zeros((n_folds, 2))
    kf = ColumnwiseKFold(n_folds, random_seed=30)
    for i, (X, (user_indices, item_indices)) in enumerate(kf.split(spr)):
        # Reduce the fold's ratings matrix and join it with the reduced content features
        _, _, rating_VT = reduce_matrix(X, n_components=ratings_components)
        reduced_item_ratings = rating_VT.T
        items = get_reduced_joint(reduced_item_ratings, reduced_item_features)

        # Train a fresh autoencoder for each fold
        tf.reset_default_graph()
        encoder = BookEncoder(user_input_dim=10000, book_input_dim=items.shape[1],
                              user_hidden=150, book_hidden=150)
        with tf.Session() as sess:
            encoder.initialize(sess)
            encoder.train(sess, X, items)
            scores[i, :] = encoder.test(sess, spr, X, items,
                                        user_indices, item_indices)
        if print_scores:
            print_evaluation(scores[i, 0], scores[i, 1])

    scores = np.mean(scores, axis=0)
    if print_scores:
        print('{0:d}-Fold Scores:'.format(n_folds))
        print_evaluation(scores[0], scores[1])
    return scores
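# Usage sketch (assumption, not part of the original module): an entry-point
# guard that runs the 5-fold autoencoder evaluation above. The argument values
# simply repeat the function's defaults; the guard itself is illustrative.
if __name__ == '__main__':
    fold_scores = main(ratings_components=300, features_components=300,
                       print_scores=True)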
def main(ratings_components=100, features_components=100, print_scores=False):
    # Set this to where you save and load all data
    # data_path = '../data/goodbooks-10k/'
    data_path = '../../goodbooks-10k/'
    book_features = get_book_features(get_book_dataframe(data_path))
    reduced_item_features, _, _ = reduce_matrix(
        book_features, n_components=features_components)

    goodreads_path = data_path + 'ratings.csv'
    amazon_path = data_path + 'ratings_amazon.csv'
    spr = get_ratings(goodreads_path, amazon_path, min_amazon_items=6)

    n_folds = 5
    scores = np.zeros((n_folds, 2))
    kf = ColumnwiseKFold(n_folds, random_seed=30)
    for i, (X, (user_indices, item_indices)) in enumerate(kf.split(spr)):
        # Reduce the fold's ratings matrix and join it with the reduced content features
        _, _, rating_VT = reduce_matrix(X, n_components=ratings_components)
        reduced_item_ratings = rating_VT.T
        items = get_reduced_joint(reduced_item_ratings, reduced_item_features)

        # Item-item cosine similarity, rescaled from [-1, 1] to [0, 1]
        sim = (cosine_similarity(items) + 1) / 2
        scores[i, :] = evaluate(spr, X, sim, user_indices, item_indices)
        if print_scores:
            print_evaluation(scores[i, 0], scores[i, 1])

    scores = np.mean(scores, axis=0)
    if print_scores:
        print('{0:d}-Fold Scores:'.format(n_folds))
        print_evaluation(scores[0], scores[1])
    return scores
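# Illustrative sketch only: the project's evaluate() helper is not shown here,
# so the function below is an assumed example of how an item-item similarity
# matrix like `sim` can be used to predict a held-out rating as a
# similarity-weighted average of the user's observed ratings. The name and
# scoring details are assumptions, not the author's implementation.
import numpy as np

def predict_from_similarity(user_row, sim, item_index):
    """Predict one rating from the user's other rated items.

    user_row   -- dense 1-D array of the user's ratings (0 = unrated)
    sim        -- item-item similarity matrix, shape (n_items, n_items)
    item_index -- index of the item whose rating is being predicted
    """
    rated = np.flatnonzero(user_row)
    rated = rated[rated != item_index]  # exclude the target item itself
    if rated.size == 0:
        return 0.0
    weights = sim[item_index, rated]
    if weights.sum() == 0:
        return 0.0
    # Similarity-weighted average of the observed ratings
    return float(np.dot(weights, user_row[rated]) / weights.sum())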
def get_user_vector(user_input):
    try:
        q = np.load('../.tmp/user_' + user_input + '.npy')
        print('found user_vector...')
        return q
    except IOError:
        # No cached vector for this user; build one from the Goodreads API.
        # Set this to where you save and load all data
        data_path = '../../goodbooks-10k/'

        # Get dataframe from books
        books = get_book_dataframe(data_path)
        mapper = get_mapper(data_path + 'books.csv')

        # make an array for myself
        q = np.zeros(10000, dtype=int)

        # username = secret.USERNAME
        api_key = secret.API_KEY

        if not user_input.isdigit():
            user_id = get_id_from_username(user_input, api_key)
        else:
            user_id = user_input
        if user_id is None:
            return None

        # Page through the user's "read" shelf until an empty page is returned
        page = 1
        while True:
            response = requests.get(
                'https://www.goodreads.com/review/list/?v=2&id=' + user_id +
                '&shelf=read&format=xml&key=' + api_key +
                '&per_page=200&page=' + str(page))
            tree = ElementTree.fromstring(response.content)
            reviews = tree.find('reviews')
            for review in reviews:
                goodreads_book_id = str(review.find('book').find('id').text)
                if goodreads_book_id in mapper:
                    book_id = int(mapper[goodreads_book_id])
                    rating = int(review.find('rating').text)
                    q[book_id - 1] = rating
            page += 1
            print(len(reviews))
            if len(reviews) < 1:
                break

        for i in range(len(q)):
            if q[i] != 0:
                title = books.iloc[i]['title']
                print("%s --> %s" % (q[i], title))

        # Turn 1-5 rating scale into negative - positive scale
        ratings_mapper = {0: 0, 1: -2, 2: -1, 3: 1, 4: 2, 5: 3}
        for i in range(len(q)):
            q[i] = ratings_mapper[q[i]]

        print('saving user_vector...')
        np.save('../.tmp/user_' + user_input, q)
        return q
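# Usage sketch (assumption, not in the original source): fetch or load the
# cached rating vector for a Goodreads user. 'example_username' is a
# placeholder; a valid API key in secret.py and a ../.tmp/ cache directory
# are required for the API path to work.
if __name__ == '__main__':
    user_vector = get_user_vector('example_username')
    if user_vector is not None:
        print('non-zero ratings:', np.count_nonzero(user_vector))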
def main(): """ Sample program to verify the code. This method will load in the book features, do some preprocessing, and use SVD to reduce it to 100 dimensions. It will then output the top 10 singular values. """ # Set this to where you save and load all data # data_path = '../data/goodbooks-10k/' data_path = '../../goodbooks-10k/' df = get_book_dataframe(data_path) fv = get_book_features(df) U, S, VT = reduce_matrix(fv, 100, random_state = 42) print(S[:10])
def main(): """ Sample program to verify the code. This method will load and join ratings """ # Set this to where you save and load all data # data_path = '../data/goodbooks-10k/' data_path = '../../goodbooks-10k/' goodreads_path = data_path + 'ratings.csv' amazon_path = data_path + 'ratings_amazon.csv' ratings = get_ratings(goodreads_path, amazon_path) book_features = get_book_features(get_book_dataframe(data_path)) joint = get_joint(ratings.T, book_features, 30, 30) print(joint.shape)