def load(self, ratings, train, validation):
    """Store the data split, initialise the latent factors and start estimation.

    Args:
        ratings: Full ratings collection; only handed to
            ``movielens_extractor`` to obtain the user and item counts.
        train: 2-D array of training rows. Column index 2 is averaged into
            ``mean_rating``; the whole array is used to build the rating
            matrix.
        validation: 2-D array of held-out rows. Column index 2 is kept as
            ``ratings_test``.

    The method returns nothing; it finishes by calling ``self.estimate()``.
    """
    self.train = train
    self.validation = validation
    self.num_users = movielens_extractor.get_num_users(ratings)
    self.num_items = movielens_extractor.get_num_items(ratings)

    self.mean_rating = np.mean(self.train[:, 2])
    self.ratings_test = np.float64(validation[:, 2])

    # Item factors are drawn before user factors; the order is kept so the
    # random generator is consumed exactly as before.
    self.item_features = 0.1 * NormalRandom.generate_matrix(
        self.num_items, self.num_features)
    self.user_features = 0.1 * NormalRandom.generate_matrix(
        self.num_users, self.num_features)

    self.df_post_item = self.df_item + self.num_items
    self.df_post_user = self.df_user + self.num_users

    # Stored transposed: update_user_features and udpate_item_features each
    # transpose self.matrix again on entry.
    self.matrix = build_rating_matrix(
        self.num_users, self.num_items, train).T

    self.counter_prob = 1
    self.probe_rat_all = self.pred(
        self.item_features, self.user_features,
        self.validation, self.mean_rating)

    self.estimate()
def update_user_features(self):
    """Run one Gibbs-sampling sweep over every user's feature vector.

    For each user the items with a rating greater than zero are selected,
    a Gaussian mean and covariance are built from those items' features,
    ``alpha_user``, ``mu_user`` and ``beta``, and a fresh feature vector is
    drawn and written into ``self.user_features``.

    Side effects: transposes ``self.matrix`` in place of the stored one,
    overwrites every row of ``self.user_features`` and writes progress lines
    to ``self.results_file``.
    """
    # The indexing below reads column ``uid`` as that user's ratings, so the
    # stored matrix is flipped first.
    self.matrix = self.matrix.T

    # Gibbs sampling for user features
    for uid in range(self.num_users):
        self.results_file.write('User %d\n' % (uid + 1))

        rated = self.matrix[:, uid] > 0.0
        item_vecs = self.item_features[rated, :]
        # Centre the observed ratings and shape them as a column vector.
        centred = np.reshape(
            self.matrix[rated, uid] - self.mean_rating, (-1, 1))

        gram = np.dot(item_vecs.T, item_vecs)
        post_cov = inv(self.alpha_user + self.beta * gram)

        rhs = self.beta * np.dot(item_vecs.T, centred)
        rhs = rhs + np.dot(self.alpha_user, self.mu_user)
        post_mean = np.dot(post_cov, rhs)

        chol_factor = cholesky(post_cov)
        noise = NormalRandom.generate_matrix(self.num_features, 1)
        draw = post_mean + np.dot(chol_factor, noise)

        self.user_features[uid, :] = np.reshape(
            draw, (self.num_features, ))

    # NOTE(review): this summary line is assumed to run once after the loop
    # (it logs the whole matrix's shape and element [0, 0]); confirm against
    # the original layout.
    n_rows, n_cols = self.user_features.shape[0], self.user_features.shape[1]
    self.results_file.write(
        'user_features \t (%d,%d) \t %16.16f\n' %
        (n_rows, n_cols, self.user_features[0, 0]))
def sample_wishart(sigma, dof):
    """Return a sample from the Wishart distribution.

    The Wishart distribution is the conjugate prior for precision matrices.

    Args:
        sigma: Square scale matrix. It is passed to ``np.linalg.cholesky``,
            so it must be accepted by that routine.
        dof: Degrees of freedom; used as the number of rows requested from
            ``NormalRandom.generate_matrix``.

    Returns:
        The product ``X.T . X`` where ``X`` is the random matrix multiplied
        by the transposed Cholesky factor of ``sigma``.
    """
    dim = sigma.shape[0]
    # np.linalg.cholesky returns the lower factor; transpose it to the upper.
    upper = np.linalg.cholesky(sigma).T
    # presumably standard-normal draws of shape (dof, dim) — generator is
    # defined outside this block
    draws = NormalRandom.generate_matrix(dof, dim)
    projected = np.dot(draws, upper)
    return np.dot(projected.T, projected)
def udpate_item_features(self):
    """Run one Gibbs-sampling sweep over every item's feature vector.

    The method name is misspelled ("udpate"); it is kept as is because
    callers use it.

    For each item the users with a rating greater than zero are selected,
    a Gaussian mean and covariance are built from those users' features,
    ``alpha_item``, ``mu_item`` and ``beta``, and a fresh feature vector is
    drawn and written into ``self.item_features``.

    Side effects: transposes ``self.matrix`` in place of the stored one,
    overwrites every row of ``self.item_features`` and writes one progress
    line per item to ``self.results_file``.
    """
    # The indexing below reads column ``iid`` as that item's ratings, so the
    # stored matrix is flipped first.
    self.matrix = self.matrix.T

    # Gibbs sampling for item features
    for iid in range(self.num_items):
        self.results_file.write('Item %d\n' % (iid + 1))

        raters = self.matrix[:, iid] > 0.0
        user_vecs = self.user_features[raters, :]
        # Centre the observed ratings and shape them as a column vector.
        centred = np.reshape(
            self.matrix[raters, iid] - self.mean_rating, (-1, 1))

        gram = np.dot(user_vecs.T, user_vecs)
        post_cov = inv(self.alpha_item + self.beta * gram)

        rhs = self.beta * np.dot(user_vecs.T, centred)
        rhs = rhs + np.dot(self.alpha_item, self.mu_item)
        post_mean = np.dot(post_cov, rhs)

        chol_factor = cholesky(post_cov)
        noise = NormalRandom.generate_matrix(self.num_features, 1)
        draw = post_mean + np.dot(chol_factor, noise)

        self.item_features[iid, :] = np.reshape(
            draw, (self.num_features, ))
def update_user_params(self):
    """Resample the user-side hyperparameters ``alpha_user`` and ``mu_user``.

    Uses the column mean and covariance of ``self.user_features`` together
    with ``mu0_user``, ``beta_user``, ``WI_user`` and ``df_post_user``.
    ``alpha_user`` is drawn first via ``sample_wishart``; ``mu_user`` is then
    drawn using the new ``alpha_user``.
    """
    k = self.num_features
    n_users = self.num_users
    b0 = self.beta_user

    # Mean feature vector over users, as a (k, 1) column.
    feature_mean = np.reshape(np.mean(self.user_features, 0), (k, 1))
    feature_cov = np.cov(self.user_features.T)
    offset = self.mu0_user - feature_mean

    shrink = np.dot(offset, offset.T) * (n_users * b0) / (b0 + n_users)
    scale_post = inv(inv(self.WI_user) + n_users * feature_cov + shrink)

    # Average with the transpose so the matrix handed to sample_wishart is
    # symmetric (presumably to remove round-off asymmetry left by the
    # inversions before it is Cholesky-factorised).
    scale_post = (scale_post + scale_post.T) / 2.0

    # update alpha_user
    self.alpha_user = sample_wishart(scale_post, self.df_post_user)

    # update mu_user
    mean_post = (b0 * self.mu0_user + n_users * feature_mean) / \
        (b0 + n_users)
    chol_factor = cholesky(inv((b0 + n_users) * self.alpha_user))
    noise = NormalRandom.generate_matrix(k, 1)
    self.mu_user = mean_post + np.dot(chol_factor, noise)