def create_user_vectors(self, alpha, epsilon=None, metric='log'):
    """Create user vectors as confidence-weighted averages of the document
    vectors of the items each user interacted with.

    Args:
        alpha: hyperparameter for computing the interaction confidence.
        epsilon: hyperparameter for the confidence; only used by the
            'log' metric.
        metric: confidence metric, either 'log' or 'lin'.

    Raises:
        ValueError: if ``metric`` is neither 'log' nor 'lin'.
    """
    sparse = make_sparse(self.dataframe.df, self.url_2_id, self.uid_2_id)
    if metric == 'log':
        sparse = sparse.to_coo(alpha=alpha, epsilon=epsilon, metrics='log')
    elif metric == 'lin':
        sparse = sparse.to_coo(alpha=alpha, metrics='lin')
    else:
        # Previously an unknown metric fell through unconverted and
        # crashed later on .toarray(); fail early with a clear message.
        raise ValueError(f"Unknown confidence metric: {metric!r}")
    sparse = sparse.toarray()
    # Per-user total confidence, kept as a column vector for broadcasting.
    confidence_sums = sparse.sum(axis=1, keepdims=True)
    # Avoid division by zero for users with no recorded confidence.
    confidence_sums[confidence_sums == 0] = 1
    user_matrix = sparse.dot(self.doc_vectors)
    self.__user_vectors = user_matrix / confidence_sums
def train(self, train_df, factors=None, iterations=None, alpha=None,
          epsilon=None, metric='log'):
    """Fit an implicit-feedback ALS model on the interaction data.

    Args:
        train_df: interaction dataframe used to build the sparse matrix.
        factors: number of latent factors for ALS.
        iterations: number of ALS iterations.
        alpha: confidence hyperparameter ('log' and 'lin' metrics).
        epsilon: confidence hyperparameter ('log' metric only).
        metric: confidence weighting — 'log', 'lin' or 'bin'.

    Raises:
        ValueError: if ``metric`` is not one of 'log', 'lin', 'bin'.
    """
    print("##################### ALS model #####################")
    self.__train_df = train_df
    print("[1] Creating sparse interaction matrix")
    # Items in rows / users in columns, as implicit's ALS expects here.
    sparse = make_sparse(train_df, self.__url_2_id, self.__uid_2_id,
                         users_in_rows=False)
    if metric == 'log':
        sparse = sparse.to_coo(metrics='log', alpha=alpha, epsilon=epsilon)
    elif metric == 'lin':
        sparse = sparse.to_coo(metrics='lin', alpha=alpha)
    elif metric == 'bin':
        sparse = sparse.to_coo(metrics='bin')
    else:
        # Previously this silently returned, leaving the model unfitted;
        # fail loudly so the caller notices the bad argument.
        raise ValueError(f"Unknown confidence metric: {metric!r}")
    sparse = sparse.tocsc()
    self.__model = implicit.als.AlternatingLeastSquares(
        factors=factors, iterations=iterations, num_threads=0)
    print("\n[2] Fitting the matrix to the model")
    self.__model.fit(sparse)
    self.__user_factors = self.model.user_factors
    self.__item_factors = self.model.item_factors
    print("\n[DONE] ALS successful")
def create_user_vectors_bin(self):
    """Build user vectors by summing the document vectors of the items
    each user interacted with (binary confidence weighting)."""
    interactions = make_sparse(self.dataframe.df, self.url_2_id, self.uid_2_id)
    dense = interactions.to_coo(metrics='bin').toarray()
    self.__user_vectors = dense.dot(self.doc_vectors)
def train(self, train_df, alpha=None, epsilon=None, min_interactions=1,
          metric='bin'):
    """Train the SVD model by factorizing the user/item interaction matrix.

    Args:
        train_df: interaction dataframe used to build the sparse matrix.
        alpha: confidence hyperparameter ('log' and 'lin' metrics).
        epsilon: confidence hyperparameter ('log' metric only).
        min_interactions: drop users/items below this interaction count
            (1 keeps everything).
        metric: confidence weighting — 'log', 'lin' or 'bin'.

    Raises:
        ValueError: if ``metric`` is not one of 'log', 'lin', 'bin'.
    """
    print("##################### SVD model #####################")
    self.__train_df = train_df
    if min_interactions != 1:
        train_df = helpers.df_min_interactions(
            df=train_df, min_interactions=min_interactions)
    print("[1] Making sparse interaction matrix")
    sparse = make_sparse(df=train_df, url_2_id=self.url_2_id,
                         uid_2_id=self.uid_2_id)
    if metric == 'log':
        sparse = sparse.to_coo(metrics='log', alpha=alpha, epsilon=epsilon)
    elif metric == 'bin':
        sparse = sparse.to_coo(metrics='bin')
    elif metric == 'lin':
        sparse = sparse.to_coo(metrics='lin', alpha=alpha)
    else:
        # Previously an unknown metric fell through unconverted and
        # crashed on .toarray() below; fail early with a clear message.
        raise ValueError(f"Unknown confidence metric: {metric!r}")
    print("[2] Performing SVD on interaction matrix")
    self.__u, self.__s, self.__vh = np.linalg.svd(sparse.toarray(),
                                                  full_matrices=False)
    print("[DONE] SVD successful")
def mse(self, test_df):
    """Compute the mean squared error of the model's predictions.

    Compares the model's predicted score against the actual confidence
    value for every (user, url) pair present in ``test_df``.

    Args:
        test_df: dataframe with 'uid' and 'page_url' columns.

    Returns:
        Mean of (actual - predicted)**2 over all evaluated pairs.

    Raises:
        ValueError: if ``test_df`` contains no interactions (previously
            this raised an opaque ZeroDivisionError).
    """
    print('[1] Creating sparse matrix for whole dataset')
    sparse = make_sparse(df=test_df, url_2_id=self.model.url_2_id,
                         uid_2_id=self.model.uid_2_id)
    sparse = sparse.to_coo().toarray()
    # Hoist attribute lookups out of the hot loop.
    uid_2_id = self.model.uid_2_id
    url_2_id = self.model.url_2_id
    grouped = test_df.groupby('uid')['page_url'].unique()
    bar = Bar('Processing', max=grouped.shape[0])
    mse_sum = 0
    count = 0
    for uid, urls in grouped.items():
        row = sparse[uid_2_id[uid]]  # one row lookup per user, not per url
        for url in urls:
            real = row[url_2_id[url]]
            score = self.model.predict(uid, url)
            mse_sum += (real - score) ** 2
            count += 1
        bar.next()
    bar.finish()
    if count == 0:
        # Guard against division by zero on an empty test set.
        raise ValueError("test_df contains no interactions to evaluate")
    return mse_sum / count