def fit(self, topK=50, shrink=100, similarity='cosine', normalize=True, feature_weighting="none",
        interactions_feature_weighting="none", **similarity_args):
    """Optionally re-weight ``self.URM_train``, then delegate to the parent ``fit``.

    :param topK: forwarded unchanged to the parent ``fit``.
    :param shrink: forwarded unchanged to the parent ``fit``.
    :param similarity: forwarded unchanged to the parent ``fit``.
    :param normalize: forwarded unchanged to the parent ``fit``.
    :param feature_weighting: forwarded unchanged to the parent ``fit``.
    :param interactions_feature_weighting: one of ``self.FEATURE_WEIGHTING_VALUES``.
        With "BM25" or "TF-IDF", ``self.URM_train`` is cast to float32, passed
        transposed through ``okapi_BM_25`` / ``TF_IDF``, transposed back and
        converted with ``check_matrix(..., 'csr')``. Any other accepted value
        leaves ``self.URM_train`` untouched.
    :param similarity_args: extra keyword arguments forwarded to the parent ``fit``.
    :raises ValueError: if ``interactions_feature_weighting`` is not an accepted value.

    NOTE: ``self.URM_train`` is overwritten in place, so calling ``fit`` twice
    with a weighting applies that weighting twice.
    """
    if interactions_feature_weighting not in self.FEATURE_WEIGHTING_VALUES:
        # Name the actual parameter: the parent ``fit`` also receives a
        # ``feature_weighting`` argument, so a generic message would not tell
        # the caller which of the two values was rejected.
        raise ValueError(
            "Value for 'interactions_feature_weighting' not recognized. "
            "Acceptable values are {}, provided was '{}'".format(
                self.FEATURE_WEIGHTING_VALUES, interactions_feature_weighting))

    if interactions_feature_weighting == "BM25":
        self.URM_train = self.URM_train.astype(np.float32)
        # The weighting helper is fed the transpose; the result is flipped back.
        self.URM_train = okapi_BM_25(self.URM_train.T).T
        self.URM_train = check_matrix(self.URM_train, 'csr')
    elif interactions_feature_weighting == "TF-IDF":
        self.URM_train = self.URM_train.astype(np.float32)
        self.URM_train = TF_IDF(self.URM_train.T).T
        self.URM_train = check_matrix(self.URM_train, 'csr')

    super().fit(topK=topK, shrink=shrink, similarity=similarity, normalize=normalize,
                feature_weighting=feature_weighting, **similarity_args)
def fit(self, topK=50, shrink=100, similarity='cosine', normalize=True, feature_weighting="none", **similarity_args):
    """Build ``self.W_sparse`` from the (optionally re-weighted) ``self.URM_train``.

    :param topK: stored on ``self.topK`` and passed to ``Compute_Similarity``.
    :param shrink: stored on ``self.shrink`` and passed to ``Compute_Similarity``.
    :param similarity: similarity name passed to ``Compute_Similarity``.
    :param normalize: passed to ``Compute_Similarity``.
    :param feature_weighting: one of ``self.FEATURE_WEIGHTING_VALUES``; "BM25" and
        "TF-IDF" overwrite ``self.URM_train`` with its re-weighted float32 version.
    :param similarity_args: extra keyword arguments passed to ``Compute_Similarity``.
    :raises ValueError: if ``feature_weighting`` is not an accepted value.
    """
    self.topK = topK
    self.shrink = shrink

    accepted_values = self.FEATURE_WEIGHTING_VALUES
    if feature_weighting not in accepted_values:
        raise ValueError(
            "Value for 'feature_weighting' not recognized. Acceptable values are {}, provided was '{}'".format(
                accepted_values, feature_weighting))

    # Select the weighting helper first, then run the shared three-step pipeline.
    if feature_weighting == "BM25":
        weighting_fn = okapi_BM_25
    elif feature_weighting == "TF-IDF":
        weighting_fn = TF_IDF
    else:
        weighting_fn = None

    if weighting_fn is not None:
        self.URM_train = self.URM_train.astype(np.float32)
        self.URM_train = weighting_fn(self.URM_train)
        self.URM_train = check_matrix(self.URM_train, 'csr')

    # The similarity object is built on the transposed training matrix.
    similarity_engine = Compute_Similarity(
        self.URM_train.T,
        shrink=shrink,
        topK=topK,
        normalize=normalize,
        similarity=similarity,
        **similarity_args)

    self.W_sparse = similarity_engine.compute_similarity()
    self.W_sparse = check_matrix(self.W_sparse, format='csr')
def fit(self, topK=50, shrink=100, similarity='cosine', normalize=True, feature_weighting="none", **similarity_args):
    """Build ``self.W_sparse`` from ``self.UCM_train`` with cold-user columns removed.

    The similarity is first computed with ``topK + len(self.cold_users)``
    neighbours (presumably so that ``topK`` entries can survive once the cold
    users are dropped), the columns of the users listed in ``self.cold_users``
    are zeroed, and the result is cut back to ``topK`` with
    ``similarityMatrixTopK``. Finally an identity matrix is added to
    ``self.recommender.W_sparse``.

    :param topK: stored on ``self.topK``; final neighbourhood size.
    :param shrink: stored on ``self.shrink`` and passed to ``Compute_Similarity``.
    :param similarity: similarity name passed to ``Compute_Similarity``.
    :param normalize: passed to ``Compute_Similarity``.
    :param feature_weighting: one of ``self.FEATURE_WEIGHTING_VALUES``; "BM25" and
        "TF-IDF" overwrite ``self.UCM_train`` with its re-weighted float32 version.
    :param similarity_args: extra keyword arguments passed to ``Compute_Similarity``.
    :raises ValueError: if ``feature_weighting`` is not an accepted value.
    """
    self.topK = topK
    self.topComputeK = topK + len(self.cold_users)
    self.shrink = shrink

    accepted_values = self.FEATURE_WEIGHTING_VALUES
    if feature_weighting not in accepted_values:
        raise ValueError(
            "Value for 'feature_weighting' not recognized. Acceptable values are {}, provided was '{}'"
            .format(accepted_values, feature_weighting))

    if feature_weighting == "BM25":
        weighting_fn = okapi_BM_25
    elif feature_weighting == "TF-IDF":
        weighting_fn = TF_IDF
    else:
        weighting_fn = None

    if weighting_fn is not None:
        self.UCM_train = self.UCM_train.astype(np.float32)
        self.UCM_train = weighting_fn(self.UCM_train)

    similarity_engine = Compute_Similarity(
        self.UCM_train.T,
        shrink=shrink,
        topK=self.topComputeK,
        normalize=normalize,
        similarity=similarity,
        **similarity_args)

    self.W_sparse = similarity_engine.compute_similarity()
    self.W_sparse = self.W_sparse.tocsc()

    # In CSC layout indptr[u]:indptr[u + 1] delimits the stored entries of column u.
    for user in self.cold_users:
        column_start = self.W_sparse.indptr[user]
        column_stop = self.W_sparse.indptr[user + 1]
        self.W_sparse.data[column_start:column_stop] = 0
    self.W_sparse.eliminate_zeros()

    self.W_sparse = self.W_sparse.tocsr()
    self.W_sparse = similarityMatrixTopK(self.W_sparse, k=self.topK).tocsr()
    self.W_sparse = check_matrix(self.W_sparse, format='csr')

    # Add identity matrix to the recommender
    wrapped_similarity = self.recommender.W_sparse
    self.recommender.W_sparse = wrapped_similarity + sps.identity(
        wrapped_similarity.shape[0], format="csr")
def precompute_best_item_indices(self, URM: sps.csr_matrix):
    """Store in ``self.sorted_indices`` the per-row ranking of a cosine similarity matrix.

    A copy of ``URM`` is optionally re-weighted according to
    ``self.feature_weighting`` ("BM25" or "TF-IDF"; any other value means no
    weighting), a cosine similarity is computed from it with ``self.shrink``,
    ``self.topK`` and ``self.normalize``, and each row of the densified result
    is arg-sorted by decreasing similarity.

    :param URM: sparse matrix the similarity is computed from; the caller's
        object is not modified because the method works on a copy.
    """
    working_matrix = URM.copy()

    if self.feature_weighting == "BM25":
        weighting_fn = okapi_BM_25
    elif self.feature_weighting == "TF-IDF":
        weighting_fn = TF_IDF
    else:
        weighting_fn = None

    if weighting_fn is not None:
        working_matrix = working_matrix.astype(np.float32)
        working_matrix = weighting_fn(working_matrix)
        working_matrix = check_matrix(working_matrix, 'csr')

    similarity_engine = Compute_Similarity(
        working_matrix,
        shrink=self.shrink,
        topK=self.topK,
        normalize=self.normalize,
        similarity="cosine")
    similarity_matrix = similarity_engine.compute_similarity()

    # Negating the dense matrix turns argsort's ascending order into a
    # descending-similarity ranking along each row.
    negated_dense = -similarity_matrix.todense()
    ranking = np.argsort(negated_dense, axis=1)
    self.sorted_indices = np.array(ranking)
def apply_feature_weighting(matrix, feature_weighting="none"):
    """Return ``matrix`` re-weighted according to ``feature_weighting``.

    :param matrix: matrix to re-weight; it must support ``astype``.
    :param feature_weighting: "BM25", "TF-IDF" or "none".
    :return: for "BM25" / "TF-IDF", the float32 matrix passed through
        ``okapi_BM_25`` / ``TF_IDF`` and then ``check_matrix(..., 'csr')``;
        for "none", the input object itself, unchanged.
    :raises ValueError: if ``feature_weighting`` is not one of the three values.
    """
    from course_lib.Base.IR_feature_weighting import okapi_BM_25, TF_IDF
    from course_lib.Base.Recommender_utils import check_matrix

    accepted_values = ["BM25", "TF-IDF", "none"]
    if feature_weighting not in accepted_values:
        raise ValueError(
            "Value for 'feature_weighting' not recognized. Acceptable values are {}, provided was '{}'"
            .format(accepted_values, feature_weighting))

    if feature_weighting == "BM25":
        weighting_fn = okapi_BM_25
    elif feature_weighting == "TF-IDF":
        weighting_fn = TF_IDF
    else:
        # "none": hand the input back untouched.
        return matrix

    weighted = weighting_fn(matrix.astype(np.float32))
    return check_matrix(weighted, 'csr')
def fit(self, topK=50, shrink=100, normalize=True, feature_weighting="none"):
    """Build ``self.W_sparse`` as a scaled dot product of ``self.URM_train`` with itself.

    ``W_sparse = URM_train.T . URM_train * (1 / shrink)``, where a ``shrink`` of
    0 is treated as 1. The result is then optionally passed through
    ``userSimilarityMatrixTopK`` and ``normalize_sk`` before being converted
    with ``check_matrix(..., format='csr')``.

    :param topK: stored on ``self.topK``; ``userSimilarityMatrixTopK`` is applied
        only when it is >= 0.
    :param shrink: stored on ``self.shrink``; divisor of the dot product.
    :param normalize: when truthy, ``normalize_sk(..., norm="l2", axis=1)`` is applied.
    :param feature_weighting: one of ``self.FEATURE_WEIGHTING_VALUES``; "BM25" and
        "TF-IDF" overwrite ``self.URM_train`` with the weighting helper applied
        to its transpose (and transposed back).
    :raises ValueError: if ``feature_weighting`` is not an accepted value.
    """
    self.topK = topK
    self.shrink = shrink

    accepted_values = self.FEATURE_WEIGHTING_VALUES
    if feature_weighting not in accepted_values:
        raise ValueError(
            "Value for 'feature_weighting' not recognized. Acceptable values are {}, provided was '{}'"
            .format(accepted_values, feature_weighting))

    if feature_weighting == "BM25":
        weighting_fn = okapi_BM_25
    elif feature_weighting == "TF-IDF":
        weighting_fn = TF_IDF
    else:
        weighting_fn = None

    if weighting_fn is not None:
        self.URM_train = self.URM_train.astype(np.float32)
        self.URM_train = weighting_fn(self.URM_train.T).T
        self.URM_train = check_matrix(self.URM_train, 'csr')

    # Guard against dividing by zero when no shrinkage is requested.
    scale = 1 / (1 if shrink == 0 else shrink)
    self.W_sparse = self.URM_train.T.dot(self.URM_train) * scale

    if self.topK >= 0:
        self.W_sparse = userSimilarityMatrixTopK(self.W_sparse, k=self.topK).tocsr()

    if normalize:
        self.W_sparse = normalize_sk(self.W_sparse, norm="l2", axis=1)

    self.W_sparse = check_matrix(self.W_sparse, format='csr')
def fit(self, user_topK=50, user_shrink=100, user_similarity_type='cosine', user_normalize=True,
        user_feature_weighting="none", user_asymmetric_alpha=0.5,
        item_topK=50, item_shrink=100, item_similarity_type='cosine', item_normalize=True,
        item_feature_weighting="none", item_asymmetric_alpha=0.5,
        interactions_feature_weighting="none"):
    """Compute ``self.user_W_sparse`` and ``self.item_W_sparse`` from the UCM and ICM.

    Steps, in order:
      1. validate the three weighting options;
      2. optionally re-weight ``self.URM_train`` (``interactions_feature_weighting``);
      3. optionally re-weight ``self.UCM_train`` and compute the user similarity;
      4. optionally re-weight ``self.ICM_train`` and compute the item similarity.

    :param user_topK: stored on ``self.user_topK``; passed to ``Compute_Similarity``.
    :param user_shrink: stored on ``self.user_shrink``; passed to ``Compute_Similarity``.
    :param user_similarity_type: similarity name for the user side.
    :param user_normalize: ``normalize`` flag for the user side.
    :param user_feature_weighting: weighting applied to ``self.UCM_train``.
    :param user_asymmetric_alpha: forwarded as ``asymmetric_alpha`` for the user side.
    :param item_topK: stored on ``self.item_topK``; passed to ``Compute_Similarity``.
    :param item_shrink: stored on ``self.item_shrink``; passed to ``Compute_Similarity``.
    :param item_similarity_type: similarity name for the item side.
    :param item_normalize: ``normalize`` flag for the item side.
    :param item_feature_weighting: weighting applied to ``self.ICM_train``.
    :param item_asymmetric_alpha: forwarded as ``asymmetric_alpha`` for the item side.
    :param interactions_feature_weighting: weighting applied to ``self.URM_train``.
    :raises ValueError: if any of the three weighting options is not in
        ``self.FEATURE_WEIGHTING_VALUES``; the message names the offending option.

    NOTE: the training matrices are overwritten in place, so calling ``fit``
    twice with a weighting applies that weighting twice.
    """
    # Validate every option before touching any state: a bad user/item value
    # must not be reported only after URM_train/UCM_train were already
    # re-weighted in place and a similarity matrix was computed.
    weighting_options = (
        ("interactions_feature_weighting", interactions_feature_weighting),
        ("user_feature_weighting", user_feature_weighting),
        ("item_feature_weighting", item_feature_weighting),
    )
    for option_name, option_value in weighting_options:
        if option_value not in self.FEATURE_WEIGHTING_VALUES:
            raise ValueError(
                "Value for '{}' not recognized. Acceptable values are {}, provided was '{}'"
                .format(option_name, self.FEATURE_WEIGHTING_VALUES, option_value))

    # Interactions weighting
    if interactions_feature_weighting == "BM25":
        self.URM_train = self.URM_train.astype(np.float32)
        self.URM_train = okapi_BM_25(self.URM_train)
        self.URM_train = check_matrix(self.URM_train, 'csr')
    elif interactions_feature_weighting == "TF-IDF":
        self.URM_train = self.URM_train.astype(np.float32)
        self.URM_train = TF_IDF(self.URM_train)
        self.URM_train = check_matrix(self.URM_train, 'csr')

    # User Similarity Computation
    self.user_topK = user_topK
    self.user_shrink = user_shrink

    if user_feature_weighting == "BM25":
        self.UCM_train = self.UCM_train.astype(np.float32)
        self.UCM_train = okapi_BM_25(self.UCM_train)
    elif user_feature_weighting == "TF-IDF":
        self.UCM_train = self.UCM_train.astype(np.float32)
        self.UCM_train = TF_IDF(self.UCM_train)

    kwargs = {"asymmetric_alpha": user_asymmetric_alpha}
    user_similarity_compute = Compute_Similarity(
        self.UCM_train.T, shrink=user_shrink, topK=user_topK, normalize=user_normalize,
        similarity=user_similarity_type, **kwargs)

    self.user_W_sparse = user_similarity_compute.compute_similarity()
    self.user_W_sparse = check_matrix(self.user_W_sparse, format='csr')

    # Item Similarity Computation
    self.item_topK = item_topK
    self.item_shrink = item_shrink

    if item_feature_weighting == "BM25":
        self.ICM_train = self.ICM_train.astype(np.float32)
        self.ICM_train = okapi_BM_25(self.ICM_train)
    elif item_feature_weighting == "TF-IDF":
        self.ICM_train = self.ICM_train.astype(np.float32)
        self.ICM_train = TF_IDF(self.ICM_train)

    kwargs = {"asymmetric_alpha": item_asymmetric_alpha}
    item_similarity_compute = Compute_Similarity(
        self.ICM_train.T, shrink=item_shrink, topK=item_topK, normalize=item_normalize,
        similarity=item_similarity_type, **kwargs)

    self.item_W_sparse = item_similarity_compute.compute_similarity()
    self.item_W_sparse = check_matrix(self.item_W_sparse, format='csr')