def evaluate(self, category_projection): assert type(category_projection) == CategoryProjection try: from astropy.stats import RipleysKEstimator except: raise Exception("Please install astropy") ripley_estimator = RipleysKEstimator(area=1., x_max=1., y_max=1., x_min=0., y_min=0.) proj = category_projection.projection[:, [category_projection.x_dim, category_projection.y_dim]] scaled_proj = np.array([stretch_0_to_1(proj.T[0]), stretch_0_to_1(proj.T[1])]).T radii = np.linspace(0, self.max_distance, 1000) deviances = np.abs(ripley_estimator(scaled_proj, radii, mode='ripley') - ripley_estimator.poisson(radii)) return np.trapz(deviances, x=radii)
def get_optimal_category_projection( corpus, n_dims=3, n_steps=10, projector=lambda n_terms, n_dims: CategoryProjector( AssociationCompactor(n_terms, scorer=RankDifference), projector=PCA(n_dims)), verbose=False): try: from astropy.stats import RipleysKEstimator except: raise Exception("Please install astropy") ripley = RipleysKEstimator(area=1., x_max=1., y_max=1., x_min=0., y_min=0.) min_dev = None best_k = None best_x = None best_y = None best_projector = None for k in np.power( 2, np.linspace( np.log(corpus.get_num_categories()) / np.log(2), np.log(corpus.get_num_terms()) / np.log(2), n_steps)).astype(int): r = np.linspace(0, np.sqrt(2), 100) category_projector = projector(k, n_dims) category_projection = category_projector.project(corpus) for dim_1 in range(0, n_dims): for dim_2 in range(dim_1 + 1, n_dims): proj = category_projection.projection[:, [dim_1, dim_2]] scaled_proj = np.array( [stretch_0_to_1(proj.T[0]), stretch_0_to_1(proj.T[1])]).T dev = np.sum( np.abs( ripley(scaled_proj, r, mode='ripley') - ripley.poisson(r))) if min_dev is None or dev < min_dev: min_dev = dev best_k = k best_projector = category_projector best_x, best_y = (dim_1, dim_2) if verbose: print(k, dim_1, dim_2, dev, best_k, best_x, best_y, min_dev) if verbose: print(best_k, best_x, best_y) return best_projector.project(corpus, best_x, best_y)
def get_optimal_category_projection_by_rank( corpus, n_dims=2, n_steps=20, projector=lambda rank, n_dims: CategoryProjector( AssociationCompactorByRank(rank), projector=PCA(n_dims)), verbose=False): try: from astropy.stats import RipleysKEstimator except: raise Exception("Please install astropy") ripley = RipleysKEstimator(area=1., x_max=1., y_max=1., x_min=0., y_min=0.) min_dev = None best_rank = None best_x = None best_y = None best_projector = None for rank in np.linspace(1, TermCategoryRanker().get_max_rank(corpus), n_steps): r = np.linspace(0, np.sqrt(2), 100) category_projector = projector(rank, n_dims) category_projection = category_projector.project(corpus) for dim_1 in range(0, n_dims): for dim_2 in range(dim_1 + 1, n_dims): proj = category_projection.projection[:, [dim_1, dim_2]] scaled_proj = np.array( [stretch_0_to_1(proj.T[0]), stretch_0_to_1(proj.T[1])]).T dev = np.sum( np.abs( ripley(scaled_proj, r, mode='ripley') - ripley.poisson(r))) if min_dev is None or dev < min_dev: min_dev = dev best_rank = rank best_projector = category_projector best_x, best_y = (dim_1, dim_2) if verbose: print('rank', rank, 'dims', dim_1, dim_2, 'K', dev) print(' best rank', best_rank, 'dims', best_x, best_y, 'K', min_dev) if verbose: print(best_rank, best_x, best_y) return best_projector.project(corpus, best_x, best_y)
def evaluate(self, category_projection): assert type(category_projection) == CategoryProjection proj = category_projection.projection[:, [ category_projection.x_dim, category_projection.y_dim ]] scaled_proj = np.array( [stretch_0_to_1(proj.T[0]), stretch_0_to_1(proj.T[1])]).T morista_sum = 0 N = scaled_proj.shape[0] for i in range(self.num_bin_range[0], self.num_bin_range[1]): bins, _, _ = np.histogram2d(scaled_proj.T[0], scaled_proj.T[1], i) # I_M = Q * (\sum_{k=1}^{Q}{n_k * (n_k - 1)})/(N * (N _ 1)) Q = len(bins) # num_quadrants # Eqn 1. morista_sum += Q * np.sum(np.ravel(bins) * (np.ravel(bins) - 1)) / (N * (N - 1)) return morista_sum / (self.num_bin_range[1] - self.num_bin_range[0])
def get_optimal_category_projection( corpus, n_dims=3, n_steps=10, projector=lambda n_terms, n_dims: CategoryProjector( selector=AssociationCompactor(n_terms, scorer=RankDifference), projector=PCA(n_dims)), optimizer=ripley_poisson_difference, verbose=False): min_dev = None best_k = None best_x = None best_y = None best_projector = None for k in np.power( 2, np.linspace( np.log(corpus.get_num_categories()) / np.log(2), np.log(corpus.get_num_terms()) / np.log(2), n_steps)).astype(int): category_projector = projector(k, n_dims) category_projection = category_projector.project(corpus) for dim_1 in range(0, n_dims): for dim_2 in range(dim_1 + 1, n_dims): proj = category_projection.projection[:, [dim_1, dim_2]] scaled_proj = np.array( [stretch_0_to_1(proj.T[0]), stretch_0_to_1(proj.T[1])]).T dev = optimizer(scaled_proj) #dev = np.sum(np.abs(ripley(scaled_proj, r, mode='ripley') - ripley.poisson(r))) if min_dev is None or dev < min_dev: min_dev = dev best_k = k best_projector = category_projector best_x, best_y = (dim_1, dim_2) if verbose: print(k, dim_1, dim_2, dev, best_k, best_x, best_y, min_dev) if verbose: print(best_k, best_x, best_y) return best_projector.project(corpus, best_x, best_y)
def get_optimal_category_projection_by_rank( corpus, n_dims=2, n_steps=20, projector=lambda rank, n_dims: CategoryProjector(AssociationCompactorByRank(rank), projector=PCA(n_dims)), verbose=False ): try: from astropy.stats import RipleysKEstimator except: raise Exception("Please install astropy") ripley = RipleysKEstimator(area=1., x_max=1., y_max=1., x_min=0., y_min=0.) min_dev = None best_rank = None best_x = None best_y = None best_projector = None for rank in np.linspace(1, TermCategoryRanker().get_max_rank(corpus), n_steps): r = np.linspace(0, np.sqrt(2), 100) category_projector = projector(rank, n_dims) category_projection = category_projector.project(corpus) for dim_1 in range(0, n_dims): for dim_2 in range(dim_1 + 1, n_dims): proj = category_projection.projection[:, [dim_1, dim_2]] scaled_proj = np.array([stretch_0_to_1(proj.T[0]), stretch_0_to_1(proj.T[1])]).T dev = np.sum(np.abs(ripley(scaled_proj, r, mode='ripley') - ripley.poisson(r))) if min_dev is None or dev < min_dev: min_dev = dev best_rank = rank best_projector = category_projector best_x, best_y = (dim_1, dim_2) if verbose: print('rank', rank, 'dims', dim_1, dim_2, 'K', dev) print(' best rank', best_rank, 'dims', best_x, best_y, 'K', min_dev) if verbose: print(best_rank, best_x, best_y) return best_projector.project(corpus, best_x, best_y)
def get_optimal_category_projection( corpus, n_dims=3, n_steps=10, projector=lambda n_terms, n_dims: CategoryProjector(AssociationCompactor(n_terms, scorer=RankDifference), projector=PCA(n_dims)), verbose=False ): try: from astropy.stats import RipleysKEstimator except: raise Exception("Please install astropy") ripley = RipleysKEstimator(area=1., x_max=1., y_max=1., x_min=0., y_min=0.) min_dev = None best_k = None best_x = None best_y = None best_projector = None for k in np.power(2, np.linspace(np.log(corpus.get_num_categories()) / np.log(2), np.log(corpus.get_num_terms()) / np.log(2), n_steps)).astype(int): r = np.linspace(0, np.sqrt(2), 100) category_projector = projector(k, n_dims) category_projection = category_projector.project(corpus) for dim_1 in range(0, n_dims): for dim_2 in range(dim_1 + 1, n_dims): proj = category_projection.projection[:, [dim_1, dim_2]] scaled_proj = np.array([stretch_0_to_1(proj.T[0]), stretch_0_to_1(proj.T[1])]).T dev = np.sum(np.abs(ripley(scaled_proj, r, mode='ripley') - ripley.poisson(r))) if min_dev is None or dev < min_dev: min_dev = dev best_k = k best_projector = category_projector best_x, best_y = (dim_1, dim_2) if verbose: print(k, dim_1, dim_2, dev, best_k, best_x, best_y, min_dev) if verbose: print(best_k, best_x, best_y) return best_projector.project(corpus, best_x, best_y)
def evaluate(self, category_projection): assert type(category_projection) == CategoryProjection try: from astropy.stats import RipleysKEstimator except: raise Exception("Please install astropy") ripley_estimator = RipleysKEstimator(area=1., x_max=1., y_max=1., x_min=0., y_min=0.) proj = category_projection.projection[:, [ category_projection.x_dim, category_projection.y_dim ]] scaled_proj = np.array( [stretch_0_to_1(proj.T[0]), stretch_0_to_1(proj.T[1])]).T radii = np.linspace(0, self.max_distance, 1000) deviances = np.abs( ripley_estimator(scaled_proj, radii, mode='ripley') - ripley_estimator.poisson(radii)) return np.trapz(deviances, x=radii)