def _Gauss_block_sparse_pre(self, x: np.array, y: np.array, K_ij: LazyTensor,
                            sigma: float = 1., eps: float = 0.05):
    '''
    Attach a block-sparsity pattern to a symbolic Gaussian kernel.

    Both point clouds are grouped into bins of size `eps`; a pair of bins
    is kept only when the distance between their centroids is below
    `4 * sigma`.

    Args:
        x[np.array], y[np.array] = arrays giving rise to Gaussian kernel K(x,y)
        K_ij[LazyTensor_n] = symbolic representation of K(x,y)
        sigma[float] = kernel bandwidth; sets the cut-off radius 4 * sigma
        eps[float] = size for square bins

    Returns:
        K_ij[LazyTensor_n] = the same object, with `ranges` set to the
            block-sparsity pattern
    '''
    # One integer bin label per point.
    labels_x = grid_cluster(x, eps)
    labels_y = grid_cluster(y, eps)

    # Index range and centroid of every bin (third return value is unused).
    ranges_x, centers_x, _ = cluster_ranges_centroids(x, labels_x)
    ranges_y, centers_y, _ = cluster_ranges_centroids(y, labels_y)

    # Reorder the points by bin label.
    # NOTE(review): the sorted arrays are only rebound locally and never
    # returned; presumably K_ij has to be built from data in this sorted
    # order for the ranges below to be meaningful - confirm at the call site.
    x, labels_x = sort_clusters(x, labels_x)
    y, labels_y = sort_clusters(y, labels_y)

    # Coarse Boolean mask over (x bin, y bin) pairs, based on the squared
    # distance between bin centroids.
    offsets = centers_x[:, None, :] - centers_y[None, :, :]
    sq_dist = np.sum(offsets ** 2, 2)
    cutoff_sq = (4 * sigma) ** 2
    keep = sq_dist < cutoff_sq

    # Turn the mask into the integer range tensors stored on K_ij.
    K_ij.ranges = from_matrix(ranges_x, ranges_y, keep)
    return K_ij
def _Gauss_block_sparse_pre(self, x: np.array, y: np.array, K_ij: LazyTensor): ''' Helper function to preprocess data for block-sparse reduction of the Gaussian kernel Args: x[np.array], y[np.array] = arrays giving rise to Gaussian kernel K(x,y) K_ij[LazyTensor_n] = symbolic representation of K(x,y) eps[float] = size for square bins Returns: K_ij[LazyTensor_n] = symbolic representation of K(x,y) with set sparse ranges ''' # labels for low dimensions if x.shape[1] < 4 or y.shape[1] < 4: x_labels = grid_cluster(x, self.eps) y_labels = grid_cluster(y, self.eps) # range and centroid per class x_ranges, x_centroids, _ = cluster_ranges_centroids(x, x_labels) y_ranges, y_centroids, _ = cluster_ranges_centroids(y, y_labels) else: # labels for higher dimensions x_labels, x_centroids = self._KMeans(x) y_labels, y_centroids = self._KMeans(y) # compute ranges x_ranges = cluster_ranges(x_labels) y_ranges = cluster_ranges(y_labels) # sort points x, x_labels = sort_clusters(x, x_labels) y, y_labels = sort_clusters(y, y_labels) # Compute a coarse Boolean mask: if self.kernel == 'rbf': D = np.sum((x_centroids[:, None, :] - y_centroids[None, :, :])**2, 2) elif self.kernel == 'exp': D = np.sqrt( np.sum((x_centroids[:, None, :] - y_centroids[None, :, :])**2, 2)) keep = D < (self.mask_radius)**2 # mask -> set of integer tensors ranges_ij = from_matrix(x_ranges, y_ranges, keep) K_ij.ranges = ranges_ij # block-sparsity pattern return K_ij
# into groups which should neither be too **small** (performances on clusters # with less than ~200 points each are suboptimal) # nor too **many** (the :func:`from_matrix() <pykeops.numpy.cluster.from_matrix>` # pre-processor can become a bottleneck when working with >2,000 clusters # per point cloud). # # In this tutorial, we use the :func:`grid_cluster() <pykeops.numpy.cluster.grid_cluster>` # routine which simply groups points into **cubic bins** of arbitrary size: from pykeops.numpy.cluster import grid_cluster eps = .05 # Size of our square bins Start = time.time() start = time.time() x_labels = grid_cluster(x, eps) # class labels y_labels = grid_cluster(y, eps) # class labels end = time.time() print("Perform clustering : {:.4f}s".format(end - start)) ########################################################################## # Once (integer) cluster labels have been computed, # we can compute the **centroids** and **memory footprint** of each class: from pykeops.numpy.cluster import cluster_ranges_centroids # Compute one range and centroid per class: start = time.time() x_ranges, x_centroids, _ = cluster_ranges_centroids(x, x_labels) y_ranges, y_centroids, _ = cluster_ranges_centroids(y, y_labels) end = time.time()
# **skip computations** between pairs of points that are far away from each other. # # As explained in :doc:`the documentation <../../python/sparsity>`, # fast GPU routines rely heavily on **memory contiguity**: # before going any further, we must # **sort our input dataset** to make sure that neighboring points are stored # next to each other on the device memory. As detailed in the # :doc:`KeOps+NumPy tutorial on block-sparse reductions <../../_auto_examples/numpy/plot_grid_cluster_numpy>`, # a simple way of doing so is to write: # Import the KeOps helper routines for block-sparse reductions: from pykeops.numpy.cluster import grid_cluster, cluster_ranges_centroids, sort_clusters, from_matrix # Put our points in cubic bins of size eps, as we compute a vector of class labels: eps = .05 x_labels = grid_cluster(x, eps) # Compute the memory footprint and centroid of each of those non-empty "cubic" clusters: x_ranges, x_centroids, _ = cluster_ranges_centroids(x, x_labels) # Sort our dataset according to the vector of labels: x, x_labels = sort_clusters(x, x_labels) ############################################################################# # # .. note:: # In higher-dimensional settings, the simplistic # :func:`grid_cluster <pykeops.numpy.cluster.grid_cluster>` # scheme could be replaced by a more versatile routine such as # our :doc:`KeOps+NumPy K-means implementation <../kmeans/plot_kmeans_numpy>`. # # Points are now roughly sorted # according to their locations, with each cluster corresponding to