def predict_fine(self, x, coarse_codes=None):
    """
    Compute the fine codes for a datapoint.

    :param ndarray x:
        the point to code
    :param ndarray coarse_codes:
        the coarse codes for the point, if they are already computed; when
        None they are obtained by calling predict_coarse on x

    :returns tuple:
        a tuple of fine codes, concatenated coarse split by coarse split
    """
    if coarse_codes is None:
        coarse_codes = self.predict_coarse(x)

    projected = self.project(x, coarse_codes)

    codes = []
    for coarse_part, split in iterate_splits(projected, self.num_coarse_splits):
        # Product quantizer parameters for this split; only the last of the
        # four returned values is needed here
        _, _, _, sub_centroids = self.get_split_parameters(split)

        # One subquantizer code per fine split of this coarse part
        for fine_part, sub_split in iterate_splits(coarse_part, self.num_fine_splits):
            codes.append(predict_cluster(fine_part, sub_centroids[sub_split]))

    return tuple(codes)
def accumulate_covariance_estimators(data, C):
    """
    Accumulate covariance estimators for each cluster with a single pass
    through the data.

    :param ndarray data:
        NxD array - observations on the rows
    :param ndarray C:
        VxD array of cluster centroids

    :returns ndarray A:
        VxDxD array - total sum of residual outer products for each cluster
    :returns ndarray mu:
        VxD array of total sum of residuals per cluster
    :returns ndarray count:
        length-V integer array of cluster sizes
    :returns ndarray assignments:
        length-N integer array of cluster assignments
    :returns ndarray residuals:
        NxD array of data residuals
    """
    num_clusters = C.shape[0]
    num_points = data.shape[0]
    dim = data.shape[1]

    # Per-cluster accumulators
    outer_sums = np.zeros((num_clusters, dim, dim))
    residual_sums = np.zeros((num_clusters, dim))
    sizes = np.zeros(num_clusters, dtype=int)

    # Per-point outputs
    labels = np.zeros(num_points, dtype=int)
    offsets = np.zeros((num_points, dim))

    for idx in range(num_points):
        point = data[idx]

        # Assign the point to a cluster and take its residual to the centroid
        label = predict_cluster(point, C)
        offset = point - C[label]

        labels[idx] = label
        offsets[idx] = offset

        # Update the estimators of the assigned cluster
        sizes[label] += 1
        residual_sums[label] += offset
        outer_sums[label] += np.outer(offset, offset)

    return outer_sums, residual_sums, sizes, labels, offsets
def predict_coarse(self, x):
    """
    Compute the coarse codes for a datapoint.

    :param ndarray x:
        the point to code

    :returns tuple:
        a tuple of coarse codes, one per coarse split
    """
    codes = []
    for part, split in iterate_splits(x, self.num_coarse_splits):
        # Code this part against the centroids stored for its split
        codes.append(predict_cluster(part, self.Cs[split]))
    return tuple(codes)
# Fit a mean-field ADVI approximation on minibatches.
# NOTE(review): the original layout was lost; only the ADVI construction and
# fit are assumed to belong inside the model context - confirm.
with model:
    advi_mf = pm.ADVI()
    advi_mf.fit(10000, more_replacements={X_shared: X_minibatch},
                obj_optimizer=pm.adagrad(learning_rate=1e-2))

# Plot the history recorded during fitting and save it next to the outputs
fig = plt.figure()
plt.plot(advi_mf.hist)
plt.title("loss function")
plt.savefig(out_path + "/" + "lossPlot.jpg")

print("making prediction...")
# Prediction
y, point = predict_cluster(approx=advi_mf.approx, nsample=1000, X=X,
                           model=model, xobs=xobs, K=K)

# Build the segmented image: every pixel takes the integer-cast 'mu<k>' entry
# of the cluster it was assigned to.
nrows, ncols = img.shape[0], img.shape[1]
segmented_img = np.zeros((nrows, ncols, D), dtype='int')
cluster_reshape = y.reshape(nrows, ncols)
# One masked assignment per distinct cluster id instead of a Python loop over
# every pixel; the lookup, format and cast are done once per cluster.
for cluster_number in np.unique(cluster_reshape):
    segmented_img[cluster_reshape == cluster_number] = \
        point['mu{0:d}'.format(cluster_number)].astype(int)

fig = plt.figure()
plt.imshow(segmented_img)
# Explicitly disable the grid; grid(None) toggles it, so the outcome would
# depend on whichever plotting style is active.
plt.grid(False)
plt.title("Segmented image using {0:d} clusters".format(K))
# Fit a mean-field ADVI approximation on minibatches.
# NOTE(review): the original layout was lost; only the ADVI construction and
# fit are assumed to belong inside the model context - confirm.
with model:
    advi_mf = pm.ADVI()
    advi_mf.fit(10000, more_replacements={X_shared: X_minibatch},
                obj_optimizer=pm.adagrad(learning_rate=1e-2))

# Plot the history recorded during fitting and save it next to the outputs
fig = plt.figure()
plt.plot(advi_mf.hist)
plt.title("loss function")
plt.savefig(out_path + "/lossPlot.jpg")

print("making prediction...")
# Prediction
y, point = predict_cluster(approx=advi_mf.approx, nsample=1000, X=X,
                           model=model, K=K, cov="cov_diagonal")

# Build the segmented image: every pixel takes the integer-cast 'mu<k>' entry
# of the cluster it was assigned to.
nrows, ncols = img.shape[0], img.shape[1]
segmented_img = np.zeros((nrows, ncols, D), dtype='int')
cluster_reshape = y.reshape(nrows, ncols)
# One masked assignment per distinct cluster id instead of a Python loop over
# every pixel; the lookup, format and cast are done once per cluster.
for cluster_number in np.unique(cluster_reshape):
    segmented_img[cluster_reshape == cluster_number] = \
        point['mu{0:d}'.format(cluster_number)].astype(int)

fig = plt.figure()
plt.imshow(segmented_img)
# Explicitly disable the grid; grid(None) toggles it, so the outcome would
# depend on whichever plotting style is active.
plt.grid(False)
plt.title("Segmented image using {0:d} clusters".format(K))