def test_subsample():
    """
    Smoke-test for ``ozb.subsample``.

    Runs the sub-sampling code once with its default electrostatic points and
    once with explicitly supplied ones. Nothing about the output is asserted;
    the test only checks that both calls run through.
    """
    n_total, n_keep = 150, 100

    # Each row of the (3, n_total) random draw is passed through
    # ozu.unit_vector before being stacked back into one array:
    random_draw = np.random.randn(3, n_total)
    bvecs = np.array([ozu.unit_vector(row) for row in random_draw])

    # Default call, letting subsample pick its own points:
    sub_sample = ozb.subsample(bvecs, n_keep)

    # Same call, but handing in the (transposed) camino points explicitly:
    camino_pts = ozu.get_camino_pts(n_keep).T
    sub_sample = ozb.subsample(bvecs, n_keep, elec_points=camino_pts)
def test_subsample():
    """
    Smoke-test for ``ozb.subsample``.

    Runs the sub-sampling code once with its default electrostatic points and
    once with explicitly supplied ones. Nothing about the output is asserted;
    the test only checks that both calls run through.
    """
    n_total, n_keep = 150, 100

    # Each row of the (3, n_total) random draw is passed through
    # ozu.unit_vector before being stacked back into one array:
    random_draw = np.random.randn(3, n_total)
    bvecs = np.array([ozu.unit_vector(row) for row in random_draw])

    # Default call, letting subsample pick its own points:
    sub_sample = ozb.subsample(bvecs, n_keep)

    # Same call, but handing in the (transposed) camino points explicitly:
    camino_pts = ozu.get_camino_pts(n_keep).T
    sub_sample = ozb.subsample(bvecs, n_keep, elec_points=camino_pts)
def _vec_handler(this_vec, figure, origin, tube_radius=None):
    """
    Draw one vector as a colored tube that starts at `origin`.

    Parameters
    ----------
    this_vec : array
        The vector to draw. It is squeezed before use and its first three
        entries are read as x, y and z.

    figure : the figure handed on to ``maya.plot3d``.

    origin : sequence
        The x, y, z coordinates at which the tube starts.

    tube_radius : float (optional)
        Radius of the tube. When omitted, the dot product of the vector with
        itself (its squared magnitude) is used.

    Notes
    -----
    The color is direction-coded: each of the R, G, B channels is the
    absolute value of the x, y, z component divided by the sum of the
    absolute values of all components. The plotted segment runs from
    `origin` to `origin` plus ``ozu.unit_vector`` of the vector divided by 4.
    """
    vec = this_vec.squeeze()
    if tube_radius is None:
        tube_radius = np.dot(vec, vec)

    # Direction-coded color, computed before the vector is rescaled:
    magnitudes = np.abs(vec)
    total = np.sum(magnitudes)
    rgb = (magnitudes[0] / total, magnitudes[1] / total, magnitudes[2] / total)

    # The displayed segment has a fixed length, regardless of magnitude:
    tip = ozu.unit_vector(vec) / 4.0

    x_line = [origin[0], tip[0] + origin[0]]
    y_line = [origin[1], tip[1] + origin[1]]
    z_line = [origin[2], tip[2] + origin[2]]

    maya.plot3d(x_line, y_line, z_line,
                tube_radius=tube_radius,
                tube_sides=20,
                figure=figure,
                color=rgb)
def _vec_handler(this_vec, figure, origin, tube_radius=None):
    """
    Draw one vector as a colored tube that starts at `origin`.

    Parameters
    ----------
    this_vec : array
        The vector to draw. It is squeezed before use and its first three
        entries are read as x, y and z.

    figure : the figure handed on to ``maya.plot3d``.

    origin : sequence
        The x, y, z coordinates at which the tube starts.

    tube_radius : float (optional)
        Radius of the tube. When omitted, the dot product of the vector with
        itself (its squared magnitude) is used.

    Notes
    -----
    The color is direction-coded: each of the R, G, B channels is the
    absolute value of the x, y, z component divided by the sum of the
    absolute values of all components. The plotted segment runs from
    `origin` to `origin` plus ``ozu.unit_vector`` of the vector divided by 4.
    """
    vec = this_vec.squeeze()
    if tube_radius is None:
        tube_radius = np.dot(vec, vec)

    # Direction-coded color, computed before the vector is rescaled:
    magnitudes = np.abs(vec)
    total = np.sum(magnitudes)
    rgb = (magnitudes[0] / total, magnitudes[1] / total, magnitudes[2] / total)

    # The displayed segment has a fixed length, regardless of magnitude:
    tip = ozu.unit_vector(vec) / 4.0

    x_line = [origin[0], tip[0] + origin[0]]
    y_line = [origin[1], tip[1] + origin[1]]
    z_line = [origin[2], tip[2] + origin[2]]

    maya.plot3d(x_line, y_line, z_line,
                tube_radius=tube_radius,
                tube_sides=20,
                figure=figure,
                color=rgb)
def spkm(data, k, weights=None, seeds=None, antipodal=True, max_iter=1000,
         calc_sse=True):
    """
    Spherical k means.

    Parameters
    ----------
    data : 2d float array
        Unit vectors on the hyper-sphere. This array has n data points rows,
        by m feature columns.

    k : int
        The number of clusters

    weights : 1d float array (optional)
        Some data-points may be more important than others, so they will
        receive more weighting in determining the centroids. When omitted,
        every data point gets a weight of 1.

    seeds : float array (optional).
        If a k by m array is provided, its rows are used as centroids to
        initialize the algorithm. Otherwise, random centroids are generated
        from random spherical angles through ``geo.sphere2cart``.

    antipodal : bool
        In cases in which antipodal symmetry can be assumed, we want to
        cluster together points that are pointing close to *opposite*
        directions. In that case, correlations between putative centroids and
        each data point treats correlation and anti-correlation in equal vein.

    max_iter : int
        Once more than this many iterations have run without the assignments
        converging, warn (``RuntimeWarning``) and exit.

    calc_sse : bool
        Whether to calculate SSE or not. When False, 0 is returned as SSE.

    Returns
    -------
    mu : the estimated centroids, one row per cluster. Each centroid that has
        data points assigned to it is a unit vector scaled by the mean weight
        of those data points.

    y_n : assignments of each data point to a centroid

    SSE : the sum of squared error in centroid-to-data-point assignment
    """
    # Function-scope import, so that this block brings its own dependency:
    import warnings

    # 0. Preliminaries:
    # For the calculation of the centroids, we want to make sure that the data
    # are all pointing into the same hemisphere (expects 3 by n):
    data = ozu.vecs2hemi(data.T).T
    # The data do not change while iterating, so scaling them to unit vectors
    # (needed for the correlations below) is done once, up front:
    data = np.array([ozu.unit_vector(x) for x in data])

    # If no weights are provided treat all data points equally:
    if weights is None:
        weights = np.ones(data.shape[0])

    # 1. Initialization:
    if seeds is None:
        # Choose random seeds.
        # thetas are uniform [0,pi]:
        theta = np.random.rand(k) * np.pi
        # phis are uniform [0, 2pi]
        phi = np.random.rand(k) * 2 * np.pi
        # They're all unit vectors:
        r = np.ones(k)
        # et voila:
        seeds = np.array(geo.sphere2cart(theta, phi, r)).T

    mu = seeds.copy()

    # None (rather than False) marks "no previous assignment yet": comparing
    # the assignments against False would match an all-zeros assignment and
    # stop the algorithm after a single iteration.
    last_y_n = None
    n_iter = 0
    while True:
        # Make sure the centroids are all unit vectors, so that the
        # correlation below is scaled properly:
        mu = np.array([ozu.unit_vector(x) for x in mu])

        # 2. Data assignment:
        # Calculate all the correlations in one swoop:
        corr = np.dot(data, mu.T)
        # In cases where antipodal symmetry is assumed, the sign of the
        # correlation is ignored:
        if antipodal:
            corr = np.abs(corr)

        # This chooses the centroid for each one:
        y_n = np.argmax(corr, -1)

        # 3. Centroid estimation:
        for this_k in range(k):
            members = y_n == this_k
            if not members.any():
                # Nothing was assigned here; keep the current centroid.
                continue
            # The average will be based on the data points that are considered
            # in this centroid with a weighted average:
            this_sum = np.dot(weights[members], data[members])
            # This goes into the volume of the sphere, so we renormalize to
            # the surface:
            this_norm = ozu.l2_norm(this_sum)
            if this_norm > 0:
                # Scale by the mean of the weights
                mu[this_k] = (this_sum / this_norm) * np.mean(weights[members])
            # Otherwise the weighted sum is exactly zero and defines no
            # direction; the current centroid is kept, because an all-zeros
            # centroid would be passed to ozu.unit_vector on the next
            # iteration.

        # 4. Stop if there's no change in assignment:
        if last_y_n is not None and np.array_equal(y_n, last_y_n):
            break
        last_y_n = y_n

        # Another stopping condition is if this has gone on for a while
        n_iter += 1
        if n_iter > max_iter:
            warnings.warn(
                "spkm did not converge within %d iterations" % max_iter,
                RuntimeWarning, stacklevel=2)
            break

    # Once you are done computing 'em all, calculate the resulting SSE:
    SSE = 0
    if calc_sse:
        for this_k in range(k):
            members = y_n == this_k
            if members.any():
                scaled_data = data[members] * weights[members].reshape(-1, 1)
                SSE += np.sum((mu[this_k] - scaled_data) ** 2)

    return mu, y_n, SSE
def spkm(data, k, weights=None, seeds=None, antipodal=True, max_iter=1000,
         calc_sse=True):
    """
    Spherical k means.

    Parameters
    ----------
    data : 2d float array
        Unit vectors on the hyper-sphere. This array has n data points rows,
        by m feature columns.

    k : int
        The number of clusters

    weights : 1d float array (optional)
        Some data-points may be more important than others, so they will
        receive more weighting in determining the centroids. When omitted,
        every data point gets a weight of 1.

    seeds : float array (optional).
        If a k by m array is provided, its rows are used as centroids to
        initialize the algorithm. Otherwise, random centroids are generated
        from random spherical angles through ``geo.sphere2cart``.

    antipodal : bool
        In cases in which antipodal symmetry can be assumed, we want to
        cluster together points that are pointing close to *opposite*
        directions. In that case, correlations between putative centroids and
        each data point treats correlation and anti-correlation in equal vein.

    max_iter : int
        Once more than this many iterations have run without the assignments
        converging, warn (``RuntimeWarning``) and exit.

    calc_sse : bool
        Whether to calculate SSE or not. When False, 0 is returned as SSE.

    Returns
    -------
    mu : the estimated centroids, one row per cluster. Each centroid that has
        data points assigned to it is a unit vector scaled by the mean weight
        of those data points.

    y_n : assignments of each data point to a centroid

    SSE : the sum of squared error in centroid-to-data-point assignment
    """
    # Function-scope import, so that this block brings its own dependency:
    import warnings

    # 0. Preliminaries:
    # For the calculation of the centroids, we want to make sure that the data
    # are all pointing into the same hemisphere (expects 3 by n):
    data = ozu.vecs2hemi(data.T).T
    # The data do not change while iterating, so scaling them to unit vectors
    # (needed for the correlations below) is done once, up front:
    data = np.array([ozu.unit_vector(x) for x in data])

    # If no weights are provided treat all data points equally:
    if weights is None:
        weights = np.ones(data.shape[0])

    # 1. Initialization:
    if seeds is None:
        # Choose random seeds.
        # thetas are uniform [0,pi]:
        theta = np.random.rand(k) * np.pi
        # phis are uniform [0, 2pi]
        phi = np.random.rand(k) * 2 * np.pi
        # They're all unit vectors:
        r = np.ones(k)
        # et voila:
        seeds = np.array(geo.sphere2cart(theta, phi, r)).T

    mu = seeds.copy()

    # None (rather than False) marks "no previous assignment yet": comparing
    # the assignments against False would match an all-zeros assignment and
    # stop the algorithm after a single iteration.
    last_y_n = None
    n_iter = 0
    while True:
        # Make sure the centroids are all unit vectors, so that the
        # correlation below is scaled properly:
        mu = np.array([ozu.unit_vector(x) for x in mu])

        # 2. Data assignment:
        # Calculate all the correlations in one swoop:
        corr = np.dot(data, mu.T)
        # In cases where antipodal symmetry is assumed, the sign of the
        # correlation is ignored:
        if antipodal:
            corr = np.abs(corr)

        # This chooses the centroid for each one:
        y_n = np.argmax(corr, -1)

        # 3. Centroid estimation:
        for this_k in range(k):
            members = y_n == this_k
            if not members.any():
                # Nothing was assigned here; keep the current centroid.
                continue
            # The average will be based on the data points that are considered
            # in this centroid with a weighted average:
            this_sum = np.dot(weights[members], data[members])
            # This goes into the volume of the sphere, so we renormalize to
            # the surface:
            this_norm = ozu.l2_norm(this_sum)
            if this_norm > 0:
                # Scale by the mean of the weights
                mu[this_k] = (this_sum / this_norm) * np.mean(weights[members])
            # Otherwise the weighted sum is exactly zero and defines no
            # direction; the current centroid is kept, because an all-zeros
            # centroid would be passed to ozu.unit_vector on the next
            # iteration.

        # 4. Stop if there's no change in assignment:
        if last_y_n is not None and np.array_equal(y_n, last_y_n):
            break
        last_y_n = y_n

        # Another stopping condition is if this has gone on for a while
        n_iter += 1
        if n_iter > max_iter:
            warnings.warn(
                "spkm did not converge within %d iterations" % max_iter,
                RuntimeWarning, stacklevel=2)
            break

    # Once you are done computing 'em all, calculate the resulting SSE:
    SSE = 0
    if calc_sse:
        for this_k in range(k):
            members = y_n == this_k
            if members.any():
                scaled_data = data[members] * weights[members].reshape(-1, 1)
                SSE += np.sum((mu[this_k] - scaled_data) ** 2)

    return mu, y_n, SSE