def maximin_design_obj(y, vert=None):
    Ny, n = vert.shape
    N = y.size // n
    Y = y.reshape((N, n))
    D0 = distance_matrix(Y, Y) + 1e4*np.eye(N)
    D1 = distance_matrix(Y, vert)
    return -np.amin(np.hstack((D0.flatten(), D1.flatten())))
def _maximin_design_obj(y, vert=None):
    """Objective function for the maximin design optimization.

    :param ndarray y: Contains the coordinates of the points in the design.
        If there are N points in n dimensions then `y` is shape ((Nn, )).
    :param ndarray vert: Contains the fixed vertices defining the zonotope.

    **Notes**

    This function returns the negative of the minimum squared distance
    between all points in the design and between points and vertices.
    """
    Ny, n = vert.shape
    N = y.size // n
    Y = y.reshape((N, n))

    # get minimum squared distance among points
    D0 = distance_matrix(Y, Y) + 1e5*np.eye(N)
    d0 = np.power(D0.flatten(), 2)
    d0star = np.amin(d0)

    # get minimum squared distance between points and vertices
    D1 = distance_matrix(Y, vert)
    d1 = np.power(D1.flatten(), 2)
    d1star = np.amin(d1)

    dstar = np.amin([d0star, d1star])
    return -dstar
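# Hedged usage sketch (not from the original source): evaluating the maximin
# objective for a random 5-point design inside the unit square, whose corners
# serve as the fixed zonotope vertices. The names `vert` and `y0` are
# illustrative only.
import numpy as np
from scipy.spatial import distance_matrix

vert = np.array([[0.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 1.0]])
y0 = np.random.uniform(size=(5, 2)).ravel()   # flattened (Nn,) design vector
print(_maximin_design_obj(y0, vert=vert))     # more negative = better spread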
def euclDist_infl(subject):

    import numpy as np
    import nibabel.freesurfer.io as fs
    from scipy.spatial import distance_matrix

    fsDir = '/afs/cbs.mpg.de/projects/mar004_lsd-lemon-preproc/freesurfer'
    surfDir = '/afs/cbs.mpg.de/projects/mar005_lsd-lemon-surf/probands'

    for hemi in ['lh', 'rh']:
        # fsaverage5 coords on sphere
        fsa5_sphere_coords = fs.read_geometry('%s/fsaverage5/surf/%s.sphere' % (fsDir, hemi))[0]
        cort = fs.read_label('%s/fsaverage5/label/%s.cortex.label' % (fsDir, hemi))

        # get corresponding nodes on subject sphere (find coords of high-dim subject
        # surface closest to fsa5 nodes in sphere space)
        subj_sphere_coords = fs.read_geometry('%s/%s/surf/%s.sphere' % (fsDir, subject, hemi))[0]
        subj_indices = []
        for node in cort:
            dist2all = np.squeeze(distance_matrix(np.expand_dims(fsa5_sphere_coords[node], axis=0),
                                                  subj_sphere_coords))
            subj_indices.append(list(dist2all).index(min(dist2all)))

        # pair-wise euclidean distance between included nodes on subject surface (midline)
        subj_surf_coords = fs.read_geometry('%s/%s/surf/%s.inflated' % (fsDir, subject, hemi))[0]
        euclDist = np.zeros((10242, 10242))
        euclDist[np.ix_(cort, cort)] = distance_matrix(subj_surf_coords[subj_indices, :],
                                                       subj_surf_coords[subj_indices, :])
        np.save('%s/%s/distance_maps/%s_%s_euclDist_inflated_fsa5' % (surfDir, subject, subject, hemi),
                euclDist)
def test_distance_matrix_looping():
    m = 10
    n = 11
    k = 4
    xs = np.random.randn(m, k)
    ys = np.random.randn(n, k)
    ds = distance_matrix(xs, ys)
    dsl = distance_matrix(xs, ys, threshold=1)
    assert_equal(ds, dsl)
def _update_point_movement(self, points):
    update_points = points is not self.last_points
    update_control_points = self.control_points_need_update
    if update_points or update_control_points:
        if update_control_points:
            self.control_points[:, 0] = self.parameter[0 + self.parameter_separation_index::3]
            self.control_points[:, 1] = self.parameter[1 + self.parameter_separation_index::3]
            self.control_points[:, 2] = self.parameter[2 + self.parameter_separation_index::3]
            self.K = self.kernel_function(distance_matrix(self.control_points, self.control_points))
            self.control_points_need_update = False
        self.last_points = points
        self.last_distance_matrix = distance_matrix(points, self.control_points)
        self.last_kernel_matrix = self.kernel_function(self.last_distance_matrix ** 2)
        self.kernel_deriv_matrix_needs_update = True
def create_dataset_artificial(size1, size2, same=True, sigma1=None, sigma2=None, verbose=False):
    """Create the adjacency matrices of two graphs whose numbers of nodes are
    size1 and size2, respectively.

    The graphs refer to 2D clouds of points where the edges, i.e. the values
    of the adjacency matrices, are similarities between points defined as
    s(x1, x2) = exp(-d(x1, x2)**2 / sigma**2), where d() is the Euclidean
    distance and sigma is either provided by the user or defined as the
    median distance between the points.

    If 'same' is True, then the smaller cloud of points is a subset of the
    larger cloud, i.e. the corresponding graphs have a perfect subgraph match.
    """
    print("Dataset creation.")
    if same:
        X = np.random.rand(max([size1, size2]), 2)
        X1 = X[:size1]
        X2 = X[:size2]
        dm = distance_matrix(X, X)
        dm1 = dm[:size1, :size1]
        dm2 = dm[:size2, :size2]
        sigma = np.median(dm[np.triu_indices(dm.shape[0], 1)])
        if sigma1 is None:
            sigma1 = sigma
        if sigma2 is None:
            sigma2 = sigma
    else:
        X1 = np.random.rand(size1, 2)
        X2 = np.random.rand(size2, 2)
        dm1 = distance_matrix(X1, X1)
        dm2 = distance_matrix(X2, X2)
        if sigma1 is None:
            sigma1 = np.median(dm1[np.triu_indices(size1, 1)])
        if sigma2 is None:
            sigma2 = np.median(dm2[np.triu_indices(size2, 1)])

    if verbose:
        print("create_dataset_artificial: sigma1=%s, sigma2=%s" % (sigma1, sigma2))

    A = np.exp(- dm1 * dm1 / (sigma1 ** 2))
    B = np.exp(- dm2 * dm2 / (sigma2 ** 2))
    return A, B, X1, X2
def setUp(self):
    self._num_points = 10
    self._pop_size = 5
    gen = TSPGenerator(self._num_points)
    self._data = gen.generate()
    self._distances = distance_matrix(self._data, self._data)
def _kmeans(data, threshold, centroids, verbose):
    """\
    The *raw* version of k-means.
    """
    # initialize J
    Jprev = inf
    # initialize iteration count
    iter = 0

    # iterations
    while True:
        # calculate the distance from x to each centroid
        dist = distance_matrix(data, centroids)
        # assign x to the nearest centroid
        labels = dist.argmin(axis=1)
        # re-calculate each center
        for j in range(len(centroids)):
            idx_j = (labels == j).nonzero()
            centroids[j] = data[idx_j].mean(axis=0)
        # calculate J
        # Note: to compare the J here to that of k-medoids, this should be
        # (((...).sum(axis=1))**0.5).sum()
        J = ((data - centroids[labels])**2).sum()
        iter += 1
        if verbose:
            print('[kmeans] iter %d (J=%.4f)' % (iter, J))
        if Jprev - J < threshold:
            break
        Jprev = J

    return centroids, labels, J
def predictedPoint(self, x, y, model, coords, values, invg):
    """Prediction of the Big Kriging for a point \o/

    Parameters
    ----------
    x, y : floats
        coordinates of the desired predicted point
    model : Model
        what model to use (and not your favorite color!)
    coords : ndarray
        original grid coordinates
    values : ndarray
        original grid values, ordered like coords
    invg :
        the resulting inverse gamma matrix based on model and coords

    Returns
    -------
    array(x, y, v, e)
        x, y : coordinates of the desired predicted point
        v : the predicted value
        e : the standard error
    """
    dist = spatial.distance_matrix(coords, [[x, y], ])
    gg = np.matrix(np.vstack([model.func(dist), [1, ]]))
    weights = invg * gg
    v = np.sum(values[:, np.newaxis] * np.asarray(weights[:-1]))
    e = np.sqrt(abs(np.sum(gg.A1 * weights.A1)))
    return np.asarray([x, y, v, e])
def _maximin_design_grad(y, vert=None):
    """Gradient of the objective function for the maximin design optimization.

    Parameters
    ----------
    y : ndarray
        contains the coordinates of the points in the design. If there are N
        points in n dimensions then `y` is shape ((Nn, )).
    vert : ndarray
        contains the fixed vertices defining the zonotope
    """
    Ny, n = vert.shape
    v = vert.reshape((Ny*n, ))

    N = y.size // n
    Y = y.reshape((N, n))

    # get minimum squared distance among points
    D0 = distance_matrix(Y, Y) + 1e5*np.eye(N)
    d0 = np.power(D0.flatten(), 2)
    d0star, k0star = np.amin(d0), np.argmin(d0)

    # get minimum squared distance between points and vertices
    D1 = distance_matrix(Y, vert)
    d1 = np.power(D1.flatten(), 2)
    d1star, k1star = np.amin(d1), np.argmin(d1)

    g = np.zeros((N*n, ))
    if d0star < d1star:
        dstar, kstar = d0star, k0star
        istar = kstar // N
        jstar = np.mod(kstar, N)
        for k in range(n):
            g[istar*n + k] = 2*(y[istar*n + k] - y[jstar*n + k])
            g[jstar*n + k] = 2*(y[jstar*n + k] - y[istar*n + k])
    else:
        dstar, kstar = d1star, k1star
        istar = kstar // Ny
        jstar = np.mod(kstar, Ny)
        for k in range(n):
            g[istar*n + k] = 2*(y[istar*n + k] - v[jstar*n + k])

    return -g
def covariance(X, Z, h):
    '''Compute the covariance matrix between the two matrices with a Gaussian kernel.

    Input: two matrices, and bandwidth (h)
    Output: covariance matrix.'''
    d = spatial.distance_matrix(X, Z)
    K = np.exp(-(d**2) / (2*h*h))
    return K
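# Hedged usage sketch (not from the original source): a Gaussian kernel matrix
# between two small random point sets, with an illustrative bandwidth h=0.5.
import numpy as np
from scipy import spatial

X = np.random.rand(5, 3)
Z = np.random.rand(4, 3)
K = covariance(X, Z, h=0.5)
print(K.shape)  # (5, 4); entries lie in (0, 1] and equal 1 where points coincide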
def _choose_bf_metering_pos(positions):
    # find the position which has the smallest distance to its 8 closest neighbors,
    # because that position is likely right in the middle
    pos_names, pos_values = zip(*positions.items())
    xys = numpy.array(pos_values)[:, :2]
    distances = spatial.distance_matrix(xys, xys)
    distances.sort(axis=1)
    distance_sums = distances[:, :8].sum(axis=1)
    return pos_names[distance_sums.argmin()]
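# Hedged usage sketch (not from the original source): nine stage positions on a
# 3x3 grid, given as (x, y, z) tuples; the centre position minimises the summed
# distance to its nearest neighbours and is therefore returned. The key format
# 'pos_ij' is purely illustrative.
import numpy
from scipy import spatial

positions = {'pos_%d%d' % (i, j): (float(i), float(j), 0.0)
             for i in range(3) for j in range(3)}
print(_choose_bf_metering_pos(positions))  # 'pos_11'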
def setUp(self):
    self._num_points = 10
    self._pop_size = 20
    gen = TSPGenerator(self._num_points)
    self._data = gen.generate()
    self._distances = distance_matrix(self._data, self._data)
    popGen = SimplePopulationGenerator(self._pop_size)
    self._population = popGen.generate(self._distances[0])
def getDistances(comparisonSet, data):
    answer = []
    for i in range(len(data)):
        searchSet = getNeighbours(data[0][i], data[1][i], comparisonSet, searchRadius)
        dist = spatial.distance_matrix(searchSet, data[i])
        dist0 = dist[:, 0]
        dist0.sort()
        dist0 = dist0[dist0 < searchRadius]
        answer.append(dist0)
    return answer
def test_distance_matrix():
    m = 10
    n = 11
    k = 4
    xs = np.random.randn(m, k)
    ys = np.random.randn(n, k)
    ds = distance_matrix(xs, ys)
    assert_equal(ds.shape, (m, n))
    for i in range(m):
        for j in range(n):
            assert_almost_equal(distance(xs[i], ys[j]), ds[i, j])
def __init__(self, control_points, kernel_function):
    self.control_points = control_points
    self.kernel_function = kernel_function
    self.K = self.kernel_function(distance_matrix(self.control_points, self.control_points))
    self.last_points = None
    self.kernel_deriv_matrix_needs_update = True
    self._identity = numpy.zeros(len(control_points) * 3)
    self.parameter = self.identity.copy()
    self._bounds = numpy.c_[self.identity, self.identity]
    self._bounds[:, 0] = -self.kernel_function.support
    self._bounds[:, 1] = +self.kernel_function.support
def _sample_one_more(X, box, r):
    """Sample one more atom."""
    if X.shape[0] == 0:
        return box[:, 0] + (box[:, 1] - box[:, 0]) * np.random.rand(1, 3)
    while True:
        x = box[:, 0] + (box[:, 1] - box[:, 0]) * np.random.rand(1, 3)
        d = spt.distance_matrix(X, x)
        if (d > 2.0 * r).all():
            return x
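# Hedged usage sketch (not from the original source): rejection-sampling three
# non-overlapping "atoms" of radius 1.0 inside a 10x10x10 box. The box layout
# (rows are x/y/z bounds) is an assumption consistent with the indexing above.
import numpy as np
from scipy import spatial as spt

box = np.array([[0.0, 10.0], [0.0, 10.0], [0.0, 10.0]])
X = np.empty((0, 3))
for _ in range(3):
    X = np.vstack([X, _sample_one_more(X, box, r=1.0)])
print(X)  # three points, each pair separated by more than 2*r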
def create_dataset_artificial(size1, size2, same=True):
    print("Dataset creation.")
    if same:
        X = np.random.rand(max([size1, size2]), 2)
        X1 = X[:size1]
        X2 = X[:size2]
        dm = distance_matrix(X, X)
        dm1 = dm[:size1, :size1]
        dm2 = dm[:size2, :size2]
        sigma1 = sigma2 = np.median(dm)
    else:
        X1 = np.random.rand(size1, 2)
        X2 = np.random.rand(size2, 2)
        dm1 = distance_matrix(X1, X1)
        dm2 = distance_matrix(X2, X2)
        sigma1 = np.median(dm1)
        sigma2 = np.median(dm2)

    A = np.exp(- dm1 * dm1 / (sigma1 ** 2))
    B = np.exp(- dm2 * dm2 / (sigma2 ** 2))
    return A, B
def _farthest_points(points):
    points = numpy.asarray(points)
    bbox_lower_left = points.min(axis=0)
    lower_left = numpy.linalg.norm(points - bbox_lower_left, axis=1).argmin()
    selected = [lower_left]
    dist = spatial.distance_matrix(points, points)
    for _ in range(len(points) - 1):
        dist_to_selected = dist[selected]
        dist_to_nearest_selected = dist_to_selected.min(axis=0)
        farthest_from_selected = dist_to_nearest_selected.argmax()
        selected.append(farthest_from_selected)
    return selected
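# Hedged usage sketch (not from the original source): greedy farthest-point
# ordering of a small random 2-D point set, starting from the point nearest the
# bounding box's lower-left corner.
import numpy

points = numpy.random.rand(30, 2)
order = _farthest_points(points)
print(order[:5])  # indices of the first five well-spread points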
def __init__(self, control_points, kernel_function):
    self.control_points = control_points
    self.control_points_need_update = False
    self.parameter_separation_index = len(control_points) * 3
    self.kernel_function = kernel_function
    self.K = self.kernel_function(distance_matrix(self.control_points, self.control_points))
    self.last_points = None
    self.kernel_deriv_matrix_needs_update = True
    self._identity = numpy.zeros(len(control_points) * 6)
    self._identity[3 * len(control_points):] = self.control_points.ravel()
    self._parameter = self.identity.copy()
    self._parameter.flags.writeable = False
def find_outliers_all(self):
    distances_matrix = spsp.distance_matrix(self.points, self.points)
    outliers = []
    distances_vector = ma.masked_array(np.sum(distances_matrix, axis=1))
    for out in range(self.n_of_outliers):
        outlier = distances_vector.argmax()
        logging.debug("%d of %d", self.n_of_outliers, out)
        outliers.append(outlier)
        distances_vector -= distances_matrix[:, outlier]
        distances_vector[outlier] = ma.masked
    return outliers
def add_periodic_connections(self, pores1, pores2, apply_label='periodic'):
    r"""
    Accepts two sets of pores and connects them with new throats.  The
    connections are determined by pairing each pore in ``pores1`` with its
    nearest pore in ``pores2``.  For cubic Networks this will create pairings
    with pores directly across the domain from each other, assuming the input
    pores are 2D co-planar sets of pores.

    Parameters
    ----------
    pores1 and pores2 : array_like
        Lists of pores on the opposing faces which are to be linked to
        create periodicity.
    apply_label : string
        The label to apply to the newly created throats.  The default is
        'periodic'.

    Notes
    -----
    This method will raise an exception if the input pores do not create
    fully unique pairs.  Specifically, the length of pores1 and pores2 must
    be the same AND each pore in pores1 must pair up with one and only one
    pore in pores2, and vice versa.  If these conditions are not met then
    periodicity cannot be achieved, and an exception is raised.
    """
    logger.debug('Creating periodic pores')
    if sp.shape(pores1)[0] != sp.shape(pores2)[0]:
        raise Exception('Unequal length inputs, periodicity not possible')
    p1 = self['pore.coords'][pores1]
    p2 = self['pore.coords'][pores2]
    dist_mat = sptl.distance_matrix(p1, p2)
    dist_min = sp.amin(dist_mat, axis=1, keepdims=True)
    [a, b] = sp.where(dist_mat == dist_min)
    pairs = sp.vstack([pores1[a], pores2[b]]).T
    # Confirm that each pore in each list is only paired up once
    temp_1 = sp.unique(pairs[:, 0])
    if sp.shape(temp_1) < sp.shape(pores1):
        raise Exception('Non-unique pairs found, periodicity not met')
    temp_2 = sp.unique(pairs[:, 1])
    if sp.shape(temp_2) < sp.shape(pores2):
        raise Exception('Non-unique pairs found, periodicity not met')
    # Add throats to the network for the periodic connections
    self.extend(throat_conns=pairs, labels=apply_label)
    # Record which pores are connected to which
    self['pore.periodic_neighbor'] = sp.nan
    self['pore.periodic_neighbor'][pairs[:, 0]] = pairs[:, 1]
    self['pore.periodic_neighbor'][pairs[:, 1]] = pairs[:, 0]
    logger.info('Periodic boundary pores added successfully')
def remove_outliers(self):
    global redetect
    points = self.new_points.reshape(-1, 2)
    dist_matrix = distance_matrix(points, points, p=2)
    points = [list(p) for p in points]
    sum_of_dist = sum(dist_matrix)
    good_points = [abs(sum_of_dist - np.mean(sum_of_dist)) < 2*np.std(sum_of_dist)]
    for p, g in zip(points, good_points[0]):
        if not g:
            points.remove(p)
    if len(points) < 20:
        redetect = True
    self.new_points = np.array(points).reshape(-1, 1, 2)
def cluster_normals(normals, clusters):
    height = normals.shape[0]
    width = normals.shape[1]
    nb_clusters = clusters.shape[0]
    classif_normals = np.zeros((height, width, nb_clusters))
    for l in range(height):
        for c in range(width):
            # compute all distances
            dist = spatial.distance_matrix(clusters, np.reshape(normals[l, c, :], (1, 3)))
            # find the min
            classif_normals[l, c, np.argmin(dist)] = 1
    return classif_normals
def getDSorted(self, doSort=True):
    idx = self.getFeaturesIdx()
    # If the features have been changed since last time, an update is needed
    if not np.array_equal(idx, self.lastIdx):
        self.lastIdx = idx
        self.D = np.array([])
    # Find the first "densityNPoints" points in ascending order of max neighborhood point
    if len(self.D) == 0:
        tic = time.time()
        self.D = spatial.distance_matrix(self.OrigDelaySeries[:, idx], self.OrigDelaySeries[:, idx])
        toc = time.time()
        print("Elapsed distance matrix computation time = %g" % (toc - tic))
    if len(self.DSorted) == 0 and doSort:
        tic = time.time()
        self.DSorted = np.sort(self.D, 0)
        toc = time.time()
        print("Elapsed sorting time = %g" % (toc - tic))
def evaluate_emulator(x, emulator, cov, cov_args=(), cov_kwargs={}):
    '''
    Evaluates emulator at a given point or sequence of points.

    Arguments
    ---------
    x : ndarray
        Array of length d or of dimension d x m, with each column containing
        a point at which to evaluate the emulator.
    emulator : dict
        Dictionary as output by build_emulator containing grid and v.
    cov : function
        Covariance function for the Gaussian process. Must accept an ndarray
        of distances as its first argument and return an ndarray of the same
        dimension. Called as cov(dm, *cov_args, **cov_kwargs).
    cov_args : tuple
        Tuple of additional positional arguments for cov.
    cov_kwargs : dict
        Dictionary of additional keyword arguments for cov.

    Returns
    -------
    f_hat : ndarray
        Array of size k x m containing estimated values of the function.
    '''
    # Convert x to matrix if needed
    if not type(x) is np.ndarray:
        x = np.array(x)
    if len(x.shape) < 2:
        x = x[:, np.newaxis]

    # Evaluate distances between x and grid
    C = spatial.distance_matrix(x.T, emulator['grid'])
    C = cov(C, *cov_args, **cov_kwargs)

    # Estimate function values at x
    f_hat = np.dot(emulator['v'].T, C.T)

    # Add linear term if needed
    if emulator['slope_mean'] is not None:
        f_hat += np.dot(emulator['slope_mean'], (x.T - emulator['center']).T)

    if x.shape[1] < 2:
        f_hat = f_hat[:, 0]

    return f_hat
def project_on_grid(points, grid):
    """Project points on a grid.

    Parameters
    ----------
    points : ndarray (N,)
    grid : ndarray (M,)

    Returns
    -------
    idx : ndarray (N,)
        grid indices closest to given points
    grid_val : ndarray (N,)
        grid values closest to given points
    """
    d = distance_matrix(np.array([points]).T, np.array([grid]).T)
    idx = np.argmin(d, axis=1)
    return idx, grid[idx]
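# Hedged usage sketch (not from the original source): snapping scattered 1-D
# samples onto a regular grid.
import numpy as np
from scipy.spatial import distance_matrix

points = np.array([0.12, 0.48, 0.91])
grid = np.linspace(0.0, 1.0, 11)          # 0.0, 0.1, ..., 1.0
idx, grid_val = project_on_grid(points, grid)
print(idx)       # [1 5 9]
print(grid_val)  # grid values nearest each point (~[0.1, 0.5, 0.9])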
def get_vertices_at_intersections(normals, offsets, ceiling_height):
    """Returns a dict of vertices and normals for each surface intersection of
    walls given by the Nx3 arrays of normals and offsets."""
    from scipy import spatial

    # Calculate d in equation ax + by + cz = d
    dd = np.sum(normals * offsets, axis=1)

    # Automatically separate out the floor from the walls.
    floor_idx = normals[:, 1].argsort()[-1]
    wall_normals, wall_d = np.delete(normals, floor_idx, axis=0), np.delete(dd, floor_idx)
    floor_normal, floor_d = normals[floor_idx, :], dd[floor_idx]

    # Get neighbors between all walls (excluding the floor, which touches everything.)
    distances = spatial.distance_matrix(wall_normals, wall_normals) + (3 * np.eye(wall_normals.shape[0]))
    neighboring_walls = np.sort(distances.argsort()[:, :2])  # Get the two closest wall indices to each wall
    neighbors = {dd: el.tolist() for (dd, el) in enumerate(neighboring_walls)}

    # Solve for intersection between the floor/ceiling and adjacent walls.
    vertices = {wall: [] for wall in range(len(neighbors))}
    floor_verts = []
    for wall in neighbors:
        for adj_wall in neighbors[wall]:
            for normal, d in ((floor_normal, floor_d), (np.array([0., 1., 0.]), ceiling_height)):
                all_norms = np.vstack((wall_normals[wall], wall_normals[adj_wall], normal))
                all_d = np.array((wall_d[wall], wall_d[adj_wall], d))
                vertex = np.linalg.solve(all_norms, all_d).transpose()
                vertices[wall].append(vertex)
                if d < ceiling_height and vertex.tolist() not in floor_verts:
                    floor_verts.append(vertex.tolist())

    # Convert vertex lists to dict of NumPy arrays
    vertices = {key: np.array(value) for key, value in vertices.items()}
    vertices[len(vertices)] = np.array(floor_verts)
    norms = {key: np.array(value) for key, value in enumerate(wall_normals)}
    norms[len(norms)] = np.array(floor_normal)

    return vertices, norms
def create_mock_data(data_dir, tracab_id, params, event_telemetry, telemetry_map, video_metadata):
    start_frame = params[0]
    end_frame = params[1]
    xmin = params[2]
    xmax = params[3]
    ymin = params[4]
    ymax = params[5]
    num_x_grid = params[6]
    num_y_grid = params[7]

    tracking_initial_frame = event_telemetry.metadata['initial_frame']
    video_initial_frame = int(video_metadata['MediaproPanaMetaData']['match']['videofile']['start']['@iFrame'])
    global_zero_frame = max(video_initial_frame, tracking_initial_frame)

    xx = np.linspace(xmin, xmax, num_x_grid)
    yy = np.linspace(ymin, ymax, num_y_grid)
    XX, YY = np.meshgrid(xx, yy)
    S = np.array(list(zip(XX.flatten(), YY.flatten())))

    output_dir = os.path.join(data_dir, tracab_id, "mock")
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)

    for frame in range(start_frame, end_frame):
        adjusted_frame = frame + global_zero_frame - tracking_initial_frame
        positions = get_positions(adjusted_frame, telemetry_map)
        D = distance_matrix(S, positions)
        NN_indices = D.argmin(axis=0)

        # Store grid values
        Z = np.zeros(len(S))
        Z[NN_indices] = 1

        # Save results
        with open(os.path.join(output_dir, "proba-f{}.dat".format(adjusted_frame)), "w") as f:
            f.write("\n".join(["{} {}".format(a, b) for a, b in zip(range(len(Z)), Z)]))
def plot_nearest_words(word, k=20):
    """Plot the k words nearest to the given word.

    Parameters
    ----------
    word : str
        Target word to look up in the vocabulary.
    k : int, optional
        Number of nearest words to plot.
    """
    # Get distances to target word
    target_vec = wordvecs[word2id[word]]
    dists = []
    for vec_i in wordvecs:
        dists.append(distance.cosine(target_vec, vec_i))
    idxs = np.argsort(dists)
    labels = [words[idx_i] for idx_i in idxs[:k]]
    vecs = [wordvecs[idx_i] for idx_i in idxs[:k]]
    dm = distance_matrix(vecs, vecs)

    fig, axs = plt.subplots(1, 2, figsize=(10, 4))

    # Plot the distance matrix
    axs[0].imshow(dm)
    axs[0].set_xticks(range(len(labels)))
    axs[0].set_yticks(range(len(labels)))
    axs[0].set_xticklabels(labels, rotation='vertical')
    axs[0].set_yticklabels(labels)

    # Center the distance matrix
    dm = dm / np.mean(dm, axis=0, keepdims=True)

    # Plot data points in reduced dimensionality using principal components
    # of the distance matrix
    res = PCA(2).fit_transform(dm)
    pc1, pc2 = res[:, 0], res[:, 1]
    axs[1].scatter(pc1, pc2)
    for i in range(len(labels)):
        axs[1].text(pc1[i], pc2[i], labels[i])
def main():
    if len(sys.argv) <= 1:
        sys.argv.extend(
            " -f ../../cross_validation/try1.csv -d ../../filtered_data_sets/CDR3_from_celiac_trim_3_4_with_labels_unique_sequences_Celiac_model_April_2020_FILTERED_DATA_1K_per_subject.csv -v ../../vectors/CDR3_from_celiac_trim_3_4_with_labels_unique_sequences_Celiac_model_April_2020_VECTORS_1K_per_subject.csv -of ../../cross_validation/ -od try1K_TRAIN_0"
            .split())

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-f', '--features_list',
        help='feature list file, contains the list of relevant features, '
             'including feature center and maximal distance from it')
    parser.add_argument('-d', '--data_file_path', help='the filtered data file path')
    parser.add_argument('-v', '--vectors_file_path', help='the vectors file path')
    parser.add_argument('-of', '--output_folder_path', help='Output folder for the feature table')
    parser.add_argument('-od', '--output_description', help='description to use inside output file names')
    parser.add_argument(
        '-s', '--subject_col_name',
        help='subject column name in data file, default "FILENAME"',
        default='FILENAME', type=str)
    parser.add_argument(
        '-l', '--labels_col_name',
        help='labels column name in data file, default "labels"',
        default='labels', type=str)
    args = parser.parse_args()

    if not os.path.isfile(args.features_list):
        print('feature list file error, make sure file path exists\nExiting...')
        sys.exit(1)
    if not os.path.isfile(args.data_file_path):
        print('feature file error, make sure file path exists\nExiting...')
        sys.exit(1)
    if not os.path.isfile(args.vectors_file_path):
        print('vectors file error, make sure file path exists\nExiting...')
        sys.exit(1)

    # load files
    feature_list = pd.read_csv(args.features_list, index_col=0)
    data_file = pd.read_csv(args.data_file_path)
    vectors_file = pd.read_csv(args.vectors_file_path)

    if args.labels_col_name not in data_file.columns:
        print(f'label "{args.labels_col_name}" column name does not exist in data file.\nExiting...')
        sys.exit(1)
    if args.subject_col_name not in data_file.columns:
        print(f'"{args.subject_col_name}" column name does not exist in data file.\nExiting...')
        sys.exit(1)
    if 'feature_index' not in feature_list.columns:
        print('feature list file error, no "feature_index" column. please check.\nExiting...')
        sys.exit(1)
    else:
        print(f'feature indexes: {feature_list.index}')

    # define an empty matrix: each row is a subject, each column is a feature (cluster)
    features_table = pd.DataFrame(
        0,
        index=pd.unique(data_file[args.subject_col_name]),
        columns=feature_list['feature_index'])

    by_subject = data_file.groupby(args.subject_col_name)
    sub_num = 0
    for subject, frame in by_subject:  # for each subject
        sub_num += 1
        print("------------------------")
        print(f"{str(datetime.now())}: Analysing {subject!r} #{sub_num!r}")
        for vector_index, row in frame.iterrows():  # for each vector in that subject
            # print(f"{str(datetime.now())}: Analysing {vector_index!r} vector index")
            sum_iloc = 0.0
            cnt_iloc = 0
            sum_euclidean = 0.0
            cnt_eculidean = 0
            start_time_others = time.time()
            # features_count = 0
            multiple_entries = 0
            vector_u = vectors_file.iloc[vector_index, :]  # vector in data file

            if True:  # pavel new
                features = feature_list.iloc[:, -100:]
                distances = distance_matrix(features, np.array(vector_u, ndmin=2))
                distances = distances.reshape((len(features), ))
                max_distance = feature_list.loc[:, 'max_distance']
                distance_close_enough_vec = distances <= max_distance
                # TODO: where to increment the counters?
                features_count = np.sum(distance_close_enough_vec)
                if features_count > 1:
                    multiple_entries += 1
                add_feature_index = np.where(distance_close_enough_vec == True)
                features_table.loc[subject, feature_list.loc[add_feature_index, 'feature_index']] += 1

            if False:  # the code before
                for feature_index in feature_list.index:  # check distances of each vector from all features
                    tic = time.time()
                    vector_v = feature_list.iloc[feature_index, -100:]  # center vector is the last 100 columns
                    sum_iloc += time.time() - tic
                    cnt_iloc += 1
                    tic = time.time()
                    distance = euclidean(vector_u, vector_v)
                    sum_euclidean += time.time() - tic
                    cnt_eculidean += 1
                    if distance <= feature_list.loc[feature_index, 'max_distance']:
                        # print(f'feature {feature} answers condition')
                        features_table.loc[subject, feature_list.loc[feature_index, 'feature_index']] += 1
                        features_count += 1
                print("first iloc time = {}ms cnt={}\t euclidean time={}ms\t all={}"
                      .format(1000 * sum_iloc / cnt_iloc, cnt_iloc,
                              1000 * sum_euclidean / cnt_eculidean,
                              time.time() - start_time_others))
            # print(f'===> A total of {features_count} answered the conditions, out of {len(frame)} rows')

    # Normalize by row
    normlized_features_table = features_table.div(features_table.sum(axis=1), axis=0)
    normlized_features_table.to_csv(
        os.path.join(args.output_folder_path, args.output_description + '_feature_table.csv'))
    print('file saved to ',
          os.path.join(args.output_folder_path, args.output_description + '_feature_table.csv'))
def get_unique_vectors(self, distance_threshold=0.01,
                       method='distance_comparison', min_samples=1,
                       return_clusters=False):
    """Returns diffraction vectors considered unique by: strict comparison,
    distance comparison with a specified threshold, or by clustering using
    DBSCAN [1].

    Parameters
    ----------
    distance_threshold : float
        The minimum distance between diffraction vectors for them to be
        considered unique diffraction vectors. If distance_threshold==0,
        the unique vectors will be determined by strict comparison.
    method : str
        The method to use to determine unique vectors. Valid methods are
        'strict', 'distance_comparison' and 'DBSCAN'.
        'strict' returns all vectors that are strictly unique and
        corresponds to distance_threshold=0.
        'distance_comparison' checks the distance between vectors to
        determine if some should belong to the same unique vector, and if
        so, the unique vector is iteratively updated to the average value.
        'DBSCAN' relies on the DBSCAN [1] clustering algorithm, and uses
        the Euclidean distance metric.
    min_samples : int, optional
        The minimum number of not strictly identical vectors within one
        cluster for the cluster to be considered a core sample, i.e. to
        not be considered noise. Only used for method='DBSCAN'.
    return_clusters : bool, optional
        If True (False is default), the DBSCAN clustering result is
        returned. Only used for method='DBSCAN'.

    References
    ----------
    [1] https://scikit-learn.org/stable/modules/generated/sklearn.
        cluster.DBSCAN.html

    Returns
    -------
    unique_peaks : DiffractionVectors
        The unique diffraction vectors.
    clusters : DBSCAN
        The results from the clustering, given as class DBSCAN.
        Only returned if method='DBSCAN' and return_clusters=True.
    """
    # Flatten the array of peaks to reach dimension (n, 2), where n
    # is the number of peaks.
    peaks_all = np.concatenate(
        [peaks.ravel() for peaks in self.data.flat]).reshape(-1, 2)

    # A distance_threshold of 0 implies a strict comparison. So in that
    # case, a warning is raised unless the specified method is 'strict'.
    if distance_threshold == 0:
        if method != 'strict':
            warn(message='distance_threshold=0 was given, and therefore ' +
                 'a strict comparison is used, even though the ' +
                 'specified method was ' + method + '.')
            method = 'strict'

    if method == 'strict':
        unique_peaks = np.unique(peaks_all, axis=0)

    elif method == 'distance_comparison':
        unique_vectors, unique_counts = np.unique(peaks_all, axis=0,
                                                  return_counts=True)

        unique_peaks = np.array([[0, 0]])
        unique_peaks_counts = np.array([0])

        while unique_vectors.shape[0] > 0:
            unique_vector = unique_vectors[0]
            distances = distance_matrix(np.array([unique_vector]),
                                        unique_vectors)
            indices = np.where(distances < distance_threshold)[1]

            new_count = indices.size
            new_unique_peak = np.array([
                np.average(unique_vectors[indices],
                           weights=unique_counts[indices],
                           axis=0)
            ])

            unique_peaks = np.append(unique_peaks, new_unique_peak, axis=0)
            unique_peaks_counts = np.append(unique_peaks_counts, new_count)
            unique_vectors = np.delete(unique_vectors, indices, axis=0)
            unique_counts = np.delete(unique_counts, indices, axis=0)
        unique_peaks = np.delete(unique_peaks, [0], axis=0)

    elif method == 'DBSCAN':
        # All peaks are clustered by DBSCAN so that peaks within
        # one cluster are separated by distance_threshold or less.
        unique_vectors, unique_vectors_counts = np.unique(
            peaks_all, axis=0, return_counts=True)
        clusters = DBSCAN(eps=distance_threshold,
                          min_samples=min_samples,
                          metric='euclidean').fit(
                              unique_vectors,
                              sample_weight=unique_vectors_counts)
        unique_labels, unique_labels_count = np.unique(clusters.labels_,
                                                       return_counts=True)
        unique_peaks = np.zeros((unique_labels.max() + 1, 2))

        # For each cluster, a center of mass is calculated based
        # on all the peaks within the cluster, and the center of
        # mass is taken as the final unique vector position.
        for n in np.arange(unique_labels.max() + 1):
            peaks_n_temp = unique_vectors[clusters.labels_ == n]
            peaks_n_counts_temp = unique_vectors_counts[clusters.labels_ == n]
            unique_peaks[n] = np.average(peaks_n_temp,
                                         weights=peaks_n_counts_temp,
                                         axis=0)

    # Manipulate into DiffractionVectors class
    if unique_peaks.size > 0:
        unique_peaks = DiffractionVectors(unique_peaks)
        unique_peaks.axes_manager.set_signal_dimension(1)
    if return_clusters and method == 'DBSCAN':
        return unique_peaks, clusters
    else:
        return unique_peaks
def from_num_cities(self, n=20, length=100, seed=1):
    np.random.seed(seed)
    self.num_cities = n
    self.coords = np.random.uniform(-length, length, size=(n, 2)).tolist()
    self.dist_mat = distance_matrix(self.coords, self.coords).tolist()
V1_subsub = V1[:, sub_ind1[:N_SUB2]]

# now subsample V2
sub_ind2 = np.array(SubSample(V2, N_SUB1), dtype=np.int)
V2_sub = V2[:, sub_ind2]
a2 = np.mean(V2_sub, axis=1)
b2 = np.matlib.repmat(a2, N_SUB1, 1)
V2_sub = V2_sub - b2.T
V2_sub = V2_sub / np.max(np.linalg.norm(V2_sub, axis=0))
V2_subsub = V2[:, sub_ind2[:N_SUB2]]

## step 1 - Align and Register
R = PrincipalComponentAlignment(V1_sub, V2_sub, ref=False)
min_cost = np.ones(len(R)) * np.inf
permutations = []
for rot, i in zip(R, range(len(R))):
    cost = distance_matrix(V1_sub.T, np.dot(rot, V2_sub).T)
    V1_ind, V2_ind = Hungary(cost)
    min_cost[i] = np.sqrt(np.sum(cost[V1_ind, V2_ind]))  # the actual cost of the permutation found
    permutations.append(V2_ind)

best_rot_ind = np.argmin(min_cost)
best_permutation = permutations[best_rot_ind]
best_rot = R[best_rot_ind]
newV2_sub = np.dot(best_rot.T, V2_sub)

i = 0
while True:
    newV2_sub = newV2_sub[:, best_permutation]
    # Do Kabsch
    cur_rot = Kabsch(newV2_sub.T, V1_sub.T)
pdf = pdf.dropna()
pdf = pdf.reset_index(drop=True)

# Select features
featureset = pdf[['engine_s', 'horsepow', 'wheelbas', 'width', 'length',
                  'curb_wgt', 'fuel_cap', 'mpg']]

# Normalize data
from sklearn.preprocessing import MinMaxScaler
x = featureset.values
min_max_scaler = MinMaxScaler()
feature_mtx = min_max_scaler.fit_transform(x)

dist_matrix = distance_matrix(feature_mtx, feature_mtx)

agglom = AgglomerativeClustering(n_clusters=6, linkage='complete')
agglom.fit(feature_mtx)
pdf['cluster_'] = agglom.labels_

import matplotlib.cm as cm
n_clusters = max(agglom.labels_) + 1
colors = cm.rainbow(np.linspace(0, 1, n_clusters))
cluster_labels = list(range(0, n_clusters))

# Create a figure of size 6 inches by 4 inches.
def calc_distance(data_matrix):
    return distance_matrix(data_matrix, data_matrix)
def predict(self, y):
    match = y[['lat', 'lon', 'mag']].values
    dist = spatial.distance_matrix(self.matching, [match])
    kmin_index = np.argsort(dist, axis=0)
    return kmin_index[:self.neighbors], dist[kmin_index[:self.neighbors]]
    cells[:, i] = (cells[:, i] - means[i]) / stds[i]          # point 1
    cells[:, i + 3] = (cells[:, i + 3] - means[i]) / stds[i]  # point 2
    cells[:, i + 6] = (cells[:, i + 6] - means[i]) / stds[i]  # point 3
    barycenters[:, i] = (barycenters[:, i] - mins[i]) / (maxs[i] - mins[i])
    normals[:, i] = (normals[:, i] - nmeans[i]) / nstds[i]

X = np.column_stack((cells, barycenters, normals))
# X = (X - np.ones((X.shape[0], 1)) * np.mean(X, axis=0)) / (np.ones((X.shape[0], 1)) * np.std(X, axis=0))

# computing A_S and A_L
A_S = np.zeros([X.shape[0], X.shape[0]], dtype='float32')
A_L = np.zeros([X.shape[0], X.shape[0]], dtype='float32')
D = distance_matrix(X[:, 9:12], X[:, 9:12])
A_S[D < 0.1] = 1.0
A_S = A_S / np.dot(np.sum(A_S, axis=1, keepdims=True), np.ones((1, X.shape[0])))
A_L[D < 0.2] = 1.0
A_L = A_L / np.dot(np.sum(A_L, axis=1, keepdims=True), np.ones((1, X.shape[0])))

# numpy -> torch.tensor
X = X.transpose(1, 0)
X = X.reshape([1, X.shape[0], X.shape[1]])
X = torch.from_numpy(X).to(device, dtype=torch.float)
A_S = A_S.reshape([1, A_S.shape[0], A_S.shape[1]])
A_L = A_L.reshape([1, A_L.shape[0], A_L.shape[1]])
A_S = torch.from_numpy(A_S).to(device, dtype=torch.float)
lbl_t3 = np.random.randint(-15, 15, size=(1000, 2)) + lbl_t2

max_displacement = 20
max_discontinuity = 3

timepoints = [1, 2, 3]
consecutive_tp_pairs = [(timepoints[i], timepoints[i + 1]) for i in range(len(timepoints) - 1)]
lbls = {1: lbl_t1, 2: lbl_t2, 3: lbl_t3}
tp2idx = {tp: i for i, tp in enumerate(timepoints)}

tracks = []
segment_list = []
track_id = 1
for ti, tj in consecutive_tp_pairs:
    lbl_i, lbl_j = lbls[ti], lbls[tj]  # Assuming these are centroids
    cost_matrix = distance_matrix(lbl_i, lbl_j)
    total_cost, column2row, row2column = lap.lapjv(cost_matrix,
                                                   cost_limit=max_displacement,
                                                   extend_cost=True)
    for col, row in enumerate(column2row):
        if col == -1:
            tracks.append(([ti], [lbl_i[col]]))  # time and xy
        else:
            tracks.append(([ti, tj], [lbl_i[col], lbl_j[row]]))
        track_id += 1

track_starts = np.array([i[0][0] for i in tracks])
track_ends = np.array([i[0][1] for i in tracks])
track_xy_start = np.array([i[1][0] for i in tracks])
def coordinates_are_resonable(coords):
    """Check that there are no very short or very long pairwise distances"""
    dist_mat = distance_matrix(coords, coords)
    return 0.8 < np.min(dist_mat + np.identity(len(coords))) < 5.0
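# Hedged usage sketch (not from the original source): a three-atom geometry with
# ~1.5 Angstrom spacing passes the check, while two nearly overlapping atoms fail.
import numpy as np
from scipy.spatial import distance_matrix

good = np.array([[0.0, 0.0, 0.0], [1.5, 0.0, 0.0], [0.0, 1.5, 0.0]])
bad = np.array([[0.0, 0.0, 0.0], [0.1, 0.0, 0.0]])
print(coordinates_are_resonable(good))  # True
print(coordinates_are_resonable(bad))   # False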
def align_and_rotate(self):
    # get the local alignment, calculate optimal rotation matrix for structures
    # to fit into each other
    # ab = dm_euclidian(self.query.er, self.target.er)  # normal distribution (dmnd) or difference (dm_euclidian)

    ### TESTING
    # print(np.allclose(self.query.er, scale2(self.query.er)))
    # scr1 = np.genfromtxt('scr1.table')
    # scr2 = np.genfromtxt('scr2.table')
    # dspair = np.genfromtxt('ds_pair.table')
    # print(np.allclose(self.query.er, scr1))
    ab = spatial.distance_matrix(self.query.er, self.target.er)
    # print(np.allclose(ab, dspair))
    # ab = spatial.distance_matrix(scr1, scr2)
    # print(np.allclose(ab, dspair))
    # ab = dm_scipy(scr1, scr2)
    # print(np.allclose(ab, dspair))
    # ab = dm_euclidian(self.query.er, self.target.er)
    # ab2 = spatial.distance_matrix(self.query.er, scr2)
    # plt.imshow(ab); plt.colorbar(); plt.title(__file__ + ' - 1st alignment DM'); plt.show()
    # print(ab[:10, :10])
    ab = dm_ndtr(self.query.er, self.target.er)
    # plt.imshow(ab); plt.colorbar(); plt.title(__file__ + ' - 1st alignment DM (normal distribution)'); plt.show()

    # actual alignment, using the fast SW from above
    self.i_list, self.j_list, self.is_gap, self.score = nlocalalign(
        ab, self.gap, self.factor, self.limit)
    self.traceback_len = len(self.is_gap)
    i_list = [i for i, g in zip(self.i_list, self.is_gap) if g == 0]
    j_list = [j for j, g in zip(self.j_list, self.is_gap) if g == 0]
    self.len_wo_gaps = len(i_list)
    self.nrgaps = np.count_nonzero(self.is_gap)

    # Kabsch
    a_pre = self.query.coordinates
    b_pre = self.target.coordinates
    a = a_pre[i_list, :]
    b = b_pre[j_list, :]
    self.query_centroid = np.mean(a, axis=0)
    self.target_centroid = np.mean(b, axis=0)
    a -= self.query_centroid
    b -= self.target_centroid
    h = a.T @ b
    u, s, v = np.linalg.svd(h.T)
    d = np.linalg.det(v.T @ u.T)
    r = v.T @ np.diag([1, 1, d]) @ u.T
    a = a @ r
    self.rmsd = rmsd(a, b)
    self.rotation_matrix = r
    self.query_aligned = a
    self.target_aligned = b
    self.dists = np.linalg.norm(a - b, axis=1)

    # GDT_TS:
    f1 = np.count_nonzero(np.where(self.dists < 1))
    f2 = np.count_nonzero(np.where(self.dists < 2))
    f4 = np.count_nonzero(np.where(self.dists < 4))
    f8 = np.count_nonzero(np.where(self.dists < 8))
    self.gdt_ts = 25 * sum([f1, f2, f4, f8]) / self.len_wo_gaps if self.len_wo_gaps > 0 else 0

    # FATCAT-inspired similarity score
    ###########################################################################
    # GDT-sim "improved", needs further tinkering...
    self.gdt_sim = self.score * self.len_wo_gaps * self.gdt_ts

    # TMscore
    d0 = 1.24 * np.cbrt(self.target.l - 15) - 1.8
    di = np.sqrt(np.sum((a - b)**2, axis=1))
    self.tmq = np.sum(1 / (1 + (di / d0)**2)) / self.query.l
    self.tmt = np.sum(1 / (1 + (di / d0)**2)) / self.target.l
    self.tm = (self.tmq + self.tmt) / 2
# *******************************************************************************
# RUNNING MINI-BATCH ONLINE k-MEANS WITH k-MC^2 INITIALIZED CENTERS
# *******************************************************************************

# create empty list to hold the list of data points assigned to each center
center_data_list = [[] for j in range(CLIST_kmcmc.shape[0])]
# create empty list to hold the list of distances of data points assigned to each center
center_data_dist_list = [[] for j in range(CLIST_kmcmc.shape[0])]

# distance matrix holding the distance of each data point to each center
dist_matrix = spatial.distance_matrix(X_mini_batch, CLIST_kmcmc, p=2)
# get index of the center assigned to each corresponding data point
c_j_index = [np.argmin(dist) for dist in dist_matrix]
# list of tuples of data point index and corresponding nearest center index
zipped1 = zip(c_j_index, np.arange(0, mini_batch_size))
# list of distances between each center and its assigned data point
center_data_dist = [np.amin(di) for di in dist_matrix]

count1 = 0
# loop over all data points and corresponding centers
for (k1, v1) in zipped1:
cluster_dorsal, centroid_dorsal = kmeans(df_dorsal, c)
cluster_palmar, centroid_palmar = kmeans(df_palmar, c)

# Cluster details command line and html format
# Compute feature descriptors for unlabelled data
csv_file = model + '_unlabeled_set' + str(unlabeled_set) + '.csv'
if os.path.exists(csv_file):
    os.remove(csv_file)
hog(unlabelled_folder, csv_file)

df = pd.read_csv(csv_file, sep=',', header=None)
dist_dorsal = distance_matrix(df.values[:, 1:], centroid_dorsal)
dist_palmar = distance_matrix(df.values[:, 1:], centroid_palmar)

total_count = len(df.values)
result = []
for i in range(len(df.values)):
    if min(dist_dorsal[i]) < min(dist_palmar[i]):
        result.append([df[0][i], "dorsal"])
    else:
        result.append([df[0][i], "palmar"])

def testing_accuracy(result, unlabeled_set):
    positive = 0
    negative = 0
def generateDisMatrix(df):
    return pd.DataFrame(distance_matrix(df.values, df.values),
                        index=df.index, columns=df.index)
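# Hedged usage sketch (not from the original source): pairwise distances for a
# tiny labelled DataFrame, returned with matching row and column labels.
import pandas as pd
from scipy.spatial import distance_matrix

df = pd.DataFrame({'x': [0.0, 3.0, 0.0], 'y': [0.0, 4.0, 1.0]}, index=['a', 'b', 'c'])
print(generateDisMatrix(df))  # e.g. the a-b distance is 5.0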
def _spdistance_matrix(self, x, y, threshold=None):
    dist = distance_matrix(x, y)
    if threshold is not None:
        zeros = dist > threshold
        dist[zeros] = 0
    return sp.csr_matrix(dist)
def im_callback(self, msg):
    if self.first_spin:
        self.old_image = self.bridge.imgmsg_to_cv2(msg, desired_encoding='bgr8')
        # old coords
        self.old_gray = self.buildMask(self.old_image)
        self.old_coords = self.detectBalls(self.old_gray)
        for c in self.old_coords:
            x, y = c[0], c[1]
            self.balls.append(Balle(x, y, self.nb_ball_spawn, 1, 1))
            self.nb_ball_spawn += 1
        self.first_spin = False
    else:
        # read new image
        new_frame = self.bridge.imgmsg_to_cv2(msg, desired_encoding='bgr8')
        lk_params = dict(winSize=(15, 15),
                         maxLevel=10,
                         criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03))

        # calculate optical flow
        frame_gray = self.buildMask(new_frame)
        coords1, st, err = cv2.calcOpticalFlowPyrLK(
            self.old_gray, frame_gray, self.old_coords, None, **self.lk_params)
        good_new = coords1[(st == 1).flatten()]
        good_old = self.old_coords[(st == 1).flatten()]

        self.old_gray = frame_gray.copy()
        self.old_coords = good_new.reshape(-1, 1, 2)

        # Check for new balls
        newcoords = self.detectBalls(frame_gray)
        distance = distance_matrix(self.old_coords.reshape((-1, 2)), newcoords)

        if (newcoords.shape[0] == self.old_coords.shape[0]):
            # print("same number")
            for k in range(self.old_coords.shape[0]):
                v = np.min(distance[k, :])
                ind = np.argmin(distance[k, :])
                x, y = self.old_coords[k][0][0], self.old_coords[k][0][1]
                self.balls[k].coords = [x, y]
                self.balls[k].num = ind
                self.balls[k].is_visible = 1

        # If any new:
        elif (newcoords.shape[0] > self.old_coords.shape[0]):
            matched = []
            for k in range(self.old_coords.shape[0]):
                ind = np.argmin(distance[k, :])
                matched.append(newcoords[ind])
                distance[k, ind] = 100000
            if (newcoords.shape[0] > self.old_coords.shape[0]):
                for l in range(newcoords.shape[0]):
                    if (np.max(distance[:, l]) != 100000):
                        matched.append(newcoords[l])
            # print("nc : ", newcoords.shape[0])
            # print("nc : ", self.old_coords.shape[0])
            # print("mtchd : ", len(matched))
            self.old_coords = np.asarray(matched).reshape((newcoords.shape[0], 1, 2))
            # Create Balles objects
            for j in range(len(self.old_coords)):
                x, y = self.old_coords[j][0][0], self.old_coords[j][0][1]
                if (len(self.balls) < len(self.old_coords)):
                    self.balls.append(Balle(x, y, self.nb_ball_spawn, 1, 1))
                self.balls[j].coords = [x, y]
                self.balls[j].num = j
                self.balls[j].is_visible = 1
                self.balls[j].detected = 1
                self.nb_ball_spawn += 1

        if (newcoords.shape[0] < self.old_coords.shape[0]):
            matched = []
            dis = []
            for k in range(self.old_coords.shape[0]):
                v = np.min(distance[k, :])
                ind = np.argmin(distance[k, :])
                if v > 30:
                    # matched.append(newcoords[ind])
                    # distance[k, ind] = 10000
                    dis.append(k)
                else:
                    # matched.append(self.old_coords[0][k])
                    zebbi = 1
            # print("dis : ", dis)
            # print("============")
            # self.old_coords = np.asarray(matched).reshape((self.old_coords.shape[0], 1, 2))
            # print("self.old_coords : ", self.old_coords)
            for j in range(len(self.balls)):
                # self.balls[j].coords = [self.old_coords[j][0][0], self.old_coords[j][0][1]]
                if (j in dis):
                    for b in self.balls:
                        # print("ball ", b.num, " : ", b.coords[0], ", ", b.coords[1])
                        if (b.num == j):
                            x, y = self.old_coords[j][0][0], self.old_coords[j][0][1]
                            self.balls[j].coords = [x, y]
                            self.balls[j].is_visible = 0

        if self.visualize:
            frame_show = new_frame.copy()
            for b in self.balls:
                x, y = b.coords[0], b.coords[1]
                # print("x, y", x, ",", y)
                j = b.num
                if (b.is_visible):
                    frame_show = cv2.circle(frame_show, (int(x), int(y)), 5, (0, 200, 0), -1)
                    frame_show = cv2.putText(frame_show, str(j),
                                             (int(x) + 20, int(y) + 20),
                                             cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                             (255, 0, 0), 2)
                else:
                    frame_show = cv2.circle(frame_show, (int(x), int(y)), 5, (0, 0, 200), -1)
                    frame_show = cv2.putText(frame_show, str(j),
                                             (int(x) + 20, int(y) + 20),
                                             cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                             (0, 0, 200), 2)
            cv2.imshow("tracking", frame_show)
            cv2.waitKey(1)

        lst = []
        for b in self.balls:
            if (b.detected and b.is_visible):
                w = self.imgToWorld(b.coords[0], b.coords[1])
                lst.append(float(b.num))
                lst.append(w[0])
                lst.append(w[1])

        lst_coords = Float32MultiArray()
        lst_coords.data = [1.0, 2.0, 3.0]
        # print("lst : ", lst)
        lst_coords.data = lst
        # print("lst_coords.data : ", len(lst_coords.data))
        self.balls_publisher.publish(lst_coords)
in_file = pd.read_csv('/media/miri-o/Documents/AA_triplets_with_embedding_and_clusters.csv')
data = pd.DataFrame(in_file, columns=['Ngram', 'cluster', 'dim1', 'dim2'])
props = ['CDR3_AA_GRAVY', 'CDR3_AA_BULK', 'CDR3_AA_ALIPHATIC', 'CDR3_AA_POLARITY',
         'CDR3_AA_CHARGE', 'CDR3_AA_BASIC', 'CDR3_AA_ACIDIC', 'CDR3_AA_AROMATIC']
property_data = pd.DataFrame(in_file)
property_data = property_data.drop(['Ngram', 'dim1', 'dim2'], axis=1)
property_data_clusterized = clusterize_properties(property_data, props)

amino_acid_logo = build_clust_logo(data)
amino_acid_logo.to_csv('/media/miri-o/Documents/results/amino_acids_clusters_logo.csv')
amino_acid_logo_values = amino_acid_logo.drop(['length', 'center_x', 'center_y'], axis=1)

# compute distance matrix
logo_cluster_dist_mat = pd.DataFrame(distance_matrix(amino_acid_logo_values.values,
                                                     amino_acid_logo_values.values))
plt.figure(figsize=(12, 12))
sns.heatmap(logo_cluster_dist_mat, cmap="RdBu")
plt.show()

fig2 = plt.figure(figsize=(10, 10))
ax2 = plt.scatter(data['dim1'], data['dim2'], s=3, marker='D')
ax2 = plt.scatter(amino_acid_logo['center_x'], amino_acid_logo['center_y'], s=3, marker='x', color='r')

amino_acid_coords = amino_acid_logo[['center_x', 'center_y']]
CM_cluster_dist_mat = pd.DataFrame(distance_matrix(amino_acid_coords.values, amino_acid_coords.values))
plt.figure(figsize=(12, 12))
# -*- coding: utf-8 -*-
"""
Created on Tue Dec 29 13:52:12 2020

@author: Pedro Ayres
"""
import numpy as np
import pandas as pd
from scipy.spatial import distance_matrix

# Original code from OP, slightly reformatted
DF_var = pd.DataFrame.from_dict({
    "s1": [1.2, 3.4, 10.2],
    "s2": [1.4, 3.1, 10.7],
    "s3": [2.1, 3.7, 11.3],
    "s4": [1.5, 3.2, 10.9]
}).T
DF_var.columns = ["g1", "g2", "g3"]

# Whole similarity algorithm in one line
df_euclid = pd.DataFrame(1 / (1 + distance_matrix(DF_var.T, DF_var.T)),
                         columns=DF_var.columns, index=DF_var.columns)
print(df_euclid)
#           g1        g2        g3
# g1  1.000000  0.215963  0.051408
# g2  0.215963  1.000000  0.063021
# g3  0.051408  0.063021  1.000000
ax.set_ylabel('y')
ax.set_zlabel('z')

a1 = 4.05  # lattice parameter
a2 = a1 * np.sqrt(3)  # periodic cell repeat multiple
l = 4
h = 4 * np.sqrt(3)
w = 4

strDataFile = 'new.data'
strDumpFile = 'dump.eam'
strPMFile = strDumpFile + 'PM'

arrSigma = gf.CubicCSLGenerator(np.array([1, 1, 1]), 25)
print(arrSigma)
fltAngle, arrVector = gf.FindRotationVectorAndAngle(np.array([1, 1, 1]), np.array([0, 0, 1]))
arrBasisVectors = gf.RotatedBasisVectors(fltAngle, arrVector)
objFirstLattice = gl.ExtrudedRectangle(l, w, h, arrBasisVectors, ld.FCCCell, np.ones(3), np.zeros(3))
objSecondLattice = gl.ExtrudedRectangle(
    l, w, h, gf.RotateVectors(arrSigma[0, 1], np.array([0, 0, 1]), arrBasisVectors),
    ld.FCCCell, np.ones(3), np.zeros(3))
arrPoints1 = objFirstLattice.GetRealPoints()
arrPoints2 = objSecondLattice.GetRealPoints()
arrDistanceMatrix = spatial.distance_matrix(arrPoints1, arrPoints2)
lstPoints = np.where(arrDistanceMatrix < 1e-5)[0]
arrCSLPoints = arrPoints1[lstPoints]

plt.plot(*tuple(zip(*arrPoints1)), 'bo', c='b')
plt.plot(*tuple(zip(*arrPoints2)), 'bo', c='r')
plt.plot(*tuple(zip(*arrCSLPoints)), 'bo', c='black')
plt.show()
def lp_distance(x):
    return distance_matrix(x, x)
def predict(self, y):
    dist = spatial.distance_matrix(self.data, y)
    kmin_index = np.argsort(dist, axis=0)
    return self.data[kmin_index[:self.neighbors]], dist[kmin_index[:self.neighbors]]
def pearson_affinity(M):
    cov_metrix = np.cov(M)
    dist = (1 - cov_metrix / 2)**0.5
    dist = distance_matrix(dist, dist)
    return dist
max_len -= 1
timesteps = n_timesteps[:max_len]

# Downsample if needed
for trial_idx, n_timesteps in enumerate(merged_timesteps):
    # We assume they are the same, or they will be discarded in the next step
    if len(n_timesteps) == min_ or n_timesteps[-1] < args.min_timesteps:
        pass
    else:
        # Discard
        # merged_mean[trial_idx] = []
        new_merged_mean, new_merged_std = [], []
        # Nearest neighbour
        distance_mat = distance_matrix(n_timesteps.reshape(-1, 1), timesteps.reshape(-1, 1))
        closest_indices = distance_mat.argmin(axis=0)
        for closest_idx in closest_indices:
            new_merged_mean.append(merged_mean[trial_idx][closest_idx])
            new_merged_std.append(merged_std[trial_idx][closest_idx])
        merged_mean[trial_idx] = new_merged_mean
        merged_std[trial_idx] = new_merged_std
        last_eval[trial_idx] = merged_results[trial_idx][closest_indices[-1]]

# Remove incomplete runs
mean_tmp, std_tmp, last_eval_tmp = [], [], []
for idx in range(len(merged_mean)):
from matplotlib import pyplot as plt
from sklearn import manifold, datasets
from sklearn.cluster import AgglomerativeClustering
from sklearn.datasets.samples_generator import make_blobs

# Make the blobs
X2, y2 = make_blobs(n_samples=50, centers=[[4, 4], [-2, -1], [1, 1], [10, 4]], cluster_std=0.9)

# Create the model and train it
agglom = AgglomerativeClustering(n_clusters=4, linkage='average')
agglom.fit(X2, y2)

# Create a minimum and maximum range of X2.
x_min, x_max = np.min(X2, axis=0), np.max(X2, axis=0)
# Get the average distance for X2.
X2 = (X2 - x_min) / (x_max - x_min)

# Create the distance matrix
dist_matrix = distance_matrix(X2, X2)
# Create the hierarchical linkage
Z = hierarchy.linkage(dist_matrix, 'complete')
# Create the dendrogram
dendro = hierarchy.dendrogram(Z)

# Create a figure of size 6 inches by 4 inches.
plt.figure(figsize=(6, 4))

# These two lines of code are used to scale the data points down,
# or else the data points will be scattered very far apart.
# Create a minimum and maximum range of X2.
x_min, x_max = np.min(X2, axis=0), np.max(X2, axis=0)
# Get the average distance for X2.
X2 = (X2 - x_min) / (x_max - x_min)

# This loop displays all of the datapoints.
for i in range(X2.shape[0]):
def collision_detection(self):
    """Parse collided nodes on to the interact function"""
    dm = np.tril(distance_matrix(self.nodes[:, :2], self.nodes[:, :2]))
    collision_pairs = list(zip(*np.where((dm < self.node_radius*2) & (dm != 0.0))))
    self.interact(collision_pairs)
test = flag_1_data.loc[["S000713", "S000715"]]
# test2 = flag_1_data[flag_1_data["APPLICATION_NUMBER_1"] == "S000713"]

# Calculate distance matrices between PODs within each HUC 8 with this package:
from scipy.spatial import distance_matrix

# n is just a counter
n = 0
lst = []
for huc in flag_1_data["HUC_8_NUMBER"].unique():
    n = n + 1
    print(n, huc)
    data = flag_1_data[["HUC_8_NUMBER", "LATITUDE", "LONGITUDE"]][flag_1_data["HUC_8_NUMBER"] == huc]
    dist = pd.DataFrame(distance_matrix(data.values, data.values), index=data.index, columns=data.index)
    cols = dist.index
    lst.append((pd.DataFrame(np.triu(dist, k=1), index=cols, columns=cols)).replace(0, 999999999))

threshold = 1000000
app_list = []
x = 0
for i, list_ in enumerate(lst):
    x = x + 1
    print(x)
    df = lst[i]
    for j, app in enumerate(df.index):
        print(j, app)
        df1 = df[df.loc[df.index[j]] < threshold]
        if len(df1) > 0:
            # app_list.append(tuple((df.index[j], df1.index.values[0])))
def plot_diffraction_vectors(self, xlim=1.0, ylim=1.0, unique_vectors=None,
                             distance_threshold=0.01,
                             method='distance_comparison', min_samples=1,
                             image_to_plot_on=None, image_cmap='gray',
                             plot_label_colors=False,
                             distance_threshold_all=0.005):  # pragma: no cover
    """Plot the unique diffraction vectors.

    Parameters
    ----------
    xlim : float
        The maximum x coordinate in reciprocal Angstroms to be plotted.
    ylim : float
        The maximum y coordinate in reciprocal Angstroms to be plotted.
    unique_vectors : DiffractionVectors, optional
        The unique vectors to be plotted (optional). If not given, the
        unique vectors will be found by get_unique_vectors.
    distance_threshold : float, optional
        The minimum distance in reciprocal Angstroms between diffraction
        vectors for them to be considered unique diffraction vectors.
        Will be passed to get_unique_vectors if no unique vectors are given.
    method : str
        The method to use to determine unique vectors, if not given.
        Valid methods are 'strict', 'distance_comparison' and 'DBSCAN'.
        'strict' returns all vectors that are strictly unique and
        corresponds to distance_threshold=0.
        'distance_comparison' checks the distance between vectors to
        determine if some should belong to the same unique vector, and if
        so, the unique vector is iteratively updated to the average value.
        'DBSCAN' relies on the DBSCAN [1] clustering algorithm, and uses
        the Euclidean distance metric.
    min_samples : int, optional
        The minimum number of not identical vectors within one cluster for
        it to be considered a core sample, i.e. to not be considered noise.
        Will be passed to get_unique_vectors if no unique vectors are
        given. Only used if method=='DBSCAN'.
    image_to_plot_on : BaseSignal, optional
        If provided, the vectors will be plotted on top of this image.
        The image must be calibrated in terms of offset and scale.
    image_cmap : str, optional
        The colormap to plot the image in.
    plot_label_colors : bool, optional
        If True (default is False), also the vectors contained within each
        cluster will be plotted, with colors according to their cluster
        membership. If True, the unique vectors will be calculated by
        get_unique_vectors. Requires method=='DBSCAN'.
    distance_threshold_all : float, optional
        The minimum distance, in calibrated units, between diffraction
        vectors inside one cluster for them to be plotted. Only used if
        plot_label_colors is True and requires method=='DBSCAN'.

    Returns
    -------
    fig : matplotlib figure
        The plot as a matplotlib figure.
    """
    fig = plt.figure()
    ax = fig.add_subplot(111)
    offset, scale = 0., 1.
    if image_to_plot_on is not None:
        offset = image_to_plot_on.axes_manager[-1].offset
        scale = image_to_plot_on.axes_manager[-1].scale
        ax.imshow(image_to_plot_on, cmap=image_cmap)
    else:
        ax.set_xlim(-xlim, xlim)
        ax.set_ylim(ylim, -ylim)
        ax.set_aspect('equal')

    if plot_label_colors is True and method == 'DBSCAN':
        clusters = self.get_unique_vectors(distance_threshold,
                                           method='DBSCAN',
                                           min_samples=min_samples,
                                           return_clusters=True)[1]
        labs = clusters.labels_[clusters.core_sample_indices_]
        # Get all vectors from the clustering not considered noise
        cores = clusters.components_
        if cores.size == 0:
            warn('No clusters were found. Check parameters, or '
                 'use plot_label_colors=False.')
        else:
            peaks = DiffractionVectors(cores)
            peaks.axes_manager.set_signal_dimension(1)
            # Since this original number of vectors can be huge, we
            # find a reduced number of vectors that should be plotted, by
            # running a new clustering on all the vectors not considered
            # noise, considering distance_threshold_all.
            peaks = peaks.get_unique_vectors(distance_threshold_all,
                                             min_samples=1,
                                             return_clusters=False)
            peaks_all_len = peaks.data.shape[0]
            labels_to_plot = np.zeros(peaks_all_len)
            peaks_to_plot = np.zeros((peaks_all_len, 2))
            # Find the labels of each of the peaks to plot by referring back
            # to the list of labels for the original vectors.
            for n, peak in zip(np.arange(peaks_all_len), peaks):
                index = distance_matrix([peak.data], cores).argmin()
                peaks_to_plot[n] = cores[index]
                labels_to_plot[n] = labs[index]
            # Assign a color value to each label, and shuffle these so that
            # adjacent clusters hopefully get distinct colors.
            cmap_lab = get_cmap('gist_rainbow')
            lab_values_shuffled = np.arange(np.max(labels_to_plot) + 1)
            np.random.shuffle(lab_values_shuffled)
            labels_steps = np.array(list(map(
                lambda n: lab_values_shuffled[int(n)], labels_to_plot)))
            labels_steps = labels_steps / (np.max(labels_to_plot) + 1)
            # Plot all peaks
            for lab, peak in zip(labels_steps, peaks_to_plot):
                ax.plot((peak[0] - offset) / scale,
                        (peak[1] - offset) / scale,
                        '.', color=cmap_lab(lab))

    if unique_vectors is None:
        unique_vectors = self.get_unique_vectors(distance_threshold,
                                                 method=method,
                                                 min_samples=min_samples)
    # Plot the unique vectors
    ax.plot((unique_vectors.data.T[0] - offset) / scale,
            (unique_vectors.data.T[1] - offset) / scale, 'kx')
    plt.tight_layout()
    plt.axis('off')
    return fig
import pandas as pd
import numpy as np
from scipy.spatial import distance_matrix
from scipy.spatial import KDTree

df = pd.read_csv(r"C:\Users\Asus\Documents\GitHub\Gisele_MILP\cluster3_PS.csv")
for i in df.index:
    if df.loc[i]['Population'] == 0:
        df.drop(i, inplace=True)

coords = pd.DataFrame()
coords['X'] = df['X']
coords['Y'] = df['Y']
Dist_matrix = pd.DataFrame(distance_matrix(coords.values, coords.values), index=df['id'], columns=df['id'])
Weight = pd.DataFrame()
PS = pd.DataFrame()
df.index = df['id']
k = 0

# for i, row in df.iterrows():
#     if df.loc[i, 'Population'] > 10:
#         df[i, 'Weight'] = 0

# create new column with absorbed power
df = df.assign(Power=0.1)
df['Power'] = df['PS'].apply(lambda x: '0' if x == 1 else '0.1')
X = np.array([
    [5, 3],
    [10, 15],
    [15, 12],
    [24, 10],
    [30, 30],
    [85, 70],
    [71, 80],
    [60, 78],
    [70, 55],
    [80, 91],
])
print('X =')
print(X)

print('----Distance------')
df = X
dist = pd.DataFrame(distance_matrix(df, df))
print(dist.values)

labels = range(1, 11)
plt.figure(figsize=(10, 7))
plt.subplots_adjust(bottom=0.1)
plt.scatter(X[:, 0], X[:, 1], label='True Position')

for label, x, y in zip(labels, X[:, 0], X[:, 1]):
    plt.annotate(label,
                 xy=(x, y),
                 xytext=(-3, 3),
                 textcoords='offset points',
                 ha='right',
                 va='bottom')
    # Replace the data points with their respective cluster value (ex. 0),
    # color coded with a colormap (plt.cm.nipy_spectral)
    plt.text(
        X1[i, 0], X1[i, 1], str(y1[i]),  # places the cluster label at the data point
        color=plt.cm.nipy_spectral(agglom.labels_[i] / 10.),  # each data point in a cluster gets the same color
        fontdict={'weight': 'bold', 'size': 9})

# Display the plot of the original data before clustering
plt.scatter(X1[:, 0], X1[:, 1], marker='.')
# Display the plot
plt.savefig('finalplot.png')

# --------------- Dendrogram / phylogenetic tree --------------------------------
# create a distance matrix between every point
dist_matrix = distance_matrix(X1, X1)
# define type of hierarchical linkage
Z = hierarchy.linkage(dist_matrix, 'complete')
# display dendrogram
dendro = hierarchy.dendrogram(Z)
plt.savefig('dendo.png')