import numpy as np
from numpy import array, mask_indices
from numpy.testing import assert_array_equal


def test_mask_indices():
    # simple test without offset
    iu = mask_indices(3, np.triu)
    a = np.arange(9).reshape(3, 3)
    yield (assert_array_equal, a[iu], array([0, 1, 2, 4, 5, 8]))
    # now with an offset
    iu1 = mask_indices(3, np.triu, 1)
    yield (assert_array_equal, a[iu1], array([1, 2, 5]))
def test_mask_indices(self):
    iu = np.mask_indices(3, np.triu)
    print(iu)
    a = np.arange(9).reshape(3, 3)
    b = a[iu]
    print(b)
    iu1 = np.mask_indices(3, np.triu, 1)
    c = a[iu1]
    print(c)
    return
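For reference, a minimal sketch of what the two tests above assert: np.mask_indices(n, mask_func, k) evaluates mask_func(np.ones((n, n)), k) and returns the indices of the nonzero entries, so np.triu selects the upper-triangle positions.

import numpy as np

a = np.arange(9).reshape(3, 3)
iu = np.mask_indices(3, np.triu)      # (array([0, 0, 0, 1, 1, 2]), array([0, 1, 2, 1, 2, 2]))
assert (a[iu] == np.array([0, 1, 2, 4, 5, 8])).all()
iu1 = np.mask_indices(3, np.triu, 1)  # strict upper triangle (offset k=1)
assert (a[iu1] == np.array([1, 2, 5])).all()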
def map2corr(y, gt):
    corr_matrix = np.corrcoef(y)
    subjects_effect = part2adj(gt)
    iu = np.mask_indices(corr_matrix.shape[0], np.triu, k=1)
    mask_sub = subjects_effect[iu]
    data = np.arctanh(corr_matrix[iu])
    return data, mask_sub > 0
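part2adj is not defined in this snippet. Given that its output is indexed like a square adjacency matrix aligned with the correlation matrix, a plausible reading is that it turns a partition/grouping vector into a same-group adjacency matrix; the sketch below is hypothetical.

import numpy as np

def part2adj(gt):
    # Hypothetical helper: gt is a length-N vector of group labels;
    # the result is an N x N matrix that is 1 where two rows share a label.
    gt = np.asarray(gt)
    return (gt[:, None] == gt[None, :]).astype(int)

# Usage sketch: y is an (N, T) set of time series; `data` holds Fisher-z
# transformed correlations, `mask_sub` flags same-subject pairs.
y = np.random.randn(6, 50)
gt = np.array([0, 0, 1, 1, 2, 2])
data, mask_sub = map2corr(y, gt)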
def gfusedlasso(z, A, lam=None):
    # print(type(z), type(A), type(lam))
    A = np.triu(A) > 0
    edges = np.stack(np.mask_indices(A.shape[0], lambda n, k: A), axis=-1)
    # print(z.shape, z.dtype, edges.shape, edges.dtype, lam)
    z_fused = solve_gfl(z.astype(np.float64), edges, lam=lam)
    return z_fused.astype(z.dtype)
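The lambda n, k: A idiom above makes mask_indices ignore the ones-matrix it builds internally and simply return np.nonzero(A). A minimal check of that equivalence:

import numpy as np

A = np.triu(np.random.rand(5, 5) > 0.5, k=1)
via_mask = np.stack(np.mask_indices(A.shape[0], lambda n, k: A), axis=-1)
via_nonzero = np.stack(np.nonzero(A), axis=-1)
assert (via_mask == via_nonzero).all()  # same (num_edges, 2) index pairs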
def calc_cov(self, x, model):
    """
    Creates the covariance matrix from the variogram model
    """
    if x.ndim < 2:
        x = x.reshape(x.size, 1)
    ii, jj = np.mask_indices(self.s, np.triu, k=1)
    lags = pdist(x)
    covariances = self.sill - self.model(lags)
    if not self.sparse:
        h_cov = np.zeros((self.s, self.s), dtype=np.float64)
        h_cov[np.diag_indices(self.s)] = self.sill
    else:
        h_cov = ssp.lil_matrix((self.s, self.s), dtype=np.float64)
        h_cov.setdiag(self.sill)
    nocorrs = np.isclose(covariances, 0)
    ii = ii[~nocorrs]
    jj = jj[~nocorrs]
    covariances = covariances[~nocorrs]
    h_cov[ii, jj] = covariances
    h_cov[jj, ii] = covariances
    if not self.sparse:
        self.cov = h_cov
    else:
        self.cov = h_cov.asformat("csc")
def saveMatrix(self, outputpath, index=None):
    if not self.data_loaded:
        msg = "Warning: No data loaded, nothing to save"
        print(msg)
        return
    sparseMatrix = None
    windowsize = self.windowsize
    flankingsize = self.flankingsize
    if not isinstance(flankingsize, int):
        flankingsize = windowsize
    if isinstance(self.maxdist, int) and self.maxdist < windowsize and self.maxdist > 0:
        maxdist = self.maxdist
    else:
        maxdist = windowsize
    if isinstance(index, int) and index < self.getNumberSamples():
        tmpMat = np.zeros(shape=(windowsize, windowsize))
        indices = np.mask_indices(windowsize, utils.maskFunc, k=maxdist)
        tmpMat[indices] = self.__getMatrixData(idx=index)
        sparseMatrix = csr_matrix(tmpMat)
    else:
        sparseMatrix = self.sparseHiCMatrix
    folderName = self.chromatinFolder.rstrip("/").replace("/", "-")
    filename = "matrix_{:s}_chr{:s}_{:s}".format(folderName, str(self.chromosome), str(index))
    filename = os.path.join(outputpath, filename)
    save_npz(file=filename, matrix=sparseMatrix)
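Several snippets here (saveMatrix, rebuildMatrix, __getMatrixData, computeMetrics) pass a maskFunc into np.mask_indices to select a distance-limited band above the diagonal, but its definition is not included. A plausible sketch, consistent with how it is called (an upper triangle cut off k diagonals above the main diagonal, i.e. a trapezoid):

import numpy as np

def maskFunc(mat, k=0):
    # Hypothetical reconstruction: keep the upper triangle of `mat` but zero
    # out everything k or more diagonals above the main diagonal, leaving a
    # trapezoidal band. With k >= mat.shape[0] this reduces to np.triu.
    return np.triu(mat) - np.triu(mat, k=k)

# mask_indices(n, maskFunc, k) then yields indices (i, j) with 0 <= j - i < k
idx = np.mask_indices(5, maskFunc, 3)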
def flatten_input(self, H):
    """
    This is an internal method for reshaping input matrices to the GRU layer
    of the controller.

    H: a numpy array representing the target matrix input (target unitary)
       with shape (number of examples, number of time steps,
       number of waveguides, number of waveguides)
    """
    # retrieve the batch size from the training dataset
    num_examples = H.shape[0]
    num_points = H.shape[1]
    # initialize the arrays holding the upper triangular part
    params_r = np.zeros((num_examples, num_points, self.num_wg * (self.num_wg + 1) // 2))
    params_i = np.zeros((num_examples, num_points, self.num_wg * (self.num_wg + 1) // 2))
    # define an anonymous function to extract the upper triangular part of a matrix
    get_upper = (lambda x: x[np.mask_indices(self.num_wg, np.triu)])
    if self.mode == 0 or self.mode == 1:
        # extract the upper triangular part of the Hamiltonian
        for idx_ex in range(num_examples):
            for idx_t in range(num_points):
                params_r[idx_ex, idx_t, :] = np.real(get_upper(H[idx_ex, idx_t, :]))
                params_i[idx_ex, idx_t, :] = np.imag(get_upper(H[idx_ex, idx_t, :]))
        if self.mode == 0:
            return params_r
        elif self.mode == 1:
            return np.concatenate((params_r, params_i), axis=-1)
    else:
        return np.concatenate(
            (np.real(np.reshape(H, (num_examples, num_points, self.num_wg * self.num_wg))),
             np.imag(np.reshape(H, (num_examples, num_points, self.num_wg * self.num_wg)))),
            axis=-1)
def rebuildMatrix(pArrayOfTriangles, pWindowSize, pFlankingSize=None, pMaxDist=None, pStepsize=1):
    # rebuilds the interaction matrix (a trapezoid along its diagonal)
    # by taking the mean of all overlapping triangles
    # returns an interaction matrix as a numpy ndarray
    if pFlankingSize is None:
        flankingSize = pWindowSize
    else:
        flankingSize = pFlankingSize
    nr_matrices = pArrayOfTriangles.shape[0]
    sum_matrix = np.zeros((nr_matrices - 1 + (pWindowSize + 2 * flankingSize),
                           nr_matrices - 1 + (pWindowSize + 2 * flankingSize)))
    count_matrix = np.zeros_like(sum_matrix, dtype=int)
    mean_matrix = np.zeros_like(sum_matrix, dtype="float32")
    if pMaxDist is None or pMaxDist == pWindowSize:
        stepsize = 1
    else:
        # trapezoid, compute the stepsize such that the overlap is minimized
        stepsize = max(pStepsize, 1)
        # the largest possible value such that predictions are available for all bins
        stepsize = min(stepsize, pWindowSize - pMaxDist + 1)
    # sum up all the triangular or trapezoidal matrices,
    # shifting by one along the diagonal for each matrix
    for i in tqdm(range(0, nr_matrices, stepsize), desc="rebuilding matrix"):
        j = i + flankingSize
        k = j + pWindowSize
        if pMaxDist is None or pMaxDist == pWindowSize:
            # triangles
            sum_matrix[j:k, j:k][np.triu_indices(pWindowSize)] += pArrayOfTriangles[i]
        else:
            # trapezoids
            sum_matrix[j:k, j:k][np.mask_indices(pWindowSize, maskFunc, pMaxDist)] += pArrayOfTriangles[i]
        # keep track of how many matrices have contributed to each position
        count_matrix[j:k, j:k] += np.ones((pWindowSize, pWindowSize), dtype=int)
    mean_matrix[count_matrix != 0] = sum_matrix[count_matrix != 0] / count_matrix[count_matrix != 0]
    return mean_matrix
def latlon_randomizer(settings):
    done_flag = False
    iter_n = 0
    while not done_flag:
        iter_n += 1
        # id_mat =
        aclat = np.random.rand(settings.n_ac) * (settings.max_lat_gen - settings.min_lat_gen) \
            + settings.min_lat_gen
        aclon = np.random.rand(settings.n_ac) * (settings.max_lon_gen - settings.min_lon_gen) \
            + settings.min_lon_gen
        _, dist = tools.geo.kwikqdrdist_matrix(np.asmatrix(aclat), np.asmatrix(aclon),
                                               np.asmatrix(aclat), np.asmatrix(aclon))
        mask = np.mask_indices(settings.n_ac, np.tril, -1)
        dist = np.asarray(dist)
        dist = dist[mask]
        # idx = np.where(dist <= settings.spawn_separation)
        # aclat[idx] = np.random.rand(idx.size()) * (settings.max_lat_gen - settings.min_lat_gen) + settings.min_lat_gen
        # aclon[idx] = np.random.rand(idx.size()) * (settings.max_lon_gen - settings.min_lon_gen) + settings.min_lon_gen
        if all(dist >= settings.spawn_separation) or iter_n >= 500:
            done_flag = True
            if iter_n >= 500:
                print("Maximum iterations on aircraft random generation reached, "
                      "aircraft may spawn too close together")
                print("Minimum current distance is: ", min(dist))
    return aclat, aclon
def __getMatrixData(self, idx):
    if self.matrixfilepath is None:
        return None  # this can't work
    if not self.data_loaded:
        msg = "Error: Load data first"
        raise RuntimeError(msg)
    # the 0-th matrix starts flankingsize away from the boundary
    windowsize = self.windowsize
    flankingsize = self.flankingsize
    if flankingsize is None:
        flankingsize = windowsize
    startInd = idx + flankingsize
    stopInd = startInd + windowsize
    trainmatrix = None
    if isinstance(self.maxdist, int) and self.maxdist < windowsize and self.maxdist > 0:
        # trapezoids, i.e. distance-limited submatrices
        trainmatrix = self.sparseHiCMatrix[startInd:stopInd, startInd:stopInd].todense()[
            np.mask_indices(windowsize, utils.maskFunc, self.maxdist)]
    else:
        # triangles, i.e. full submatrices
        trainmatrix = self.sparseHiCMatrix[startInd:stopInd, startInd:stopInd].todense()[
            np.triu_indices(windowsize)]
    trainmatrix = np.array(np.nan_to_num(trainmatrix))[0, :]
    return trainmatrix
def calc_diffs(self):
    """
    Upon initialization, calculates squared differences between all field
    values given. Performed before any reductions are applied.
    """
    c_indx = np.mask_indices(self.s, np.triu, k=1)
    diffs = (self.f[c_indx[0]] - self.f[c_indx[1]])**2
    return diffs
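The pair ordering produced by np.mask_indices(n, np.triu, k=1) matches SciPy's condensed-distance ordering, which is why snippets like calc_cov above can assign pdist output directly onto those indices. A quick check of that assumption:

import numpy as np
from scipy.spatial.distance import pdist

f = np.random.randn(6)
i, j = np.mask_indices(6, np.triu, k=1)
diffs = (f[i] - f[j]) ** 2
# pdist walks the pairs (0,1), (0,2), ..., (n-2,n-1) in the same row-major order
assert np.allclose(diffs, pdist(f[:, None], metric="sqeuclidean"))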
def _get_pairlist_wcutoff(self, diff_mx):
    # this method calculates the coordinates for a given difference matrix
    upper_displaced_triangle = np.mask_indices(diff_mx.shape[0], np.triu, 1)
    pair_list_wcutoff = diff_mx[upper_displaced_triangle]
    pair_list_wcutoff = pair_list_wcutoff[pair_list_wcutoff < self.rcut]
    return pair_list_wcutoff
def random_graph():
    graph_size_list = [np.random.randint(5, 10) for _ in range(10)]
    edge_list = [
        np.stack(np.mask_indices(
            gs, lambda n, k: np.triu((1 - np.eye(gs)) * np.random.rand(gs, gs)) >= 0.5),
            axis=-1).astype('int')
        for gs in graph_size_list
    ]
    print(graph_size_list)
    print(len(edge_list), [e.shape for e in edge_list])
    return graph_size_list, edge_list
def getFeatures(self, **kwargs):
    '''
    This is a Method that returns features (keypoints and descriptors)
    that are obtained by using the FeatureExtractor.Detector object.

    input:
        mask: boolean, optional, default False.
            Whether to apply a window mask before feature detection.
    output:
        keypoints, descriptors
    '''
    detector = self.detector
    dset = self.data
    lib = self.lib
    mask = kwargs.get('mask', False)
    origin = kwargs.get('origin', [0, 0])
    winSize = kwargs.get('window_size', 0)
    if mask:
        def mask_func(x, winSize):
            # integer division so the bounds are valid slice indices
            x[origin[0] - winSize // 2:origin[0] + winSize // 2,
              origin[1] - winSize // 2:origin[1] + winSize // 2] = 2
            x = x - 1
            return x

        mask_ind = np.mask_indices(dset.shape[-1], mask_func, winSize)
        self.data = np.array([imp[mask_ind].reshape(winSize, winSize) for imp in dset])

    # detect and compute keypoints
    def detect(image):
        if lib == 'opencv':
            image = (image - image.mean()) / image.std()
            image = image.astype('uint8')
            k_obj, d_obj = detector.detectAndCompute(image, None)
            keypts, descs = pickle_keypoints(k_obj), pickle_keypoints(d_obj)
        elif lib == 'skimage':
            imp = (image - image.mean()) / np.std(image)
            imp[imp < 0] = 0
            imp.astype('float32')
            detector.detect_and_extract(imp)
            keypts, descs = detector.keypoints, detector.descriptors
        return keypts, descs

    # run detection serially
    results = [detect(imp) for imp in self.data]
    # get keypoints and descriptors
    keypts = [itm[0].astype('int') for itm in results]
    desc = [itm[1] for itm in results]
    return keypts, desc
def calc_cov(self, x, model):
    """
    Creates the covariance matrix for the points with the variogram model
    """
    if x.ndim < 2:
        x = x.reshape(x.size, 1)
    h_cov = np.zeros((self.s, self.s))
    h_cov[np.diag_indices(self.s)] = self.sill
    mask_indices = np.mask_indices(self.s, np.triu, k=1)
    lags = pdist(x)
    h_cov[mask_indices] = self.sill - self.model(lags)
    self.cov = np.maximum(h_cov, h_cov.T)
def test_indexing(self, args):
    shape, mult_ind, ravl_ind, nkm = args
    self.assertArraysMatch(nl.ravel_multi_index(mult_ind, shape, mode='wrap'), ravl_ind)
    self.assertArraysMatch(nl.unravel_index(ravl_ind, shape),
                           np.unravel_index(ravl_ind, shape))
    self.assertArraysMatch(nl.indices(shape), np.indices(shape))
    self.assertArraysMatch(nl.diag_indices(shape[0], len(shape)),
                           np.diag_indices(shape[0], len(shape)))
    if shape:
        rows, diag, cols = nkm
        self.assertArraysMatch(nl.tril_indices(rows, diag, cols),
                               np.tril_indices(rows, diag, cols))
        self.assertArraysMatch(nl.triu_indices(rows, diag, cols),
                               np.triu_indices(rows, diag, cols))
        self.assertArraysMatch(nl.mask_indices(rows, np.triu, diag),
                               np.mask_indices(rows, np.triu, diag))
def flatten_Hamiltonian(self, H):
    """
    This is an internal method for extracting the upper triangular part of the
    Hamiltonian, which makes the training process more efficient, since the
    Hamiltonian must be Hermitian.

    H: a numpy array representing a Hamiltonian with shape
       (number of examples, number of time steps,
       number of waveguides, number of waveguides)
    """
    # retrieve the batch size from the training dataset
    num_examples = H.shape[0]
    num_points = H.shape[1]
    # initialize the array holding the upper triangular part
    params = np.zeros((num_examples, num_points, self.num_wg * (self.num_wg + 1) // 2))
    # define an anonymous function to extract the upper triangular part of a matrix
    get_upper = (lambda x: x[np.mask_indices(self.num_wg, np.triu)])
    # extract the upper triangular part of the Hamiltonian
    for idx_ex in range(num_examples):
        for idx_t in range(num_points):
            params[idx_ex, idx_t, :] = get_upper(H[idx_ex, idx_t, :])
    return params
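Going the other way (rebuilding a Hermitian matrix from the flattened upper triangle) is not shown in the snippet; a minimal sketch of the inverse, assuming the same np.triu packing order, with the helper name chosen here for illustration:

import numpy as np

def unflatten_hamiltonian(params, num_wg):
    # Hypothetical inverse of get_upper: scatter the packed upper-triangle
    # entries back into a matrix, then mirror the strict upper part
    # (conjugated) below the diagonal so the result is Hermitian.
    H = np.zeros((num_wg, num_wg), dtype=complex)
    H[np.mask_indices(num_wg, np.triu)] = params
    strict = np.triu_indices(num_wg, k=1)
    H[(strict[1], strict[0])] = np.conj(H[strict])
    return H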
def faster_centroid_triplet_eval(X, X_new, y):
    '''
    This is a function that is used to evaluate the lower dimension embedding.
    A triplet satisfaction score is calculated by evaluating how many triplets
    of cluster median centroids have been violated.

    Input:
        X: A numpy array with the shape [N, p]. The higher dimension embedding
           of some dataset. Expected to have some clusters.
        X_new: A numpy array with the shape [N, k]. The lower dimension
           embedding of some dataset. Expected to have some clusters as well.
        y: A numpy array with the shape [N, 1]. The labels of the original
           dataset. Used to identify clusters.
    Output:
        acc: The score generated by the algorithm.
    '''
    cluster_mean_ori, cluster_mean_new = [], []
    categories = np.unique(y)
    num_cat = len(categories)
    mask = np.mask_indices(num_cat, np.tril, -1)
    for i in range(num_cat):
        label = categories[i]
        X_clus_ori = X[y == label]
        X_clus_new = X_new[y == label]
        cluster_mean_ori.append(np.median(X_clus_ori, axis=0))
        cluster_mean_new.append(np.median(X_clus_new, axis=0))
    cluster_mean_ori = np.array(cluster_mean_ori)
    cluster_mean_new = np.array(cluster_mean_new)
    ori_dist = euclidean_distances(cluster_mean_ori)[mask]
    new_dist = euclidean_distances(cluster_mean_new)[mask]
    dist_agree = 0.  # count of pairs whose distance ordering agrees
    dist_all = 0.  # total count
    for i in range(len(ori_dist)):
        for j in range(i + 1, len(ori_dist)):
            if ori_dist[i] > ori_dist[j] and new_dist[i] > new_dist[j]:
                dist_agree += 1
            elif ori_dist[i] <= ori_dist[j] and new_dist[i] <= new_dist[j]:
                dist_agree += 1
            dist_all += 1
    return dist_agree / dist_all
from typing import Any, List

import numpy as np
import numpy.typing as npt


def func1(ar: npt.NDArray[Any], a: int) -> npt.NDArray[np.str_]:
    # signature reconstructed from context: a mask_func whose return type is
    # not an index array should trigger the mask_indices failure below
    pass


def func2(ar: npt.NDArray[Any], a: float) -> float:
    pass


AR_b: npt.NDArray[np.bool_]
AR_m: npt.NDArray[np.timedelta64]
AR_LIKE_b: List[bool]

np.eye(10, M=20.0)  # E: No overload variant
np.eye(10, k=2.5, dtype=int)  # E: No overload variant

np.diag(AR_b, k=0.5)  # E: No overload variant
np.diagflat(AR_b, k=0.5)  # E: No overload variant

np.tri(10, M=20.0)  # E: No overload variant
np.tri(10, k=2.5, dtype=int)  # E: No overload variant

np.tril(AR_b, k=0.5)  # E: No overload variant
np.triu(AR_b, k=0.5)  # E: No overload variant

np.vander(AR_m)  # E: incompatible type

np.histogram2d(AR_m)  # E: No overload variant

np.mask_indices(10, func1)  # E: incompatible type
np.mask_indices(10, func2, 10.5)  # E: incompatible type
def getFeatures(self, **kwargs):
    '''
    This is a Method that returns features (keypoints and descriptors)
    that are obtained by using the FeatureExtractor.Detector object.

    input:
        processors: int, optional
            Number of processors to use, default = 1.
        mask: boolean, optional, default False.
            Whether to apply a window mask before feature detection.
    output:
        keypoints, descriptors
    '''
    detector = self.detector
    dset = self.data
    lib = self.lib
    processes = kwargs.get('processors', 1)
    mask = kwargs.get('mask', False)
    origin = kwargs.get('origin', [0, 0])
    winSize = kwargs.get('window_size', 0)
    if mask:
        def mask_func(x, winSize):
            # integer division so the bounds are valid slice indices
            x[origin[0] - winSize // 2:origin[0] + winSize // 2,
              origin[1] - winSize // 2:origin[1] + winSize // 2] = 2
            x = x - 1
            return x

        mask_ind = np.mask_indices(dset.shape[-1], mask_func, winSize)
        self.data = np.array([imp[mask_ind].reshape(winSize, winSize) for imp in dset])

    # detect and compute keypoints
    def detect(image):
        if lib == 'opencv':
            image = (image - image.mean()) / image.std()
            image = image.astype('uint8')
            k_obj, d_obj = detector.detectAndCompute(image, None)
            keypts, descs = pickle_keypoints(k_obj), pickle_keypoints(d_obj)
        elif lib == 'skimage':
            imp = (image - image.mean()) / np.std(image)
            imp[imp < 0] = 0
            imp.astype('float32')
            detector.detect_and_extract(imp)
            keypts, descs = detector.keypoints, detector.descriptors
        return keypts, descs

    # start pool of workers
    print('launching %i kernels...' % (processes))
    pool = multiProcess.Pool(processes)
    tasks = [(imp) for imp in self.data]
    chunk = int(self.data.shape[0] / processes)
    jobs = pool.imap(detect, tasks, chunksize=chunk)
    # get keypoints and descriptors
    results = []
    print('Extracting features...')
    try:
        for j in jobs:
            results.append(j)
    except ValueError:
        warnings.warn('ValueError something about 2d-image. Probably some of the '
                      'detector input params are wrong.')
    keypts = [itm[0].astype('int') for itm in results]
    desc = [itm[1] for itm in results]
    # close the pool
    print('Closing down the kernels... \n')
    pool.close()
    return keypts, desc
def random_sparse_spd_matrix(
    dim: IntArgType,
    density: float,
    chol_entry_min: float = 0.1,
    chol_entry_max: float = 1.0,
    random_state: Optional[RandomStateArgType] = None,
) -> np.ndarray:
    """Random sparse symmetric positive definite matrix.

    Constructs a random sparse symmetric positive definite matrix for a given
    degree of sparsity. The matrix is constructed from its Cholesky factor
    :math:`L`. Its diagonal is set to one and all other entries of the lower
    triangle are sampled from a uniform distribution with bounds
    :code:`[chol_entry_min, chol_entry_max]`. The resulting sparse matrix is
    then given by :math:`A=LL^\\top`.

    Parameters
    ----------
    dim
        Matrix dimension.
    density
        Degree of sparsity of the off-diagonal entries of the Cholesky factor.
        Between 0 and 1 where 1 represents a dense matrix.
    chol_entry_min
        Lower bound on the entries of the Cholesky factor.
    chol_entry_max
        Upper bound on the entries of the Cholesky factor.
    random_state
        Random state of the random variable. If None (or np.random), the global
        :mod:`numpy.random` state is used. If integer, it is used to seed the
        local :class:`~numpy.random.RandomState` instance.

    See Also
    --------
    random_spd_matrix : Generate a random symmetric positive definite matrix.

    Examples
    --------
    >>> from probnum.problems.zoo.linalg import random_sparse_spd_matrix
    >>> sparsemat = random_sparse_spd_matrix(dim=5, density=0.1, random_state=42)
    >>> sparsemat
    array([[1.        , 0.        , 0.        , 0.        , 0.        ],
           [0.        , 1.        , 0.        , 0.        , 0.        ],
           [0.        , 0.        , 1.        , 0.        , 0.24039507],
           [0.        , 0.        , 0.        , 1.        , 0.        ],
           [0.        , 0.        , 0.24039507, 0.        , 1.05778979]])
    """
    # Initialization
    random_state = _utils.as_random_state(random_state)
    if not 0 <= density <= 1:
        raise ValueError(f"Density must be between 0 and 1, but is {density}.")
    chol = np.eye(dim)
    num_off_diag_cholesky = int(0.5 * dim * (dim - 1))
    num_nonzero_entries = int(num_off_diag_cholesky * density)

    if num_nonzero_entries > 0:
        # Draw entries of lower triangle (below diagonal) according to sparsity level
        entry_ids = np.mask_indices(n=dim, mask_func=np.tril, k=-1)
        idx_samples = random_state.choice(
            a=num_off_diag_cholesky, size=num_nonzero_entries, replace=False)
        nonzero_entry_ids = (entry_ids[0][idx_samples], entry_ids[1][idx_samples])

        # Fill Cholesky factor
        chol[nonzero_entry_ids] = random_state.uniform(
            low=chol_entry_min, high=chol_entry_max, size=num_nonzero_entries)

    return chol @ chol.T
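A quick usage sketch, assuming the probnum helpers the function depends on (_utils.as_random_state and the type aliases) are importable, verifying the symmetry and positive-definiteness claims via a Cholesky factorization:

import numpy as np

A = random_sparse_spd_matrix(dim=50, density=0.2, random_state=0)
assert np.allclose(A, A.T)  # symmetric by construction (A = L @ L.T)
np.linalg.cholesky(A)       # raises LinAlgError if A were not positive definite
sparsity = 1.0 - np.count_nonzero(A) / A.size
print(f"fraction of zero entries: {sparsity:.2f}")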
rsnlabels = []
for row in range(0, len(labelinfo), 2):
    rsnlabels.append(labelinfo[row].split('_')[2])


# exponential fit
def func(x, a, b, c):
    return a * np.exp(-b * x) + c


####################################
# tsn vs distance
####################################
# underlying structure
mask = np.mask_indices(400, np.triu, 1)
masked_tsn = tsn[mask]

# plot tsn distance scatter plot
distance = sklearn.metrics.pairwise_distances(coor)
x = distance[mask]
y = tsn[mask]
data, x_e, y_e = np.histogram2d(x, y, bins=30, density=True)
z = interpn((0.5 * (x_e[1:] + x_e[:-1]), 0.5 * (y_e[1:] + y_e[:-1])),
            data, np.vstack([x, y]).T, method='splinef2d', bounds_error=False)
def minimise_energy(deprot_charges, affinities, xyz, charge,
                    coulomb_only=False, verbose=False):
    """Minimising function.

    Iterates through a proton sequence to find the combination that provides
    the minimal energy.

    Parameters
    ----------
    deprot_charges : ndarray
        charge of residue when deprotonated (1xN array)
    affinities : ndarray
        proton affinities of each residue (1xN array)
    xyz : ndarray
        coordinates for point charges (Nx3)
    charge : int
        target charge state
    coulomb_only : bool
        whether to only calculate Coulomb energy
    verbose : bool
        whether to print results

    Returns
    -------
    proton_seq : ndarray
        current best proton sequence after minimisation
    e_total : float
        total energy of `proton_seq` after minimisation
        (Only if `coulomb_only`=False)
    e_coulomb : float
        Coulomb energy of `proton_seq` after minimisation
    e_proton : float
        binding energy of `proton_seq` after minimisation
    """
    # Initialise local variables
    proton_seq = moveable_protons(deprot_charges, charge)
    mask = np.mask_indices(len(proton_seq), np.triu, 1)
    distances = distance_matrix(xyz, xyz)[mask]
    if coulomb_only:
        get_energy = lambda: coulomb_energy(proton_seq + deprot_charges, distances, mask)
    else:
        get_energy = lambda: (coulomb_energy(proton_seq + deprot_charges, distances, mask)
                              - binding_energy(proton_seq, affinities))

    # Initial energies
    current_min = get_energy()
    shunt_min = current_min
    counters = [time.process_time(), 0, 0]
    while shunt_min <= current_min:
        counters[1] += 1
        if verbose:
            print('Shunt={}'.format(counters[1]))
        shunt_min = get_energy()
        best_shunt = [0, 0]
        deprot_sequence = np.where(proton_seq == 0)[0]
        for p in proton_seq.nonzero()[0]:
            proton_seq[p] = 0
            # For all protonatable sites
            for d in deprot_sequence:
                counters[2] += 1
                proton_seq[d] = 1
                e_tot = get_energy()
                if verbose:
                    print('Step {}, {:10.2f} kJ/mol'.format(counters[2], e_tot))
                if e_tot <= shunt_min:
                    shunt_min = e_tot
                    best_shunt = [p, d]
                proton_seq[d] = 0
            proton_seq[p] = 1
        if verbose:
            print('Shunt {} minimum energy {:.2f} kJ/mol'.format(counters[1], shunt_min))
        # Update `proton_seq` to best values
        if shunt_min >= current_min:
            e_coulomb = coulomb_energy(proton_seq + deprot_charges, distances, mask)
            e_proton = binding_energy(proton_seq, affinities)
            counters[0] = time.process_time() - counters[0]
            print("Best Sequence\n-------------\n{}".format(proton_seq))
            print("Coulomb energy = {:.2f} kJ/mol".format(e_coulomb))
            if not coulomb_only:
                print("Binding energy = {:.2f} kJ/mol".format(e_proton))
                print("Total energy = {:.2f} kJ/mol".format(current_min))
            print("Optimisation completed in {:.2f} seconds after {} shunts "
                  "in a total of {} steps.".format(*counters))
            break
        # Reset `proton_seq` to best sequence to reseed
        proton_seq[best_shunt[0]] = 0
        proton_seq[best_shunt[1]] = 1
        current_min = shunt_min
    return proton_seq, current_min, e_coulomb, e_proton
def mat2arr(mat):
    """Convert distance matrix to array format."""
    arr = mat[np.mask_indices(mat.shape[0], np.tril, -1)]  # use tril
    idx = np.transpose(np.tril_indices(mat.shape[0], -1))
    return arr, idx
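A hypothetical inverse for round-tripping, assuming a symmetric matrix with a zero diagonal (as for distance matrices); the helper name is chosen here for illustration:

import numpy as np

def arr2mat(arr, idx):
    # Rebuild the symmetric matrix from the strict lower triangle produced by
    # mat2arr; the diagonal is assumed to be zero.
    n = idx.max() + 1
    mat = np.zeros((n, n))
    mat[idx[:, 0], idx[:, 1]] = arr
    return mat + mat.T

mat = np.abs(np.random.randn(5, 5))
mat = (mat + mat.T) * (1 - np.eye(5))  # symmetric, zero diagonal
arr, idx = mat2arr(mat)
assert np.allclose(arr2mat(arr, idx), mat)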
def firing_correlation(firing_array, baseline_window, stimulus_window,
                       data_step_size=25, shuffle_repeats=100, accumulated=False):
    """
    General function, not bound by object parameters.
    Calculates correlations in 2 windows of a firing_array (defined below)
    according to either accumulated distance or distance of mean points.

    PARAMS
    :firing_array: (nrn x trial x time) array of firing rates
    :baseline_window: Tuple of time in ms of what window to take for BASELINE firing
    :stimulus_window: Tuple of time in ms of what window to take for STIMULUS firing
    :data_step_size: Resolution at which the data was binned (if at all)
    :shuffle_repeats: How many shuffle repeats to perform for analysis control
    :accumulated: If True -> will calculate temporally integrated pair-wise
        distances between all points
        If False -> will calculate distance between mean of all points
    """
    # Calculate indices for slicing data
    baseline_start_ind = int(baseline_window[0] / data_step_size)
    baseline_end_ind = int(baseline_window[1] / data_step_size)
    stim_start_ind = int(stimulus_window[0] / data_step_size)
    stim_end_ind = int(stimulus_window[1] / data_step_size)

    pre_dat = firing_array[:, :, baseline_start_ind:baseline_end_ind]
    stim_dat = firing_array[:, :, stim_start_ind:stim_end_ind]

    if accumulated:
        # Calculate accumulated pair-wise distances for baseline data
        pre_dists = np.zeros((pre_dat.shape[1], pre_dat.shape[1], pre_dat.shape[2]))
        for time_bin in range(pre_dists.shape[2]):
            pre_dists[:, :, time_bin] = dist_mat(pre_dat[:, :, time_bin].T,
                                                 pre_dat[:, :, time_bin].T)
        sum_pre_dist = np.sum(pre_dists, axis=2)

        # Calculate accumulated pair-wise distances for post-stimulus data
        stim_dists = np.zeros((stim_dat.shape[1], stim_dat.shape[1], stim_dat.shape[2]))
        for time_bin in range(stim_dists.shape[2]):
            stim_dists[:, :, time_bin] = dist_mat(stim_dat[:, :, time_bin].T,
                                                  stim_dat[:, :, time_bin].T)
        sum_stim_dist = np.sum(stim_dists, axis=2)

        # Remove lower triangle in correlation to not double count points
        indices = np.mask_indices(stim_dat.shape[1], np.triu, 1)
        rho, p = pearsonr(sum_pre_dist[indices], sum_stim_dist[indices])
        pre_mat, stim_mat = sum_pre_dist, sum_stim_dist
    else:
        # Calculate distances between trial means for baseline data
        mean_pre = np.mean(pre_dat, axis=2)
        mean_pre_dist = dist_mat(mean_pre.T, mean_pre.T)

        # Calculate distances between trial means for post-stimulus data
        mean_stim = np.mean(stim_dat, axis=2)
        mean_stim_dist = dist_mat(mean_stim.T, mean_stim.T)

        indices = np.mask_indices(stim_dat.shape[1], np.triu, 1)
        rho, p = pearsonr(mean_pre_dist[indices], mean_stim_dist[indices])
        pre_mat, stim_mat = mean_pre_dist, mean_stim_dist

    rho_sh_vec = np.empty(shuffle_repeats)
    p_sh_vec = np.empty(shuffle_repeats)
    for repeat in range(shuffle_repeats):
        rho_sh_vec[repeat], p_sh_vec[repeat] = pearsonr(
            np.random.permutation(pre_mat[indices]), stim_mat[indices])

    return rho, p, rho_sh_vec, p_sh_vec, pre_mat, stim_mat
import numpy as np

iu = np.mask_indices(3, np.triu)
a = np.arange(9).reshape(3, 3)
a        # array([[0, 1, 2], [3, 4, 5], [6, 7, 8]])
a[iu]    # array([0, 1, 2, 4, 5, 8])
iu1 = np.mask_indices(3, np.triu, 1)
a[iu1]   # array([1, 2, 5])
def task_fMRI_plots(SBJ, PURE, WL_sec, corr_range):
    # Define segment data
    # -------------------
    if WL_sec == 30:
        if PURE == 'pure':
            seg_df = WL30pure_taskseg_df
        else:
            seg_df = WL30_taskseg_df
    else:
        if PURE == 'pure':
            seg_df = WL45pure_taskseg_df
        else:
            seg_df = WL45_taskseg_df

    # Define PURE variable based on widget
    # ------------------------------------
    if PURE == 'not pure':
        PURE = ''  # Load data with non-pure windows

    # Load task fMRI data
    # -------------------
    file_name = SBJ + '_CTask001_WL0' + str(WL_sec) + '_WS01' + PURE + '_NROI0200_dF.mat'  # Data file name
    data_path = osp.join('/data/SFIMJGC_HCP7T/PRJ_CognitiveStateDetection02/PrcsData_PNAS2015',
                         SBJ, 'D02_CTask001', file_name)  # Path to data
    data_df = loadmat(data_path)['CB']['snapshots'][0][0]  # Read data
    num_samp = data_df.shape[0]  # Save number of samples as a variable

    # Create task segments plots
    # --------------------------
    task_color_map = {'Rest': 'gray', 'Memory': 'blue', 'Video': 'yellow',
                      'Math': 'green', 'Inbetween': 'black'}  # Color key for task segments
    seg_x = hv.Segments(seg_df, [hv.Dimension('start', range=(-10, num_samp - 1.5)),
                                 hv.Dimension('start_event', range=(-5, num_samp - 1.5)),
                                 'end', 'end_event'],
                        'task').opts(color='task', cmap=task_color_map, line_width=7,
                                     show_legend=True)  # x axis segments
    seg_y = hv.Segments(seg_df, [hv.Dimension('start_event', range=(-10, num_samp - 1.5)),
                                 hv.Dimension('start', range=(-5, num_samp - 1.5)),
                                 'end_event', 'end'],
                        'task').opts(color='task', cmap=task_color_map, line_width=7,
                                     show_legend=False)  # y axis segments
    seg_plot = (seg_x * seg_y).opts(xlabel=' ', ylabel=' ', show_legend=False)  # All segments

    # Compute correlation and distance matrix
    # ---------------------------------------
    data_corr = np.corrcoef(data_df)  # Correlation matrix
    data_dist = pairwise_distances(data_df, metric='euclidean')  # Distance matrix

    # Compute distribution of correlation and distance matrix
    # -------------------------------------------------------
    triangle = np.mask_indices(num_samp, np.triu, k=1)  # Top triangle mask for matrices
    corr_freq, corr_edges = np.histogram(np.array(data_corr)[triangle], 100)  # Histogram of top triangle of correlation matrix (100 bars)
    dist_freq, dist_edges = np.histogram(np.array(data_dist)[triangle], 100)  # Histogram of top triangle of distance matrix (100 bars)

    # Create matrix and histogram plots
    # ---------------------------------
    corr_img = hv.Image(np.rot90(data_corr),
                        bounds=(-0.5, -0.5, num_samp - 1.5, num_samp - 1.5)).opts(
        cmap='viridis', colorbar=True, height=300, width=400,
        title='Correlation Matrix').redim.range(z=corr_range)
    dist_img = hv.Image(np.rot90(data_dist),
                        bounds=(-0.5, -0.5, num_samp - 1.5, num_samp - 1.5)).opts(
        cmap='viridis', colorbar=True, height=300, width=400, title='Distance Matrix')
    corr_his = hv.Histogram((corr_edges, corr_freq)).opts(
        xlabel='Correlation', height=300, width=400, title='Correlation Histogram')
    dist_his = hv.Histogram((dist_edges, dist_freq)).opts(
        xlabel='Distance', height=300, width=400, title='Distance Histogram')

    # Overlay task segment plot with correlation and distance matrices
    corr_img_wseg = (corr_img * seg_plot).opts(width=600, height=300, legend_position='right')
    dist_img_wseg = (dist_img * seg_plot).opts(width=600, height=300, legend_position='right')

    dash = (corr_img_wseg + corr_his + dist_img_wseg + dist_his).opts(
        opts.Layout(shared_axes=False)).cols(2)  # Dashboard of all plots

    return dash
def rs_fMRI_plots(SBJ, RUN, WL_sec, corr_range):
    # Load rs fMRI data
    # -----------------
    file_name = SBJ + '_fanaticor_Craddock_T2Level_0200_wl' + str(WL_sec).zfill(3) + \
        's_ws002s_' + RUN + '_PCA_vk97.5.swcorr.pkl'  # Data file name
    data_path = osp.join('/data/SFIM_Vigilance/PRJ_Vigilance_Smk02/PrcsData', SBJ,
                         'D02_Preproc_fMRI', file_name)  # Path to data
    data_df = pd.read_pickle(data_path).T  # Read data into pandas data frame
    num_samp = data_df.shape[0]  # Save number of samples as a variable

    # Load sleep segmenting data
    # --------------------------
    seg_path = osp.join(PRJDIR, 'Data', 'Samika_DSet02', 'Sleep_Segments',
                        SBJ + '_' + RUN + '_WL_' + str(WL_sec) + 'sec_Sleep_Segments.pkl')  # Path to segment data
    seg_df = pd.read_pickle(seg_path)  # Load segment data

    # Compute correlation and distance matrix
    # ---------------------------------------
    data_corr = np.corrcoef(data_df)  # Correlation matrix
    data_dist = pairwise_distances(data_df, metric='euclidean')  # Distance matrix

    # Compute distribution of correlation and distance matrix
    # -------------------------------------------------------
    triangle = np.mask_indices(num_samp, np.triu, k=1)  # Top triangle mask for matrices
    corr_freq, corr_edges = np.histogram(np.array(data_corr)[triangle], 100)  # Histogram of top triangle of correlation matrix (100 bars)
    dist_freq, dist_edges = np.histogram(np.array(data_dist)[triangle], 100)  # Histogram of top triangle of distance matrix (100 bars)

    # Create sleep segments plots
    # ---------------------------
    sleep_color_map = {'Wake': 'orange', 'Stage 1': 'yellow', 'Stage 2': 'green',
                       'Stage 3': 'blue', 'Undetermined': 'gray'}  # Color key for sleep staging
    seg_x = hv.Segments(seg_df, [hv.Dimension('start', range=(-10, num_samp - 1.5)),
                                 hv.Dimension('start_event', range=(-5, num_samp - 1.5)),
                                 'end', 'end_event'],
                        'stage').opts(color='stage', cmap=sleep_color_map, line_width=7,
                                      show_legend=True)  # x axis segments
    seg_y = hv.Segments(seg_df, [hv.Dimension('start_event', range=(-10, num_samp - 1.5)),
                                 hv.Dimension('start', range=(-5, num_samp - 1.5)),
                                 'end_event', 'end'],
                        'stage').opts(color='stage', cmap=sleep_color_map, line_width=7,
                                      show_legend=False)  # y axis segments
    seg_plot = (seg_x * seg_y).opts(xlabel=' ', ylabel=' ', show_legend=False)  # All segments

    # Create matrix and histogram plots
    # ---------------------------------
    # rasterize() function used for big data sets
    corr_img = rasterize(hv.Image(np.rot90(data_corr),
                                  bounds=(-0.5, -0.5, num_samp - 1.5, num_samp - 1.5)).opts(
        cmap='viridis', colorbar=True, title='Correlation Matrix')).redim.range(z=corr_range)
    dist_img = rasterize(hv.Image(np.rot90(data_dist),
                                  bounds=(-0.5, -0.5, num_samp - 1.5, num_samp - 1.5)).opts(
        cmap='viridis', colorbar=True, title='Distance Matrix'))
    corr_his = rasterize(hv.Histogram((corr_edges, corr_freq)).opts(
        xlabel='Correlation', height=300, width=400, title='Correlation Histogram'))
    dist_his = rasterize(hv.Histogram((dist_edges, dist_freq)).opts(
        xlabel='Distance', height=300, width=400, title='Distance Histogram'))

    # Overlay sleep segment plot with correlation and distance matrices
    corr_img_wseg = (corr_img * seg_plot).opts(width=600, height=300, legend_position='right')
    dist_img_wseg = (dist_img * seg_plot).opts(width=600, height=300, legend_position='right')

    dash = (corr_img_wseg + corr_his + dist_img_wseg + dist_his).opts(
        opts.Layout(shared_axes=False)).cols(2)  # Dashboard of all plots

    return dash
def computeMetrics(infile1, infile2, windowsize, outfile,
                   predictioncelltype, modelcelltype, modelchromosome):
    # try loading HiC matrices
    try:
        hicMatrix1 = hm.hiCMatrix(infile1)
        hicMatrix2 = hm.hiCMatrix(infile2)
    except Exception as e:
        print(e)
        msg = "Could not load matrices, probably no cooler format"
        raise SystemExit(msg)

    # check bin sizes, must be equal / same matrix resolution
    binSize1 = hicMatrix1.getBinSize()
    binSize2 = hicMatrix2.getBinSize()
    if binSize1 != binSize2:
        msg = "Aborting. Bin sizes not equal.\n"
        msg += "Bin size 1: {0:d}, bin size 2: {1:d}"
        msg = msg.format(binSize1, binSize2)
        raise SystemExit(msg)
    numberOfDiagonals = int(np.round(windowsize / binSize1))
    if numberOfDiagonals < 1:
        msg = "Window size must be larger than bin size of matrices.\n"
        msg += "Remember to specify window in basepairs, not bins."
        raise SystemExit(msg)

    # check chromosomes
    chromList1 = hicMatrix1.getChrNames()
    chromList2 = hicMatrix2.getChrNames()
    if chromList1 and chromList2:
        chrom1Str = str(chromList1[0])
        chrom2Str = str(chromList2[0])
        if chrom1Str != chrom2Str:
            msg = "Aborting, chromosomes are not the same: {:s} vs. {:s}"
            msg = msg.format(chrom1Str, chrom2Str)
            raise SystemExit(msg)
        if len(chromList1) != 1 or len(chromList2) != 1:
            msg = "Warning, more than one chromosome in the matrix\n"
            msg += "Consider using e.g. hicAdjustMatrix with --keep on the desired chromosome.\n"
            msg += "Only taking the first chrom, {:s}"
            msg = msg.format(chrom1Str)
    else:
        msg = "Aborting, no chromosomes found in matrix"
        raise SystemExit(msg)

    sparseMatrix1 = hicMatrix1.matrix
    sparseMatrix2 = hicMatrix2.matrix
    shape1 = sparseMatrix1.shape
    shape2 = sparseMatrix2.shape
    if shape1 != shape2:
        msg = "Aborting. Shapes of matrices are not equal.\n"
        msg += "Shape 1: ({:d},{:d}); Shape 2: ({:d},{:d})"
        msg = msg.format(shape1[0], shape1[1], shape2[0], shape2[1])
        raise SystemExit(msg)
    if numberOfDiagonals > shape1[0] - 1:
        msg = "Aborting. Window size {:d} larger than matrix size {:d}"
        msg = msg.format(numberOfDiagonals, shape1[0] - 1)
        raise SystemExit(msg)

    trapezIndices = np.mask_indices(shape1[0], maskFunc, k=numberOfDiagonals)
    reads1 = np.array(sparseMatrix1[trapezIndices])[0]
    reads2 = np.array(sparseMatrix2[trapezIndices])[0]

    matrixDf = pd.DataFrame(columns=['first', 'second', 'distance', 'reads1', 'reads2'])
    matrixDf['first'] = np.uint32(trapezIndices[0])
    matrixDf['second'] = np.uint32(trapezIndices[1])
    matrixDf['distance'] = np.uint32(matrixDf['second'] - matrixDf['first'])
    matrixDf['reads1'] = np.float32(reads1)
    matrixDf['reads2'] = np.float32(reads2)
    matrixDf.fillna(0, inplace=True)

    pearsonAucIndices, pearsonAucValues = getCorrelation(
        matrixDf, 'distance', 'reads1', 'reads2', 'pearson')
    pearsonAucScore = metrics.auc(pearsonAucIndices, pearsonAucValues)
    spearmanAucIncides, spearmanAucValues = getCorrelation(
        matrixDf, 'distance', 'reads1', 'reads2', 'spearman')
    spearmanAucScore = metrics.auc(spearmanAucIncides, spearmanAucValues)
    corrScoreOPredicted_Pearson = matrixDf[['reads1', 'reads2']].corr(
        method='pearson').iloc[0::2, -1].values[0]
    corrScoreOPredicted_Spearman = matrixDf[['reads1', 'reads2']].corr(
        method='spearman').iloc[0::2, -1].values[0]
    print("PearsonAUC", pearsonAucScore)
    print("SpearmanAUC", spearmanAucScore)

    columns = getResultFileColumnNames(sorted(list(matrixDf.distance.unique())))
    resultsDf = pd.DataFrame(columns=columns)
    resultsDf.set_index('Tag', inplace=True)
    tag = 'xxx'
    resultsDf.loc[tag, 'R2'] = metrics.r2_score(matrixDf['reads2'], matrixDf['reads1'])
    resultsDf.loc[tag, 'MSE'] = metrics.mean_squared_error(matrixDf['reads2'], matrixDf['reads1'])
    resultsDf.loc[tag, 'MAE'] = metrics.mean_absolute_error(matrixDf['reads2'], matrixDf['reads1'])
    resultsDf.loc[tag, 'MSLE'] = metrics.mean_squared_log_error(matrixDf['reads2'], matrixDf['reads1'])
    resultsDf.loc[tag, 'AUC_OP_P'] = pearsonAucScore
    resultsDf.loc[tag, 'AUC_OP_S'] = spearmanAucScore
    resultsDf.loc[tag, 'S_OP'] = corrScoreOPredicted_Spearman
    resultsDf.loc[tag, 'P_OP'] = corrScoreOPredicted_Pearson
    resultsDf.loc[tag, 'resolution'] = binSize1
    resultsDf.loc[tag, 'modelChromosome'] = modelchromosome
    resultsDf.loc[tag, 'modelCellType'] = modelcelltype
    resultsDf.loc[tag, 'predictionChromosome'] = chrom1Str
    resultsDf.loc[tag, 'predictionCellType'] = predictioncelltype
    for i, pearsonIndex in enumerate(pearsonAucIndices):
        columnName = int(round(pearsonIndex * matrixDf.distance.max()))
        resultsDf.loc[tag, columnName] = pearsonAucValues[i]
    resultsDf = resultsDf.sort_values(by=['predictionCellType', 'predictionChromosome',
                                          'modelCellType', 'modelChromosome',
                                          'conversion', 'Window', 'Merge', 'normalize'])
    resultsDf.to_csv(outfile)
def __init__(self, SS_dimensions, supervisor_action_mask, configuration, H=None):
    # Extract configuration parameters
    # Define method to be used in Q-learning:
    # either Value Iteration (VI) or Policy Iteration (PI)
    self.method = configuration.method

    # Learning parameters
    self.gamma = configuration.learning_rate
    # Sample factor
    self.sample_factor = configuration.sample_factor

    # State Space System dimensions
    self.n, self.m, self.p = SS_dimensions

    # Define action mask of agent (i.e. which inputs it controls)
    self.action_mask = np.ones(self.m).astype('bool')
    # Adapt agent characteristics if a supervisor is present
    if supervisor_action_mask is not None:
        # Correct value of m, if a supervisor is controlling some inputs
        self.m -= np.sum(supervisor_action_mask)
        # Create action mask
        self.action_mask = np.invert(supervisor_action_mask)

    # Number of samples collected for updating H
    min_num_samples = (self.n + self.m + self.p) * (self.n + self.m + self.p + 1) / 2
    self.num_samples = int(self.sample_factor * min_num_samples)

    # Initialise Z vector
    self.Z_shape = (self.n + self.m + self.p, 1)
    self.Z_k = None
    self.Z_k_excite = None
    self.Z_k_1 = None
    self.Z_k_1_excite = None
    self.Z_kron_mask = np.mask_indices(self.n + self.m + self.p, np.triu)

    # Create saturation of action amplitude
    self.action_bounds = configuration.action_bounds
    self.min_input = self.action_bounds[:, 0].reshape((self.action_mask.shape[0], 1))
    self.max_input = self.action_bounds[:, 1].reshape((self.action_mask.shape[0], 1))

    # Kernel initialisation
    self.H_shape = ((self.n + self.m + self.p), (self.n + self.m + self.p))
    self.num_independent_vals_H = int(
        (self.n + self.m + self.p) * (self.n + self.m + self.p + 1) / 2)
    self.H_j = None
    self.H_j_vector = None

    # Stabilising control gain initialisation
    self.policy_parameters = None

    # Initialise agent cost
    self.cost_k = None
    self.cost_k_1 = None

    # Compute initial H and K1 matrices
    if H is not None:
        self.H_j = H
        self.policy_parameters = self.calculate_K1()
    else:
        self.define_H_K1()
    self.H_j_vector = self.H_to_H_1D()

    # Create array to store all kernel values
    self.H_storage = np.copy(self.H_j_vector)
    self.Q_storage = np.zeros((1, 0))
    self.u_excite_storage = None

    # Define sample collectors
    # --> Policy Iteration
    self.regr_m_PI = None
    self.regr_v_PI = None
    # --> Value Iteration
    self.regr_m_VI = None
    self.regr_v_VI = None
    # Reset all sample collector arrays
    self.reset_sample_collector_arrays()
    return
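The Z_kron_mask attribute above suggests the quadratic feature basis used in this flavor of Q-learning: the unique entries of the outer product Z Z^T, which np.mask_indices(dim, np.triu) selects without double counting the symmetric pairs. A small sketch of that construction, with names chosen here for illustration:

import numpy as np

dim = 4  # stands in for n + m + p
Z = np.random.randn(dim, 1)
mask = np.mask_indices(dim, np.triu)
# dim*(dim+1)//2 unique quadratic terms z_i * z_j with i <= j,
# matching the min_num_samples formula in the constructor above
quadratic_basis = (Z @ Z.T)[mask]
assert quadratic_basis.shape == (dim * (dim + 1) // 2,)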
def getFeatures(self, mask=False, origin=[0, 0], win_size=0, processes=1):
    """
    This is a Method that returns features (keypoints and descriptors)
    that are obtained by using the FeatureExtractor.Detector object.

    Parameters
    ----------
    processes : int, optional
        Number of processes to use, default = 1.
    mask : boolean, optional, default False.
        Whether to apply a window mask before feature detection.

    Returns
    -------
    keypts : keypoints
    descs : descriptors
    """
    detector = self.detector
    dset = self.data
    lib = self.lib
    if mask:
        def mask_func(x, winSize):
            # integer division so the bounds are valid slice indices
            x[origin[0] - winSize // 2: origin[0] + winSize // 2,
              origin[1] - winSize // 2: origin[1] + winSize // 2] = 2
            x = x - 1
            return x

        mask_ind = np.mask_indices(dset.shape[-1], mask_func, win_size)
        self.data = np.array([imp[mask_ind].reshape(win_size, win_size)
                              for imp in dset])

    # detect and compute keypoints
    def detect(image):
        if lib == 'opencv':
            image = (image - image.mean()) / image.std()
            image = image.astype('uint8')
            k_obj, d_obj = detector.detectAndCompute(image, None)
            keypts, descs = pickle_keypoints(k_obj), pickle_keypoints(d_obj)
        elif lib == 'skimage':
            imp = (image - image.mean()) / np.std(image)
            imp[imp < 0] = 0
            imp.astype('float32')
            detector.detect_and_extract(imp)
            keypts, descs = detector.keypoints, detector.descriptors
        return keypts, descs

    # start pool of workers
    if processes > 1:
        print('launching %i kernels...' % processes)
        pool = mp.Pool(processes)
        tasks = [imp for imp in self.data]
        chunk = int(self.data.shape[0] / processes)
        jobs = pool.imap(detect, tasks, chunksize=chunk)
        # get keypoints and descriptors
        results = []
        print('Extracting features...')
        try:
            for j in jobs:
                results.append(j)
        except ValueError:
            warnings.warn('ValueError something about 2d-image. Probably some of the '
                          'detector input params are wrong.')
        # close the pool
        print('Closing down the kernels... \n')
        pool.close()
    else:
        results = [detect(imp) for imp in self.data]

    # get keypoints and descriptors
    keypts = [itm[0].astype('int') for itm in results]
    desc = [itm[1] for itm in results]
    return keypts, desc
# P2.1 Generating index arrays
arr3: np.ndarray = arr1.reshape((3, 3))
# np.where: where the condition is True, take the value from x; where False,
# take it from y. (Return elements chosen from x or y depending on condition.)
arr3_res = np.where(arr3 > 3, arr3, np.zeros_like(arr3))
print(arr3_res)
# diag_indices returns the indices of the main diagonal; ndim sets the number
# of dimensions
print(np.diag_indices(3))
print(arr3[np.diag_indices(3)])
# diag_indices_from derives the diagonal indices from an existing array
print(np.diag_indices_from(arr3))
# mask_indices returns the index positions selected by a mask function;
# with np.tril this is the lower triangle
print(np.mask_indices(3, np.tril))
# dedicated functions for the upper/lower-triangle indices
print(np.tril_indices(3))
print(np.tril_indices_from(arr3))
print(np.triu_indices(3))
print(np.triu_indices_from(arr3))

# P2.2 Indexing-like operations
# np.take first flattens arr3, then picks elements at the given indices,
# returning an array with the same shape as the index array
print(np.take(arr3, np.arange(0, 4)))
# np.compress filters elements according to a boolean condition; an axis can
# be specified. If axis is omitted, the array is flattened before filtering.
arr4 = np.compress([0, 1], arr3)
print(arr4)
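As a closing check on the snippet above, np.mask_indices(n, np.tril) and the dedicated np.tril_indices(n) select the same positions (and likewise for np.triu); a minimal sketch:

import numpy as np

n = 4
lower_via_mask = np.mask_indices(n, np.tril)
lower_direct = np.tril_indices(n)
assert all((a == b).all() for a, b in zip(lower_via_mask, lower_direct))

upper_via_mask = np.mask_indices(n, np.triu, 1)
upper_direct = np.triu_indices(n, k=1)
assert all((a == b).all() for a, b in zip(upper_via_mask, upper_direct))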