def _find_contacts(fragments, cutoff):
    """Return fragment index pairs for fragments with atoms inside ``cutoff``.

    Parameters
    ----------
    fragments : list of AtomGroup
        molecules to consider
    cutoff : float
        largest atom-atom distance at which two fragments count as touching

    Returns
    -------
    frag_idx : numpy array, shape (n, 2)
        fragment indices of touching fragments, e.g. [[0, 1], [2, 3], ...].
        There is one row per atom pair found within the cutoff whose two
        atoms map to different fragments; the smaller fragment index is in
        the first column.
    """
    # Atom index pairs closer than the cutoff; the distances are discarded.
    # TODO: Also change this line once distances not returned
    atom_pairs, _ = distances.self_capped_distance(
        sum(fragments).positions,
        max_cutoff=cutoff,
        box=fragments[0].dimensions,
        # TODO: add this back once MDA cuts release
        # return_distances=False,
    )

    # Lookup table: position in the concatenated atom list -> fragment index.
    atom_to_frag = np.repeat(
        np.arange(len(fragments)),
        [len(frag) for frag in fragments],
    )

    # Translate every atom pair into the pair of fragments owning the atoms.
    frag_pairs = atom_to_frag[atom_pairs]

    # Keep only rows with first < second: this drops pairs inside a single
    # fragment (i == j) and keeps one orientation of every other pair.
    first, second = frag_pairs[:, 0], frag_pairs[:, 1]
    return frag_pairs[first < second]
def get_distances(structure: Structure, max_cutoff: float = 5, min_cutoff: float = 1):
    """Find atom pairs of ``structure`` separated by a distance inside a window.

    Parameters
    ----------
    structure : Structure
        Object whose ``to_dataframe()`` result provides the coordinate
        columns ``atom_x``, ``atom_y`` and ``atom_z``.
    max_cutoff : float, optional
        Upper distance bound handed to ``self_capped_distance`` (default 5).
    min_cutoff : float, optional
        Lower distance bound handed to ``self_capped_distance`` (default 1).

    Returns
    -------
    df
        The dataframe produced by ``structure.to_dataframe()``.
    pairs
        Index pairs returned by ``self_capped_distance``, with each row
        sorted in ascending order. The indices refer to rows of ``df``.
    distances
        The distances returned by ``self_capped_distance`` for ``pairs``.
    """
    df = structure.to_dataframe()
    coordinates = df[["atom_x", "atom_y", "atom_z"]].values
    pairs, distances = self_capped_distance(
        coordinates,
        max_cutoff=max_cutoff,
        min_cutoff=min_cutoff,
    )

    # Normalise every pair in place so the smaller index comes first.
    pairs.sort(axis=1)

    # Sanity check that every index addresses a dataframe row. The size guard
    # matters: max() of an empty array raises ValueError, which previously
    # made the function crash whenever no pair fell inside the window.
    assert pairs.size == 0 or pairs.max() < len(df)
    return df, pairs, distances
def compute_clusters():
    """Cluster the ``TO`` residues of ``SYSTEM`` by center-of-mass proximity.

    Reads the module-level ``SYSTEM`` (atom selection source and box
    dimensions) and ``ARGS.threshold`` (the distance cap for the pair search).

    Returns
    -------
    The value returned by ``clustering`` for the formatted neighbor pairs and
    the number of selected residues.
    """
    to_atoms = SYSTEM.select_atoms("resname TO")

    # One center of mass per residue of the selection.
    residue_coms = to_atoms.center_of_mass(compound="residues")

    # Capped pair search between the residue centers of mass.
    capped = distances.self_capped_distance(
        residue_coms,
        ARGS.threshold,
        box=SYSTEM.dimensions,
    )

    # Only the first element of the result (the pairs) feeds the clustering.
    return clustering(format_neighborhood(capped[0]), to_atoms.n_residues)
def getContactsC(selection, numNodes, nAtoms, cutoffDist, tmpDists, tmpDistsAtms,
                 contactMat, atomToNode, nodeGroupIndicesNP, nodeGroupIndicesNPAux,
                 distMode=MODE_ALL):
    '''Executes MDAnalysis atom distance calculation and node contact detection.

    This function is Cython compiled as a wrapper for two optimized distance
    calculation and contact determination calls. The first is one of
    MDAnalysis' distance functions (`self_distance_array` or
    `self_capped_distance`, depending on `distMode`). The second is the
    internal :py:func:`calcContactC`.
    All results are stored in pre-allocated NumPy arrays.

    Args:
        selection (obj) : Atom selection for the system being analyzed. Only
            its `positions` attribute is read here.
        numNodes (int): Number of nodes in the system.
        nAtoms (int) : Number of atoms in atom groups represented by system
            nodes. Usually hydrogen atoms are not included in contact
            detection, and are not present in atom groups.
        cutoffDist (float) : Distance at which atoms are no longer
            considered 'in contact'.
        tmpDists (obj) : Temporary pre-allocated NumPy array with atom
            distances, in linearized pair order. It is overwritten on every
            call.
        tmpDistsAtms (obj) : Temporary pre-allocated NumPy array to store the
            shortest distance between atoms in different nodes.
        contactMat (obj) : Pre-allocated NumPy matrix where node contacts
            will be stored.
        atomToNode (obj) : NumPy array that maps atoms in atom groups to
            their respective nodes.
        nodeGroupIndicesNP (obj) : NumPy array with atom indices for all
            atoms in each node group.
        nodeGroupIndicesNPAux (obj) : Auxiliary NumPy array with the indices
            of the first atom in each atom group, as listed in
            `nodeGroupIndicesNP`.
        distMode (int) : Distance calculation method. `MODE_ALL` computes all
            pair distances with `self_distance_array`; `MODE_CAPPED` only
            computes pairs within `cutoffDist` using `self_capped_distance`.

    '''

    if distMode == MODE_ALL:
        # serial vs OpenMP
        mdadist.self_distance_array(selection.positions, result=tmpDists, backend='openmp')

    if distMode == MODE_CAPPED:
        # method options are: 'bruteforce' 'nsgrid' 'pkdtree'
        pairs, distances = mdalibdist.self_capped_distance(selection.positions,
                                                           max_cutoff=cutoffDist,
                                                           min_cutoff=None,
                                                           box=None,
                                                           method='pkdtree',
                                                           return_distances=True)

        # The capped search only reports pairs inside the cutoff, so every
        # other entry must be reset; otherwise distances written by an
        # earlier call would linger and be read as contacts. The sentinel
        # (twice the cutoff) lies beyond the contact distance.
        tmpDists[:] = cutoffDist * 2

        for k, (i, j) in enumerate(pairs):
            # Go from 2D atom indices to 1D (nAtoms*(nAtoms-1)/2) indices:
            ijLI = getLinIndexC(i, j, nAtoms)
            tmpDists[ijLI] = distances[k]

    calcContactC(numNodes, nAtoms, cutoffDist, tmpDists, tmpDistsAtms,
                 contactMat, atomToNode, nodeGroupIndicesNP, nodeGroupIndicesNPAux)
def run(self):
    """Count residue pairs whose side-chain centers of mass are within the cutoff.

    For every frame of ``self.u.trajectory[self.start:self.stop:self.stride]``
    the per-residue centers of mass of the ``self.sidechain_sel`` selection
    are searched for pairs closer than ``self.hc_cutoff``. Each pair found
    increments both symmetric entries of an ``(nres, nres)`` counter matrix,
    addressed by the residues' ``resindex``.

    The counts are scaled by ``100 / self.nframes`` (presumably the number of
    frames visited -- confirm against where ``nframes`` is set), written to
    ``self.hc_file`` with ``np.savetxt`` and returned.
    """
    sidechains = self.u.select_atoms(self.sidechain_sel)
    contact_counts = np.zeros((self.nres, self.nres), int)

    for _frame in self.u.trajectory[self.start:self.stop:self.stride]:
        residue_coms = sidechains.center_of_mass(compound='residues', pbc=True)
        close_pairs, _dists = self_capped_distance(
            residue_coms,
            max_cutoff=self.hc_cutoff,
            box=sidechains.dimensions,
        )

        for first, second in close_pairs:
            # Translate positions in the center-of-mass array to resindex values.
            row = sidechains.residues[first].resindex
            col = sidechains.residues[second].resindex

            contact_counts[row, col] += 1
            if row == col:
                continue
            # Mirror the count so the matrix stays symmetric.
            contact_counts[col, row] += 1

    percentages = contact_counts * 100 / self.nframes
    np.savetxt(self.hc_file, percentages)
    return percentages
def calcDistances(selection, numNodes, nAtoms, atomToNode, cutoffDist,
                  nodeGroupIndicesNP, nodeGroupIndicesNPAux, nodeDists,
                  backend="serial", distMode=MODE_ALL, verbose=0):
    '''Executes MDAnalysis atom distance calculation and node cartesian distance calculation.

    This function is a wrapper for two optimized atomic distance calculation
    and node distance calculation calls. The first is one of MDAnalysis' atom
    distance calculation functions (either `self_distance_array` or
    `self_capped_distance`). The second is the internal
    :py:func:`atmToNodeDist`.
    All results are stored in pre-allocated NumPy arrays.

    This is intended as an analysis tool to allow the comparison of network
    distances and cartesian distances. It is similar to
    :py:func:`getContactsC`, which is optimized for contact detection.

    Args:
        selection (obj) : Atom selection for the system being analyzed. Only
            its `positions` attribute is read here.
        numNodes (int): Number of nodes in the system.
        nAtoms (int) : Number of atoms in atom groups represented by system
            nodes. Usually hydrogen atoms are not included in contact
            detection, and are not present in atom groups.
        atomToNode (obj) : NumPy array that maps atoms in atom groups to
            their respective nodes.
        cutoffDist (float): Distance cutoff used to cap distance
            calculations. In "capped" mode, pairs beyond the cutoff keep the
            placeholder distance `cutoffDist*2`.
        nodeGroupIndicesNP (obj) : NumPy array with atom indices for all
            atoms in each node group.
        nodeGroupIndicesNPAux (obj) : Auxiliary NumPy array with the indices
            of the first atom in each atom group, as listed in
            `nodeGroupIndicesNP`.
        nodeDists (obj) : Pre-allocated array to store cartesian distances.
        backend (str) : Controls how MDAnalysis will perform its distance
            calculations. Options are `serial` and `openmp`. This option is
            ignored if the distance mode is not "all".
        distMode (int): Distance calculation method. Options are 0
            (for mode "all") and 1 (for mode "capped").
        verbose (int): Controls informational output. Any non-zero value
            prints progress and timings; values above 1 also report progress
            while loading capped distances.

    Raises:
        ValueError: If `distMode` is neither `MODE_ALL` nor `MODE_CAPPED`.

    '''

    # Reject unsupported modes up front; otherwise no distance array would be
    # created and the failure would only surface later as an unbound local.
    if distMode not in (MODE_ALL, MODE_CAPPED):
        raise ValueError("Unknown distance mode: {}".format(distMode))

    if verbose:
        print("There are {} nodes and {} atoms in this system.".format(numNodes, nAtoms))

    # Number of unique atom pairs. Converting to a Python int first keeps the
    # product exact (no float rounding, no fixed-width integer overflow).
    nAtomsInt = int(nAtoms)
    nPairs = nAtomsInt * (nAtomsInt - 1) // 2

    if distMode == MODE_ALL:

        if verbose:
            print("creating array with {} elements...".format(nPairs))
            start = timer()

        tmpDists = np.zeros(nPairs, dtype=np.float64)

        if verbose:
            end = timer()
            print("Time for matrix:", timedelta(seconds=end-start))

            print("running self_distance_array...")
            start = timer()

        # serial vs OpenMP
        mdadist.self_distance_array(selection.positions, result=tmpDists, backend=backend)

        if verbose:
            end = timer()
            print("Time for contact calculation:", timedelta(seconds=end-start))

    if distMode == MODE_CAPPED:

        if verbose:
            print("creating array with {} elements...".format(nPairs))
            start = timer()

        # Pairs that the capped search does not report keep this placeholder,
        # which lies beyond the cutoff.
        tmpDists = np.full(nPairs, cutoffDist*2, dtype=float)

        if verbose:
            end = timer()
            print("Time for matrix:", timedelta(seconds=end-start))

            print("running self_capped_distance...")
            start = timer()

        # method options are: 'bruteforce' 'nsgrid' 'pkdtree'
        pairs, distances = mdalibdist.self_capped_distance(selection.positions,
                                                           max_cutoff=cutoffDist,
                                                           min_cutoff=None,
                                                           box=None,
                                                           method='pkdtree',
                                                           return_distances=True)

        if verbose:
            end = timer()
            print("Time for contact calculation:", timedelta(seconds=end-start))
            print("Found {} pairs and {} distances".format(len(pairs), len(distances)))

            print("loading distances in array...")
            start = timer()

        if verbose > 1:
            startLoop = timer()

        for k, (i, j) in enumerate(pairs):

            if verbose > 1 and not k % 1000:
                print("Loaded {} distances.".format(k))
                print("Time for {} distances: {}".format(k, timedelta(seconds=timer()-startLoop)))
                startLoop = timer()

            # Go from 2D atom indices to 1D (nAtoms*(nAtoms-1)/2) indices:
            ijLI = getLinIndexNumba(i, j, nAtoms)
            tmpDists[ijLI] = distances[k]

        if verbose:
            end = timer()
            print("Time for loading distances:", timedelta(seconds=end-start))

    if verbose:
        print("running atmToNodeDist...")
        # Restart the timer so the duration reported below covers only
        # atmToNodeDist, whichever distance mode ran before it.
        start = timer()

    # Translate atoms distances in minimum node distance.
    atmToNodeDist(numNodes, nAtoms, tmpDists, atomToNode,
                  nodeGroupIndicesNP, nodeGroupIndicesNPAux, nodeDists)

    if verbose:
        end = timer()
        print("Time for atmToNodeDist:", timedelta(seconds=end-start))