def process(self):
    """Add the current frame's pairwise distances to ``self.contacts``.

    One point per residue is built by starting at the C-alpha position and
    moving along the C-alpha -> C-beta vector by 1.5 times its length. The
    pairwise distances between these points are written into the
    preallocated ``self.tempmat`` buffer (using ``self.processor.currbox``
    as the box) and that buffer is then added in place to ``self.contacts``.
    """
    ca_xyz = self.calphas.positions
    cb_xyz = self.cbetas.positions
    # Extrapolate past the C-beta: CA + 1.5 * (CB - CA).
    projected = ca_xyz + 1.5 * (cb_xyz - ca_xyz)
    mddist.self_distance_array(
        projected,
        box=self.processor.currbox,
        result=self.tempmat,
    )
    self.contacts += self.tempmat
def get_dist_mda(n=5):
    """Call ``self_distance_array`` ``n * n`` times, then dump the heap.

    The distances are computed for the atoms named ``N`` in the tutorial
    PDB file and discarded; only the heap snapshot written at the end is
    kept.

    Args:
        n (int): Iteration count of both the outer and the inner loop.
    """
    universe = mda.Universe("tutorial/data/drd3_gi_pd.pdb")
    nitrogens = universe.select_atoms("name N")
    for _outer in range(n):
        for _inner in range(n):
            distances.self_distance_array(nitrogens.positions)
    # NOTE(review): `h` is presumably a heap profiler handle created at
    # module level - confirm against the imports of this file.
    h.heap().dump("benchmarking/heaps/3_mda.out")
def contact_maps_from_traj(pdb_file, traj_file, savefile, contact_cutoff=8.0):
    """Compute C-alpha contact maps of a trajectory in parallel and save them.

    The trajectory is processed in ``nloops`` rounds. In every round each
    rank handles one contiguous slice of frames, the per-rank results are
    gathered on rank 0, and rank 0 periodically appends them to the
    ``contact_maps`` extendable array of the output file.

    Relies on the module-level names ``comm``, ``rank`` and ``size`` as well
    as ``brute``, ``best_loop`` and ``loop_range`` for choosing the number
    of rounds.

    Args:
        pdb_file: Topology file passed to ``mda.Universe``.
        traj_file: Trajectory file passed to ``mda.Universe``.
        savefile: Output path; only opened (and written) on rank 0.
        contact_cutoff (float): Distances strictly below this value are
            stored as 1, all others as 0.
    """
    mda_traj = mda.Universe(pdb_file, traj_file)
    traj_length = len(mda_traj.trajectory)
    nloops = int(
        brute(best_loop, (loop_range, ), args=(traj_length, size),
              finish=None))
    print("traj_length: %d nloop: %d" % (traj_length, nloops))
    # Flush to disk about five times per run. Clamp to 1: with fewer than
    # five rounds `nloops // 5` is 0 and the modulo below would raise
    # ZeroDivisionError.
    write_freq = max(1, nloops // 5)
    ca = mda_traj.select_atoms('name CA')
    dist_shape = distances.self_distance_array(ca.positions).shape[0]

    if rank == 0:
        savefile = os.path.abspath(savefile)
        outfile = tables.open_file(savefile, 'w')
        atom = tables.Int8Atom()
        cm_table = outfile.create_earray(outfile.root,
                                         'contact_maps',
                                         atom,
                                         shape=(0, dist_shape))
        print("dist_shape ", dist_shape)

    contact_matrices = []
    # workaround mpi4py 2^32 limit on number of objects
    # and ib memory size limit
    for loop in range(nloops):
        contact_matrices_loop = []

        nframes = traj_length // (size * nloops)
        start = (rank + loop * size) * nframes
        end = (rank + 1 + loop * size) * nframes
        # The very last slice also picks up the remainder frames.
        if loop == nloops - 1 and rank == size - 1:
            end = traj_length
        print("loop %d rank %d start %d end %d" % (loop, rank, start, end))

        # Iterating advances the trajectory; `ca.positions` is read fresh
        # for every frame.
        for _frame in mda_traj.trajectory[start:end]:
            in_contact = distances.self_distance_array(
                ca.positions) < contact_cutoff
            contact_matrices_loop.append(in_contact.astype('int8'))
        print("rank %d cm size %d" % (rank, len(contact_matrices_loop)))

        contact_matrices_loop = comm.gather(contact_matrices_loop, root=0)
        if rank == 0:
            contact_matrices.append(
                list(chain.from_iterable(contact_matrices_loop)))
            print("loop %d " % loop, len(contact_matrices_loop),
                  len(contact_matrices_loop[0]))
            if (loop + 1) % write_freq == 0:
                contact_matrices = list(
                    chain.from_iterable(contact_matrices))
                cm_table.append(contact_matrices)
                contact_matrices = []
        comm.Barrier()

    if rank == 0:
        # Write whatever has accumulated since the last periodic flush.
        if contact_matrices:
            contact_matrices = list(chain.from_iterable(contact_matrices))
            cm_table.append(contact_matrices)
        outfile.close()
def time_self_distance_array(self, num_atoms):
    """Benchmark ``self_distance_array`` on a single coordinate array.

    All pairwise distances within ``self.coords_1`` are computed with the
    serial backend, no periodic box and no preallocated result array.
    ``num_atoms`` is not read by the body.
    """
    call_options = {
        'reference': self.coords_1,
        'box': None,
        'result': None,
        'backend': 'serial',
    }
    distances.self_distance_array(**call_options)
def time_self_distance_array_pre_allocated(self, num_atoms):
    """Benchmark ``self_distance_array`` writing into a preallocated array.

    All pairwise distances within ``self.coords_1`` are computed with the
    serial backend and no periodic box; the output goes into
    ``self.allocated_array_1D``. ``num_atoms`` is not read by the body.
    """
    call_options = {
        'reference': self.coords_1,
        'box': None,
        'result': self.allocated_array_1D,
        'backend': 'serial',
    }
    distances.self_distance_array(**call_options)
def contact_maps_from_traj(pdb_file, traj_file, contact_cutoff=8.0, savefile=None):
    """Compute a C-alpha contact map for every frame of a trajectory.

    Args:
        pdb_file: Topology file passed to ``mda.Universe``.
        traj_file: Trajectory file passed to ``mda.Universe``.
        contact_cutoff (float): Distances strictly below this value are
            stored as 1.0, all others as 0.0.
        savefile: Optional output path. When given, the contact maps are
            also appended to a ``contact_maps`` extendable array in that
            file.

    Returns:
        list: One flattened contact array per frame, in trajectory order.
    """
    mda_traj = mda.Universe(pdb_file, traj_file)
    traj_length = len(mda_traj.trajectory)
    ca = mda_traj.select_atoms('name CA')

    # Open the output first so that an unwritable path fails before the
    # (potentially long) distance calculation starts.
    outfile = None
    if savefile:
        savefile = os.path.abspath(savefile)
        outfile = tables.open_file(savefile, 'w')

    try:
        if outfile is not None:
            atom = tables.Float64Atom()
            # The zero-length second axis is the extendable one.
            cm_table = outfile.create_earray(outfile.root,
                                             'contact_maps',
                                             atom,
                                             shape=(traj_length, 0))

        contact_matrices = []
        # Iterating advances the trajectory; `ca.positions` is read fresh
        # for every frame.
        for _frame in mda_traj.trajectory:
            cm_matrix = (distances.self_distance_array(ca.positions) <
                         contact_cutoff) * 1.0
            contact_matrices.append(cm_matrix)

        if outfile is not None:
            cm_table.append(contact_matrices)
    finally:
        # Close the file even when reading or computing raised, so the
        # handle is never leaked.
        if outfile is not None:
            outfile.close()

    return contact_matrices
def get_distmatrix(self, **kwargs):
    """Build the distance matrix from MDAnalysis' ``self_distance_array``.

    The pairwise distances of ``self.coords`` are stored in ``self._dists``
    and the result of ``self._gen_matrix()`` is stored in ``self.distmatx``.
    Keyword arguments are forwarded unchanged to ``self_distance_array``.
    """
    # No PBC necessary if trajectory already wrapped
    pairwise = distanal.self_distance_array(self.coords, **kwargs)
    self._dists = pairwise
    self.distmatx = self._gen_matrix()
def getContactsC(selection, numNodes, nAtoms, cutoffDist, tmpDists, tmpDistsAtms,
                 contactMat, atomToNode, nodeGroupIndicesNP, nodeGroupIndicesNPAux,
                 distMode=MODE_ALL):
    '''Runs the MDAnalysis atom distance calculation and node contact detection.

    This function is Cython compiled and wraps two optimized calls: an
    MDAnalysis distance calculation (`self_distance_array` or
    `self_capped_distance`, depending on `distMode`) followed by the internal
    :py:func:`calcContactC`. All results are stored in pre-allocated NumPy
    arrays.

    Args:
        selection (obj) : Atom selection for the system being analyzed; only
            its `positions` attribute is read here.
        numNodes (int): Number of nodes in the system.
        nAtoms (int) : Number of atoms in atom groups represented by system
            nodes. Usually hydrogen atoms are not included in contact
            detection, and are not present in atom groups.
        cutoffDist (float) : Distance at which atoms are no longer considered
            'in contact'.
        tmpDists (obj) : Temporary pre-allocated NumPy array with atom
            distances, filled by the MDAnalysis distance calculation.
        tmpDistsAtms (obj) : Temporary pre-allocated NumPy array to store the
            shortest distance between atoms in different nodes.
        contactMat (obj) : Pre-allocated NumPy matrix where node contacts will
            be stored.
        atomToNode (obj) : NumPy array that maps atoms in atom groups to their
            respective nodes.
        nodeGroupIndicesNP (obj) : NumPy array with atom indices for all atoms
            in each node group.
        nodeGroupIndicesNPAux (obj) : Auxiliary NumPy array with the indices of
            the first atom in each atom group, as listed in
            `nodeGroupIndicesNP`.
        distMode : Distance calculation mode, compared against `MODE_ALL` and
            `MODE_CAPPED`.

    '''

    if distMode == MODE_ALL:
        # serial vs OpenMP
        mdadist.self_distance_array(selection.positions,
                                    result=tmpDists,
                                    backend='openmp')

    if distMode == MODE_CAPPED:
        # method options are: 'bruteforce' 'nsgrid' 'pkdtree'
        atomPairs, pairDists = mdalibdist.self_capped_distance(
            selection.positions,
            max_cutoff=cutoffDist,
            min_cutoff=None,
            box=None,
            method='pkdtree',
            return_distances=True)

        # Only the returned pairs are written; every other entry of
        # tmpDists keeps the value it had on entry.
        for pairIdx, (atmI, atmJ) in enumerate(atomPairs):
            # Go from 2D atom indices to 1D (nAtoms*(nAtoms-1)/2) indices:
            linIdx = getLinIndexC(atmI, atmJ, nAtoms)
            tmpDists[linIdx] = pairDists[pairIdx]

    calcContactC(numNodes, nAtoms, cutoffDist, tmpDists, tmpDistsAtms,
                 contactMat, atomToNode, nodeGroupIndicesNP,
                 nodeGroupIndicesNPAux)
def _report_contact_maps(self, simulation, state, ca_positions):
    """Append the contact map of ``ca_positions`` as a new dataset column.

    Pairwise distances strictly below 8.0 are stored as 1.0, all others as
    0.0. ``simulation`` and ``state`` are not read by the body.

    Args:
        simulation: Unused.
        state: Unused.
        ca_positions: Coordinates passed to ``self_distance_array``.
    """
    # TODO: http://docs.h5py.org/en/stable/faq.html
    # h5py supported integer types: 1, 2, 4 or 8 byte, BE/LE, signed/unsigned.
    # store as 1 byte int
    contact_map = (distances.self_distance_array(ca_positions) < 8.) * 1.
    # Each frame occupies one column (see the `[:, -1]` write below), so the
    # dataset has to grow along axis 1. Growing axis 0 would change the
    # contact-map dimension instead and leave no new column to write to.
    self._cm_dset.resize(self._cm_dset.shape[1] + 1, axis=1)
    self._cm_dset[:, -1] = contact_map
def report(self, simulation, state):
    """Store the C-alpha contact map of ``state`` as a new dataset.

    The dataset is created in ``self._out`` and named after the state's
    time in picoseconds, rounded to the nearest integer. Contacts are
    determined with ``contacts.contact_matrix`` and a radius of 8.0 on
    positions expressed in angstrom.
    """
    ca_indices = [
        atom.index for atom in simulation.topology.atoms()
        if atom.name == 'CA'
    ]
    all_xyz = np.array(state.getPositions().value_in_unit(u.angstrom))
    time_ps = int(np.round(state.getTime().value_in_unit(u.picosecond)))
    ca_xyz = all_xyz[ca_indices].astype(np.float32)
    pair_dists = distances.self_distance_array(ca_xyz)
    # Multiplying by 1.0 turns the contact matrix into floats for storage.
    contact_map = contacts.contact_matrix(pair_dists, radius=8.0) * 1.0
    self._out.create_dataset(str(time_ps), data=contact_map)
def get_rescontacts(protein, cutoff=2):
    """Count, per residue id, the atom pairs closer than ``cutoff``.

    Args:
        protein: Atom collection providing ``positions``, ``resids`` and
            ``len()``.
        cutoff: Distances strictly below this value count as a contact.

    Returns:
        pandas.DataFrame: Columns ``resid`` and ``contacts``, where
        ``contacts`` is the per-atom contact count summed over the atoms
        sharing that ``resid``.
    """
    n_atoms = len(protein)
    condensed = distances.self_distance_array(protein.positions)

    # Expand the condensed upper-triangle distances into a symmetric matrix.
    full = np.zeros((n_atoms, n_atoms), dtype=np.float32)
    rows, cols = np.triu_indices_from(full, k=1)
    full[rows, cols] = condensed
    full[cols, rows] = condensed

    # NOTE: the diagonal stays 0, so for any positive cutoff every atom is
    # also counted as being in contact with itself.
    in_contact = np.where(full < cutoff, 1, 0)
    per_atom = in_contact.sum(axis=0)

    table = pd.DataFrame(zip(protein.resids, per_atom),
                         columns=('resid', 'contacts'))
    return table.groupby('resid').sum().reset_index()
def report(self, simulation, state):
    """Append the C-alpha contact map of ``state`` as a new column.

    Pairwise C-alpha distances (positions in angstrom) strictly below 8.0
    are stored as 1.0, all others as 0.0. ``self._out`` is resized to hold
    one more column, the contact map is written into it, and ``self._file``
    is flushed.
    """
    ca_indices = [
        atom.index for atom in simulation.topology.atoms()
        if atom.name == 'CA'
    ]
    all_xyz = np.array(state.getPositions().value_in_unit(u.angstrom))
    ca_xyz = all_xyz[ca_indices].astype(np.float32)
    contact_map = (distances.self_distance_array(ca_xyz) < 8.0) * 1.0

    # The column count before the resize is the index of the new column.
    next_col = self._out.shape[1]
    self._out.resize((len(contact_map), next_col + 1))
    self._out[:, next_col] = contact_map
    self._file.flush()
def run2d_frame(self, ts, *args):
    """Compute the in-plane (z ignored) RDF histogram for one frame.

    Args:
        ts: Frame object; ``ts.dimensions`` supplies the box used for the
            distance calculation and the first two box lengths used for
            the area.
        *args: Optional ``(g1_pos, g2_pos)`` coordinate arrays. When
            omitted, ``self.g1.positions`` and ``self.g2.positions`` are
            used. If the two arrays are identical, ``self.self_rdf`` is
            switched on.

    Returns:
        numpy.ndarray: Histogram counts divided by the pair density and by
        ``self.shell_area``; an array of ``len(self.bins)`` zeros when
        either group is empty.
    """
    if args:
        g1_pos = args[0]
        g2_pos = args[1]
        # array_equal is shape-safe: groups of different sizes compare as
        # "not equal" instead of triggering a broadcasting error.
        if np.array_equal(g1_pos, g2_pos):
            self.self_rdf = True
    else:
        g1_pos = self.g1.positions
        g2_pos = self.g2.positions

    nA = len(g1_pos)
    nB = len(g2_pos)
    N = nA * nB
    if N == 0:
        return np.zeros(len(self.bins))

    # NOTE(review): the /100 here and /10 below look like an
    # angstrom -> nanometre conversion - confirm the intended units.
    area = (ts.dimensions[0] * ts.dimensions[1]) / 100
    density = N / area

    # Project onto the xy-plane on private copies so that arrays handed in
    # by the caller are not modified.
    g1_pos = np.array(g1_pos)
    g2_pos = np.array(g2_pos)
    g1_pos[:, 2] = 0.0
    g2_pos[:, 2] = 0.0

    if self.self_rdf:
        td = self_distance_array(g1_pos, box=ts.dimensions) / 10
        # Every unique pair is counted twice (i-j and j-i).
        d = np.append(td, td)
    else:
        d = distance_array(g1_pos, g2_pos, box=ts.dimensions) / 10
    #if self.self_rdf:
    #    #np.fill_diagonal(d, self.rmax + 1)
    #    #if self.mask_array is None:
    #    nmol = int(nA/self.mask) #update mask_array as No. atoms can change with time
    #    mask_array = np.kron(np.eye(nmol, dtype=int), self.single_mask_array)
    #    d += mask_array

    count = np.histogram(d, **self.rdf_settings)[0]
    count = count.astype(np.float64)
    rdf = count / density / self.shell_area
    return rdf
def run_frame(self, ts, *args):
    """Compute the RDF histogram for one frame.

    Args:
        ts: Frame object; ``ts.dimensions`` supplies the box used for the
            distance calculation and ``ts.volume`` the box volume.
        *args: Optional ``(g1_pos, g2_pos)`` coordinate arrays. When
            omitted, ``self.g1.positions`` and ``self.g2.positions`` are
            used. If the two arrays are identical, ``self.self_rdf`` is
            switched on.

    Returns:
        numpy.ndarray: Histogram counts divided by the pair density and by
        ``self.shell_vol``; an array of ``len(self.bins)`` zeros when
        either group is empty.
    """
    if args:
        g1_pos = args[0]
        g2_pos = args[1]
        # array_equal is shape-safe: groups of different sizes compare as
        # "not equal" instead of triggering a broadcasting error.
        if np.array_equal(g1_pos, g2_pos):
            self.self_rdf = True
    else:
        g1_pos = self.g1.positions
        g2_pos = self.g2.positions

    nA = len(g1_pos)
    nB = len(g2_pos)
    N = nA * nB
    if N == 0:
        return np.zeros(len(self.bins))

    # NOTE(review): the /10**3 here and /10 below look like an
    # angstrom -> nanometre conversion - confirm the intended units.
    vol = ts.volume / np.power(10, 3)
    density = N / vol

    if self.self_rdf:
        td = self_distance_array(g1_pos, box=ts.dimensions) / 10
        # Every unique pair is counted twice (i-j and j-i).
        d = np.append(td, td)
    else:
        d = distance_array(g1_pos, g2_pos, box=ts.dimensions) / 10
    #if self.self_rdf:
    #    #np.fill_diagonal(d, self.rmax + 1)
    #    #if self.mask_array is None:
    #    nmol = int(nA/self.mask)
    #    mask_array = np.kron(np.eye(nmol, dtype=int), self.single_mask_array)
    #    d += mask_array

    count = np.histogram(d, **self.rdf_settings)[0]
    count = count.astype(np.float64)
    rdf = count / density / self.shell_vol
    return rdf
def mda_to_nx(mda_atoms, cutoff=8):
    """
    Convert an MDAnalysis atom group to a weighted graph.

    Every atom becomes a node (keyed by its position in the group) carrying
    the attributes ``ID``, ``Name``, ``Mass``, ``resname`` and ``resnum``.
    Every matrix entry with a distance strictly below ``cutoff`` becomes an
    edge weighted by the inverse distance.
    """
    G = nx.Graph()

    # Node attributes, one node per atom.
    G.add_nodes_from([
        (idx, {
            "ID": atom.id,
            "Name": atom.name,
            "Mass": atom.mass,
            "resname": atom.resname,
            "resnum": atom.resnum
        })
        for idx, atom in enumerate(mda_atoms)
    ])

    # Edges from the full pairwise distance matrix.
    dist = triu_to_full(distances.self_distance_array(mda_atoms.positions))
    # NOTE(review): diagonal entries are not excluded here; whether they
    # produce self-loops depends on what triu_to_full puts on the diagonal.
    rows, cols = np.where(dist < cutoff)
    weighted_edges = [(i, j, 1 / dist[i, j]) for i, j in zip(rows, cols)]
    G.add_weighted_edges_from(weighted_edges)
    return G
def get_dist_mda():
    """Print the pairwise distances of the current frame 100 times.

    The coordinates are re-read from ``u.trajectory.ts.positions`` on every
    iteration.
    """
    u = mda.Universe("data/two_h2o.pdb")
    for _ in range(100):
        print(distances.self_distance_array(u.trajectory.ts.positions))
import os

import MDAnalysis as md
import MDAnalysis.analysis.distances as dist
import numpy as np

# Structure files named SCF*.pdb in the parent directory.
SCFs = [
    name for name in os.listdir('../')
    if name.startswith('SCF') and name.endswith('.pdb')
]

todos = []  # largest interatomic distance of each structure
order = []  # file names, parallel to `todos`
for pdb in SCFs:
    u = md.Universe('../' + pdb)
    distances = dist.self_distance_array(u.atoms.positions)
    todos.append(distances.max())
    order.append(pdb)

if not todos:
    raise SystemExit('no SCF*.pdb files found in ../')

# argmax gives one integer index. Comparing the list itself to a scalar
# (`todos == max(todos)`) is always False, so the previous np.where lookup
# produced an empty index and `order[index]` failed.
index = int(np.argmax(todos))
print('structure with max distance:', order[index])
np.save('max_distances', todos)
print('max distance', max(todos))
# protein_ca = mda_traj.select_atoms('protein and name CA')
# Build one full contact map for every 10th frame of every trajectory.
for pdb_file, traj_file in tqdm(zip(pdb_files, dcd_files)):
    # mda_traj = mda.Universe(pdb_file, dcd)
    try:
        mda_traj = mda.Universe(pdb_file, traj_file)
    except OSError:
        # Remember the topology whose trajectory could not be opened.
        failed.append(pdb_file)
        continue
    protein_ca = mda_traj.select_atoms('protein and name CA')
    # label = os.path.basename(os.path.dirname(pdb)).split('_')[2]
    # label_kinds.add(label)
    for _ in mda_traj.trajectory[::10]:
        within_cutoff = distances.self_distance_array(
            protein_ca.positions) < 8.0
        contact_map = triu_to_full(within_cutoff * 1)
        contact_maps.append(contact_map)
        # labels.append(len(label_kinds)-1)
print("failed MD cases: ", failed)

contact_maps = np.array(contact_maps)

# padding if odd dimension occurs in image
padding = 4


def pad_f(x):
    """Return the (before, after) pad that rounds x up to a multiple of `padding`."""
    return (0, -x % padding)


# The leading (sample) axis is never padded.
padding_buffer = [(0, 0)] + [pad_f(x) for x in contact_maps.shape[1:]]
contact_maps = np.pad(contact_maps, padding_buffer, mode='constant')
print(contact_maps.shape)
# Split the C-alpha selection into one group of nCA atoms per chain.
CA = [CA[nCA * i:nCA * (i + 1)] for i in range(nchain)]
nframe = len(u.trajectory)
# Per frame: summed distance and pair count for each sequence separation.
intra = np.zeros((nframe, nCA - 1), dtype=float)
n = np.zeros((nframe, nCA - 1), dtype=int)

# For every entry of the condensed distance array (row-major upper
# triangle), the column of `intra`/`n` it contributes to:
# sequence separation minus one.
chaindist = np.array(
    [j for i in range(nCA - 1, 0, -1) for j in range(i)], dtype=int)
matsize = len(chaindist)

t = 0
for frame in u.trajectory:
    for ichain in range(nchain):
        mat = distances.self_distance_array(CA[ichain].positions,
                                            box=u.dimensions,
                                            backend='OpenMP')
        # Unbuffered accumulation: repeated indices in `chaindist` all add
        # up, exactly like the element-by-element loop it replaces.
        np.add.at(intra[t], chaindist, mat[:matsize])
        np.add.at(n[t], chaindist, 1)
    sys.stdout.write('\rFrame %d' % t)
    t += 1

intra = np.divide(intra, n)
mean = np.mean(intra, axis=0)
err = np.std(intra, axis=0)  #/np.sqrt(intra.shape[0])
nd = range(1, len(mean) + 1)
# savetxt needs a real 2-D array; a bare zip() is a one-shot iterator on
# Python 3 and cannot be converted.
np.savetxt(sys.argv[3],
           np.column_stack((nd, mean, err)),
           fmt=['%d', '%.6f', '%.6f'])
def calcDistances(selection, numNodes, nAtoms, atomToNode, cutoffDist,
                  nodeGroupIndicesNP, nodeGroupIndicesNPAux, nodeDists,
                  backend="serial", distMode=MODE_ALL, verbose=0):
    '''Executes MDAnalysis atom distance calculation and node cartesian distance calculation.

    This function is a wrapper for two optimized atomic distance calculation
    and node distance calculation calls. The first is one of MDAnalysis' atom
    distance calculation functions (either `self_distance_array` or
    `self_capped_distance`). The second is the internal
    :py:func:`atmToNodeDist`. All results are stored in pre-allocated NumPy
    arrays.

    This is intended as an analysis tool to allow the comparison of network
    distances and cartesian distances. It is similar to
    :py:func:`getContactsC`, which is optimized for contact detection.

    Args:
        selection (obj) : Atom selection for the system being analyzed; only
            its `positions` attribute is read here.
        numNodes (int): Number of nodes in the system.
        nAtoms (int) : Number of atoms in atom groups represented by system
            nodes. Usually hydrogen atoms are not included in contact
            detection, and are not present in atom groups.
        atomToNode (obj) : NumPy array that maps atoms in atom groups to their
            respective nodes.
        cutoffDist (float): Distance cutoff used to cap distance calculations.
            In "capped" mode, pairs beyond the cutoff keep the placeholder
            value `cutoffDist*2`.
        nodeGroupIndicesNP (obj) : NumPy array with atom indices for all atoms
            in each node group.
        nodeGroupIndicesNPAux (obj) : Auxiliary NumPy array with the indices of
            the first atom in each atom group, as listed in
            `nodeGroupIndicesNP`.
        nodeDists (obj) : Pre-allocated array to store cartesian distances.
        backend (str) : Controls how MDAnalysis will perform its distance
            calculations. Options are `serial` and `openmp`. This option is
            ignored if the distance mode is not "all".
        distMode (str): Distance calculation method. Options are 0 (for mode
            "all") and 1 (for mode "capped").
        verbose (int): Controls informational output.

    Raises:
        ValueError: If `distMode` is neither `MODE_ALL` nor `MODE_CAPPED`.

    '''

    # Fail early with a clear message; otherwise the temporary distance
    # array would never be created and the final call would fail on an
    # unbound name.
    if distMode not in (MODE_ALL, MODE_CAPPED):
        raise ValueError("Unsupported distMode: {}".format(distMode))

    if verbose:
        print("There are {} nodes and {} atoms in this system.".format(numNodes, nAtoms))

    # Length of the condensed (upper-triangle) atom distance array.
    nPairs = int(nAtoms*(nAtoms-1)/2)

    if distMode == MODE_ALL:

        if verbose:
            print("creating array with {} elements...".format(nPairs))
            start = timer()

        tmpDists = np.zeros(nPairs, dtype=np.float64)

        if verbose:
            end = timer()
            print("Time for matrix:", timedelta(seconds=end-start))

        if verbose:
            print("running self_distance_array...")
            start = timer()

        # serial vs OpenMP
        mdadist.self_distance_array(selection.positions, result=tmpDists, backend=backend)

        if verbose:
            end = timer()
            print("Time for contact calculation:", timedelta(seconds=end-start))

    if distMode == MODE_CAPPED:

        if verbose:
            print("creating array with {} elements...".format(nPairs))
            start = timer()

        # Pre-fill with a value beyond the cutoff; only pairs returned by
        # the capped search are overwritten below.
        tmpDists = np.full(nPairs, cutoffDist*2, dtype=float)

        if verbose:
            end = timer()
            print("Time for matrix:", timedelta(seconds=end-start))

        if verbose:
            print("running self_capped_distance...")
            start = timer()

        # method options are: 'bruteforce' 'nsgrid' 'pkdtree'
        pairs, distances = mdalibdist.self_capped_distance(selection.positions,
                                                           max_cutoff=cutoffDist,
                                                           min_cutoff=None,
                                                           box=None,
                                                           method='pkdtree',
                                                           return_distances=True)

        if verbose:
            end = timer()
            print("Time for contact calculation:", timedelta(seconds=end-start))
            print("Found {} pairs and {} distances".format(len(pairs), len(distances)))

        if verbose:
            print("loading distances in array...")
            start = timer()
            if verbose > 1:
                startLoop = timer()

        for k, (i, j) in enumerate(pairs):
            if verbose > 1:
                if not k % 1000:
                    print("Loaded {} distances.".format(k))
                    print("Time for {} distances: {}".format(k, timedelta(seconds=timer()-startLoop)))
                    startLoop = timer()

            # Go from 2D atom indices to 1D (nAtoms*(nAtoms-1)/2) indices:
            ijLI = getLinIndexNumba(i, j, nAtoms)
            tmpDists[ijLI] = distances[k]

        if verbose:
            end = timer()
            print("Time for loading distances:", timedelta(seconds=end-start))

    # Progress output is gated on `verbose` for both modes.
    if verbose:
        print("running atmToNodeDist...")
        start = timer()

    # Translate atoms distances in minimum node distance.
    atmToNodeDist(numNodes, nAtoms, tmpDists, atomToNode, nodeGroupIndicesNP, nodeGroupIndicesNPAux, nodeDists)

    if verbose:
        end = timer()
        print("Time for atmToNodeDist:", timedelta(seconds=end-start))
def _single_frame(self):
    """Append the current frame's pairwise distances to ``self.result``.

    The distances between all atoms of ``self._ag`` are stored as a
    float32 array.
    """
    pair_dists = self_distance_array(self._ag.positions)
    as_float32 = np.asarray(pair_dists, dtype=np.float32)
    self.result.append(as_float32)