Exemplo n.º 1
0
    def process(self):
        """Accumulate pairwise distances of the projected points into ``self.contacts``.

        Each point is ``calphas`` displaced by 1.5 times the
        ``calphas -> cbetas`` vector.  ``self.tempmat`` is handed to
        ``self_distance_array`` as its ``result`` buffer and is then added
        element-wise to the running total in ``self.contacts``.
        """
        ca_xyz = self.calphas.positions
        offset = self.cbetas.positions - ca_xyz
        projected = ca_xyz + 1.5 * offset

        # The box comes from the processor driving this analysis.
        mddist.self_distance_array(
            projected, box=self.processor.currbox, result=self.tempmat)

        self.contacts += self.tempmat
Exemplo n.º 2
0
def get_dist_mda(n=5):
    """Call ``self_distance_array`` ``n * n`` times, then dump a heap snapshot.

    The distances are computed for the atoms named ``N`` of the bundled PDB
    file and the results are discarded; only the heap written through ``h``
    (a profiler object defined elsewhere in the file) is kept.
    """
    universe = mda.Universe("tutorial/data/drd3_gi_pd.pdb")
    nitrogen_atoms = universe.select_atoms("name N")

    for _outer in range(n):
        for _inner in range(n):
            distances.self_distance_array(nitrogen_atoms.positions)

    h.heap().dump("benchmarking/heaps/3_mda.out")
Exemplo n.º 3
0
    def process(self):
        """Add the current frame's pairwise distances to ``self.contacts``.

        Distances are measured between points located at
        ``calphas + 1.5 * (cbetas - calphas)``.  ``self_distance_array`` is
        given ``self.tempmat`` as its ``result`` buffer, which is afterwards
        summed into ``self.contacts``.
        """
        anchor = self.calphas.positions
        projected = anchor + (self.cbetas.positions - anchor) * 1.5

        distance_kwargs = {
            "box": self.processor.currbox,
            "result": self.tempmat,
        }
        mddist.self_distance_array(projected, **distance_kwargs)
        self.contacts += self.tempmat
Exemplo n.º 4
0
def contact_maps_from_traj(pdb_file, traj_file, savefile, contact_cutoff=8.0):
    """
    Compute per-frame CA contact maps in parallel and store them in an HDF5 file.

    The trajectory is processed in ``nloops`` rounds.  In every round each MPI
    rank handles one contiguous slice of frames; the per-rank results are
    gathered on rank 0, which periodically appends them to the extendable
    ``contact_maps`` array of ``savefile``.

    Relies on module-level names that are not defined in this function:
    ``comm``, ``rank``, ``size``, ``best_loop`` and ``loop_range``.

    Args:
        pdb_file: Topology file passed to ``mda.Universe``.
        traj_file: Trajectory file passed to ``mda.Universe``.
        savefile: Output path of the PyTables file (opened on rank 0 only).
        contact_cutoff: Atom pairs closer than this distance are stored as 1,
            all other pairs as 0.
    """

    mda_traj = mda.Universe(pdb_file, traj_file)
    traj_length = len(mda_traj.trajectory)
    nloops = int(
        brute(best_loop, (loop_range, ), args=(traj_length, size),
              finish=None))
    print("traj_length: %d  nloop: %d" % (traj_length, nloops))
    # Flush to disk roughly five times per run.  The interval must never be 0
    # (nloops < 5), otherwise the modulo test below divides by zero.
    write_freq = max(1, nloops // 5)
    ca = mda_traj.select_atoms('name CA')
    dist_shape = distances.self_distance_array(ca.positions).shape[0]

    if rank == 0:
        savefile = os.path.abspath(savefile)
        outfile = tables.open_file(savefile, 'w')
        atom = tables.Int8Atom()
        cm_table = outfile.create_earray(outfile.root,
                                         'contact_maps',
                                         atom,
                                         shape=(0, dist_shape))
        print("dist_shape ", dist_shape)
    contact_matrices = []
    # workaround mpi4py 2^32 limit on number of objects
    # and ib memory size limit
    for loop in range(nloops):
        nframes = traj_length // (size * nloops)
        start = (rank + loop * size) * nframes
        end = (rank + 1 + loop * size) * nframes
        # The very last slice also takes the frames left over by the
        # integer division above.
        if loop == nloops - 1 and rank == size - 1:
            end = traj_length
        print("loop %d rank %d start %d end %d" % (loop, rank, start, end))
        # Iterating the trajectory slice advances the universe, so
        # ca.positions refers to the current frame on every pass.
        contact_matrices_loop = [
            (distances.self_distance_array(ca.positions) <
             contact_cutoff).astype('int8')
            for _ in mda_traj.trajectory[start:end]
        ]
        print("rank %d cm size %d" % (rank, len(contact_matrices_loop)))
        contact_matrices_loop = comm.gather(contact_matrices_loop, root=0)
        if rank == 0:
            # Gathered data is one list per rank; flatten it to a list of maps.
            contact_matrices.append(
                list(chain.from_iterable(contact_matrices_loop)))
            print("loop %d " % loop, len(contact_matrices_loop),
                  len(contact_matrices_loop[0]))
            if (loop + 1) % write_freq == 0:
                contact_matrices = list(chain.from_iterable(contact_matrices))
                cm_table.append(contact_matrices)
                contact_matrices = []
        comm.Barrier()
    if rank == 0:
        # Write whatever was collected after the last periodic flush.
        if len(contact_matrices) > 0:
            contact_matrices = list(chain.from_iterable(contact_matrices))
            cm_table.append(contact_matrices)
        outfile.close()
Exemplo n.º 5
0
 def time_self_distance_array(self, num_atoms):
     """Time ``self_distance_array`` on ``self.coords_1``.

     All optional arguments are spelled out explicitly: no box, no
     caller-supplied result array, and the ``'serial'`` backend.
     ``num_atoms`` is not read inside this method.
     """
     coords = self.coords_1
     distances.self_distance_array(
         reference=coords, box=None, result=None, backend='serial')
Exemplo n.º 6
0
 def time_self_distance_array_pre_allocated(self, num_atoms):
     """Time ``self_distance_array`` writing into a preallocated array.

     The coordinates come from ``self.coords_1`` and the output goes to
     ``self.allocated_array_1D``; no box is given and the ``'serial'``
     backend is used.  ``num_atoms`` is not read inside this method.
     """
     coords = self.coords_1
     out_buffer = self.allocated_array_1D
     distances.self_distance_array(
         reference=coords, box=None, result=out_buffer, backend='serial')
Exemplo n.º 7
0
def contact_maps_from_traj(pdb_file,
                           traj_file,
                           contact_cutoff=8.0,
                           savefile=None):
    """
    Compute one CA contact map per trajectory frame.

    Args:
        pdb_file: Topology file passed to ``mda.Universe``.
        traj_file: Trajectory file passed to ``mda.Universe``.
        contact_cutoff: Atom pairs closer than this distance are marked 1.0,
            all other pairs 0.0.
        savefile: Optional output path.  When given, the maps are also written
            to the ``contact_maps`` array of a PyTables file at that path.

    Returns:
        list: one float array per frame, in trajectory order, each holding the
        condensed (1-D) pair flags produced from ``self_distance_array``.
    """

    mda_traj = mda.Universe(pdb_file, traj_file)
    traj_length = len(mda_traj.trajectory)
    ca = mda_traj.select_atoms('name CA')

    # Open the output first so that an unusable path fails before the
    # (potentially long) distance calculation starts.
    outfile = None
    if savefile:
        outfile = tables.open_file(os.path.abspath(savefile), 'w')

    try:
        if outfile is not None:
            cm_table = outfile.create_earray(outfile.root,
                                             'contact_maps',
                                             tables.Float64Atom(),
                                             shape=(traj_length, 0))

        # Iterating the trajectory advances the universe, so ca.positions
        # refers to the current frame on every pass.
        contact_matrices = [
            (distances.self_distance_array(ca.positions) <
             contact_cutoff) * 1.0
            for _ in mda_traj.trajectory
        ]

        if outfile is not None:
            cm_table.append(contact_matrices)
    finally:
        # Close the file even if the calculation or the write fails.
        if outfile is not None:
            outfile.close()

    return contact_matrices
Exemplo n.º 8
0
   def get_distmatrix(self,**kwargs):
      """Compute the pairwise distances of ``self.coords`` and build the matrix.

      The result of MDAnalysis' ``self_distance_array`` is stored in
      ``self._dists``; ``self.distmatx`` is then set from
      ``self._gen_matrix()``.  Keyword arguments are forwarded unchanged to
      ``self_distance_array``.
      """
      # No PBC necessary if trajectory already wrapped
      pair_distances = distanal.self_distance_array(self.coords, **kwargs)
      self._dists = pair_distances

      self.distmatx = self._gen_matrix()
Exemplo n.º 9
0
def getContactsC(selection, numNodes,
                        nAtoms,
                        cutoffDist,
                        tmpDists,
                        tmpDistsAtms,
                        contactMat,
                        atomToNode,
                        nodeGroupIndicesNP,
                        nodeGroupIndicesNPAux,
                        distMode=MODE_ALL):
    '''Runs the atom distance calculation followed by node contact detection.

    This function is Cython compiled as a wrapper around two optimized calls:
    an MDAnalysis distance calculation (`self_distance_array` or
    `self_capped_distance`, depending on `distMode`) and the internal
    :py:func:`calcContactC`. All results are stored in pre-allocated NumPy arrays.

    Args:
        selection (obj) : Atom selection for the system being analyzed; only its `positions` attribute is read here.
        numNodes (int): Number of nodes in the system.
        nAtoms (int) : Number of atoms in atom groups represented by system nodes. Usually hydrogen atoms are not included in contact detection, and are not present in atom groups.
        cutoffDist (float) : Distance at which atoms are no longer considered 'in contact'.
        tmpDists (obj) : Temporary pre-allocated NumPy array with atom distances. It is filled by the distance calculation.
        tmpDistsAtms (obj) : Temporary pre-allocated NumPy array to store the shortest distance between atoms in different nodes.
        contactMat (obj) : Pre-allocated NumPy matrix where node contacts will be stored.
        atomToNode (obj) : NumPy array that maps atoms in atom groups to their respective nodes.
        nodeGroupIndicesNP (obj) : NumPy array with atom indices for all atoms in each node group.
        nodeGroupIndicesNPAux (obj) : Auxiliary NumPy array with the indices of the first atom in each atom group, as listed in `nodeGroupIndicesNP`.
        distMode : `MODE_ALL` computes every pair distance; `MODE_CAPPED` only computes pairs within `cutoffDist` and leaves all other entries of `tmpDists` untouched.

    '''

    atomPositions = selection.positions

    if distMode == MODE_ALL:
        # Full pair list written straight into tmpDists (OpenMP backend;
        # 'serial' is the alternative).
        mdadist.self_distance_array(atomPositions, result=tmpDists, backend='openmp')

    if distMode == MODE_CAPPED:
        # method options are: 'bruteforce' 'nsgrid' 'pkdtree'
        atomPairs, pairDists = mdalibdist.self_capped_distance(
            atomPositions,
            max_cutoff=cutoffDist,
            min_cutoff=None,
            box=None,
            method='pkdtree',
            return_distances=True)

        for pairIdx, atomPair in enumerate(atomPairs):
            i, j = atomPair
            # Map the 2D atom indices onto the 1D (nAtoms*(nAtoms-1)/2) layout.
            linearIdx = getLinIndexC(i, j, nAtoms)
            tmpDists[linearIdx] = pairDists[pairIdx]

    calcContactC(numNodes, nAtoms, cutoffDist, tmpDists, tmpDistsAtms,
                 contactMat, atomToNode, nodeGroupIndicesNP, nodeGroupIndicesNPAux)
Exemplo n.º 10
0
    def _report_contact_maps(self, simulation, state, ca_positions):
        """Append the contact map of ``ca_positions`` as a new column of ``self._cm_dset``.

        A pair counts as a contact when its distance is below 8; the map is
        stored as floats (1.0 / 0.0).  ``simulation`` and ``state`` are not
        read in this method.
        """
        # TODO: http://docs.h5py.org/en/stable/faq.html
        #       h5py supported integer types: 1, 2, 4 or 8 byte, BE/LE, signed/unsigned.
        #       store as 1 byte int
        contact_map = (distances.self_distance_array(ca_positions) < 8.) * 1.

        # Frames are stored along axis 1 (the map is written into a column),
        # so that is the axis that has to grow by one.
        n_frames = self._cm_dset.shape[1]
        self._cm_dset.resize(n_frames + 1, axis=1)
        self._cm_dset[:, n_frames] = contact_map
Exemplo n.º 11
0
 def report(self, simulation, state):
     """Store the CA contact map of ``state`` in ``self._out``.

     The dataset is named after the simulation time in picoseconds, rounded
     to the nearest integer.  A pair is in contact according to
     ``contacts.contact_matrix`` with ``radius=8.0``; the map is stored as
     floats.
     """
     ca_indices = [
         atom.index for atom in simulation.topology.atoms()
         if atom.name == 'CA'
     ]
     all_positions = np.array(state.getPositions().value_in_unit(u.angstrom))
     time_ps = state.getTime().value_in_unit(u.picosecond)
     dataset_name = str(int(np.round(time_ps)))

     ca_positions = all_positions[ca_indices].astype(np.float32)
     pair_distances = distances.self_distance_array(ca_positions)
     in_contact = contacts.contact_matrix(pair_distances, radius=8.0)
     self._out.create_dataset(dataset_name, data=in_contact * 1.0)
Exemplo n.º 12
0
def get_rescontacts(protein, cutoff=2):
    """Count, per residue id, the atom pairs closer than ``cutoff``.

    Returns a DataFrame with the columns ``resid`` and ``contacts``, where
    ``contacts`` is the per-atom contact count summed over all atoms sharing
    the same ``resid``.
    """
    n_atoms = len(protein)
    condensed = distances.self_distance_array(protein.positions)

    # Expand the condensed distances into a symmetric square matrix.
    square = np.zeros((n_atoms, n_atoms), dtype=np.float32)
    rows, cols = np.triu_indices_from(square, k=1)
    square[rows, cols] = condensed
    square[cols, rows] = condensed

    # NOTE: the diagonal stays 0, so for any cutoff > 0 each atom is also
    # counted as being in contact with itself.
    contacts = np.where(square < cutoff, 1, 0)
    per_atom = contacts.sum(axis=0)

    table = pd.DataFrame(zip(protein.resids, per_atom),
                         columns=('resid', 'contacts'))
    return table.groupby('resid').sum().reset_index()
Exemplo n.º 13
0
 def report(self, simulation, state):
     """Append the CA contact map of ``state`` as a new column of ``self._out``.

     Pairs closer than 8.0 are stored as 1.0, all others as 0.0.  The
     dataset is resized to one more column, the map is written into it and
     ``self._file`` is flushed.
     """
     ca_indices = []
     for atom in simulation.topology.atoms():
         if atom.name == 'CA':
             ca_indices.append(atom.index)

     all_positions = np.array(state.getPositions().value_in_unit(u.angstrom))
     ca_positions = all_positions[ca_indices].astype(np.float32)
     contact_map = (distances.self_distance_array(ca_positions) < 8.0) * 1.0

     # The index of the new column equals the column count before resizing.
     new_col = self._out.shape[1]
     self._out.resize((len(contact_map), new_col + 1))
     self._out[:, new_col] = contact_map
     self._file.flush()
Exemplo n.º 14
0
    def run2d_frame(self, ts, *args):
        """Compute the 2D radial distribution (z coordinates zeroed) for one frame.

        Args:
            ts: Timestep-like object; ``ts.dimensions`` is used as the box and
                its first two entries give the area.
            *args: Optional ``(g1_pos, g2_pos)`` coordinate arrays.  When
                omitted, ``self.g1.positions`` and ``self.g2.positions`` are
                used.  When both arrays are equal, ``self.self_rdf`` is set to
                True and stays set on the instance.  The passed arrays are
                copied and never modified.

        Returns:
            numpy.ndarray: histogram counts divided by the pair density and by
            ``self.shell_area``; zeros of length ``len(self.bins)`` when
            either group is empty.
        """
        if args:
            # Copy: the z column is zeroed further down and the caller's
            # arrays must stay intact.
            g1_pos = np.array(args[0])
            g2_pos = np.array(args[1])
            # array_equal is shape-safe: groups of different size compare
            # unequal instead of failing to broadcast.
            if np.array_equal(g1_pos, g2_pos):
                self.self_rdf = True
        else:
            g1_pos = self.g1.positions
            g2_pos = self.g2.positions

        nA = len(g1_pos)
        nB = len(g2_pos)
        N = nA * nB

        if N == 0:
            return np.zeros(len(self.bins))

        # Lengths are divided by 10 (areas by 100) throughout this method.
        area = (ts.dimensions[0] * ts.dimensions[1]) / 100
        density = N / area

        # Zero z so that only in-plane separations are measured.
        g1_pos[:, 2] = 0.0
        g2_pos[:, 2] = 0.0

        if self.self_rdf:
            td = self_distance_array(g1_pos, box=ts.dimensions) / 10
            # self_distance_array yields each unordered pair once; duplicate
            # the values so both (i, j) and (j, i) are counted, matching the
            # nA * nB normalisation.
            d = np.append(td, td)
        else:
            d = distance_array(g1_pos, g2_pos, box=ts.dimensions) / 10

        count = np.histogram(d, **self.rdf_settings)[0]
        count = count.astype(np.float64)
        rdf = count / density / self.shell_area
        return rdf
Exemplo n.º 15
0
    def run_frame(self, ts, *args):
        """Compute the radial distribution for one frame.

        Args:
            ts: Timestep-like object; ``ts.dimensions`` is used as the box and
                ``ts.volume`` for the density.
            *args: Optional ``(g1_pos, g2_pos)`` coordinate arrays.  When
                omitted, ``self.g1.positions`` and ``self.g2.positions`` are
                used.  When both arrays are equal, ``self.self_rdf`` is set to
                True and stays set on the instance.

        Returns:
            numpy.ndarray: histogram counts divided by the pair density and by
            ``self.shell_vol``; zeros of length ``len(self.bins)`` when either
            group is empty.
        """
        if args:
            g1_pos = args[0]
            g2_pos = args[1]
            # array_equal is shape-safe: groups of different size compare
            # unequal instead of failing to broadcast.
            if np.array_equal(g1_pos, g2_pos):
                self.self_rdf = True
        else:
            g1_pos = self.g1.positions
            g2_pos = self.g2.positions

        nA = len(g1_pos)
        nB = len(g2_pos)
        N = nA * nB

        if N == 0:
            return np.zeros(len(self.bins))

        # Lengths are divided by 10 (volumes by 10**3) throughout this method.
        vol = ts.volume / np.power(10, 3)
        density = N / vol

        if self.self_rdf:
            td = self_distance_array(g1_pos, box=ts.dimensions) / 10
            # self_distance_array yields each unordered pair once; duplicate
            # the values so both (i, j) and (j, i) are counted, matching the
            # nA * nB normalisation.
            d = np.append(td, td)
        else:
            d = distance_array(g1_pos, g2_pos, box=ts.dimensions) / 10

        count = np.histogram(d, **self.rdf_settings)[0]
        count = count.astype(np.float64)
        rdf = count / density / self.shell_vol
        return rdf
Exemplo n.º 16
0
def mda_to_nx(mda_atoms, cutoff=8):
    """
    Convert an MDAnalysis atom group to a distance-weighted graph.

    Every atom becomes a node, keyed by its position in ``mda_atoms`` and
    carrying the attributes ``ID``, ``Name``, ``Mass``, ``resname`` and
    ``resnum``.  Two different atoms closer than ``cutoff`` are joined by an
    edge whose weight is the inverse of their distance.

    Args:
        mda_atoms: Atom group; iterated for node attributes and read through
            its ``positions`` attribute.
        cutoff: Distance below which two atoms are connected.

    Returns:
        networkx.Graph: the undirected contact graph.
    """
    G = nx.Graph()

    # Node attributes, one node per atom.
    G.add_nodes_from(
        (i, {
            "ID": atom.id,
            "Name": atom.name,
            "Mass": atom.mass,
            "resname": atom.resname,
            "resnum": atom.resnum
        }) for i, atom in enumerate(mda_atoms))

    # Square distance matrix (assumes triu_to_full returns a 2-D array
    # indexable as dist[i, j] - it is defined elsewhere).
    dist = triu_to_full(distances.self_distance_array(mda_atoms.positions))

    # Keep i < j only: the graph is undirected, and the diagonal (distance of
    # an atom to itself) would otherwise add a self-loop to every node with
    # weight 1 / dist[i, i].
    edges = [(i, j, 1 / dist[i, j])
             for i, j in zip(*np.where(dist < cutoff))
             if i < j]
    G.add_weighted_edges_from(edges)

    return G
Exemplo n.º 17
0
def get_dist_mda():
    """Print the pairwise distance array of the current timestep 100 times."""
    universe = mda.Universe("data/two_h2o.pdb")
    for _ in range(100):
        print(distances.self_distance_array(universe.trajectory.ts.positions))
Exemplo n.º 18
0
import MDAnalysis as md
import MDAnalysis.analysis.distances as dist
import numpy as np
import os

# Every SCF*.pdb structure found in the parent directory.
SCFs = [name for name in os.listdir('../')
        if name.startswith('SCF') and name.endswith('.pdb')]

# todos[i] is the largest pairwise atom distance of the structure order[i].
todos = []
order = []
for pdb in SCFs:
    u = md.Universe('../'+pdb)
    distances = dist.self_distance_array(u.atoms.positions)
    todos.append(distances.max())
    order.append(pdb)

if not todos:
    raise SystemExit("no SCF*.pdb structures found in '../'")

# Position of the (first) structure with the overall largest distance.
# Comparing the list itself with a scalar (todos == max(todos)) is always
# False, so the index has to be looked up explicitly.
index = todos.index(max(todos))
print('structure with max distance:',order[index])

np.save('max_distances',todos)
print('max distance',max(todos))
Exemplo n.º 19
0
# Build one full (square) contact map for every 10th frame of each
# topology/trajectory pair; pairs that cannot be opened are recorded in `failed`.
for pdb_file, traj_file in tqdm(zip(pdb_files, dcd_files)):
    try:
        mda_traj = mda.Universe(pdb_file, traj_file)
    except OSError:
        failed += [pdb_file]
        continue

    protein_ca = mda_traj.select_atoms('protein and name CA')

    # Iterating the slice advances the universe, so the positions read
    # below belong to the current frame.
    for _ in mda_traj.trajectory[::10]:
        in_contact = distances.self_distance_array(protein_ca.positions) < 8.0
        contact_map = triu_to_full(in_contact * 1)
        contact_maps.append(contact_map)

print("failed MD cases: ", failed)
contact_maps = np.array(contact_maps)

# padding if odd dimension occurs in image
padding = 4


def pad_f(x):
    """Return the (before, after) padding that rounds ``x`` up to a multiple of ``padding``."""
    remainder = x % padding
    if remainder == 0:
        return (0, 0)
    return (0, padding - remainder)


# The first axis (one entry per contact map) is never padded.
padding_buffer = [(0, 0)] + [pad_f(x) for x in contact_maps.shape[1:]]
contact_maps = np.pad(contact_maps, padding_buffer, mode='constant')
print(contact_maps.shape)
Exemplo n.º 20
0
    CA = [CA[nCA * i:nCA * (i + 1)] for i in range(nchain)]

    nframe = len(u.trajectory)

    intra = np.zeros((nframe, nCA - 1), dtype=float)
    n = np.zeros((nframe, nCA - 1), dtype=int)
    chaindist = []
    for i in range(nCA - 1, 0, -1):
        for j in range(i):
            chaindist.append(j)
    chaindist = np.array(chaindist)
    matsize = len(chaindist)

    t = 0
    for frame in u.trajectory:
        for ichain in range(nchain):
            mat = distances.self_distance_array(CA[ichain].positions,
                                                box=u.dimensions,
                                                backend='OpenMP')
            for i in range(matsize):
                intra[t, chaindist[i]] += mat[i]
                n[t, chaindist[i]] += 1
        sys.stdout.write('\rFrame %d' % t)
        t += 1

    intra = np.divide(intra, n)
    mean = np.mean(intra, axis=0)
    err = np.std(intra, axis=0)  #/np.sqrt(intra.shape[0])
    nd = range(1, len(mean) + 1)
    np.savetxt(sys.argv[3], zip(nd, mean, err), fmt=['%d', '%.6f', '%.6f'])
Exemplo n.º 21
0
def calcDistances(selection, numNodes, nAtoms, atomToNode,  cutoffDist,
                       nodeGroupIndicesNP, nodeGroupIndicesNPAux, nodeDists, backend="serial", distMode=MODE_ALL, verbose=0):
    '''Executes MDAnalysis atom distance calculation and node cartesian distance calculation.

    This function is a wrapper for two optimized atomic distance calculation and node distance calculation calls.
    The first is one of MDAnalysis' atom distance calculation functions (either `self_distance_array` or `self_capped_distance`). The second is the internal :py:func:`atmToNodeDist`.
    All results are stored in pre-allocated NumPy arrays.

    This is intended as an analysis tool to allow the comparison of network distances and cartesian distances. It is similar to :py:func:`getContactsC`, which is optimized for contact detection.

    Args:
        selection (obj) : Atom selection for the system being analyzed; only its `positions` attribute is read here.
        numNodes (int): Number of nodes in the system.
        nAtoms (int) : Number of atoms in atom groups represented by system nodes. Usually hydrogen atoms are not included in contact detection, and are not present in atom groups.
        atomToNode (obj) : NumPy array that maps atoms in atom groups to their respective nodes.
        cutoffDist (float): Distance cutoff used to cap distance calculations. In capped mode, pairs beyond the cutoff keep the placeholder value `2 * cutoffDist`.
        nodeGroupIndicesNP (obj) : NumPy array with atom indices for all atoms in each node group.
        nodeGroupIndicesNPAux (obj) : Auxiliary NumPy array with the indices of the first atom in each atom group, as listed in `nodeGroupIndicesNP`.
        nodeDists (obj) : Pre-allocated array to store cartesian distances.
        backend (str) : Controls how MDAnalysis will perform its distance calculations. Options are  `serial` and `openmp`. This option is ignored if the distance mode is not "all".
        distMode (int): Distance calculation method. Options are 0 (for mode "all") and 1 (for mode "capped").
        verbose (int): Controls informational output. Any non-zero value prints progress and timings; values above 1 also report progress while loading capped distances.

    Raises:
        ValueError: If `distMode` is neither `MODE_ALL` nor `MODE_CAPPED`.

    '''

    # Fail early with a clear message instead of reaching atmToNodeDist
    # without a distance array.
    if distMode not in (MODE_ALL, MODE_CAPPED):
        raise ValueError("Unknown distance mode: {}".format(distMode))

    if verbose:
        print("There are {} nodes and {} atoms in this system.".format(numNodes, nAtoms))

    # Number of unique atom pairs, i.e. the length of the 1D distance array.
    numPairs = int(nAtoms*(nAtoms-1)//2)

    if distMode == MODE_ALL:

        if verbose:
            print("creating array with {} elements...".format(numPairs))
            start = timer()

        tmpDists = np.zeros( numPairs, dtype=np.float64 )

        if verbose:
            end = timer()
            print("Time for matrix:", timedelta(seconds=end-start))

            print("running self_distance_array...")
            start = timer()

        # serial vs OpenMP
        mdadist.self_distance_array(selection.positions, result=tmpDists, backend=backend)

        if verbose:
            end = timer()
            print("Time for contact calculation:", timedelta(seconds=end-start))

    elif distMode == MODE_CAPPED:

        if verbose:
            print("creating array with {} elements...".format(numPairs))
            start = timer()

        # Pairs that the capped search does not return keep this placeholder,
        # which lies beyond the cutoff.
        tmpDists =  np.full( numPairs, cutoffDist*2, dtype=float )

        if verbose:
            end = timer()
            print("Time for matrix:", timedelta(seconds=end-start))

            print("running self_capped_distance...")
            start = timer()

        # method options are: 'bruteforce' 'nsgrid' 'pkdtree'
        pairs, distances = mdalibdist.self_capped_distance(selection.positions, max_cutoff=cutoffDist,
                                    min_cutoff=None, box=None, method='pkdtree', return_distances=True)

        if verbose:
            end = timer()
            print("Time for contact calculation:", timedelta(seconds=end-start))

            print("Found {} pairs and {} distances".format(len(pairs), len(distances)) )

            print("loading distances in array...")
            start = timer()
            if verbose > 1:
                startLoop = timer()

        for k, (i, j) in enumerate(pairs):

            if verbose > 1:
                if not k % 1000:
                    print("Loaded {} distances.".format(k))
                    print("Time for {} distances: {}".format(k, timedelta(seconds=timer()-startLoop)))
                    startLoop = timer()

            # Go from 2D atom indices to 1D (nAtoms*(nAtoms-1)/2) indices:
            ijLI = getLinIndexNumba(i, j, nAtoms)
            tmpDists[ ijLI ] = distances[k]

        if verbose:
            end = timer()
            print("Time for loading distances:", timedelta(seconds=end-start))

    # Restart the timer here for both modes so that the time reported below
    # covers atmToNodeDist only.
    if verbose:
        print("running atmToNodeDist...")
        start = timer()

    # Translate atoms distances in minimum node distance.
    atmToNodeDist(numNodes, nAtoms, tmpDists, atomToNode, nodeGroupIndicesNP, nodeGroupIndicesNPAux, nodeDists)

    if verbose:
        end = timer()
        print("Time for atmToNodeDist:", timedelta(seconds=end-start))
Exemplo n.º 22
0
 def _single_frame(self):
     """Append the current pairwise distances of ``self._ag`` to ``self.result`` as float32."""
     pair_distances = self_distance_array(self._ag.positions)
     as_float32 = np.asarray(pair_distances, dtype=np.float32)
     self.result.append(as_float32)