Пример #1
0
 def time_contact_matrix(self, num_atoms):
     """Time the dense contact calculation on one coordinate array.

     Calls ``distances.contact_matrix`` on ``self.coords_1`` with the
     arguments spelled out explicitly (15.0 cutoff, ``'numpy'`` return
     type, no box).  The result is discarded and ``num_atoms`` is not
     read inside the body.
     """
     call_options = {
         'coord': self.coords_1,
         'cutoff': 15.0,
         'returntype': 'numpy',
         'box': None,
     }
     distances.contact_matrix(**call_options)
Пример #2
0
 def time_contact_matrix_sparse(self, num_atoms):
     """Time the sparse contact calculation on one coordinate array.

     Same call as the dense benchmark but with ``returntype='sparse'``,
     i.e. the slower, reduced-memory code path meant for larger systems.
     The result is discarded and ``num_atoms`` is not read inside the
     body.
     """
     call_options = {
         'coord': self.coords_1,
         'cutoff': 15.0,
         'returntype': 'sparse',
         'box': None,
     }
     distances.contact_matrix(**call_options)
Пример #3
0
def populate_hmap(hmap, universe, resid):
    """Add the contacts of the current frame to a heatmap and normalise it.

    Each residue id in ``resid`` is selected from ``universe``.  If the
    residue is named ASP, LYS, GLU or ARG, the centre of mass of a fixed
    subset of its atoms is recorded; residues with any other name are
    skipped.  A contact matrix (4.0 cutoff, using ``universe.dimensions`` as
    the box) is computed between the recorded centres and added to ``hmap``.

    Parameters
    ----------
    hmap : numpy.ndarray
        N x N array of contact counts.  It is updated **in place** with the
        contacts found in this call, so N must equal the number of residues
        in ``resid`` that have one of the four supported names.
    universe : MDAnalysis Universe (or compatible)
        Object providing ``select_atoms`` and ``dimensions``.
    resid : sequence
        Residue ids to look up, in the order of the heatmap rows/columns.

    Returns
    -------
    numpy.ndarray
        A new array: ``hmap`` after the update, scaled so that its largest
        entry equals 100.
    """
    # Atom positions (relative to the residue's own selection) whose centre
    # of mass represents each supported residue type.
    # NOTE(review): presumably the charged side-chain atoms for one specific
    # topology/force field -- confirm before using with other inputs.
    site_atoms = {
        "ASP": [8, 9],
        "LYS": [16],
        "GLU": [11, 12],
        "ARG": [15, 16, 19],
    }

    com_store = []
    for res_id in resid:
        aa = universe.select_atoms("resid " + str(res_id))
        atom_indices = site_atoms.get(aa.resnames[0])
        if atom_indices is not None:
            com_store.append(aa.atoms[atom_indices].center_of_mass())

    contacts = distances.contact_matrix(
        np.array(com_store).astype(np.float32),
        cutoff=4.0, returntype="numpy", box=universe.dimensions)

    # In-place accumulation: the caller's array keeps the raw counts.
    hmap += contacts.astype(int)

    # Normalise to a percentage of the most frequent contact.
    return (hmap / np.amax(hmap)) * 100
    def _collect_contact_maps(self, positions):
        """Record the contact pairs of ``positions`` on the instance.

        A sparse contact matrix is computed with ``self._threshold`` as the
        cutoff and converted to COO format; its row and column index arrays
        are cast to int16 and appended to ``self._rows`` and ``self._cols``.
        """
        pairs = distances.contact_matrix(
            positions, self._threshold, returntype="sparse"
        ).tocoo()

        # Only the COO indices are kept; the matrix values are not stored.
        self._rows.append(pairs.row.astype("int16"))
        self._cols.append(pairs.col.astype("int16"))
Пример #5
0
 def run(self, **kwargs) -> List[List[int]]:
     """Group residues into clusters of mutually connected contacts.

     The centres returned by ``get_centers_by_residue`` for
     ``self.headgroups`` are turned into a contact (adjacency) matrix using
     ``self.cutoff`` and the box from ``self.get_box()``.  The connected
     components of the resulting graph are returned.

     If building the matrix raises ``ValueError`` the handler treats it as
     "matrix too big" and either falls back to the sparse method, asks the
     user to enable it, or re-raises, depending on the ``sparse`` setting.

     Parameters
     ----------
     **kwargs
         Accepted for interface compatibility; not used in the body.

     Returns
     -------
     list of list of int
         One list of node indices per connected component.

     Raises
     ------
     ValueError
         If the matrix cannot be built and no sparse fallback is allowed.
     """
     import networkx as nx

     box = self.get_box()
     coordinates = get_centers_by_residue(self.headgroups, box=box)
     try:
         adj = contact_matrix(coordinates, cutoff=self.cutoff, box=box,
                              returntype=self.returntype)
     except ValueError:
         # The original referenced bare ``sparse`` / ``cutoff`` names that
         # are not defined in this method.
         # NOTE(review): assumes the user's preference is stored as
         # ``self.sparse``; a missing attribute is treated as None
         # ("decide automatically") -- confirm against __init__.
         sparse = getattr(self, "sparse", None)
         if sparse is None:
             warnings.warn("NxN matrix is too big. Switching to sparse "
                           "matrix method")
             adj = contact_matrix(coordinates, cutoff=self.cutoff, box=box,
                                  returntype="sparse")
         elif sparse is False:
             raise ValueError("NxN matrix is too big. "
                              "Use `sparse=True`") from None
         else:
             # Sparse was already requested: nothing else to try.
             raise
     graph = nx.Graph(adj)
     return [list(c) for c in nx.connected_components(graph)]
Пример #6
0
    def from_universe(self, a_universe, cutoff, selection=None, index=0):
        r"""Calculate residue contact map from an MDAnalysis Universe instance

        Two residues are in contact when at least one pair of their selected
        atoms is in contact. Rows and columns of the map follow the sorted
        unique residue ids of the selection.

        Parameters
        ----------
        a_universe: :class:`~MDAnalysis.core.universe.Universe`
            Trajectory or single-conformation instance
        cutoff: float
            Cut-off distance defining a contact between two atoms
        selection: str
            Atomic selection for calculating interatomic contacts. All atoms
            are used if None is passed. See the
            `selections page <https://www.mdanalysis.org/docs/documentation_pages/selections.html>`_
            for atom selection syntax.
        index: int
            Trajectory frame to jump to before reading atomic positions

        Returns
        -------
        self: :class:`~idpflex.properties.ResidueContactMap`
            Instantiated ResidueContactMap object
        """  # noqa: E501
        if selection is None:
            self.selection = a_universe.atoms
        else:
            self.selection = a_universe.select_atoms(selection)
        a_universe.trajectory[index]  # jump to frame
        cm = np.asarray(contact_matrix(self.selection.positions,
                                       cutoff=cutoff))
        # Map every atom to the position of its residue in the sorted list of
        # unique residue ids (deterministic, unlike iterating over a set).
        unique_resids, res_of_atom = np.unique(self.selection.resids,
                                               return_inverse=True)
        n_res = len(unique_resids)
        # Cast the atomic map into a residue based map: every contacting atom
        # pair marks the corresponding residue pair.
        cmap = np.zeros((n_res, n_res), dtype=bool)
        atom_i, atom_j = np.nonzero(cm)
        cmap[res_of_atom[atom_i], res_of_atom[atom_j]] = True
        # symmetrize the contact map, keeping contacts found in either half
        cmap = cmap | cmap.T
        # self always in contact
        np.fill_diagonal(cmap, True)
        self.cmap = cmap
        self.errors = np.zeros(self.cmap.shape)
        return self
Пример #7
0
 def calculate_hic(self, polymer_text, teq, tsample, threshold=2.5):
     """
     Accumulate the 'Hi-C' contact-count matrix of the polymer.

     'polymer_text' is the selection string identifying the particles that
     belong to the polymer.  'teq' is the number of frames skipped at the
     start of the trajectory and 'tsample' the stride between the frames
     that are analysed.  For each analysed frame the contact matrix of the
     polymer (with the frame's box dimensions) is added to the total.

     The optional 'threshold' is the cutoff used to define a contact.

     The result is stored in 'self.hic'; nothing is returned.
     """
     universe = self.u
     beads = universe.select_atoms(polymer_text)
     n_beads = beads.n_atoms
     counts = np.zeros((n_beads, n_beads), dtype=int)
     for frame in universe.trajectory[teq::tsample]:
         in_contact = contact_matrix(beads.positions,
                                     cutoff=threshold,
                                     box=frame.dimensions)
         counts += in_contact.astype(int)
     self.hic = counts
Пример #8
0
def contact_map2(system, stride):
    """Build a 0/1 contact vector per analysed frame for residue pairs.

    Only heavy atoms (``not name H*``) are considered.  Two residues are in
    contact in a frame when any pair of their heavy atoms is within the
    cutoff.  Pairs of identical or sequence-adjacent residues are skipped,
    i.e. only pairs ``(i, j)`` with ``j >= i + 2`` are reported, ordered by
    ``i`` and then ``j``.

    Parameters
    ----------
    system : MDAnalysis Universe (or compatible)
        Provides ``trajectory``, ``residues`` and ``select_atoms``.
    stride : int
        Analyse every ``stride``-th frame (expected to be positive).

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n_analysed_frames, size)`` where ``size`` is
        ``contact_size(system)``; entries are 1.0 for a contact, else 0.0.
    """
    cutoff = 5.5
    sel_heavy = 'not name H*'
    n_frames = len(system.trajectory)
    # Number of frames visited by trajectory[::stride].
    num_data_points = len(range(0, n_frames, stride))
    size = contact_size(system)
    # ``np.float`` no longer exists in current NumPy; the builtin is the
    # same dtype (float64).
    contacts = np.empty((num_data_points, size), dtype=float)

    # Half-open [start, stop) ranges of each residue inside the heavy-atom
    # selection.
    slices = []
    min_i = 0
    for res in system.residues:
        max_i = min_i + res.atoms.select_atoms(sel_heavy).n_atoms
        slices.append((min_i, max_i))
        min_i = max_i

    # The residue pairs do not depend on the frame, so enumerate them once.
    n_res = len(slices)
    residue_pairs = [(slices[i], slices[j])
                     for i in range(n_res)
                     for j in range(i + 2, n_res)]

    heavy_system = system.select_atoms(sel_heavy)
    for index, ts in enumerate(system.trajectory[::stride]):
        if ts.frame % 100 == 0:
            output = round(100 * ts.frame / n_frames, 1)
            print(output,"% complete",end='\r')
            sys.stdout.flush()
        contact_map = np.zeros(size, dtype=float)
        atom_contacts = distances.contact_matrix(
            heavy_system.atoms.positions, cutoff=cutoff)
        for counter, ((i1, i2), (j1, j2)) in enumerate(residue_pairs):
            if np.any(atom_contacts[i1:i2, j1:j2]):
                contact_map[counter] = 1.
        contacts[index, :] = contact_map
    return contacts
Пример #9
0
def _computeTwoGroupsContacts(universe, grp1, grp2, cutoff, lifetime,
                              delta_frames, resmol, collapse):
    """Compute contacts between two atom groups along a trajectory.

    Every ``delta_frames``-th frame is analysed.  For each analysed frame the
    contact matrix of ``grp1 + grp2`` is computed and the grp1-vs-grp2 block
    is stored.  From these, a per-frame contact count and a table of atom
    pairs whose occupancy exceeds ``lifetime`` are produced.

    Parameters
    ----------
    universe : MDAnalysis Universe (or compatible)
        Provides the trajectory to iterate over.
    grp1, grp2 : atom groups
        The two groups; contacts are counted between grp1 and grp2 atoms.
    cutoff : float
        Distance cutoff passed to ``contact_matrix``.
    lifetime : float
        Minimum fraction of analysed frames (strictly exceeded) for a pair
        to be reported.
    delta_frames : int
        Stride between analysed frames.
    resmol : int or None
        Number of residues per molecule, used to derive a molecule number.
        If None, the largest resid of the combined groups is used.
    collapse : bool
        If true, keep only the first reported pair for each combination of
        residue numbers (columns 4 and 9 of the table).

    Returns
    -------
    list
        ``[contacts_series, info_contact_table]`` where ``contacts_series``
        is an array with columns (time, number of contacts) and
        ``info_contact_table`` is a list of 12-item rows, as described by the
        message printed at the end of the function.
    """
    print("\nComputing contacts in trajectory\n%s,\nbetween group of atoms %s (%d instances) and atoms %s (%d instances), using a %f A cutoff" \
         % (universe.trajectory.filename, list(set(grp1.names)), len(grp1.names), list(set(grp2.names)), len(grp2.names), cutoff) )
    print("")

    n_frames = universe.trajectory.n_frames
    steps = int(np.ceil(n_frames / (delta_frames * 1.0)))
    n_grp1 = len(grp1)

    total = grp1 + grp2

    # Compute distances
    distmat = np.zeros((steps, n_grp1, len(grp2)))

    # ``enumerate`` gives the integer slot of each analysed frame; the
    # original ``ts.frame / delta_frames`` is a float under Python 3 and
    # cannot be used as an array index.
    for step, ts in enumerate(universe.trajectory[::delta_frames]):
        sys.stdout.write("Computing distances in frame %d/%d   \r" %
                         (ts.frame, n_frames))
        sys.stdout.flush()
        distmat[step] = contact_matrix(
            coord=total.positions, cutoff=cutoff)[0:n_grp1, n_grp1:]

    sys.stdout.write(
        "Computing distances in frame %d/%d\n" %
        (n_frames, n_frames))

    # Filtering for occupancy < lifetime
    print("Filtering out pairs with lifetime < " + str(lifetime * 100) +
          "% frames analysed...")

    occupancymat = distmat.sum(0) / (1.0 * steps)

    # Computing contacts timeseries
    contacts_series = np.zeros((steps))
    for step, ts in enumerate(universe.trajectory[::delta_frames]):
        sys.stdout.write("Computing total contacts in frame %d/%d     \r" %
                         (ts.frame, n_frames))
        sys.stdout.flush()
        contacts_series[step] = np.sum(distmat[step, :, :])
    sys.stdout.write(
        "Computing total contacts in frame %d/%d     \n" %
        (n_frames, n_frames))

    times = [ts.time for ts in universe.trajectory[::delta_frames]]
    contacts_series = np.vstack((times, contacts_series)).transpose()

    # Assign pair information

    bool_occupancy = occupancymat > lifetime
    nr_kept = np.sum(bool_occupancy)

    # Fetch the per-atom arrays once instead of on every pair.
    atoms = total.atoms
    ids = atoms.ids
    resnames = atoms.resnames
    resids = atoms.resids
    resnumbers = atoms.resindices + 1

    info_contact_table = []
    if resmol is None:
        resinmol = max(resids)
    else:
        resinmol = resmol

    pair_nr = 0
    # Residue-number pairs already reported (only consulted when collapsing).
    seen_residue_pairs = set()

    for atom1 in range(n_grp1):
        for atom2 in range(len(grp2)):
            if not bool_occupancy[atom1, atom2]:
                continue

            pair_nr = pair_nr + 1
            sys.stdout.write("Assign pairs information: %d/%d     \r" %
                             (pair_nr, nr_kept))
            sys.stdout.flush()

            idx1 = atom1
            idx2 = atom2 + n_grp1  # grp2 atoms follow grp1 in ``total``

            # Floor division keeps the molecule number an integer, so the
            # same-molecule comparison below is meaningful.
            tmp_pair = [
                pair_nr,
                ids[idx1], resnames[idx1], resids[idx1],
                resnumbers[idx1], resnumbers[idx1] // resinmol + 1,
                ids[idx2], resnames[idx2], resids[idx2],
                resnumbers[idx2], resnumbers[idx2] // resinmol + 1,
                occupancymat[atom1, atom2],
            ]

            # Excluding subsequent residues in a chain
            if (tmp_pair[5] == tmp_pair[10]
                    and not tmp_pair[3] + 2 < tmp_pair[8]):
                continue

            if collapse:
                # Keep only the first pair per residue-number combination.
                residue_pair = (tmp_pair[4], tmp_pair[9])
                if residue_pair in seen_residue_pairs:
                    continue
                seen_residue_pairs.add(residue_pair)

            info_contact_table.append(tmp_pair)

    sys.stdout.write("Assigning pairs information: %d/%d     \n" %
                     (nr_kept, nr_kept))

    print("\n \
OUTPUT [contacts_series, info_contacts_table] \n \
	contacts_series:		np.array, shape (n_frames,2) \n \
	info_contacts_table:	list, shape (nr_all_contacts, 12) \n \
		for each contact: \n \
			[0] Pair_nr, 		[1] A1_pdb_id, 		[2] A1_resname, [3] A1_resid, \n \
			[4] A1_resnumber, 	[5] A1_mol, 		[6] A2_pdb_id, 	[7] A2_resname, \n \
			[8] A2_resid, 		[9] A2_resnumber, 	[10] A2_mol, 	[11] occupational_time")

    return [contacts_series, info_contact_table]
def traj_to_dset(
    topology: PathLike,
    ref_topology: PathLike,
    traj_file: PathLike,
    save_file: Optional[PathLike] = None,
    cutoff: float = 8.0,
    selection: str = "protein and name CA",
    skip_every: int = 1,
    verbose: bool = False,
    print_every: int = 10,
):
    """
    Implementation for generating machine learning datasets
    from raw molecular dynamics trajectory data. This function
    uses MDAnalysis to load the trajectory file and given a
    custom atom selection computes contact matrices, RMSD to
    reference state, fraction of reference contacts and the
    point cloud (xyz coordinates) of each frame in the trajectory.

    Parameters
    ----------
    topology : PathLike
        Path to topology file: CHARMM/XPLOR PSF topology file,
        PDB file or Gromacs GRO file.
    ref_topology : PathLike
        Path to reference topology file for aligning trajectory:
        CHARMM/XPLOR PSF topology file, PDB file or Gromacs GRO file.
    traj_file : PathLike
        Trajectory file (in CHARMM/NAMD/LAMMPS DCD, Gromacs XTC/TRR,
        or generic). Stores coordinate information for the trajectory.
    cutoff : float
        Angstrom cutoff distance to compute contact maps.
    save_file : Optional[PathLike]
        Path to output h5 dataset file name. Nothing is written if omitted.
    selection : str
        Selection set of atoms in the protein.
    skip_every : int
        Only collect data every `skip_every` frames.
    verbose: bool
        If true, prints verbose output.
    print_every: int
        Prints update every `print_every` frame.

    Returns
    -------
    Tuple : rmsds, fncs, rows, cols, point_clouds
        Per-frame RMSDs, fractions of reference contacts, contact row and
        column indices, and the point clouds as an array transposed to
        (frame, coordinate, atom). The contact distances are passed to
        ``write_h5`` but are not returned.
    """

    # start timer
    start_time = time.time()

    # Load simulation and reference structures
    sim = MDAnalysis.Universe(str(topology), str(traj_file))
    ref = MDAnalysis.Universe(str(ref_topology))

    if verbose:
        print("Traj length: ", len(sim.trajectory))

    # Align trajectory to compute accurate RMSD and point cloud
    align.AlignTraj(sim, ref, select=selection, in_memory=True).run()

    if verbose:
        print(f"Finish aligning after: {time.time() - start_time} seconds")

    # Atom selection for reference
    atoms = sim.select_atoms(selection)
    # Get amino acid sequence
    residues = [r.resname for r in atoms.residues]
    # Get atomic coordinates of reference atoms
    ref_positions = ref.select_atoms(selection).positions.copy()
    # Get box dimensions
    # NOTE(review): the box is read once, before iterating; confirm that
    # per-frame box changes can be ignored for the intended inputs.
    box = sim.atoms.dimensions
    # Get contact map of reference atoms (computed without a box)
    ref_cm = distances.contact_matrix(ref_positions,
                                      cutoff,
                                      returntype="sparse")

    rmsds, fncs, rows, cols, vals, point_clouds = [], [], [], [], [], []

    for i, _ in enumerate(sim.trajectory[::skip_every]):

        # Point cloud positions of selected atoms in frame i
        positions = atoms.positions

        # Compute sparse contact map of current frame
        cm = distances.contact_matrix(positions,
                                      cutoff,
                                      box=box,
                                      returntype="sparse")
        coo = cm.tocoo()
        rows.append(coo.row.astype("int16"))
        cols.append(coo.col.astype("int16"))
        # TODO: Could try to use self_distance_array to compute n*(n-1)/2 dim
        # array instead of n*n
        dists = distances.distance_array(positions, positions, box=box)
        # Gather the distance of every contact pair in one indexing step
        # instead of a Python-level loop over the pairs.
        vals.append(dists[coo.row, coo.col])

        # Compute and store fraction of native contacts
        fncs.append(fraction_of_contacts(cm, ref_cm))

        # Compute and store RMSD to reference state
        rmsds.append(
            rms.rmsd(positions, ref_positions, center=True,
                     superposition=True))

        # Store reference atoms point cloud of current frame
        point_clouds.append(positions.copy())

        if verbose:
            if i % print_every == 0:
                msg = f"Frame {i}/{len(sim.trajectory)}"
                msg += f"\trmsd: {rmsds[i]}"
                msg += f"\tfnc: {fncs[i]}"
                msg += f"\tshape: {positions.shape}"
                msg += f"\trow shape: {rows[-1].shape}"
                msg += f"\tvals shape: {vals[-1].shape}"
                print(msg)

    # (frame, atom, xyz) -> (frame, xyz, atom)
    point_clouds = np.transpose(point_clouds, [0, 2, 1])

    if save_file:
        write_h5(save_file, rmsds, fncs, rows, cols, vals, point_clouds,
                 residues)

    if verbose:
        print(f"Duration {time.time() - start_time}s")

    return rmsds, fncs, rows, cols, point_clouds
Пример #11
0
def benchmark_mdanalysis_sparse(coord, NUMBER_EXECUTORS=1):
    """Time one sparse ``contact_matrix`` call on ``coord``.

    Returns a comma-separated line holding the benchmark label, the number
    of coordinates and the elapsed wall-clock seconds (two decimals).
    ``NUMBER_EXECUTORS`` is accepted but not used in the body.
    """
    t_begin = time.time()
    contact_matrix(coord, returntype="sparse")
    n_coord = len(coord)
    elapsed = time.time() - t_begin
    return "ComputeDistanceMDAnalysisSparse, %d, %.2f" % (n_coord, elapsed)
Пример #12
0
 def _compute_contact_map(self, positions):
     """Return the sparse contact matrix of ``positions``.

     ``self._threshold`` is passed as the cutoff.
     """
     return distances.contact_matrix(
         positions, self._threshold, returntype="sparse"
     )