示例#1
0
文件: cluster.py 项目: Cloudac7/dpana
def lindemann_per_frames(u: Universe, select_lang):
    """Calculate the per-atom Lindemann index after every trajectory frame.

    Pairwise distance means and sums of squared deviations are accumulated
    frame by frame with Welford's online algorithm. After each frame the
    running statistics are converted into one value per atom.

    Parameters
    ----------
    u : MDA trajectory instance.
    select_lang : selection string handed to ``u.select_atoms``.

    Returns
    -------
    ndarray of shape (len_frames, natoms)
        Row ``q`` holds, for every selected atom, the NaN-ignoring mean over
        all partner atoms of ``sqrt(M2 / len_frames) / mean_distance``, using
        the statistics accumulated up to and including frame ``q``.

    Notes
    -----
    The accumulated squared deviations are divided by the total number of
    frames, not by the number of frames processed so far.
    NOTE(review): confirm this normalisation is intended for the
    intermediate frames.

    The selection has to match the same number of atoms in every frame,
    otherwise the running arrays cannot be updated.
    """
    natoms = len(u.select_atoms(select_lang))
    nframes = len(u.trajectory)
    array_mean = np.zeros((natoms, natoms))
    array_var = np.zeros((natoms, natoms))
    # Only the per-atom averages are kept, so memory is O(frames * natoms)
    # instead of O(frames * natoms**2).
    per_atom_index = np.zeros((nframes, natoms))
    cluster = u.select_atoms(select_lang, updating=True)

    for q, _ in enumerate(u.trajectory):
        coords = cluster.positions
        array_distance = distance.cdist(coords, coords)

        # Welford update applied to the whole (symmetric) matrix at once.
        # The diagonal distances are zero, so mean and variance stay zero
        # there, exactly as if only the off-diagonal pairs were updated.
        delta = array_distance - array_mean
        array_mean += delta / (q + 1)
        array_var += delta * (array_distance - array_mean)

        # The diagonal is 0/0 -> NaN and is skipped by nanmean below.
        lindemann_indices = np.divide(np.sqrt(np.divide(array_var, nframes)),
                                      array_mean)
        per_atom_index[q] = np.nanmean(lindemann_indices, axis=1)

    return per_atom_index
示例#2
0
def classifyResiduesIntoTwo(apo_pdb, holo_pdb, ligname, cutoff=4.0):
    """Split aromatic residues into cryptic and non-cryptic sets.

    The ligand coordinates are taken from the holo structure and compared
    with the protein atoms of the apo structure. An aromatic residue
    (PHE, TRP, TYR, HIS) of the apo structure with at least one atom within
    ``cutoff`` of any ligand atom is labelled cryptic. Every other aromatic
    residue of the holo structure (ligand excluded) is labelled non-cryptic.

    Returns a tuple ``(cryptic, not_cryptic)`` of sets holding
    ``"<resname><resid>"`` strings.
    """
    aromatic_resnames = {'PHE', 'TRP', 'TYR', 'HIS'}
    u_holo, u_apo = Universe(holo_pdb), Universe(apo_pdb)
    ligand = u_holo.select_atoms(f'resname {ligname}')
    holo = u_holo.select_atoms(f'not resname {ligname}')
    apo = u_apo.select_atoms('protein')

    # Aromatic apo residues that come closer to the ligand than the cutoff,
    # i.e. residues that would clash with the ligand in the apo state.
    cryptic = {
        f'{apo_atom.resname}{apo_atom.resid}'
        for lig_atom in ligand
        for apo_atom in apo
        if apo_atom.resname in aromatic_resnames
        and np.linalg.norm(lig_atom.position - apo_atom.position) <= cutoff
    }

    # All aromatic residues of the holo structure; specialised for aromatics.
    all_aromatic = {
        f'{residue.resname}{residue.resid}'
        for residue in holo.residues
        if residue.resname in aromatic_resnames
    }

    return cryptic, all_aromatic - cryptic
示例#3
0
 def test_write_selection(self):
     """Write the ``name C*`` selection to a file and compare sizes on re-read."""
     carbon_selection = "name C*"
     written = Universe(mol2_molecule).select_atoms(carbon_selection)
     written.write(self.outfile)
     reread = Universe(self.outfile).select_atoms(carbon_selection)
     assert_equal(len(written), len(reread))
def main():
    """Score every frame of the merged complex by its inter-chain CA distances.

    For each frame the CA-CA distance matrix between segments A/B and
    segments C/D/E is computed. Distances below ``lower`` are counted as
    violations, distances in ``[lower, upper]`` as contacts. The score is
    ``-0.59 * ln(nContacts / nViolations)``, or NaN when there are no
    violations. One line per frame (numbered from 1) is written to
    ``model_no.out``.
    """
    u = Universe(
        '/Volumes/HD-siida/gtail_b1_sys/analysis/merged_aligned_complexes.pdb')
    print(u.atoms.segids)
    integrin_ca = u.select_atoms('segid A B and name CA')
    laminin_ca = u.select_atoms('segid C D E and name CA')

    lower, upper = 6.0, 10.0
    header = f'#MODEL NO, nViolations (if r<{lower}), nContacts ({lower}<=r<={upper}) \n'
    with open('model_no.out', 'w') as fout:
        fout.write(header)

        # Model numbering starts at 1.
        for model_no, _ in enumerate(tqdm(u.trajectory), 1):
            distances = distance.cdist(integrin_ca.positions,
                                       laminin_ca.positions,
                                       metric='euclidean')
            too_close = distances < lower
            in_contact = (distances <= upper) & (distances >= lower)
            n_violations = len(distances[too_close])
            n_contacts = len(distances[in_contact])

            if n_violations == 0:
                score = np.nan
            else:
                score = -0.59 * np.log(n_contacts / n_violations)
            fout.write(f'{model_no}, {n_violations}, {n_contacts}, {score}\n')
示例#5
0
 def test_write_selection(self):
     """Write the ``name C*`` selection to a file and compare sizes on re-read."""
     carbon_selection = "name C*"
     written = Universe(mol2_molecule).select_atoms(carbon_selection)
     written.write(self.outfile)
     reread = Universe(self.outfile).select_atoms(carbon_selection)
     assert_equal(len(written), len(reread))
示例#6
0
def distanceMatrix(trajFile, sele1, sele2, ref=None):
    """Collect pairwise distances between two selections for every frame.

    Parameters
    ----------
    trajFile : file passed to ``Universe`` (alone, or after ``ref``).
    sele1, sele2 : selection strings for the two atom groups.
    ref : optional first argument for ``Universe``; when given the universe
        is built as ``Universe(ref, trajFile)``.

    Returns
    -------
    list of 1-D ndarrays, one per frame.
        When ``sele1 == sele2`` each array holds the unique pairs ``i < j``
        (strict upper triangle, row-major order). Otherwise it is the
        flattened full distance matrix.
    """
    if ref is None:
        u = Universe(trajFile)
    else:
        print("* Reference is given.")
        u = Universe(ref, trajFile)

    s1, s2 = u.select_atoms(sele1), u.select_atoms(sele2)
    print("* pair 1: \n    ", s1)
    print("* pair 2: \n    ", s2)

    self_pairs = sele1 == sele2
    if self_pairs:
        # Pick the i < j entries by index rather than by filtering zeros.
        # Filtering on value would also discard coinciding atoms and make
        # the per-frame arrays differ in length.
        upper = np.triu_indices(len(s1), k=1)

    distances = []
    for _ in tqdm(u.trajectory):
        dist = distance.cdist(s1.positions, s2.positions, metric='euclidean')
        if self_pairs:
            dist = dist[upper]
        else:
            dist = dist.flatten()
        distances.append(dist)
    return distances
示例#7
0
def calc_tilt_end_to_end(universe: mda.Universe,
                         resid_up,
                         resid_down,
                         fname="TMD_tilt.dat"):
    ''' Calculate the tilt between the z axis and the vector joining the
        centers of mass of resid_up and resid_down.

        For every frame the angle (in degrees) is folded into [0, 90] and
        written, together with the frame time, as one row of ``fname``.
        The file starts with a "time  tilt" header line.
    '''
    header_fmt = '{: <15}{: <20}'
    row_fmt = '{: <15}{: <20.5f}'
    with open(fname, "w") as outf:
        print(header_fmt.format("time", "tilt"), file=outf)

        # Loop-invariant: the selection strings depend only on residue ids,
        # and the groups read their coordinates from the current frame.
        zaxis = np.array([0, 0, 1])
        sel_u = universe.select_atoms("resid {}".format(resid_up))
        sel_d = universe.select_atoms("resid {}".format(resid_down))

        for t in range(universe.trajectory.n_frames):
            time = universe.trajectory[t].time
            LOGGER.info("At %s", time)
            end_to_end = sel_d.center_of_mass() - sel_u.center_of_mass()
            costilt = np.dot(end_to_end, zaxis) / np.linalg.norm(end_to_end)
            angle = np.arccos(costilt) * (180 / np.pi)
            # Direction of the vector is irrelevant: map (90, 180] onto [0, 90).
            if angle > 90:
                angle -= 180
            print(row_fmt.format(time, abs(angle)), file=outf)
示例#8
0
def cluster_coordinates(  # TODO: rewrite the method
    nvt_run: Universe,
    select_dict: Dict[str, str],
    run_start: int,
    run_end: int,
    species: List[str],
    distance: float,
    basis_vectors: Optional[Union[List[np.ndarray], np.ndarray]] = None,
    cluster_center: str = "center",
) -> np.ndarray:
    """Calculates the average position of a cluster.

    The cluster is the set of atoms matching any entry of ``species`` that
    lie within ``distance`` of the first atom selected by
    ``select_dict[cluster_center]``. Membership is evaluated once, on the
    frame that is current when the function is called. The positions of
    those atoms are then averaged over the frames ``run_start:run_end``.

    Args:
        nvt_run: An MDAnalysis ``Universe`` containing wrapped trajectory.
        select_dict: A dictionary of atom species selection, where each atom species name is a key
            and the corresponding values are the selection language.
        run_start: Start frame of analysis.
        run_end: End frame of analysis.
        species: A list of selection strings for the species in the cluster.
        distance: The coordination cutoff distance.
        basis_vectors: Two or three vectors defining the frame in which the
            coordinates are expressed. With two vectors the remaining axes
            are built from cross products. ``None`` or an empty sequence
            disables the transformation.
        cluster_center: Cluster center atom species.

    Returns:
        An array of the averaged coordinates of the cluster atoms. When basis
        vectors are given, the coordinates are expressed in the normalized
        basis and shifted so that their mean is at the origin.

    Raises:
        ValueError: If ``basis_vectors`` has a length other than 2 or 3.
    """
    trj_analysis = nvt_run.trajectory[run_start:run_end:]
    cluster_center_atom = nvt_run.select_atoms(select_dict.get(cluster_center),
                                               periodic=True)[0]
    selection = ("(" + " or ".join(species) + ") and (around " +
                 str(distance) + " index " + str(cluster_center_atom.index) +
                 ")")
    shell = nvt_run.select_atoms(selection, periodic=True)

    # Read the trajectory once and grab all shell atoms per frame, instead
    # of re-reading the whole trajectory for every single atom.
    frame_positions = [shell.positions for _ in trj_analysis]
    cluster_array = np.mean(np.array(frame_positions), axis=0)

    # Explicit None/length test: the truth value of an ndarray is ambiguous.
    if basis_vectors is None or len(basis_vectors) == 0:
        return cluster_array

    if len(basis_vectors) == 2:
        vec1 = basis_vectors[0]
        vec2 = basis_vectors[1]
        vec3 = np.cross(vec1, vec2)
        vec2 = np.cross(vec1, vec3)
    elif len(basis_vectors) == 3:
        vec1 = basis_vectors[0]
        vec2 = basis_vectors[1]
        vec3 = basis_vectors[2]
    else:
        raise ValueError("incorrect vector format")
    vec1 = vec1 / np.linalg.norm(vec1)
    vec2 = vec2 / np.linalg.norm(vec2)
    vec3 = vec3 / np.linalg.norm(vec3)
    basis_xyz = np.transpose([vec1, vec2, vec3])
    # Solve basis_xyz @ c = r for every atom to get its basis coordinates.
    cluster_norm = np.linalg.solve(basis_xyz, cluster_array.T).T
    cluster_norm = cluster_norm - np.mean(cluster_norm, axis=0)
    return cluster_norm
示例#9
0
文件: util.py 项目: htz1992213/mdgo
def res_dict_from_select_dict(u: Universe,
                              select_dict: Dict[str, str]) -> Dict[str, str]:
    """
    Infer res_dict (residue selection) from select_dict (atom selection) in a MDAnalysis.universe object.

    Each atom selection is widened to ``same resid as (<selection>)``. An
    entry is kept when its key is "cation" or "anion", or when the resulting
    atom group has not been produced by an earlier entry. If both "cation"
    and "anion" end up selecting the same group, they are merged into a
    single "salt" entry.

    Args:
        u: The universe object the selections are evaluated on.
        select_dict: A dictionary of atom species, where each atom species name is a key
                and the corresponding values are the selection language.

    return:
        A dictionary mapping species names to residue-level selection strings.
    """
    seen_groups = []
    res_dict = {}
    for species, atom_selection in select_dict.items():
        residue_selection = "same resid as (" + atom_selection + ")"
        group = u.select_atoms(residue_selection)
        # Ions are always recorded; other species only when their residues
        # have not been seen before.
        if species not in ["cation", "anion"] and group in seen_groups:
            continue
        seen_groups.append(group)
        res_dict[species] = residue_selection

    if "cation" in res_dict and "anion" in res_dict:
        cation_group = u.select_atoms(res_dict["cation"])
        anion_group = u.select_atoms(res_dict["anion"])
        if cation_group == anion_group:
            res_dict.pop("anion")
            res_dict["salt"] = res_dict.pop("cation")
    return res_dict
示例#10
0
def save_systems(flex: mda.Universe, protein: mda.Universe,
                 crystal: mda.Universe, dir: str):
    """Write the heavy atoms of the flexible residues to PDB files.

    For every protein residue of ``flex`` the matching non-hydrogen atoms
    are selected in ``protein`` and ``crystal`` and written to
    ``pflex-<resname>-<segid><resnum><icode>.pdb`` and the corresponding
    ``cflex-...`` file inside ``dir``. Afterwards all flexible residues
    together are written to ``pflex.pdb`` and ``cflex.pdb``.

    Assertions check that ``protein`` and ``crystal`` yield the same number
    of atoms for each residue and for the combined selection.
    """
    def sel(resnum, resname, segid, icode) -> str:
        return f"(resid {resnum}{icode} and resname {resname} and segid {segid})"

    flexres = flex.select_atoms("protein").residues

    residues = []
    for res in flexres:
        key = (res.resnum, res.resname, res.segid, res.icode)
        ressel = sel(*key) + " and not (type H or name H*)"

        # Same single residue in both structures
        p_res = protein.select_atoms(ressel)
        c_res = crystal.select_atoms(ressel)

        assert p_res.n_atoms == c_res.n_atoms

        suffix = f"{res.resname}-{res.segid}{res.resnum}{res.icode}.pdb"
        pfname = os.path.join(dir, "pflex-" + suffix)
        cfname = os.path.join(dir, "cflex-" + suffix)

        # Per-residue PDB files
        p_res.write(pfname)
        c_res.write(cfname)

        residues.append(key)

    # Every flexible residue must have been recorded
    assert len(residues) == len(flexres)

    # TODO: Can be improved by using ressel
    selection = " or ".join(sel(*key) for key in residues)

    # Drop H atoms
    # TODO: Possibly need perception for atom name, when type is not present
    selection = f"({selection}) and not (type H or name H*)"

    p_atoms = protein.select_atoms(selection)
    c_atoms = crystal.select_atoms(selection)

    # Both selections must contain the same number of atoms
    assert len(p_atoms) == len(c_atoms)

    p_atoms.write(os.path.join(dir, "pflex.pdb"))
    c_atoms.write(os.path.join(dir, "cflex.pdb"))
示例#11
0
 def test_atomgroups(self):
     """Check that the altloc A and altloc B selections of segid B are equal in size."""
     u = Universe(self.filename)

     def count(selection):
         return len(u.select_atoms(selection))

     without_b = count("segid B and (not altloc B)")
     without_a = count("segid B and (not altloc A)")
     assert_equal(without_b, without_a)

     only_a = count("segid B and (altloc A)")
     only_b = count("segid B and (altloc B)")
     assert_equal(only_a, only_b)

     # Atoms without altloc B plus one alternate location give the segment.
     total = count("segid B")
     assert_equal(total, without_b + only_a)
示例#12
0
def num_of_neighbor_simple(
    nvt_run: Universe,
    center_atom: Atom,
    distance_dict: Dict[str, float],
    select_dict: Dict[str, str],
    run_start: int,
    run_end: int,
) -> Dict[str, np.ndarray]:
    """Calculates solvation structure type (1 for SSIP, 2 for CIP and 3 for AGG) with respect to the ``center_atom``
    in the specified frame range.

    Per frame the classification is:

    * 1 when no counter-ion is found in the shell of the center atom,
    * 2 when exactly one counter-ion is found and the shell around that
      counter-ion contains exactly one atom of the center atom's type,
    * 3 in every other case.

    Args:
        nvt_run: An MDAnalysis ``Universe`` containing wrapped trajectory.
        center_atom: The solvation shell center atom.
        distance_dict: A dict of coordination cutoff distance of the neighbor species.
            Must contain exactly one entry.
        select_dict: A dictionary of atom species selection, where each atom species name is a key
            and the corresponding values are the selection language.
        run_start: Start frame of analysis.
        run_end: End frame of analysis.

    Returns:
        A dict with "total" as the key and an array of the solvation structure type in the specified frame range
        as the value.
    """
    trj_analysis = nvt_run.trajectory[run_start:run_end:]
    center_selection = "same type as index " + str(center_atom.index)
    assert len(
        distance_dict
    ) == 1, "Please only specify the counter-ion species in the distance_dict"
    species = next(iter(distance_dict))
    cn_values = np.zeros(int(len(trj_analysis)))

    for frame_idx, _ in enumerate(trj_analysis):
        selection = select_shell(select_dict, distance_dict, center_atom,
                                 species)
        shell = nvt_run.select_atoms(selection, periodic=True)
        n_neighbors = len(shell)

        if n_neighbors == 0:
            structure_type = 1
        elif n_neighbors > 1:
            structure_type = 3
        else:
            # Exactly one counter-ion: look at its own shell of center-type
            # atoms. One of them is subtracted from the count.
            # NOTE(review): presumably the subtracted one is ``center_atom``
            # itself - confirm.
            selection_species = select_shell(center_selection, distance_dict,
                                             shell.atoms[0], species)
            shell_species = nvt_run.select_atoms(selection_species,
                                                 periodic=True)
            n_other_centers = len(shell_species) - 1
            structure_type = 2 if n_other_centers == 0 else 3

        cn_values[frame_idx] = structure_type

    return {"total": cn_values}
示例#13
0
def split_molecules(
    u: mda.Universe,
    keep_ions: bool = False
) -> Dict[str, Union[mda.AtomGroup, List[mda.AtomGroup]]]:
    """
    Split a structure into protein, water and the remaining molecules.

    Args:
        u (mda.Universe): MDAnalysis universe
        keep_ions (bool, optional): When ``False`` (default), residues whose
            name matches the ion pattern ``[A-Z]?[+-]`` are left out.

    Returns:
        A dictionary with the name of the selection and the corresponding
        selection. ``"protein"`` and ``"water"`` (resname ``WAT``, else
        ``HOH``) are only present when non-empty. Every other residue is
        stored under its residue name as the object yielded by
        ``AtomGroup.residues``, or as a list of such objects when several
        residues share the same name.
    """

    split = {}

    # Select protein
    protein = u.select_atoms("protein")
    if len(protein.atoms) != 0:  # Check if protein is present
        split["protein"] = protein

    # Select water molecules: the first non-empty naming convention wins
    for water_name in ["WAT", "HOH"]:
        water = u.select_atoms(f"resname {water_name}")

        if len(water.atoms) != 0:
            break  # If selection is not empty, stop
    if len(water.atoms) != 0:  # Check if water is present
        split["water"] = water

    # Other molecules
    other = u.select_atoms("all") - protein - water
    for res in other.residues:  # Loop over all "other" residues
        name = res.resname

        if not keep_ions and re.search(r"[A-Z]?[+-]", name) is not None:
            # Skip only this ion; the remaining residues must still be
            # processed.
            continue

        if name not in split:
            split[name] = res
        elif isinstance(split[name], list):
            split[name].append(res)
        else:
            # Second residue with this name: promote the entry to a list
            split[name] = [split[name], res]

    return split
示例#14
0
def select_flexres(flex: mda.Universe, prot: mda.Universe) -> mda.AtomGroup:
    """
    Given a protein and a series of flexible residues, selects the full flexible
    residues (including backbone atoms) from the protein structure.

    Residues named ALA, CYS, GLY, PRO, SER, THR or VAL are excluded from the
    result.

    Args:
        flex (mda.Universe): flexible residues
        prot (mda.Universe): protein

    Returns:
        An `mda.AtomGroup` containing the atoms corresponding to flexible residues
        extracted from the protein (including backbone atoms)
    """

    residue_terms = [
        f"(resid {res.resnum}{res.icode} and resname {res.resname} and segid {res.segid})"
        for res in flex.residues
    ]

    # Remove residues without Janin dihedrals; ignoring them explicitly
    # removes a warning. The residue alternatives are wrapped in their own
    # parentheses so that the exclusion applies to all of them regardless of
    # how the selection parser ranks "and" against "or".
    sel = (
        "(" + " or ".join(residue_terms) + ") "
        "and not (resname ALA or resname CYS or resname GLY or resname PRO or resname SER or resname THR or resname VAL)"
    )

    return prot.select_atoms(sel)
示例#15
0
 def test_bonds(self):
     """Check that the first altloc A and altloc B atoms of segid B have the same bond count."""
     u = Universe(self.filename, guess_bonds=True)
     # the topology has to be built before individual atom bonds are queried
     u.build_topology()
     first_altloc_a = u.select_atoms("segid B and (altloc A)")[0]
     first_altloc_b = u.select_atoms("segid B and (altloc B)")[0]
     assert_equal(len(first_altloc_a.bonds), len(first_altloc_b.bonds))
示例#16
0
def select(system: mda.Universe,
           distance: float,
           removeHs: bool = False) -> Tuple[np.ndarray, np.ndarray]:
    """
    Select the binding site of a protein-ligand complex.

    Parameters
    ---------
    system: mda.Universe
        Protein-ligand complex; the ligand must have residue name ``LIG``
    distance: float
        Ligand-residues distance
    removeHs: bool
        Drop atoms whose element is ``"H"``

    Returns
    -------
    Tuple[np.ndarray, np.ndarray]
        Array of elements and array of cartesian coordinates for the ligand
        and the protein atoms within the binding site

    Notes
    -----
    The binding site is defined by residues with at least one atom within
    :code:`distance` from the ligand, plus the ligand itself.
    """
    binding_site = system.select_atoms(
        f"(byres (around {distance} (resname LIG))) or (resname LIG)")

    # Elements from PDB file needs MDAnalysis@develop (see #2648)
    elements = binding_site.elements
    positions = binding_site.positions

    if not removeHs:
        return elements, positions

    heavy = elements != "H"
    return elements[heavy], positions[heavy]
示例#17
0
 def test_bonds(self):
     """Check that the first altloc A and altloc B atoms of segid B have the same bond count."""
     u = Universe(self.filename, guess_bonds=True)
     # the topology has to be built before individual atom bonds are queried
     u.build_topology()
     first_altloc_a = u.select_atoms("segid B and (altloc A)")[0]
     first_altloc_b = u.select_atoms("segid B and (altloc B)")[0]
     assert_equal(len(first_altloc_a.bonds), len(first_altloc_b.bonds))
示例#18
0
def main():
    """Compute contact probabilities between two segments and optionally
    prepare a PyMOL visualisation.

    The contact probability matrix returned by ``GetContacts.run`` is saved
    as text to the requested output file. When the pymol option is ``'Y'``
    a PyMOL script is generated; if no PDB file was supplied, the two
    segments are written from the current frame to ``forpymol.pdb`` first.
    """
    # command line options
    options = parse_options()
    psf, dcd = options.psf_file, options.dcd_file
    chain1, chain2 = options.segid1, options.segid2
    selection1, selection2 = options.selection1, options.selection2
    cutoff = options.cutoff
    output = options.output_file
    want_pymol = options.pymol
    pdb_for_pymol = options.pymol_pdb

    # trajectory is read through MDAnalysis
    u = Universe(psf, dcd)

    # contact probability
    contacts = GetContacts(u)
    contactprob, bio1, bio2 = contacts.run(chain1, chain2, selection1,
                                           selection2, cutoff)
    np.savetxt(output, contactprob, fmt='%4.2f', delimiter=" ")

    if want_pymol != 'Y':
        return

    # no pdb file supplied: write one from the trajectory
    if pdb_for_pymol is None:
        both_chains = u.select_atoms("segid %s or segid %s" %
                                     (chain1, chain2))
        both_chains.write('forpymol.pdb', remarks=None)
        pdb_for_pymol = 'forpymol.pdb'
        # check pdb file format for weird encoding
        check_pdb(pdb_for_pymol)

    pymol_contact_visu(contactprob, pdb_for_pymol, chain1, chain2, bio1, bio2)
示例#19
0
def analyze_radgyr(u: mda.Universe) -> List[float]:
    """Return the radius of gyration of the standard selection for every frame."""
    atoms = u.select_atoms(STANDARD_SELECTION)
    # The atom group follows the trajectory, so each step of the iteration
    # evaluates the radius on the frame that is current at that moment.
    return [atoms.radius_of_gyration() for _ in u.trajectory]
示例#20
0
文件: util.py 项目: htz1992213/mdgo
def select_dict_from_resname(u: Universe) -> Dict[str, str]:
    """
    Infer select_dict (possibly interested atom species selection) from resnames in a MDAnalysis.universe object.
    The resname must be pre-assigned already.

    For every distinct, non-empty residue name the first matching residue is
    inspected:

    * a residue with non-zero net charge is handed to
      ``extract_atom_from_ion`` as cation (charge > 0) or anion;
    * a neutral residue with fewer than two fragments is handed to
      ``extract_atom_from_molecule``;
    * a neutral residue with two or more fragments is processed fragment by
      fragment, each one being treated as cation, anion or molecule
      depending on its summed charge (threshold +/- 0.001).

    Args:
        u: The universe object to work with.

    return:
        A dictionary of atom species.
    """
    select_dict: Dict[str, str] = {}
    for resname in np.unique(u.residues.resnames):
        if resname == "":
            continue
        residue = u.select_atoms("resname " + resname).residues[0]

        # Charged residue: the residue as a whole is one ion.
        if not np.isclose(residue.charge, 0, atol=1e-5):
            if residue.charge > 0:
                extract_atom_from_ion(True, residue, select_dict)
            else:
                extract_atom_from_ion(False, residue, select_dict)
            continue

        fragments = residue.atoms.fragments
        n_fragments = len(fragments)

        if n_fragments < 2:
            extract_atom_from_molecule(resname, residue, select_dict)
        elif n_fragments == 2:
            # Ions are passed without a number here; neutral fragments are
            # numbered by their position.
            for position, frag in enumerate(fragments):
                frag_charge = np.sum(frag.charges)
                if frag_charge > 0.001:
                    extract_atom_from_ion(True, frag, select_dict)
                elif frag_charge < -0.001:
                    extract_atom_from_ion(False, frag, select_dict)
                else:
                    extract_atom_from_molecule(resname,
                                               frag,
                                               select_dict,
                                               number=position + 1)
        else:
            # More than two fragments: cations, anions and molecules are
            # numbered independently.
            cation_number = 1
            anion_number = 1
            molecule_number = 1
            for frag in fragments:
                frag_charge = np.sum(frag.charges)
                if frag_charge > 0.001:
                    extract_atom_from_ion(True, frag, select_dict,
                                          cation_number)
                    cation_number += 1
                elif frag_charge < -0.001:
                    extract_atom_from_ion(False, frag, select_dict,
                                          anion_number)
                    anion_number += 1
                else:
                    extract_atom_from_molecule(resname, frag, select_dict,
                                               molecule_number)
                    molecule_number += 1
    return select_dict
    def output_pdb_w_index(self):
        """Write a PDB of the reference protein with scaled sigma values in
        the B-factor column of selected aromatic CA atoms.

        All temperature factors are first reset to 0. For each CA atom of a
        PHE/TRP/TYR/HIS residue the ``(DF, sigma)`` pair is looked up in
        ``self.cryptic_index`` and printed. When ``|DF|`` is below the alpha
        threshold the atom's temperature factor is set to ``sigma * 100``.
        The protein is written to ``index_<out_suffix>.pdb``.
        """
        # Sigma is scaled because the B-factor column of a PDB file keeps
        # only a few digits: a value such as 0.011 would be written as 0.01.
        scale_factor = 100.0
        aromatic_resnames = ['PHE','TRP','TYR','HIS']

        u = Universe(self.__ref)
        # reset the b-factor column
        u.atoms.tempfactors = 0
        for icalpha in u.atoms.select_atoms('name CA'):
            if icalpha.resname not in aromatic_resnames:
                continue
            key = icalpha.resname + str(icalpha.resid) + icalpha.segid.replace('SYSTEM', 'A')
            entry = self.cryptic_index[key]
            DF, sigma = entry[0], entry[1]
            print(key, DF, sigma)
            if np.abs(DF) < self.__alpha:
                icalpha.tempfactor = sigma * scale_factor

        u.select_atoms('protein').write(f'index_{self.__out_suffix}.pdb')
示例#22
0
def check_contiguous_steps(
    nvt_run: Universe,
    center_atom: Atom,
    distance_dict: Dict[str, float],
    select_dict: Dict[str, str],
    run_start: int,
    run_end: int,
    checkpoints: np.ndarray,
    lag: int = 20,
) -> Dict[str, np.ndarray]:
    """Averages the number of neighbors of the center atom at each offset
    inside the checkpoint +/- lag window.

    For every analysed frame that lies within ``lag`` frames of a checkpoint,
    the size of the shell around ``center_atom`` is recorded per species at
    the offset ``frame - checkpoint + lag``. When a frame is within reach of
    several checkpoints, the last matching one in ``checkpoints`` is used.

    Args:
        nvt_run: An MDAnalysis ``Universe`` containing wrapped trajectory.
        center_atom: The center atom object.
        distance_dict: A dictionary of Cutoff distance of neighbor for each species.
        select_dict: A dictionary of atom species selection, where each atom species name is a key
            and the corresponding values are the selection language.
        run_start: Start frame of analysis.
        run_end: End frame of analysis.
        checkpoints: The frame numberings of interest, counted from the start
            of the analysed slice.
        lag: The range (+/- lag) of the contiguous steps. Default to 20.

    Returns:
        A dict mapping each species to an array of length ``2 * lag + 1``
        holding the mean shell size at every offset. The dict is empty when
        no frame falls inside any window.
    """
    window = lag * 2 + 1
    coord_num: Dict[str, Union[List[List[int]], np.ndarray]] = {
        species: [[] for _ in range(window)]
        for species in distance_dict
    }
    trj_analysis = nvt_run.trajectory[run_start:run_end:]
    any_logged = False

    for frame_idx, _ in enumerate(trj_analysis):
        in_reach = [cp for cp in checkpoints if abs(frame_idx - cp) <= lag]
        if not in_reach:
            continue
        any_logged = True
        offset = frame_idx - in_reach[-1] + lag
        for kw in distance_dict:
            selection = select_shell(select_dict, distance_dict,
                                     center_atom, kw)
            shell = nvt_run.select_atoms(selection, periodic=True)
            coord_num[kw][offset].append(len(shell))

    if not any_logged:
        return {}

    return {
        kw: np.array([np.array(counts).mean() for counts in per_offset])
        for kw, per_offset in coord_num.items()
    }
示例#23
0
def _list_types(coordinates_file):
    """Return the sorted, unique residue names found in a ``.gro`` file."""
    # Only .gro coordinate files are accepted
    _check_input_file(coordinates_file, extensions=[".gro"])

    every_atom = Universe(coordinates_file).select_atoms("all")
    return np.unique(every_atom.resnames)
示例#24
0
def analyze_sasa(u: mda.Universe) -> np.ndarray:
    """Return the total SASA of the standard selection for every frame."""
    atoms = u.select_atoms(STANDARD_SELECTION)
    positions = u.trajectory.timeseries(asel=atoms)
    atom_radius = [get_atom_radius(atom) for atom in atoms]

    # assumes the time series is atom-major, so that swapping the first two
    # axes yields one coordinate block per frame - TODO confirm
    per_frame = np.swapaxes(positions, 0, 1)
    return np.array([
        freesasa.calcCoord(frame.reshape(-1), atom_radius).totalArea()
        for frame in per_frame
    ])
示例#25
0
def analyze_pca(u: mda.Universe, n_dimensions=40):
    """Run a backbone PCA and return its leading variance contributions.

    The cosine content of the first of three projected components is printed
    as a side effect.

    Returns a two-element list: the first ``n_dimensions`` entries of the
    variance and of the cumulated variance.
    """
    space = pca.PCA(u, select='backbone').run()

    projected = space.transform(u.select_atoms('backbone'), 3)
    print(pca.cosine_content(projected, 0))

    leading = slice(None, n_dimensions)
    return [space.variance[leading], space.cumulated_variance[leading]]
示例#26
0
def calc_neigh_corr(
    nvt_run: Universe,
    distance_dict: Dict[str, float],
    select_dict: Dict[str, str],
    time_step: float,
    run_start: int,
    run_end: int,
    center_atom: str = "cation",
) -> Tuple[np.ndarray, Dict[str, np.ndarray]]:
    """Calculates the neighbor auto-correlation function (ACF)
    of selected species around center atom.

    Args:
        nvt_run: An MDAnalysis ``Universe``.
        distance_dict: Maps each neighbor species to the cutoff distance
            passed on to ``neighbors_one_atom``.
        select_dict: Maps species names to selection strings; must contain
            ``center_atom``.
        time_step: Time between two consecutive analysed frames.
        run_start: Start frame of analysis.
        run_end: End frame of analysis.
        center_atom: The center atom to calculate the ACF for. Default to "cation".

    Returns:
        A tuple containing the time series (``frame index * time_step``), and
        a dict with, per neighbor species, the ACF averaged over all center
        atoms.
    """
    # The time axis only needs the number of frames, so ask the sliced
    # trajectory for its length instead of reading every frame.
    n_frames = len(nvt_run.trajectory[run_start:run_end])
    times = np.arange(n_frames) * time_step

    center_atoms = nvt_run.select_atoms(select_dict[center_atom])

    acf_avg = {}
    for kw, cutoff in distance_dict.items():
        acf_all = []
        for atom in tqdm(center_atoms[::]):
            adjacency_matrix = neighbors_one_atom(
                nvt_run,
                atom,
                kw,
                select_dict,
                cutoff,
                run_start,
                run_end,
            )
            acf_all.extend(calc_acf(adjacency_matrix))
        acf_avg[kw] = np.mean(acf_all, axis=0)
    return times, acf_avg
示例#27
0
def generate_universe(topology, trajectory=None):
    """Build a ``Universe`` and report its box size and water count.

    The trajectory is only passed on when it is neither ``None`` nor an
    empty string. The number of residues named ``WAT`` is printed.
    """
    print('Generating Universe...')
    no_trajectory = trajectory is None or trajectory == ''
    u = Universe(topology) if no_trajectory else Universe(topology, trajectory)

    x, y, z = u.dimensions[:3]
    print(f'Universe with dimensions x: {x}, y: {y}, z: {z} loaded!')
    waters = u.select_atoms('resname WAT')
    n_waters = waters.n_residues
    print(f'{n_waters} water molecules detected!')

    return u
示例#28
0
文件: cluster.py 项目: Cloudac7/dpana
def distance_to_cnt(u: Universe, selection_cluster, cluster_size):
    """For carbon nanotube included trajectories, analyze cluster atoms.

    For every frame, the distance of each cluster atom to the line through
    the geometric center of the atoms named ``C`` and parallel to z is
    computed, i.e. only the x and y offsets contribute.

    Parameters
    ----------
    u : MDA trajectory instance.
    selection_cluster : selection string for the cluster atoms.
    cluster_size : number of columns of the result; must be at least the
        number of atoms matched by ``selection_cluster``.

    Returns
    -------
    ndarray of shape (len_frames, cluster_size)
        Columns beyond the number of matched atoms stay zero.
    """
    distances = np.zeros((len(u.trajectory), cluster_size))
    nanotube = u.select_atoms('name C', updating=True)
    cluster = u.select_atoms(selection_cluster, updating=True)
    for frame_idx, _ in enumerate(u.trajectory):
        center = nanotube.center_of_geometry()
        for atom_idx, position in enumerate(cluster.positions):
            # Reference point at the atom's own height, so that the z
            # component cancels out.
            axis_point = [center[0], center[1], position[2]]
            distances[frame_idx, atom_idx] = distance.euclidean(
                position, axis_point)
    return distances
示例#29
0
def check_inputs(selection: list, start: int, stop: int, step: int,
                 universe: mda.Universe):
    """Validate the user-supplied selections and frame range.

    Args:
        selection: Sequence whose first three items are, in order, the
            selection strings, the names given to those selections, and the
            group pairs. Each pair must have 4 items, the first two of which
            must be names listed in the second item.
        start: First frame of the analysis.
        stop: Last frame of the analysis; must not exceed the trajectory
            length.
        step: Frame stride; must be non-zero and smaller than ``stop``.
        universe: Universe used to test the selection strings and to read the
            trajectory length.

    Raises:
        InputError: If any of the checks above fails.
    """
    ag_sel = selection[0]
    ag_names = selection[1]
    ag_pair = selection[2]

    # Every selection needs exactly one name.
    if len(ag_sel) > len(ag_names):
        raise InputError('Not all selections are named')
    if len(ag_sel) < len(ag_names):
        raise InputError('Too many selection names for number of selections')

    # Only the parse result matters here; the selected atoms are discarded.
    for sel in ag_sel:
        try:
            universe.select_atoms(sel)
        except mda.SelectionError as err:
            raise InputError('Error in selection: {}'.format(sel)) from err

    for pair in ag_pair:
        if len(pair) != 4:
            raise InputError(
                'Pairs must be a python list of string with 4 items')
        for group_name in (pair[0], pair[1]):
            if group_name not in ag_names:
                raise InputError(
                    f'{group_name} in {pair} group_pair_selections is not in defined in atom_group_names'
                )

    # The frame-range checks do not depend on the pairs. They used to live
    # inside the loop above and were silently skipped when no pairs were given.
    if start >= stop:
        raise InputError('Start is greater than or equal to stop')
    if step >= stop:
        raise InputError('Step is greater than or equal to stop')
    if step == 0:
        raise InputError('Step cannot be 0')

    n_frames = len(universe.trajectory)
    if n_frames < stop:
        raise InputError(
            f'Stop exceeds length of trajectory, trajectory is {n_frames} frames'
        )

    print('Input Parameters Accepted')
0
def main():
    """List the heterodimer PDB entries that are not on the omit list.

    As written, the function only prints the upper-cased file stem (followed
    by a comma) of every ``./interfaces/heterodimer/*.pdb`` file that is not in
    ``omited_pdbs`` and then terminates the interpreter via ``sys.exit``.

    The distance collection inside the loop and the statistics/pickle code
    after the loop are currently unreachable: the loop body hits a bare
    ``continue`` right after the print, and ``sys.exit`` is called before the
    post-processing starts.
    """
    # 24.1.2020
    # They were downloaded by a certain rule via the advanced search in RCSB PDB,
    # but they contain more than 3 chains, which was out of my scope.
    omited_pdbs = ['4e7u.pdb', '4e7t.pdb', '3exx.pdb', '4fka.pdb', '5ep6.pdb',
                   '4gxv.pdb', '4uqp.pdb', '3uqy.pdb', '4dg4.pdb', '4urh.pdb',
                   '6f4j.pdb', '1xd3.pdb', '3bog.pdb', '6mee.pdb', '4pj2.pdb',
                   '5bpk.pdb', '3cjs.pdb', '4c2v.pdb', '1pid.pdb', '6fu9.pdb',
                   '2oxg.pdb', '1svf.pdb', '6fc1.pdb', '1q7l.pdb', '4kn9.pdb',
                   '4b2b.pdb', '6g6k.pdb', '4m4l.pdb', '4b2c.pdb', '1ben.pdb',
                   '3tt8.pdb', '3fq9.pdb', '5nwg.pdb', '4uql.pdb', '2xkn.pdb',
                   '5nwd.pdb']

    filenames = glob.glob('./interfaces/heterodimer/*.pdb')

    whole_distances = []
    for j, file in enumerate(filenames):
        #print(j, file, file.split('/')[-1] in omited_pdbs)

        # Skip entries on the omit list (matched by file name only).
        if file.split('/')[-1] in omited_pdbs:
            continue

        else:
            # Print the entry ID (file stem, upper-cased) on one comma-separated line.
            print(file.split('/')[-1].split('.')[0].upper()+",",end = '')
            # NOTE: this `continue` disables everything below it in the loop body.
            continue
            u = Universe(file)

            # Abort unless the structure has exactly two distinct segids.
            nchains = len(set(u.segments.segids))
            if nchains != 2: sys.exit(f'The number of chains = {nchains}. that is out of scope for this program.')

            # One C-alpha atom group per segid.
            chain_objs = []
            for i, chain in enumerate(set(u.segments.segids)):
                chain_objs.append(u.select_atoms(f'protein and segid {chain} and name CA'))
                print(f'    *{chain}')
#               print(chain_objs[i].atoms)

            # `distances` is defined elsewhere in the file.
            whole_distances.append(distances(chain_objs[0], chain_objs[1]))

    # NOTE: execution always stops here; the code below is never reached.
    sys.exit('stop! you might have already done this. so i forced you to procced.')
    print(whole_distances)
    whole_distances = np.hstack(whole_distances)
    # Distances up to and including 50.0 (computed but not used further below).
    filtered_dist   = whole_distances[whole_distances<=50.0]

    with open('whole_distances.pkl','wb') as f:
        pickle.dump(whole_distances, f)

    mean = np.mean(whole_distances)
    std  = np.std(whole_distances)

    print(f'mean: {mean}, std:{std}')
示例#31
0
文件: util.py 项目: htz1992213/mdgo
def assign_resname(u: Universe, res_dict: Dict[str, str]):
    """
    Give residues of an MDAnalysis universe a resname based on atom selections.

    A ``resname`` topology attribute is added to the universe first. Then, for
    each entry of ``res_dict``, the residues hit by the selection that still
    have an empty resname receive the entry's key; residues already carrying a
    non-empty resname are left as they are.

    Args:
        u: The universe object to assign resnames to.
        res_dict: A dictionary of resnames, where each resname is a key
            and the corresponding values are the selection language.
    """
    u.add_TopologyAttr("resname")
    for resname, selection in res_dict.items():
        selected = u.select_atoms(selection)
        current_names = selected.residues.resnames
        # Fill in only the residues that have no name yet.
        unnamed = current_names == ""
        current_names[unnamed] = resname
        selected.residues.resnames = current_names
示例#32
0
def neighbor_distance(
    nvt_run: Universe,
    center_atom: Atom,
    run_start: int,
    run_end: int,
    species: str,
    select_dict: Dict[str, str],
    distance: float,
) -> Dict[str, np.ndarray]:
    """
    Tracks the distance from ``center_atom`` to every atom that neighbors it at some point.

    The trajectory slice is walked twice: the first pass collects every atom of
    ``species`` found within ``distance`` of the center atom in any frame, the
    second pass records the distance of each collected atom in every frame.

    Args:
        nvt_run: An MDAnalysis ``Universe`` containing wrapped trajectory.
        center_atom: The center atom object.
        run_start: Start frame of analysis.
        run_end: End frame of analysis.
        species: The neighbor species in the select_dict.
        select_dict: A dictionary of atom species selection, where each atom species name is a key
            and the corresponding values are the selection language.
        distance: The neighbor cutoff distance.

    Returns:
        A dictionary mapping the atom index (as a string) of each neighbor to an
        array of length ``run_end - run_start`` holding its distance to the
        ``center_atom`` per frame. Entries start out as 100.0.

    Raises:
        ValueError: If ``species`` is not a key of ``select_dict``.
    """
    dist_dict = {}
    frames = nvt_run.trajectory[run_start:run_end:]
    species_selection = select_dict.get(species)
    if species_selection is None:
        raise ValueError("Invalid species selection")

    # The selection text is the same for every frame; only its result changes.
    shell_query = (
        f"({species_selection}) and "
        f"(around {distance!s} index {center_atom.index!s})"
    )

    # Pass 1: register every atom that enters the shell in any frame.
    for _ts in frames:
        shell = nvt_run.select_atoms(shell_query, periodic=True)
        for neighbor in shell.atoms:
            key = str(neighbor.index)
            if key not in dist_dict:
                dist_dict[key] = np.full(run_end - run_start, 100.0)

    # Pass 2: fill in the distance of every registered atom for each frame.
    for frame_no, ts in enumerate(frames):
        for key, series in dist_dict.items():
            series[frame_no] = distance_array(
                ts[center_atom.index], ts[int(key)], ts.dimensions
            )
    return dist_dict
示例#33
0
 def test_write_read(self):
     """Writing all atoms to ``self.outfile`` and re-reading keeps the atom count."""
     original = Universe(self.filename)
     everything = original.select_atoms("all")
     everything.write(self.outfile)
     reloaded = Universe(self.outfile)
     assert_equal(len(original.atoms), len(reloaded.atoms))
示例#34
0
    args = parser.parse_args()

    # First lines of the output file. The two variants differ only in the
    # potential type: "dyn_union_sph_sh" by default, "union_sph_sh" when
    # --static is given.
    if not args.static:
        header_string = "; Umbrella potential for a spherical shell cavity\n"\
        "; Name    Type          Group  Kappa   Nstar    mu    width  cutoff  outfile    nstout\n"\
        "hydshell dyn_union_sph_sh   OW  0.0     0   XXX    0.01   0.02   phiout.dat   50  \\\n"
    else:
        header_string = "; Umbrella potential for a spherical shell cavity\n"\
        "; Name    Type          Group  Kappa   Nstar    mu    width  cutoff  outfile    nstout\n"\
        "hydshell union_sph_sh   OW  0.0     0   XXX    0.01   0.02   phiout.dat   50  \\\n"

    if args.traj is None:
        u = Universe(args.gro)

        if args.sspec is not None:
            prot_heavies = u.select_atoms(args.sspec)
        else:
            # Select peptide heavies - exclude water's and ions
            prot_heavies = u.select_atoms("not (name H* or type H or resname SOL) and not (name NA or name CL) and not (resname WAL) and not (resname DUM)")

        # The context manager closes the file even if a write fails midway.
        with open(args.outfile, 'w') as fout:
            fout.write(header_string)

            # One continuation line per selected atom. Lengths are divided by
            # 10.0 (presumably Angstrom -> nm; confirm against the consumer).
            if args.static:
                # Static shells are written with the atom's coordinates.
                for atm in prot_heavies:
                    fout.write("{:<10.1f} {:<10.1f} {:<10.3f} {:<10.3f} {:<10.3f}\\\n".format(-0.5, args.rad/10.0, atm.pos[0]/10.0, atm.pos[1]/10.0, atm.pos[2]/10.0))
            else:
                # Dynamic shells are written with the 1-based atom index.
                for atm in prot_heavies:
                    fout.write("{:<10.1f} {:<10.1f} {:d} \\\n".format(-0.5, args.rad/10.0, atm.index+1))
At the time, I wanted to confirm whether the center of mass (COM) of s100b had been canceled.

Caution: this program is specialized for the s100b-CTD system.

Usage: python conform_com_cancel.py [ PDB file name ]   
"""



# Write the center of mass of residues 1-94 (s100b) for every frame of the
# input file to "<input>_comTraj.dat", one "x y z" line per frame.
#
# Each print below takes a single pre-formatted string, so the script emits
# the same text under Python 2 and Python 3.
file_name = sys.argv[1]
print("Input file name :  %s" % file_name)

u = Universe(file_name)

# Select all atoms that constitute s100b. The selection is not an updating
# one, so it is made once instead of once per frame.
selected_atoms = u.select_atoms("resid 1-94")

# The context manager guarantees the output is flushed and closed.
with open(file_name + "_comTraj.dat", "w") as f_out:
    print("No of snapshots:  %s" % len(u.trajectory))

    for ts in u.trajectory:
        print("atom ids:  %s" % (selected_atoms.ids,))

        com = selected_atoms.center_of_mass()

        f_out.write(str(com[0]) + " " + str(com[1]) + " " + str(com[2]) + " \n")



示例#36
0
.. SeeAlso:: :mod:`MDAnalysis.analysis.psa`

"""

from MDAnalysis import Universe
from MDAnalysis.analysis.align import rotation_matrix
from MDAnalysis.analysis.psa import PSAnalysis

# Script entry point: centre the closed and open AdK structures on the
# centre of mass of their CORE-domain C-alphas and compute the rotation
# between the two sets of CORE C-alpha coordinates.
if __name__ == '__main__':

    print("Generating AdK CORE C-alpha reference coordinates and structure...")
    # Read in closed/open AdK structures; work with C-alphas only
    u_closed = Universe('structs/adk1AKE.pdb')
    u_open = Universe('structs/adk4AKE.pdb')
    ca_closed = u_closed.select_atoms('name CA')
    ca_open = u_open.select_atoms('name CA')

    # Move centers-of-mass of C-alphas of each structure's CORE domain to origin
    # (the translation is applied to all atoms of each universe, not only the C-alphas)
    adkCORE_resids = "(resid 1:29 or resid 60:121 or resid 160:214)"
    u_closed.atoms.translate(-ca_closed.select_atoms(adkCORE_resids).center_of_mass())
    u_open.atoms.translate(-ca_open.select_atoms(adkCORE_resids).center_of_mass())

    # Get C-alpha CORE coordinates for each structure
    # (read after the translation above, so they are the centred coordinates)
    closed_ca_core_coords = ca_closed.select_atoms(adkCORE_resids).positions
    open_ca_core_coords = ca_open.select_atoms(adkCORE_resids).positions

    # Compute rotation matrix, R, that minimizes rmsd between the C-alpha COREs
    # (open coordinates are passed first, closed coordinates second)
    R, rmsd_value = rotation_matrix(open_ca_core_coords, closed_ca_core_coords)

    # Rotate open structure to align its C-alpha CORE to closed structure's
from MDAnalysis import Universe, collection, Timeseries
from MDAnalysis.tests.datafiles import PSF, DCD

try:
    import matplotlib

    matplotlib.use('agg')  # no interactive plotting, only save figures
    from pylab import errorbar, legend, xlabel, ylabel, savefig, clf, gca, draw

    have_matplotlib = True
except ImportError:
    have_matplotlib = False

# Collect dihedral timeseries for the residues of the protein in the test
# PSF/DCD system.
# NOTE(review): numberOfResidues(), collection and Timeseries look like the
# legacy MDAnalysis API -- confirm they exist in the installed version.
universe = Universe(PSF, DCD)
protein = universe.select_atoms("protein")

numresidues = protein.numberOfResidues()

# Start from an empty collection before queueing the timeseries.
collection.clear()
for res in range(2, numresidues - 1):
    # Call form with one pre-formatted string: same output on Python 2 and 3.
    print("Processing residue {0:d}".format(res))
    # selection of the atoms involved for the phi for resid '%d' %res
    ## select_atoms("atom 4AKE %d C"%(res-1), "atom 4AKE %d N"%res, "atom %d 4AKE CA"%res, "atom 4AKE %d C" % res)
    phi_sel = universe.residues[res].phi_selection()

    #  selection of the atoms involved for the psi for resid '%d' %res
    psi_sel = universe.residues[res].psi_selection()

    # collect the timeseries of a dihedral
    collection.addTimeseries(Timeseries.Dihedral(phi_sel))
示例#38
0
import MDAnalysis
from MDAnalysis import Universe
from MDAnalysis.analysis.contacts import calculate_contacts
import numpy as np
import pandas as pd

# Reference structure, and the same structure loaded together with its trajectory.
ref = Universe("conf_protein.gro.bz2")
u = Universe("conf_protein.gro.bz2", "traj_protein_0.xtc")

# Split the protein atoms by atom number into a first and a second half and
# keep the non-hydrogen atoms of residues 72-95 in each.
# NOTE(review): both ranges include atom number x//2 -- confirm this overlap is intended.
x = len(ref.select_atoms("protein"))
_half = x // 2
_selection_template = "not name H* and resid 72-95 and bynum %d:%d"
selA = _selection_template % (1, _half)
selB = _selection_template % (_half, x)


# Tabulate the result as time / Q columns and show it.
data = calculate_contacts(ref, u, selA, selB)
df = pd.DataFrame(data, columns=["Time (ps)", "Q"])
print(df)