Exemplo n.º 1
1
def test_5():
    """Check DSSP output for the '4waters.pdb' fixture.

    Both the simplified and the full assignment schemes are expected to
    return a single frame of four 'NA' codes.
    """
    traj = md.load(get_fn('4waters.pdb'))
    simple_codes = md.compute_dssp(traj, simplified=True)
    full_codes = md.compute_dssp(traj, simplified=False)
    expected = np.array([['NA', 'NA', 'NA', 'NA']])

    for observed in (simple_codes, full_codes):
        np.testing.assert_array_equal(observed, expected)
Exemplo n.º 2
0
def test_4():
    """Compare simplified and full DSSP output for '1am7_protein.pdb'.

    Both schemes must yield the same number of frames and the same number
    of residues in the first frame, and the simplified codes found in the
    first frame must be exactly C, E and H.
    """
    traj = md.load_pdb(get_fn('1am7_protein.pdb'))
    simple_codes = md.compute_dssp(traj, simplified=True)
    full_codes = md.compute_dssp(traj, simplified=False)

    assert len(simple_codes) == len(full_codes)
    assert len(simple_codes[0]) == len(full_codes[0])

    first_frame_alphabet = np.unique(simple_codes[0])
    assert list(first_frame_alphabet) == ['C', 'E', 'H']
Exemplo n.º 3
0
def test_4():
    """Simplified vs. full DSSP on '1am7_protein.pdb': same frame count,
    same first-frame length, and a first-frame simplified alphabet of
    exactly C, E, H.
    """
    protein = md.load_pdb(get_fn('1am7_protein.pdb'))
    three_state = md.compute_dssp(protein, simplified=True)
    eight_state = md.compute_dssp(protein, simplified=False)

    # Frame count and per-frame length must not depend on the scheme.
    assert len(three_state) == len(eight_state)
    assert len(three_state[0]) == len(eight_state[0])

    observed_codes = list(np.unique(three_state[0]))
    assert observed_codes == ['C', 'E', 'H']
Exemplo n.º 4
0
def test_5():
    """DSSP on the '4waters.pdb' fixture must give one frame of four 'NA'
    codes, regardless of whether the simplified scheme is requested.
    """
    waters = md.load(get_fn('4waters.pdb'))
    three_state = md.compute_dssp(waters, simplified=True)
    eight_state = md.compute_dssp(waters, simplified=False)

    all_na = np.array([['NA', 'NA', 'NA', 'NA']])
    np.testing.assert_array_equal(three_state, all_na)
    np.testing.assert_array_equal(eight_state, all_na)
Exemplo n.º 5
0
def identify_long_helices(geom, min_turns=5, aa_per_turn=3.6,
                          verbose=False, plot=False):
    """Identify helical blocks from the DSSP assignment of ``geom``.

    The first frame returned by ``_md.compute_dssp(geom)`` is converted
    into an integer vector (1 where the code is 'H', 0 elsewhere) and
    handed to ``identify_boolean_blocks_in_sequence``.

    Parameters
    ----------
    geom : object accepted by ``_md.compute_dssp``
        Only the first frame of the DSSP result is used.
    min_turns : number, default 5
        Multiplied by ``aa_per_turn`` and passed as the second positional
        argument of ``identify_boolean_blocks_in_sequence``
        (presumably the minimum block length -- confirm in that helper).
    aa_per_turn : float, default 3.6
        Passed as the third positional argument of the helper.
    verbose : bool, default False
        Forwarded to the helper.
    plot : bool, default False
        If True, plot the helix vector and shade every returned block.

    Returns
    -------
    helices
        Whatever ``identify_boolean_blocks_in_sequence`` returns.
    (helices, axes)
        Returned instead when ``plot`` is True; ``axes`` is the matplotlib
        axes that was drawn on.
    """
    ss_str = _md.compute_dssp(geom)[0]
    ss_vec = _np.zeros(len(ss_str), dtype=int)
    ss_vec[ss_str == 'H'] = 1

    helices = identify_boolean_blocks_in_sequence(ss_vec,
                                                  min_turns*aa_per_turn,
                                                  aa_per_turn,
                                                  verbose=verbose,
                                                  names=["hlx","brk"])
    if not plot:
        return helices

    # Create exactly one figure; a preceding bare figure() call would only
    # leave an empty, never-used figure open.
    _plt.figure(figsize=(20, 5))
    _plt.plot(ss_vec, marker='.')
    iax = _plt.gca()
    xticks = _np.arange(geom.n_residues, step=15)
    iax.set_xticks(xticks)
    # Label ticks with the residue sequence numbers rather than 0-based indices.
    iax.set_xticklabels([geom.top.residue(ii).resSeq for ii in xticks])
    for ihx in helices:
        # Shade from the first to the last entry of each block.
        iax.axvspan(ihx[0]-.5, ihx[-1]+.5, alpha=.25)

    return helices, iax
Exemplo n.º 6
0
def test_3(tmpdir):
    """Compare ``call_dssp`` against mdtraj's full DSSP for structures
    fetched from the RCSB by PDB id (requires network access).
    """
    # 1COY gives a small error, due to a broken chain.
    for pdbid in ('1GAI', '6gsv', '2AAC'):
        full_structure = md.load_pdb('http://www.rcsb.org/pdb/files/%s.pdb' % pdbid)
        minimal_atoms = full_structure.top.select_atom_indices('minimal')
        minimal = full_structure.atom_slice(minimal_atoms)

        from_call_dssp = call_dssp(tmpdir, minimal)
        from_mdtraj = md.compute_dssp(minimal, simplified=False)[0]
        assert_(from_call_dssp, from_mdtraj)
Exemplo n.º 7
0
def compute_mdtraj_order_parmeters(trajectory_file, rmsd_reference_structure=None):
    """Compute a set of order parameters for a trajectory with mdtraj.

    Parameters
    ----------
    trajectory_file
        Passed to ``md.load`` to obtain the trajectory.
    rmsd_reference_structure : optional
        If not None, loaded with ``md.load`` and used as the reference for
        ``md.rmsd``; the "RMSD" entry is only present in that case.

    Returns
    -------
    dict
        Keys, in insertion order: "RMSD" (optional), "HBondEnergy" (sum of
        each per-frame ``md.kabsch_sander`` result), "SecondaryStructure"
        (integer-encoded ``md.compute_dssp`` codes, transposed),
        "Rg" (``md.compute_rg``) and "Contacts" (square-form C-alpha
        ``md.compute_contacts`` distances).
    """
    # documentation: http://mdtraj.org/1.8.0/analysis.html#
    trajectory = md.load(trajectory_file)

    order_parameters = {}

    if rmsd_reference_structure is not None:
        reference = md.load(rmsd_reference_structure)
        order_parameters["RMSD"] = md.rmsd(trajectory, reference)

    order_parameters["HBondEnergy"] = np.array(
        [np.sum(frame) for frame in md.kabsch_sander(trajectory)])

    ss = md.compute_dssp(trajectory)
    flat_codes = ss.flatten()
    # Number the codes by their sorted order so the integer encoding is
    # reproducible; numbering by raw set iteration order depends on string
    # hashing and can differ from one interpreter run to the next.
    code_to_int = {code: number
                   for number, code in enumerate(sorted(set(flat_codes)))}
    order_parameters["SecondaryStructure"] = np.array(
        [code_to_int[code] for code in flat_codes]).reshape(ss.shape).T

    order_parameters["Rg"] = md.compute_rg(trajectory)

    distances, residue_pairs = md.compute_contacts(trajectory, scheme='ca')
    order_parameters["Contacts"] = md.geometry.squareform(distances, residue_pairs)

    return order_parameters
Exemplo n.º 8
0
def construct_residue_df(traj):
    """Assemble a per-residue feature table for ``traj``.

    Columns, in order: residue_idx, dssp (first frame of
    ``mdtraj.compute_dssp``), phi, psi, omega_prev, omega_next, ca_angles,
    ca_dihedral_prev, ca_dihedral_next. The angle columns come from the
    ``structure_tools.protein_structure_analysis`` helpers.
    (SASA columns are currently disabled.)
    """
    analysis = structure_tools.protein_structure_analysis

    residue_idx = np.arange(traj.top.n_residues)
    dssp_codes = mdtraj.compute_dssp(traj)[0].tolist()
    phi = analysis.calculate_phi(traj)
    psi = analysis.calculate_psi(traj)
    # The omega / dihedral helpers return two values per residue; transposing
    # lets them unpack into a "prev" and a "next" column.
    omega_prev, omega_next = analysis.calculate_omega(traj).T
    ca_angles = analysis.calculate_backbone_angles(traj)
    ca_dihedral_prev, ca_dihedral_next = analysis.calculate_backbone_dihedrals(traj).T

    return pd.DataFrame({
        "residue_idx": residue_idx,
        "dssp": dssp_codes,
        "phi": phi,
        "psi": psi,
        "omega_prev": omega_prev,
        "omega_next": omega_next,
        "ca_angles": ca_angles,
        "ca_dihedral_prev": ca_dihedral_prev,
        "ca_dihedral_next": ca_dihedral_next,
    })
Exemplo n.º 9
0
def test_1():
    """Yield one DSSP comparison check per bundled PDB fixture.

    Each yielded callable compares ``call_dssp`` with the first frame of
    mdtraj's full (non-simplified) DSSP assignment for the fixture reduced
    to its 'minimal' atom selection. The callable carries a ``description``
    attribute naming the fixture.
    """
    for fn in ['1bpi.pdb', '1vii.pdb', '4K6Q.pdb', '1am7_protein.pdb']:
        t = md.load_pdb(get_fn(fn))
        t = t.atom_slice(t.top.select_atom_indices('minimal'))
        # Bind the current trajectory as a default argument. A plain closure
        # looks ``t`` up only when called, so any runner that collects the
        # checks before executing them would test the last fixture each time.
        f = lambda t=t: assert_(call_dssp(t), md.compute_dssp(t, simplified=False)[0])
        f.description = 'test_1: %s' % fn
        yield f
Exemplo n.º 10
0
    def _computeSecondaryStructure(self):
        """Compute the secondary structure of the selected frame and
        format it for the browser.

        Returns a dict keyed by atom index. Each value holds 'ss' (the
        coil/helix/sheet label) plus 'ssbegin' / 'ssend' flags, which are
        True for atoms of the first / last residue of a contiguous helix
        or sheet run within one chain.
        """
        SS_MAP = {'C': 'coil', 'H': 'helix', 'E': 'sheet', 'NA': 'coil'}
        structured_codes = ('H', 'E')

        top = self.trajectory.topology
        dssp = md.compute_dssp(self.trajectory[self.frame])[0]

        def chain_and_code(indexed_code):
            # Key for groupby: residues form one streak while both the
            # chain and the DSSP code stay the same.
            rindx, code = indexed_code
            return (top.residue(rindx).chain, code)

        result = {}
        for (chain, ss), streak in groupby(enumerate(dssp), chain_and_code):
            # residue indices of this contiguous run
            rindxs = [rindx for rindx, _ in streak]
            first_r, last_r = rindxs[0], rindxs[-1]
            is_structured = ss in structured_codes
            for r in rindxs:
                # one entry per atom of the residue
                for atom in top.residue(r).atoms:
                    result[atom.index] = {
                        'ss': SS_MAP[ss],
                        'ssbegin': (r == first_r and is_structured),
                        'ssend': (r == last_r and is_structured)}
        return result
Exemplo n.º 11
0
def cal_dssp_PDB(pdbfile: str, mode=3):
    """Compute a per-residue DSSP table for the first frame of a PDB file.

    ``mode`` selects the assignment scheme: 3 uses the simplified scheme,
    7 the full one; any other value raises ``Exception``. Blank (' ')
    assignments are relabelled 'L'. Chain names are taken from the segment
    ids reported by ``Universe(pdbfile)``.

    The resulting DataFrame (columns resid, resname, chainID, ssp) is
    printed and returned. ``Exception`` is also raised when the number of
    residues collected from the topology differs from the number of
    assignments.
    """
    if mode == 3:
        simplified = True
    elif mode == 7:
        simplified = False
    else:
        raise Exception('The mode parameter only accept 3 or 7. %s' %
                        str(mode))

    first_frame = load(pdbfile)[0]
    universe = Universe(pdbfile)
    segment_ids = universe.segments.segids

    ssp = compute_dssp(first_frame, simplified)[0]
    ssp[ssp == ' '] = 'L'

    chainIDs, resnames, resids = [], [], []
    for chain in first_frame.top.chains:
        segment_id = segment_ids[chain.index]
        for residue in chain.residues:
            resnames.append(residue.name)
            resids.append(residue.resSeq)
            chainIDs.append(segment_id)

    n_labelled, n_assigned = len(chainIDs), len(ssp)
    if n_labelled != n_assigned:
        raise Exception(
            'The chainID and ssp results must be same length. %d-%d' %
            (n_labelled, n_assigned))

    ResultDF = DataFrame({
        'resid': resids,
        'resname': resnames,
        'chainID': chainIDs,
        'ssp': ssp
    })
    print(ResultDF)
    return ResultDF
Exemplo n.º 12
0
def test_1():
    """Generate DSSP comparison checks, one per bundled PDB fixture.

    Every yielded callable asserts (via ``assert_``) that ``call_dssp``
    agrees with the first frame of mdtraj's non-simplified DSSP output for
    the fixture restricted to its 'minimal' atoms. ``description`` on the
    callable identifies the fixture.
    """
    for fn in ['1bpi.pdb', '1vii.pdb', '4K6Q.pdb', '1am7_protein.pdb']:
        t = md.load_pdb(get_fn(fn))
        t = t.atom_slice(t.top.select_atom_indices('minimal'))
        # ``t=t`` freezes this iteration's trajectory inside the callable;
        # without it the lambda would read whatever ``t`` is bound to at
        # call time (late binding), i.e. possibly a later fixture.
        f = lambda t=t: assert_(call_dssp(t), md.compute_dssp(t, simplified=False)[0])
        f.description = 'test_1: %s' % fn
        yield f
Exemplo n.º 13
0
def test_3(tmpdir):
    """For a few PDB ids downloaded from the RCSB, check that
    ``call_dssp`` matches mdtraj's non-simplified DSSP (first frame) on the
    'minimal' atom subset. Needs network access.
    """
    # 1COY gives a small error, due to a broken chain.
    pdbids = ['1GAI', '6gsv', '2AAC']
    for pdbid in pdbids:
        url = 'http://www.rcsb.org/pdb/files/%s.pdb' % pdbid
        structure = md.load_pdb(url)
        structure = structure.atom_slice(
            structure.top.select_atom_indices('minimal'))
        reference = call_dssp(tmpdir, structure)
        computed = md.compute_dssp(structure, simplified=False)[0]
        assert_(reference, computed)
Exemplo n.º 14
0
    def _computeSecondaryStructure(self):
        """Compute the secondary structure of the selected frame and
        format it for the browser.

        The result maps every atom index to a dict with the keys 'ss'
        (coil / helix / sheet), 'ssbegin' and 'ssend'. The two flags mark
        atoms belonging to the first / last residue of a helix or sheet
        streak, where a streak is a run of consecutive residues sharing
        both chain and DSSP code.
        """
        SS_MAP = {'C': 'coil', 'H': 'helix', 'E': 'sheet', 'NA': 'coil'}
        helix_or_sheet = ('H', 'E')

        top = self.trajectory.topology
        dssp = md.compute_dssp(self.trajectory[self.frame])[0]
        result = {}

        # Group the (residue index, code) pairs into streaks of identical
        # (chain, code).
        streaks = groupby(
            enumerate(dssp),
            lambda pair: (top.residue(pair[0]).chain, pair[1]))

        for (chain, ss), members in streaks:
            rindxs = [member[0] for member in members]
            flag_ends = ss in helix_or_sheet
            for r in rindxs:
                starts_here = (r == rindxs[0] and flag_ends)
                ends_here = (r == rindxs[-1] and flag_ends)
                # every atom of the residue gets the same annotation
                for a in top.residue(r).atoms:
                    result[a.index] = {
                        'ss': SS_MAP[ss],
                        'ssbegin': starts_here,
                        'ssend': ends_here
                    }
        return result
Exemplo n.º 15
0
def ca_rmsd_angstroms(traj, native, cut_tails=False, verbose=True):
    ''' Computes RMSD of the CA atoms in angstroms, rather than the MDTraj default of nanometers.

    If cut_tails is True, the secondary structure of the native will be computed first, and
    all residues before the first helix or sheet and all residues after the last helix or sheet
    will be excluded from the RMSD.  This is convenient to avoid computing the RMSD of
    unstructured tails.  If verbose is True, the default, then the number of excluded residues
    on each end will be printed for diagnostics (only when cut_tails is True).  You must pass
    full structures, not CA structures, to this function.

    Returns 10 times the result of md.rmsd on the CA selections of traj and native.'''
    native_dssp = md.compute_dssp(native)[0]

    # Half-open residue window [first_residue, last_residue).
    first_residue = 0
    last_residue = len(native_dssp)
    if cut_tails:
        unstructured = ('C', 'NA')
        while (first_residue < last_residue
               and native_dssp[first_residue] in unstructured):
            first_residue += 1
        # Stop at first_residue: continuing down to 0 would read
        # native_dssp[-1] (wrapping to the last residue) and could leave
        # last_residue at -1 when no residue is structured.
        while (last_residue > first_residue
               and native_dssp[last_residue - 1] in unstructured):
            last_residue -= 1
        if verbose:
            print(
                'RMSD excluded %i residues from N-terminus and %i residues from C-terminus, leaving %i residues'
                % (first_residue, len(native_dssp) - last_residue,
                   last_residue - first_residue))

    # convert RMSD to angstroms
    # The selection range is inclusive, hence last_residue - 1.
    sel = 'name CA and resid %i to %i' % (first_residue, last_residue - 1)
    return 10. * md.rmsd(select(traj, sel), select(native, sel))
Exemplo n.º 16
0
def calculate_ss(pdbfilename, simplified=True):
    '''
    Return the DSSP assignment of the first frame of a structure file.

    The full DSSP alphabet is:
        H : Alpha helix
        B : Residue in isolated beta-bridge
        E : Extended strand, participates in beta ladder
        G : 3-helix (3/10 helix)
        I : 5 helix (pi helix)
        T : hydrogen bonded turn
        S : bend
          : Loops and irregular elements

    mdtraj's own simplification merges these as:
        H : Helix. Either of the H, G, or I codes.
        E : Strand. Either of the E, or B codes.
        C : Coil. Either of the T, S or ' ' codes.

    This function does NOT use that merge. It computes the full
    assignment and, when ``simplified`` equals True, reduces it as:
        H : H
        E : E
        C : all the others
    '''
    import mdtraj as md

    structure = md.load(pdbfilename)
    ss = md.compute_dssp(structure, simplified=False)[0]

    if simplified == True:  # noqa: E712 -- equality test kept on purpose
        not_helix_or_strand = (ss != 'H') & (ss != 'E')
        ss[not_helix_or_strand] = 'C'

    return ss
Exemplo n.º 17
0
def calculate_DSSP():
    """Write the simplified DSSP assignment of the protein atoms to DSSP.dat.

    Reads the module-level names ``args``, ``traj``, ``traj_mdt`` and
    ``mdt``. The output goes to
    ``<cwd>/<args.output_folder>/protein_properties/DSSP/DSSP.dat`` with the
    DSSP array transposed, space-delimited.
    """
    filename_details = os.getcwd() + '/' + str(args.output_folder) + '/protein_properties/DSSP/DSSP.dat'
    # NOTE(review): the selection comes from ``traj`` while the slice is taken
    # from ``traj_mdt`` -- confirm both describe the same topology.
    protein_atoms = traj.topology.select('protein')
    dssp = mdt.compute_dssp(traj_mdt.atom_slice(atom_indices=protein_atoms), simplified=True)
    dssp_over_time = dssp.T
    # ``with`` closes the file even if savetxt raises.
    with open(filename_details, 'w') as output_dssp:
        # DSSP codes are strings; savetxt's default float format cannot
        # serialise them, so an explicit string format is required.
        np.savetxt(output_dssp, dssp_over_time, fmt='%s', delimiter=" ")
    # Parenthesised single-argument print works on both Python 2 and 3.
    print("DSSP calculation complete")
Exemplo n.º 18
0
def test_6():
    """On 'alanine-dipeptide-explicit.pdb', residues containing the atoms
    C, N, O and CA must all be assigned 'C' by simplified DSSP, and all
    remaining residues 'NA'.
    """
    traj = md.load(get_fn('alanine-dipeptide-explicit.pdb'))
    assignment = md.compute_dssp(traj, simplified=True)

    backbone_names = ('C', 'N', 'O', 'CA')
    has_backbone = np.array([
        set(atom.name for atom in residue.atoms).issuperset(backbone_names)
        for residue in traj.topology.residues
    ])
    lacks_backbone = np.logical_not(has_backbone)

    assert np.unique(assignment[:, has_backbone]) == "C"
    assert np.unique(assignment[:, lacks_backbone]) == 'NA'
Exemplo n.º 19
0
 def _superpose(self):
     """
     Superpose structured C-alphas.

     Stores the first-frame DSSP assignment in ``self.dssp`` and superposes
     ``self.traj`` onto its own frame 0, using the 'alpha' atom indices of
     the residues assigned 'H' or 'E'.
     """
     self.dssp = mdtraj.compute_dssp(self.traj[0])[0]
     is_structured = (self.dssp == 'H') | (self.dssp == 'E')
     ca_indices = self.traj.topology.select_atom_indices('alpha')

     chosen = []
     for res_idx in range(self.traj.n_residues):
         if is_structured[res_idx]:
             chosen.append(ca_indices[res_idx])

     self.traj.superpose(reference=self.traj, frame=0, atom_indices=np.array(chosen))
Exemplo n.º 20
0
def helicity(traj,peptide_chain):
    """Return the helical fraction of one chain, scaled by 10.

    Simplified DSSP is computed for every frame of ``traj``; the 'H'
    assignments of the residues of chain ``peptide_chain`` are counted over
    all frames and divided by the total number of assignments for that
    chain. Progress messages and the result are printed.
    """
    print( '.... computing helicity ....')
    dssp = mdtraj.compute_dssp(traj, simplified=True)
    residues = [residue.index for residue in traj.topology.chain(peptide_chain).residues]

    # The slice end is exclusive: +1 keeps the chain's last residue, which
    # a plain residues[0]:residues[-1] slice would silently drop.
    chain_codes = dssp[:, residues[0]:residues[-1] + 1]
    unique, counts = np.unique(chain_codes, return_counts=True)
    # NOTE(review): the factor 10 is kept as found; confirm it is intended
    # (a percentage would need 100).
    hel = dict(zip(unique, counts)).get('H',0)/np.sum(counts)*10
    print('helicity ' + str(hel))

    print( '.... computing helicity .... DONE')
    return hel
Exemplo n.º 21
0
    def write_simulation_files(self, ref_traj_aa, topfilename, seqfilename, ssbias=False):
        """Write the input files for a simulation.

        After regenerating the topology, this writes, in order:
        ``seqfilename`` (one FASTA line per chain), "charge_on_residues.dat"
        (count line followed by residue index / charge pairs), "ssweight"
        (helix and sheet weight per residue), "jpred" (each chain sequence
        followed by its secondary-structure string) and ``topfilename``
        (``self.topfile``).

        The secondary structure comes from the first frame of
        ``md.compute_dssp(ref_traj_aa)`` with 'C' replaced by '-'. With
        ``ssbias`` false, all weights are 0.0; otherwise 'H' gets helix
        weight 1.0 and 'E' gets sheet weight 1.0.

        Raises AttributeError if ``self.model`` has neither ``ref_traj``
        nor ``starting_traj``.
        """
        self.generate_topology()

        # Prefer ref_traj, fall back to starting_traj.
        for attr_name in ("ref_traj", "starting_traj"):
            if hasattr(self.model, attr_name):
                traj = getattr(self.model, attr_name)
                break
        else:
            raise AttributeError("need to set intial conditions (ref_traj or starting_traj) to write")

        fasta = traj.top.to_fasta()

        with open("{}".format(seqfilename),"w") as seq_out:
            for chain_seq in fasta:
                seq_out.write("{}\n".format(chain_seq))

        charged = self.model.mapping._charged_residues
        with open("charge_on_residues.dat", "w") as charge_out:
            charge_out.write("{:d}\n".format(len(charged)))
            for res_idx, charge in charged:
                charge_out.write("{:6d}   {:8.4f}\n".format(res_idx, charge))

        # compute secondary structure from a reference structure
        first_frame_codes = md.compute_dssp(ref_traj_aa)[0]
        dssp = "".join(first_frame_codes).replace("C","-")
        n_fasta_residues = sum([ len(chain_seq) for chain_seq in fasta ])
        assert len(dssp) == n_fasta_residues, "Number of residues in reference different than expected"

        # (helix, sheet) weights per code; an empty table means no bias.
        biased_weights = {"H": (1., 0.), "E": (0., 1.)} if ssbias else {}
        with open("ssweight", "w") as weight_out:
            for ss in dssp:
                helix, sheet = biased_weights.get(ss, (0., 0.))
                weight_out.write("{:.1f} {:.1f}\n".format(helix, sheet))

        with open("jpred", "w") as jpred_out:
            offset = 0
            for chain_seq in fasta:
                n_chain = len(chain_seq)
                jpred_out.write("{}\n".format(chain_seq))
                jpred_out.write("{}\n".format(dssp[offset:offset+n_chain]))
                offset += n_chain

        with open("{}".format(topfilename),"w") as top_out:
            top_out.write(self.topfile)
Exemplo n.º 22
0
    def Helicity(self):
        """Fill ``self.helicities1`` with per-frame helical fractions.

        For every seed index, simplified DSSP is computed on the matching
        entry of ``self.trajectories``; the stored array holds, per frame,
        the number of 'H' assignments divided by the number of residues.
        """
        for seedi in range(len(self.FNSeeds)):
            dssps = md.compute_dssp(self.trajectories[seedi], simplified=True)
            self.helicities1[seedi] = np.zeros((len(dssps)))

            for frame_idx, frame_codes in enumerate(dssps):
                n_helical = sum(1 for code in frame_codes if code == "H")
                self.helicities1[seedi][frame_idx] = n_helical / len(frame_codes)
Exemplo n.º 23
0
 def _superpose(self):
     """
     Superpose structured C-alphas.

     The DSSP assignment of frame 0 is cached on ``self.dssp``; the
     trajectory is then aligned to its own first frame on the 'alpha'
     atoms of residues labelled 'H' or 'E'.
     """
     self.dssp = mdtraj.compute_dssp(self.traj[0])[0]
     helix_mask = self.dssp == 'H'
     strand_mask = self.dssp == 'E'
     structured = helix_mask | strand_mask

     alpha_atoms = self.traj.topology.select_atom_indices('alpha')
     fit_atoms = np.array([
         alpha_atoms[residue]
         for residue in range(self.traj.n_residues)
         if structured[residue]
     ])
     self.traj.superpose(reference=self.traj, frame=0, atom_indices=fit_atoms)
Exemplo n.º 24
0
def do_dssp(traj, simplified=True):
    """Return the per-frame fraction of residues in each DSSP code.

    Parameters
    ----------
    traj
        Trajectory passed to ``md.compute_dssp``; its ``time`` attribute is
        used as the index of the result.
    simplified : bool, default True
        Forwarded to ``md.compute_dssp``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``traj.time``, one column per DSSP code that occurs
        anywhere in the trajectory. Column order follows set iteration
        order and is therefore not guaranteed.
    """
    # One row per frame, one column per residue.
    structure = pd.DataFrame(
        md.compute_dssp(traj, simplified=simplified),
        index=traj.time,
    )

    # Collect the distinct codes seen in each residue column, then flatten
    # them into the overall list of codes present.
    code_set = frozenset(
        frozenset(structure[col].unique()) for col in structure.columns)
    structure_codes = list({code for codes in code_set for code in codes})

    structure_frxn = pd.DataFrame(index=traj.time, columns=structure_codes)
    for code in structure_frxn.columns:
        # Row-wise mean of the boolean mask = fraction of residues with this code.
        structure_frxn[code] = (structure == code).mean(axis=1)
    return structure_frxn
Exemplo n.º 25
0
    def compute_secondary(self, frame):
        """Return the helix and sheet runs of ``frame``.

        Simplified DSSP is computed on ``frame`` and its first frame is
        split into runs of identical consecutive codes. Every run is
        described as ``[start chain name, start residue index, end chain
        name, end residue index]``; runs coded 'H' go into the first
        returned list, runs coded 'E' into the second.
        """
        dssp = md.compute_dssp(frame, simplified=True)[0]
        helices = []
        sheets = []

        for code, members in groupby(enumerate(dssp), operator.itemgetter(1)):
            residue_indices = [index for index, _ in members]
            start_residue = residue_indices[0]
            end_residue = residue_indices[-1]

            start_chain = self.top.topology.residue(start_residue).chain.index
            start_name = CHAIN_NAMES[start_chain]
            end_chain = self.top.topology.residue(end_residue).chain.index
            end_name = CHAIN_NAMES[end_chain]
            run = [start_name, start_residue, end_name, end_residue]

            if code == 'H':
                helices.append(run)
            elif code == 'E':
                sheets.append(run)
        return helices, sheets
Exemplo n.º 26
0
def cal_dssp_traj(trajfile: str, topfile: str, mode=3):
    """Count DSSP assignments per residue over a trajectory.

    Parameters
    ----------
    trajfile : str
        Trajectory file, loaded with ``load(trajfile, top=topfile)``.
    topfile : str
        Topology file; also opened with ``Universe`` to obtain the segment
        ids used as chain names.
    mode : int, default 3
        3 selects the simplified scheme (codes H, E, C); 7 the full scheme
        (codes H, B, E, G, I, T, S, L). Any other value raises.

    Returns
    -------
    (ResultDict, ssp)
        ``ResultDict`` holds 'resid', 'resname', 'chainID' and one entry per
        code with the per-residue count over frames; ``ssp`` is the DSSP
        array with blank assignments relabelled 'L'. A summary and the
        DataFrame built from ``ResultDict`` are printed.

    Raises
    ------
    Exception
        For an unsupported ``mode``, or when the residue count of the
        topology differs from the number of assignments per frame.
    """
    if mode == 7:
        simplified = False
        SStype = ['H', 'B', 'E', 'G', 'I', 'T', 'S', 'L']
        # Work on a copy so repeated calls do not keep appending to the
        # module-level header list.
        comment = list(comment7)
    elif mode == 3:
        simplified = True
        SStype = ['H', 'E', 'C']
        # NOTE(review): this name is spelled ``commet3`` while the mode-7
        # counterpart is ``comment7`` -- confirm the module-level name.
        comment = list(commet3)
    else:
        raise Exception('The mode parameter only accept 3 or 7. %s' %
                        str(mode))
    top = load(topfile)[0]
    u = Universe(topfile)
    chainNames = u.segments.segids
    chainIDs = []
    resnames = []
    resids = []
    for chain in top.top.chains:
        chainName = chainNames[chain.index]
        for residue in chain.residues:
            resnames.append(residue.name)
            resids.append(residue.resSeq)
            chainIDs.append(chainName)
    traj = load(trajfile, top=topfile)
    ssp = compute_dssp(traj, simplified=simplified)
    # Relabel blank assignments as 'L'. This must be an assignment: a bare
    # ``==`` comparison discards its result and leaves every 'L' count at 0.
    mask = ssp == ' '
    ssp[mask] = 'L'
    if len(chainIDs) != len(ssp[0]):
        raise Exception(
            'The chainID and ssp results must be same length. %d-%d' %
            (len(chainIDs), len(ssp[0])))
    ResultDict = {
        'resid': resids,
        'resname': resnames,
        'chainID': chainIDs,
    }
    residueSize = ssp.size
    for ss in SStype:
        # NOTE(review): ``sum`` is called with ``axis=``, so it is presumably
        # numpy's sum imported at module level -- confirm.
        ResultDict[ss] = sum(ssp == ss, axis=0)
        comment.append("# %s: %.4f" % (ss, sum(ssp == ss) / residueSize))
    ResultDF = DataFrame(ResultDict)
    print("\n".join(comment))
    print(ResultDF)
    return ResultDict, ssp
Exemplo n.º 27
0
def run_DSSP_analysis_OLD(CP, outdir):
    """Write per-residue C / E / H occupancy fractions to CSV files.

    DSSP is computed on ``CP.traj``. For residue indices 1 to
    ``CP.n_residues - 2`` (the first and last residue are skipped), the
    number of frames with each code is divided by ``CP.n_frames``. The
    results are saved to DSSP_H.csv, DSSP_E.csv and DSSP_C.csv in ``outdir``.
    """
    dssp_data = md.compute_dssp(CP.traj)

    n_residues = CP.n_residues
    n_frames = CP.n_frames

    C_vector, E_vector, H_vector = [], [], []
    per_code = (('C', C_vector), ('E', E_vector), ('H', H_vector))

    for residue in range(1, n_residues - 1):
        for code, fractions in per_code:
            n_hits = sum(dssp_data.transpose()[residue] == code)
            fractions.append(float(n_hits) / n_frames)

    outputs = (
        ('%s/DSSP_H.csv', H_vector),
        ('%s/DSSP_E.csv', E_vector),
        ('%s/DSSP_C.csv', C_vector),
    )
    for path_template, fractions in outputs:
        np.savetxt(path_template % (outdir), np.array(fractions), delimiter=', ')
Exemplo n.º 28
0
def topology_mdtraj(traj):
    '''Generate topology spec for the MolecularViewer from mdtraj.

    The returned dictionary contains atom element symbols, atom names,
    bonds as pairs of atom indices, the DSSP assignment of the first frame,
    residue names and, per residue, the indices of its atoms.

    :param mdtraj.Trajectory traj: the trajectory
    :return: A chemview-compatible dictionary corresponding to the topology defined in mdtraj.

    '''
    import mdtraj as md

    topology = traj.topology
    return {
        'atom_types': [atom.element.symbol for atom in topology.atoms],
        'atom_names': [atom.name for atom in topology.atoms],
        'bonds': [(first.index, second.index) for first, second in topology.bonds],
        'secondary_structure': md.compute_dssp(traj[0])[0],
        'residue_types': [residue.name for residue in topology.residues],
        'residue_indices': [[atom.index for atom in residue.atoms]
                            for residue in topology.residues],
    }
Exemplo n.º 29
0
def structure_contact_fraction(traj,
                               selection,
                               selection2=None,
                               cutoff=0.4,
                               simplified=True):
    """Per frame and DSSP code, the fraction of residues that are in contact.

    Parameters
    ----------
    traj
        Trajectory passed to ``md.compute_dssp`` and ``calculate_contacts``;
        ``traj.time`` indexes the result.
    selection, selection2, cutoff
        Forwarded unchanged to ``calculate_contacts``.
    simplified : bool, default True
        Forwarded to ``md.compute_dssp``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``traj.time``, one column per DSSP code present. Each
        value is (residues with that code AND in a contact) / (residues with
        that code); frames where the code does not occur stay NaN.
    """
    # calculate structure of each residue in each frame
    structure = pd.DataFrame(
        md.compute_dssp(traj, simplified),
        index=traj.time,
    )
    # get unique structure codes
    code_set = frozenset(
        frozenset(structure[col].unique()) for col in structure.columns)
    structure_codes = list({code for codes in code_set for code in codes})

    # calculate contacts
    contacts, atom_pairs = calculate_contacts(traj, selection, selection2,
                                              cutoff)
    # residue-wise contact mask with the same shape as ``structure``
    contacts_mask = pd.DataFrame(
        np.zeros_like(structure, dtype=bool),
        index=traj.time,
    )
    # for each frame, set True for every residue taking part in a contact
    for t in contacts.index:
        # TODO: figure out how to slice mdtraj topology and vectorize
        for atom_idx in atom_pairs[np.where(contacts.loc[t, :])]:
            for atom in atom_idx:
                res_id = traj.top.atom(atom).residue.index
                contacts_mask.loc[t, res_id] = True

    structure_sb_frxn = pd.DataFrame(index=traj.time, columns=structure_codes)
    for code in structure_sb_frxn.columns:
        structure_mask = (structure == code)
        structure_and_sb = (structure_mask & contacts_mask).sum(axis=1)
        residues_with_code = structure_mask.sum(axis=1)

        # Keep only frames where the code occurs, to avoid dividing by zero.
        # Series.nonzero() no longer exists in pandas >= 1.0, so take the
        # positions from the underlying array instead.
        nonzero_frames = residues_with_code.values.nonzero()[0]
        structure_sb_frxn[code] = (
            structure_and_sb.iloc[nonzero_frames] /
            residues_with_code.iloc[nonzero_frames])

    return structure_sb_frxn
Exemplo n.º 30
0
    def compute_secondary(self, frame):
        """Split the simplified DSSP of ``frame`` into helix and sheet runs.

        A run is a maximal stretch of consecutive residues with the same
        code in the first DSSP frame. It is reported as a four-item list:
        chain name and index of its first residue, then chain name and
        index of its last residue. Returns ``(helices, sheets)`` for the
        'H' and 'E' runs respectively.
        """
        dssp = md.compute_dssp(frame, simplified=True)[0]
        helices, sheets = [], []

        def chain_name_of(residue_index):
            # Chain label of the residue, looked up through CHAIN_NAMES.
            residue = self.top.topology.residue(residue_index)
            return CHAIN_NAMES[residue.chain.index]

        for k, g in groupby(enumerate(dssp), operator.itemgetter(1)):
            stretch = list(g)
            start_residue = stretch[0][0]
            end_residue = stretch[-1][0]
            run = [
                chain_name_of(start_residue),
                start_residue,
                chain_name_of(end_residue),
                end_residue,
            ]
            if k == 'H':
                helices.append(run)
            elif k == 'E':
                sheets.append(run)
        return helices, sheets
Exemplo n.º 31
0
def calc_dssp(chimera: Chimera = None, filename: str = None, simplified: bool = True):
    """
    Compute Dictionary of protein secondary structure (DSSP) secondary structure assignments.
    This function uses the MDtraj compute_dssp implementation as a basis.

    Exactly one of ``chimera`` and ``filename`` must be given. A Chimera
    object is written to a private temporary PDB file, which is removed
    again once it has been loaded.

    :param chimera: A Chimera object.
    :param filename: path to a pdb file
    :param simplified: Use the simplified 3-category assignment scheme. Otherwise the original 8-category scheme is used.
    :return: assignments np.ndarray. The secondary structure assignment for each residue
    :raises ValueError: if both or neither of ``chimera`` and ``filename`` are given
    """
    import os
    import tempfile

    if chimera and filename:
        raise ValueError("Only a Chimera object or the path to a pdb file must be specified")
    if not chimera and not filename:
        raise ValueError("At least a Chimera object or the path to a pdb file must be specified")

    if chimera:
        # A unique temporary file avoids clobbering (or reading) another
        # caller's structure, which a fixed shared path would allow.
        handle, tmp_path = tempfile.mkstemp(suffix=".pdb")
        os.close(handle)
        try:
            chimera.write(tmp_path)
            structure = md.load(tmp_path)
        finally:
            os.remove(tmp_path)
    else:
        structure = md.load(filename)

    dssp = md.compute_dssp(structure, simplified=simplified)
    return dssp
Exemplo n.º 32
0
    def protein_calcs(self, struc):
        """
        Run the calculations listed in self.calcs on the first frame of
        ``struc`` and append each result to the matching attribute.

        Recognised names and their targets: 'Rg' -> self.Rg, 'Asph' ->
        self.Asph, 'EED' -> self.EED, 'SASA' -> self.SASA, 'cmaps' ->
        self.cmaps, 'gcmaps' -> self.gcmaps, 'SS' -> self.SS, 'flory' ->
        self.fex, 'rama' -> self.dihedrals, 'surface_contacts' ->
        self.scmaps. 'Gyr' only triggers the gyration tensor. self.nres is
        set to the number of residues.

        Shared intermediates (gyration tensor, contact map, SASA) are
        computed whenever any calculation that needs them is requested, so
        e.g. 'Rg' works without 'Gyr' and 'gcmaps' without 'cmaps'.
        """
        calcs = self.calcs
        coors = struc.xyz[0]
        CA_coors = struc.atom_slice(struc.topology.select('name CA'))[0].xyz[0]
        self.nres = struc.n_residues

        # Gyration tensor: needed by both Rg and Asph.
        if 'Gyr' in calcs or 'Rg' in calcs or 'Asph' in calcs:
            L = sa_core.gyration_tensor(coors)
        if 'Rg' in calcs:
            self.Rg.append(sa_core.compute_Rg(L))
        if 'Asph' in calcs:
            self.Asph.append(sa_core.compute_Asph(L))
        if 'EED' in calcs:
            # Distance between the first and last C-alpha.
            self.EED.append(np.linalg.norm(CA_coors[0] - CA_coors[-1]))

        # SASA: also an input of the surface-contact calculation below.
        if 'SASA' in calcs or 'surface_contacts' in calcs:
            SASA = md.shrake_rupley(struc)
        if 'SASA' in calcs:
            self.SASA.append(SASA.sum(axis=1)[0])

        # Contact map: also the input of the gremlin contact map.
        if 'cmaps' in calcs or 'gcmaps' in calcs:
            dist = sa_core.contact_maps(CA_coors)
        if 'cmaps' in calcs:
            self.cmaps.append(dist)
        if 'gcmaps' in calcs:
            self.gcmaps.append(sa_core.gremlin_contact_maps(dist))

        if 'SS' in calcs:
            self.SS.append(md.compute_dssp(struc))
        if 'flory' in calcs:
            self.fex.append(polymer.compute_flory(struc, self.nres))
        if 'rama' in calcs:
            self.dihedrals.append(rama.compute_phipsi(struc))
        if 'surface_contacts' in calcs:
            self.scmaps.append(sa_core.surface_contacts(struc, SASA))
        return None
Exemplo n.º 33
0
    def test_dssp_allresidues(self):
        """``pt.dssp_allresidues`` must match mdtraj's simplified DSSP
        (first frame) once mdtraj's 'NA' labels are rewritten to 'C'.

        Checked on two local trajectories and on PDB entry 1l2y, which is
        downloaded into ./output/ (network access required).
        """
        from numpy.testing import assert_array_equal

        def update_mdtraj_dssp(mdata):
            # Rewrite 'NA' entries to 'C' in place and hand the array back.
            for position in range(len(mdata)):
                if mdata[position] == 'NA':
                    mdata[position] = 'C'
            return mdata

        trajlist = [
            pt.iterload('data/DPDP.nc', 'data/DPDP.parm7'),
            pt.iterload('data/tz2.ortho.nc', 'data/tz2.ortho.parm7'),
        ]

        # The PDB file has to exist before it can be loaded.
        pt.io.download_PDB('1l2y', './output/', overwrite=True)
        trajlist.append(pt.iterload('output/1l2y.pdb'))

        for traj in trajlist:
            data = pt.dssp_allresidues(traj, simplified=True)[0]

            mtraj = md.load(traj.filename, top=traj.top.filename)
            mdata = update_mdtraj_dssp(
                md.compute_dssp(mtraj, simplified=True)[0])
            assert_array_equal(data, mdata)
Exemplo n.º 34
0
    def test_dssp_allresidues(self):
        """Compare ``pt.dssp_allresidues`` with mdtraj's simplified DSSP.

        For each test trajectory (two local files plus the downloaded PDB
        entry 1l2y), the first frame of both results must be equal after
        mdtraj's 'NA' codes have been replaced by 'C'.
        """
        from numpy.testing import assert_array_equal

        def update_mdtraj_dssp(mdata):
            # In-place replacement of 'NA' by 'C'.
            na_positions = [idx for idx, elm in enumerate(mdata) if elm == 'NA']
            for idx in na_positions:
                mdata[idx] = 'C'
            return mdata

        trajlist = []
        for traj_file, parm_file in (
                ('data/DPDP.nc', 'data/DPDP.parm7'),
                ('data/tz2.ortho.nc', 'data/tz2.ortho.parm7')):
            trajlist.append(pt.iterload(traj_file, parm_file))

        pt.io.download_PDB('1l2y', './output/', overwrite=True)
        trajlist.append(pt.iterload('output/1l2y.pdb'))

        for traj in trajlist:
            data = pt.dssp_allresidues(traj, simplified=True)[0]

            mtraj = md.load(traj.filename, top=traj.top.filename)
            raw_mdata = md.compute_dssp(mtraj, simplified=True)[0]
            mdata = update_mdtraj_dssp(raw_mdata)
            assert_array_equal(data, mdata)
Exemplo n.º 35
0
    def write_simulation_files(self,
                               ref_traj_aa,
                               topfilename,
                               seqfilename,
                               ssbias=False):
        """Generate the topology and write all simulation input files.

        Files written, in order: ``seqfilename`` (FASTA lines),
        "charge_on_residues.dat", "ssweight", "jpred" and ``topfilename``.

        The secondary structure string is the first frame of
        ``md.compute_dssp(ref_traj_aa)`` with 'C' replaced by '-'. Its length
        is asserted to equal the total FASTA length. "ssweight" gets one
        "helix sheet" pair per residue: 1.0 for the matching column when
        ``ssbias`` is true and the code is 'H' or 'E', otherwise 0.0.

        Raises AttributeError when ``self.model`` provides neither
        ``ref_traj`` nor ``starting_traj``.
        """
        self.generate_topology()

        model = self.model
        if hasattr(model, "ref_traj"):
            traj = model.ref_traj
        elif hasattr(model, "starting_traj"):
            traj = model.starting_traj
        else:
            raise AttributeError(
                "need to set intial conditions (ref_traj or starting_traj) to write"
            )

        fasta = traj.top.to_fasta()

        with open("{}".format(seqfilename), "w") as fout:
            for sequence in fasta:
                fout.write("{}\n".format(sequence))

        with open("charge_on_residues.dat", "w") as fout:
            n_charged = len(self.model.mapping._charged_residues)
            fout.write("{:d}\n".format(n_charged))
            for res_idx, charge in self.model.mapping._charged_residues:
                fout.write("{:6d}   {:8.4f}\n".format(res_idx, charge))

        # compute secondary structure from a reference structure
        dssp = "".join(md.compute_dssp(ref_traj_aa)[0])
        dssp = dssp.replace("C", "-")
        total_length = sum([len(sequence) for sequence in fasta])
        assert len(dssp) == total_length, "Number of residues in reference different than expected"

        with open("ssweight", "w") as fout:
            for ss in dssp:
                # Weights are non-zero only when biasing is enabled.
                helix = 1. if (ssbias and ss == "H") else 0.
                sheet = 1. if (ssbias and ss == "E") else 0.
                fout.write("{:.1f} {:.1f}\n".format(helix, sheet))

        with open("jpred", "w") as fout:
            start = 0
            for sequence in fasta:
                stop = start + len(sequence)
                fout.write("{}\n".format(sequence))
                fout.write("{}\n".format(dssp[start:stop]))
                start = stop

        with open("{}".format(topfilename), "w") as fout:
            fout.write(self.topfile)
Exemplo n.º 36
0
def mktraj(targetid, ensembler_stage=None, traj_filepath=None, topol_filepath=None, models_data_filepath=None, process_only_these_templates=None):
    """Makes a trajectory for a given target, using mdtraj. The trajectory can be used with other
    software, e.g. for visualization with PyMOL or VMD.

    Parameters
    ----------
    targetid : str
        e.g. 'EGFR_HUMAN_D0'
    ensembler_stage : str
        The Ensembler stage from which to build models, e.g. 'build_models' results in a trajectory
        built from the 'model.pdb.gz' files output by the build_models command.
        options: build_models|refine_implicit_md|refine_explicit_md
        default: most advanced stage for which model files are available
    traj_filepath : str
        default: models/[targetid]/traj-[ensembler_stage].xtc
    topol_filepath : str
        default: models/[targetid]/traj-[ensembler_stage]-topol.pdb
    models_data_filepath : str
        default: models/[targetid]/traj-[ensembler_stage]-data.csv
    process_only_these_templates : list of str
        If given (and non-empty), only these template ids are considered;
        otherwise every sub-directory of the target's models directory whose
        name contains '_D' is used.

    Returns
    -------
    traj : mdtraj.Trajectory
        One frame per valid model, ordered by decreasing sequence identity and
        superposed onto the first frame.
    df : pandas.DataFrame
        models data (columns 'templateid', 'model_filepath' and 'seqid')

    Raises
    ------
    Exception
        If no modeling stage has been completed for this target.
    """
    ensembler.core.check_project_toplevel_dir()
    models_target_dir = os.path.join(ensembler.core.default_project_dirnames.models, targetid)

    logger.debug('Working on target %s' % targetid)

    if ensembler_stage is None:
        # Stages are listed from most to least advanced; take the first complete one.
        for stagename in ['refine_explicit_md', 'refine_implicit_md', 'build_models']:
            if check_ensembler_modeling_stage_complete(stagename, targetid):
                ensembler_stage = stagename
                break

    if ensembler_stage is None:
        raise Exception('Models have not yet been built for this Ensembler project.')

    if traj_filepath is None:
        traj_filepath = os.path.join(models_target_dir, 'traj-{0}.xtc'.format(ensembler_stage))
    if topol_filepath is None:
        topol_filepath = os.path.join(models_target_dir, 'traj-{0}-topol.pdb'.format(ensembler_stage))
    if models_data_filepath is None:
        models_data_filepath = os.path.join(models_target_dir, 'traj-{0}-data.csv'.format(ensembler_stage))

    if process_only_these_templates:
        templateids = process_only_these_templates
    else:
        # The builtin next() works on both Python 2.6+ and Python 3, unlike the
        # Python-2-only generator method .next().
        dirnames = next(os.walk(models_target_dir))[1]
        templateids = [dirname for dirname in dirnames if '_D' in dirname]

    model_filename = ensembler.core.model_filenames_by_ensembler_stage[ensembler_stage]
    valid_model_templateids = [
        templateid for templateid in templateids
        if os.path.exists(os.path.join(models_target_dir, templateid, model_filename))
    ]
    valid_model_filepaths = [
        os.path.join(models_target_dir, templateid, model_filename)
        for templateid in valid_model_templateids
    ]

    # Read the sequence identity of each template (None when the file is absent),
    # closing every file handle as soon as it has been read.
    seqids = []
    for templateid in valid_model_templateids:
        seqid_filepath = os.path.join(models_target_dir, templateid, 'sequence-identity.txt')
        if os.path.exists(seqid_filepath):
            with open(seqid_filepath) as seqid_file:
                seqids.append(float(seqid_file.read().strip()))
        else:
            seqids.append(None)

    df = pd.DataFrame({
        'templateid': valid_model_templateids,
        'model_filepath': valid_model_filepaths,
        'seqid': seqids,
    })
    if hasattr(df, 'sort_values'):
        df.sort_values(by='seqid', inplace=True, ascending=False)
    else:
        # Very old pandas releases only provide DataFrame.sort.
        df.sort(columns='seqid', inplace=True, ascending=False)
    df.reset_index(drop=True, inplace=True)

    df.to_csv(models_data_filepath, columns=['templateid', 'seqid'])

    # construct traj
    traj = mdtraj.load_pdb(df.model_filepath[0])
    for model_filepath in df.model_filepath[1:]:
        traj += mdtraj.load_pdb(model_filepath)

    # superpose structured C-alphas (residues assigned 'H' or 'E' in the first frame)
    dssp = mdtraj.compute_dssp(traj[0])[0]
    structured_resis_bool = (dssp == 'H') | (dssp == 'E')
    alpha_indices = traj.topology.select_atom_indices('alpha')
    structured_alpha_indices = np.array([alpha_indices[x] for x in range(traj.n_residues) if structured_resis_bool[x]])
    traj.superpose(reference=traj, frame=0, atom_indices=structured_alpha_indices)

    # write traj, and write first frame as pdb file
    traj[0].save(topol_filepath)
    traj.save(traj_filepath)
    return traj, df
Exemplo n.º 37
0
def test_2(get_fn, tmpdir):
    """Check, frame by frame, mdtraj's full DSSP output against ``call_dssp`` for 2EQQ.pdb."""
    traj = md.load(get_fn('2EQQ.pdb'))
    n_frames = len(traj)
    for idx in range(n_frames):
        reference = call_dssp(tmpdir, traj[idx])
        computed = md.compute_dssp(traj[idx], simplified=False)[0]
        assert_(reference, computed)
Exemplo n.º 38
0
"""
Created on Fri Jul 29 16:10:31 2016

@author: hliu
"""

import mdtraj as md
import pandas as pd
import numpy as np
from researchcode.plotting.plot_set import *
import glob
import os
import matplotlib as mpl
from matplotlib.ticker import FuncFormatter

# Property name -> callable that takes a trajectory and returns that property.
# The lambdas defer the lookup of calSSPercent until they are actually called.
struct_funct = dict(
    ss=lambda traj: md.compute_dssp(traj),
    rg=lambda traj: md.compute_rg(traj),
    heli=lambda traj: calSSPercent(traj, 'H'),
    beta=lambda traj: calSSPercent(traj, 'E'),
    # 'rmsd': lambda traj: rmsds[traj.name]
)


def addProperty2Traj(traj, props):
    """Attach every property in ``props`` that ``traj`` does not have yet.

    For each key of ``props`` that is not already an attribute of ``traj``,
    ``props[key](traj)`` is evaluated and stored on ``traj`` under that name.
    Attributes that already exist are left untouched and their callables are
    not invoked.
    """
    for name in props:
        if hasattr(traj, name):
            continue
        setattr(traj, name, props[name](traj))


Exemplo n.º 39
0
def test_7():
    """Smoke test: the simplified DSSP computation runs on 2EQQ.pdb without raising."""
    traj = md.load(get_fn('2EQQ.pdb'))
    md.compute_dssp(traj, simplified=True)
Exemplo n.º 40
0
import matplotlib
matplotlib.use("AGG")
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
import pylab

maxrep=4
trajpath='/storage/mi/pycon/TTApplications/ALA10TT2/Results/'
for r in range(maxrep):
    print 'Using rep '+str(r)
    if os.path.exists(trajpath+'EigenfunctionFramesCenter'+str(r)+'.xtc')==True:
        print "file exists"
        Traj=md.load(trajpath+'EigenfunctionFramesCenter'+str(r)+'.xtc', top=trajpath+'md_production0_noWater.pdb')
        trajlen=len(Traj)
        Dssp=md.compute_dssp(Traj,simplified=False)
        # Plot as gromacs dssp
        fig1 = plt.figure(1)
        ax=fig1.add_subplot(111)  
        skip=1
        for t in range(trajlen/skip):
            for m in range(1,9):
                if Dssp[skip*t,m]=='H':
                    colore='b'
                elif Dssp[skip*t,m]=='B':
                    colore='k'
                elif Dssp[skip*t,m]=='E':
                    colore='r'
                elif Dssp[skip*t,m]=='G':
                    colore='gray'
                elif Dssp[skip*t,m]=='I':
Exemplo n.º 41
0
def calc_ss(traj):
    """Return ``md.compute_dssp(traj)``, called with its default options."""
    assignments = md.compute_dssp(traj)
    return assignments
Exemplo n.º 42
0
def get_SS(SS, top=None):
    r"""
    Guess what kind of secondary-structure (SS) input the user passed and,
    where applicable, compute the SS from it

    Parameters
    ----------
    SS : secondary structure information
        Can be many things:
        * triple of ints (CP_idx, traj_idx, frame_idx)
          Nothing is computed here; the triple is returned
          as is and handled externally by the
          :obj:`ContactGroup` that called this method.
          See the docs there for more info
        * True
          same as (0, 0, 0)
        * None or False
          Do nothing
        * :obj:`mdtraj.Trajectory`
          The first frame of this geometry is used to compute the SS
        * string
          Path to a file, of which only the first
          frame will be read. The SS will be computed
          from there. Reading is first attempted
          without topology information (works for
          e.g. .pdb, .gro, .h5); if that raises a
          ValueError, :obj:`top` is passed along
          (needed for e.g. .xtc, .dcd)
        * array_like
          Use the SS from here, s.t. ss_array[idx]
          gives the SS-info for the residue
          with that idx. Note that any input of
          length 3 is treated as the triple above.
    top : :obj:`~mdtraj.Topology`, default is None
        Only used when :obj:`SS` is a filename that
        cannot be read without a topology

    Returns
    -------
    from_tuple : tuple or False
        The triple from which the SS info should be
        gotten, or False if it does not come from a triple
    ss_array : np.ndarray or None
    """

    def _first_frame_dssp(geom):
        # Simplified DSSP codes of the first frame of geom
        return _md.compute_dssp(geom, simplified=True)[0]

    # Nothing requested
    if SS is None or SS is False:
        return False, None

    # A geometry was passed directly
    if isinstance(SS, _md.Trajectory):
        return False, _first_frame_dssp(SS[0])

    # A filename: retry with the topology if the first attempt fails
    if isinstance(SS, str):
        try:
            return False, _first_frame_dssp(_md.load(SS, frame=0))
        except ValueError:
            return False, _first_frame_dssp(_md.load(SS, top=top, frame=0))

    if SS is True:
        return (0, 0, 0), None

    # Anything of length three is taken to be the index triple
    if len(SS) == 3:
        return SS, None

    # Otherwise the input already is the per-residue SS information
    return False, SS
Exemplo n.º 43
0
        chain = entry['chain']

        # get chains - FML
        # IGNORE FOR NOW - USING SHIFT
        mmp = app.PDBFile(pdb)
        if chain == '_':
            chain = list(mmp.topology.residues())[0].chain.id[0]
        chain_names = []
        for c in mmp.topology.chains():
            chain_names.append(c.id[0])

        chain_idx = chain_names.index(chain)

        p = md.load_pdb(pdb)

        dssp = md.compute_dssp(p)
        residues = list(p.topology.residues)
        for i in range(dssp.shape[0]):
            for j in range(dssp.shape[1]):
                fid = i
                rid = residues[j].resSeq
                if residues[j].chain.index == chain_idx:
                    rinfo.write(
                        f'{entry["pdb_file"].split("/")[-1]} {chain} {residues[j].name} {index} {fid} {rid} {dssp[i, j]}\n'
                    )
                    key = residues[j].name + '-' + dssp[i, j]
                    if key not in combos:
                        combos[key] = 0
                    combos[key] += 1
        pbar.set_description(f'Processed PDB {pdb} Total Records: {index}')
        rinfo.flush()
Exemplo n.º 44
0
def test_2():
    """Yield one check per frame of 2EQQ.pdb comparing ``call_dssp`` with mdtraj's full DSSP."""
    t = md.load(get_fn('2EQQ.pdb'))
    for i in range(len(t)):
        # Bind the current index as a default argument. A plain closure looks
        # ``i`` up only when the lambda is called, so a runner that collects
        # the whole generator before executing the checks would test the last
        # frame every time.
        yield lambda i=i: assert_(call_dssp(t[i]), md.compute_dssp(t[i], simplified=False)[0])
Exemplo n.º 45
0
plt.clf()

# RMSD of every frame with respect to the first frame of the same trajectory
rmsd = md.rmsd(longest_traj, longest_traj, frame=0)
plt.plot(time, rmsd)
plt.xlabel('time (ns)')
plt.ylabel('RMSD(nm)')
plt.title('RMSD')

plt.tight_layout()
plt.savefig('rmsd.png')

### SECONDARY STRUCTURE PLOT

plt.clf()

dssp = md.compute_dssp(longest_traj)
# One {DSSP code: number of occurrences} dict per row of the DSSP array
dssp_counts = []
for d in dssp:
    unique, counts = np.unique(d, return_counts=True)
    dssp_counts.append(dict(zip(unique, counts)))

# Normalisation constant: number of entries in the first row
total_vals = sum(dssp_counts[0].values())

# A row that contains no residue with a given code has no such key in its
# dict; count it as zero instead of raising KeyError.
C_values = [d.get('C', 0) / float(total_vals) for d in dssp_counts]
E_values = [d.get('E', 0) / float(total_vals) for d in dssp_counts]
H_values = [d.get('H', 0) / float(total_vals) for d in dssp_counts]
Exemplo n.º 46
0
def test_6():
    """Residues with a C/N/O/CA backbone must be 'C'; all other residues must be 'NA'."""
    traj = md.load(get_fn('alanine-dipeptide-explicit.pdb'))
    dssp = md.compute_dssp(traj, simplified=True)
    backbone_names = ('C', 'N', 'O', 'CA')
    # Boolean mask over residues: True where all four backbone atom names are present
    has_backbone = np.array([
        set(atom.name for atom in residue.atoms).issuperset(backbone_names)
        for residue in traj.topology.residues
    ])
    assert np.unique(dssp[:, has_backbone]) == "C"
    assert np.unique(dssp[:, np.logical_not(has_backbone)]) == 'NA'
Exemplo n.º 47
0
 def results(self):
     """Compute DSSP for ``self.trj`` and return the selected columns as a DataFrame.

     The DSSP array restricted to ``self._selection`` is also stored on
     ``self.framedata``. Columns are labelled with the ``resSeq`` of each
     residue of the topology whose ``is_protein`` flag is true.
     """
     full_dssp = md.compute_dssp(self.trj, self.simplify)
     self.framedata = full_dssp[:, self._selection]
     protein_resids = []
     for residue in self.trj.topology.residues:
         if residue.is_protein:
             protein_resids.append(residue.resSeq)
     return pd.DataFrame(self.framedata, columns=protein_resids)
def calculate_ss(pdbfilename):
    """Load a structure file and return the DSSP string of its first frame.

    Returns a tuple ``(secseq, helix_idxs)`` where ``secseq`` is the
    concatenation of the first-frame DSSP codes and ``helix_idxs`` is the
    result of ``np.where`` on the positions whose code equals 'H'.
    """
    structure = md.load(pdbfilename)
    first_frame = md.compute_dssp(structure)[0]
    secseq = ''.join(first_frame)
    helix_idxs = np.where(first_frame == 'H')
    return secseq, helix_idxs
Exemplo n.º 49
0
def calSSPercent(traj, ss_type):
    """Return the percentage of entries along axis 0 of ``traj.ss`` equal to ``ss_type``.

    If ``traj`` has no ``ss`` attribute yet, it is computed with
    ``md.compute_dssp(traj)`` and cached on ``traj`` first.
    """
    if not hasattr(traj, 'ss'):
        traj.ss = md.compute_dssp(traj)
    # 1 where the code matches, 0 elsewhere; the mean over axis 0 is the fraction
    is_match = np.where(traj.ss == ss_type, 1, 0)
    return is_match.mean(axis=0) * 100
Exemplo n.º 50
0
def test_1(get_fn, tmpdir):
    """Check mdtraj's full DSSP against ``call_dssp`` on the 'minimal' atom subset of several PDB files."""
    pdb_names = ['1bpi.pdb', '1vii.pdb', '4ZUO.pdb', '1am7_protein.pdb']
    for pdb_name in pdb_names:
        loaded = md.load_pdb(get_fn(pdb_name))
        minimal_atoms = loaded.top.select_atom_indices('minimal')
        minimal = loaded.atom_slice(minimal_atoms)
        reference = call_dssp(tmpdir, minimal)
        assert_(reference, md.compute_dssp(minimal, simplified=False)[0])