Exemplo n.º 1
0
	def add_dssp(pdb_file, dssp_exc='/Users/thorn/dssp-2.2.1/mkdssp'):
		"""Map residue numbers to DSSP secondary-structure codes for model 0.

		Parameters
		----------
		pdb_file : str
			Path of the PDB file; also used as the structure id.
		dssp_exc : str, optional
			Path to the DSSP executable. Defaults to the location that was
			previously hard-coded in this function.

		Returns
		-------
		dict
			``{residue_number: secondary_structure_code}``. The key is the
			residue number only (no chain id or insertion code), so residues
			sharing a number across chains overwrite each other; the last one
			reported by DSSP wins.
		"""
		from Bio.PDB.DSSP import DSSP

		parser = PDBParser()
		structure = parser.get_structure(pdb_file, pdb_file)
		model = structure[0]

		dssp = DSSP(model, pdb_file, dssp_exc)

		sec_dict = {}
		for a_key in dssp.keys():
			# a_key is (chain_id, residue_id); residue_id[1] is the residue number.
			# Index 2 of the DSSP tuple is the secondary structure
			# (index 1 is the amino-acid letter).
			sec_dict[a_key[1][1]] = dssp[a_key][2]

		return sec_dict
Exemplo n.º 2
0
def get_surface_residues(filename, my_acc_array, my_threshold=0.2):
    """Return the solvent-exposed residues of model 0 of a PDB file.

    A residue counts as exposed when the relative accessible surface area
    reported by DSSP is at least ``my_threshold``.

    Parameters
    ----------
    filename : str
        Path of the PDB file.
    my_acc_array : str
        Passed to ``DSSP`` as ``acc_array`` (the max-ASA table to use).
    my_threshold : float, optional
        Minimum relative ASA for a residue to be reported. Default is 0.2.

    Returns
    -------
    set of str
        One entry per exposed residue, formatted as residue number followed
        by chain id (for example ``"42A"``).
    """
    p = PDBParser(PERMISSIVE=1)
    s = p.get_structure("code.pdb", filename)
    model = s[0]
    d = DSSP(model, filename, dssp='mkdssp', acc_array=my_acc_array)

    sys.stderr.write("\nHandled %i residues\n" % len(d))

    residue_number = set()

    # Walk the keys directly instead of using the DSSP running index as a
    # position into keys(): that index can run ahead of the list position
    # (e.g. after chain breaks), which selected the wrong residue.
    for key in d.keys():
        rel_asa = d[key][3]
        if isinstance(rel_asa, str):  # relative ASA is sometimes 'NA'
            continue
        if rel_asa >= my_threshold:
            chain_id, res_id = key
            residue_number.add(str(res_id[1]) + chain_id)
    return residue_number
Exemplo n.º 3
0
def calc_dssp(model, chain_sites: dict, pdb_name: str) -> None:
    """Copy DSSP results onto the residue records in ``chain_sites``.

    Runs ``mkdssp`` on ``pdb_name`` for ``model`` and, for every DSSP residue
    whose chain id and residue number are present in ``chain_sites``, sets
    ``sec_struct``, ``sasa_r``, ``phi`` and ``psi`` on the object stored at
    ``chain_sites[chain_id][resnumb][2]``. Finally commits the module-level
    ``session`` (committed even when DSSP failed).

    DSSP tuple layout used here:

    ============ ==================
    Tuple index  Value
    ============ ==================
    2            Secondary structure
    3            Relative ASA
    4            Phi
    5            Psi
    ============ ==================

    Parameters
    ----------
    model
        Model handed to ``DSSP``.
    chain_sites : dict
        ``{chain_id: {residue_number: record}}`` where ``record[2]`` is the
        object to annotate.
    pdb_name : str
        Path of the structure file handed to ``DSSP``.
    """
    try:
        dssp = DSSP(model, pdb_name, dssp="mkdssp")
    except Exception:
        # Best effort: a DSSP failure leaves the records untouched.
        dssp = {}
        print("dssp failed!")

    for residue in dssp.keys():
        chain_id, res_id = residue
        resnumb = res_id[1]

        if chain_id in chain_sites and resnumb in chain_sites[chain_id]:
            # Index the tuple instead of unpacking all fields so that extra
            # trailing fields do not break the assignment.
            values = dssp[residue]
            new_res = chain_sites[chain_id][resnumb][2]

            new_res.sec_struct = values[2]
            new_res.sasa_r = values[3]
            new_res.phi = values[4]
            new_res.psi = values[5]
    session.commit()
Exemplo n.º 4
0
    def _run(self, pdbcode, chains):
        """Run DSSP on ``pdb<pdbcode>.ent`` and return the records of ``chains``.

        Returns a pair ``(records, chain)``: ``records`` holds the DSSP tuples
        of every residue whose chain id is in ``chains``; ``chain`` is the
        chain id of the first DSSP entry, irrespective of ``chains``.
        """
        pdb_file = os.path.join(PDB_PATH, 'pdb' + pdbcode + '.ent')
        parser = PDBParser()
        model = parser.get_structure(pdbcode, pdb_file)[0]
        dssp = DSSP(model, pdb_file, dssp=dssp_route)

        selected = []
        for key in dssp.keys():
            if key[0] in chains:
                selected.append(key)

        # Chain id of the very first DSSP entry.
        first_chain = dssp.keys()[0][0]

        records = [dssp[key] for key in selected]
        return records, first_chain
def calc_features(PATH, PDB_id, OUTPATH):
    """Compute per-atom structural features and pickle them to disk.

    For ``<PATH>/<PDB_id>.pdb`` this runs DSSP and NACCESS on model 0, then
    walks every atom of every model. Atoms whose ``(chain id, residue id)``
    is known to DSSP get the feature tuple
    ``(SS label, phi / 360, psi / 360, rsa_label)`` stored under their
    coordinate tuple; ``rsa_label`` is 1 when the residue's
    ``"all_atoms_abs"`` NACCESS value exceeds the module-level ``Threshold``.
    The dictionary is written to ``<OUTPATH>/<PDB_id>.dat`` with ``pickle``.

    Note that DSSP/NACCESS are computed for model 0 only; atoms of further
    models are looked up against those same results.

    Parameters
    ----------
    PATH : str
        Directory containing the PDB file.
    PDB_id : str
        File stem of the PDB file; also used as the structure id.
    OUTPATH : str
        Directory that receives ``<PDB_id>.dat``.
    """
    # Loading the files
    parser = PDBParser(PERMISSIVE=1)

    filename = os.path.join(PATH, PDB_id + ".pdb")
    structure = parser.get_structure(PDB_id, filename)
    model = structure[0]

    # DSSP analysis for SS, PHI, PSI
    dssp = DSSP(model, filename)

    # NACCESS analysis for SASA
    rsa, asa = run_naccess(model, filename)
    rsa = process_rsa_data(rsa)

    # Feature mapping to each atomic coordinate
    dssp_present, dssp_not_present = 0, 0

    feature = dict()  # The feature dictionary

    for current_model in structure:
        for chain in current_model:
            for residue in chain:
                for atom in residue:
                    full_id = atom.get_full_id()

                    print(full_id)
                    ID = (full_id[2], full_id[3])

                    # Membership test on the DSSP map itself; building
                    # list(dssp.keys()) for every atom was quadratic.
                    if ID in dssp:
                        if (rsa[ID]["all_atoms_abs"] > Threshold):
                            rsa_label = 1
                        else:
                            rsa_label = 0
                        record = dssp[ID]
                        feat = (SS_Labels[record[2]], record[4] / 360,
                                record[5] / 360, rsa_label)
                        feature[tuple(atom.get_coord())] = feat

                        print(ID, atom.get_coord(), feat)
                        dssp_present += 1

                    else:
                        print("==> ID not present : ", full_id)
                        dssp_not_present += 1

    # Printing the stats
    print(
        "==> STATS : PDBID : %s , DSSP PRESENT : %s , DSSP NOT PRESENT : %s" %
        (PDB_id, dssp_present, dssp_not_present))

    # Saving the features of this PDB file
    with open(os.path.join(OUTPATH, PDB_id + ".dat"), "wb+") as f:
        pickle.dump(feature, f)
        print("==> Dump completed")
def getSecondaryStructure(pdbID):
    """Return the DSSP secondary-structure string of a fetched PDB entry.

    Fetches ``pdbID`` through the module-level ``pdb`` helper, runs DSSP on
    model 0 of the temporary PDB file and concatenates the secondary
    structure code of every residue in DSSP order.

    Parameters
    ----------
    pdbID : str
        Identifier handed to ``pdb.fetchInPDBFormat`` and used as structure id.

    Returns
    -------
    str
        One secondary-structure character per DSSP residue.
    """
    pdb.fetchInPDBFormat(pdbID)
    # Resolve the path once so the parser and DSSP read the same file.
    pdb_path = pdb.getTemporaryPDBPath()
    structure = PDBParser().get_structure(pdbID, pdb_path)
    dssp = DSSP(structure[0], pdb_path)
    return "".join(dssp[key][2] for key in dssp.keys())
Exemplo n.º 7
0
def accessible_surface_area(PDB_file):
    """Return the relative solvent accessibility of each known residue.

    Runs ``mkdssp`` on model 0 of ``PDB_file``. Residues whose DSSP amino-acid
    letter is ``'X'`` are skipped; the remaining ones are numbered
    consecutively from 0 in DSSP order.

    Returns a dict ``{position: relative ASA}``.
    """
    parser = PDBParser()
    structure_id = PDB_file.split(".")[0]
    model = parser.get_structure(structure_id, PDB_file)[0]
    dssp = DSSP(model, PDB_file, dssp='mkdssp')

    known_records = (
        dssp[key] for key in list(dssp.keys()) if dssp[key][1] != 'X'
    )
    return {
        position: record[3]
        for position, record in enumerate(known_records)
    }
Exemplo n.º 8
0
def calculate_asa(model, filename, AROM_LIST, chain_list):
    """Return the surface-exposed residues as judged by relative solvent accessibility.

    Only standard protein residues are currently supported. Non-protein and
    user-specified custom residues cannot be classified as surface exposed
    using this criterion.

    Parameters
    ----------
    model : :class:`Bio.PDB.Model.Model`
        Model which contains the chains and residues of the protein structure.
    filename : str
        Name of the PDB file to be analyzed.
    AROM_LIST : list of str
        Residue names included in the analysis; names without an entry in
        ``res_name_to_char`` are ignored.
    chain_list : list of str
        Chain ids included in the analysis.

    Returns
    -------
    list of str
        One ``"<letter><residue number>(<chain>)"`` entry per exposed residue.
        Empty when DSSP could not be run (a ``RuntimeWarning`` is issued).

    Notes
    -----
    The relative accessible surface area (RSA) of each residue is calculated
    using the Bio.PDB.DSSP module with the "Wilke" max-ASA table. A residue
    with an RSA value of 0.05 or higher is classified as surface exposed.
    Residues whose RSA is not numeric (DSSP reports ``'NA'``) are skipped.

    References
    ----------
    Tien, M. Z.; Meyer, A. G.; Sydykova, D. K.; Spielman, S. J.; Wilke, C. O. PLoS ONE 2013, 8 (11).
        Reference for relative solvent accessibility cutoff of 0.05, and for MaxASA values

    """
    cutoff = .05
    surface_exposed_res = []

    letters = set()
    for res_name in AROM_LIST:
        letter = res_name_to_char.get(res_name)
        if letter:
            letters.add(letter)

    try:
        dssp = DSSP(model, filename, acc_array="Wilke")
        for key in dssp.keys():
            record = dssp[key]
            letter, rsa = record[1], record[3]
            # A non-numeric RSA used to raise inside this try block, which
            # aborted the scan and was misreported as a missing DSSP install.
            if isinstance(rsa, str):
                continue
            if key[0] in chain_list and rsa >= cutoff and letter in letters:
                surface_exposed_res.append(
                    letter + str(key[1][1]) + "(" + str(key[0]) + ")")
    except Exception:
        warnings.warn(
            "Unable to calculate solvent accessibility. Check that DSSP is installed.",
            RuntimeWarning,
            stacklevel=2)
    return surface_exposed_res
Exemplo n.º 9
0
def get_SS(structure, path_structure):
    """Return a one-hot secondary-structure vector for every DSSP residue.

    DSSP is run on model 0 of ``structure`` using the file at
    ``path_structure``. Each residue yields one 3-element list:

    Helix (G, H, I)          : [1, 0, 0]
    Sheet (B, E)             : [0, 1, 0]
    Coil  (everything else)  : [0, 0, 1]

    The helix and sheet vectors are the ones this function has always
    emitted. Coil residues used to be encoded as sheet because of an
    always-true condition.
    NOTE(review): an earlier docstring listed the reverse order
    (helix = (0, 0, 1), coil = (1, 0, 0)) -- confirm with consumers.

    Returns
    -------
    list of list of int
        One vector per residue, in DSSP order.
    """
    helix_codes = {"G", "H", "I"}
    sheet_codes = {"B", "E"}

    dssp = DSSP(structure[0], path_structure)

    list_dssp_features = []
    for record in dssp:
        code = record[2]
        if code in helix_codes:
            list_dssp_features.append([1, 0, 0])
        elif code in sheet_codes:
            list_dssp_features.append([0, 1, 0])
        else:
            # T, S, '-' and any other code count as coil so that every
            # residue gets exactly one vector.
            list_dssp_features.append([0, 0, 1])
    return list_dssp_features
Exemplo n.º 10
0
def getDSSP(pdb_file):
    """Run ``mkdssp`` on model 0 of ``pdb_file`` and return the DSSP object.

    The structure id is the file name without directory and extension.
    """
    parser = PDBParser()
    file_name = os.path.basename(pdb_file)
    name, _extension = os.path.splitext(file_name)
    first_model = parser.get_structure(name, pdb_file)[0]
    return DSSP(first_model, pdb_file, dssp='mkdssp')
Exemplo n.º 11
0
def make_a_dssp_model(model, pdb_filepath, dssp='dssp'):  #ticked
    """Build a ``DSSP`` object for ``model``, tolerating a plain ``Exception``.

    Parameters
    ----------
    model : Bio.PDB.Model.Model
        A single model taken from a Structure.
    pdb_filepath : str
        Path of the structure file handed to ``DSSP``.
    dssp : str, optional
        Name or path of the DSSP executable. The default is 'dssp'.

    Returns
    -------
    dssp_object : Bio.PDB.DSSP.DSSP or None
        Parsed secondary structure and accessibility, or ``None`` when
        ``DSSP`` raised an exception whose type is exactly ``Exception``.

    Raises
    ------
    Exception
        Any exception that is a *subclass* of ``Exception`` is re-raised.
    """
    # DSSP class, which maps Residue objects to their secondary structure
    try:
        return DSSP(model, pdb_filepath, dssp)
    except Exception as e:
        # Only a bare ``Exception`` is swallowed; more specific errors
        # propagate. NOTE(review): presumably this targets the generic error
        # raised when the DSSP run produces no output -- confirm.
        if type(e) is not Exception:
            raise
        print('oops')
        # Previously fell through to ``return dssp_object`` with the name
        # unbound, which raised UnboundLocalError.
        return None
Exemplo n.º 12
0
def secondary_torsions_casp(domain):  #, start, end):
    """Extract secondary structure and torsion angles using the DSSP package.

    Parameters
    ----------
    domain : str
        File stem of ``../../data/pdbfiles/<domain>.pdb``.

    Returns
    -------
    numpy.ndarray or tuple
        On success an object array with one row per DSSP residue and the
        columns ``[secondary structure, phi, psi]``, with the angles shifted
        into the range [-180, 180]. When DSSP fails the historical
        ``(None, None)`` pair is returned instead.
    """
    pdb_path = f'../../data/pdbfiles/{domain}.pdb'
    structure = PDBParser().get_structure('', pdb_path)
    try:
        raw = DSSP(structure[0], pdb_path)
    except Exception:
        print('PDBException. Nothing we can do')
        return None, None
    dssp = np.array(raw.property_list, dtype='O')

    sec_torsions = dssp[:, [2, 4, 5]]

    # translating torsion angles to range (-180, 180)
    for i in range(sec_torsions.shape[0]):
        for j in (1, 2):
            angle = sec_torsions[i, j]
            if angle > 180:
                sec_torsions[i, j] = angle - 360
            elif angle < -180:
                # Shift up by a full turn; ``360 - angle`` moved the value
                # even further out of range.
                sec_torsions[i, j] = angle + 360

    return sec_torsions
Exemplo n.º 13
0
    def __init__(self, pdbfile, fastafile):
        """Load a FASTA sequence and the matching PDB chain, then align them.

        Parameters
        ----------
        pdbfile : str
            Path of the PDB file.
        fastafile : str
            Path of a FASTA file whose first line is the header and whose
            second line is the full sequence. The header (without ``>``) is
            used as structure id and its last character selects the chain.

        Attributes set
        --------------
        seq, residues, pdbseq, pdbss3seq, re_index, alignment, ss3seq,
        dist_matrix, angle_matrix
        """
        names = {'HIS': 'H', 'ASP': 'D', 'ARG': 'R', 'PHE': 'F', 'ALA': 'A',
                 'CYS': 'C', 'GLY': 'G', 'GLN': 'Q', 'GLU': 'E', 'LYS': 'K',
                 'LEU': 'L', 'MET': 'M', 'ASN': 'N', 'SER': 'S', 'TYR': 'Y',
                 'THR': 'T', 'ILE': 'I', 'TRP': 'W', 'PRO': 'P', 'VAL': 'V'}
        # Load fasta residue sequence
        with open(fastafile) as f:
            ff = [line.rstrip("\n") for line in f]
        p_id = ff[0][1:]
        self.seq = ff[1]

        # Load pdb information
        p = PDBParser(PERMISSIVE=1)
        st = p.get_structure(p_id, pdbfile)
        model = st[0]
        tag = p_id[-1]
        chain = model[tag]
        self.residues = list(chain.get_residues())
        ## sequence info from pdb (standard residues only)
        self.pdbseq = "".join(
            names[res.get_resname()] for res in self.residues
            if res.get_resname() in names)
        ## 3-state sse info from pdb
        dssp = DSSP(model, pdbfile)
        to3_dict = {'-': 'C', 'G': 'H', 'H': 'H', 'I': 'H', 'E': 'E',
                    'B': 'E', 'T': 'C', 'S': 'C', 'L': 'C'}
        # Restrict to the selected chain so the string lines up with pdbseq,
        # which is built from that chain only. Unknown codes count as coil.
        self.pdbss3seq = "".join(
            to3_dict.get(dssp[k][2], 'C') for k in dssp.keys() if k[0] == tag)

        # Align the pdb sequence (always missing some residues) to fasta sequence
        alignment = AlignNW(self.seq, self.pdbseq)
        self.re_index = [i - 1 for i in alignment['j']]  # minus 1 for indexing

        # generate sequence alignment between pdb sequence and fasta sequence
        self.alignment = "".join([self.pdbseq[i] if i > -1 else "-" for i in self.re_index])
        self.alignment = "\n".join([self.seq, self.alignment])

        # generate full length 3-state SSE sequence according to re-index
        self.ss3seq = "".join([self.pdbss3seq[i] if i > -1 else "C" for i in self.re_index])

        # generate full length distance matrix (distance=-1 when missing in pdbseq)
        self.dist_matrix = self.generate_dist_matrix()
        # generate full length angle matrix (None when missing in pdbseq)
        self.angle_matrix = self.generate_angle_matrix()
Exemplo n.º 14
0
def getDSSP(
    struct,
    fname,
    dsspPath=os.path.expanduser(
        "~/Tesis/rriPredMethod/dependencies/bioinformaticTools/dssp/mkdssp")):
    """Group the residue ids of model 0 by chain and secondary-structure symbol.

    Returns ``{chain_id: {symbol: [residue_id, ...]}}`` with one inner entry
    per symbol in ``DSSP_SYMBOLS`` for every chain of the first model.
    """
    dssp = DSSP(struct[0], fname, dssp=dsspPath)

    # Start every chain with an empty bucket per known symbol.
    residuesBySymbol = {}
    for chain in struct[0].child_list:
        residuesBySymbol[chain.get_id()] = {
            symbol: [] for symbol in DSSP_SYMBOLS
        }

    for key in dssp.keys():
        chainId, resId = key
        symbol = dssp[key][2]
        residuesBySymbol[chainId][symbol].append(resId)

    return residuesBySymbol
Exemplo n.º 15
0
def _phi_psi_dic(model, pdb_filename):
    """Map residue numbers to a ``"<phi>_<psi>"`` label from DSSP.

    Both angles are rounded to the nearest ten and rendered without the
    trailing two characters of their string form.
    """
    dssp = DSSP(model, pdb_filename, dssp='mkdssp')

    def _binned(angle):
        # Round to tens, then drop the last two characters of the text.
        return str(round(angle, -1))[:-2]

    labels = {}
    for key, record in dssp.property_dict.items():
        residue_number = key[1][1]
        labels[residue_number] = _binned(record[4]) + "_" + _binned(record[5])

    return labels
Exemplo n.º 16
0
 def __applyDssp(self):
     """Annotate ``self.atoms`` with the DSSP secondary structure.

     Runs DSSP on model 0 of ``<pdbDataPath>pdb<pdbCode>.ent`` and calls
     ``setDsspInfo`` on every atom whose ``'rid'`` and ``'chain'`` values
     match a DSSP residue.
     """
     import Bio.PDB as bio
     print('PSU: applying dssp')
     from Bio.PDB.DSSP import DSSP
     parser = bio.PDBParser()
     pdbFile = self.pdbDataPath + 'pdb' + self.pdbCode + '.ent'
     model = parser.get_structure(self.pdbCode, pdbFile)[0]
     dssp = DSSP(model, pdbFile)
     for akey in list(dssp.keys()):
         chain, res_no = akey[0], akey[1][1]
         ss = dssp[akey][2]
         matching_atoms = (
             candidate for candidate in self.atoms
             if candidate.values['rid'] == res_no
             and candidate.values['chain'] == chain
         )
         for atom in matching_atoms:
             atom.setDsspInfo(ss)
     print('PSU: applied dssp successfully')
Exemplo n.º 17
0
def get_dssp_amino_acid_sequence(pdb_path, dssp_path):
    """Return the amino-acid letters of all DSSP residues as one string.

    The structure is parsed from ``pdb_path`` (id: the path minus its last
    four characters) and ``dssp_path`` is handed to ``DSSP`` together with
    model 0. Residues of all chains are concatenated in DSSP order.
    """
    parser = Bio.PDB.PDBParser(QUIET=True)
    structure = parser.get_structure(pdb_path[:-4], pdb_path)
    dssp = DSSP.DSSP(structure[0], dssp_path)
    return ''.join(dssp[key][1] for key in list(dssp.keys()))
Exemplo n.º 18
0
def get_dssp_torsion_angles(pdb_path, dssp_path):
    """Return the phi and psi angles of all DSSP residues.

    The structure is parsed from ``pdb_path`` (id: the path minus its last
    four characters) and ``dssp_path`` is handed to ``DSSP`` together with
    model 0.

    Returns
    -------
    tuple of (list, list)
        ``(phi, psi)`` with one angle per residue in DSSP order. The angles
        are numeric, so they are collected in lists; the previous string
        concatenation raised ``TypeError`` for any non-empty structure.
    """
    structure = Bio.PDB.PDBParser(QUIET=True).get_structure(
        pdb_path[:-4], pdb_path)
    model = structure[0]
    dssp = DSSP.DSSP(model, dssp_path)
    phi = []
    psi = []
    for key in dssp.keys():
        record = dssp[key]
        phi.append(record[4])
        psi.append(record[5])
    return phi, psi
Exemplo n.º 19
0
def getDSSP(model,
            dssp_map=None,
            feature_name='secondary_structure',
            formatstr="{}({})",
            clean=True):
    """Store a one-hot secondary-structure encoding in every atom's ``xtra``.

    The model is written to disk with ``model.save()``, DSSP is run on that
    file, and for each atom all feature keys are set to 0.0 except the one
    matching the residue's secondary structure, which is set to 1.0.
    Residues unknown to DSSP, and DSSP codes missing from ``dssp_map``, are
    treated like ``'-'``.

    Parameters
    ----------
    model
        Iterable of chains that also provides ``save()`` returning the path
        of the written PDB file.
    dssp_map : dict, optional
        Maps DSSP codes to feature names. Defaults to a mapping of the eight
        DSSP codes onto helix (H), strand (S) and loop (L).
    feature_name : str, optional
        First field of the default feature names.
    formatstr : str, optional
        Format used to build the default feature names.
    clean : bool, optional
        Remove the written PDB file afterwards (also when DSSP fails).

    Returns
    -------
    list of str
        Sorted distinct feature names.

    Raises
    ------
    ModuleNotFoundError
        If ``Bio.PDB.DSSP`` cannot be imported.
    """
    try:
        from Bio.PDB.DSSP import DSSP
    except ModuleNotFoundError as err:
        raise ModuleNotFoundError(
            "The module 'Bio.PDB.DSSP' is required for this functionality!"
        ) from err

    if dssp_map is None:
        # map eight ss types to three
        dssp_map = {
            "H": formatstr.format(feature_name, "H"),
            "G": formatstr.format(feature_name, "H"),
            "I": formatstr.format(feature_name, "H"),
            "E": formatstr.format(feature_name, "S"),
            "B": formatstr.format(feature_name, "L"),
            "T": formatstr.format(feature_name, "L"),
            "S": formatstr.format(feature_name, "L"),
            "-": formatstr.format(feature_name, "L")
        }

    # Write a PDB file
    pdb_file = model.save()

    # run DSSP using the DSSP class from BioPython; the results are parsed
    # at construction, so the file can be removed right afterwards.
    try:
        dssp = DSSP(model, pdb_file)
    finally:
        if clean:
            os.remove(pdb_file)

    # store secondary structure in each atom property dict
    keys = sorted(set(dssp_map.values()))
    for chain in model:
        cid = chain.get_id()
        for residue in chain:
            dkey = (cid, residue.get_id())

            code = dssp[dkey][2] if dkey in dssp else '-'
            if code not in dssp_map:
                code = '-'
            ss = dssp_map[code]

            for atom in residue:
                # Zero every feature, however many the map defines.
                for key in keys:
                    atom.xtra[key] = 0.0
                atom.xtra[ss] = 1.0

    return keys
Exemplo n.º 20
0
 def load_data(self):
     """
     Run DSSP on model 0 of ``<m_folder><m_id>.pdb`` and return the
     resulting Bio.PDB.DSSP.DSSP object wrapped in a one-element list.
     """
     pdb = self.m_folder + self.m_id + ".pdb"
     parser = PDBParser()
     first_model = parser.get_structure("prot", pdb)[0]
     return [DSSP(first_model, pdb)]
Exemplo n.º 21
0
def get_dssp_secondary_structure(pdb_path, dssp_path):
    """Return the secondary-structure codes of all DSSP residues as one string.

    The structure is parsed from ``pdb_path`` (id: the path minus its last
    four characters) and ``dssp_path`` is handed to ``DSSP`` together with
    model 0. Residues of all chains are concatenated in DSSP order.
    """
    parser = Bio.PDB.PDBParser(QUIET=True)
    structure = parser.get_structure(pdb_path[:-4], pdb_path)
    dssp = DSSP.DSSP(structure[0], dssp_path)
    return ''.join(dssp[key][2] for key in list(dssp.keys()))
Exemplo n.º 22
0
def get_dssp_absolute_surface_area(pdb_path, dssp_path):
    """Return the accessibility value (DSSP tuple index 3) of every residue.

    The structure is parsed from ``pdb_path`` (id: the path minus its last
    four characters) and ``dssp_path`` is handed to ``DSSP`` together with
    model 0.

    NOTE(review): despite the function name, index 3 of the DSSP tuple is the
    *relative* ASA -- confirm which quantity callers expect.

    Returns
    -------
    list
        One value per residue in DSSP order. The values are numeric, so they
        are collected in a list; the previous string concatenation raised
        ``TypeError`` for any non-empty structure.
    """
    structure = Bio.PDB.PDBParser(QUIET=True).get_structure(
        pdb_path[:-4], pdb_path)
    model = structure[0]
    dssp = DSSP.DSSP(model, dssp_path)
    return [dssp[key][3] for key in dssp.keys()]
Exemplo n.º 23
0
def get_mali_structure_stats(root):
    """Collect secondary-structure counts for the manual alignment PDB files.

    Walks ``root`` and, for every file whose name contains both ``'.pdb'``
    and ``'manual'``, runs ``mkdssp`` on model 0 and counts the DSSP
    secondary-structure codes.

    Parameters
    ----------
    root : path
        Path to root directory

    Returns
    -------
    pd.DataFrame
        One row per file: a count column per code (prefixed with ``x``),
        plus ``pdb`` (file stem), ``path`` (full path) and ``xlen``
        (number of residues).
    """
    from Bio.PDB import PDBParser
    from Bio.PDB.DSSP import DSSP

    rows = []
    tool = 'manual'
    for path, directories, files in os.walk(root):
        for f in files:
            if not ('.pdb' in f and tool in f):
                continue

            fname = os.path.join(path, f)
            structs = PDBParser().get_structure('', fname)
            dssp1 = DSSP(structs[0], fname, dssp='mkdssp')
            codes = [record[2] for record in dssp1]

            stats = pd.Series(Counter(codes))
            stats.index = ['x' + code for code in stats.index]
            stats['pdb'] = os.path.basename(f).split('.')[0]
            stats['path'] = fname
            stats['xlen'] = len(codes)
            rows.append(stats)

    return pd.DataFrame(rows)
Exemplo n.º 24
0
def Make_dssp():
    """Map residue labels of ``./current_pdb.txt`` to DSSP secondary structure.

    Runs DSSP on model 0 of ``./current_pdb.txt``.

    Returns
    -------
    dict
        ``{"<chain><residue number><three-letter name>": code}`` for every
        DSSP residue, plus the entry ``'X': nan``. One-letter codes without a
        three-letter name are labelled ``'---'``, like ``'X'``.
    """
    ref = {
        'A': 'ALA',
        'R': 'ARG',
        'N': 'ASN',
        'D': 'ASP',
        'B': 'ASX',
        'C': 'CYS',
        'E': 'GLU',
        'Q': 'GLN',
        'Z': 'GLX',
        'G': 'GLY',
        'H': 'HIS',
        'I': 'ILE',
        'L': 'LEU',
        'K': 'LYS',
        'M': 'MET',
        'F': 'PHE',
        'P': 'PRO',
        'S': 'SER',
        'T': 'THR',
        'W': 'TRP',
        'Y': 'TYR',
        'V': 'VAL',
        'X': '---'
    }
    dssp_dict = {}
    # np.nan is the spelling available on every NumPy version
    # (the np.NaN alias no longer exists in NumPy 2).
    dssp_dict['X'] = np.nan
    p = PDBParser()
    structure = p.get_structure('model', './current_pdb.txt')
    mod = structure[0]
    dssp = DSSP(mod, './current_pdb.txt')
    # Iterate the keys once instead of rebuilding the key list per index.
    for a_key in dssp.keys():
        record = dssp[a_key]
        label = str(a_key[0]) + str(a_key[1][1]) + str(ref.get(record[1], '---'))
        dssp_dict[label] = record[2]
    return (dssp_dict)
Exemplo n.º 25
0
def get_dssp_amino_acid_sequences(pdb_path, dssp_path):
    """Return one amino-acid string per chain, in DSSP order.

    The structure is parsed from ``pdb_path`` (id: the path minus its last
    four characters) and ``dssp_path`` is handed to ``DSSP`` together with
    model 0. A new sequence starts whenever the chain id of consecutive DSSP
    residues changes.

    Returns
    -------
    list of str
        The per-chain sequences, including the last chain (which was
        previously dropped, so single-chain files returned an empty list).
    """
    structure = Bio.PDB.PDBParser(QUIET=True).get_structure(
        pdb_path[:-4], pdb_path)
    model = structure[0]
    dssp = DSSP.DSSP(model, dssp_path)

    aas = []
    current = []
    prev_chain_id = None
    for key in dssp.keys():
        chain_id = key[0]
        if chain_id != prev_chain_id:
            prev_chain_id = chain_id
            if current:
                aas.append(''.join(current))
            current = []
        current.append(dssp[key][1])
    # Flush the chain that was still being collected when the loop ended.
    if current:
        aas.append(''.join(current))
    return aas
def recup_acc_solvant(fichier_pdb):
    """Return the relative solvent accessibility of every residue.

    Uses Biopython and the ``mkdssp`` program on model 0 of the given file.

    Parameters
    ----------
    fichier_pdb : str
        Path of the PDB file; the part before the first ``.`` is used as
        structure id.

    Returns
    -------
    list
        Relative accessibility of each residue in DSSP order, rounded to
        three decimals.
    """
    p = PDBParser()
    id_structure = fichier_pdb.split(".")[0]

    structure = p.get_structure(id_structure, fichier_pdb)
    model = structure[0]
    dssp = DSSP(model, fichier_pdb, dssp='mkdssp')

    # Single pass over the DSSP records; the previous loop rebuilt
    # list(dssp) on every iteration.
    return [round(record[3], 3) for record in dssp]
Exemplo n.º 27
0
def SecStr(pdb_id, chain_id, start, stop):
    """Return secondary structure and torsion angles for a residue range.

    Reads ``/home/m.pak/pdb/pdb<pdb_id>.pdb`` (``pdb_id`` lower-cased, as in
    the local PDB mirror), runs DSSP on model 0 and collects the residues
    numbered ``start`` to ``stop`` (inclusive) of ``chain_id``. Residues that
    DSSP does not know are reported and skipped. Residues with a hetero flag
    or an insertion code cannot be addressed.

    Returns
    -------
    tuple
        ``(sec_str, phi_lst, psi_lst)``: the secondary-structure string and
        the lists of phi and psi angles, or ``(None, None, None)`` when the
        file is missing or DSSP fails.
    """
    # Change pdb_id to lower case - as in local pdb db.
    pdb_id = pdb_id.lower()
    pdb_path = f'/home/m.pak/pdb/pdb{pdb_id}.pdb'

    # Read pdb structure if it exists.
    p = PDBParser()
    try:
        structure = p.get_structure(pdb_id, pdb_path)
    except FileNotFoundError:
        print(f'File not found, proceed...  {pdb_id}')
        return None, None, None
    model = structure[0]

    # Run DSSP.
    try:
        dssp = DSSP(model, pdb_path)
    except Exception:
        print(f'DSSP unable to process the structure {pdb_id}, proceed...')
        return None, None, None

    # Keep the secondary structure elements and the phi/psi angles of the
    # requested region.
    sec_codes = []
    phi_lst = []
    psi_lst = []

    # The range INCLUDES stop.
    for num in range(start, stop + 1):
        # Cannot deal with hetero-flag and insertion code
        res_key = (chain_id, (' ', num, ' '))
        try:
            res = dssp[res_key]
        except KeyError:
            print(f'{res_key} not found in {pdb_id}, proceed...')
            continue

        sec_codes.append(res[2])
        phi_lst.append(res[4])
        psi_lst.append(res[5])

    return ''.join(sec_codes), phi_lst, psi_lst
Exemplo n.º 28
0
def secondary_torsions(domain):  #, start, end):
    """Extract secondary structure and torsion angles using the DSSP package.

    Parameters
    ----------
    domain : str
        Domain identifier; its first four characters select
        ``../../data/pdbfiles/<id>.pdb``. Any chain suffix is not used:
        residues of all chains are returned.

    Returns
    -------
    tuple
        ``(sec_torsions, sequence)`` where ``sec_torsions`` is an object
        array with the columns ``[secondary structure, phi, psi]`` (angles
        shifted into the range [-180, 180]) and ``sequence`` is the
        amino-acid string, or ``(None, None)`` when DSSP fails.
    """
    domain_id = domain[:4]
    pdb_path = f'../../data/pdbfiles/{domain_id}.pdb'

    structure = PDBParser().get_structure('', pdb_path)
    try:
        raw = DSSP(structure[0], pdb_path)
    except Exception:
        print('PDBException. Nothing we can do')
        return None, None
    dssp = np.array(raw.property_list, dtype='O')

    sequence = ''.join(dssp[:, 1])

    sec_torsions = dssp[:, [2, 4, 5]]

    # translating torsion angles to range (-180, 180)
    for i in range(sec_torsions.shape[0]):
        for j in (1, 2):
            angle = sec_torsions[i, j]
            if angle > 180:
                sec_torsions[i, j] = angle - 360
            elif angle < -180:
                # Shift up by a full turn; ``360 - angle`` moved the value
                # even further out of range.
                sec_torsions[i, j] = angle + 360

    return sec_torsions, sequence
def retrieve_secondary_struc(chain, input_path):
    """Return the sequence and secondary structure DSSP reports for ``chain``.

    Uses Biopython's DSSP wrapper, so DSSP must be installed
    (e.g. ``conda install -c salilab dssp``). STRIDE is an alternative
    assignment method, see https://en.wikipedia.org/wiki/STRIDE.

    DSSP codes:
        H - Alpha helix
        B - Isolated beta bridge
        E - Strand
        G - 3-10 helix
        I - Pi helix
        T - Turn
        S - Bend

    Parameters
    ----------
    chain
        Chain whose parent model is analysed.
    input_path : str
        Path of the structure file handed to ``DSSP``.

    Returns
    -------
    tuple of (str, str)
        ``(seq, struc)``: the amino-acid letters and the secondary-structure
        codes of the chain's DSSP residues; ``'-'`` is rendered as a space.
        A warning is issued when their count differs from the number of
        residues in ``chain``.
    """
    model = chain.get_parent()
    res_list = Selection.unfold_entities(chain, "R")

    dssp = DSSP(model, input_path)
    # Select this chain's records by chain id. Taking the first
    # len(res_list) rows of the whole model picked the wrong residues for
    # any chain but the first.
    chain_id = chain.get_id()
    records = [dssp[key] for key in dssp.keys() if key[0] == chain_id]

    seq = ''.join(record[1] for record in records)
    struc = ''.join(record[2] for record in records).replace('-', ' ')

    if len(seq) != len(res_list):
        warnings.warn(
            f'PDB file and Secondary structure map do not match!\n {chain.get_parent().get_parent().id} - PDB: {len(res_list)} Residues VS. SS: {len(seq)} Residues. '
        )

    return seq, struc
Exemplo n.º 30
0
def getDSSP(model,
            PDBFileName,
            dssp_map=None,
            feature_name='secondary_structure',
            formatstr="{}({})"):
    """Flag every atom with its residue's secondary-structure feature.

    Runs DSSP on ``PDBFileName`` and sets ``atom.xtra[<feature>] = 1.0`` for
    the feature that matches the residue's secondary structure. Residues
    unknown to DSSP, and DSSP codes missing from ``dssp_map``, are treated
    like ``'-'``. Other feature keys are left untouched.

    Parameters
    ----------
    model
        Iterable of chains handed to ``DSSP``.
    PDBFileName : str
        Path of the structure file handed to ``DSSP``.
    dssp_map : dict, optional
        Maps DSSP codes to feature names. Defaults to a mapping of the eight
        DSSP codes onto helix (H), strand (S) and loop (L).
    feature_name : str, optional
        First field of the default feature names.
    formatstr : str, optional
        Format used to build the default feature names.

    Returns
    -------
    list of str
        The distinct feature names, sorted so the order is reproducible
        between runs.
    """
    if dssp_map is None:
        # map eight ss types to three
        dssp_map = {
            "H": formatstr.format(feature_name, "H"),
            "G": formatstr.format(feature_name, "H"),
            "I": formatstr.format(feature_name, "H"),
            "E": formatstr.format(feature_name, "S"),
            "B": formatstr.format(feature_name, "L"),
            "T": formatstr.format(feature_name, "L"),
            "S": formatstr.format(feature_name, "L"),
            "-": formatstr.format(feature_name, "L")
        }

    # run DSSP using the DSSP class from BioPython
    dssp = DSSP(model, PDBFileName)

    # store secondary structure in each atom property dict
    for chain in model:
        cid = chain.get_id()
        for residue in chain:
            dkey = (cid, residue.get_id())

            code = dssp[dkey][2] if dkey in dssp else '-'
            if code not in dssp_map:
                code = '-'
            ss = dssp_map[code]

            for atom in residue:
                atom.xtra[ss] = 1.0

    return sorted(set(dssp_map.values()))
Exemplo n.º 31
0
            ftp.retrbinary("RETR %s" % filename, callback=fp.write)

        print("processing: %s" % filename)

        p = PDBParser()
        with gzip.open(filename, 'rt') as f:
            structure = p.get_structure("", f)

        pdb_id = structure.header["idcode"]

        assert pdb_id, "no PDB ID for %s" % filename

        model = structure[0]

        try:
            dssp = DSSP(model, filename, dssp="/Users/luis/dssp-2.3.0/mkdssp")
        except Exception as e:
            print(e)
            print()
            os.remove(filename)
            continue

        valid_aa = [
            'A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'P',
            'Q', 'R', 'S', 'T', 'V', 'W', 'Y'
        ]
        current_chain = ""
        chain = ""
        phi_psis = []
        dihedrals = []
        chains = []