示例#1
0
def get_dssp_info(PDB_file, model, dir):
    """Run DSSP on one structure model through Biopython.

    Args:
        PDB_file: Name of the PDB file, relative to ``dir``.
        model: Biopython model object passed straight to ``PDB.DSSP``.
        dir: Directory that contains ``PDB_file``.

    Returns:
        The ``PDB.DSSP`` object produced with the ``mkdssp`` executable.
    """
    # TODO: the DSSP output holds much more than secondary structure; when
    # using it, make sure any indexing follows the residue order of the PDB
    # file.
    pdb_path = '/'.join((dir, PDB_file))
    return PDB.DSSP(model, pdb_path, dssp='mkdssp')
示例#2
0
 def get_sasa(self, dssp_loc="/usr/bin/mkdssp", skip=None):
     """Collect per-residue DSSP accessibility over the trajectory.

     For every trajectory frame the protein selection is written to
     ``tmp.pdb`` in the current directory, DSSP is run on it, and the value
     at index 2 of each DSSP record is stored per residue number. The mean
     and sample standard deviation per residue are then written to
     ``<out_path><out_file>_sasa.dat`` as tab-separated columns
     (residue name, residue number, mean, std).

     Args:
         dssp_loc: Path to the DSSP executable; also stored on the instance.
         skip: If truthy, assigned to ``trajectory.skip`` on every frame.

     Returns:
         None. Results are written to disk.
     """
     self.dssp_loc = dssp_loc
     SASA = {}
     protein = self.universe.selectAtoms("protein")
     for ts in self.universe.trajectory:
         if skip:
             self.universe.trajectory.skip = skip
         sys.stdout.flush()
         sys.stdout.write('\rsasa [step {0}]  '.format(
             self.universe.trajectory.frame))
         writer = MDAnalysis.Writer("tmp.pdb")
         writer.write(protein)
         writer.close()
         parser = bp.PDBParser()
         structure = parser.get_structure('tmp', 'tmp.pdb')
         dssp = bp.DSSP(structure[0], 'tmp.pdb', self.dssp_loc)
         for key in dssp.keys():
             # NOTE(review): assumes the DSSP record layout where index 0 is
             # the residue object and index 2 the accessibility -- confirm
             # against the installed Biopython version.
             record = dssp[key]
             SASA.setdefault(record[0].id[1], []).append(record[2])

     # NOTE(review): residue names are paired with residue numbers purely by
     # position, so this relies on the dict iterating in residue order and on
     # residue numbers being unique across chains -- verify for your input.
     resnames = protein.resnames()
     # The context manager closes the file even if formatting a row fails.
     with open(self.out_path + self.out_file + "_sasa.dat", 'w') as fp:
         for count, key in enumerate(SASA):
             # With a single frame the sample std (ddof=1) is NaN.
             fp.write("{0}\t{1}\t{2}\t{3}\n".format(resnames[count],
                                                    key, np.mean(SASA[key]),
                                                    np.std(SASA[key], ddof=1)))
     sys.stdout.write('\rSASA table created     ')
     # A bare ``print`` only emits a newline on Python 2; write it explicitly
     # so the progress line is terminated on Python 3 as well.
     sys.stdout.write('\n')
示例#3
0
 def run_dssp(self):
     """Download the PDB entry, run DSSP and tally secondary structure.

     The entry ``self.pdb_code`` is fetched into the current directory as
     ``pdb<code>.ent``. For every chain of the first model, ``self.chains``
     gets the chain id and ``self.dssp_dict`` / ``self.dssp_info`` get an
     empty ordered mapping / a zeroed counter per DSSP code. DSSP records are
     kept only for residues whose sequence number has a generic number and
     lies outside the ECL1/ECL2/ICL3/ECL3 segments (N-term and C-term are
     excluded by the query). The downloaded file is always removed.
     """
     pdb = PDB.PDBList()
     pdb.retrieve_pdb_file(self.pdb_code, pdir='./', file_format="pdb")
     p = PDB.PDBParser()
     f = 'pdb{}.ent'.format(self.pdb_code.lower())
     try:
         wt_residues = list(
             Residue.objects.filter(
                 protein_conformation__protein=self.protein).exclude(
                     protein_segment__slug__in=['N-term', 'C-term']))
         # A set gives constant-time membership tests in the DSSP loop below.
         gn_residues = {
             i.sequence_number for i in wt_residues if i.generic_number
             and i.protein_segment.slug not in ['ECL1', 'ECL2', 'ICL3', 'ECL3']
         }
         structure = p.get_structure(self.pdb_code, f)
         for chain in structure[0]:
             ch = chain.get_id()
             self.chains.append(ch)
             self.dssp_dict[ch] = OrderedDict()
             self.dssp_info[ch] = OrderedDict([('H', 0), ('B', 0), ('E', 0),
                                               ('G', 0), ('I', 0), ('T', 0),
                                               ('S', 0), ('-', 0)])
         # NOTE(review): DSSP is only run when more than one chain entry
         # exists; confirm that single-chain structures are meant to be
         # skipped.
         if len(self.dssp_dict) > 1:
             dssp = PDB.DSSP(structure[0], f, dssp='/env/bin/dssp')
             for key in dssp.keys():
                 chain_id, res_num = key[0], key[1][1]
                 if int(res_num) in gn_residues:
                     record = dssp[key]
                     self.dssp_dict[chain_id][res_num] = record
                     self.dssp_info[chain_id][record[2]] += 1
     finally:
         # Clean up the downloaded file even when parsing or DSSP fails; the
         # existence check keeps a failed download from masking the real
         # error.
         if os.path.exists(f):
             os.remove(f)
示例#4
0
 def runDssp(self):
     """Run the DSSP executable for this model unless a result is cached.

     The result is stored on ``self.dssp``. A missing or empty DSSP result
     raises ``pcssErrors.DsspException`` so that it is handled as a feature
     exception rather than a global one (Biopython gives no exact reason for
     the failure).
     """
     if self.dssp is not None:
         return

     bioModel = self.bioModel
     modelFile = self.pcssRunner.pdh.getFullModelFile(self)
     dsspExecutable = self.pcssRunner.internalConfig["dssp_executable"]
     result = PDB.DSSP(bioModel, modelFile, dsspExecutable)

     loaded = result is not None and len(result.keys()) >= 1
     if not loaded:
         raise pcssErrors.DsspException("Did not load DSSP for model %s. This likely indicates a problem with the "
                                        "Biopython DSSP module;\ntry running DSSP from the command line to isolate "
                                        "the issue" % self.getId())
     self.dssp = result
示例#5
0
def secondary_structure(pdb_file, pdb_code):
    """Return 1-based DSSP row positions for selected secondary structures.

    Args:
        pdb_file: Path to the PDB file; used for parsing and for DSSP.
        pdb_code: Identifier given to the parsed structure.

    Returns:
        A dict with the keys ``helix`` (DSSP code H), ``strand`` (E),
        ``pi_helix`` (I), ``turn`` (T) and ``bend`` (S). Each value is a
        NumPy integer array holding the positions (row index + 1) of the
        DSSP rows whose column 2 equals that code.
    """
    parser = biop.PDBParser()
    structure = parser.get_structure(pdb_code, pdb_file)
    model = structure[0]
    dssp = biop.DSSP(model, pdb_file)

    # Convert the DSSP result once instead of once per category.
    ss_column = np.array(dssp)[:, 2]
    categories = (
        ('helix', 'H'),
        ('strand', 'E'),
        ('pi_helix', 'I'),
        ('turn', 'T'),
        ('bend', 'S'),
    )
    # np.where returns a 1-tuple for a 1-D mask; its only entry holds the
    # matching row indices, shifted here to 1-based positions.
    return {
        name: np.where(ss_column == code)[0] + 1
        for name, code in categories
    }
示例#6
0
def get_first_residue_id_dssp(pdbname, pdbpath, pdb_id):
    """Return the id of the first residue of a chain (PDB numbering).

    The DSSP result is used because its keys carry the PDB residue ids.

    Args:
        pdbname: Name given to the parsed structure, e.g. ``'pdb1ztm'``.
        pdbpath: Path to the PDB file, e.g. ``'/home/pdb/1ztm.pdb'``.
        pdb_id: PDB id with chain, e.g. ``'1ztm_A'``; the part after the
            last underscore is taken as the chain id.

    Returns:
        Integer id of the first residue DSSP reports for the chain,
        e.g. ``44``.

    Raises:
        IndexError: If DSSP reports no residue for the requested chain.
    """
    p = PDB.PDBParser()
    structure = p.get_structure(pdbname, pdbpath)
    model = structure[0]
    dssp = PDB.DSSP(model, pdbpath)
    chain_id = pdb_id.split('_')[-1]  # fetch chain id from pdb_id

    # Stop at the first key of the desired chain instead of collecting all.
    for res in dssp.keys():
        if res[0] == chain_id:
            return res[1][1]

    # Same exception type as indexing an empty list, but with context.
    raise IndexError(
        "no DSSP residues found for chain {!r} in {}".format(chain_id, pdbpath))
示例#7
0
def assign_secondary_structure(pdbfile, modelno=0):
    """
    Assign secondary structure with DSSP through Biopython.
    Requires DSSP to be installed.

    --PARAMETERS--
    pdbfile: path to the structure (in PDB format) to analyse.
    modelno: which model in the PDB file should be analysed. Default is
        the first.

    --RETURNS--
    A dictionary keyed by protein chain ID. Each value is a list, in DSSP
        key order, of tuples (resid, field_1, field_2), where resid is the
        string form of the Biopython residue id and field_1 / field_2 are
        items 1 and 2 of the DSSP record (presumably the amino acid and the
        DSSP secondary structure code, i.e. E = beta strand, H = alpha
        helix, etc. -- confirm against the installed Biopython version).
    """
    # Parse quietly and pick the requested model
    parser = PDB.PDBParser(QUIET=True)
    model = parser.get_structure("structure", pdbfile)[modelno]

    # Run DSSP on that model
    dssp_result = PDB.DSSP(model, pdbfile)

    # Group per chain; lists keep the residues in DSSP order
    result = {}
    for key in dssp_result.keys():
        chain, res = key
        if chain not in result:
            result[chain] = []
        resid = biopython_resid_to_str(res)
        record = dssp_result[key]
        result[chain].append((resid, record[1], record[2]))

    return result
def rawChainParser(filepath, chainID, pssm):
    """Build the per-residue feature table for one chain of a PDB file.

    For every residue of ``chainID`` in the first model a row is assembled
    from: the one-letter amino acid, coordinates from ``getResCoords``,
    residue depth, DSSP fields (secondary structure, accessibility, phi and
    psi divided by 360), half-sphere exposure (0 when not calculated), the
    zero-based sequence index, PSSM frequencies and the pharmacophore
    descriptors of the amino acid. Amino acid and secondary structure are
    one-hot encoded and the coordinates are centred on the midpoint of
    their range.

    Args:
        filepath: Path to the PDB file.
        chainID: Chain to extract; also used as the structure id.
        pssm: Indexable by ``seqID - 1``; each entry is a mapping from a
            residue letter to its alignment count.

    Returns:
        A pandas DataFrame with one row per residue and 72 columns.

    Raises:
        AssertionError: If the resulting table does not have 72 columns.
    """
    parser = PDB.PDBParser()
    structure = parser.get_structure(chainID, filepath)
    model = structure[0]

    d = PDB.DSSP(model, filepath)

    rd = ResidueDepth(model)

    pharmDic = getPharmacophoreDict()

    hs = PDB.HSExposure.HSExposureCA(model)

    pssmAlphabet = 'ABCDEFGHIKLMNPQRSTUVWYZ'
    knownPssmKeys = set(pssmAlphabet)  # built once, not once per PSSM key

    # Rows are collected in a list and converted once: DataFrame.append in a
    # loop copies the whole frame each time and no longer exists in pandas 2.
    rows = []
    for residue in model[chainID]:
        seqID = getSeqIndex(residue)

        row = {}
        x, y, z = getResCoords(residue)
        resName = residue.get_resname()
        row["AA"] = PDB.Polypeptide.three_to_one(resName)
        row["x"] = x[0]
        row["y"] = y[0]
        row["z"] = z[0]

        tupKey = (chainID, (' ', seqID, ' '))
        row["res_depth"] = rd[tupKey][0]
        row["ca_depth"] = rd[tupKey][1]

        dssp = d[(chainID, seqID)]

        row["ss"] = dssp[2]
        row["asa"] = dssp[3]
        row["phi"] = dssp[4] / 360.0
        row["psi"] = dssp[5] / 360.0

        if tupKey in hs:
            row["hseu"] = hs[tupKey][0]
            row["hsed"] = hs[tupKey][1]
        else:  # no HSE calculated for this residue
            row["hseu"] = 0
            row["hsed"] = 0

        row["seqId"] = seqID - 1
        # bFactor is left out; it must be derived with the Zhang lab tool
        # resQ instead.

        # PSSM counts are normalised by the total number of alignments
        pssmRow = pssm[seqID - 1]

        row["aligns"] = sum(pssmRow.values())  # total alignments
        if row["aligns"] != 0:
            for key in pssmRow.keys():
                if key not in knownPssmKeys:
                    print(key)
                row["pssm_" + key] = pssmRow[key] / row["aligns"]

        if residue.is_disordered():
            # The original called getSeqIndex() without its argument here,
            # which would fail instead of reporting the residue.
            print(f"disorded atom in res {seqID}")

        row.update(pharmDic[row["AA"]])

        rows.append(row)

    df = pd.DataFrame(rows)

    # Make sure every PSSM column exists, even for letters never seen
    for a in pssmAlphabet:
        if ("pssm_" + a) not in df:
            df["pssm_" + a] = 0.0

    aaEncoder = AAonehot()
    aaTransformed = aaEncoder.transform(df["AA"])
    aaCols = ["AA_" + x for x in aaEncoder.classes_]
    aaDf = pd.DataFrame(aaTransformed, columns=aaCols)

    ssEncoder = ssOneHot()
    ssTransformed = ssEncoder.transform(df["ss"])
    ssCols = ["SS_" + x for x in ssEncoder.classes_]
    ssDf = pd.DataFrame(ssTransformed, columns=ssCols)

    # Centre coordinates on the midpoint of their range. Only the coordinate
    # columns are reduced, and the builtins max/min are no longer shadowed.
    coords = df[["x", "y", "z"]]
    midpoint = (coords.max() + coords.min()) / 2
    for axis in ("x", "y", "z"):
        df[axis] = df[axis] - midpoint[axis]

    df = pd.concat([df, aaDf, ssDf], axis=1).drop(["AA", "ss"], axis=1)
    df = df.fillna(0.0)
    if df.shape[1] != 72:
        print(df)
    assert df.shape[
        1] == 72, f"Incorrect pssmdf shape = {df.shape[1]} for file: {filepath}"  #error check
    return df
示例#9
0
    def add_dssp(self,selected_chains=list()):
        '''
        Adds DSSP features (secondary structure, SASA, isolated-chain SASA).

        DSSP ignores hetatoms but treats oligomers as single units.
        This means that interface residues will have lower than expected SASA.
        Therefore, DSSP is calculated over the whole oligomer and over each
        selected chain on its own. DSSP takes files directly, so a temporary
        PDB file is written for each chain (and for the oligomer when the
        structure came from a url or model).

        selected_chains: chain ids to include. Residues of other chains are
            left out of the DSSP table. The default list is never mutated.

        The result is stored in self.dssp, merged into self.descriptors and
        the DSSP column names are appended to self.header. When DSSP fails or
        yields nothing, a placeholder row ('?', -999, -999) is generated for
        every non-hetero residue instead.
        '''
        # print(...) with a single argument behaves the same on Python 2 and 3
        if self.debug:
            print(self.debug_head+"Adding DSSP")
            print(self.debug_head+"Current header: {}".format(self.header))
        if self.id == "Holder":
            # list() keeps the concatenation below valid when keys() is a view
            c = list(HOLDERS.keys())
            h = ['?','?','?']+[HOLDERS[x] for x in c]
            c = self.res_header+c
            self.dssp = pd.DataFrame([h],columns=c)[self.res_header+HEADERS['dssp']]
            if self.debug:
                print(self.debug_head+"added holder: {}".format(h))
        else:
            genempty = False
            dssp = list()
            try:
                #Calculate DSSP for Oligomer
                #DSSP only takes files. Therefore, if this was created
                # using a model or url, create a temporary file
                if self.debug:
                    print(self.debug_head+"Oligomer DSSP")
                if self.filename == "url" or self.filename == "model":
                    ofile = "_".join([uuid.uuid4().hex,self.id])
                    io = PDB.PDBIO()
                    io.set_structure(self.struct)
                    io.save(ofile)
                    if self.debug:
                        print(self.debug_head+"wrote file for dssp")
                    try:
                        oligomer = dict(PDB.DSSP(self.struct[0],ofile,self.dssp_path))
                    except Exception as e: # DSSP failures generate unnamed exceptions
                        raise DescriptorException("dssp calculation",e)
                    finally:
                        # Remove the temporary file even when DSSP fails
                        os.remove(ofile)
                else:
                    oligomer = dict(PDB.DSSP(self.struct[0],self.filename,self.dssp_path))
                if self.debug:
                    print(self.debug_head+"finished oligomer DSSP")
                class chain_select(PDB.Select): #Needed for extracting each chain
                    def accept_chain(self,c):
                        # chainid is read from the enclosing loop at save time
                        return c.get_id() == chainid
                #Calculate DSSP for isolated chains
                isolated = dict()
                badchains = list()
                if len(self.struct[0].get_list()) == 1:
                    if self.debug:
                        print(self.debug_head+"Single chain, no need to run DSSP on isolated chains")
                    isolated[self.struct[0].get_list()[0].get_id()] = oligomer
                else:
                    if self.debug:
                        print(self.debug_head+"Running DSSP on isolated chains")
                    io = PDB.PDBIO()
                    io.set_structure(self.struct)
                    for chain in self.struct[0]:
                        # Make sure there are residues in this chain otherwise unnamed Exception
                        if len([x for x in chain.get_residues() if x.get_id()[0]==' ' and len(x)>2])==0:
                            badchains.append(chain.get_id())
                            if self.debug:
                                print(self.debug_head+"skipping no-res chain {}".format(chain.get_id()))
                            continue
                        chainid = chain.get_id()
                        if chainid not in selected_chains:
                            if self.debug:
                                print(self.debug_head+"skipping unwanted chain {}".format(chainid))
                            # Skip regardless of the debug flag; previously the
                            # skip only happened when debugging was enabled.
                            continue
                        # Generate random filename
                        cfile = "_".join([uuid.uuid4().hex,self.id,chainid])
                        # Write isolated chain to random filename
                        # Then calculate DSSP from it
                        io.save(cfile, chain_select())
                        try:
                            tmp = self.parser.get_structure(chainid,cfile)
                            if self.debug:
                                print(self.debug_head+"write file for chain {}, calculating DSSP".format(chainid))
                            try:
                                isolated[chainid] = dict(PDB.DSSP(tmp[0],cfile,self.dssp_path))
                            except Exception: # DSSP failures generate unnamed exceptions
                                print("Warning, DSSP failed for {} isolated chain {}, setting equal to oligomer".format(self.id,chainid))
                                isolated[chainid] = oligomer
                        finally:
                            # Remove the temporary file even when parsing fails
                            os.remove(cfile)
                #Generate DSSP DataFrame
                if self.debug:
                    print(self.debug_head+"generating DSSP dataframe")
                    if len(badchains)>0:
                        print(self.debug_head+"skipping badchains {}".format(badchains))
                for res in oligomer:
                    if res[1][0]!=" ": continue
                    c = res[0]
                    if c not in selected_chains or c in badchains: continue
                    r = res[1][1]
                    i = res[1][2]
                    ss = oligomer[res][2]
                    try:
                        sasa = round(float(oligomer[res][3]),3)
                        isosasa = round(float(isolated[c][res][3]),3)
                    except ValueError: #TODO: Does this ever happen?
                        # Keep the raw SASA field; previously the whole DSSP
                        # record was stored in the sasa column by mistake.
                        sasa = oligomer[res][3]
                        isosasa = isolated[c][res][3]
                    dssp.append([c,r,i,ss,sasa,isosasa])
            except (OSError,DescriptorException,PDB.PDBExceptions.PDBException) as e: # Generate a holder set on the fly
                print("Warning, DSSP calculation failed for {}: {}".format(self.id,e))
                genempty = True
                if self.debug:
                    print(self.debug_head+"DSSP failed with exception {}".format(e))
            if len(dssp)==0: # Generate a holder set on the fly if it's still empty
                genempty = True
                if self.debug:
                    print(self.debug_head+"DSSP set is empty, generating holder set")
            if genempty:
                dssp = list()
                for c in self.struct[0]:
                    for r in c:
                        res = r.get_id()
                        if res[0]!=' ': continue
                        dssp.append([c.get_id(),res[1],res[2],'?',-999,-999])
            self.dssp = pd.DataFrame(dssp,columns=self.res_header+HEADERS['dssp'])
            if self.debug:
                print("Finished dssp datatable has {} rows".format(len(self.dssp.index)))
        if self.descriptors is None:
            self.descriptors = self.dssp
        else:
            self.descriptors = self.descriptors.merge(self.dssp,
                                                      on=self.res_header,
                                                      how='outer')
        self.header += HEADERS['dssp']
        if self.debug:
            print(self.debug_head+"Finished DSSP, current header: {}".format(self.header))
示例#10
0
文件: readpdb.py 项目: qzlshy/note
# Script fragment: gathers per-residue property maps for the entity ``s``.
# NOTE(review): HSEA, s, ca_list and aa_list are defined outside this excerpt.

# Unpack the dict / keys / list views of the property map held in HSEA.
HSEA_dict = HSEA.property_dict
HSEA_keys = HSEA.property_keys
HSEA_list = HSEA.property_list

# Half-sphere exposure computed with the CB-based variant.
HSEB = PDB.HSExposureCB(s)

HSEB_dict = HSEB.property_dict
HSEB_keys = HSEB.property_keys
HSEB_list = HSEB.property_list

# Residue depth for the same entity.
depth = PDB.ResidueDepth(s)
dep_dict = depth.property_dict
dep_keys = depth.property_keys
dep_list = depth.property_list

# DSSP is run on a hard-coded PDB file name.
dssp = PDB.DSSP(s, "3skpFH.pdb")
dssp_dict = dssp.property_dict

# For every atom in ca_list, store the atoms found within a radius of 8
# (presumably Angstrom, and presumably CA atoms -- confirm), keyed by
# (chain id, residue id) of the atom's parent residue.
nb_dict = {}
nb = PDB.NeighborSearch(ca_list)
for a in ca_list:
    t = nb.search(a.get_coord(), 8)
    aa = a.get_parent()
    aa_id = (aa.get_parent().get_id(), aa.get_id())
    nb_dict[aa_id] = t

# Output table; starts with the ids of the entries in aa_list.
dic = {}

dic["res_id"] = []
for a in aa_list:
    dic["res_id"].append(a.get_id())
示例#11
0
def get_dssp_df(model, pdb_file, outfile=None, outdir=None, outext='_dssp.df', force_rerun=False):
    """Run DSSP on a model and return the per-residue results as a DataFrame.

    The table is written to a CSV file whose name is built by
    ``ssbio.utils.outfile_maker``. When ``ssbio.utils.force_rerun`` reports
    that no rerun is needed, the existing CSV is loaded instead.

    Args:
        model: Biopython model object passed to ``PDB.DSSP``.
        pdb_file: Path to the PDB file the model was read from.
        outfile: Optional output file name, forwarded to ``outfile_maker``.
        outdir: Optional output directory, forwarded to ``outfile_maker``.
        outext: Extension of the output file.
        force_rerun: Flag forwarded to ``ssbio.utils.force_rerun``.

    Returns:
        A DataFrame with chain, residue number, insertion code, the DSSP
        fields, the three-letter code, the Sander maximum accessibility and
        the absolute accessibility (``exposure_rsa * max_acc``). An empty
        DataFrame is returned, and nothing is written, when DSSP raises
        ``KeyError`` or yields no residues.
    """
    # Create the output file name
    outfile = ssbio.utils.outfile_maker(inname=pdb_file, outname=outfile, outdir=outdir, outext=outext)

    if not ssbio.utils.force_rerun(flag=force_rerun, outfile=outfile):
        log.debug('{}: already ran DSSP and force_rerun={}, loading results'.format(outfile, force_rerun))
        return pd.read_csv(outfile, index_col=0)

    try:
        # TODO: errors with non-standard residues, ie. MSE in 4Q6U or 1nfr
        # TODO: write command line executor for DSSP and parser for raw DSSP results
        dssp = PDB.DSSP(model, pdb_file)
    except KeyError:
        return pd.DataFrame()

    if len(dssp.property_list) == 0:
        return pd.DataFrame()

    # One record per residue: (chain, resnum, icode) followed by the DSSP
    # fields. Each key is (chain, (hetero flag, resnum, icode)).
    records = [
        [key[0], key[1][1], key[1][2]] + list(dssp.property_dict[key])
        for key in dssp.property_keys
    ]

    cols = ['chain', 'resnum', 'icode',
            'dssp_index', 'aa', 'ss', 'exposure_rsa', 'phi', 'psi',
            'NH_O_1_relidx', 'NH_O_1_energy', 'O_NH_1_relidx',
            'O_NH_1_energy', 'NH_O_2_relidx', 'NH_O_2_energy',
            'O_NH_2_relidx', 'O_NH_2_energy']

    df = pd.DataFrame.from_records(records, columns=cols)

    # Keep only residues whose one-letter code is in aa1. The copy makes the
    # column assignments below act on an independent frame rather than on a
    # filtered view (avoids pandas' chained-assignment warning).
    df = df[df['aa'].isin(list(aa1))].copy()
    df['aa_three'] = df['aa'].apply(one_to_three)
    df['max_acc'] = df['aa_three'].map(residue_max_acc['Sander'].get)
    df[['exposure_rsa', 'max_acc']] = df[['exposure_rsa', 'max_acc']].astype(float)
    df['exposure_asa'] = df['exposure_rsa'] * df['max_acc']

    df.to_csv(outfile)
    return df
示例#12
0
文件: sasa.py 项目: handreazz/analyo
def get_sasa(topology, trajectory, dssp_loc=master_dssp_location, skip=None):
    """Collect per-residue DSSP data for every frame of a trajectory.

    This function currently only works with one or two chains. All residues
    are put in segment 'A'; if the residue numbering drops at some atom, the
    atoms from that point on are relabelled as segment 'B'. For each frame
    the protein is written to ``tmp.pdb`` in the current directory and DSSP
    is run on it.

    Args:
        topology: Topology file passed to ``MDAnalysis.Universe``.
        trajectory: Trajectory file passed to ``MDAnalysis.Universe``.
        dssp_loc: Path to the DSSP executable.
        skip: If truthy, assigned to ``trajectory.skip`` on every frame.

    Returns:
        A dict keyed by chain id ('A' and optionally 'B'). Each value maps a
        residue number to a dict with the per-frame lists ``dssp``,
        ``rel_sasa``, ``abs_sasa``, ``phi``, ``psi`` and ``time`` plus the
        scalar entries ``chain`` and ``resname``.
    """
    DSSP = {'A': {}}
    universe = MDAnalysis.Universe(topology, trajectory)

    # Set the chain name here. This will only work for MDAnalysis 0.16.
    chain_name = universe.add_Segment(segid='A')
    universe.residues[...].segments = chain_name

    protein = universe.select_atoms("protein")
    # This attempt to identify a chain break only works if the resids of the
    # chains are not numbered consecutively. Only the first drop is used.
    resnums = protein.resnums
    chain_sep = next(
        (i for i in range(1, len(resnums)) if resnums[i] < resnums[i - 1]),
        None)
    if chain_sep is not None:
        bchain = protein[chain_sep:len(resnums)]
        bchain.set_segids('B')
        DSSP['B'] = {}

    for ts in universe.trajectory:
        if skip:
            universe.trajectory.skip = skip
        sys.stdout.flush()
        sys.stdout.write('\rsasa [step {0}]  '.format(
            universe.trajectory.frame))
        writer = MDAnalysis.Writer("tmp.pdb")
        writer.write(protein)
        writer.close()
        parser = bp.PDBParser()
        structure = parser.get_structure('tmp', 'tmp.pdb')
        dssp = bp.DSSP(structure[0], 'tmp.pdb', dssp_loc)
        for key in dssp.keys():
            resobj = dssp[key]
            resname = residue_codes_reverse[resobj[1]]
            residx = key[1][1]
            chain = key[0]
            secondary_structure = resobj[2]
            rel_sasa = resobj[3]
            # Absolute SASA is the relative value scaled by the residue's
            # maximum accessibility.
            abs_sasa = resobj[3] * dssp.residue_max_acc[resname]
            phi = resobj[4]
            psi = resobj[5]

            per_chain = DSSP[chain]
            record = per_chain.get(residx)
            if record is not None and record['resname'] == resname:
                record['dssp'].append(secondary_structure)
                record['rel_sasa'].append(rel_sasa)
                record['abs_sasa'].append(abs_sasa)
                record['phi'].append(phi)
                record['psi'].append(psi)
                record['time'].append(ts.time)
            else:
                # First sighting of this residue, or its name changed: start
                # a fresh record.
                per_chain[residx] = {
                    'dssp': [secondary_structure],
                    'phi': [phi],
                    'time': [ts.time],
                    'psi': [psi],
                    'rel_sasa': [rel_sasa],
                    'chain': chain,
                    'abs_sasa': [abs_sasa],
                    'resname': resname
                }
    return DSSP
示例#13
0
# Script fragment: saves the first model of a structure to PDB_HOME, reloads
# it and runs DSSP on it.
# NOTE(review): PDB_HOME, pdb_id, structure and hsspProfile are defined
# outside this excerpt.

# Write only the first model to <PDB_HOME>/<pdb id in lower case>.pdb
pdb_file = "{}/{}.pdb".format(PDB_HOME, pdb_id.lower())
pdbIO = PDB.PDBIO()
pdbIO.set_structure(structure[0])
pdbIO.save(pdb_file)

#------------------------------------------------------------------------------
# Get Surface residue
# https://biopython.org/wiki/The_Biopython_Structural_Bioinformatics_FAQ
# https://biopython.org/DIST/docs/api/Bio.PDB.DSSP%27-module.html
# Download and Install DSSP is required
# ftp://ftp.cmbi.ru.nl//pub/molbio/software/dssp-2/
#------------------------------------------------------------------------------
# Read PDB
#pdb_file = '/Users/jjeong/local/project_dev/ppi/codes/utils/1A2Y.pdb'

# Load the structure back from the file that was just written
pdbParser = PDB.PDBParser()
structure = pdbParser.get_structure(pdb_id, pdb_file)
model = structure[0]
# Alternative: read a precomputed .dssp file instead of running mkdssp
#dssp = PDB.DSSP(model=model, in_file="/Users/jjeong/local/project_dev/ppi/outputs/pdb/6dm0.dssp", file_type='DSSP')
# Run mkdssp on the PDB file, using the 'Sander' accessibility table
dssp = PDB.DSSP(model=model,
                in_file=pdb_file,
                dssp='mkdssp',
                acc_array='Sander',
                file_type='PDB')

#-- To see Max ACC
maxAcc = dssp.residue_max_acc

# NOTE(review): hsspProfile is not assigned anywhere in this excerpt
print(hsspProfile)
示例#14
0
import glob
from Bio import PDB

# Write the sequence and DSSP secondary-structure string of every file in
# all_pdbs/ to casp11.sec. Each file contributes four lines: a header and the
# sequence, then the same header and the secondary structure.
pdb_files = glob.iglob('all_pdbs/*')

# A single parser is reused for all files.
p = PDB.PDBParser()

# The context manager closes the output even if a structure fails to parse.
with open('casp11.sec', 'w') as out:
    for c, pdb in enumerate(pdb_files, start=1):
        print(c)  # progress counter

        # The structure id is the path without its last four characters
        structure = p.get_structure(pdb[:-4], pdb)
        model = structure[0]
        dssp = PDB.DSSP(model, pdb)

        # Index 1 of a DSSP record is read as the sequence letter and index 2
        # as the secondary-structure code; join avoids repeated += on strings.
        records = [dssp[key] for key in dssp.keys()]
        seq = ''.join(record[1] for record in records)
        ss = ''.join(record[2] for record in records)

        out.write('>{}\n'.format(pdb))
        out.write('{}\n'.format(seq))
        out.write('>{}\n'.format(pdb))
        out.write('{}\n'.format(ss))