def __call__(self, code, pull_outcome):
        """Handle the outcome of one PDB pull and record the result.

        :param code: entry code; characters 0-3 are used as the structure id
            and character 4 as the requested chain letter.
        :param pull_outcome: ``(filename, outcome)`` pair; ``outcome`` is the
            status text of the pull and ``filename`` the downloaded file.
        :return: None. ``self.log`` always receives one line; ``self.data`` is
            filled under ``code[0:5]`` when the features could be composed.
        """
        filename, outcome = pull_outcome
        if outcome != "Pull Successful":
            # Nothing was downloaded: just report the pull status as-is.
            self.log.append(code + " >>> " + outcome)
            return

        structure = BPDB.PDBParser().get_structure(code[0:4], filename)
        # The structure is in memory now; drop the file and its directory.
        os.remove(filename)
        os.rmdir("/".join(filename.split("/")[:-1]))

        builder = BPDB.PPBuilder()
        try:
            # Preferred: the chain named by the fifth character of the code.
            chain = builder.build_peptides(
                structure[0][code[4].upper()])[0]
        except Exception:
            try:
                # Fallback: the first chain of the first model.
                chain = builder.build_peptides(
                    next(structure[0].get_chains()))[0]
            except Exception:
                self.log.append(code + " >>> Pull Failed on Chain")
                return

        try:
            self.data[code[0:5]] = self.__compose__(chain)
            self.log.append(code + " >>> " + outcome)
        except Exception:
            # Best effort: a feature failure is logged, never propagated.
            self.log.append(code + " >>> Pull Failed on a Feature")
示例#2
0
def main():
    """Run the FoldX simple mutator for every residue numbered above 156.

    Parses ``2eke_optimized.bak``, concatenates the sequences of all
    polypeptides, and for each residue whose number is greater than 156
    builds a mutation string (residue letter + second chain id + residue
    number + ``'a'``) that is handed to ``runFoldxSimpleMutator``.
    """
    pdb_file = '2eke_optimized.bak'
    structure = PDB.PDBParser().get_structure('working_pdb', pdb_file)

    total_sequence = ''.join(
        str(pp.get_sequence())
        for pp in PDB.PPBuilder().build_peptides(structure))

    chain_letters = ''.join(
        chain.get_id() for chain in structure.get_chains())
    residue_numbers = [
        residue.get_id()[1] for residue in structure.get_residues()]

    for i, number in enumerate(residue_numbers):
        if number <= 156:
            continue
        # NOTE(review): position i of the residue list is assumed to line up
        # with position i of the concatenated polypeptide sequence — confirm.
        mutant = total_sequence[i] + chain_letters[1] + str(number) + 'a'
        runFoldxSimpleMutator(mutant, [pdb_file])
def get_phi_psi(structure):
    """
    Collect the phi and psi dihedral angles of every polypeptide.

    Polypeptides are built from ``structure`` with ``PPBuilder``. Returns
    two lists (phi, psi), each holding one inner list per polypeptide.
    """
    phi_angles_list = []
    psi_angles_list = []

    for polypeptide in PDB.PPBuilder().build_peptides(structure):
        angle_pairs = polypeptide.get_phi_psi_list()
        # Split the (phi, psi) pairs into two parallel lists.
        phi_angles_list.append([phi for phi, _ in angle_pairs])
        psi_angles_list.append([psi for _, psi in angle_pairs])

    return phi_angles_list, psi_angles_list
示例#4
0
文件: utils.py 项目: gabcg/SciPuzzle
def chain_to_fasta(chain):
    """
    Return the sequence of the first polypeptide built from ``chain``.

    Only the first polypeptide is used; ``None`` is returned when no
    polypeptide can be built.
    """
    builder = pdb.PPBuilder()
    first_peptide = next(iter(builder.build_peptides(chain)), None)
    if first_peptide is None:
        return None
    return first_peptide.get_sequence()
示例#5
0
def get_seq(name):
    """Return the concatenated polypeptide sequences of PDB file ``name``.

    ``name`` is used both as the structure id and as the file path.
    """
    from Bio import PDB
    structure = PDB.PDBParser().get_structure(name, name)
    fragments = []
    for peptide in PDB.PPBuilder().build_peptides(structure):
        fragments.append(str(peptide.get_sequence()))
    return ''.join(fragments)
def extract_pdb(path):
    """
    Extract a sequence from a PDB file.

    The structure is parsed with Biopython and the sequence of the last
    polypeptide that could be built is returned as a string.

    :param path: path of the PDB file
    :return: sequence of the last polypeptide, as ``str``
    :raises ValueError: if no polypeptide can be built from the structure
    """
    structure = PDB.PDBParser().get_structure("test", path)
    peptides = PDB.PPBuilder().build_peptides(structure)
    if not peptides:
        # Previously this surfaced as an UnboundLocalError on ``sequence``.
        raise ValueError("no polypeptide found in %s" % path)
    return str(peptides[-1].get_sequence())
示例#7
0
def write_pdb_seq_to_file(pdb_file):
    '''Return the sequence observed in a single-chain PDB file.

    IN: (path to) PDB-file with only one chain
    OUT: sequence of PDB-file (from actual structure, not header)

    Raises AssertionError when the structure does not hold exactly one
    chain. The check is an explicit raise so it still runs under ``-O``.
    '''
    struct = PDB.PDBParser().get_structure('current', pdb_file)
    chain_count = len(list(struct.get_chains()))
    if chain_count != 1:
        raise AssertionError(
            'WARNING: Structure %s has %d chains (expected exactly one).\n'
            'It will be excluded from analysis.'
            % (struct.get_id(), chain_count))
    seq = ''
    for pp in PDB.PPBuilder().build_peptides(struct):
        seq += pp.get_sequence()
    return seq
示例#8
0
def pdb2fasta(pdbfilename):  # convert one PDB file into a FASTA sequence
    """Return the sequence of the last polypeptide found in a PDB file.

    :param pdbfilename: path of the PDB file; the structure id is the same
        text with every ``".pdb"`` removed.
    :return: the sequence object of the last polypeptide that was built
    :raises ValueError: if no polypeptide can be built from the structure
    """
    new_filename = pdbfilename.replace(".pdb", "")

    structure = PDB.PDBParser().get_structure(new_filename, pdbfilename)
    peptides = PDB.PPBuilder().build_peptides(structure)
    if not peptides:
        # Previously this surfaced as an UnboundLocalError on ``ppstring``.
        raise ValueError("no polypeptide found in %s" % pdbfilename)
    return peptides[-1].get_sequence()
示例#9
0
def getSequencefromPDB(pdbfile, chain='C', index=0):
    """Return the amino-acid sequence of one chain of a PDB file.

    The chain is looked up in the first model; the sequences of all its
    polypeptides are concatenated into one string. ``index`` is accepted
    but not used.
    """
    structure = PDB.PDBParser(QUIET=True).get_structure(pdbfile, pdbfile)
    target_chain = structure[0][chain]
    fragments = PDB.PPBuilder().build_peptides(target_chain)
    return ''.join(str(fragment.get_sequence()) for fragment in fragments)
示例#10
0
def calc_ramachandran(file_name_list):
    """
    Classify the backbone torsion angles of the given PDB files.

    :param file_name_list: List of PDB files to analyse; paths that are not
        existing files are skipped silently
    :return: ``(normals, outliers)``; each maps a residue class from
        ``RAMA_PREFERENCES`` to ``{"x": [phi...], "y": [psi...]}`` in degrees
    """
    # ``global`` has to come before the first use of the name in this scope;
    # declaring it after the ``is None`` test is a SyntaxError on Python 3.
    global RAMA_PREF_VALUES
    if RAMA_PREF_VALUES is None:
        RAMA_PREF_VALUES = _cache_RAMA_PREF_VALUES()

    normals = {key: {"x": [], "y": []} for key in RAMA_PREFERENCES}
    outliers = {key: {"x": [], "y": []} for key in RAMA_PREFERENCES}

    # Calculate the torsion angle of the inputs
    for inp in file_name_list:
        if not os.path.isfile(inp):
            continue
        structure = PDB.PDBParser().get_structure('input_structure', inp)
        for model in structure:
            for chain in model:
                for poly in PDB.PPBuilder().build_peptides(chain):
                    phi_psi = poly.get_phi_psi_list()
                    for res_index, residue in enumerate(poly):
                        phi, psi = phi_psi[res_index]
                        if not (phi and psi):
                            continue
                        res_name = "{}".format(residue.resname)
                        # A residue followed by proline gets its own class.
                        if str(poly[res_index + 1].resname) == "PRO":
                            aa_type = "PRE-PRO"
                        elif res_name == "PRO":
                            aa_type = "PRO"
                        elif res_name == "GLY":
                            aa_type = "GLY"
                        else:
                            aa_type = "General"
                        phi_deg = math.degrees(phi)
                        psi_deg = math.degrees(psi)
                        preference = RAMA_PREF_VALUES[aa_type][
                            int(psi_deg) + 180][int(phi_deg) + 180]
                        # Below the first bound of the class means outlier.
                        if preference < RAMA_PREFERENCES[aa_type]["bounds"][1]:
                            bucket = outliers[aa_type]
                        else:
                            bucket = normals[aa_type]
                        bucket["x"].append(phi_deg)
                        bucket["y"].append(psi_deg)
    return normals, outliers
示例#11
0
def get_chain_sequence(chain):
    """
    Return the sequence of a chain object.

    The polypeptide sequence is returned when one can be built. Otherwise
    the chain is read as a nucleic acid: the last letter of each residue
    name is kept when it is one of A, T, G, C or U.
    """
    sequence = ""
    for pp in pdb.PPBuilder().build_peptides(chain):
        sequence = sequence + pp.get_sequence()

    if not sequence:
        for residue in chain.get_residues():
            # Residue names may or may not be space-padded depending on the
            # parser, so take the last non-blank character, not index 2
            # (which raises IndexError on names shorter than three chars).
            name = residue.get_resname().strip()
            if name and name[-1] in 'ATGCU':
                sequence = sequence + name[-1]

    return sequence
示例#12
0
 def get_seq_from_tar(self, tar):
     """Return the sequence of the first polypeptide of 'model1.pdb' in ``tar``.

     :param tar: an open tar archive object; it is closed here once a
         sequence has been read.
     :return: the sequence of the first polypeptide, or ``None`` (implicitly)
         when no attempt produced a usable stream.
     """
     # One extraction attempt is made per archive member, but every attempt
     # asks for the same fixed member name; ``i`` itself is never used.
     for i in tar:
         try:
             stream = tar.extractfile('model1.pdb')
         except Exception as err:
             # Extraction problems are reported as warnings, then retried on
             # the next iteration.
             warn(str(err))
             stream = None
         else:
             if stream:  # io.BufferedReader
                 # NOTE(review): ``get_structure_from_stream`` is not defined
                 # in this snippet — presumably a project extension of
                 # PDBParser; confirm.
                 p = PDB.PDBParser().get_structure_from_stream(
                     'model', stream)
                 b = PDB.PPBuilder()
                 # Only the first polypeptide is used.
                 pp = b.build_peptides(p)[0]
                 tar.close()
                 return pp.get_sequence()
             else:
                 if self.verbose: print('IO Stream is None from tarball.')
示例#13
0
def SeqFromPDBCode(code):
    """Return the sequence of the first chain of ``<code>_protein.pdb``.

    Both ``<code>_pocket.pdb`` and ``<code>_protein.pdb`` under
    ``DATADIR/<code>/`` must parse; otherwise a message is printed and
    ``None`` is returned. ``None`` is also returned when the protein has no
    chain at all.
    """
    protein_pdb = DATADIR / code / (code + '_protein.pdb')
    pocket_pdb = DATADIR / code / (code + '_pocket.pdb')
    parser = PDB.PDBParser(QUIET=True)
    try:
        # The pocket is parsed only as a readability check; its content is
        # not used below.
        parser.get_structure(code, pocket_pdb)
        protein = parser.get_structure(code, protein_pdb)
    except Exception:
        print('fail to read {}'.format(code))
        return None

    ppb = PDB.PPBuilder()
    for chain in protein.get_chains():
        # Only the first chain is ever processed: the loop returns at once.
        seqs = [i.get_sequence() for i in ppb.build_peptides(chain)]
        seq_str = ''.join([str(i) for i in seqs])
        # NOTE(review): raises IndexError when the chain yields no
        # polypeptide, and relies on the ``alphabet`` attribute of the
        # sequence objects — confirm against the Biopython version in use.
        a = seqs[0].alphabet
        return Seq(seq_str, a)
示例#14
0
def get_pdb_sequence(prefix):
    """Read ``<prefix>.pdb`` and summarise its polypeptide sequences.

    :param prefix: path of the PDB file without the ``.pdb`` extension
    :return: ``(total_sequence, total_length, first_chain_length,
        structure)``. ``first_chain_length`` is the length of the first
        polypeptide, or 0 when the structure contains none (previously an
        UnboundLocalError).
    """
    start_name = prefix + '.pdb'
    structure = PDB.PDBParser().get_structure('working_pdb', start_name)

    total_sequence = ''
    total_length = 0
    first_chain_length = 0
    for position, pp in enumerate(PDB.PPBuilder().build_peptides(structure)):
        fragment = pp.get_sequence()
        total_length += len(fragment)
        total_sequence += fragment
        if position == 0:
            first_chain_length = total_length

    return (total_sequence, total_length, first_chain_length, structure)
def import_protein_structure(inputs, wt_protein_fasta_file):
    """Collect protein sequences from every PDB file below ``inputs``.

    :param inputs: directory that is walked recursively for ``.pdb`` and
        ``.pdb.gz`` files
    :param wt_protein_fasta_file: optional wild-type FASTA file; when given,
        the first two polypeptides of each structure are merged with the
        wild-type sequence through ``combine_chains``
    :return: dict mapping a name (parent directory + file stem, plus
        ``_chain_<n>`` when no wild-type file is given) to a sequence string
    """
    from Bio import PDB
    import gzip

    file_list = []
    for subdir, dirs, files in os.walk(inputs):
        for file_name in files:
            if file_name.endswith(('.pdb', '.pdb.gz')):
                file_list.append(os.path.join(subdir, file_name))

    protein_sequences = {}

    for path in file_list:
        subdir = path.split('/')[-2]
        name = subdir + os.path.basename(path).split('.')[0]

        parser = PDB.PDBParser()
        if path.endswith('.gz'):
            # NOTE(review): mode 'r' is binary for gzip on Python 3; the
            # parser may need text mode ('rt') there — confirm.
            handle = gzip.open(path, 'r')
        else:
            handle = open(path, 'r')
        try:
            struct = parser.get_structure(name, handle)
        finally:
            # The handle used to be left open for every parsed file.
            handle.close()

        ppb = PDB.PPBuilder()
        chains = [list(pp.get_sequence()) for pp in ppb.build_peptides(struct)]

        if wt_protein_fasta_file:
            wt_seq_list = \
                list(import_wt_protein_sequence(wt_protein_fasta_file))
            final_design_seq = combine_chains(chains[0], chains[1],
                                              wt_seq_list)
            protein_sequences[name] = "".join(final_design_seq)
        else:
            if len(chains) > 1:
                # Single-argument call form: prints the same text whether
                # ``print`` is a statement or a function.
                print("Warning: Multiple chains found. Splitting "
                      "sequence into  %d  DNA sequences for ordering."
                      % len(chains))
            for index, chain in enumerate(chains):
                protein_sequences[name + "_chain_" +
                                  str(index)] = "".join(chain)

    return protein_sequences
示例#16
0
    def get_sequence( self, chain_id ):
        '''
            Input:
                self: Use Biopython.PDB structure which has been stored in an object variable
                chain_id  : String (usually in ['A','B', 'C' ...]. The number of chains
                        depends on the specific protein and the resulting structure)
            Return:
                Return the amino acid sequence (single-letter alphabet!) of a given chain (chain_id)
                in a Biopython.PDB structure as a string. All polypeptide fragments of the
                chain are concatenated; the result is empty if none can be built.
            Raises:
                ValueError if no chain with id chain_id exists in the structure.
        '''
        selected = None
        for model in self.structure:
            for chain in model:
                if chain.get_id() == chain_id:
                    # With several models the last matching chain wins.
                    selected = chain
        if selected is None:
            raise ValueError("chain %r not found in structure" % (chain_id,))

        ppb = PDB.PPBuilder()
        return ''.join(
            str(pp.get_sequence()) for pp in ppb.build_peptides(selected))
示例#17
0
    def __get_length_and_resolution(self, file):
        """
        Determine the resolution, total sequence length and id of a .pdb file.
        :param file: pdb file path.
        :return: pandas series holding, in order, the header resolution, the
            summed polypeptide length and the structure id (file base name
            without extension).
        """
        structure_id = os.path.splitext(os.path.basename(file))[0]
        structure = bp.PDBParser().get_structure(structure_id, file)

        # Total length is the sum over every polypeptide of the structure.
        seq_len = sum(
            len(pp.get_sequence())
            for pp in bp.PPBuilder().build_peptides(structure))

        return pd.Series(
            [structure.header['resolution'], seq_len, structure.id])
示例#18
0
def SeqFromPDBCode(code):
    """Return the protein sequence of the chain that dominates the pocket.

    The longest pocket chain with a non-blank id is determined from
    ``<code>_pocket.pdb``; the sequence of the protein chain with the same
    id is returned. ``None`` is returned when either file cannot be parsed,
    when the pocket has no usable chain, or when the protein has no chain
    with that id.
    """
    protein_pdb = DATADIR / code / (code + '_protein.pdb')
    pocket_pdb = DATADIR / code / (code + '_pocket.pdb')
    parser = PDB.PDBParser(QUIET=True)
    try:
        pocket = parser.get_structure(code, pocket_pdb)
        protein = parser.get_structure(code, protein_pdb)
    except Exception:
        return None

    longest_chain = None
    for chain in pocket.get_chains():
        if chain.id == ' ':
            continue
        if longest_chain is None or len(chain) > len(longest_chain):
            longest_chain = chain
    if longest_chain is None:
        return None

    ppb = PDB.PPBuilder()
    for chain in protein.get_chains():
        if chain.id != longest_chain.id:
            continue
        seqs = [i.get_sequence() for i in ppb.build_peptides(chain)]
        seq_str = ''.join([str(i) for i in seqs])
        # NOTE(review): raises IndexError when the chain yields no
        # polypeptide, and relies on the ``alphabet`` attribute of the
        # sequence objects — confirm against the Biopython version in use.
        a = seqs[0].alphabet
        return Seq(seq_str, a)
示例#19
0
def plot_ramachandran(file):
    """
    Draw a single-panel Ramachandran plot for one PDB file and display it.

    The preferences were calculated from the following article:
    Lovell et al. Structure validation by Calpha geometry: phi,psi and Cbeta deviation. 2003
    DOI: 10.1002/prot.10286

    :param file: path of the PDB file to analyse
    :return: None; the figure is shown with ``plt.show()``
    """
    __pdb__ = file

    # General variable for the background preferences
    # NOTE(review): "General" and "GLY" use an empty colormap; newer
    # matplotlib versions may reject that in BoundaryNorm — confirm.
    rama_preferences = {
        "General": {
            "file": os.path.join('data',"rama500-general.data"),
            "cmap": colors.ListedColormap([]),
            "bounds": [0, 0.002, 0.02, 1],
        },
        "GLY": {
            "file": os.path.join('data',"rama500-gly-sym.data"),
            "cmap": colors.ListedColormap([]),
            "bounds": [0, 0.002, 0.02, 1],
        },
        "PRO": {
            "file": os.path.join('data',"rama500-pro.data"),
            "cmap": colors.ListedColormap(['#FFFFFF00', 'skyblue', 'deepskyblue']),
            "bounds": [0, 0.0005, 0.02, 1],
        },
        "PRE-PRO": {
            "file": os.path.join('data',"rama500-prepro.data"),
            "cmap": colors.ListedColormap(['#FFFFFF', '#FFE8C5', '#FFCC7F']),
            "bounds": [0, 0.002, 0.02, 1],
        }
    }

    # Preference tables are read relative to the directory of this module.
    r_path = os.path.abspath(os.path.dirname(__file__))
    rama_pref_values = {}
    for key, val in rama_preferences.items():
        grid = np.full((360, 360), 0, dtype=np.float64)
        rama_pref_values[key] = grid
        with open(os.path.join(r_path, val["file"])) as fn:
            for line in fn:
                if line.startswith("#"):
                    continue
                fields = line.split()
                phi_idx = int(float(fields[0])) + 180
                psi_idx = int(float(fields[1])) + 180
                value = float(fields[2])
                # Preference file has values for every second position only,
                # so each value also fills the three neighbouring cells.
                grid[psi_idx][phi_idx] = value
                grid[psi_idx - 1][phi_idx - 1] = value
                grid[psi_idx - 1][phi_idx] = value
                grid[psi_idx][phi_idx - 1] = value

    normals = {}
    outliers = {}
    for key in rama_preferences:
        normals[key] = {"x": [], "y": []}
        outliers[key] = {"x": [], "y": [],'Res':[]}

    # Keys whose "normal" points get the extra marker styles below. They were
    # formerly bound only inside one classification branch, so a structure
    # that never reached it failed with NameError at plot time.
    aa_type = "PRO"
    bb_type = "General"
    cc_type = "PRE-PRO"
    dd_type = "PRO"

    # Calculate the torsion angle of the pdb file.
    structure = PDB.PDBParser().get_structure('input_structure', __pdb__)
    for model in structure:
        for chain in model:
            for poly in PDB.PPBuilder().build_peptides(chain):
                phi_psi = poly.get_phi_psi_list()
                for res_index, residue in enumerate(poly):
                    res_name = "{}".format(residue.resname)
                    res_num = residue.id[1]
                    phi, psi = phi_psi[res_index]
                    if not (phi and psi):
                        continue
                    # NOTE(review): this mapping compares the next residue
                    # name to "General" (never a residue name) and files
                    # PRO / GLY / other residues under "General" / "PRE-PRO"
                    # / "PRO". It looks scrambled, but the plotting and
                    # legend below depend on it, so it is left unchanged —
                    # confirm the intended classification.
                    if str(poly[res_index + 1].resname) == "General":
                        aa_type = "PRE-PRO"
                    elif res_name == "PRO":
                        aa_type = "General"
                    elif res_name == "GLY":
                        aa_type = "PRE-PRO"
                    else:
                        aa_type = "PRO"
                    phi_deg = math.degrees(phi)
                    psi_deg = math.degrees(psi)
                    # The ``:`` ending this condition was missing before,
                    # which made the whole function a SyntaxError.
                    if rama_pref_values[aa_type][int(psi_deg) + 180][int(phi_deg) + 180] < \
                            rama_preferences[aa_type]["bounds"][1]:
                        outliers[aa_type]["x"].append(phi_deg)
                        outliers[aa_type]["y"].append(psi_deg)
                        outliers[aa_type]['Res'].append(res_name+'_'+str(res_num))
                    else:
                        normals[aa_type]["x"].append(phi_deg)
                        normals[aa_type]["y"].append(psi_deg)

    # Generate the plots
    plt.figure(figsize=(10,10))
    for idx, (key, val) in enumerate(sorted(rama_preferences.items(), key=lambda x: x[0].lower())):
        plt.imshow(rama_pref_values[key], cmap=rama_preferences[key]["cmap"],
                   norm=colors.BoundaryNorm(rama_preferences[key]["bounds"], rama_preferences[key]["cmap"].N),
                   extent=(-180, 180, 180, -180),alpha=0.7)
        # Markers for the different residue groups.
        # NOTE(review): ``aa_type`` here is whatever class the last
        # classified residue had (as in the original code) — confirm.
        plt.scatter(normals[aa_type]["x"], normals[aa_type]["y"],color="k",s=[10],marker='o')
        plt.scatter(normals[bb_type]["x"], normals[bb_type]["y"],color="k",s=[35],marker='^')
        plt.scatter(normals[cc_type]["x"], normals[cc_type]["y"],color="k",s=[35],marker='x')
        plt.scatter(normals[dd_type]["x"], normals[dd_type]["y"],color="k",s=[25],marker='+')
        plt.scatter(outliers[key]["x"], outliers[key]["y"],color="red",s=[15],marker=',')

        # Label every outlier with "<residue name>_<residue number>".
        for out_key in outliers:
            for i, name in enumerate(outliers[out_key]['Res']):
                plt.annotate(name, (outliers[out_key]["x"][i], outliers[out_key]["y"][i]))

        plt.xlim([-180, 180])
        plt.ylim([-180, 180])
        ax = plt.gca()
        ax.set_xlim(-180, 180)
        ax.set_ylim(-180, 180)
        # Fixed tick positions on both axes.
        ax.set_xticks([-180, -135, -90, -45, 0, 45, 90, 135, 180], minor=False)
        ax.set_yticks([-180, -135, -90, -45, 0, 45, 90, 135, 180], minor=False)
        plt.plot([-180, 180], [0, 0], linewidth=1,color="k",alpha=0.2)
        plt.plot([0, 0], [-180, 180], linewidth=1,color="k",alpha=0.2)
        plt.xlabel(r'$\phi$',fontsize=14,color="k",alpha=1)
        plt.ylabel(r'$\psi$',fontsize=14,color="k",alpha=1)
        plt.grid(linestyle='--',color="k",alpha=0.4)
        plt.title('Ramachandran Plot',fontsize=15,color="k",alpha=1,)

    # Legend handles: colour patches for the background regions, then the
    # marker and text-only entries.
    A = mpatches.Patch(color='deepskyblue',lw=15)
    B = mpatches.Patch(color='skyblue',lw=15)
    C = mpatches.Patch(color='#FFCC7F',lw=15)
    D = mpatches.Patch(color='#FFE8C5',lw=15)
    E = mlines.Line2D([], [], color='red', marker='s',linestyle='None',
                          markersize=10)
    F = mlines.Line2D([], [], color='black', marker='o',linestyle='None',
                          markersize=7,label="  ")
    G = mlines.Line2D([], [], color='black', marker='^',linestyle='None',
                          markersize=7,label="General/Pre-Pro/Proline Allowed")
    H = mlines.Line2D([], [], color='black', marker='^',linestyle='None',
                          markersize=7,label="General/Pre-Pro/Proline Favoured")
    I = mlines.Line2D([], [], color='black', marker='o',linestyle='None',
                          markersize=7,label="   ")
    J = mlines.Line2D([], [], color='black', marker='x',linestyle='None',
                          markersize=7,label="  ")
    k = mlines.Line2D([], [], color='black', marker='x',linestyle='None',
                          markersize=7)
    L = mlines.Line2D([], [], color='red', marker='',linestyle='None',
                          markersize=7,label=" ")
    M = mlines.Line2D([], [], color='black', marker='',linestyle='None',
                          markersize=7,label="Glycien Favoured")
    N = mlines.Line2D([], [], color='black', marker='',linestyle='None',
                          markersize=7,label="Glycien Allowed")
    o = mlines.Line2D([], [], color='black', marker='',linestyle='None',
                          markersize=7,label="Outliers")
    plt.legend(frameon=False,handles=[A,B,C,D,E,F,I,J,k,L,H,G,M,N,o],loc='upper left', labelspacing=2,fontsize=10,ncol=3,columnspacing=-2.8,bbox_to_anchor=(0.01, -0.06))
    #plt.savefig("asd.png", dpi=300) # Uncomment this line to save the plot to a specific location
    plt.show()
示例#20
0
def get_PDB_info(dir):
    """Build a machine-learning dataset from the PDB files in a directory.

    For every entry of ``dir`` (except ``.DS_Store``) the structure is parsed
    and passed through the project helpers for residues, distances, contacts,
    DSSP and TMHMM. The results are accumulated through
    ``generate_ML_dataset``.

    :param dir: directory containing the PDB files
    :return: ``(DSSP_vector, TMHMM_vector, oracle)`` as returned by the last
        ``generate_ML_dataset`` call (three empty lists if nothing was
        processed)
    """

    # The three accumulators that are threaded through generate_ML_dataset.
    DSSP_vector, TMHMM_vector, oracle = [], [], []

    print("There are", len(os.listdir(dir)), "PDB files to parse")

    # Assemble the dataset incrementally, one directory entry at a time.
    for ind, PDB_file in enumerate(os.listdir(dir)):
        if ind % 10 == 0:
            print("Working on structure", ind)

        # Skip the macOS folder metadata file.
        if (str(PDB_file) == ".DS_Store"): continue
        # Step 1: parse the file; the id is the file name without its last
        # four characters, upper-cased. Only the first model is used.
        p = PDB.PDBParser()
        structure = p.get_structure(PDB_file[:-4].upper(),
                                    dir + "/" + PDB_file)
        model = structure[0]

        # List of residues of the model (project helper).
        residues = extract_residues(model)
        print("file", PDB_file, len(residues))

        # Distance matrix between residues (project helper) — presumably
        # len(residues) x len(residues); verify against the helper.
        matrix = compute_distance_matrix(residues)

        # Contact map derived from the distance matrix (project helper).
        # Per the original assignment notes it should be True for residues
        # within 5 angstroms in 3D but at least 10 apart in sequence —
        # TODO confirm against removeConsecutives.
        contact_map = removeConsecutives(matrix)
        # One flag per item yielded by iterating contact_map: whether the
        # entry looked up with that item contains True.
        has_contact = [
            True if True in contact_map[residue] else False
            for residue in contact_map
        ]

        # Result is not used below; the call is kept as in the original.
        contact_info = get_contact_numbers(contact_map)

        # Secondary-structure information from DSSP (project helper).
        dssp_info = get_dssp_info(PDB_file, model, dir)

        # Sequence: concatenation of all polypeptides of the structure.
        sequence = ""
        ppb = PDB.PPBuilder()
        for pp in ppb.build_peptides(structure):
            sequence += pp.get_sequence()

        dssp_ss = ""  # ss stands for secondary structure
        dssp_seq = ""

        # Walk the DSSP entries in sorted key order; element 2 of each entry
        # is appended to dssp_ss and element 1 to dssp_seq (dssp_seq is not
        # used afterwards).
        dssp_keys = sorted(dssp_info.keys())
        for key in dssp_keys:
            curr_ss = dssp_info[key][2]
            dssp_ss += curr_ss
            dssp_seq += dssp_info[key][1]

        converted = convert_info(dssp_ss)
        # Write the sequence out (project helper) so TMHMM can be run on it.
        filename = write_fasta(sequence, PDB_file)

        # TMHMM result for that file (project helper).
        tm_ss = run_tmhmm(filename, PDB_file)

        # Feed this structure into the running dataset.
        DSSP_vector, TMHMM_vector, oracle = generate_ML_dataset(
            sequence, converted, tm_ss, has_contact, DSSP_vector, TMHMM_vector,
            oracle)
    return DSSP_vector, TMHMM_vector, oracle
示例#21
0
def ramachandran(file_name_list):
    """
    Main calculation and plotting definition.

    Classifies the backbone torsion angles of every input file, draws one
    panel per residue class, saves the figure next to the first input file
    (its name with the extension replaced by ``.png``) and shows it.

    :param file_name_list: List of PDB files to plot
    :return: None
    """
    # General variable for the background preferences
    rama_preferences = {
        "General": {
            "file": "data/pref_general.data",
            "cmap": colors.ListedColormap(['#FFFFFF', '#B3E8FF', '#7FD9FF']),
            "bounds": [0, 0.0005, 0.02, 1],
        },
        "GLY": {
            "file": "data/pref_glycine.data",
            "cmap": colors.ListedColormap(['#FFFFFF', '#FFE8C5', '#FFCC7F']),
            "bounds": [0, 0.002, 0.02, 1],
        },
        "PRO": {
            "file": "data/pref_proline.data",
            "cmap": colors.ListedColormap(['#FFFFFF', '#D0FFC5', '#7FFF8C']),
            "bounds": [0, 0.002, 0.02, 1],
        },
        "PRE-PRO": {
            "file": "data/pref_preproline.data",
            "cmap": colors.ListedColormap(['#FFFFFF', '#B3E8FF', '#7FD9FF']),
            "bounds": [0, 0.002, 0.02, 1],
        }
    }

    # Read in the expected torsion angles (paths relative to the current
    # working directory).
    __location__ = os.path.realpath(os.getcwd())
    rama_pref_values = {}
    for key, val in rama_preferences.items():
        grid = np.full((360, 360), 0, dtype=np.float64)
        rama_pref_values[key] = grid
        with open(os.path.join(__location__, val["file"])) as fn:
            for line in fn:
                if line.startswith("#"):
                    continue
                fields = line.split()
                phi_idx = int(float(fields[0])) + 180
                psi_idx = int(float(fields[1])) + 180
                value = float(fields[2])
                # Preference file has values for every second position only,
                # so each value also fills the three neighbouring cells.
                grid[psi_idx][phi_idx] = value
                grid[psi_idx - 1][phi_idx - 1] = value
                grid[psi_idx - 1][phi_idx] = value
                grid[psi_idx][phi_idx - 1] = value

    normals = {}
    outliers = {}
    for key in rama_preferences:
        normals[key] = {"x": [], "y": []}
        outliers[key] = {"x": [], "y": []}

    # Calculate the torsion angle of the inputs
    for inp in file_name_list:
        if not os.path.isfile(inp):
            print("{} not found!".format(inp))
            continue
        structure = PDB.PDBParser().get_structure('input_structure', inp)
        for model in structure:
            for chain in model:
                for poly in PDB.PPBuilder().build_peptides(chain):
                    phi_psi = poly.get_phi_psi_list()
                    for res_index, residue in enumerate(poly):
                        res_name = "{}".format(residue.resname)
                        res_num = residue.id[1]
                        phi, psi = phi_psi[res_index]
                        if not (phi and psi):
                            continue
                        # A residue followed by proline gets its own class.
                        if str(poly[res_index + 1].resname) == "PRO":
                            aa_type = "PRE-PRO"
                        elif res_name == "PRO":
                            aa_type = "PRO"
                        elif res_name == "GLY":
                            aa_type = "GLY"
                        else:
                            aa_type = "General"
                        phi_deg = math.degrees(phi)
                        psi_deg = math.degrees(psi)
                        preference = rama_pref_values[aa_type][
                            int(psi_deg) + 180][int(phi_deg) + 180]
                        # Below the first bound of the class means outlier.
                        if preference < rama_preferences[aa_type]["bounds"][1]:
                            print("{} {} {} {}{} is an outlier".format(
                                inp, model, chain, res_name, res_num))
                            outliers[aa_type]["x"].append(phi_deg)
                            outliers[aa_type]["y"].append(psi_deg)
                        else:
                            normals[aa_type]["x"].append(phi_deg)
                            normals[aa_type]["y"].append(psi_deg)

    # Generate the plots
    for idx, (key, val) in enumerate(
            sorted(rama_preferences.items(), key=lambda x: x[0].lower())):
        plt.subplot(2, 2, idx + 1)
        plt.title(key)
        plt.imshow(rama_pref_values[key],
                   cmap=rama_preferences[key]["cmap"],
                   norm=colors.BoundaryNorm(rama_preferences[key]["bounds"],
                                            rama_preferences[key]["cmap"].N),
                   extent=(-180, 180, 180, -180))
        plt.scatter(normals[key]["x"], normals[key]["y"])
        plt.scatter(outliers[key]["x"], outliers[key]["y"], color="red")
        plt.xlim([-180, 180])
        plt.ylim([-180, 180])
        plt.plot([-180, 180], [0, 0], color="black")
        plt.plot([0, 0], [-180, 180], color="black")
        plt.locator_params(axis='x', nbins=7)
        plt.xlabel(r'$\phi$')
        plt.ylabel(r'$\psi$')
        plt.grid()

    plt.tight_layout()
    # The output name used to be sliced with the number of input files
    # instead of the length of the first file name, which mangled it
    # (e.g. "abc.pdb" -> "abc..png"). Strip the extension properly.
    output_stem = os.path.splitext(file_name_list[0])[0]
    plt.savefig("{0}.png".format(output_stem), dpi=300)
    plt.show()
示例#22
0
def prepare_top_dihedrals(top):
    """Collect backbone phi/psi angles in degrees, grouped by residue class.

    The file is parsed with ``Bio.PDB.PDBParser`` and split into
    polypeptides with ``Bio.PDB.PPBuilder``.  Every residue for which both
    torsion angles are defined is assigned to exactly one class, tested in
    this order:

    1. the next residue in the polypeptide is a proline ("pre-PRO"),
    2. the residue itself is a proline,
    3. the residue is a glycine,
    4. anything else ("general").

    :param top: PDB file to read; passed unchanged to
        ``PDBParser.get_structure``.
    :return: An 8-tuple of lists, in this order:
        ``(phi_gen, psi_gen, phi_pro, psi_pro, phi_gly, psi_gly,
        phi_pre, psi_pre)``.
    """
    from Bio import PDB
    import math

    structure = PDB.PDBParser().get_structure('input_structure', top)
    # One builder is enough; it is reused for every chain.
    builder = PDB.PPBuilder()

    phi_gen, psi_gen = [], []
    phi_pre, psi_pre = [], []
    phi_pro, psi_pro = [], []
    phi_gly, psi_gly = [], []

    for model in structure:
        for chain in model:
            for poly in builder.build_peptides(chain):
                phi_psi = poly.get_phi_psi_list()
                for res_index, residue in enumerate(poly):
                    phi, psi = phi_psi[res_index]
                    # Undefined angles are reported as None.  Compare against
                    # None explicitly: a plain truthiness test would also
                    # discard a legitimate angle of exactly 0.0 radians.
                    if phi is None or psi is None:
                        continue

                    res_name = str(residue.resname)
                    # psi is defined here, so this is not the last residue of
                    # the polypeptide and ``res_index + 1`` is a valid index.
                    if str(poly[res_index + 1].resname) == "PRO":
                        phi_out, psi_out = phi_pre, psi_pre
                    elif res_name == "PRO":
                        phi_out, psi_out = phi_pro, psi_pro
                    elif res_name == "GLY":
                        phi_out, psi_out = phi_gly, psi_gly
                    else:
                        phi_out, psi_out = phi_gen, psi_gen

                    phi_out.append(math.degrees(phi))
                    psi_out.append(math.degrees(psi))

    return phi_gen, psi_gen, phi_pro, psi_pro, phi_gly, psi_gly, phi_pre, psi_pre

    # Previous attempt: for some reason MDAnalysis calculates torsion angles incorrectly when the .pdb file
    # is too long, or possibly when a chain is discontinuous. I have not seen a pattern in this.
    # I switched to the dihedral method used in PyRAMA, which seems to be a better solution.
    # I still wanted to leave this here in case I can fix it, which would mean one import fewer.
    '''
    from MDAnalysis.analysis.dihedrals import Ramachandran
    r_general = u.select_atoms("backbone and segid B and resname VAL PHE ALA LYS ARG CYS GLU LEU MET HIS TYR TRP SER ASN GLN THR ASP ILE")
    r_pro = u.select_atoms("resname PRO")
    r_gly = u.select_atoms("resname GLY")
    R_general = Ramachandran(r_general).run()
    R_pro = Ramachandran(r_pro).run()
    R_gly = Ramachandran(r_gly).run()

    for atom in u.select_atoms("backbone"):
        print(atom)

    phi_general = []
    psi_general = []

    for line in list(R_general.angles):
        for entry in line:
            splitted_entry = entry.tolist()
            phi_general.append(splitted_entry[0])
            psi_general.append(splitted_entry[1])

    phi_pro = []
    psi_pro = []

    for line in list(R_pro.angles):
        for entry in line:
            splitted_entry = entry.tolist()
            phi_pro.append(splitted_entry[0])
            psi_pro.append(splitted_entry[1])

    phi_gly = []
    psi_gly = []

    for line in list(R_gly.angles):
        for entry in line:
            splitted_entry = entry.tolist()
            phi_gly.append(splitted_entry[0])
            psi_gly.append(splitted_entry[1])
    '''
    '''
示例#23
0
from .IonComplex import IonComplex
from ..PolyIon import Peptide
from ..Ion import fixed_state

import tempfile
from string import ascii_uppercase
from Bio import PDB

# Module-level Biopython helper objects, created once when this module is imported.
# NOTE(review): presumably shared by the Protein class below to look up and
# parse PDB entries -- the code that uses them is not visible here; confirm.
lister = PDB.PDBList(obsolete_pdb='override')
parser = PDB.PDBParser()
builder = PDB.PPBuilder()


@fixed_state
class Protein(IonComplex):
    """An ion made up of a complex of peptides.

    :param name: Name of the protein.
    :param ids: Names of the peptide members.
    :param sequences: Sequences of the peptide members.
    :param members: An iterable of the peptide members.

    When neither members nor sequences are supplied, the name is looked up
    in the Protein DataBase (PDB).  If a protein with that name is
    available, the peptide sequences are gathered from the PDB.
    """

    # Attribute name -> human-readable description.
    _state = dict(
        name='Protein name.',
        members='Name of the peptide members.',
    )
示例#24
0
# Collect every row yielded by datareader into a list so the rows can be
# indexed by position further down.
data = []
for row in datareader:
    data.append(row)  # one entry per row of structures.csv

# Wrap each raw row in a PDB_info object (class defined elsewhere).
pdb_info = [PDB_info(item) for item in data]

for i in range(1, len(pdb_info)):
    #assigns variable names to pdb_info elements
    pdb_name = pdb_info[i].id  #saves given pdb name as a variable
    protein_name = pdb_info[i].protein  #saves given protein name as a variable
    complete = pdb_info[i].complete  #saves yes or no for complete
    structure_conf = pdb_info[
        i].conformation  #saves active or inactive for conformation
    mutation = pdb_info[i].mutation
    ppb = pdb.PPBuilder()  #peptide class to get sequence
    last = 10000

    #gives location of the pdb file
    pdb_file = './PDBs/' + pdb_name + '.pdb'
    parser = pdb.PDBParser()
    struct = parser.get_structure("name",
                                  pdb_file)  #read in pdb file using PDBParser

    #gets name of the structure file
    if structure_conf == 'active':
        if complete == 'yes':
            structure_file = './actives/complete/' + protein_name + '_active.pdb'
        else:
            structure_file = './actives/incomplete/' + protein_name + '_active.pdb'
    else:
示例#25
0
from Bio import PDB

# Parse the local file 2FH7.pdb into a structure with the id '2FH7'.
parser = PDB.PDBParser()
structure = parser.get_structure('2FH7', '2FH7.pdb')
ppb = PDB.PPBuilder()

# Print the sequence of every polypeptide built from the whole structure.
for pp in ppb.build_peptides(structure):
    print(pp.get_sequence())

# Do the same again, this time using only the model at index 0.
model = structure[0]
for pp in ppb.build_peptides(model):
    print(pp.get_sequence())
def getAminoAcids(structure):
    """Return the characters of all polypeptide sequences in *structure*.

    Polypeptides are built with ``PDB.PPBuilder``; their sequences are
    converted to strings, concatenated in the order the builder yields them,
    and the result is split into a list of single-character strings.

    :param structure: Entity accepted by ``PPBuilder.build_peptides``.
    :return: List of single-character strings (empty if no polypeptide is
        found).
    """
    peptides = PDB.PPBuilder().build_peptides(structure)
    joined = "".join(str(peptide.get_sequence()) for peptide in peptides)
    return list(joined)
示例#27
0
# One pair of empty x/y coordinate lists per key of rama_preferences:
# "normals" and "outliers" are filled separately later on.
normals, outliers = {}, {}
for key, val in rama_preferences.items():
    normals[key] = dict(x=[], y=[])
    outliers[key] = dict(x=[], y=[])

# Calculate the torsion angle of the inputs
for inp in sys.argv[1:]:
    if not os.path.isfile(inp):
        print("{} not found!".format(inp))
        continue
    structure = PDB.PDBParser().get_structure('input_structure', inp)
    for model in structure:
        for chain in model:
            polypeptides = PDB.PPBuilder().build_peptides(chain)
            for poly_index, poly in enumerate(polypeptides):
                phi_psi = poly.get_phi_psi_list()
                for res_index, residue in enumerate(poly):
                    res_name = "{}".format(residue.resname)
                    res_num = residue.id[1]
                    phi, psi = phi_psi[res_index]
                    if phi and psi:
                        aa_type = ""
                        if str(poly[res_index + 1].resname) == "PRO":
                            aa_type = "PRE-PRO"
                        elif res_name == "PRO":
                            aa_type = "PRO"
                        elif res_name == "GLY":
                            aa_type = "GLY"
                        else: