def __init__(self, table, pdb):
    """Set up the per-structure state from an annotation table and a PDB file.

    Parameters
    ----------
    table
        Tabular annotation (used through the pandas DataFrame API) that
        provides at least the 'pdb_id', 'tcr_v_allele', 'tcr_chain' and
        'tcr_region' columns.  A re-indexed, NaN-free copy with an added
        'tcr_chain_name' column is stored on the instance.
    pdb
        File (path or handle) handed to ``PDBParser.get_structure``.

    The constructor prints the annotated table, and prints a warning when
    ``getRegionsResidues()`` or ``definePeptideChain()`` returns a falsy
    value.
    """
    table = table.reset_index(drop=True)
    # After reset_index the first row is always at label 0.
    struct = PDBParser().get_structure(table['pdb_id'][0], pdb)
    table = table.fillna('')

    # Label every row from its own V allele instead of assuming that all
    # alpha rows come first: a 'TRA' allele means alpha, anything else beta.
    chain_names = ['alpha' if 'TRA' in allele else 'beta'
                   for allele in table['tcr_v_allele'].tolist()]
    table.insert(table.columns.get_loc('tcr_chain'), 'tcr_chain_name',
                 chain_names)
    # Parenthesised single-argument print is valid on Python 2 and 3.
    print(table)

    self.__table = table
    self.__name = str(struct.get_id())
    self.__struct = struct
    self.__chains = [chain.get_id() for chain in struct[0]]
    self.__regions = table.groupby(['tcr_chain_name', 'tcr_region'])

    # Dictionary of regions residues;
    # looks like : { ('alpha', 'CDR1') : [residue list],
    #                ('alpha', 'CDR2') : [residue list], ... }
    # Built as a separate dict so the groupby's own ``groups`` mapping is
    # not overwritten with empty lists.
    self.__regions_res = {key: [] for key in self.__regions.groups}

    # Peptide residue list
    self.__peptide = []

    # Dictionaries with pairwise region matrices;
    # look like : { (('alpha', 'CDR1'), ('alpha', 'CDR2')) : dataframe,
    #               (('alpha', 'CDR1'), ('alpha', 'CDR3')) : dataframe, ... }
    self.__d_matrices = {}
    self.__e_matrices = {}

    self.verbose = True

    if not self.getRegionsResidues():
        print('SOME REGION WAS NOT FOUND IN PDB')
    if not self.definePeptideChain():
        print('PEPTIDE WAS NOT FOUND IN PDB')
def download_and_get_chains():
    """Download PDB entries and write each requested chain to its own file.

    The entries come from ``read_rostdb_entries()``, which is consumed here
    as a mapping of PDB id -> iterable of chain ids.  Every matching chain
    is saved in the working directory as ``<structure id>_<chain id>.pdb``.

    Processing is best-effort: any (pdb id, chain id) pair that could not be
    handled is collected and printed at the end instead of aborting the run.
    """
    # PDBList is used below, so it has to be part of the local import.
    from Bio.PDB import PDBIO, PDBList, PDBParser

    failed = []
    pdbs_dict = read_rostdb_entries()
    io = PDBIO()
    pdbl = PDBList()
    parser = PDBParser()

    for pdb_e, chains in pdbs_dict.items():
        # Fetch and parse once per entry, not once per requested chain.
        try:
            # Ask for the legacy PDB format explicitly and use the path the
            # downloader reports rather than rebuilding the file name.
            ent_path = pdbl.retrieve_pdb_file(pdb_e, pdir='./',
                                              file_format='pdb')
            pdb = parser.get_structure(pdb_e, ent_path)
        except Exception:
            # The whole entry is unusable: report every chain asked for.
            failed.extend((pdb_e, chain_e) for chain_e in chains)
            continue

        for chain_e in chains:
            try:
                for chain in pdb.get_chains():
                    if chain.get_id() == chain_e:
                        io.set_structure(chain)
                        io.save(pdb.get_id() + '_' + chain.get_id() + '.pdb')
            except Exception:
                # `Exception` (not a bare except) keeps Ctrl-C working.
                failed.append((pdb_e, chain_e))

    print("failures:", failed)
def download_and_get_chains():
    """Download PDB entries and write each requested chain to its own file.

    The entries come from ``read_rostdb_entries()``, which is consumed here
    as a mapping of PDB id -> iterable of chain ids.  Every matching chain
    is saved in the working directory as ``<structure id>_<chain id>.pdb``.

    Processing is best-effort: any (pdb id, chain id) pair that could not be
    handled is collected and printed at the end instead of aborting the run.
    """
    # PDBList is used below, so it has to be part of the local import.
    from Bio.PDB import PDBIO, PDBList, PDBParser

    failed = []
    pdbs_dict = read_rostdb_entries()
    io = PDBIO()
    pdbl = PDBList()
    parser = PDBParser()

    for pdb_e, chains in pdbs_dict.items():
        # Fetch and parse once per entry, not once per requested chain.
        try:
            # Ask for the legacy PDB format explicitly and use the path the
            # downloader reports rather than rebuilding the file name.
            ent_path = pdbl.retrieve_pdb_file(pdb_e, pdir='./',
                                              file_format='pdb')
            pdb = parser.get_structure(pdb_e, ent_path)
        except Exception:
            # The whole entry is unusable: report every chain asked for.
            failed.extend((pdb_e, chain_e) for chain_e in chains)
            continue

        for chain_e in chains:
            try:
                for chain in pdb.get_chains():
                    if chain.get_id() == chain_e:
                        io.set_structure(chain)
                        io.save(pdb.get_id() + '_' + chain.get_id() + '.pdb')
            except Exception:
                # `Exception` (not a bare except) keeps Ctrl-C working.
                failed.append((pdb_e, chain_e))

    print("failures:", failed)
# p = Pool(20)
# NOTE(review): the pool creation above is commented out, so `p` (as well as
# `download`, `list_result_n`, `result` and `tqdm`) must already exist when
# this fragment runs -- confirm against the rest of the file.

# Hand the work to the pool in slices of 20 so tqdm can show progress,
# then send whatever is left over after the last full slice.
parts = len(list_result_n) // 20
for i in tqdm(range(parts)):
    start = i * 20
    p.map(download, list_result_n[start:start + 20])
p.map(download, list_result_n[parts * 20:])

# Collect the wanted chain ids per structure from "structure;chain" records.
result_chains = {}
for i in result:
    for i1 in result[i]:
        fields = i1.split(";")
        struc = fields[0]
        chain = fields[1].strip()
        result_chains.setdefault(struc, []).append(chain)
print(result_chains)

# Write every wanted chain of each file in pdb_m/ to pdb_chain/.
for i in tqdm(os.listdir("pdb_m")):
    io = PDBIO()
    pdb = PDBParser().get_structure(i, "pdb_m/%s" % i)
    for chain in pdb.get_chains():
        # The structure key is the second "_"-separated part of the name.
        wanted = result_chains[i.split("_")[1]]
        if chain.get_id() in wanted:
            io.set_structure(chain)
            io.save("pdb_chain/" + pdb.get_id() + "_" + chain.get_id() + ".pdb")
# Demo script: build polypeptides from one PDB file and print a few pieces.
from Bio.PDB import PDBParser
from Bio.PDB import PPBuilder
from Bio.PDB import Polypeptide

item = '2bnr'
structure = PDBParser().get_structure(item, '../pdbs/' + item + '.pdb')
ppb = PPBuilder()
peps = ppb.build_peptides(structure)

# Single-argument print(...) behaves the same on Python 2 and Python 3,
# whereas the bare `print x` statement is a SyntaxError on Python 3.
print(structure.get_id())
print(peps[0])
# print(peps[0][1:-3])
print(peps[0][3:9])
p = peps[0][3:9]
print(peps[0][1].get_resname())
# Script: split the PDB file given as argv[1] into one PDB file per chain
# and write each chain's one-letter sequence to a matching FASTA file.
import os
import sys

from Bio.PDB import PDBParser, PDBIO
from Bio.PDB.Polypeptide import is_aa, three_to_one

if len(sys.argv) < 2:
    sys.exit("usage: %s <structure.pdb>" % sys.argv[0])

path = sys.argv[1]
# Strip whatever extension the file has rather than assuming four characters.
code = os.path.splitext(path)[0]

io = PDBIO()
pdb = PDBParser().get_structure(code, path)

for chain in pdb.get_chains():
    io.set_structure(chain)
    io.save(pdb.get_id() + "_" + chain.get_id() + ".pdb")

    # Standard amino acids become one-letter codes; every other residue in
    # the chain is written as "X".
    seq = [
        three_to_one(residue.get_resname())
        if is_aa(residue.get_resname(), standard=True)
        else "X"
        for residue in chain
    ]

    # The context manager closes the file even if the write fails.
    with open(code + "_" + chain.get_id() + '.fasta', 'w') as out:
        ## This line is used to display the sequence from each chain
        print(">Chain_" + chain.get_id() + "\n" + str("".join(seq)), file=out)