def check(self, structure, filename, file_format, obsolete=False, pdir=None):
    with self.make_temp_directory(os.getcwd()) as tmp:
        pdblist = PDBList(pdb=tmp, obsolete_pdb=os.path.join(tmp, "obsolete"))
        path = os.path.join(tmp, filename)
        if pdir:
            pdir = os.path.join(tmp, pdir)
        pdblist.retrieve_pdb_file(structure, obsolete=obsolete, pdir=pdir, file_format=file_format)
        self.assertTrue(os.path.isfile(path))
        os.remove(path)
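# A minimal usage sketch (an assumption, not taken from the original test suite):
# inside the same unittest.TestCase, the helper above would be called with a PDB
# code, the filename PDBList is expected to create, and the requested file format.
def test_retrieve_mmcif(self):
    self.check("127d", "127d.cif", "mmCif")

def test_retrieve_pdb(self):
    self.check("127d", "pdb127d.ent", "pdb")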
def download_PDB_file():
    fichiers = os.listdir("balibase/RV11.unaligned")
    for file in fichiers:
        records = saveFASTA("balibase/RV11.unaligned/" + file)
        ids = []
        for record in records:
            ids.append(record.id.split("_")[0])
        for i in ids:
            pdbl = PDBList()
            pdbl.retrieve_pdb_file(i, pdir="PDB")
from Bio.PDB.PDBParser import PDBParser
from Bio.PDB.PDBList import PDBList

pdbl = PDBList()
parser = PDBParser()
for i in ["ID"]:  # replace "ID" with real PDB codes, e.g. ["1FAT", "2GAA"]
    pdbl.retrieve_pdb_file(pdb_code=i, file_format="pdb", pdir="./")
    structure_id = i
    filename = "pdb" + i.lower() + ".ent"
    structure = parser.get_structure(structure_id, filename)
    print("id: ", structure_id)
    print("name: ", structure.header["name"])
    print("deposition date :", structure.header["deposition_date"])
    print("release date :", structure.header["release_date"])
    print("structure method : ", structure.header["structure_method"])
    print("resolution : ", structure.header["resolution"])
    print("")
from Bio.PDB.PDBList import PDBList

pdbl = PDBList()
pdbl.retrieve_pdb_file("6WO1", file_format="mmtf", pdir="/home/koreanraichu/")
# If no file format is specified, the structure is downloaded as a CIF file.
# Passing file_format="<format>" downloads the file in that specific format.
# Passing pdir="<path>" also lets you choose the download directory.
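# For comparison, a minimal sketch (an addition, not part of the original snippet):
# omitting file_format falls back to the default PDBx/mmCif download, so the same
# structure would be saved as 6wo1.cif in the given directory.
pdbl.retrieve_pdb_file("6WO1", pdir="/home/koreanraichu/")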
""" prots,enzsites = findRestr("orf_coding_all.fasta") print("Non restrictive proteins : ",prots) for enz in enzsites: print("ID ",enz[0]) print("EcoRI ",enz[1][0]) print("XhoI ",enz[1][1]) print("TaqI ",enz[1][2])""" #######################################################################################################"" from Bio.PDB.MMCIFParser import MMCIFParser from Bio.PDB.PDBList import PDBList from Bio.PDB.MMCIF2Dict import MMCIF2Dict pdbl = PDBList() pdbl.retrieve_pdb_file("2GAA") def readPDBFile(filename): mmcif_dict = MMCIF2Dict(filename) nbchains, nbres, nbatoms, res = mmcif_dict[ '_struct_sheet.number_strands'], mmcif_dict[ '_struct_site.pdbx_num_residues'], mmcif_dict[ '_refine_hist.number_atoms_total'], mmcif_dict['_exptl.method'] return sum([int(nbchains[i]) for i in range(len(nbchains))]), nbres, nbatoms, res print(readPDBFile("ga/2gaa.cif"))
from Bio.PDB.PDBParser import PDBParser
from Bio.PDB.PDBList import PDBList

pdbl = PDBList()
for vrstica in open('./structures lists/new structures.txt'):
    structure_id = vrstica.strip('\n')
    pdbl.retrieve_pdb_file(structure_id, file_format='pdb', pdir='pdb structures')
from Bio.PDB.PDBList import PDBList

pdblist = PDBList()
pdblist.retrieve_pdb_file("127d")                        # downloads structure 127D in PDBx/mmCif format
pdblist.retrieve_pdb_file("127d", file_format="pdb")     # downloads structure 127D in PDB format
pdblist.retrieve_pdb_file("127d", file_format="xml")     # downloads structure 127D in PDBML/XML format
pdblist.retrieve_pdb_file("127d", file_format="mmtf")    # downloads structure 127D in MMTF format
pdblist.retrieve_pdb_file("3k1q", file_format="bundle")  # downloads large structure 3K1Q as a PDB-like bundle
pdblist.retrieve_pdb_file("347d", obsolete=True)         # downloads obsolete structure 347D in PDBx/mmCif format
pdblist.download_pdb_files(["1esy", "127d"])             # downloads structures 127D and 1ESY in PDBx/mmCif format
pdblist.download_entire_pdb()                            # downloads the entire PDB database in PDBx/mmCif format
pdblist.update_pdb()                                     # performs the weekly update of the database
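# Added note (not part of the original snippet): retrieve_pdb_file returns the
# local path of the downloaded file, so it can be passed straight to a parser.
# The pdir below is an illustrative assumption.
local_path = pdblist.retrieve_pdb_file("127d", pdir="downloads", file_format="pdb")
print(local_path)  # e.g. downloads/pdb127d.ent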
import sys
import time

from Bio.PDB.PDBList import PDBList


def DoRetrievePDBFile(aPDB_Code, aFolder):
    global USE_ALT_PDB_SERVER
    done = False
    errors_before_quit = 20
    seconds_between_retries = 30
    fetchedfile = ""
    alt_server = "http://www.rcsb.org/pdb/files/"

    while done == False:
        pdblist = None
        if USE_ALT_PDB_SERVER:
            pdblist = PDBList(server=alt_server)
        else:
            pdblist = PDBList()
            #pdblist = PDBList( server='ftp://ftp.wwpdb.org')
            #server = 'ftp://ftp.rcsb.org'
            #server = "ftp.ebi.ac.uk/pub/databases/pdb/"
        try:
            #http://biopython.org/DIST/docs/api/Bio.PDB.PDBList%27-pysrc.html
            fetchedfile = pdblist.retrieve_pdb_file(pdb_code=aPDB_Code, pdir=aFolder,
                                                    file_format="pdb", obsolete=False)
            done = True

            if fetchedfile and len(fetchedfile) and (fetchedfile.find(".ent") > 0
                                                     or fetchedfile.find(".pdb") > 0):
                #print "Structure fetched, PDB code: " + aPDB_Code
                print("INFO: Structure " + aPDB_Code + " fetched. [OK]")
                #io = PDBIO()
                #io.set_structure( s)
                #io.save( filename)
            else:
                print("WARNING: Fetch failed [FAIL]")
        except IOError as ex:
            sys.stderr.write(
                "WARNING: Could not download structure {0}. An exception of type {1} occurred.\n Arguments: {2!r}\n"
                .format(aPDB_Code, type(ex).__name__, ex.args))
            sys.stderr.write("INFO: Retrying connection in %i seconds...\n" % seconds_between_retries)

            for a in ex.args:
                # Downloading too many structures too fast?
                if str(a).lower().find("too many") >= 0:
                    seconds_between_retries += 10
                    break
                if str(a).lower().find("no such file") >= 0:
                    # No need to retry
                    return fetchedfile
                if str(a).lower().find("did not properly respond") >= 0:
                    # No need to retry
                    sys.stderr.write("INFO: Switching download thread to alternative server '%s'.\n" % alt_server)
                    USE_ALT_PDB_SERVER = True

            time.sleep(seconds_between_retries)
            done = False
            errors_before_quit -= 1
            if errors_before_quit <= 0:
                sys.stderr.write("ERROR: Failed too many times. Quitting...\n")
                break

    return fetchedfile
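# A hypothetical call to the helper above (the PDB code and target folder are
# illustrative only; USE_ALT_PDB_SERVER must be defined at module level first):
USE_ALT_PDB_SERVER = False
fetched = DoRetrievePDBFile("1crn", "./structures")
print("Downloaded to:", fetched)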
def generate_structural_statistics(jobId, dom, pdb_code, selchain, uploaded_str,
                                   modeled_str=False, savequeue="jobinfo"):
    try:
        tdata = TripleMapping.objects.get(pk=jobId)
    except (KeyError, TripleMapping.DoesNotExist):
        return "str stats gen error!"
    threeList = [
        "ALA", "CYS", "ASP", "GLU", "PHE", "GLY", "HIS", "ILE", "LYS", "LEU",
        "MET", "ASN", "PRO", "GLN", "ARG", "SER", "THR", "VAL", "TRP", "TYR"
    ]

    # fetch the structure from the PDB unless an uploaded or modeled file is used
    if uploaded_str == False:
        pdbl = PDBList()
        pdbl.retrieve_pdb_file(pdb_code, pdir='./PDB', file_format="pdb")
        pdb_filename = "./PDB/pdb" + pdb_code.lower() + ".ent"
    else:
        if modeled_str == False:
            pdb_filename = "./PDB/" + jobId + "___" + pdb_code
        else:
            pdb_filename = "./PDB/model/" + pdb_code

    # collect residue identifiers for the selected chain
    pdbsequencefull = []
    pdbsequencenum = []
    structure = Bio.PDB.PDBParser().get_structure(pdb_code, pdb_filename)
    model = structure[0]
    dssp = DSSP(model, pdb_filename, dssp='mkdssp', acc_array="Wilke")
    for chain in model:
        if chain.id == selchain:
            for residue in chain:
                if Bio.PDB.Polypeptide.is_aa(residue) == True:
                    number = residue.get_id()
                    try:
                        num = str(number[1]) + str((number[2].rstrip())[0])
                    except IndexError:
                        num = str(number[1])
                    pdbsequencenum.append(residue.get_resname() + num)
                    #new_id = (" ", residue.get_id()[1], residue.get_id()[2])
                    pdbsequencefull.append(residue.get_id())
    pdbsequencenum = pdbsequencenum[1:-1]
    pdbsequencefull = pdbsequencefull[1:-1]

    # per-residue secondary structure and torsion angles from DSSP
    dssp_info = []
    for i in range(0, len(pdbsequencenum)):
        chain_res = pdbsequencenum[i]
        residue_key = pdbsequencefull[i]
        if (chain_res[0:3] in threeList):
            dssp_res = dssp[selchain, residue_key]
            dssp_info.append({
                "name": chain_res,
                "sec": str(dssp_res[2]),
                "phi": str(dssp_res[4]),
                "psi": str(dssp_res[5]),
                "depth": str(dssp_res[3])
            })
    pdb_coded = pdb_code
    if (modeled_str == True):
        pdb_coded = pdb_code.split("_")[3] + "_" + pdb_code.split("_")[4]
    full_dssp_info = {"_".join([dom, pdb_coded, selchain]): dssp_info}
    prev_dsspinfo = getattr(tdata, "dsspinfo")
    if prev_dsspinfo:
        prev_dsspinfo = prev_dsspinfo.split("]}]")[0] + "]},"
    else:
        prev_dsspinfo = "["
    setattr(tdata, "dsspinfo", prev_dsspinfo + str(full_dssp_info) + "]")
    tdata.save()

    # run ring software and obtain results
    process = Popen([
        "./bin/Ring", "-i", pdb_filename, "-c", selchain,
        "-N", "./jobs/nodes/" + jobId + "_" + dom + "_" + pdb_code + "_" + selchain + ".nds",
        "-E", "./jobs/edges/" + jobId + "_" + dom + "_" + pdb_code + "_" + selchain + ".eds",
        "-g", "1"
    ], stdout=PIPE)
    (output, err) = process.communicate()
    exit_code = process.wait()

    # read ring software results and generate json objects
    f1 = open("./jobs/edges/" + jobId + "_" + dom + "_" + pdb_code + "_" + selchain + ".eds", "r+")
    lines = f1.readlines()
    G = nx.MultiGraph()
    G2 = nx.Graph()
    pairs = []
    singlegraph = {}
    for l in range(1, len(lines)):
        line = lines[l]
        res1 = line.split()[0].split(":")[-1] + line.split()[0].split(":")[1]
        res2 = line.split()[2].split(":")[-1] + line.split()[2].split(":")[1]
        order_pair = sorted([res1, res2])
        interaction = line.split()[1]
        energy = float(line.split()[5])
        if "NLA" not in res1 and "NLA" not in res2:
            G.add_edge(res1, res2, weight=energy, itype=interaction)
            if order_pair not in pairs:
                singlegraph["".join(order_pair)] = (res1, res2, energy)
                pairs.append(order_pair)
            else:
                new_energy = singlegraph["".join(order_pair)][2] + energy
                singlegraph["".join(order_pair)] = (res1, res2, new_energy)
    G2.add_weighted_edges_from(singlegraph.values())
    g_distance_dict1 = {(e1, e2, w): 1 / w for e1, e2, w in G.edges(data='weight')}
    nx.set_edge_attributes(G, g_distance_dict1, 'distance')
    g_distance_dict = {(e1, e2): 1 / weight for
                       e1, e2, weight in G2.edges(data='weight')}
    nx.set_edge_attributes(G2, g_distance_dict, 'distance')

    # per-residue network statistics
    graph_stats = []
    weighted_degree = G.degree(weight='weight')
    between = nx.betweenness_centrality(G2, weight='weight')
    closeness = nx.closeness_centrality(G, distance='distance')
    mutstats = None
    for k in between:
        graph_stats.append({
            "res": k,
            "betweeness": between[k],
            "closeness": closeness[k],
            "wdegree": weighted_degree[k]
        })
        if (modeled_str == True and
                k == pdb_code.split("_")[5] + pdb_code.split("_")[6].split(".")[0]):
            mutstats = {
                "res": k,
                "betweeness": between[k],
                "closeness": closeness[k],
                "wdegree": weighted_degree[k]
            }
    graph_stats_full = {"_".join([dom, pdb_coded, selchain]): graph_stats}
    if (modeled_str == False):
        prev_gstats = getattr(tdata, "graph_stats")
        if prev_gstats:
            prev_gstats = prev_gstats.replace("];", ", ")  #.split("}}]")[0] + "}},"
        else:
            prev_gstats = "["
        setattr(tdata, "graph_stats", prev_gstats + str(graph_stats_full) + "];")
        tdata.save()
    else:
        prev_muts = getattr(tdata, "mut_stats")
        if prev_muts:
            prev_muts = prev_muts.replace("]", " , ")
        else:
            prev_muts = "["
        setattr(tdata, "mut_stats", prev_muts + str(mutstats) + "]")
        tdata.save()
        #pass
    #print(graph_stats)
    #quit()

    # serialize the interaction graph(s) as JSON
    datag = json_graph.node_link_data(G)
    s = json.dumps(datag)
    datag_full = {"_".join([dom, pdb_coded, selchain]): s}
    if (modeled_str == False):
        prev_datag = getattr(tdata, "graph_json")
        if prev_datag:
            prev_datag = prev_datag.replace("];", ", ")  #.split("]}]")[0] + "]},"
        else:
            prev_datag = "["
        setattr(tdata, "graph_json", prev_datag + str(datag_full) + "];")
        tdata.save()
    else:
        mNode = {
            "id": pdb_code.split("_")[5] + pdb_code.split("_")[6].split(".")[0]
        }
        #nodesAt5 = [x for x,y in G.nodes(data=True) if y['id']== pdb_code.split("_")[5] + pdb_code.split("_")[6].split(".")[0]]
        #mNode = pdb_code.split("_")[5] + pdb_code.split("_")[6].split(".")[0]
        #newedges = [(u,v,d) for u,v,d in G.edges(data = True) if ((u['id'] == mNode) or (v['id'] == mNode))]
        nodesAt5 = [x for x in G.nodes() if x == mNode["id"]]
        #H = nx.MultiGraph()
        #H.add_edges_from(newedges)
        H = G.subgraph(nodesAt5)
        datam = json_graph.node_link_data(H)
        sm = json.dumps(datam)
        datam_full = {
            "_".join([
                dom, pdb_coded,
                pdb_code.split("_")[5] + pdb_code.split("_")[6].split(".")[0]
            ]): sm
        }
        prevdatam = getattr(tdata, "mut_json")
        if prevdatam:
            prevdatam = prevdatam.replace("];end;", ", ")
        else:
            prevdatam = "["
        setattr(tdata, "mut_json", prevdatam + str(datam_full) + "];end;")
        tdata.save()

    f1.close()

    # save objects into attributes and mark this job as done in the queue
    jobs = getattr(tdata, savequeue)
    print("jobs")
    print(jobs)
    job_this = dom + "_" + pdb_code + "_" + selchain
    if (modeled_str == True):
        job_this = (dom + "_" + pdb_code.split("_")[3] + "_" + pdb_code.split("_")[4]
                    + "_" + pdb_code.split("_")[6].split(".")[0] + "_" + pdb_code.split("_")[5])
    print("job_this")
    print(job_this)
    new_jobs = []
    for job in jobs.split(","):
        if job_this in job:
            new_job = "_".join(job.split("_")[:-1]) + "_done"
            new_jobs.append(new_job)
        else:
            new_jobs.append(job)
    print("new_jobs")
    print(new_jobs)
    setattr(tdata, savequeue, ",".join(new_jobs))
    tdata.save()
    return "str stats gen!"