Example #1
def check(self, structure, filename, file_format, obsolete=False, pdir=None):
    # Test helper: download one structure into a temp directory, assert the
    # expected file exists there, then clean up.
    with self.make_temp_directory(os.getcwd()) as tmp:
        pdblist = PDBList(pdb=tmp, obsolete_pdb=os.path.join(tmp, "obsolete"))
        path = os.path.join(tmp, filename)
        if pdir:
            pdir = os.path.join(tmp, pdir)
        pdblist.retrieve_pdb_file(structure, obsolete=obsolete, pdir=pdir, file_format=file_format)
        self.assertTrue(os.path.isfile(path))
        os.remove(path)
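The helper above is a unittest-style method, so a test drives it with the structure code, the path expected inside the temp directory, and the format. A minimal sketch (the test name is hypothetical; the expected filename assumes PDBList's usual pdb<code>.ent naming for PDB-format downloads):

def test_retrieve_pdb_format(self):
    # Hypothetical test: with an explicit pdir the download location is
    # deterministic, so the expected relative path is pdir plus PDBList's
    # standard "pdb<code>.ent" filename for PDB-format files.
    self.check("127d", os.path.join("pdb", "pdb127d.ent"), "pdb", pdir="pdb")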
Example #3
import os
from Bio.PDB.PDBList import PDBList


def download_PDB_file():
    # saveFASTA is a project-local helper (see the sketch below) that parses
    # a FASTA file into records whose ids start with a PDB code.
    fichiers = os.listdir("balibase/RV11.unaligned")
    pdbl = PDBList()  # one downloader instance is enough for all IDs
    for file in fichiers:
        records = saveFASTA("balibase/RV11.unaligned/" + file)
        ids = [record.id.split("_")[0] for record in records]
        for i in ids:
            pdbl.retrieve_pdb_file(i, pdir="PDB")
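The snippet depends on a saveFASTA helper that is not shown. Assuming it simply parses a FASTA file into SeqRecord objects (the name and behavior are inferred from how records is used above), a minimal sketch with Bio.SeqIO:

from Bio import SeqIO

def saveFASTA(path):
    # Assumed helper: parse a FASTA file into SeqRecord objects. Each
    # record.id is expected to look like "1abc_A", so the caller takes the
    # PDB code from the part before the underscore.
    return list(SeqIO.parse(path, "fasta"))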
Example #4
from Bio.PDB.PDBParser import PDBParser
from Bio.PDB.PDBList import PDBList

pdbl = PDBList()
parser = PDBParser()

for i in ["ID"]:
    pdbl.retrieve_pdb_file(pdb_code=i,file_format="pdb",pdir="./")
    structure_id = i
    filename = "pdb"+i.lower()+".ent"
    structure = parser.get_structure(structure_id, filename)
    print("id: ",structure_id)
    print("name: ", structure.header["name"])
    print("deposition date :", structure.header["deposition_date"])
    print("release date :", structure.header["release_date"])
    print("structure method : ", structure.header["structure_method"])
    print("resolution : ", structure.header["resolution"]) 
    print("")
from Bio.PDB.PDBList import PDBList
pdbl = PDBList()
pdbl.retrieve_pdb_file("6WO1", file_format="mmtf", pdir="/home/koreanraichu/")
# If no file_format is given, the structure is downloaded as a CIF file.
# Passing file_format="<extension>" downloads a specific file format.
# Passing pdir="<path>" also lets you choose the download directory.
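Since retrieve_pdb_file returns the path of the file it saved, the download can feed straight into a parser. A small sketch of the default mmCIF route (directory reused from the snippet above):

from Bio.PDB.MMCIFParser import MMCIFParser

# retrieve_pdb_file returns the local path of the downloaded file, so it can
# be handed directly to the matching parser (mmCIF is the default format).
path = pdbl.retrieve_pdb_file("6wo1", pdir="/home/koreanraichu/")
structure = MMCIFParser(QUIET=True).get_structure("6wo1", path)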
Example #6
"""
prots,enzsites = findRestr("orf_coding_all.fasta")
print("Non restrictive proteins : ",prots)
for enz in enzsites:
    print("ID ",enz[0])
    print("EcoRI ",enz[1][0])
    print("XhoI ",enz[1][1])
    print("TaqI ",enz[1][2])"""

#######################################################################################################
from Bio.PDB.MMCIFParser import MMCIFParser
from Bio.PDB.PDBList import PDBList
from Bio.PDB.MMCIF2Dict import MMCIF2Dict

pdbl = PDBList()
pdbl.retrieve_pdb_file("2GAA")


def readPDBFile(filename):
    # Flatten the mmCIF file into a dictionary of "_category.item" -> value(s).
    mmcif_dict = MMCIF2Dict(filename)
    # Names kept from the original snippet; the items actually read are the
    # per-sheet strand counts, the site residue count, the total atom count
    # and the experimental method.
    nbchains = mmcif_dict['_struct_sheet.number_strands']
    nbres = mmcif_dict['_struct_site.pdbx_num_residues']
    nbatoms = mmcif_dict['_refine_hist.number_atoms_total']
    res = mmcif_dict['_exptl.method']
    return sum(int(n) for n in nbchains), nbres, nbatoms, res


print(readPDBFile("ga/2gaa.cif"))
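Not every PDB entry defines all four mmCIF categories used above; _struct_sheet.number_strands, for instance, is absent when a structure has no sheets. Since MMCIF2Dict behaves like a plain dict, a defensive variant (hypothetical helper name) can use .get():

def readPDBFileSafe(filename):
    # Defensive variant of readPDBFile: .get() returns a default instead of
    # raising KeyError when a category is missing from the entry.
    d = MMCIF2Dict(filename)
    strands = d.get('_struct_sheet.number_strands', [])
    return (sum(int(n) for n in strands),
            d.get('_struct_site.pdbx_num_residues'),
            d.get('_refine_hist.number_atoms_total'),
            d.get('_exptl.method'))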
Example #7
from Bio.PDB.PDBParser import PDBParser
from Bio.PDB.PDBList import PDBList

pdbl = PDBList()

for vrstica in open('./structures lists/new structures.txt'):  # one PDB ID per line
    structure_id = vrstica.strip('\n')
    pdbl.retrieve_pdb_file(structure_id, file_format='pdb', pdir='pdb structures')
Example #8
from Bio.PDB.PDBList import PDBList

pdblist = PDBList()
pdblist.retrieve_pdb_file("127d")                        # downloads structure 127D in PDBx/mmCIF format
pdblist.retrieve_pdb_file("127d", file_format="pdb")     # downloads structure 127D in PDB format
pdblist.retrieve_pdb_file("127d", file_format="xml")     # downloads structure 127D in PDBML/XML format
pdblist.retrieve_pdb_file("127d", file_format="mmtf")    # downloads structure 127D in MMTF format
pdblist.retrieve_pdb_file("3k1q", file_format="bundle")  # downloads large structure 3K1Q as a PDB-like bundle
pdblist.retrieve_pdb_file("347d", obsolete=True)         # downloads obsolete structure 347D in PDBx/mmCIF format
pdblist.download_pdb_files(["1esy", "127D"])             # downloads structures 127D and 1ESY (codes go in as a list)
pdblist.download_entire_pdb()                            # downloads the entire PDB database in PDBx/mmCIF format
pdblist.update_pdb()                                     # performs the weekly update of the database
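Each format pairs with its own parser when reading the files back. A short sketch for two of the downloads above; the two-letter subdirectory ("27" for 127D) assumes the default directory layout seen in Example #6 ("ga/2gaa.cif"):

from Bio.PDB.MMCIFParser import MMCIFParser
from Bio.PDB.PDBParser import PDBParser

# Read back two of the downloads with their matching parsers. With default
# settings PDBList files land in a two-letter subdirectory of the PDB code.
cif_structure = MMCIFParser(QUIET=True).get_structure("127d", "27/127d.cif")
pdb_structure = PDBParser(QUIET=True).get_structure("127d", "27/pdb127d.ent")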
Example #9
import sys
import time

from Bio.PDB.PDBList import PDBList

USE_ALT_PDB_SERVER = False  # assumed initial value; flipped after connection failures


def DoRetrievePDBFile(aPDB_Code, aFolder):

    global USE_ALT_PDB_SERVER

    done = False
    errors_before_quit = 20
    seconds_between_retries = 30
    fetchedfile = ""
    alt_server = "http://www.rcsb.org/pdb/files/"

    while not done:

        if USE_ALT_PDB_SERVER:
            pdblist = PDBList(server=alt_server)
        else:
            pdblist = PDBList()

        try:
            fetchedfile = pdblist.retrieve_pdb_file(pdb_code=aPDB_Code,
                                                    pdir=aFolder,
                                                    file_format="pdb",
                                                    obsolete=False)
            done = True
            if fetchedfile and (".ent" in fetchedfile or ".pdb" in fetchedfile):
                print("INFO: Structure " + aPDB_Code + " fetched. [OK]")
            else:
                print("WARNING: Fetch failed [FAIL]")

        except IOError as ex:
            sys.stderr.write(
                "WARNING: Could not download structure {0}. An exception of type {1} occurred.\n"
                "         Arguments: {2!r}\n".format(aPDB_Code, type(ex).__name__, ex.args))
            sys.stderr.write("INFO: Retrying connection in %i seconds...\n" %
                             seconds_between_retries)

            for a in ex.args:
                # Downloading too many structures too fast?
                if "too many" in str(a).lower():
                    seconds_between_retries += 10
                    break
                if "no such file" in str(a).lower():
                    # Entry is missing on the server; no point in retrying.
                    return fetchedfile
                if "did not properly respond" in str(a).lower():
                    sys.stderr.write(
                        "INFO: Switching download thread to alternative server '%s'.\n"
                        % alt_server)
                    USE_ALT_PDB_SERVER = True

            time.sleep(seconds_between_retries)
            done = False
            errors_before_quit -= 1
            if errors_before_quit <= 0:
                sys.stderr.write("ERROR: Failed too many times. Quitting...\n")
                break

    return fetchedfile
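A usage sketch for the retry wrapper (the folder name is illustrative): it returns the local path on success, or an empty string if every retry failed.

# Illustrative call site: fetch one structure with retries and report the
# local path; an empty return value means all retries failed.
path = DoRetrievePDBFile("127d", "./pdb_structures")
if path:
    print("Saved to: " + path)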
Example #10
def generate_structural_statistics(jobId,
                                   dom,
                                   pdb_code,
                                   selchain,
                                   uploaded_str,
                                   modeled_str=False,
                                   savequeue="jobinfo"):
    try:
        tdata = TripleMapping.objects.get(pk=jobId)
    except (KeyError, TripleMapping.DoesNotExist):
        return "str stats gen error!"
    threeList = [
        "ALA", "CYS", "ASP", "GLU", "PHE", "GLY", "HIS", "ILE", "LYS", "LEU",
        "MET", "ASN", "PRO", "GLN", "ARG", "SER", "THR", "VAL", "TRP", "TYR"
    ]
    if not uploaded_str:
        pdbl = PDBList()
        pdbl.retrieve_pdb_file(pdb_code, pdir='./PDB', file_format="pdb")
        pdb_filename = "./PDB/pdb" + pdb_code.lower() + ".ent"
    else:
        if not modeled_str:
            pdb_filename = "./PDB/" + jobId + "___" + pdb_code
        else:
            pdb_filename = "./PDB/model/" + pdb_code
    pdbsequencefull = []
    pdbsequencenum = []
    structure = Bio.PDB.PDBParser().get_structure(pdb_code, pdb_filename)
    model = structure[0]
    dssp = DSSP(model, pdb_filename, dssp='mkdssp', acc_array="Wilke")
    for chain in model:
        if chain.id == selchain:
            for residue in chain:
                if Bio.PDB.Polypeptide.is_aa(residue):
                    number = residue.get_id()
                    try:
                        num = str(number[1]) + str((number[2].rstrip())[0])
                    except IndexError:
                        num = str(number[1])
                    pdbsequencenum.append(residue.get_resname() + num)
                    #new_id = (" ", residue.get_id()[1], residue.get_id()[2])
                    pdbsequencefull.append(residue.get_id())
    pdbsequencenum = pdbsequencenum[1:-1]
    pdbsequencefull = pdbsequencefull[1:-1]
    dssp_info = []
    for i in range(0, len(pdbsequencenum)):
        chain_res = pdbsequencenum[i]
        residue_key = pdbsequencefull[i]
        if (chain_res[0:3] in threeList):
            dssp_res = dssp[selchain, residue_key]
            dssp_info.append({
                "name": chain_res,
                "sec": str(dssp_res[2]),
                "phi": str(dssp_res[4]),
                "psi": str(dssp_res[5]),
                "depth": str(dssp_res[3])
            })

    pdb_coded = pdb_code
    if modeled_str:
        pdb_coded = pdb_code.split("_")[3] + "_" + pdb_code.split("_")[4]

    full_dssp_info = {"_".join([dom, pdb_coded, selchain]): dssp_info}
    prev_dsspinfo = getattr(tdata, "dsspinfo")
    if prev_dsspinfo:
        prev_dsspinfo = prev_dsspinfo.split("]}]")[0] + "]},"
    else:
        prev_dsspinfo = "["
    setattr(tdata, "dsspinfo", prev_dsspinfo + str(full_dssp_info) + "]")
    tdata.save()

    # run ring software and obtain results
    process = Popen([
        "./bin/Ring", "-i", pdb_filename, "-c", selchain, "-N",
        "./jobs/nodes/" + jobId + "_" + dom + "_" + pdb_code + "_" + selchain +
        ".nds", "-E", "./jobs/edges/" + jobId + "_" + dom + "_" + pdb_code +
        "_" + selchain + ".eds", "-g", "1"
    ],
                    stdout=PIPE)
    (output, err) = process.communicate()
    exit_code = process.wait()

    # read ring software results and generate json objects
    f1 = open(
        "./jobs/edges/" + jobId + "_" + dom + "_" + pdb_code + "_" + selchain +
        ".eds", "r+")
    lines = f1.readlines()
    G = nx.MultiGraph()
    G2 = nx.Graph()
    pairs = []
    singlegraph = {}
    for l in range(1, len(lines)):
        line = lines[l]
        res1 = line.split()[0].split(":")[-1] + line.split()[0].split(":")[1]
        res2 = line.split()[2].split(":")[-1] + line.split()[2].split(":")[1]
        order_pair = sorted([res1, res2])
        interaction = line.split()[1]
        energy = float(line.split()[5])
        if "NLA" not in res1 and "NLA" not in res1:
            G.add_edge(res1, res2, weight=energy, itype=interaction)
            if order_pair not in pairs:
                singlegraph["".join(order_pair)] = (res1, res2, energy)
                pairs.append(order_pair)
            else:
                new_energy = singlegraph["".join(order_pair)][2] + energy
                singlegraph["".join(order_pair)] = (res1, res2, new_energy)
    G2.add_weighted_edges_from(singlegraph.values())

    g_distance_dict1 = {(e1, e2, w): 1 / w
                        for e1, e2, w in G.edges(data='weight')}
    nx.set_edge_attributes(G, g_distance_dict1, 'distance')

    g_distance_dict = {(e1, e2): 1 / weight
                       for e1, e2, weight in G2.edges(data='weight')}
    nx.set_edge_attributes(G2, g_distance_dict, 'distance')
    graph_stats = []

    weighted_degree = G.degree(weight='weight')

    between = nx.betweenness_centrality(G2, weight='weight')

    closeness = nx.closeness_centrality(G, distance='distance')

    mutstats = None
    for k in between:
        graph_stats.append({
            "res": k,
            "betweeness": between[k],
            "closeness": closeness[k],
            "wdegree": weighted_degree[k]
        })
        if modeled_str and k == (pdb_code.split("_")[5] +
                                 pdb_code.split("_")[6].split(".")[0]):
            mutstats = {
                "res": k,
                "betweeness": between[k],
                "closeness": closeness[k],
                "wdegree": weighted_degree[k]
            }

    graph_stats_full = {"_".join([dom, pdb_coded, selchain]): graph_stats}
    if not modeled_str:
        prev_gstats = getattr(tdata, "graph_stats")
        if prev_gstats:
            prev_gstats = prev_gstats.replace("];", ", ")
        else:
            prev_gstats = "["
        setattr(tdata, "graph_stats",
                prev_gstats + str(graph_stats_full) + "];")
        tdata.save()
    else:
        prev_muts = getattr(tdata, "mut_stats")
        if prev_muts:
            prev_muts = prev_muts.replace("]", " , ")
        else:
            prev_muts = "["
        setattr(tdata, "mut_stats", prev_muts + str(mutstats) + "]")
        tdata.save()


    datag = json_graph.node_link_data(G)
    s = json.dumps(datag)
    datag_full = {"_".join([dom, pdb_coded, selchain]): s}
    if not modeled_str:
        prev_datag = getattr(tdata, "graph_json")
        if prev_datag:
            prev_datag = prev_datag.replace("];", ", ")
        else:
            prev_datag = "["
        setattr(tdata, "graph_json", prev_datag + str(datag_full) + "];")
        tdata.save()
    else:
        # Build the subgraph around the mutated residue node.
        mNode = pdb_code.split("_")[5] + pdb_code.split("_")[6].split(".")[0]
        nodesAt5 = [x for x in G.nodes() if x == mNode]
        H = G.subgraph(nodesAt5)
        datam = json_graph.node_link_data(H)
        sm = json.dumps(datam)
        datam_full = {
            "_".join([dom, pdb_coded, mNode]):
            sm  # use the subgraph JSON dump, not the full-graph one
        }
        prevdatam = getattr(tdata, "mut_json")
        if prevdatam:
            prevdatam = prevdatam.replace("];end;", ", ")
        else:
            prevdatam = "["
        setattr(tdata, "mut_json", prevdatam + str(datam_full) + "];end;")
        tdata.save()
        pass
    f1.close()

    #save objects into attributes

    jobs = getattr(tdata, savequeue)
    print("jobs")
    print(jobs)
    job_this = dom + "_" + pdb_code + "_" + selchain
    if modeled_str:
        job_this = dom + "_" + pdb_code.split("_")[3] + "_" + pdb_code.split(
            "_")[4] + "_" + pdb_code.split("_")[6].split(
                ".")[0] + "_" + pdb_code.split("_")[5]
    print("job_this")
    print(job_this)
    new_jobs = []
    for job in jobs.split(","):
        if job_this in job:
            new_job = "_".join(job.split("_")[:-1]) + "_done"
            new_jobs.append(new_job)
        else:
            new_jobs.append(job)
    print("new_jobs")
    print(new_jobs)
    setattr(tdata, savequeue, ",".join(new_jobs))
    tdata.save()
    return "str stats gen!"