Пример #1
0
def filterBlastResult(d_dataset, p_dir_dataset, substruct, thresold_RX=2.5, thresold_blast=1e-4, debug=1, dbPDB="/home/borrel/PDB/"):
    """
    Filter the BLAST hits of every conserved reference on e-value, ligand
    content and resolution, and record the retained hits in place.

    For each entry of d_dataset whose "conserve" flag is not 0, every hit of
    its "align" mapping (hit name -> e-value) is examined.  A hit is kept when:
      - its e-value is <= thresold_blast,
      - its first four characters (the PDB code) differ from the reference,
      - its PDB file could be imported (importPDB did not return 0),
      - it has at least one ligand and none of them is `substruct`,
      - its resolution converts to a float that is <= thresold_RX.
    Kept hits are appended, upper-cased, to d_dataset[pdb_ref]["blast"]; only
    one chain per PDB code is stored.

    Args:
        d_dataset: dict keyed by reference PDB ID; modified in place.
        p_dir_dataset: dataset folder, expected to end with a path separator
            because sub-folder names are appended by string concatenation.
        substruct: ligand identifier that hits must NOT contain.
        thresold_RX: maximum accepted resolution.
        thresold_blast: maximum accepted e-value.
        debug: 1 to print progress on stdout, any other value to stay quiet.
        dbPDB: path handed to downloadFile.importPDB as `dbPDB`
            (previously hard-coded to this default).

    Returns:
        None.
    """

    for pdb_ref, d_ref in d_dataset.items():
        if d_ref["conserve"] == 0:
            continue

        for pdb_blast_chain, e_value in d_ref["align"].items():
            if debug == 1:
                print("%s %s %s" % (e_value, pdb_ref, thresold_blast))

            # keep only significant alignments
            if not (e_value <= thresold_blast):
                continue
            # skip alignments of the reference on itself; both sides are
            # upper-cased so a lower-case reference key cannot slip through
            if pdb_blast_chain[0:4].upper() == pdb_ref.upper():
                continue

            if debug == 1:
                print("CONTROL %s %s" % (pdb_blast_chain, e_value))

            # fetch the PDB file of the hit into the reference folder
            pdb_blast = pdb_blast_chain[0:4]
            p_pdb_blast = downloadFile.importPDB(pdb_blast, p_dir_dataset + pdb_ref + "/", dir_by_PDB=0, dbPDB=dbPDB)
            if p_pdb_blast == 0:
                continue

            # split the hit by chain inside the reference folder
            l_queries_by_chain = separeByChain(p_pdb_blast)

            # best effort: an unreadable resolution becomes 100.0, which the
            # threshold below rejects; KeyboardInterrupt/SystemExit propagate
            try:
                RX = parsePDB.resolution(p_pdb_blast)
            except Exception:
                RX = 100.0

            l_ligand = parsePDB.retrieveListLigand(p_pdb_blast)
            if debug == 1:
                print("%s %s %s control blast\n%s\n" % (p_pdb_blast, l_ligand, RX, " ".join(l_queries_by_chain)))

            # discard apo forms and structures that contain the substructure,
            # together with the files created for them
            if not l_ligand or substruct in l_ligand:
                remove(p_pdb_blast)
                for p_chain in l_queries_by_chain:
                    remove(p_chain)
                continue

            # a resolution that is not a number (NMR structure according to
            # the original comment) cannot be compared: skip the hit
            try:
                RX = float(RX)
            except (TypeError, ValueError):
                continue

            if debug == 1:
                print("---- %s %s ----" % (RX, thresold_RX))

            if RX <= thresold_RX:
                hit_name = pdb_blast_chain.upper()
                hit_code = hit_name.split("_")[0]
                l_blast = d_ref.setdefault("blast", [])
                # store a single chain per PDB code
                if not any(kept.split("_")[0] == hit_code for kept in l_blast):
                    l_blast.append(hit_name)

                if debug == 1:
                    print("%s %s" % (l_blast, pdb_ref))
Пример #2
0
def extractReference(p_list_ligand, p_dir_dataset, p_dir_result, substruct,
                     dbPDB="/home/borrel/PDB/",
                     fastaGlobal="/home/borrel/Yue_project/pdb_seqres.txt"):
    """
    Build the reference dataset for one ligand substructure.

    The ligand list is parsed with tool.parseLigandPDBList and every PDB ID
    listed under `substruct` is imported (PDB file and fasta file).  IDs for
    which either import returns 0 are left out.  The resolution of each kept
    structure is written to "resolution_ref.txt" in p_dir_result and that
    file is handed to runOtherSoft.Rhistogram.

    Args:
        p_list_ligand: path of the ligand list file.
        p_dir_dataset: folder receiving the imported PDB and fasta files.
        p_dir_result: result folder, expected to end with a path separator
            because the file name is appended by string concatenation.
        substruct: key of the parsed ligand list whose PDB IDs are used.
        dbPDB: path handed to downloadFile.importPDB as `dbPDB`
            (previously hard-coded to this default).
        fastaGlobal: path handed to downloadFile.importFasta as `fastaGlobal`
            (previously hard-coded to this default).

    Returns:
        dict keyed by upper-case PDB ID; each value holds "p_pdb", "p_fasta",
        "p_pdb_chain", "p_fasta_chain", "conserve" (always 1 here) and "RX"
        (resolution as float, 100.0 when it is not numeric).

    Raises:
        KeyError: if `substruct` is absent from the parsed ligand list.
    """

    # struct reference
    d_dataset = {}

    # retrieve list of ligand in PDB
    d_ligand = tool.parseLigandPDBList(p_list_ligand)

    # import PDB and fasta associated
    for PDB_ID in d_ligand[substruct]:
        PDB_ID = PDB_ID.upper()
        p_pdb = downloadFile.importPDB(PDB_ID,
                                       p_dir_dataset,
                                       dir_by_PDB=1,
                                       debug=1,
                                       dbPDB=dbPDB)
        p_fasta = downloadFile.importFasta(PDB_ID,
                                           p_dir_dataset,
                                           dir_by_PDB=1,
                                           debug=1,
                                           fastaGlobal=fastaGlobal)

        # both files are required to keep the entry
        if p_pdb == 0 or p_fasta == 0:
            continue

        l_p_pdb_chain = separeByChain(p_pdb)
        l_p_fasta_chain = separeChainFasta(p_fasta)
        d_dataset[PDB_ID] = {
            "p_pdb": p_pdb,
            "p_fasta": p_fasta,
            "p_pdb_chain": l_p_pdb_chain,
            "p_fasta_chain": l_p_fasta_chain,
            "conserve": 1,
        }

    # write the resolutions; the context manager closes the file even when
    # parsePDB.resolution raises in the middle of the loop
    p_file_RX = p_dir_result + "resolution_ref.txt"
    with open(p_file_RX, "w") as file_RX:
        for PDB_ID, d_ref in d_dataset.items():
            RX = parsePDB.resolution(d_ref["p_pdb"])
            try:
                d_ref["RX"] = float(RX)
            except (TypeError, ValueError):
                # non numeric resolution: sentinel value
                d_ref["RX"] = 100.0
            # the raw value is written, not the sentinel
            file_RX.write(PDB_ID + "\t" + str(RX) + "\n")

    runOtherSoft.Rhistogram(p_file_RX, "RX_ref_no_filter")
    return d_dataset
Пример #3
0
def extractReference (p_list_ligand, p_dir_dataset, p_dir_result, substruct, dbPDB = "/home/borrel/PDB/", fastaGlobal = "/home/borrel/Yue_project/pdb_seqres.txt"):
    """
    Collect the reference structures that contain a given ligand substructure.

    Steps:
      1. parse the ligand list (tool.parseLigandPDBList) and take the PDB IDs
         stored under `substruct`;
      2. import the PDB file and the fasta file of each ID, skipping the ID
         when one of the two imports returns 0;
      3. split both files by chain and store all paths in the dataset;
      4. write one "ID<TAB>resolution" line per structure in
         p_dir_result + "resolution_ref.txt" and pass that file to
         runOtherSoft.Rhistogram.

    args: -> p_list_ligand: path of the ligand list file
          -> p_dir_dataset: folder where PDB and fasta files are imported
          -> p_dir_result: result folder (the file name is concatenated to
             it, so it is expected to end with a path separator)
          -> substruct: key of the ligand list to use
          -> dbPDB: value forwarded to downloadFile.importPDB (was hard-coded)
          -> fastaGlobal: value forwarded to downloadFile.importFasta (was
             hard-coded)
    return: dictionary PDB ID (upper case) -> {"p_pdb", "p_fasta",
            "p_pdb_chain", "p_fasta_chain", "conserve" = 1, "RX"}; "RX" is
            100.0 when the resolution cannot be converted to float
    raise: KeyError when `substruct` is not a key of the parsed ligand list
    """

    # struct reference
    d_dataset = {}

    # retrieve list of ligand in PDB
    d_ligand = tool.parseLigandPDBList (p_list_ligand)

    # import PDB and fasta associated
    for PDB_ID in d_ligand[substruct] :
        PDB_ID = PDB_ID.upper ()
        p_pdb = downloadFile.importPDB (PDB_ID, p_dir_dataset, dir_by_PDB = 1, debug = 1, dbPDB = dbPDB)
        p_fasta = downloadFile.importFasta (PDB_ID, p_dir_dataset, dir_by_PDB = 1, debug = 1, fastaGlobal = fastaGlobal)

        if p_pdb != 0 and p_fasta != 0 :
            d_entry = {}
            d_entry["p_pdb"] = p_pdb
            d_entry["p_fasta"] = p_fasta
            d_entry["p_pdb_chain"] = separeByChain (p_pdb)
            d_entry["p_fasta_chain"] = separeChainFasta (p_fasta)
            d_entry["conserve"] = 1
            # registered only once every field is available
            d_dataset[PDB_ID] = d_entry

    # resolution file; "with" guarantees the handle is closed if
    # parsePDB.resolution fails on one structure
    p_file_RX = p_dir_result + "resolution_ref.txt"
    with open (p_file_RX, "w") as file_RX :
        for PDB_ID in d_dataset :
            RX = parsePDB.resolution (d_dataset[PDB_ID]["p_pdb"])
            try :
                RX_value = float (RX)
            except (TypeError, ValueError) :
                # not a number: sentinel resolution
                RX_value = 100.0
            d_dataset[PDB_ID]["RX"] = RX_value
            # raw value on purpose, as returned by parsePDB.resolution
            file_RX.write (PDB_ID + "\t" + str (RX) + "\n")

    runOtherSoft.Rhistogram (p_file_RX, "RX_ref_no_filter")
    return d_dataset
Пример #4
0
def filterBlastResult(d_dataset,
                      p_dir_dataset,
                      substruct,
                      thresold_RX=2.5,
                      thresold_blast=1e-4,
                      debug=1,
                      dbPDB="/home/borrel/PDB/"):
    """
    Select the BLAST hits to keep for each conserved reference structure.

    Every reference of d_dataset with "conserve" different from 0 has its
    "align" mapping (hit name -> e-value) scanned.  A hit is rejected when its
    e-value is above thresold_blast, when its PDB code (first four characters)
    is the reference itself, when its PDB file cannot be imported (importPDB
    returns 0), when it has no ligand or contains `substruct`, or when its
    resolution is not a number or is above thresold_RX.  Files created for
    hits rejected on the ligand criterion are removed.  Accepted hits are
    stored upper-cased in d_dataset[pdb_ref]["blast"], one chain per PDB code.

    Args:
        d_dataset: dict keyed by reference PDB ID; updated in place.
        p_dir_dataset: dataset folder (sub-folder names are concatenated to
            it, so it is expected to end with a path separator).
        substruct: ligand identifier the retained hits must not contain.
        thresold_RX: resolution cutoff (inclusive).
        thresold_blast: e-value cutoff (inclusive).
        debug: 1 prints progress on stdout.
        dbPDB: forwarded to downloadFile.importPDB as `dbPDB`; replaces the
            formerly hard-coded path and keeps it as default.

    Returns:
        None.
    """

    for pdb_ref in d_dataset:
        d_ref = d_dataset[pdb_ref]
        if d_ref["conserve"] == 0:
            continue

        d_align = d_ref["align"]
        for pdb_blast_chain in d_align:
            e_value = d_align[pdb_blast_chain]
            if debug == 1:
                print("%s %s %s" % (e_value, pdb_ref, thresold_blast))

            # e-value filter
            if not (e_value <= thresold_blast):
                continue
            # alignment of the reference on itself; case-insensitive so that
            # lower-case reference keys are recognised as well
            if pdb_blast_chain[0:4].upper() == pdb_ref.upper():
                continue

            if debug == 1:
                print("CONTROL %s %s" % (pdb_blast_chain, e_value))

            # import the PDB file of the hit in the reference folder
            p_pdb_blast = downloadFile.importPDB(pdb_blast_chain[0:4],
                                                 p_dir_dataset + pdb_ref + "/",
                                                 dir_by_PDB=0,
                                                 dbPDB=dbPDB)
            if p_pdb_blast == 0:
                continue

            # one file per chain in the reference folder
            l_queries_by_chain = separeByChain(p_pdb_blast)

            # a failing parser must not stop the whole filtering, but
            # KeyboardInterrupt / SystemExit are no longer swallowed
            try:
                RX = parsePDB.resolution(p_pdb_blast)
            except Exception:
                RX = 100.0

            l_ligand = parsePDB.retrieveListLigand(p_pdb_blast)
            if debug == 1:
                print("%s %s %s control blast\n%s\n" %
                      (p_pdb_blast, l_ligand, RX,
                       " ".join(l_queries_by_chain)))

            # apo form, or hit that contains the substructure: drop it
            if not l_ligand or substruct in l_ligand:
                _removeBlastFiles(p_pdb_blast, l_queries_by_chain)
                continue

            # resolution that is not a number (NMR structure according to
            # the original comment): skip
            try:
                RX = float(RX)
            except (TypeError, ValueError):
                continue

            if debug == 1:
                print("---- %s %s ----" % (RX, thresold_RX))

            if RX <= thresold_RX:
                _registerBlastHit(d_ref, pdb_blast_chain)
                if debug == 1:
                    print("%s %s" % (d_ref["blast"], pdb_ref))


def _removeBlastFiles(p_pdb_blast, l_queries_by_chain):
    """Delete the PDB file of a rejected hit and its per-chain files."""
    remove(p_pdb_blast)
    for p_chain in l_queries_by_chain:
        remove(p_chain)


def _registerBlastHit(d_ref, pdb_blast_chain):
    """Append the upper-cased hit to d_ref["blast"] unless a chain of the
    same PDB code (text before the first "_") is already stored."""
    hit_name = pdb_blast_chain.upper()
    hit_code = hit_name.split("_")[0]
    l_blast = d_ref.setdefault("blast", [])
    if not any(kept.split("_")[0] == hit_code for kept in l_blast):
        l_blast.append(hit_name)