示例#1
0
def _allele_blast_label(ficout, seuil=70):
    """Summarise the BLAST report *ficout* as the label printed by definiAllele.

    Returns:
        ""           when *ficout* does not exist (nothing to summarise);
        "noSim"      when the file splits into at most two newline-separated
                     pieces, i.e. there is no usable second line;
        "<id>-<pct>" when the third tab-separated field of the second line
                     is greater than *seuil*; <id> is the last "|"-separated
                     part of the second field, <pct> is the third field;
        "soSignSim"  otherwise.
    """
    if not os.path.isfile(ficout):
        return ""
    with open(ficout, "r") as handle:
        rows = handle.read().split("\n")
    # The original only recognised exactly two pieces as "no hit"; an empty
    # or one-line report used to raise IndexError on rows[1].
    if len(rows) <= 2:
        return "noSim"
    det = rows[1].split("\t")
    if float(det[2]) > seuil:
        return "%s-%s" % (det[1].split("|")[-1], det[2])
    # NOTE(review): "soSignSim" looks like a typo for "noSignSim"; kept as is
    # because it is emitted output that other tools may parse.
    return "soSignSim"


def definiAllele(fic,database):
    """Align the locus pairs listed in *fic* and print their identities.

    *fic* is read as tab-separated text. Every non-empty line must provide
    at least four fields, read as ``def1, loc1, def2, loc2`` (a shorter line
    raises IndexError). For each line:

    * both loci given: the two loci are aligned with each other;
    * one locus given and its ``def`` field equal to "noSim": the locus is
      searched with ``alignement.run_blastn`` against *database* and a
      one-line summary ``locus<TAB>label`` is printed;
    * one locus given otherwise: the partner locus is the part of the
      ``def`` field before the first "-";
    * no locus given: the line is skipped.

    Pairs are aligned with ``alignement.ali_needle`` on the gene
    (``FastaGene/``) and protein (``FastaProt/``) FASTA files unless the
    ``.needle`` result already exists, and
    ``loc1<TAB>loc2<TAB>gene_id<TAB>prot_id`` is printed. Finally the mean
    gene and protein identities are printed over the pairs whose gene
    identity is above 70 and different from "100" ("-" when there is no
    such pair).

    Args:
        fic: path of the tab-separated input file.
        database: passed through unchanged to ``alignement.run_blastn``.
    """
    # print(...) is always called with a single argument so that it behaves
    # identically as a Python 2 print statement and a Python 3 function.
    totg = 0.0
    totp = 0.0
    g = 0
    with open(fic, "r") as handle:
        lines = handle.read().split("\n")
    for line in lines:
        if line == "":
            continue
        lis = line.split("\t")
        def1 = lis[0]
        loc1 = lis[1]
        def2 = lis[2]
        loc2 = lis[3]
        if loc1 != "" and loc2 != "":
            first, second = loc1, loc2
        elif loc1 != "" and def1 == "noSim":
            ficin = "FastaGene/%s.tfa" % loc1
            ficout = "FastaGene/%s.blastn" % files.get_name(ficin).lower()
            # An existing report is reused for the first locus.
            if not os.path.isfile(ficout):
                alignement.run_blastn(ficin, ficout, database)
            print("%s\t%s" % (loc1, _allele_blast_label(ficout)))
            continue
        elif loc2 != "" and def2 == "noSim":
            ficin = "FastaGene/%s.tfa" % loc2
            ficout = "FastaGene/%s.blastn" % files.get_name(ficin).lower()
            # NOTE(review): unlike the loc1 case, the search is always re-run
            # here; preserved from the original - confirm whether intended.
            alignement.run_blastn(ficin, ficout, database)
            print("%s\t%s" % (loc2, _allele_blast_label(ficout)))
            continue
        elif loc1 != "":
            loc2 = def1.split("-")[0]
            first, second = loc1, loc2
        elif loc2 != "":
            # The known locus (loc2) goes first in the alignment, but the
            # output line still prints loc1 before loc2.
            loc1 = def2.split("-")[0]
            first, second = loc2, loc1
        else:
            # No locus at all: nothing to align. The original fell through
            # and reused the previous line's files (or raised NameError).
            continue

        ficg1 = "FastaGene/%s.tfa" % first
        ficg2 = "FastaGene/%s.tfa" % second
        ficp1 = "FastaProt/%s.tfa" % first
        ficp2 = "FastaProt/%s.tfa" % second
        outf = "%s-%s.needle" % (files.get_name(ficg1).lower(),files.get_name(ficg2).lower())
        geneout = "FastaGene/%s" % outf
        protout = "FastaProt/%s" % outf
        if not os.path.isfile(geneout):
            alignement.ali_needle(ficg1,ficg2)
        if not os.path.isfile(protout):
            alignement.ali_needle(ficp1,ficp2)

        # Presumably percent identities returned as strings (the code
        # compares with "100" and converts with float) - TODO confirm.
        idg = alignement.extrait_id_needle(geneout)
        idp = alignement.extrait_id_needle(protout)
        # Compare numerically: the original tested `idg > 70` on the raw
        # value, which for a string is always true under Python 2.
        if idg != "100" and float(idg) > 70:
            totg = totg + float(idg)
            totp = totp + float(idp)
            g = g + 1
        print("%s\t%s\t%s\t%s" % (loc1,loc2,idg,idp))

    # "-" replaces the mean when no pair was counted (avoids dividing by 0).
    moyg = totg / g if g else "-"
    moyp = totp / g if g else "-"
    print("\nMoyenne des pourcentages d identite (sans tenir compte des 100%)")
    print("au niveau des genes : %s" % moyg)
    print("au niveau des proteines: %s" % moyp)
示例#2
0
def _annot_blast_label(ficin, database):
    """Search *ficin* against *database* and summarise the best BLAST line.

    The search is run with ``alignement.run_blastn`` and written to
    ``FastaGene/<name>.blastn``, where <name> is the lower-cased result of
    ``files.get_name(ficin)``.

    Returns:
        ""           when *ficin* or the BLAST report does not exist;
        "noSim"      when the report splits into at most two
                     newline-separated pieces (no usable second line);
        "<id>-<pct>" otherwise, where <id> is the last "|"-separated part of
                     the second tab field of the second line and <pct> is
                     its third tab field.
    """
    if not os.path.isfile(ficin):
        return ""
    ficout = "FastaGene/%s.blastn" % files.get_name(ficin).lower()
    alignement.run_blastn(ficin, ficout, database)
    if not os.path.isfile(ficout):
        return ""
    with open(ficout, "r") as handle:
        rows = handle.read().split("\n")
    # The original only recognised exactly two pieces as "no hit"; an empty
    # or one-line report used to raise IndexError on rows[1].
    if len(rows) <= 2:
        return "noSim"
    det = rows[1].split("\t")
    return "%s-%s" % (det[1].split("|")[-1], det[2])


def compAnnot(fic,database):
    """Compare paired annotations listed in *fic* and print one line per row.

    *fic* is read as tab-separated text. Every non-empty line must provide
    at least 17 fields (a shorter line raises IndexError): fields 0-7
    describe the first annotation and fields 9-16 the second one, each read
    as ``type, annot, loc, orf, deb, fin, long, sens``; field 8 is not used.

    * When both loci are given, gene (``FastaGene/``) and protein
      (``FastaProt/``) sequences are aligned with ``alignement.ali_needle``
      unless the ``.needle`` result already exists or an input FASTA file is
      missing. The row is printed with the two identities ("-" when the
      result file is absent) and a suffix: 1 when the gene identity is
      "100", otherwise 2.
    * Otherwise each locus that is given is searched against *database* and
      the row is printed with the two BLAST summaries instead.

    Finally the mean gene and protein identities are printed over the rows
    with suffix 2 that have a gene identity ("-" when there is none).

    Args:
        fic: path of the tab-separated input file.
        database: passed through unchanged to ``alignement.run_blastn``.
    """
    # print(...) is always called with a single argument so that it behaves
    # identically as a Python 2 print statement and a Python 3 function.
    totg = 0.0
    totp = 0.0
    g = 0
    p = 0
    with open(fic, "r") as handle:
        lines = handle.read().split("\n")
    for line in lines:
        if line == "":
            continue
        lis = line.split("\t")
        # Explicit indexing keeps the IndexError on rows with missing fields.
        left = tuple([lis[i] for i in range(0, 8)])
        right = tuple([lis[i] for i in range(9, 17)])
        loc1 = left[2]
        loc2 = right[2]

        if loc1 != "" and loc2 != "":
            idg = "-"
            idp = "-"
            ficg1 = "FastaGene/%s.tfa" % loc1
            ficg2 = "FastaGene/%s.tfa" % loc2
            ficp1 = "FastaProt/%s.tfa" % loc1
            ficp2 = "FastaProt/%s.tfa" % loc2
            outf = "%s-%s.needle" % (files.get_name(ficg1).lower(),files.get_name(ficg2).lower())
            geneout = "FastaGene/%s" % outf
            protout = "FastaProt/%s" % outf
            if not os.path.isfile(geneout):
                if os.path.isfile(ficg1) and os.path.isfile(ficg2):
                    alignement.ali_needle(ficg1,ficg2)
            if not os.path.isfile(protout):
                if os.path.isfile(ficp1) and os.path.isfile(ficp2):
                    alignement.ali_needle(ficp1,ficp2)
            if os.path.isfile(geneout):
                idg = alignement.extrait_id_needle(geneout)
            if os.path.isfile(protout):
                idp = alignement.extrait_id_needle(protout)

            if idg == "100":
                suf = 1
            else:
                suf = 2
                if idg != "-":
                    totg = totg + float(idg)
                    g = g + 1
                    # The protein alignment may be missing on its own; the
                    # original converted the "-" placeholder and crashed.
                    if idp != "-":
                        totp = totp + float(idp)
                        p = p + 1
            print("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t\t%s\t%s\t\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s" % (left + (idg, idp) + right + (suf,)))
            continue

        # Not an obvious allele: run the gene against the complete bank to
        # see whether there is a significant similarity after all.
        inf1 = ""
        inf2 = ""
        if loc1 != "":
            inf1 = _annot_blast_label("FastaGene/%s.tfa" % loc1, database)
        if loc2 != "":
            inf2 = _annot_blast_label("FastaGene/%s.tfa" % loc2, database)
        print("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t\t\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s" % (left + (inf1, inf2) + right))

    # "-" replaces a mean when nothing was counted (avoids dividing by 0).
    moyg = totg / g if g else "-"
    moyp = totp / p if p else "-"
    print("\nMoyenne des pourcentages d identite (sans tenir compte des 100%)")
    print("au niveau des genes : %s" % moyg)
    print("au niveau des proteines: %s" % moyp)