def _stripPileupMarkup(bases):
    """Return *bases* without read-start markers and indel sequences.

    The string is scanned once, left to right:

    * ``^`` is dropped together with the single character that follows it;
    * ``+N`` / ``-N`` (N being one or more digits) is dropped together with
      the N characters that follow the digits.

    A sign that is not followed by a digit is kept unchanged.
    """
    kept = []
    size = len(bases)
    i = 0
    while i < size:
        ch = bases[i]
        if ch == "^":
            # Skip the marker and the character attached to it, so that an
            # attached "+", "-", "." or letter is never counted as a base.
            i += 2
            continue
        if ch in "+-":
            j = i + 1
            while j < size and bases[j].isdigit():
                j += 1
            if j > i + 1:
                # Read the complete (possibly multi-digit) length, then skip
                # the sign, the digits and that many sequence characters.
                i = j + int(bases[i + 1:j])
                continue
        kept.append(ch)
        i += 1
    return "".join(kept)


def fromVarfilter2tsvLikeCyrielle(ficVar):
    """Write per-position A/C/G/T counts to a ``.tsv`` file and return its path.

    NOTE(review): a later definition with the same name exists further down
    in this file and replaces this one when the module is loaded.

    ``ficVar`` is printed, then handed to
    ``traitementSamtools.decomposeVarfilter``; the path it returns is read
    line by line as tab-separated text.  Columns used (0-based): 0 and 1 are
    copied through, 2 is the reference base, 3 the called residue, 5 is
    copied through, 7 is the coverage and 8 the read-bases string.
    (Presumably a samtools pileup/varfilter layout -- confirm against the
    producer.)

    For every line the read-bases string is cleaned of ``^X`` markers and
    indel sequences, A/C/G/T are counted case-insensitively, and the count of
    the reference base is replaced by the number of ``.``/``,`` characters.
    A row is written only when the most represented base does not account
    for exactly the whole coverage.

    Returns:
        The path of the written file (``"<decomposed path>.tsv"``).

    Raises:
        IndexError: a line has fewer than nine tab-separated columns.
        ValueError: the coverage column is not a number.
        ZeroDivisionError: the coverage column is zero.
    """
    print(ficVar)
    ficSnp = traitementSamtools.decomposeVarfilter(ficVar)
    ficTsv = "%s.tsv" % ficSnp
    rowFormat = "%s\t%s\t%s\t%s\t%s\t%s\t%.3f\t%s\t%s\t%s\t%s\t%.0f\t%.3f\n"

    # The input is opened first so a missing input does not leave an empty
    # output file behind; both files are closed even if a line fails.
    with open(ficSnp, "r") as fM:
        with open(ficTsv, "w") as of:
            for line in fM:
                el = line.split("\t")
                bases = _stripPileupMarkup(el[8]).upper()

                counts = {
                    "A": bases.count("A"),
                    "C": bases.count("C"),
                    "G": bases.count("G"),
                    "T": bases.count("T"),
                }
                cov = float(el[7])
                nbRef = bases.count(".") + bases.count(",")

                # Reads matching the reference appear as "." or ",", so the
                # reference base's own count comes from those characters.
                ref = el[2].upper()
                if ref in counts:
                    counts[ref] = nbRef

                # Only an upper-case A/C/G/T residue gets a count.
                nbCns = counts.get(el[3], "-")

                ratio = nbRef / cov
                ratio2 = max(counts.values()) / cov
                if ratio2 != 1:
                    of.write(rowFormat % (
                        el[0], el[1], ref, el[3], nbCns, el[5], ratio,
                        counts["A"], counts["C"], counts["G"], counts["T"],
                        cov, ratio2))
    return ficTsv
def fromVarfilter2tsvLike(ficVar):
    """Write per-position A/C/G/T counts to a ``.tsv`` file and return its path.

    ``ficVar`` is handed to ``traitementSamtools.decomposeVarfilter``; the
    path it returns is read line by line as tab-separated text.  Columns
    used (0-based), named after the header this function writes:
    0 Chromosome, 1 position, 2 Reference, 3 Residue, 5 "Quality score?",
    7 cov, 8 the read-bases string.

    For every line:

    * ``^`` markers are removed together with the character following them;
    * indel annotations are removed through ``removePatternFromLine``
      (NOTE(review): that helper is defined elsewhere; it is assumed to
      delete every match of the given regular expression -- confirm);
    * A/C/G/T are counted case-insensitively in the cleaned string;
    * the count of the reference base is replaced by the number of
      ``.``/``,`` characters.

    Returns:
        The path of the written file (``"<decomposed path>.tsv"``).

    Raises:
        IndexError: a line has fewer than nine tab-separated columns.
        ValueError: the coverage column is not a number.
        ZeroDivisionError: the coverage column is zero.
    """
    # Local import: this block cannot rely on the module importing ``re``.
    import re

    # "^" plus the character after it (or a lone trailing "^"), removed in
    # one left-to-right pass so overlapping markers such as "^A^^A" are each
    # handled exactly once.
    readStart = re.compile(r"\^.?", re.DOTALL)

    ficSnp = traitementSamtools.decomposeVarfilter(ficVar)
    ficTsv = "%s.tsv" % ficSnp
    header = ("Chromosome\tposition\tReference\tResidue\t"
              "nb Residue(if A,C,T orG)\tQuality score?\t(nb ref)/cov\t"
              "nb A\tnb C\tnb G\tnb T\tcov\n")
    rowFormat = "%s\t%s\t%s\t%s\t%s\t%s\t%.3f\t%s\t%s\t%s\t%s\t%.0f\n"

    # The input is opened first so a missing input does not leave an empty
    # output file behind; both files are closed even if a line fails.
    with open(ficSnp, "r") as fM:
        with open(ficTsv, "w") as of:
            of.write(header)
            for line in fM:
                el = line.split("\t")
                bases = readStart.sub("", el[8])

                # Same pattern text as before, now a raw string so "\+" is
                # not an invalid string escape.
                # NOTE(review): "|" inside the character class is a literal
                # pipe, not an alternation.
                lfin = removePatternFromLine(
                    r"[\+|-]([0-9]+)([ACGTNacgtn]+)", bases)
                lfin = lfin.upper()

                counts = {
                    "A": lfin.count("A"),
                    "C": lfin.count("C"),
                    "G": lfin.count("G"),
                    "T": lfin.count("T"),
                }
                cov = float(el[7])
                # Counted before indel removal, as in the original code.
                nbRef = bases.count(".") + bases.count(",")

                # Reads matching the reference appear as "." or ",", so the
                # reference base's own count comes from those characters.
                ref = el[2].upper()
                if ref in counts:
                    counts[ref] = nbRef

                # Only an upper-case A/C/G/T residue gets a count.
                nbCns = counts.get(el[3], "-")

                ratio = nbRef / cov
                of.write(rowFormat % (
                    el[0], el[1], ref, el[3], nbCns, el[5], ratio,
                    counts["A"], counts["C"], counts["G"], counts["T"], cov))
    return ficTsv
def fromVarfilter2tsvLikeCyrielle(ficVar):
    """Count A, C, G and T among the reads of each SNP and keep mixed sites.

    ``ficVar`` is handed to ``traitementSamtools.decomposeVarfilter``; the
    path it returns (a ``pileup.SNP`` file according to the original note)
    is read line by line as tab-separated text.  Columns used (0-based),
    named after the header this function writes: 0 Chromosome, 1 position,
    2 Reference, 3 Residue, 5 "Quality score?", 7 cov, 8 the read-bases
    string.

    For every line:

    * ``^`` markers are removed together with the character following them;
    * indel annotations are removed through ``removePatternFromLine``
      (NOTE(review): that helper is defined elsewhere; it is assumed to
      delete every match of the given regular expression -- confirm);
    * A/C/G/T are counted case-insensitively in the cleaned string;
    * the count of the reference base is replaced by the number of
      ``.``/``,`` characters.

    A row is written only when the most represented base accounts for at
    most 95 % of the coverage.

    Returns:
        The path of the written file
        (``"<decomposed path>.bestFilter.tsv"``).

    Raises:
        IndexError: a line has fewer than nine tab-separated columns.
        ValueError: the coverage column is not a number.
        ZeroDivisionError: the coverage column is zero.
    """
    # Local import: this block cannot rely on the module importing ``re``.
    import re

    # "^" plus the character after it (or a lone trailing "^"), removed in
    # one left-to-right pass so overlapping markers such as "^A^^A" are each
    # handled exactly once.
    readStart = re.compile(r"\^.?", re.DOTALL)

    ficSnp = traitementSamtools.decomposeVarfilter(ficVar)
    ficTsv = "%s.bestFilter.tsv" % ficSnp
    header = ("Chromosome\tposition\tReference\tResidue\t"
              "nb Residue(if A,C,T orG)\tQuality score?\t(nb ref)/cov\t"
              "nb A\tnb C\tnb G\tnb T\tcov\t(nb max represented)/cov\n")
    rowFormat = "%s\t%s\t%s\t%s\t%s\t%s\t%.3f\t%s\t%s\t%s\t%s\t%.0f\t%.3f\n"

    # The input is opened first so a missing input does not leave an empty
    # output file behind; both files are closed even if a line fails.
    with open(ficSnp, "r") as fM:
        with open(ficTsv, "w") as of:
            of.write(header)
            for line in fM:
                el = line.split("\t")
                bases = readStart.sub("", el[8])

                # Same pattern text as before, now a raw string so "\+" is
                # not an invalid string escape.
                # NOTE(review): "|" inside the character class is a literal
                # pipe, not an alternation.
                lfin = removePatternFromLine(
                    r"[\+|-]([0-9]+)([ACGTNacgtn]+)", bases)
                lfin = lfin.upper()

                counts = {
                    "A": lfin.count("A"),
                    "C": lfin.count("C"),
                    "G": lfin.count("G"),
                    "T": lfin.count("T"),
                }
                cov = float(el[7])
                nbRef = lfin.count(".") + lfin.count(",")

                # Reads matching the reference appear as "." or ",", so the
                # reference base's own count comes from those characters.
                ref = el[2].upper()
                if ref in counts:
                    counts[ref] = nbRef

                # Only an upper-case A/C/G/T residue gets a count.
                nbCns = counts.get(el[3], "-")

                ratio = nbRef / cov
                ratio2 = max(counts.values()) / cov
                if ratio2 <= 0.95:
                    of.write(rowFormat % (
                        el[0], el[1], ref, el[3], nbCns, el[5], ratio,
                        counts["A"], counts["C"], counts["G"], counts["T"],
                        cov, ratio2))
    return ficTsv