def filter_known(dbsnpH, inFileN):
    """Write the lines of ``inFileN`` that are not found in dbSNP.

    The input is read as tab-separated text whose first column is the
    chromosome name and whose second column is an integer position.
    Lines that are not found in ``dbsnpH`` are copied unchanged to
    ``<inFileN>.dbsnp_flt``; lines that are found are dropped.

    Args:
        dbsnpH: mapping from chromosome name to the per-chromosome
            collection handed to ``mybasic.index`` (presumably a sorted
            list of known positions -- confirm against the caller).
        inFileN: path of the tab-separated input file.

    Raises:
        ValueError: if the second column of a line is not an integer.
        IndexError: if a line has fewer than two columns.
    """
    # Context managers close both handles even when a malformed line raises.
    with open(inFileN, 'r') as inFile, \
            open('%s.dbsnp_flt' % inFileN, 'w') as outFile:
        for line in inFile:
            colL = line.rstrip().split('\t')
            chrom = colL[0]
            pos = int(colL[1])

            # A chromosome with no dbSNP entries cannot contain a known
            # SNP, so keep the line instead of failing with KeyError.
            if chrom not in dbsnpH:
                outFile.write(line)
                continue

            # mybasic.index yields a negative value when pos is absent.
            idx = mybasic.index(dbsnpH[chrom], pos)
            if idx < 0:  # not found in dbsnp
                outFile.write(line)
def get_alt_frac_mutscan(sampN, snpH, fileL):
    """Print the observed alt fraction for every SNP allele in ``snpH``.

    Each file in ``fileL`` is read as tab-separated text with the
    chromosome in column 0, an integer position in column 1, the alt
    allele in column 3, two integer read counts in columns 4 and 5, and
    a float fraction in column 6.  Lines on ``chrM`` and lines whose two
    read counts sum to less than 15 are ignored.  When a remaining line
    matches a (chrom, pos, alt) present in ``snpH``, its fraction is
    recorded; a later matching line overwrites an earlier one.

    One line per allele is written to ``sys.stdout`` in the form
    ``<sampN>\\t<chrom>:<pos>:<alt>\\t<frac>``, ordered by chromosome and
    then position.  Alleles that were never matched print ``NA``.

    Args:
        sampN: sample name echoed in the first output column.
        snpH: mapping ``chrom -> pos -> container of alt alleles``.
        fileL: iterable of input file paths.

    Raises:
        ValueError: if a numeric column cannot be parsed.
        IndexError: if a line has fewer than seven columns.
    """
    # NOTE(review): an identical definition of this function appears later
    # in this file and replaces this one at import time -- consider
    # removing one of the two.

    # Every allele starts as 'NA' and is overwritten when it is observed.
    outH = {}
    for chrom in snpH:
        for pos in snpH[chrom]:
            outH[(chrom, pos)] = dict.fromkeys(snpH[chrom][pos], 'NA')

    for fileN in fileL:
        # 'with' guarantees the handle is released, even on a parse error.
        with open(fileN, 'r') as inFile:
            for line in inFile:
                colL = line.rstrip().split('\t')
                chrom = colL[0]
                pos = int(colL[1])
                alt = colL[3]
                r1 = int(colL[4])
                r2 = int(colL[5])
                frac = float(colL[6])

                if chrom == 'chrM' or (r1 + r2) < 15:
                    continue

                # Direct dict membership replaces sorting the positions
                # and searching them; chromosomes or positions that are
                # not in snpH are simply skipped.
                altsAtPos = snpH.get(chrom, {}).get(pos)
                if altsAtPos is not None and alt in altsAtPos:
                    outH[(chrom, pos)][alt] = frac

    # sorted() works on both Python 2 lists and Python 3 dict views.
    for chrom in sorted(snpH):
        for pos in sorted(snpH[chrom]):
            for alt in snpH[chrom][pos]:
                sys.stdout.write(
                    '%s\t%s:%s:%s\t%s\n'
                    % (sampN, chrom, pos, alt, outH[(chrom, pos)][alt]))
def get_alt_frac_mutscan(sampN, snpH, fileL):
    """Print the observed alt fraction for every SNP allele in ``snpH``.

    Each file in ``fileL`` is read as tab-separated text with the
    chromosome in column 0, an integer position in column 1, the alt
    allele in column 3, two integer read counts in columns 4 and 5, and
    a float fraction in column 6.  Lines on ``chrM`` and lines whose two
    read counts sum to less than 15 are ignored.  When a remaining line
    matches a (chrom, pos, alt) present in ``snpH``, its fraction is
    recorded; a later matching line overwrites an earlier one.

    One line per allele is written to ``sys.stdout`` in the form
    ``<sampN>\\t<chrom>:<pos>:<alt>\\t<frac>``, ordered by chromosome and
    then position.  Alleles that were never matched print ``NA``.

    Args:
        sampN: sample name echoed in the first output column.
        snpH: mapping ``chrom -> pos -> container of alt alleles``.
        fileL: iterable of input file paths.

    Raises:
        ValueError: if a numeric column cannot be parsed.
        IndexError: if a line has fewer than seven columns.
    """
    # NOTE(review): this definition overrides an identical one that appears
    # earlier in this file -- consider removing one of the two.

    # Every allele starts as 'NA' and is overwritten when it is observed.
    outH = {}
    for chrom in snpH:
        for pos in snpH[chrom]:
            outH[(chrom, pos)] = dict.fromkeys(snpH[chrom][pos], 'NA')

    for fileN in fileL:
        # 'with' guarantees the handle is released, even on a parse error.
        with open(fileN, 'r') as inFile:
            for line in inFile:
                colL = line.rstrip().split('\t')
                chrom = colL[0]
                pos = int(colL[1])
                alt = colL[3]
                r1 = int(colL[4])
                r2 = int(colL[5])
                frac = float(colL[6])

                if chrom == 'chrM' or (r1 + r2) < 15:
                    continue

                # Direct dict membership replaces sorting the positions
                # and searching them; chromosomes or positions that are
                # not in snpH are simply skipped.
                altsAtPos = snpH.get(chrom, {}).get(pos)
                if altsAtPos is not None and alt in altsAtPos:
                    outH[(chrom, pos)][alt] = frac

    # sorted() works on both Python 2 lists and Python 3 dict views.
    for chrom in sorted(snpH):
        for pos in sorted(snpH[chrom]):
            for alt in snpH[chrom][pos]:
                sys.stdout.write(
                    '%s\t%s:%s:%s\t%s\n'
                    % (sampN, chrom, pos, alt, outH[(chrom, pos)][alt]))