cdscns_string = "" outcns = open(options.outfileprename + "_cns.fa", 'w') outaa = open(options.outfileprename + "_aa.fa", 'w') outcdscns = open(options.outfileprename + "_cdscns.fa", 'w') cdsmap = {} if __name__ == '__main__': if options.genomedepth != None: depthfile = Util.GATK_depthfile(options.genomedepth, options.genomedepth + ".index") species_idx = depthfile.title.index("Depth_for_" + options.species) Considerdepth = True else: Considerdepth = False depthfile = None species_idx = -1 vcfpop = VCFutil.VCF_Data(options.variants) # new a class RefSeqMap, currentChromNO, nextChromNO = Util.getRefSeqMap( refFastafilehander=reffa) print(currentChromNO, nextChromNO) cns_string += currentChromNO + "\n" gtfMap = Util.getGtfMap(options.gtffile) lastposofdepthfp = 0 #because this time RefSeqMap[0] is 0 vcfchrom = "begin" while currentChromNO != "end of the reffile": print("\t\twhile loop:", currentChromNO) currentBaselocinGenome = RefSeqMap[currentChromNO][0] + 1 # statue = depthfile.set_depthfilefp(currentChromNO, currentBaselocinGenome, lastposofdepthfp) # depth_chrom, depth_pos, depth_linelist,lastposofdepthfp = depthfile.getnextposline() if currentChromNO in gtfMap: gtfListOfCurrentChrom = gtfMap[currentChromNO]
def _sum_bam_depth(samfiles, chrom, pos):
    """Return the read depth at 1-based ``pos`` summed over all ``samfiles``."""
    total = 0
    for samfile in samfiles:
        # count_coverage is queried for the single-base window
        # [pos - 1, pos); the first element of every returned array is added.
        for per_base in samfile.count_coverage(chrom, pos - 1, pos):
            total += per_base[0]
    return total


def _pooled_allele_depths(format_field, samples):
    """Sum the REF and ALT read depths of the AD field over ``samples``."""
    refdep = 0
    altalleledep = 0
    ad_idx = re.split(":", format_field).index("AD")  # gatk GT:AD:DP:GQ:PL
    for sample in samples:
        sample_fields = re.split(":", sample)
        if len(sample_fields) == 1:  # ./.
            continue
        ad_depth = re.split(",", sample_fields[ad_idx])
        try:
            refdep += int(ad_depth[0])
            altalleledep += int(ad_depth[1])
        except ValueError:
            # Non-numeric depth: report the sample and keep going.
            print(sample, end="|")
    return refdep, altalleledep


def _population_alt_freq(record, method, handles, chrom, pos, min_fixed_depth,
                         min_an=None, min_pool_depth=0, verbose=False):
    """Return one population's ALT allele frequency at a site, or None to skip.

    ``record`` is the aligned VCF entry of the population (``None`` when the
    population has no call at the site); ``handles`` is the population's
    ``[vcf_object, bam_handle, ...]`` list.
    """
    if record is None:
        # No variant called here: the site only counts as "fixed for REF"
        # (AF = 0) when the BAM files show enough coverage.
        if len(handles) == 1:
            if verbose:
                print("no depth file")
            return None
        if _sum_bam_depth(handles[1:], chrom, pos) >= min_fixed_depth:
            return 0
        return None
    if method == "indvd":
        AF = float(re.search(r"AF=([\d\.]+);", record[0]).group(1))
        if min_an is not None:
            AN = float(re.search(r"AN=([\d\.]+);", record[0]).group(1))
            if AN < min_an:
                return None
        return AF
    if method == "pool":
        refdep, altalleledep = _pooled_allele_depths(record[1], record[2])
        if refdep == altalleledep and altalleledep == 0:
            if verbose:
                print("no sample available in this pop")
            return None
        if altalleledep + refdep < min_pool_depth:
            return None
        return altalleledep / (altalleledep + refdep)
    return None


def make_freq_xaxisKEY_yaxisseqVALUERelation(a):
    """Bin SNPs by target-population DAF and collect reference-population DAFs.

    Args:
        a: Sequence ``(chromlistfilename, topleveltablename,
            target_config_files, ref_config_files, n_individuals)``.
            Each config file names one VCF via a ``vcffilename=...`` line,
            optionally followed by BAM paths (one per line).  The VCF name
            must contain ``indvd`` or ``pool``.

    Returns:
        A dict mapping ``(low, high)`` frequency-bin tuples to the list of
        mean reference-population derived-allele frequencies of the SNPs
        whose mean target-population DAF satisfies ``low < DAF <= high``.

    Raises:
        ValueError: If a config file lists a BAM path before any
            ``vcffilename=`` line, or contains no ``vcffilename=`` line.
    """
    chromlistfilename = a[0]
    topleveltablename = a[1]
    targetpopvcffile_withdepthconfig = a[2]
    refpopvcffile_withdepthconfig = a[3]
    numberofindvdoftargetpop_todividintobin = int(a[4])
    mindepthtojudefixed = 20

    # Build 2N bins of width 1/(2N); every bin but the last gets a tiny
    # epsilon on its upper bound to absorb floating point error.
    d_increase = fractions.Fraction(
        1, 2 * numberofindvdoftargetpop_todividintobin)
    d_increase = round(d_increase, 11)
    minvalue = 0.000000000000
    freq_xaxisKEY_yaxisVALUE_seq_list = {}
    for _ in range(numberofindvdoftargetpop_todividintobin * 2 - 1):
        freq_xaxisKEY_yaxisVALUE_seq_list[(
            minvalue, minvalue + d_increase + 0.00000000004)] = []
        minvalue += d_increase
    freq_xaxisKEY_yaxisVALUE_seq_list[(minvalue, 1)] = []
    # The bin set never changes afterwards, so sort it once.
    sorted_bins = sorted(freq_xaxisKEY_yaxisVALUE_seq_list)
    for bin_low, bin_high in sorted_bins:
        print(str(bin_low), str(bin_high))

    print("process ID:", os.getpid(), "start", chromlistfilename)
    dbvariantstools = dbm.DBTools(Util.ip, Util.username, Util.password,
                                  Util.vcfdbname)

    chromlist = []
    with open(chromlistfilename, "r") as chromlistfile:
        for chrrow in chromlistfile.readlines():
            chrrowlist = re.split(r'\s+', chrrow.strip())
            chromlist.append(
                (chrrowlist[0].strip(), int(chrrowlist[1].strip())))

    vcfnamelist = []
    listofpopvcfmapOfAChr = []
    methodlist = []
    # {vcfname: [VCF_Data, bam_handle, bam_handle, ...]}
    vcfnameKEY_vcfobj_pyBAMfilesVALUE = {}
    N_of_targetpop = len(targetpopvcffile_withdepthconfig)
    N_of_refpop = len(refpopvcffile_withdepthconfig)

    try:
        for vcfconfigfilename in (targetpopvcffile_withdepthconfig[:] +
                                  refpopvcffile_withdepthconfig[:]):
            listofpopvcfmapOfAChr.append({})
            vcfname = None
            with open(vcfconfigfilename, "r") as vcfconfig:
                for line in vcfconfig:
                    vcffilename_obj = re.search(r"vcffilename=(.*)",
                                                line.strip())
                    if vcffilename_obj is not None:
                        vcfname = vcffilename_obj.group(1).strip()
                        vcfnamelist.append(vcfname)
                        vcfnameKEY_vcfobj_pyBAMfilesVALUE[vcfname] = [
                            VCFutil.VCF_Data(vcfname)
                        ]
                    elif line.split():
                        if vcfname is None:
                            raise ValueError(
                                "BAM path listed before any 'vcffilename=' "
                                "line in " + str(vcfconfigfilename))
                        vcfnameKEY_vcfobj_pyBAMfilesVALUE[vcfname].append(
                            pysam.Samfile(line.strip(), 'rb'))
            if vcfname is None:
                raise ValueError("no 'vcffilename=' line in " +
                                 str(vcfconfigfilename))
            if re.search(r"indvd[^/]+", vcfname) is not None:
                methodlist.append("indvd")
            elif re.search(r"pool[^/]+", vcfname) is not None:
                methodlist.append("pool")
            else:
                print("vcfname must with 'pool' or 'indvd'")
                exit(-1)

        for currentchrID, _ in chromlist:
            if not any(currentchrID in
                       vcfnameKEY_vcfobj_pyBAMfilesVALUE[name][0].VcfIndexMap
                       for name in vcfnamelist):
                print("this chr doesn't exist in anypop")
                continue
            for vcfobj_idx, name in enumerate(vcfnamelist):
                listofpopvcfmapOfAChr[vcfobj_idx] = {
                    currentchrID:
                    vcfnameKEY_vcfobj_pyBAMfilesVALUE[name]
                    [0].getVcfListByChrom(currentchrID)
                }
            target_ref_SNPs = Util.alinmultPopSnpPos(listofpopvcfmapOfAChr,
                                                     "o")
            for snp_aligned in target_ref_SNPs[currentchrID]:
                if len(snp_aligned[1]) != 1 or len(snp_aligned[2]) != 1:
                    print("multple allele", snp_aligned)
                    continue
                curpos = int(snp_aligned[0])
                # NOTE(review): SQL is assembled by concatenation; table and
                # chromosome names are assumed to come from trusted input.
                snp = dbvariantstools.operateDB(
                    "select", "select * from " + topleveltablename +
                    " where chrID='" + currentchrID + "' and snp_pos=" +
                    str(curpos) + "")
                if not snp:
                    print(currentchrID, curpos, "snp not find in db,skip")
                    continue

                # Judge the ancestral allele: column 9 is split as
                # "depth0,depth1"; the allele is taken as ancestral when the
                # other one has zero depth and it reaches the minimum depth.
                # presumably this column is the outgroup's REF,ALT depth —
                # TODO confirm against the table schema.
                fanyadepthlist = re.split(r",", snp[0][9])
                if (len(fanyadepthlist) == 2
                        and int(fanyadepthlist[1]) >= mindepthtojudefixed
                        and fanyadepthlist[0].strip() == "0"):
                    A_base_idx = 1
                elif (len(fanyadepthlist) == 2
                      and int(fanyadepthlist[0]) >= mindepthtojudefixed
                      and fanyadepthlist[1].strip() == "0"):
                    A_base_idx = 0
                else:
                    print("skip snp", snp[0][1], snp[0][7:])
                    continue
                # Flanking base + ancestral base + flanking base; any CG/GC
                # dinucleotide in that context is skipped.
                context = snp[0][5].strip()
                ancestrallcontext = (
                    context[0].upper() +
                    snp[0][3 + A_base_idx].strip().upper() +
                    context[2].upper())
                if "CG" in ancestrallcontext or "GC" in ancestrallcontext:
                    print("skip CG site", ancestrallcontext)
                    continue

                ########## x-axis: mean DAF over the target populations
                countedAF = 0
                target_DAF_sum = 0
                for i in range(3, N_of_targetpop + 3):
                    AF = _population_alt_freq(
                        snp_aligned[i], methodlist[i - 3],
                        vcfnameKEY_vcfobj_pyBAMfilesVALUE[vcfnamelist[i - 3]],
                        currentchrID, curpos, mindepthtojudefixed,
                        verbose=True)
                    if AF is None:
                        continue
                    target_DAF_sum += (1 - AF) if A_base_idx == 0 else AF
                    countedAF += 1
                if countedAF == 0:
                    print(
                        "skip this snp,because it fiexd as ancestral or no covered in this pos in target pops",
                        snp_aligned, snp)
                    continue
                target_DAF = target_DAF_sum / countedAF

                ########## y-axis: mean DAF over the reference populations
                # Reference populations additionally require AN >= 5
                # (individual calls) or a pooled depth of at least 10.
                countedAF = 0
                rer_DAF_sum = 0
                for i in range(3 + N_of_targetpop,
                               N_of_refpop + N_of_targetpop + 3):
                    AF = _population_alt_freq(
                        snp_aligned[i], methodlist[i - 3],
                        vcfnameKEY_vcfobj_pyBAMfilesVALUE[vcfnamelist[i - 3]],
                        currentchrID, curpos, mindepthtojudefixed,
                        min_an=5, min_pool_depth=10)
                    if AF is None:
                        continue
                    rer_DAF_sum += (1 - AF) if A_base_idx == 0 else AF
                    countedAF += 1
                if countedAF == 0 or rer_DAF_sum == 0:
                    print(
                        "skip this snp,because it no covered in this pos in ref pops",
                        snp_aligned, snp)
                    continue

                ###### collect according to bins
                for bin_low, bin_high in sorted_bins:
                    if bin_low < target_DAF <= bin_high:
                        freq_xaxisKEY_yaxisVALUE_seq_list[(
                            bin_low, bin_high)].append(rer_DAF_sum / countedAF)
                        break
    finally:
        # Release every BAM handle opened while reading the config files.
        for handles in vcfnameKEY_vcfobj_pyBAMfilesVALUE.values():
            for samfile in handles[1:]:
                samfile.close()

    print("process ID:", os.getpid(), "done")
    return copy.deepcopy(freq_xaxisKEY_yaxisVALUE_seq_list)
sitesingap = open(sys.argv[5], 'w') if __name__ == '__main__': win = Util.Window() i = 0 interferf = open(sys.argv[5] + ".InterferingTEMP", 'w') for gapregion in gapf: i += 1 filledsites = [] gaplist = re.split(r"\s+", gapregion.strip()) if not os.path.exists(sys.argv[5] + "temp" + str(i) + ".recode.vcf"): os.system( vcftools + " --vcf " + sys.argv[2] + " --recode --recode-INFO-all --remove-indv DSW33216 --chr " + gaplist[0] + " --from-bp " + str(gaplist[1]) + " --to-bp " + str(gaplist[2]) + " --out " + sys.argv[5] + "temp" + str(i)) vcfobj = VCFutil.VCF_Data(sys.argv[5] + "temp" + str(i) + ".recode.vcf") vcflist = vcfobj.getVcfListByChrom(gaplist[0], MQfilter=0) findtagcaculator = Caculators.CaculatorToFindTAGs( mod="randomvcf", Interferingf=interferf) findtagcaculator.curchrom = gaplist[0] win.slidWindowOverlap(vcflist, int(gaplist[2]), winsize, winsize, findtagcaculator, int(gaplist[1])) filledsites = copy.deepcopy(win.winValueL) for s, e, n, poss in filledsites: if poss[0] != "NA": RefSeqMap = Util.getRefSeqBypos_faster(reff, refidx, gaplist[0], poss[0][0] - 35, poss[0][0] + 35, chrlenm[gaplist[0]])
dilute = 1 else: print("error") exit(-1) print(chromlisttosub) software = options.software.upper().strip() Morganperbp = float(options.Morganperbp) chromlistfile = open(options.chromlistfilename, "r") chromlist = [] for chrrow in chromlistfile: chrrowlist = re.split(r'\s+', chrrow.strip()) chromlist.append(chrrowlist[0].strip()) tempvcffile = open(outputprefix + ".vcf", "w") if __name__ == '__main__': vcfdata = VCFutil.VCF_Data(options.vcffilename.strip()) i = 0 outputfilepart = 0 sumRecOfVCF = 0 if chromlisttosub == None: lastpos = 0 for chrom in chromlist: if chrom not in vcfdata.chromOrder: continue vcfRecOfAChrom = vcfdata.getVcfListByChrom( chrom, dilute, dilutetodensity=dilutetodensity) if len(vcfRecOfAChrom) < 30: print("Call_geno_snp_ind_Style_software_cyclly", "skip chrom with snps less than 100") continue else:
# Sample status marks: "2" for samples in the affected list, "1" for samples
# in the unaffected list (presumably PLINK phenotype codes, given the
# .map/.ped outputs below — TODO confirm).
affectedlist = []
unaffectedlist = []
affectunaffectmark = {}
with open(options.affectedlist, 'r') as f:
    for line in f:
        affectedlist.append(line.strip())
        affectunaffectmark[line.strip()] = "2"
# NOTE(review): unaffectedlist is never filled here, unlike affectedlist;
# confirm whether later code expects it to hold the unaffected sample names.
with open(options.unaffectedlist, 'r') as f:
    for line in f:
        affectunaffectmark[line.strip()] = "1"

mapfile = open(options.output + ".map", "w")
pedfile = open(options.output + ".ped", "w")
vcfobj = VCFutil.VCF_Data(options.vcffile[0])

# chromchangemap: first column -> second column of the chrom map file.
chromchangemap = {}
# excludesitesMapBchr: chromosome -> list of integer positions to exclude.
excludesitesMapBchr = {}
with open(options.chrommap, 'r') as f:
    for line in f:
        if not line.strip():
            # A blank (e.g. trailing) line has no second column.
            continue
        linelist = re.split(r'\s+', line.strip())
        chromchangemap[linelist[0].strip()] = linelist[1].strip()

# This handle is left open on purpose: no close() is visible in this section,
# so closing is left to the code that follows.
f = open(options.excludesites, 'r')
for line in f:
    if not line.strip():
        continue
    linelist = re.split(r'\s+', line.strip())
    excludesitesMapBchr.setdefault(linelist[0], []).append(int(linelist[1]))
default=True, help="don't print status messages to stdout") (options, args) = parser.parse_args() vcfnameKEY_vcfobj_pyBAMfilesVALUE = {} if options.ancenstralref == None: archicpopvcfbamconfig = options.vcfbamconfig.strip() vcfconfig = open(archicpopvcfbamconfig, "r") for line in vcfconfig: vcffilename_obj = re.search(r"vcffilename=(.*)", line.strip()) if vcffilename_obj != None: vcfname = vcffilename_obj.group(1).strip() vcfnameKEY_vcfobj_pyBAMfilesVALUE[vcfname] = [] vcfnameKEY_vcfobj_pyBAMfilesVALUE[vcfname].append( VCFutil.VCF_Data(vcfname)) elif line.split(): vcfnameKEY_vcfobj_pyBAMfilesVALUE[vcfname].append( pysam.Samfile(line.strip(), 'rb')) vcfconfig.close() toplevelsnptablename = options.toplevelsnptable flanklen = int(options.flanklen.strip()) if __name__ == '__main__': ancestralalleletabletools = AncestralAlleletabletools( database=Util.vcfdbname, ip=Util.ip, usrname=Util.username, pw=Util.password, dbgenome=Util.genomeinfodbname) if options.mode.strip() == "1":
for k, v in MfileNameMap.items(): if k in VfileNameMap: VtoMmap[VfileNameMap[k]] = v print(VfileNameMap[k], v, sep="\t", file=mf) mf.close() myformatNamelist = [] f = open(options.myformatNamelistfile, "r") for my_Sample_name in f: myformatNamelist.append(my_Sample_name.strip()) f.close() #find the same indvd END refFastahandle1 = open(refFastaName1, 'r') refFastahandle2 = open(refFastaName2, 'r') vcfdataset = VCFutil.VCF_Data(options.vcffilename) for k, v in VtoMmap.items(): commsample_idxlistinM.append(myformatNamelist.index(v)) commsample_idxlistinV.append(vcfdataset.VcfIndexMap["title"].index(k)) # # bbb=Util.getRefSeqBypos_faster(refFastahandle, refidxByChr, "1", 1, 1) # print(bbb) # exit() for chrom in vcfdataset.chromOrder: vcfRecOfAChrom = vcfdataset.getVcfListByChrom(chrom, MQfilter=None) MFfRecOfAChrom = [] #read -M file and change format try: curMyFormatfile = open( 'Chr0' + chrom + options.myformatfilesuffix.strip(), 'r')
def fillarchicpop(self,
                  archicpopVcfFile,
                  depthFile,
                  chromtable,
                  archicpopNameindepthFile,
                  tablename="derived_alle_ref",
                  archicpopfieldNameintable="archicpop"):
    """Fill one population column of ``tablename`` for every stored SNP.

    SNPs that exist only in ``archicpopVcfFile`` but in none of the other
    populations' SNP sets are ignored: only positions already present in
    ``tablename`` are updated.

    For each SNP row the column ``archicpopfieldNameintable`` is set to

    * ``"<ALT>:<refdepth>,<altdepth>"`` when the position is found in the
      VCF (depths from ``DP4`` if present, otherwise summed from ``AD``),
    * ``"<ALT>:<depth>,0"`` when it is absent from the VCF but the depth
      file reports a depth above 1,
    * ``"no covered"`` otherwise.

    Args:
        archicpopVcfFile: Path of the population's VCF file.
        depthFile: Path of the GATK depth file; ``depthFile + ".index"`` is
            used as its index.
        chromtable: Name of the table listing the chromosomes.
        archicpopNameindepthFile: Sample name; the depth file column
            ``"Depth_for_" + name`` is read.
        tablename: Name of the SNP table to update.
        archicpopfieldNameintable: Name of the column to fill.
    """
    depthfile = Util.GATK_depthfile(depthFile, depthFile + ".index")
    try:
        species_idx = depthfile.title.index("Depth_for_" +
                                            archicpopNameindepthFile)
        archicpop = VCFutil.VCF_Data(archicpopVcfFile)
        totalChroms = self.dbtools.operateDB(
            "select", "select count(*) from " + chromtable)[0][0]
        # Page through the chromosome table 20 rows at a time.
        for offset in range(0, totalChroms, 20):
            currentsql = ("select * from " + chromtable +
                          " order by chrlength desc limit " + str(offset) +
                          ",20")
            result = self.dbtools.operateDB("select", currentsql)
            for row in result:
                currentchrID = row[0]
                print(currentchrID + ":", end="")
                # NOTE(review): the VCF file name is passed as the first
                # argument here; confirm this matches the signature of
                # VCF_Data.getVcfListByChrom.
                records = archicpop.getVcfListByChrom(archicpopVcfFile,
                                                      currentchrID)
                allsnpsInAchr = self.dbtools.operateDB(
                    "select", "select snp_pos,alt_base from " + tablename +
                    " where chrID='" + currentchrID + "'")
                for snp in allsnpsInAchr:
                    snp_pos = int(snp[0])
                    db_alt = snp[1]

                    # Binary search; assumes the records are sorted by
                    # position (element 0 of every record).
                    hit = None
                    low = 0
                    high = len(records) - 1
                    while low <= high:
                        mid = (low + high) >> 1
                        if records[mid][0] < snp_pos:
                            low = mid + 1
                        elif records[mid][0] > snp_pos:
                            high = mid - 1
                        else:
                            hit = records[mid]
                            break

                    if hit is not None:
                        # The ALT allele reported for a VCF hit is the VCF's
                        # own ALT, not the one stored in the database.
                        _, _, vcf_alt, INFO, FORMAT, samples = hit
                        refdep = 0
                        altalleledep = 0
                        dp4 = re.search(r"DP4=(\d*),(\d*),(\d*),(\d*)", INFO)
                        if dp4 is not None:  # vcf from samtools
                            refdep = int(dp4.group(1)) + int(dp4.group(2))
                            altalleledep = int(dp4.group(3)) + int(
                                dp4.group(4))
                        else:
                            AD_idx = re.split(":", FORMAT).index(
                                "AD")  # gatk GT:AD:DP:GQ:PL
                            for sample in samples:
                                sample_fields = re.split(":", sample)
                                if len(sample_fields) == 1:  # ./.
                                    continue
                                AD_depth = re.split(",",
                                                    sample_fields[AD_idx])
                                try:
                                    refdep += int(AD_depth[0])
                                    altalleledep += int(AD_depth[1])
                                except ValueError:
                                    print(sample, end="")
                        popsdata = (vcf_alt + ":" + str(refdep) + "," +
                                    str(altalleledep))
                    else:
                        # Position not in the VCF: fall back to the depth
                        # file to tell "covered, reference only" apart from
                        # "not covered".
                        depth_linelist = depthfile.getdepthByPos(
                            currentchrID, snp_pos)
                        if int(depth_linelist[species_idx]) <= 1:
                            popsdata = "no covered"
                        else:
                            popsdata = (db_alt + ":" +
                                        depth_linelist[species_idx] + ",0")

                    # NOTE(review): SQL is assembled by concatenation; the
                    # values are assumed to come from trusted files.
                    self.dbtools.operateDB(
                        "update", "update " + tablename + " set " +
                        archicpopfieldNameintable + " = '" + popsdata +
                        "' where chrID=" + "'" + currentchrID +
                        "' and snp_pos=" + str(snp[0]))
    finally:
        # Always release the depth file, as filldata does.
        depthfile.closedepthfile()
def filldata(self,
             vcfFileName,
             depthfileName,
             tablename="derived_alle_ref",
             posUniq=True,
             continuechrom=None,
             continuepos=None):
    """Insert the per-sample allele depths of a GATK VCF into ``tablename``.

    One column per sample named in the VCF title line is added to the table
    (through the ``mysql_sp_add_column`` procedure), then one row per VCF
    record is inserted unless a row with the same chromosome and position
    already exists.  A sample's value is its ``AD`` field; when the sample
    has no call, the depth file decides between ``"<depth>,0"`` and
    ``"no covered"`` (depth <= 1).

    Args:
        vcfFileName: Path of the VCF file to load.
        depthfileName: Path of the GATK depth file; ``depthfileName +
            ".index"`` is used as its index.
        tablename: Name of the table receiving the rows.
        posUniq: When true, a record at the same chromosome and position as
            the previously handled record is skipped.
        continuechrom: Together with ``continuepos``, resume after this
            record instead of starting at the first one.
        continuepos: See ``continuechrom``.
    """
    depthfile = Util.GATK_depthfile(depthfileName, depthfileName + ".index")
    try:
        with open(vcfFileName, 'r') as vcffile:
            # Skip the "##" meta lines; the next line must be the title.
            vcfline = vcffile.readline()
            while re.search(r'^##', vcfline) is not None:
                vcfline = vcffile.readline()
            if re.search(r'^#', vcfline) is not None:
                poptitlelist = re.split(r'\s+', vcfline.strip())[9:]
                print(poptitlelist)
            else:
                print(
                    "need title'#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT'"
                )
                exit(-1)
            for pop in poptitlelist:
                self.dbtools.operateDB("callproc",
                                       "mysql_sp_add_column",
                                       data=("life_pilot", tablename, pop,
                                             "varchar(128)", "default null"))

            def insert_record(vcflist):
                """Insert one split VCF record; return its (chrom, pos)."""
                rec_chrom = vcflist[0].strip()
                rec_pos = int(vcflist[1].strip())
                snpID = vcflist[2].strip()
                REF = vcflist[3].strip()
                ALT = vcflist[4].strip()
                format_fields = re.split(":", vcflist[8])
                AD_idx = format_fields.index("AD")  # gatk GT:AD:DP:GQ:PL
                popsdata = []  # depth for ref or alt, one entry per sample
                depth_linelist = None
                for sample_idx, sample in enumerate(vcflist[9:]):
                    samplename = poptitlelist[sample_idx]
                    species_idx = depthfile.title.index("Depth_for_" +
                                                        samplename)
                    sample_fields = re.split(":", sample)
                    if len(sample_fields) != len(format_fields):
                        # ./. : no variant information for this sample, so
                        # consult the depth file (read once per record).
                        if depth_linelist is None:
                            depth_linelist = depthfile.getdepthByPos(
                                rec_chrom, rec_pos)
                        if int(depth_linelist[species_idx]) <= 1:
                            popsdata.append('no covered')
                        else:
                            popsdata.append(depth_linelist[species_idx] +
                                            ",0")
                        continue
                    popsdata.append(sample_fields[AD_idx])
                # NOTE(review): the "not exists" clause is assembled by
                # concatenation; chromosome names are assumed trusted.
                sql = ("insert into " + tablename +
                       "(chrID,snp_pos,snpID,ref_base,alt_base," +
                       ",".join(poptitlelist) + ") select %s,%s,%s,%s,%s," +
                       "%s," * (len(poptitlelist) - 1) +
                       "%s from dual where not exists( select * from " +
                       tablename + " where " + tablename + ".chrID='" +
                       rec_chrom + "' and " + tablename + ".snp_pos=" +
                       str(rec_pos) + ")")
                params = (rec_chrom, rec_pos, snpID, REF, ALT) + tuple(popsdata)
                print(sql, params)
                self.dbtools.operateDB("insert", sql, data=params)
                return rec_chrom, rec_pos

            # Chromosome/position of the last handled record.
            chrom = None
            pos = None
            if continuechrom is not None and continuepos is not None:
                # Resume: jump to the chromosome and read up to (and
                # including) the record to continue after.
                print("filldata", continuechrom, continuepos)
                vcfpossearcher = VCFutil.VCF_Data(vcfFileName)
                vcffile.seek(vcfpossearcher.VcfIndexMap[continuechrom])
                vcfline = vcffile.readline()
                while vcfline:
                    vcflist = re.split(r'\s+', vcfline.strip())
                    chrom = vcflist[0].strip()
                    pos = int(vcflist[1].strip())
                    print(chrom, pos)
                    if chrom == continuechrom and pos == continuepos:
                        break
                    vcfline = vcffile.readline()
            else:
                # The first record decides whether the VCF comes from GATK
                # or from samtools (which carries DP4 in INFO).
                justiceGATKorSamtools = vcffile.readline()
                if justiceGATKorSamtools.strip():
                    vcflist = re.split(r'\s+', justiceGATKorSamtools.strip())
                    dp4 = re.search(r"DP4=(\d*),(\d*),(\d*),(\d*)",
                                    vcflist[7])
                    if dp4 is not None:  # vcf from samtools
                        print(
                            "function for samtools vcf is still need to be finish"
                        )
                        exit(-1)
                    chrom, pos = insert_record(vcflist)

            for vcfline in vcffile:
                print(vcfline)
                if not vcfline.strip():
                    continue
                vcflist = re.split(r'\s+', vcfline.strip())
                # A duplicate is the same position on the SAME chromosome;
                # comparing the position alone would drop the first record
                # of a new chromosome that happens to share the position.
                if (posUniq and chrom == vcflist[0].strip()
                        and pos == int(vcflist[1].strip())):
                    continue
                chrom, pos = insert_record(vcflist)
    finally:
        depthfile.closedepthfile()
"--outfileprename", dest="outfileprename", help="default infile1_infile2") parser.add_option("-2", "--ancenstral_or_derived", dest="ancenstral_or_derived", default="d", help="ancenstral(a) or derived(d)") (options, args) = parser.parse_args() mindeptojudgefix = 15 ##################### VCFobj = {} vcfnameKEY_depthfilename_titlenameVALUE_tojudgeancestrall = {} vcfnameKEY_depthobjVALUE_tojudgeancestral = {} VCFobj["wigeon"] = VCFutil.VCF_Data( "/home/bioinfo/liurui/data/vcffiles/uniqmap/taihudomesticgoose/taihudomesticgoose.pool.withindel.vcf" ) VCFobj["fanya"] = VCFutil.VCF_Data( "/home/bioinfo/liurui/data/vcffiles/uniqmap/fanya/fanya._pool.withindel.vcf" ) vcfnameKEY_depthfilename_titlenameVALUE_tojudgeancestrall[ "wigeon"] = Util.GATK_depthfile( "/home/bioinfo/liurui/data/depth/g_j_sm_k_l_y_f_w_pool/gjsmklyfw_gatk.depth", "/home/bioinfo/liurui/data/depth/g_j_sm_k_l_y_f_w_pool/gjsmklyfw_gatk.depth.index" ) #here is a temp trick not a error vcfnameKEY_depthfilename_titlenameVALUE_tojudgeancestrall[ "fanya"] = Util.GATK_depthfile( "/home/bioinfo/liurui/data/depth/g_j_sm_k_l_y_f_w_pool/gjsmklyfw_gatk.depth", "/home/bioinfo/liurui/data/depth/g_j_sm_k_l_y_f_w_pool/gjsmklyfw_gatk.depth.index" ) vcfnameKEY_depthobjVALUE_tojudgeancestral["wigeon"] = [