"1": 42145699, '2': 49200776, '3': 50652576, "4": 40408058, "5": 47253416, "6": 36015257, "7": 35964515, "8": 40690061, "9": 58970518 } winsize = int(sys.argv[6]) reff = open(sys.argv[1], 'r') try: refidx = pickle.load(open(sys.argv[1] + ".myfasteridx", 'rb')) except IOError: Util.generateFasterRefIndex(sys.argv[1], sys.argv[1] + ".myfasteridx") refidx = pickle.load(open(sys.argv[1] + ".myfasteridx", 'rb')) vcftools = "vcftools" gapf = open(sys.argv[3], 'r') scoredsnp = open(sys.argv[4], 'r') scoredsnp.readline() sitesingap = open(sys.argv[5], 'w') if __name__ == '__main__': win = Util.Window() i = 0 interferf = open(sys.argv[5] + ".InterferingTEMP", 'w') for gapregion in gapf: i += 1 filledsites = [] gaplist = re.split(r"\s+", gapregion.strip()) if not os.path.exists(sys.argv[5] + "temp" + str(i) + ".recode.vcf"):
action="store_false", dest="verbose", default=True, help="don't print status messages to stdout") (options, args) = parser.parse_args() refFastaName1 = options.reffa[0] refFastaName2 = options.reffa[1] reffastaidxName1 = refFastaName1 + ".myfasteridx" reffastaidxName2 = refFastaName2 + ".myfasteridx" try: refidxByChr2 = pickle.load(open(reffastaidxName2, 'rb')) refidxByChr1 = pickle.load(open(reffastaidxName1, 'rb')) except IOError: Util.generateFasterRefIndex(refFastaName1, reffastaidxName1) Util.generateFasterRefIndex(refFastaName2, reffastaidxName2) refidxByChr1 = pickle.load(open(reffastaidxName1, 'rb')) refidxByChr2 = pickle.load(open(reffastaidxName2, 'rb')) commsample_idxlistinM = [] commsample_idxlistinV = [] degenerateM = { "R": "AG", "Y": "CT", "M": "AC", "K": "GT", "S": "GC", "W": "AT", "A": "AA", "T": "TT", "C": "CC",
# if options.depthfile!=None: # print(options.depthfile,"no need") # originalspeciesref=options.ancenstralref # colname=re.search(r'[^/]*$',originalspeciesref).group(0) # colname=re.sub(r"[^\w^\d]","_",colname);colname=colname[:10] # print(colname) # ancestralalleletabletools.dbvariant.operateDB("callproc", "mysql_sp_add_column", data=(ancestralalleletabletools.dbvariant, toplevelsnptablename, colname, "char(128)", "default null")) OUTFILENAME = "ducksnpflankseq.fa" outfile = open(options.chromlistfilename + "snpflankseq.fa", 'w') duckrefhandler = open(options.ref, 'r') try: duckrefindex = pickle.load(open(options.ref + ".myfasteridx", 'rb')) # originalspeciesindex = pickle.load(open(originalspeciesref + ".myindex", 'rb')) except IOError: Util.generateFasterRefIndex(options.ref, options.ref + ".myfasteridx") duckrefindex = pickle.load(open(options.ref + ".myfasteridx", 'rb')) # try: # originalspeciesindex = pickle.load(open(originalspeciesref + ".myindex", 'rb')) # except IOError: # Util.generateIndexByChrom(originalspeciesref, originalspeciesref + ".myindex") # originalspeciesindex = pickle.load(open(originalspeciesref + ".myindex", 'rb')) chrom_lenlist = [] chromlistfile = open(options.chromlistfilename, "r") for chrrow in chromlistfile: chrrowlist = re.split(r'\s+', chrrow.strip()) chrom_lenlist.append( (chrrowlist[0].strip(), int(chrrowlist[1].strip()))) for currentchrID, currentchrLen in chrom_lenlist:
for chrlist, vcflikeFileName, corresponding_ref, flanklen in options.variantfilewithref: chromlistfile = open(chrlist, "r") chrmap = {} for rec in chromlistfile: reclist = re.split(r'\s+', rec.strip()) chrmap[reclist[0]] = reclist[1] flanklen = int(flanklen) duckrefhandler = open(corresponding_ref, 'r') try: duckrefindex = pickle.load( open(corresponding_ref + ".myfasteridx", 'rb')) # originalspeciesindex = pickle.load(open(originalspeciesref + ".myindex", 'rb')) except IOError: Util.generateFasterRefIndex(corresponding_ref, corresponding_ref + ".myfasteridx", chrsignal=options.chrsignal) duckrefindex = pickle.load( open(corresponding_ref + ".myfasteridx", 'rb')) vcflikefile = open(vcflikeFileName, 'r') vcflinesalchr = vcflikefile.readlines() #1,read variations chrom = None snpsOfOneChrom = [] startpostocollecteSNP = 1 while vcflinesalchr: snpline = vcflinesalchr.pop(0).strip() if snpline[0] == "#" or snpline.lower().find("chrom") == 0: #title continue else: