def main(args):
    """Validate inputs and load the optional insertion library.

    Reads from ``args``: ``bamFileName``, ``alignopts`` (comma-separated
    ``key:value`` pairs or None), ``aligner`` and ``inslib`` (path or None).
    On success ``args.inslib`` is replaced by the value returned from
    ``load_inslib``.

    Exits with status 1 when the BAM index (``<bam>.bai``) is missing or the
    insertion library cannot be loaded.

    ``print`` is called with one parenthesised string so the statement
    behaves the same under Python 2 and Python 3.
    """
    print("INFO\t" + now() + "\tstarting " + sys.argv[0] + " called with args: " + ' '.join(sys.argv) + "\n")
    tmpbams = []  # temporary BAMs, each holds the realigned reads for one mutation
    exclfns = []  # 'exclude' files store reads to be removed from the original BAM due to deletions

    if not os.path.exists(args.bamFileName + '.bai'):
        sys.stderr.write("ERROR\t" + now() + "\tinput bam must be indexed, not .bai file found for " + args.bamFileName + " \n")
        sys.exit(1)

    alignopts = {}
    if args.alignopts is not None:
        # Split on the first ':' only so an option value may itself contain ':'.
        alignopts = dict(o.split(':', 1) for o in args.alignopts.split(','))

    aligners.checkoptions(args.aligner, alignopts, None, sv=True)

    # load insertion library if present
    if args.inslib is not None:
        inslib_path = args.inslib
        print("INFO\t" + now() + "\tloading insertion library from " + inslib_path)
        try:
            args.inslib = load_inslib(inslib_path)
        except Exception:
            # Report the failure with a full traceback, then stop.
            sys.stderr.write("ERROR\t" + now() + "\tfailed to load insertion library " + inslib_path + "\n")
            traceback.print_exc(file=sys.stderr)
            sys.stderr.write("\n")
            sys.exit(1)
def main(args):
    """Validate inputs and load the optional insertion library.

    Reads from ``args``: ``bamFileName``, ``alignopts`` (comma-separated
    ``key:value`` pairs or None), ``aligner`` and ``inslib`` (path or None).
    On success ``args.inslib`` is replaced by the value returned from
    ``load_inslib``.

    Exits with status 1 when the BAM index (``<bam>.bai``) is missing or the
    insertion library cannot be loaded.

    ``print`` is called with one parenthesised string so the statement
    behaves the same under Python 2 and Python 3.
    """
    print("INFO\t" + now() + "\tstarting " + sys.argv[0] + " called with args: " + ' '.join(sys.argv) + "\n")
    tmpbams = []  # temporary BAMs, each holds the realigned reads for one mutation
    exclfns = []  # 'exclude' files store reads to be removed from the original BAM due to deletions

    if not os.path.exists(args.bamFileName + '.bai'):
        sys.stderr.write("ERROR\t" + now() + "\tinput bam must be indexed, not .bai file found for " + args.bamFileName + " \n")
        sys.exit(1)

    alignopts = {}
    if args.alignopts is not None:
        # Split on the first ':' only so an option value may itself contain ':'.
        alignopts = dict(o.split(':', 1) for o in args.alignopts.split(','))

    aligners.checkoptions(args.aligner, alignopts, None, sv=True)

    # load insertion library if present
    if args.inslib is not None:
        inslib_path = args.inslib
        print("INFO\t" + now() + "\tloading insertion library from " + inslib_path)
        try:
            args.inslib = load_inslib(inslib_path)
        except Exception:
            # Report the failure with a full traceback, then stop.
            sys.stderr.write("ERROR\t" + now() + "\tfailed to load insertion library " + inslib_path + "\n")
            traceback.print_exc(file=sys.stderr)
            sys.stderr.write("\n")
            sys.exit(1)
def _cluster_targets(targets, hapsize):
    """Group position-sorted targets into haplotype clusters.

    A target joins the current cluster when it lies on the same chromosome as
    the cluster's first target and starts fewer than ``hapsize`` bases after
    it; otherwise it opens a new cluster.  Returns a list of non-empty lists
    (an empty list when ``targets`` is empty).
    """
    clusters = []
    current = []
    anchor_chrom = None
    anchor_start = None
    for target in targets:
        joins_current = (
            current
            and target['chrom'] == anchor_chrom
            and target['start'] - anchor_start < hapsize
        )
        if not joins_current:
            if current:
                clusters.append(current)
            current = []
            # Re-anchor on every new cluster so later targets are compared
            # against this cluster, not against the very first target seen.
            anchor_chrom = target['chrom']
            anchor_start = target['start']
        current.append(target)
    if current:
        clusters.append(current)
    return clusters


def main(args):
    """Entry point: add SNVs listed in a BED-like file to a BAM.

    Reads from ``args``: ``varFileName``, ``refFasta``, ``bamFileName``,
    ``outBamFile``, ``alignopts``, ``aligner``, ``picardjar``, ``avoidreads``,
    ``tmpdir``, ``procs``, ``numsnvs`` (0 = no limit), ``haplosize``,
    ``maxopen``, ``skipmerge``, ``tagreads`` and ``seed``.

    Variant file columns (whitespace separated): chrom, start, end, optional
    VAF (4th), optional ALT base (5th, only read when the line has exactly
    five columns).  Blank lines are skipped.

    Exits with status 1 when ``<bam>.bai`` is missing.  Exits via a bare
    ``sys.exit()`` when no job produced a BAM (kept from the original; note
    that this reports exit status 0).

    Raises:
        AssertionError: ALT is not one of A/T/C/G, or a working directory
            could not be created.
    """
    logger.info("starting %s called with args: %s", sys.argv[0], ' '.join(sys.argv))

    if not os.path.exists(args.bamFileName + '.bai'):
        logger.error("input bam must be indexed, not .bai file found for %s", args.bamFileName)
        sys.exit(1)

    # The reference handle is not used in this function; open and close it so
    # a problem opening the reference still surfaces here, without leaking it.
    pysam.Fastafile(args.refFasta).close()

    # Read the targets up front so a bad variant file fails before any
    # temporary file or directory is created.
    maxsnvs = int(args.numsnvs)
    targets = []
    with open(args.varFileName, 'r') as bedfile:
        for bedline in bedfile:
            if maxsnvs != 0 and len(targets) >= maxsnvs:
                break
            c = bedline.strip().split()
            if not c:
                continue  # blank line
            target = {
                'chrom': c[0],
                'start': int(c[1]),
                'end': int(c[2]),
                'vaf': None,
                'altbase': None,
            }
            # VAF is 4th column, if present
            if len(c) > 3:
                target['vaf'] = float(c[3])
            # ALT is 5th column, if present
            if len(c) == 5:
                altbase = c[4].upper()
                if altbase not in ('A', 'T', 'C', 'G'):
                    # Explicit raise: a bare assert would be skipped under -O.
                    raise AssertionError("ERROR:\t" + now() + "\tALT " + altbase + " not A, T, C, or G!\n")
                target['altbase'] = altbase
            targets.append(target)

    alignopts = {}
    if args.alignopts is not None:
        # Split on the first ':' only so an option value may itself contain ':'.
        alignopts = dict(o.split(':', 1) for o in args.alignopts.split(','))

    aligners.checkoptions(args.aligner, alignopts, args.picardjar)

    # load readlist to avoid, if specified
    avoid = None
    if args.avoidreads is not None:
        avoid = dictlist(args.avoidreads)

    # make a temporary file to hold mutated reads (header copied from the input BAM)
    outbam_mutsfile = "addsnv." + str(uuid4()) + ".muts.bam"
    bamfile = pysam.Samfile(args.bamFileName, 'rb')
    outbam_muts = pysam.Samfile(outbam_mutsfile, 'wb', template=bamfile)
    outbam_muts.close()
    bamfile.close()

    logdir = 'addsnv_logs_' + os.path.basename(args.outBamFile)

    if not os.path.exists(args.tmpdir):
        os.mkdir(args.tmpdir)
        logger.info("created tmp directory: %s", args.tmpdir)

    if not os.path.exists(logdir):
        os.mkdir(logdir)
        logger.info("created directory: %s", logdir)

    assert os.path.exists(logdir), "could not create output directory!"
    assert os.path.exists(args.tmpdir), "could not create temporary directory!"

    # sort list of dicts by chrom, start, then group into haplotype clusters
    targets.sort(key=itemgetter('chrom', 'start'))
    haploclusters = _cluster_targets(targets, int(args.haplosize))

    pool = Pool(processes=int(args.procs))
    # make mutations (one pool job per cluster)
    results = [
        pool.apply_async(makemut, [args, hc, avoid, alignopts])
        for hc in haploclusters
    ]
    pool.close()  # nothing more will be submitted

    tmpbams = []
    for result in results:
        tmpbamlist = result.get()
        if tmpbamlist is not None:
            tmpbams.extend(bam for bam in tmpbamlist if os.path.exists(bam))
    pool.join()

    if not tmpbams:
        logger.error("no succesful mutations")
        sys.exit()

    # merge tmp bams
    if len(tmpbams) == 1:
        move(tmpbams[0], outbam_mutsfile)
    else:
        mergebams(tmpbams, outbam_mutsfile, maxopen=int(args.maxopen))

    # cleanup
    for bam in tmpbams:
        for leftover in (bam, bam + '.bai'):
            if os.path.exists(leftover):
                os.remove(leftover)

    if args.skipmerge:
        logger.info("skipping merge, plase merge reads from %s manually.", outbam_mutsfile)
    else:
        if args.tagreads:
            from bamsurgeon.markreads import markreads
            tmp_tag_bam = 'tag.%s.bam' % str(uuid4())
            markreads(outbam_mutsfile, tmp_tag_bam)
            move(tmp_tag_bam, outbam_mutsfile)
            logger.info("tagged reads.")

        logger.info("done making mutations, merging mutations into %s --> %s", args.bamFileName, args.outBamFile)
        replace(args.bamFileName, outbam_mutsfile, args.outBamFile, seed=args.seed)

        # cleanup: the muts BAM is only removed once it has been merged
        os.remove(outbam_mutsfile)
def main(args):
    """Entry point: add insertions/deletions listed in a BED-like file to a BAM.

    Reads from ``args``: ``varFileName``, ``refFasta``, ``bamFileName``,
    ``outBamFile``, ``alignopts``, ``aligner``, ``picardjar``, ``avoidreads``,
    ``tmpdir``, ``procs``, ``numsnvs`` (0 = no limit), ``maxopen``,
    ``skipmerge``, ``tagreads`` and ``seed``.

    Variant file columns (whitespace separated): chrom, start, end, VAF,
    type (``INS`` or ``DEL``) and, for ``INS``, the inserted sequence.
    Blank lines are skipped.

    Exits with status 1 when ``<bam>.bai`` is missing.  Exits via a bare
    ``sys.exit()`` when no job produced a BAM (kept from the original; note
    that this reports exit status 0).

    Raises:
        AssertionError: the type column is not INS/DEL, or a working
            directory could not be created.

    ``print`` is called with one parenthesised string so the statement
    behaves the same under Python 2 and Python 3.
    """
    print("INFO\t" + now() + "\tstarting " + sys.argv[0] + " called with args: " + ' '.join(sys.argv) + "\n")

    if not os.path.exists(args.bamFileName + '.bai'):
        sys.stderr.write("ERROR\t" + now() + "\tinput bam must be indexed, not .bai file found for " + args.bamFileName + " \n")
        sys.exit(1)

    # The reference handle is not used in this function; open and close it so
    # a problem opening the reference still surfaces here, without leaking it.
    pysam.Fastafile(args.refFasta).close()

    # Read the mutations up front so a bad variant file fails before any
    # temporary file or directory is created.
    maxmuts = int(args.numsnvs)
    mutations = []
    with open(args.varFileName, 'r') as bedfile:
        for bedline in bedfile:
            if maxmuts != 0 and len(mutations) >= maxmuts:
                break
            c = bedline.strip().split()
            if not c:
                continue  # blank line
            chrom = c[0]
            start = int(c[1])
            end = int(c[2])
            vaf = float(c[3])
            muttype = c[4]
            if muttype not in ('INS', 'DEL'):
                # Explicit raise: a bare assert would be skipped under -O.
                raise AssertionError("mutation type must be INS or DEL, got: " + muttype)
            ins = c[5] if muttype == 'INS' else None
            mutations.append((chrom, start, end, vaf, ins))

    alignopts = {}
    if args.alignopts is not None:
        # Split on the first ':' only so an option value may itself contain ':'.
        alignopts = dict(o.split(':', 1) for o in args.alignopts.split(','))

    aligners.checkoptions(args.aligner, alignopts, args.picardjar)

    # load readlist to avoid, if specified
    avoid = None
    if args.avoidreads is not None:
        avoid = dictlist(args.avoidreads)

    # make a temporary file to hold mutated reads (header copied from the input BAM)
    outbam_mutsfile = "addindel." + str(uuid4()) + ".muts.bam"
    bamfile = pysam.Samfile(args.bamFileName, 'rb')
    outbam_muts = pysam.Samfile(outbam_mutsfile, 'wb', template=bamfile)
    outbam_muts.close()
    bamfile.close()

    logdir = 'addindel_logs_' + os.path.basename(args.outBamFile)

    if not os.path.exists(args.tmpdir):
        os.mkdir(args.tmpdir)
        print("INFO\t" + now() + "\tcreated tmp directory: " + args.tmpdir)

    if not os.path.exists(logdir):
        os.mkdir(logdir)
        print("created directory: " + logdir)

    assert os.path.exists(logdir), "could not create output directory!"
    assert os.path.exists(args.tmpdir), "could not create temporary directory!"

    pool = Pool(processes=int(args.procs))
    # make mutations (one pool job per variant line, in file order)
    results = [
        pool.apply_async(makemut, [args, chrom, start, end, vaf, ins, avoid, alignopts])
        for chrom, start, end, vaf, ins in mutations
    ]
    pool.close()  # nothing more will be submitted

    tmpbams = []
    for result in results:
        try:
            tmpbamlist = result.get()
        except AssertionError:
            # A failed assertion in one job must not abort the others.
            print("****************************************************")
            print("* WARNING: assertion failed somewhere, check logs. *")
            print("****************************************************")
            continue
        if tmpbamlist is not None:
            tmpbams.extend(bam for bam in tmpbamlist if os.path.exists(bam))
    pool.join()

    if not tmpbams:
        print("INFO\t" + now() + "\tno succesful mutations")
        sys.exit()

    tmpbams.sort()

    # merge tmp bams
    if len(tmpbams) == 1:
        # move() also works when tmpdir is on another filesystem, where
        # os.rename() raises OSError.
        move(tmpbams[0], outbam_mutsfile)
    else:
        mergebams(tmpbams, outbam_mutsfile, maxopen=int(args.maxopen))

    # cleanup
    for bam in tmpbams:
        for leftover in (bam, bam + '.bai'):
            if os.path.exists(leftover):
                os.remove(leftover)

    if args.skipmerge:
        print("INFO\t" + now() + "\tskipping merge, plase merge reads from " + outbam_mutsfile + " manually.")
    else:
        if args.tagreads:
            from bamsurgeon.markreads import markreads
            tmp_tag_bam = 'tag.%s.bam' % str(uuid4())
            markreads(outbam_mutsfile, tmp_tag_bam)
            move(tmp_tag_bam, outbam_mutsfile)
            print("INFO\t" + now() + "\ttagged reads.")

        print("INFO\t" + now() + "\tdone making mutations, merging mutations into " + args.bamFileName + " --> " + args.outBamFile)
        replace(args.bamFileName, outbam_mutsfile, args.outBamFile, seed=args.seed)

        # cleanup: the muts BAM is only removed once it has been merged
        os.remove(outbam_mutsfile)
def main(args):
    """Entry point: add insertions/deletions to a BAM and write a VCF.

    Reads from ``args``: ``varFileName``, ``refFasta``, ``bamFileName``,
    ``outBamFile``, ``alignopts``, ``aligner``, ``picardjar``, ``avoidreads``,
    ``tmpdir``, ``procs``, ``numsnvs`` (0 = no limit), ``maxopen``,
    ``skipmerge``, ``tagreads`` and ``seed``.

    Variant file columns (whitespace separated): chrom, start, end, VAF,
    type (``INS`` or ``DEL``) and, for ``INS``, the inserted sequence.
    Blank lines are skipped.

    After the BAM step a VCF named
    ``<outBam stem>.addindel.<varFile stem>.vcf`` is written from the
    ``addindel_logs_<outBam basename>`` directory via
    ``makevcf.write_vcf_indel``.

    Exits with status 1 when ``<bam>.bai`` is missing.  Exits via a bare
    ``sys.exit()`` when no job produced a BAM (kept from the original; note
    that this reports exit status 0).

    Raises:
        AssertionError: the type column is not INS/DEL, or a working
            directory could not be created.
    """
    logger.info("starting %s called with args: %s", sys.argv[0], ' '.join(sys.argv))

    if not os.path.exists(args.bamFileName + '.bai'):
        logger.error("input bam must be indexed, not .bai file found for %s", args.bamFileName)
        sys.exit(1)

    # The reference handle is not used in this function; open and close it so
    # a problem opening the reference still surfaces here, without leaking it.
    pysam.Fastafile(args.refFasta).close()

    # Read the mutations up front so a bad variant file fails before any
    # temporary file or directory is created.
    maxmuts = int(args.numsnvs)
    mutations = []
    with open(args.varFileName, 'r') as bedfile:
        for bedline in bedfile:
            if maxmuts != 0 and len(mutations) >= maxmuts:
                break
            c = bedline.strip().split()
            if not c:
                continue  # blank line
            chrom = c[0]
            start = int(c[1])
            end = int(c[2])
            vaf = float(c[3])
            muttype = c[4]
            if muttype not in ('INS', 'DEL'):
                # Explicit raise: a bare assert would be skipped under -O.
                raise AssertionError("mutation type must be INS or DEL, got: " + muttype)
            ins = c[5] if muttype == 'INS' else None
            mutations.append((chrom, start, end, vaf, ins))

    alignopts = {}
    if args.alignopts is not None:
        # Split on the first ':' only so an option value may itself contain ':'.
        alignopts = dict(o.split(':', 1) for o in args.alignopts.split(','))

    aligners.checkoptions(args.aligner, alignopts, args.picardjar)

    # load readlist to avoid, if specified
    avoid = None
    if args.avoidreads is not None:
        avoid = dictlist(args.avoidreads)

    # make a temporary file to hold mutated reads (header copied from the input BAM)
    outbam_mutsfile = "addindel." + str(uuid4()) + ".muts.bam"
    bamfile = pysam.Samfile(args.bamFileName, 'rb')
    outbam_muts = pysam.Samfile(outbam_mutsfile, 'wb', template=bamfile)
    outbam_muts.close()
    bamfile.close()

    logdir = 'addindel_logs_' + os.path.basename(args.outBamFile)

    if not os.path.exists(args.tmpdir):
        os.mkdir(args.tmpdir)
        logger.info("created tmp directory: %s", args.tmpdir)

    if not os.path.exists(logdir):
        os.mkdir(logdir)
        logger.info("created directory: %s", logdir)

    assert os.path.exists(logdir), "could not create output directory!"
    assert os.path.exists(args.tmpdir), "could not create temporary directory!"

    pool = Pool(processes=int(args.procs))
    # make mutations (one pool job per variant line, in file order)
    results = [
        pool.apply_async(makemut, [args, chrom, start, end, vaf, ins, avoid, alignopts])
        for chrom, start, end, vaf, ins in mutations
    ]
    pool.close()  # nothing more will be submitted

    tmpbams = []
    for result in results:
        tmpbamlist = result.get()
        if tmpbamlist is not None:
            tmpbams.extend(bam for bam in tmpbamlist if os.path.exists(bam))
    pool.join()

    if not tmpbams:
        logger.error("no succesful mutations")
        sys.exit()

    tmpbams.sort()

    # merge tmp bams
    if len(tmpbams) == 1:
        # move() also works when tmpdir is on another filesystem, where
        # os.rename() raises OSError.
        move(tmpbams[0], outbam_mutsfile)
    else:
        mergebams(tmpbams, outbam_mutsfile, maxopen=int(args.maxopen))

    # cleanup
    for bam in tmpbams:
        for leftover in (bam, bam + '.bai'):
            if os.path.exists(leftover):
                os.remove(leftover)

    if args.skipmerge:
        logger.info("skipping merge, plase merge reads from %s manually.", outbam_mutsfile)
    else:
        if args.tagreads:
            from bamsurgeon.markreads import markreads
            tmp_tag_bam = 'tag.%s.bam' % str(uuid4())
            markreads(outbam_mutsfile, tmp_tag_bam)
            move(tmp_tag_bam, outbam_mutsfile)
            logger.info("tagged reads.")

        logger.info("done making mutations, merging mutations into %s --> %s", args.bamFileName, args.outBamFile)
        replace(args.bamFileName, outbam_mutsfile, args.outBamFile, seed=args.seed)

        # cleanup: the muts BAM is only removed once it has been merged
        os.remove(outbam_mutsfile)

    # File stems: everything before the last '.' of each basename.
    var_basename = '.'.join(os.path.basename(args.varFileName).split('.')[:-1])
    bam_basename = '.'.join(os.path.basename(args.outBamFile).split('.')[:-1])

    vcf_fn = bam_basename + '.addindel.' + var_basename + '.vcf'

    makevcf.write_vcf_indel(logdir, args.refFasta, vcf_fn)

    logger.info("vcf output written to %s", vcf_fn)
def main(args):
    """Entry point: add insertions/deletions listed in a BED-like file to a BAM.

    Reads from ``args``: ``varFileName``, ``refFasta``, ``bamFileName``,
    ``outBamFile``, ``alignopts``, ``aligner``, ``picardjar``, ``avoidreads``,
    ``tmpdir``, ``procs``, ``numsnvs`` (0 = no limit), ``maxopen``,
    ``skipmerge``, ``tagreads`` and ``seed``.

    Variant file columns (whitespace separated): chrom, start, end, VAF,
    type (``INS`` or ``DEL``) and, for ``INS``, the inserted sequence.
    Blank lines are skipped.

    Exits with status 1 when ``<bam>.bai`` is missing.  Exits via a bare
    ``sys.exit()`` when no job produced a BAM (kept from the original; note
    that this reports exit status 0).

    Raises:
        AssertionError: the type column is not INS/DEL, or a working
            directory could not be created.

    ``print`` is called with one parenthesised string so the statement
    behaves the same under Python 2 and Python 3.
    """
    print("INFO\t" + now() + "\tstarting " + sys.argv[0] + " called with args: " + ' '.join(sys.argv) + "\n")

    if not os.path.exists(args.bamFileName + '.bai'):
        sys.stderr.write("ERROR\t" + now() + "\tinput bam must be indexed, not .bai file found for " + args.bamFileName + " \n")
        sys.exit(1)

    # The reference handle is not used in this function; open and close it so
    # a problem opening the reference still surfaces here, without leaking it.
    pysam.Fastafile(args.refFasta).close()

    # Read the mutations up front so a bad variant file fails before any
    # temporary file or directory is created.
    maxmuts = int(args.numsnvs)
    mutations = []
    with open(args.varFileName, 'r') as bedfile:
        for bedline in bedfile:
            if maxmuts != 0 and len(mutations) >= maxmuts:
                break
            c = bedline.strip().split()
            if not c:
                continue  # blank line
            chrom = c[0]
            start = int(c[1])
            end = int(c[2])
            vaf = float(c[3])
            muttype = c[4]
            if muttype not in ('INS', 'DEL'):
                # Explicit raise: a bare assert would be skipped under -O.
                raise AssertionError("mutation type must be INS or DEL, got: " + muttype)
            ins = c[5] if muttype == 'INS' else None
            mutations.append((chrom, start, end, vaf, ins))

    alignopts = {}
    if args.alignopts is not None:
        # Split on the first ':' only so an option value may itself contain ':'.
        alignopts = dict(o.split(':', 1) for o in args.alignopts.split(','))

    aligners.checkoptions(args.aligner, alignopts, args.picardjar)

    # load readlist to avoid, if specified
    avoid = None
    if args.avoidreads is not None:
        avoid = dictlist(args.avoidreads)

    # make a temporary file to hold mutated reads (header copied from the input BAM)
    outbam_mutsfile = "addindel." + str(uuid4()) + ".muts.bam"
    bamfile = pysam.Samfile(args.bamFileName, 'rb')
    outbam_muts = pysam.Samfile(outbam_mutsfile, 'wb', template=bamfile)
    outbam_muts.close()
    bamfile.close()

    logdir = 'addindel_logs_' + os.path.basename(args.outBamFile)

    if not os.path.exists(args.tmpdir):
        os.mkdir(args.tmpdir)
        print("INFO\t" + now() + "\tcreated tmp directory: " + args.tmpdir)

    if not os.path.exists(logdir):
        os.mkdir(logdir)
        print("created directory: " + logdir)

    assert os.path.exists(logdir), "could not create output directory!"
    assert os.path.exists(args.tmpdir), "could not create temporary directory!"

    pool = Pool(processes=int(args.procs))
    # make mutations (one pool job per variant line, in file order)
    results = [
        pool.apply_async(makemut, [args, chrom, start, end, vaf, ins, avoid, alignopts])
        for chrom, start, end, vaf, ins in mutations
    ]
    pool.close()  # nothing more will be submitted

    tmpbams = []
    for result in results:
        try:
            tmpbamlist = result.get()
        except AssertionError:
            # A failed assertion in one job must not abort the others.
            print("****************************************************")
            print("* WARNING: assertion failed somewhere, check logs. *")
            print("****************************************************")
            continue
        if tmpbamlist is not None:
            tmpbams.extend(bam for bam in tmpbamlist if os.path.exists(bam))
    pool.join()

    if not tmpbams:
        print("INFO\t" + now() + "\tno succesful mutations")
        sys.exit()

    tmpbams.sort()

    # merge tmp bams
    if len(tmpbams) == 1:
        # move() also works when tmpdir is on another filesystem, where
        # os.rename() raises OSError.
        move(tmpbams[0], outbam_mutsfile)
    else:
        mergebams(tmpbams, outbam_mutsfile, maxopen=int(args.maxopen))

    # cleanup
    for bam in tmpbams:
        for leftover in (bam, bam + '.bai'):
            if os.path.exists(leftover):
                os.remove(leftover)

    if args.skipmerge:
        print("INFO\t" + now() + "\tskipping merge, plase merge reads from " + outbam_mutsfile + " manually.")
    else:
        if args.tagreads:
            from bamsurgeon.markreads import markreads
            tmp_tag_bam = 'tag.%s.bam' % str(uuid4())
            markreads(outbam_mutsfile, tmp_tag_bam)
            move(tmp_tag_bam, outbam_mutsfile)
            print("INFO\t" + now() + "\ttagged reads.")

        print("INFO\t" + now() + "\tdone making mutations, merging mutations into " + args.bamFileName + " --> " + args.outBamFile)
        replace(args.bamFileName, outbam_mutsfile, args.outBamFile, seed=args.seed)

        # cleanup: the muts BAM is only removed once it has been merged
        os.remove(outbam_mutsfile)
def _cluster_targets(targets, hapsize):
    """Group position-sorted targets into haplotype clusters.

    A target joins the current cluster when it lies on the same chromosome as
    the cluster's first target and starts fewer than ``hapsize`` bases after
    it; otherwise it opens a new cluster.  Returns a list of non-empty lists
    (an empty list when ``targets`` is empty).
    """
    clusters = []
    current = []
    anchor_chrom = None
    anchor_start = None
    for target in targets:
        joins_current = (
            current
            and target['chrom'] == anchor_chrom
            and target['start'] - anchor_start < hapsize
        )
        if not joins_current:
            if current:
                clusters.append(current)
            current = []
            # Re-anchor on every new cluster so later targets are compared
            # against this cluster, not against the very first target seen.
            anchor_chrom = target['chrom']
            anchor_start = target['start']
        current.append(target)
    if current:
        clusters.append(current)
    return clusters


def main(args):
    """Entry point: add SNVs listed in a BED-like file to a BAM.

    Reads from ``args``: ``varFileName``, ``refFasta``, ``bamFileName``,
    ``outBamFile``, ``alignopts``, ``aligner``, ``picardjar``, ``avoidreads``,
    ``tmpdir``, ``procs``, ``numsnvs`` (0 = no limit), ``haplosize``,
    ``maxopen``, ``skipmerge``, ``tagreads`` and ``seed``.

    Variant file columns (whitespace separated): chrom, start, end, optional
    VAF (4th), optional ALT base (5th, only read when the line has exactly
    five columns).  Blank lines are skipped.

    Exits with status 1 when ``<bam>.bai`` is missing.  Exits via a bare
    ``sys.exit()`` when no job produced a BAM (kept from the original; note
    that this reports exit status 0).

    Raises:
        AssertionError: ALT is not one of A/T/C/G, or a working directory
            could not be created.

    ``print`` is called with one parenthesised string so the statement
    behaves the same under Python 2 and Python 3.
    """
    print("INFO\t" + now() + "\tstarting " + sys.argv[0] + " called with args: " + ' '.join(sys.argv) + "\n")

    if not os.path.exists(args.bamFileName + '.bai'):
        sys.stderr.write("ERROR\t" + now() + "\tinput bam must be indexed, not .bai file found for " + args.bamFileName + " \n")
        sys.exit(1)

    # The reference handle is not used in this function; open and close it so
    # a problem opening the reference still surfaces here, without leaking it.
    pysam.Fastafile(args.refFasta).close()

    # Read the targets up front so a bad variant file fails before any
    # temporary file or directory is created.
    maxsnvs = int(args.numsnvs)
    targets = []
    with open(args.varFileName, 'r') as bedfile:
        for bedline in bedfile:
            if maxsnvs != 0 and len(targets) >= maxsnvs:
                break
            c = bedline.strip().split()
            if not c:
                continue  # blank line
            target = {
                'chrom': c[0],
                'start': int(c[1]),
                'end': int(c[2]),
                'vaf': None,
                'altbase': None,
            }
            # VAF is 4th column, if present
            if len(c) > 3:
                target['vaf'] = float(c[3])
            # ALT is 5th column, if present
            if len(c) == 5:
                altbase = c[4].upper()
                if altbase not in ('A', 'T', 'C', 'G'):
                    # Explicit raise: a bare assert would be skipped under -O.
                    raise AssertionError("ERROR:\t" + now() + "\tALT " + altbase + " not A, T, C, or G!\n")
                target['altbase'] = altbase
            targets.append(target)

    alignopts = {}
    if args.alignopts is not None:
        # Split on the first ':' only so an option value may itself contain ':'.
        alignopts = dict(o.split(':', 1) for o in args.alignopts.split(','))

    aligners.checkoptions(args.aligner, alignopts, args.picardjar)

    # load readlist to avoid, if specified
    avoid = None
    if args.avoidreads is not None:
        avoid = dictlist(args.avoidreads)

    # make a temporary file to hold mutated reads (header copied from the input BAM)
    outbam_mutsfile = "addsnv." + str(uuid4()) + ".muts.bam"
    bamfile = pysam.Samfile(args.bamFileName, 'rb')
    outbam_muts = pysam.Samfile(outbam_mutsfile, 'wb', template=bamfile)
    outbam_muts.close()
    bamfile.close()

    logdir = 'addsnv_logs_' + os.path.basename(args.outBamFile)

    if not os.path.exists(args.tmpdir):
        os.mkdir(args.tmpdir)
        print("INFO\t" + now() + "\tcreated tmp directory: " + args.tmpdir)

    if not os.path.exists(logdir):
        os.mkdir(logdir)
        print("INFO\t" + now() + "\tcreated directory: " + logdir)

    assert os.path.exists(logdir), "could not create output directory!"
    assert os.path.exists(args.tmpdir), "could not create temporary directory!"

    # sort list of dicts by chrom, start, then group into haplotype clusters
    targets.sort(key=itemgetter('chrom', 'start'))
    haploclusters = _cluster_targets(targets, int(args.haplosize))

    print("Debug, haploclusters:" + str(haploclusters))

    pool = Pool(processes=int(args.procs))
    # make mutations (one pool job per cluster)
    results = [
        pool.apply_async(makemut, [args, hc, avoid, alignopts])
        for hc in haploclusters
    ]
    pool.close()  # nothing more will be submitted

    tmpbams = []
    for result in results:
        tmpbamlist = result.get()
        if tmpbamlist is not None:
            tmpbams.extend(bam for bam in tmpbamlist if os.path.exists(bam))
    pool.join()

    if not tmpbams:
        print("INFO\t" + now() + "\tno succesful mutations")
        sys.exit()

    # merge tmp bams
    if len(tmpbams) == 1:
        move(tmpbams[0], outbam_mutsfile)
    else:
        mergebams(tmpbams, outbam_mutsfile, maxopen=int(args.maxopen))

    # cleanup
    for bam in tmpbams:
        for leftover in (bam, bam + '.bai'):
            if os.path.exists(leftover):
                os.remove(leftover)

    if args.skipmerge:
        print("INFO\t" + now() + "\tskipping merge, plase merge reads from " + outbam_mutsfile + " manually.")
    else:
        if args.tagreads:
            from bamsurgeon.markreads import markreads
            tmp_tag_bam = 'tag.%s.bam' % str(uuid4())
            markreads(outbam_mutsfile, tmp_tag_bam)
            move(tmp_tag_bam, outbam_mutsfile)
            print("INFO\t" + now() + "\ttagged reads.")

        print("INFO\t" + now() + "\tdone making mutations, merging mutations into " + args.bamFileName + " --> " + args.outBamFile)
        replace(args.bamFileName, outbam_mutsfile, args.outBamFile, seed=args.seed)

        # cleanup: the muts BAM is only removed once it has been merged
        os.remove(outbam_mutsfile)