def mito(args):
    """
    %prog mito chrM.fa input.bam

    Identify mitochondrial deletions.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text, so
    # the extended documentation is kept in comments to leave `--help` unchanged.
    #
    # args: list of command-line tokens. Exactly two positionals are required:
    #   1. the chrM FASTA file;
    #   2. either a single path ending in `.bam`, or a text file listing one
    #      BAM path per line.
    # Returns None. Returns early if the FASTA is missing, and exits through
    # sys.exit() after printing the help when the positional count is wrong.
    p = OptionParser(mito.__doc__)
    p.set_aws_opts(store="hli-mv-data-science/htang/mito-deletions")
    p.add_option(
        "--realignonly", default=False, action="store_true", help="Realign only"
    )
    p.add_option(
        "--svonly",
        default=False,
        action="store_true",
        help="Run Realign => SV calls only",
    )
    p.add_option(
        "--support", default=1, type="int", help="Minimum number of supporting reads"
    )
    p.set_home("speedseq", default="/mnt/software/speedseq/bin")
    p.set_cpus()
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    chrMfa, bamfile = args
    # NOTE(review): `output_path` and `nocleanup` are presumably registered by
    # set_aws_opts() -- confirm against the OptionParser implementation.
    store = opts.output_path
    cleanup = not opts.nocleanup

    if not op.exists(chrMfa):
        logging.debug("File `{}` missing. Exiting.".format(chrMfa))
        return

    # The `.fai` FASTA index is produced by `samtools faidx`; `samtools index`
    # only handles alignment files and would never create the file tested here.
    chrMfai = chrMfa + ".fai"
    if not op.exists(chrMfai):
        cmd = "samtools faidx {}".format(chrMfa)
        sh(cmd)

    if not bamfile.endswith(".bam"):
        # Treat the argument as a list file; close it promptly and ignore
        # blank lines so they do not turn into empty sample names.
        with open(bamfile) as fp:
            bamfiles = [x.strip() for x in fp]
        bamfiles = [x for x in bamfiles if x]
    else:
        bamfiles = [bamfile]

    if store:
        # Skip samples whose name (basename up to the first ".") already has a
        # `.depth` entry in the store listing.
        computed = set(
            op.basename(x).split(".")[0]
            for x in ls_s3(store)
            if x.endswith(".depth")
        )
        remaining_samples = [
            x for x in bamfiles if op.basename(x).split(".")[0] not in computed
        ]

        logging.debug(
            "Already computed on `{}`: {}".format(
                store, len(bamfiles) - len(remaining_samples)
            )
        )
        bamfiles = remaining_samples

    logging.debug("Total samples: {}".format(len(bamfiles)))

    for bamfile in bamfiles:
        run_mito(
            chrMfa,
            bamfile,
            opts,
            realignonly=opts.realignonly,
            svonly=opts.svonly,
            store=store,
            cleanup=cleanup,
        )
def batchlobstr(args):
    """
    %prog batchlobstr samples.csv

    Run lobSTR sequentially on list of samples. Each line contains:
    sample-name,s3-location
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text, so
    # the extended documentation is kept in comments to leave `--help` unchanged.
    #
    # args: list of command-line tokens; exactly one positional (the samples
    # CSV) is required, otherwise the help is printed and sys.exit() is called.
    # For every sample whose `<sample>.hg38.vcf.gz` is not in the listing
    # returned by ls_s3(store), one lobstr command line is printed to stdout,
    # with its tokens joined by `--sep`. Returns None.
    p = OptionParser(batchlobstr.__doc__)
    p.add_option("--sep", default=",", help="Separator for building commandline")
    p.set_home("lobstr", default="s3://hli-mv-data-science/htang/str-build/lobSTR/")
    p.set_aws_opts(store="hli-mv-data-science/htang/str-data")
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    (samplesfile,) = args
    # NOTE(review): `output_path` and `workdir` are presumably registered by
    # set_aws_opts() -- confirm against the OptionParser implementation.
    store = opts.output_path
    computed = ls_s3(store)
    skipped = total = 0
    # `with` guarantees the samples file is closed even if a row is malformed.
    with open(samplesfile) as fp:
        for row in fp:
            row = row.strip()
            if not row:
                # Blank (e.g. trailing) lines carry no sample; unpacking them
                # below would raise ValueError.
                continue
            total += 1
            sample, s3file = row.split(",")[:2]
            # The sample name is expected to be `<exec_id>_<sample_id>`.
            exec_id, sample_id = sample.split("_")
            # Derive the BAM location from the listed file name.
            bamfile = s3file.replace(".gz", "").replace(".vcf", ".bam")

            gzfile = sample + ".{0}.vcf.gz".format("hg38")
            if gzfile in computed:
                skipped += 1
                continue

            print(
                opts.sep.join(
                    "python -m jcvi.variation.str lobstr".split()
                    + [
                        "hg38",
                        "--input_bam_path",
                        bamfile,
                        "--output_path",
                        store,
                        "--sample_id",
                        sample_id,
                        "--workflow_execution_id",
                        exec_id,
                        "--lobstr_home",
                        opts.lobstr_home,
                        "--workdir",
                        opts.workdir,
                    ]
                )
            )
    logging.debug("Total skipped: {0}".format(percentage(skipped, total)))
def batchlobstr(args):
    """
    %prog batchlobstr samples.csv

    Run lobSTR sequentially on list of samples. Each line contains:
    sample-name,s3-location
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text, so
    # the extended documentation is kept in comments to leave `--help` unchanged.
    #
    # args: list of command-line tokens; exactly one positional (the samples
    # CSV) is required, otherwise the help is printed and sys.exit() is called.
    # Prints one lobstr command line (tokens joined by `--sep`) for each sample
    # whose `<sample>.hg38.vcf.gz` is not in the ls_s3(store) listing.
    # Returns None.
    p = OptionParser(batchlobstr.__doc__)
    p.add_option("--sep", default=",", help="Separator for building commandline")
    p.set_home("lobstr", default="s3://hli-mv-data-science/htang/str-build/lobSTR/")
    p.set_aws_opts(store="hli-mv-data-science/htang/str-data")
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    (samplesfile,) = args
    # NOTE(review): `output_path` and `workdir` are presumably registered by
    # set_aws_opts() -- confirm against the OptionParser implementation.
    store = opts.output_path
    computed = ls_s3(store)
    skipped = total = 0
    # Context manager closes the file even when a malformed row raises.
    with open(samplesfile) as fp:
        for row in fp:
            row = row.strip()
            if not row:
                # Ignore blank lines instead of failing on tuple unpacking.
                continue
            total += 1
            sample, s3file = row.split(",")[:2]
            # The sample name is expected to be `<exec_id>_<sample_id>`.
            exec_id, sample_id = sample.split("_")
            bamfile = s3file.replace(".gz", "").replace(".vcf", ".bam")

            gzfile = sample + ".{0}.vcf.gz".format("hg38")
            if gzfile in computed:
                skipped += 1
                continue

            command = "python -m jcvi.variation.str lobstr".split() + [
                "hg38",
                "--input_bam_path",
                bamfile,
                "--output_path",
                store,
                "--sample_id",
                sample_id,
                "--workflow_execution_id",
                exec_id,
                "--lobstr_home",
                opts.lobstr_home,
                "--workdir",
                opts.workdir,
            ]
            # Single-argument print() call: valid on Python 3, and on Python 2
            # it prints the same text as the former print statement.
            print(opts.sep.join(command))
    logging.debug("Total skipped: {0}".format(percentage(skipped, total)))
def batchlobstr(args):
    """
    %prog batchlobstr samples.csv

    Run lobSTR sequentially on list of samples. Each line contains:
    sample-name,s3-location
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text, so
    # the extended documentation is kept in comments to leave `--help` unchanged.
    #
    # args: list of command-line tokens; exactly one positional (the samples
    # CSV) is required, otherwise the help is printed and sys.exit() is called.
    # Prints one lobstr command line (tokens joined by `--sep`) for each sample
    # whose `<sample>.hg38.vcf.gz` is not in the ls_s3(store) listing.
    # Returns None.
    p = OptionParser(batchlobstr.__doc__)
    p.add_option("--sep", default=",", help="Separator for building commandline")
    p.set_aws_opts(store="hli-mv-data-science/htang/str")
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    (samplesfile,) = args
    # NOTE(review): this revision reads `opts.store`; confirm that the
    # set_aws_opts() in scope registers `store` (and `workdir`).
    store = opts.store
    computed = ls_s3(store)
    skipped = total = 0
    # Context manager closes the file even when a malformed row raises.
    with open(samplesfile) as fp:
        for row in fp:
            row = row.strip()
            if not row:
                # Ignore blank lines instead of failing on tuple unpacking.
                continue
            total += 1
            sample, s3file = row.split(",")[:2]
            bamfile = s3file.replace(".gz", "").replace(".vcf", ".bam")

            gzfile = sample + ".{0}.vcf.gz".format("hg38")
            if gzfile in computed:
                skipped += 1
                continue

            command = "python -m jcvi.variation.str lobstr".split() + [
                bamfile,
                "hg38",
                "--prefix",
                sample,
                "--workdir",
                opts.workdir,
                "--cleanup",
                "--store",
                opts.store,
            ]
            # Single-argument print() call: valid on Python 3, and on Python 2
            # it prints the same text as the former print statement.
            print(opts.sep.join(command))
    logging.debug("Total skipped: {0}".format(percentage(skipped, total)))