def main():
    """Merge per-sample files into one wide, space-separated table.

    Command line: <pattern> <re_id> <f_dir>

    pattern -- handed to drdcommon.files_in_dir to select files in f_dir.
    re_id   -- regular expression; capture group 1, applied to each file
               name, is the sample id of that file.
    f_dir   -- directory to scan.

    Each selected file is passed to load_data together with its sample id
    and a shared nested dict, so all data is held in memory.  The table is
    emitted through out(): a header "chrm start <id> <id> ..." followed by
    one line per (chrm, start) pair, with one value per sample in the same
    order as the header.  A sample with no entry for a position is written
    as "NA" so that every row keeps the header's column count.

    Raises:
        Exception: if the sample id cannot be extracted from a file name.
    """
    if len(sys.argv) != 4:
        drdcommon.error("Wrong # of args", usage)

    pattern = sys.argv[1]
    re_id = sys.argv[2]  # Regular expression to extract id
    f_dir = sys.argv[3]

    l_ids = []
    l = lambda: defaultdict(l)
    h = l()  # hold all data in mem

    files_to_iterate = drdcommon.files_in_dir(f_dir, pattern)
    sys.stderr.write("# of files to process: " + str(len(files_to_iterate)) + "\n")

    for fn in files_to_iterate:
        try:
            sid = re.search(re_id, fn).group(1)
        except (AttributeError, IndexError, re.error):
            # AttributeError: no match (search returned None);
            # IndexError: the pattern has no group 1;
            # re.error: the pattern itself is invalid.
            # Anything else (e.g. KeyboardInterrupt) is left to propagate.
            raise Exception(
                'Problems extracting id using regular expression. '
                'File: [%s] regex: [%s]' % (fn, re_id))
        l_ids.append(sid)
        load_data(sid, fn, h)

    # print header
    out("chrm start ")
    for _id in l_ids:
        out("%s " % _id)
    out("\n")

    for chrm, one in h.items():
        for start, two in one.items():
            out("%s %s " % (str(chrm), str(start)))
            # Walk the samples in header order; dict iteration order is
            # unrelated to the order in which the header was written.
            for sid in l_ids:
                if sid in two:  # membership test does not create the key
                    out(str(two[sid][1]) + " ")
                else:
                    out("NA ")
            out("\n")
def main():
    """Print the tab-separated saturation report for the input stream.

    Parses the command line with parse_args, opens the stream returned by
    drdcommon.xopen("-") (presumably stdin -- confirm in drdcommon), and
    reports through drdcommon.error when drdcommon.data_in_stdin() is
    false.  A Saturation object is then built from the stream and
    args.at_least_seen, and its csv("\t") rendering is printed.
    """
    cli = parse_args()
    in_stream = drdcommon.xopen("-")

    has_input = drdcommon.data_in_stdin()
    if not has_input:
        drdcommon.error(usage)

    report = Saturation(in_stream, cli.at_least_seen)
    # Single parenthesised expression: identical output under the Python 2
    # print statement and the Python 3 print function.
    print(report.csv("\t"))
    in_stream.close()
def __init__(self, h):
    """Initialise the object from h and validate its id and path.

    h is forwarded unchanged to self.set_attr (its structure is not
    visible here).  Afterwards an empty-string id or a false result from
    self.valid_path() is reported through common.error.
    """
    self.set_attr(h)

    id_is_blank = self.id == ''
    if id_is_blank:
        common.error('I cannot create a bam without id ')

    path_is_valid = self.valid_path()
    if not path_is_valid:
        details = (self.path, self.id)
        common.error('Invalid path [%s] for bam with id = [%s]' % details)
def main():
    """Run do_work on the stream opened by drdcommon.xopen("-").

    The script accepts no command-line arguments; any extra argument is
    reported through drdcommon.error together with the usage text.
    """
    if len(sys.argv) != 1:
        drdcommon.error("Incorrect # of params.", usage)
        # Explicit return keeps the original if/else semantics in case
        # drdcommon.error returns instead of terminating.
        return

    reads_stream = drdcommon.xopen("-")
    do_work(reads_stream)
    reads_stream.close()
def main():
    """Print a per-coordinate table of values across all samples.

    Command line: <pattern> <re_id>

    Files in the current directory selected by
    drdcommon.files_in_dir(".", pattern) are loaded one at a time with
    load_data into a nested dict, walked here as
    type -> chrm -> coor -> sample id.  The sample id of a file is capture
    group 1 of re_id applied to the file name.

    Output (stdout, space separated): the header
    "chrm coor type num_over_zero <id> ..." and one line per
    (type, chrm, coor).  The num_over_zero column is the number of sample
    ids stored for that coordinate; a sample without a stored value is
    printed as 0.

    Raises:
        Exception: if the sample id cannot be extracted from a file name.
    """
    if len(sys.argv) != 3:
        drdcommon.error("Wrong # of args", usage)

    pattern = sys.argv[1]
    re_id = sys.argv[2]  # Regular expression to extract id

    l = lambda: defaultdict(l)
    h = l()
    l_ids = []

    for fn in drdcommon.files_in_dir(".", pattern):
        try:
            sid = re.search(re_id, fn).group(1)
        except (AttributeError, IndexError, re.error):
            # No match, no capture group 1, or an invalid pattern.  Other
            # exceptions (KeyboardInterrupt included) are not intercepted.
            raise Exception(
                'Problems extracting id using regular expression. '
                'File: [%s] regex: [%s]' % (fn, re_id))
        l_ids.append(sid)
        load_data(sid, fn, h)

    write = sys.stdout.write

    # print header
    write("chrm coor type num_over_zero ")
    for _id in l_ids:
        write("%s " % _id)
    write("\n")

    for t, one in h.items():
        for chrm, two in one.items():
            for coor, three in two.items():
                write("%s %s %s %s " % (str(chrm), str(coor), t, len(three)))
                for sid in l_ids:
                    # .get() never triggers the defaultdict factory, so a
                    # missing sample does not inflate len(three).
                    write(str(three.get(sid, 0)) + " ")
                write("\n")
def main():
    """Run ValidateChip with the four command-line arguments.

    Command line: <fn_sam> <fn_hits> <min_mapq> <pm_hits>; the last two
    are converted with int().  Any other argument count is reported through
    drdcommon.error together with the usage text.
    """
    if len(sys.argv) != 5:
        drdcommon.error("Incorrect # of params.", usage)
        # Mirrors the original else-branch: nothing more to do here.
        return

    fn_sam = sys.argv[1]
    fn_hits = sys.argv[2]
    min_mapq = int(sys.argv[3])
    pm_hits = int(sys.argv[4])

    validator = ValidateChip(fn_sam, fn_hits, min_mapq, pm_hits)
    validator.do_work()
def main():
    """Validate the argument count and hand the arguments to ValidateChip.

    Expects exactly four arguments after the program name: two file names
    followed by two integers (min_mapq and pm_hits).  With any other count
    drdcommon.error is called with the usage text.
    """
    expected_argc = 5
    if len(sys.argv) == expected_argc:
        file_args = sys.argv[1:3]
        int_args = [int(raw) for raw in sys.argv[3:]]
        call_args = list(file_args) + int_args
        ValidateChip(*call_args).do_work()
    else:
        drdcommon.error("Incorrect # of params.", usage)
def main():
    """Write a table with one row per (type, chrm, coor) and one column per sample.

    Command line: <pattern> <re_id>

    pattern selects files of the current directory through
    drdcommon.files_in_dir; re_id is a regular expression whose capture
    group 1, applied to a file name, gives that file's sample id.  Every
    file is loaded with load_data into a shared nested dict.

    The header is "chrm coor type num_over_zero" followed by the sample
    ids.  In each row num_over_zero is len() of the per-coordinate mapping
    (how many samples have a stored value) and samples without a value
    are written as 0.  Fields are separated, and rows terminated, by a
    single space followed by a newline.

    Raises:
        Exception: if the sample id cannot be extracted from a file name.
    """
    if len(sys.argv) != 3:
        drdcommon.error("Wrong # of args", usage)

    pattern, re_id = sys.argv[1], sys.argv[2]

    l = lambda: defaultdict(l)
    h = l()
    l_ids = []

    for fn in drdcommon.files_in_dir(".", pattern):
        try:
            match = re.search(re_id, fn)
            sid = match.group(1)
        except (AttributeError, IndexError, re.error):
            # AttributeError -> the regex did not match this file name,
            # IndexError     -> the regex has no capture group 1,
            # re.error       -> the regex is malformed.
            # A bare except here would also hide KeyboardInterrupt.
            raise Exception(
                'Problems extracting id using regular expression. '
                'File: [%s] regex: [%s]' % (fn, re_id))
        l_ids.append(sid)
        load_data(sid, fn, h)

    # print header
    header_cells = ["%s " % _id for _id in l_ids]
    sys.stdout.write("chrm coor type num_over_zero " + "".join(header_cells) + "\n")

    for t, one in h.items():
        for chrm, two in one.items():
            for coor, three in two.items():
                prefix = "%s %s %s %s " % (str(chrm), str(coor), t, len(three))
                # .get() does not insert missing keys into the defaultdict.
                cells = [str(three.get(sid, 0)) + " " for sid in l_ids]
                sys.stdout.write(prefix + "".join(cells) + "\n")
def main():
    """Feed the stream from drdcommon.xopen("-") to do_work.

    Two preconditions are checked first, each reported through
    drdcommon.error with the usage text: the script must be called without
    arguments, and drdcommon.data_in_stdin() must be true.
    """
    argc = len(sys.argv)
    if argc != 1:
        drdcommon.error("Wrong # of args", usage)

    stdin_has_data = drdcommon.data_in_stdin()
    if not stdin_has_data:
        drdcommon.error("No data in stdin.", usage)

    vcf_stream = drdcommon.xopen("-")
    do_work(vcf_stream)
    vcf_stream.close()
def main():
    """Run CnvStateMachine over the input stream with a float threshold.

    Command line: <threshold>.  The argument count and
    drdcommon.data_in_stdin() are checked first (failures go to
    drdcommon.error with the usage text); the threshold is parsed with
    float() after the stream has been opened.
    """
    argc = len(sys.argv)
    if argc != 2:
        drdcommon.error("Wrong # of args", usage)

    stdin_has_data = drdcommon.data_in_stdin()
    if not stdin_has_data:
        drdcommon.error("No data in stdin.", usage)

    ratios_stream = drdcommon.xopen("-")
    threshold = float(sys.argv[1])

    machine = CnvStateMachine(ratios_stream, threshold)
    machine.run()
def check_input():
    """Validate the command line and return (bam_fn, probes_fn).

    Expects exactly two arguments after the program name.  Both names are
    logged at INFO level; only the bam path is checked for existence (with
    os.path.isfile).  Problems are reported through drdcommon.error with
    the usage text.

    Returns:
        The tuple (sys.argv[1], sys.argv[2]).
    """
    argc = len(sys.argv)
    if argc != 3:
        drdcommon.error("Wrong # of args", usage)

    bam_fn = sys.argv[1]
    probes_fn = sys.argv[2]

    message = "bam: %s probes: %s" % (bam_fn, probes_fn)
    logging.info(message)

    bam_exists = os.path.isfile(bam_fn)
    if not bam_exists:
        drdcommon.error("Invalid bam file.", usage)

    return bam_fn, probes_fn
def __load_vcf(self):
    """Create self.vcf from self.fd_vcf and load its meta header.

    After loading, drdcommon.error is called when self.drop is set,
    self.coordinates_in_file is not, and the vcf reports fewer than two
    samples.
    """
    self.vcf = Vcf(self.fd_vcf)
    self.vcf.load_meta_header()

    # Guard clauses evaluated in the same order as the original
    # short-circuit expression.
    if not self.drop:
        return
    if self.coordinates_in_file:
        return
    if self.vcf.num_of_samples < 2:
        drdcommon.error(
            "I need a population level vcf in order to drop species snps.")
def main():
    """Load the data selected by <pattern> and draw a boxplot of it.

    Command line: <pattern>.  The pattern is passed to load_data and is
    also used as the plot title; y_limit is fixed at 50.  A wrong argument
    count is reported through drdcommon.error with the usage text.
    """
    if len(sys.argv) != 2:
        drdcommon.error("Incorrect # of params.", usage)
        # Same as falling out of the original else-branch.
        return

    pattern = sys.argv[1]
    loaded = load_data(pattern)
    boxplot(loaded, title=pattern, y_limit=50)
def main():
    """Draw the "MAF CRV" bar plot from a table file.

    Command line: <input table> <output file>.  The input is read with
    pd.read_table and its `counts` column is plotted by drdplots.barplot
    against ten fixed labels, "0-0.05" through "0.45-0.5".
    """
    if len(sys.argv) != 3:
        drdcommon.error("Wrong number of args. Need input tsv file and output png.")
        # Keeps the original if/else behaviour should error() return.
        return

    table_fn, output_fn = sys.argv[1], sys.argv[2]
    df = pd.read_table(table_fn)

    # Edges are kept as text so the labels are reproduced exactly,
    # without float formatting artefacts.
    edges = ["0", "0.05", "0.1", "0.15", "0.2", "0.25",
             "0.3", "0.35", "0.4", "0.45", "0.5"]
    labels = ["%s-%s" % (low, high) for low, high in zip(edges[:-1], edges[1:])]

    drdplots.barplot(df.counts, labels, "MAF CRV", ofn=output_fn)
def main():
    """Plot the (x, y) series parsed from the input stream.

    Command line: <title> <xlabel> <ylabel>.  drdcommon.data_in_stdin() is
    checked before the argument count; both failures are reported through
    drdcommon.error with usage=_usage.  The data comes from process_data
    applied to the stream returned by drdcommon.xopen("-").
    """
    stdin_has_data = drdcommon.data_in_stdin()
    if not stdin_has_data:
        drdcommon.error("I need a data stream in stdin.", usage=_usage)

    if len(sys.argv) != 4:
        drdcommon.error("Wrong number of parameters", usage=_usage)

    title, x_label, y_label = sys.argv[1:]

    stream = drdcommon.xopen("-")
    x, y = process_data(stream)
    plot(x, y, title, xlabel=x_label, ylabel=y_label)
def main():
    """Plot average read depth per genomic window from the input stream.

    Command line: <title>.  The axis labels are fixed ("genomic window" and
    "Average Read Depth").  Missing stdin data or a wrong argument count is
    reported through drdcommon.error, which receives the literal "-" as its
    usage argument.
    """
    stdin_has_data = drdcommon.data_in_stdin()
    if not stdin_has_data:
        drdcommon.error("I need a data stream in stdin.", usage="-")

    if len(sys.argv) != 2:
        drdcommon.error("Wrong number of parameters", usage="-")

    title = sys.argv[1]
    stream = drdcommon.xopen("-")
    x, y = process_data(stream)
    plot(x, y, title,
         xlabel="genomic window",
         ylabel="Average Read Depth")
def main():
    """Run do_work on the "-" stream and on the file named by argv[1].

    Both streams are opened with drdcommon.xopen and closed after do_work
    returns.  A wrong argument count is reported through drdcommon.error
    with the usage text.
    """
    # NOTE(review): four argv entries are required, yet only sys.argv[1] is
    # read in this function -- confirm whether the others are consumed
    # elsewhere or whether the expected count is wrong.
    if len(sys.argv) != 4:
        drdcommon.error("Incorrect # of params.", usage)
        # Equivalent to leaving the original else-branch.
        return

    vcf_stream = drdcommon.xopen("-")
    csv_stream = drdcommon.xopen(sys.argv[1])

    do_work(vcf_stream, csv_stream)

    vcf_stream.close()
    csv_stream.close()
def main():
    """Run do_work on the stream opened by drdcommon.xopen("-").

    This function never reads a positional argument, so every call that
    passes one or more arguments is rejected with "Wrong # of args".
    (The guard previously fired only for exactly one extra argument and
    silently accepted two or more.)  drdcommon.data_in_stdin() must also be
    true.  Both failures are reported through drdcommon.error with the
    usage text.

    The stream is closed even when do_work raises.
    """
    if len(sys.argv) != 1:
        drdcommon.error("Wrong # of args", usage)
    if not drdcommon.data_in_stdin():
        drdcommon.error("Need data in stdin.", usage)

    fd_vcf = drdcommon.xopen("-")
    try:
        do_work(fd_vcf)
    finally:
        fd_vcf.close()
def main():
    """Run do_work on the input stream with an integer window size.

    Command line: <w_size>.  The argument count and
    drdcommon.data_in_stdin() are checked first (failures go to
    drdcommon.error with the usage text).  The stream is opened before the
    window size is parsed with int(), and closed after do_work returns.
    """
    argc = len(sys.argv)
    if argc != 2:
        drdcommon.error("Wrong # of args", usage)

    stdin_has_data = drdcommon.data_in_stdin()
    if not stdin_has_data:
        drdcommon.error("No data in stdin.", usage)

    vcf_stream = drdcommon.xopen("-")
    window_size = int(sys.argv[1])

    do_work(vcf_stream, window_size)
    vcf_stream.close()
def main():
    """Run do_work on the input stream with a minimum number of samples.

    Command line: <min_num_samples>, parsed with int().  The argument count
    and drdcommon.data_in_stdin() are checked first; failures are reported
    through drdcommon.error with the usage text.

    The stdin check uses plain truthiness instead of `== False`, and the
    stream is closed even when do_work raises.
    """
    if len(sys.argv) != 2:
        drdcommon.error("Wrong # of args", usage)
    if not drdcommon.data_in_stdin():
        drdcommon.error("Need data in stdin.", usage)

    min_num_samples = int(sys.argv[1])

    fd_vcf = drdcommon.xopen("-")
    try:
        do_work(fd_vcf, min_num_samples)
    finally:
        fd_vcf.close()
def main():
    """Plot the log ratios read from the input stream against bin numbers.

    Command line: <title> <output.filename>.  The values come from
    process_data applied to the stream returned by drdcommon.xopen("-");
    bins are numbered from 1.  A wrong argument count is reported through
    drdcommon.error.
    """
    if len(sys.argv) != 3:
        drdcommon.error("Wrong number of args. <title> <output.filename>")
        # Same outcome as the original else-branch if error() returns.
        return

    stream = drdcommon.xopen("-")
    logratios = process_data(stream)
    last_bin = len(logratios) + 1
    bin_nums = range(1, last_bin)

    title, output_fn = sys.argv[1], sys.argv[2]
    plot(output_fn, bin_nums, logratios, title,
         xlabel="bin #",
         ylabel="log2ratios (sample/control)")
def main():
    """Run do_work on the "-" stream plus the two files named on the command line.

    Command line: <pheno tsv> <haplo tsv>.  All three streams are opened
    with drdcommon.xopen and closed, in opening order, after do_work
    returns.  A wrong argument count is reported through drdcommon.error
    with the usage text.
    """
    if len(sys.argv) != 3:
        drdcommon.error("Incorrect # of params.", usage)
        # Nothing else ran in the original else-branch either.
        return

    sources = ("-", sys.argv[1], sys.argv[2])
    streams = []
    for source in sources:
        streams.append(drdcommon.xopen(source))

    do_work(*streams)

    for stream in streams:
        stream.close()
def main():
    """Scatter-plot the standard-deviation distribution into std.dist.png.

    Takes no command-line arguments.  process_data turns the stream from
    drdcommon.xopen("-") into (std, counts); counts are passed through
    log_it(counts, 10) before plotting with drdplots.scatter_plot.
    """
    if len(sys.argv) != 1:
        drdcommon.error("Wrong number of args. Just need std values in stdin.")
        # Matches the original else-branch when error() returns.
        return

    fd = drdcommon.xopen("-")
    std, counts = process_data(fd)

    plot_options = {
        "title": "std dev freq of var allele ratios",
        "xlabel": "std deviation",
        "ylabel": "log10(counts)",
        "dot_size": 10,
    }
    drdplots.scatter_plot("std.dist.png", std, log_it(counts, 10), **plot_options)
    fd.close()
def main():
    """Read log ratios from the input stream and plot them per bin.

    Command line: <title> <output.filename>.  Any other argument count is
    reported through drdcommon.error.  Bin numbers run from 1 to the number
    of values returned by process_data.
    """
    have_expected_args = len(sys.argv) == 3
    if not have_expected_args:
        drdcommon.error("Wrong number of args. <title> <output.filename>")
    else:
        logratios = process_data(drdcommon.xopen("-"))
        bin_nums = range(1, len(logratios) + 1)
        title, output_fn = sys.argv[1:3]
        axis_labels = {
            "xlabel": "bin #",
            "ylabel": "log2ratios (sample/control)",
        }
        plot(output_fn, bin_nums, logratios, title, **axis_labels)
def main():
    """Run compute_ratios on the windows stream and a bam file.

    Command line: <bam file>.  Checks, in order: the argument count,
    drdcommon.data_in_stdin(), and (after the stream has been opened) that
    the bam path is an existing file.  Failures are reported through
    drdcommon.error with the usage text.
    """
    argc = len(sys.argv)
    if argc != 2:
        drdcommon.error("Wrong # of args", usage)

    stdin_has_data = drdcommon.data_in_stdin()
    if not stdin_has_data:
        drdcommon.error("No data in stdin.", usage)

    windows = drdcommon.xopen("-")

    bam_name = sys.argv[1]
    bam_exists = os.path.isfile(bam_name)
    if not bam_exists:
        drdcommon.error("Invalid bam file.", usage)

    compute_ratios(windows, bam_name)
def main():
    """Print bins for one chromosome of a bam file.

    Command line: <n_reads> <bam file> <target chrm>; n_reads is parsed
    with int().  The bam path must be an existing file and the target
    chromosome must be a key of the name-to-length mapping returned by
    gen_chrm_lenghts; failures are reported through drdcommon.error with
    the usage text.  The bam is opened with pysam.Samfile in "rb" mode.
    """
    if len(sys.argv) != 4:
        drdcommon.error("Wrong # of args", usage)

    n_reads = int(sys.argv[1])
    bam_name = sys.argv[2]
    target_chrm = sys.argv[3]

    bam_exists = os.path.isfile(bam_name)
    if not bam_exists:
        drdcommon.error("Invalid bam file.", usage)

    samfile = pysam.Samfile(bam_name, "rb")
    # The first element of the pair is not needed in this function.
    _chrms, chrm_name_to_length = gen_chrm_lenghts(samfile)

    if target_chrm not in chrm_name_to_length:
        samfile.close()
        drdcommon.error("Chrm not present in bam header.", usage)

    print_bins(samfile, n_reads, bam_name, target_chrm, chrm_name_to_length)
    samfile.close()
def main():
    """Run JoinData on the two command-line arguments.

    Command line: <sam_fn> <pattern_fn_hits>.  Any other argument count is
    reported through drdcommon.error with the usage text.
    """
    if len(sys.argv) != 3:
        drdcommon.error("Incorrect # of params.", usage)
        # Mirrors the original else-branch.
        return

    sam_fn = sys.argv[1]
    pattern_fn_hits = sys.argv[2]

    joiner = JoinData(sam_fn, pattern_fn_hits)
    joiner.do_work()
def __init__(self, fn):
    """Load the table at fn and check that all mandatory columns exist.

    The table is read with pd.read_table and stored in self.df.  For every
    name in self.MANDATORY_COLS that is not among the table's columns,
    common.error is called with a message naming the column.
    """
    table = pd.read_table(fn)
    self.df = table

    for required in self.MANDATORY_COLS:
        column_found = required in self.df.columns
        if column_found:
            continue
        common.error("I couldn't find column [%s] in project tsv" % required)
def main():
    """Run do_work with the single command-line argument <pattern>.

    Any other argument count is reported through drdcommon.error with the
    usage text.
    """
    if len(sys.argv) != 2:
        drdcommon.error("Incorrect # of params.", usage)
        # Same as the original else-branch: nothing further is executed.
        return

    do_work(sys.argv[1])
def __load_vcf(self):
    """Wrap self.fd_vcf in a Vcf object and read its meta header.

    Dropping (self.drop) without a coordinates file
    (self.coordinates_in_file) is reported through drdcommon.error when
    the vcf has fewer than two samples.
    """
    self.vcf = Vcf(self.fd_vcf)
    self.vcf.load_meta_header()

    # Flattened form of the original `a and (not b and c)`; the operands
    # are still evaluated left to right with short-circuiting.
    needs_population_vcf = (
        self.drop
        and not self.coordinates_in_file
        and self.vcf.num_of_samples < 2
    )
    if needs_population_vcf:
        message = "I need a population level vcf in order to drop species snps."
        drdcommon.error(message)
def check_logic(self):
    """Validate the parsed options and derive the experiment type.

    Reads self.options and self.args.  Every problem is reported through
    drdcommon.error.

    Checks performed:
      * options.vcf_fn must be set; "-" means stdin and requires
        drdcommon.data_in_stdin(); any other value must be an existing file.
      * options.coor_fn is optional.  When given, it may not be "-" at the
        same time as vcf_fn; a value other than "-" must be an existing
        file.  options.drop and options.coordinates_in_file are then set to
        True; without coor_fn, coordinates_in_file is set to False.
      * No positional arguments are accepted.

    Side effects:
        Sets self.exp_type to 'wes', 'wgs' or 'null' (from options.wes /
        options.wgs).  'null' is only accepted when options.list_s_snps is
        set.
    """
    options, args = self.options, self.args

    if not options.vcf_fn:
        drdcommon.error("Need vcf file.")
    if options.vcf_fn == '-' and not drdcommon.data_in_stdin():
        drdcommon.error("No data in stdin.")
    if options.vcf_fn != '-' and not os.path.isfile(options.vcf_fn):
        drdcommon.error("Vcf file does not exists.")

    if options.coor_fn:
        if options.coor_fn == '-' and options.vcf_fn == '-':
            drdcommon.error("I cannot read two streams from stdin.")
        # "-" stands for stdin and is never a file on disk; testing it with
        # isfile() would reject coordinates-from-stdin, the very case the
        # two-streams check above exists for.
        if options.coor_fn != '-' and not os.path.isfile(options.coor_fn):
            drdcommon.error("coor file does not exists.")
        options.drop = True
        options.coordinates_in_file = True
    else:
        options.coordinates_in_file = False

    if len(args) == 0:
        if options.wes:
            self.exp_type = 'wes'
        elif options.wgs:
            self.exp_type = 'wgs'
        else:
            self.exp_type = 'null'
            # Without an experiment type only the list-snps mode can run.
            if not self.options.list_s_snps:
                drdcommon.error("Experiment type not set.")
    else:
        drdcommon.error("Incorrect # of params.")