def vcf2pd_main(args):
    """
    Convert a VCF into one or more joblib DataFrames, one per region.

    Parses command-line ``args``, reads an optional regions file
    (tab-separated: chrom, start, end, name), and fans the per-region
    conversion work out through ``vpq.fchain``.
    """
    args = parse_args(args)
    vcf_fn = args.vcf_fn

    # We'll require regions to start; with no regions file, run a single
    # whole-file pass labeled 'all'.
    regions = []
    if args.regions is None:
        regions.append((None, None, None, 'all'))
    else:
        with open(args.regions, 'r') as fh:
            for line in fh:
                data = line.strip().split('\t')
                data[1] = int(data[1])
                data[2] = int(data[2])
                regions.append(data[:4])

    items = []
    for chrom, start, end, name in regions:
        # BUGFIX: the original tested `name is not None`, but every region
        # (including the default whole-file one, named 'all') has a
        # non-None name, so the plain "<out>.jl" branch was unreachable
        # and the whole-file output was written as "<out>Noneall.jl"
        # (embedding the string "None"). Keying on chrom restores the
        # intended plain output name for the whole-file run.
        if chrom is not None:
            oname = "%s%s%s.jl" % (args.out, chrom, name)
        else:
            oname = args.out + ".jl"
        items.append({"input": vcf_fn,
                      "parser": args.parser,
                      "output": oname,
                      "chrom": chrom,
                      "start": start,
                      "end": end})

    # must iterate once to ensure we got the results made
    # NOTE(review): `task` is expected to be defined at module level
    # elsewhere in this file — confirm.
    [_ for _ in vpq.fchain([task], items, args.threads)]
def main():
    """Demo driver: push range(10) through a small fchain pipeline."""
    # Pipeline stages: bare callables, or (callable, kwargs) pairs.
    stages = [(f1, {"f1arg": 'a'}),
              (f2, {"f2arg": 'boop'}),
              f3]
    data = range(10)
    # Worker count from the first CLI argument; default to 2 workers.
    if len(sys.argv) == 1:
        workers = 2
    else:
        workers = int(sys.argv[1])
    for item in vpq.fchain(stages, data, workers=workers):
        print("collected %r" % (item))
def __init__(self, vpds, threads):
    """Tally per-sample genotype counts across every loaded DataFrame."""
    super().__init__()
    stages = [vpq.jl_load, vpq.sample_gt_count]
    # sample name -> Counter of genotype states, merged across pieces
    self.result = defaultdict(Counter)
    # consolidate
    for chunk in vpq.fchain(stages, vpds, threads):
        for name in chunk["samples"]:
            self.result[name].update(chunk["sample_gt_count"][name])
def __init__(self, vpds, threads=1):
    """Accumulate SV counts broken down by size bin and SV type."""
    super().__init__()
    stages = [vpq.jl_load,
              vpq.categorize_sv,
              # This allows for some pieces to still be around, but as zeros...
              vpq.add_cnt_column,
              vpq.add_sizebin_column,
              (vpq.groupcnt, {"key": "sztype_count",
                              "group": ["szbin", "svtype"]})]
    # we have to make szbins a series so pd won't reorder it
    index = pd.MultiIndex.from_product([pd.Series(vpq.SZBINS),
                                        range(len(vpq.SV))])
    self.result = index.to_frame()
    self.result["cnt"] = 0
    for piece in vpq.fchain(stages, vpds, threads):
        self.result["cnt"] += piece["sztype_count"].fillna(0)
def __init__(self, vpds, threads=1):
    """
    Count SVs by type across all input DataFrames.

    Parameters
    ----------
    vpds : iterable of inputs consumed by ``vpq.jl_load``
    threads : int, worker count handed to ``vpq.fchain`` (default 1)
    """
    super().__init__()
    pipe = [vpq.jl_load,
            vpq.add_cnt_column,
            (vpq.groupcnt, {"key": "type_count", "group": ["svtype"]})]
    # Seed with zeros for every SV type so categories absent from the
    # data still report 0.
    self.result = pd.DataFrame(np.zeros(len(vpq.SV), dtype=int),
                               columns=["cnt"],
                               index=range(len(vpq.SV)))
    # Consolidate. (The original computed `total = len(vpds)` but never
    # used it; the dead local has been removed.)
    for piece in vpq.fchain(pipe, vpds, threads):
        self.result = self.result.add(piece["type_count"].fillna(0), axis=0)
    self.result = self.result.fillna(0)
def __init__(self, vpds, threads=1):
    """Tally SV counts per quality bin across the input DataFrames."""
    super().__init__()
    stages = [vpq.jl_load,
              vpq.add_cnt_column,
              (vpq.groupcnt, {"key": "qualbin_count",
                              "group": ["qualbin"]})]
    # Start from an all-zero frame indexed by every quality bin so bins
    # absent from the data remain at zero.
    zeros = np.zeros(len(vpq.QUALBINS), dtype=int)
    self.result = pd.DataFrame(zeros, columns=["cnt"], index=vpq.QUALBINS)
    # consolidate
    for chunk in vpq.fchain(stages, vpds, threads):
        self.result = self.result.add(chunk["qualbin_count"].fillna(0),
                                      axis=0)
    self.result = self.result.fillna(0)