Exemplo n.º 1
0
def vcf2pd_main(args):
    """
    Run the program

    Parses the command line with `parse_args`, builds one work item per
    region and hands the items to `vpq.fchain` with `task` as the only
    pipeline step.

    :param args: raw arguments, passed straight to `parse_args`

    Without a regions file a single catch-all region named 'all' is used.
    With one, each non-blank line is split on tabs and its first four
    columns are read as chrom, start, end, name (start/end cast to int).
    """
    args = parse_args(args)
    vcf_fn = args.vcf_fn
    # We'll require regions to start
    regions = []
    if args.regions is None:
        regions.append((None, None, None, 'all'))
    else:
        with open(args.regions, 'r') as fh:
            for line in fh:
                line = line.strip()
                # A blank (typically trailing) line has no columns to parse
                # and would otherwise fail with IndexError on data[1]
                if not line:
                    continue
                data = line.split('\t')
                data[1] = int(data[1])
                data[2] = int(data[2])
                regions.append(data[:4])
    items = []
    for chrom, start, end, name in regions:
        # NOTE(review): every region built above carries a name, so the else
        # branch looks unreachable, and the catch-all region formats
        # chrom=None into the output name -- confirm both are intended.
        if name is not None:
            oname = "%s%s%s.jl" % (args.out, chrom, name)
        else:
            oname = args.out + ".jl"
        items.append({"input": vcf_fn, "parser": args.parser, "output": oname,
                      "chrom": chrom, "start": start, "end": end})
    # must iterate once to ensure we got the results made; the yielded values
    # themselves are not needed, so don't collect them into a list
    for _ in vpq.fchain([task], items, args.threads):
        pass
Exemplo n.º 2
0
def main():
    """
    Push range(10) through a three-step fchain and print what comes out.

    The worker count is 2 unless a first command-line argument is given,
    in which case that argument is converted to int and used instead.
    """
    if len(sys.argv) == 1:
        worker_count = 2
    else:
        worker_count = int(sys.argv[1])

    # build an fchain: steps are either a bare callable or (callable, kwargs)
    numbers = range(10)
    stages = [
        (f1, {"f1arg": 'a'}),
        (f2, {"f2arg": 'boop'}),
        f3,
    ]
    for collected in vpq.fchain(stages, numbers, workers=worker_count):
        print("collected %r" % (collected))
Exemplo n.º 3
0
 def __init__(self, vpds, threads):
     """
     Merge the "sample_gt_count" entries of every piece into self.result,
     a defaultdict holding one Counter per sample.

     :param vpds: pieces handed to vpq.fchain
     :param threads: passed to vpq.fchain as its third argument
     """
     super().__init__()
     self.result = defaultdict(Counter)
     stages = [vpq.jl_load, vpq.sample_gt_count]
     # consolidate: fold each piece's per-sample counts into the running total
     for chunk in vpq.fchain(stages, vpds, threads):
         for sample_name in chunk["samples"]:
             sample_counts = chunk["sample_gt_count"][sample_name]
             self.result[sample_name].update(sample_counts)
Exemplo n.º 4
0
 def __init__(self, vpds, threads=1):
     """
     Accumulate the "sztype_count" result of every piece into the "cnt"
     column of self.result, which is laid out over the product of
     vpq.SZBINS and range(len(vpq.SV)).

     :param vpds: pieces handed to vpq.fchain
     :param threads: passed to vpq.fchain as its third argument
     """
     super().__init__()
     stages = [
         vpq.jl_load,
         # This allows for some pieces to still be around, but as zeros...
         vpq.categorize_sv,
         vpq.add_cnt_column,
         vpq.add_sizebin_column,
         (vpq.groupcnt, {"key": "sztype_count", "group": ["szbin", "svtype"]}),
     ]
     # the size bins go in as a Series so pd won't reorder them
     size_bins = pd.Series(vpq.SZBINS)
     type_codes = range(len(vpq.SV))
     grid = pd.MultiIndex.from_product([size_bins, type_codes])
     self.result = grid.to_frame()
     self.result["cnt"] = 0
     for chunk in vpq.fchain(stages, vpds, threads):
         self.result["cnt"] += chunk["sztype_count"].fillna(0)
Exemplo n.º 5
0
 def __init__(self, vpds, threads=1):
     """
     Sum the "type_count" result of every piece into self.result, a
     DataFrame with a single "cnt" column indexed by range(len(vpq.SV)).

     :param vpds: pieces handed to vpq.fchain
     :param threads: passed to vpq.fchain as its third argument
     """
     super().__init__()
     pipe = [vpq.jl_load,
             vpq.add_cnt_column,
             (vpq.groupcnt, {"key": "type_count", "group": ["svtype"]})
             ]
     # Consolidate: start from an all-zero integer "cnt" column
     self.result = pd.DataFrame(np.zeros(len(vpq.SV), dtype=int),
                                columns=["cnt"],
                                index=range(len(vpq.SV)))
     for piece in vpq.fchain(pipe, vpds, threads):
         # NOTE(review): add() aligns on labels, so a label absent from a
         # piece would turn that row's running total into NaN and the final
         # fillna(0) would then reset it -- presumably groupcnt reports
         # every svtype for every piece; confirm.
         self.result = self.result.add(piece["type_count"].fillna(0), axis=0)
     self.result = self.result.fillna(0)
Exemplo n.º 6
0
    def __init__(self, vpds, threads=1):
        """
        Sum the "qualbin_count" result of every piece into self.result, a
        DataFrame with a single "cnt" column indexed by vpq.QUALBINS.

        :param vpds: pieces handed to vpq.fchain
        :param threads: passed to vpq.fchain as its third argument
        """
        super().__init__()
        stages = [
            vpq.jl_load,
            vpq.add_cnt_column,
            vpq.add_qualbin_column,
            (vpq.groupcnt, {"key": "qualbin_count", "group": ["qualbin"]}),
        ]
        # consolidate: begin with an all-zero integer column, one row per bin
        zero_counts = np.zeros(len(vpq.QUALBINS), dtype=int)
        self.result = pd.DataFrame(zero_counts, columns=["cnt"], index=vpq.QUALBINS)

        for chunk in vpq.fchain(stages, vpds, threads):
            chunk_counts = chunk["qualbin_count"].fillna(0)
            self.result = self.result.add(chunk_counts, axis=0)
        self.result = self.result.fillna(0)