예제 #1
0
파일: homog.py 프로젝트: nloyfer/wgbs_tools
def main():
    """
    Generate homog files. Given a blocks file and pat[s],
    count the number of U,X,M reads for each block for each file
    """
    # Raises IllegalArgumentError for an unsupported nr_bits, an rlen below 3,
    # a malformed thresholds string, or a blocks file rejected by
    # is_block_file_nice.

    args = parse_args()
    if args.nr_bits not in (8, 16):
        raise IllegalArgumentError('nr_bits must be in {8, 16}')
    if args.rlen < 3:
        raise IllegalArgumentError('rlen must be >= 3')
    if args.thresholds is not None:
        # Expected form: "low,high" with 0 < low < high < 1.
        th = args.thresholds.split(',')
        if len(th) != 2:
            raise IllegalArgumentError('Invalid thresholds')
        try:
            low, high = float(th[0]), float(th[1])
        except ValueError:
            # Non-numeric fields are reported like every other bad option,
            # rather than leaking a bare ValueError from float().
            raise IllegalArgumentError('Invalid thresholds') from None
        if not (1 > high > low > 0):
            raise IllegalArgumentError('Invalid thresholds')
    # make sure homog tool is valid:
    validate_local_exe(homog_tool)

    pats = args.input_files
    validate_file_list(pats, '.pat.gz')

    outdir, prefix = parse_outdir_prefix(args)

    # load blocks:
    blocks_df = load_blocks_file(args.blocks_file)
    is_nice, msg = is_block_file_nice(blocks_df)
    if not is_nice:
        # Log the detailed reason before raising the generic error.
        homog_log(msg)
        raise IllegalArgumentError(f'Invalid blocks file: {args.blocks_file}')

    # Process the pat files in sorted order, one at a time.
    for pat in sorted(pats):
        homog_process(pat, blocks_df, args, outdir, prefix)
예제 #2
0
파일: cview.py 프로젝트: nloyfer/wgbs_tools
def view_gr(pat, args, get_cmd=False):
    """
    Build (and optionally run) the shell pipeline that views a pat file
    over the genomic region described by args.

    The whole-genome case streams the file with gunzip; otherwise tabix
    fetches the region, with the start moved back by MAX_PAT_LEN (never
    below 1). When get_cmd is true the command string is returned without
    being executed and without output redirection; otherwise the command is
    run through subprocess_wrap_sigpipe, redirected to args.out_path if set.
    """
    validate_single_file(pat, '.pat.gz')
    gr = GenomicRegion(args)
    whole_genome = gr.is_whole()

    if whole_genome:
        first_site = 1
        end_site = gr.genome.get_nr_sites() + 1
        reader = f'gunzip -c {pat} '
    else:
        first_site, end_site = gr.sites
        padded_start = max(1, first_site - MAX_PAT_LEN)
        reader = f'tabix {pat} {gr.chrom}:{padded_start}-{end_site - 1} '

    view_flags = set_view_flags(args)
    stages = [
        reader,
        f' | {cview_tool} --sites "{first_site}\t{end_site}" ',
        view_flags,
    ]

    # sub-sample reads, when the option exists and was supplied
    rate = getattr(args, 'sub_sample', None)
    if rate is not None:
        validate_local_exe(pat_sampler)
        stages.append(f' | {pat_sampler} {rate} ')

    if not whole_genome:
        stages.append(' | sort -k2,2n -k3,3 ')
    stages.append(f' | {collapse_pat_script} - ')

    cmd = ''.join(stages)
    if get_cmd:
        return cmd

    if args.out_path is not None:
        cmd = cmd + f' > {args.out_path}'
    subprocess_wrap_sigpipe(cmd)
예제 #3
0
def add_bed_to_cpgs(site_file, genome, out_path=None):
    """
    Pipe site_file through add_loci_tool, using the dict and chrom-CpG-size
    paths of the given genome.

    Output is redirected to out_path unless it is None or sys.stdout.
    Raises subprocess.CalledProcessError if the shell command fails.
    """
    validate_local_exe(add_loci_tool)
    ref = GenomeRefPaths(genome)

    pieces = [f'cat {site_file} | {add_loci_tool} {ref.dict_path} {ref.chrom_cpg_sizes}']
    redirect_to_file = out_path is not None and out_path != sys.stdout
    if redirect_to_file:
        pieces.append(f' > {out_path}')

    subprocess.check_call(''.join(pieces), shell=True)
예제 #4
0
def main():
    """
    Generate a beta file from a pat file
    """
    options = parse_args()
    # Check the pat2beta executable before touching any input.
    validate_local_exe(pat2beta_tool)
    # Convert each pat file in turn, writing into the chosen output directory.
    for pat_path in options.pat_paths:
        pat2beta(pat_path, options.out_dir, options, options.force)
예제 #5
0
def main():
    """
    Segment the genome, or a subset region, to homogenously methylated blocks.
    Input: one or more beta files to segment
    Output: blocks file (BED format + startCpG, endCpG columns)
    """
    args = parse_args()
    # Check the segmentation executable before parsing the beta inputs.
    validate_local_exe(segment_tool)
    segmenter = SegmentByChunks(args, parse_betas_input(args))
    segmenter.run()
예제 #6
0
파일: cview.py 프로젝트: nloyfer/wgbs_tools
def main():
    """ view pat file with the c++ engine """
    # parse_args() hands back the parser itself, so it stays available
    # for reporting option errors below.
    parser = parse_args()
    args = parser.parse_args()

    # validate input file
    pat = args.pat
    validate_single_file(pat)

    # a sub-sampling rate, when given, may not be negative
    rate = args.sub_sample
    if rate is not None and rate < 0:
        parser.error('[wt view] sub-sampling rate must be >= 0')

    validate_local_exe(cview_tool)
    cview(pat, args)
예제 #7
0
def main():
    """
    Add to bam file an extra field, YI:Z:{nr_meth},{nr_unmeth},
    to count Cytosine retention at CpG context.
    """
    # The docstring above doubles as the argparse description.
    base_parser = argparse.ArgumentParser(description=main.__doc__)
    parser = add_cpg_args(add_args(base_parser))
    args = parser.parse_args()

    validate_local_exe(add_cpg_count_tool)
    # Handle every input bam independently.
    for bam_path in args.bam:
        worker = BamMethylData(args, bam_path)
        worker.start_threads()
예제 #8
0
def main():
    """
    Run the WGBS pipeline to generate pat & beta files out of an input bam file
    """
    args = parse_args_snp_split(add_args_snp_splitt())

    # validate output dir:
    out_dir = args.out_dir
    if not op.isdir(out_dir):
        raise IllegalArgumentError(f'Invalid output dir: {args.out_dir}')

    validate_local_exe(allele_split_tool)

    # args.bam is a single path here; it is wrapped so the loop body can
    # use `continue` for the skip cases.
    for bam in (args.bam,):
        if not validate_bam(bam):
            eprint(f'[wt bam2pat] Skipping {bam}')
            continue

        # Expected pat path: the bam basename minus its last 4 characters,
        # plus PAT_SUFF.
        stem = op.basename(bam)[:-4]
        pat = op.join(args.out_dir, stem + PAT_SUFF)
        if delete_or_skip(pat, args.force):
            SNPSplit(args, bam)
예제 #9
0
def main():
    """
    Run the WGBS pipeline to generate pat & beta files out of an input bam file
    """
    # The docstring above doubles as the argparse description.
    parser = add_args(argparse.ArgumentParser(description=main.__doc__))
    args = parse_args(parser)

    # validate output dir:
    if not op.isdir(args.out_dir):
        raise IllegalArgumentError(f'Invalid output dir: {args.out_dir}')

    # Both external executables must be usable before any bam is processed.
    for tool in (match_maker_tool, patter_tool):
        validate_local_exe(tool)

    for bam in args.bam:
        if not validate_bam(bam):
            eprint(f'[wt bam2pat] Skipping {bam}')
            continue

        # Expected pat path: the bam basename minus its last 4 characters,
        # plus PAT_SUFF.
        stem = op.basename(bam)[:-4]
        pat = op.join(args.out_dir, stem + PAT_SUFF)
        if delete_or_skip(pat, args.force):
            Bam2Pat(args, bam)
예제 #10
0
파일: cview.py 프로젝트: nloyfer/wgbs_tools
def view_bed(pat, args):
    """
    Run the shell pipeline that views a pat file restricted to the blocks
    listed in args.bed_file.

    Columns 4-5 of the bed file are assumed to be startCpG, endCpG.
    Output goes to args.out_path when it is set.
    """
    bed_path = args.bed_file

    # Inspect the blocks file. If it is long and starts with "chr1", stream
    # the whole pat with gunzip instead of querying it with tabix.
    row_cap = 1e6
    blocks = load_blocks_file(bed_path, nrows=row_cap)
    if blocks.shape[0] == row_cap and blocks.iloc[0, 0] in ('1', 'chr1'):
        source_cmd = f'gunzip -c {pat} '
    else:
        # extended blocks:
        if bed_path.endswith('.gz'):
            bed_reader = 'gunzip -c'
        else:
            bed_reader = 'cat'
        source_cmd = bed_reader + f' {bed_path} | {cview_extend_blocks_script} | tabix -R - {pat} '

    view_flags = set_view_flags(args)
    stages = [source_cmd, f' | {cview_tool} {view_flags} --blocks_path {bed_path}']

    # sub-sample reads
    if args.sub_sample is not None:
        validate_local_exe(pat_sampler)
        stages.append(f' | {pat_sampler} {args.sub_sample} ')

    stages.append(f' | sort -k2,2n -k3,3 | {collapse_pat_script} - ')

    if args.out_path is not None:
        stages.append(f' > {args.out_path}')

    subprocess_wrap_sigpipe(''.join(stages))