def parse_args():
    """Define the command-line options for this tool and parse them.

    Besides the options declared here, ``add_GR_args`` (defined elsewhere)
    registers its own arguments on the same parser.

    Returns:
        argparse.Namespace: the parsed command line.
    """
    cli = argparse.ArgumentParser(description=main.__doc__)
    add = cli.add_argument

    add('unq_paths', nargs='+')
    add('-v', '--verbose',
        help='print the histogram values to stdout',
        action='store_true')
    add('-m', '--max_frag_size',
        help='Maximum fragment size. Longer fragments will be trimmed. [500]',
        default=500,
        type=int)
    add('--outdir', '-o',
        help='output directory for the histogram figure(s) [None]')
    add('--display',
        help='Display histogram plot(s) (plt.show)',
        action='store_true')
    add('-L', '--bed_file',
        help='pat: Only output reads overlapping the input BED FILE')

    add_GR_args(cli)
    parsed = cli.parse_args()
    return parsed
def parse_args():
    """Build the argument parser for this tool and parse the command line.

    ``add_GR_args`` (defined elsewhere) is invoked with ``bed_file=True``
    between ``--labels`` and ``--min_len``, so its options appear at that
    position in the generated help.

    Returns:
        argparse.Namespace: the parsed command line.
    """
    strict_help = ('Truncate reads that start/end outside the given region. '
                   'Only relevant if "region", "sites" or "bed_file" flags '
                   'are given.')

    cli = argparse.ArgumentParser(description=main.__doc__)
    add = cli.add_argument

    add('input_files', nargs='+')
    add('-p', '--prefix', required=True, help='Prefix of output file')
    add('-f', '--force',
        help='Overwrite existing file if existed',
        action='store_true')
    add('-T', '--temp_dir',
        help='passed to "sort -m". Useful for merging very large pat files')
    add('-v', '--verbose', action='store_true')
    add('--labels',
        help='labels for the mixed reads. Default is None',
        nargs='+')

    add_GR_args(cli, bed_file=True)

    # NOTE(review): the default is None although the help text advertises [1].
    add('--min_len',
        help='consider only reads covering at least MIN_LEN CpG sites [1]',
        default=None,
        type=int)
    add('--strict', help=strict_help, action='store_true')

    return cli.parse_args()
def add_view_flags(parser, sub_sample=True, out_path=True):
    """Attach the shared "view" options to an existing parser.

    Args:
        parser: the argparse parser to extend; it is also passed to
            ``add_GR_args`` (defined elsewhere) with ``bed_file=True``.
        sub_sample: when truthy, also register ``--sub_sample``.
        out_path: when truthy, also register ``-o/--out_path``.

    Returns:
        The same ``parser`` object that was passed in.
    """
    add_GR_args(parser, bed_file=True)

    add = parser.add_argument
    strict_help = ('pat: Truncate reads that start/end outside the given '
                   'region. Only relevant if "region", "sites" or '
                   '"bed_file" flags are given.')

    add('--strict', help=strict_help, action='store_true')
    add('--strip',
        help='pat: Remove trailing dots (from beginning/end of reads).',
        action='store_true')
    add('--min_len',
        help='pat: Display only reads covering at least MIN_LEN CpG sites [1]',
        default=1,
        type=int)

    # Optional flags, controlled by the caller.
    if sub_sample:
        add('--sub_sample',
            help='pat: subsample from reads. Only supported for pat',
            metavar='[0.0, 1.0]',
            type=float)
    if out_path:
        add('-o', '--out_path', help='Output path. [stdout]')

    return parser
def add_args(parser):
    """Register the bam-processing options on an existing parser.

    Defaults for ``--exclude_flags`` and ``--mapq`` come from the
    module-level ``FLAGS_FILTER`` and ``MAPQ`` values, which are also
    interpolated into the corresponding help texts. ``add_GR_args`` and
    ``add_multi_thread_args`` (both defined elsewhere) add their own options.

    Args:
        parser: the argparse parser to extend.

    Returns:
        The same ``parser`` object that was passed in.
    """
    add = parser.add_argument

    add('bam', nargs='+')
    add_GR_args(parser)

    add('--out_dir', '-o', default='.')
    add('--min_cpg',
        help='Reads covering less than MIN_CPG sites are removed [1]',
        default=1,
        type=int)
    add('--debug', '-d', action='store_true')
    add('--force', '-f',
        help='overwrite existing files if exists',
        action='store_true')
    add('--verbose', '-v', action='store_true')

    # samtools-view style filters
    flags_help = ('flags to exclude from bam file (samtools view parameter) '
                  f'[{FLAGS_FILTER}]')
    mapq_help = f'Minimal mapping quality (samtools view parameter) [{MAPQ}]'
    add('-F', '--exclude_flags',
        default=FLAGS_FILTER, type=int, help=flags_help)
    add('-q', '--mapq', default=MAPQ, type=int, help=mapq_help)

    add('--clip',
        help='Clip for each read the first and last CLIP characters [0]',
        default=0,
        type=int)

    add_multi_thread_args(parser)
    return parser
def parse_args():
    """Build the argument parser for this tool and parse the command line.

    ``add_GR_args`` (defined elsewhere) is called last with
    ``bed_file=True`` and registers its own options.

    Returns:
        argparse.Namespace: the parsed command line.
    """
    keep_na_help = ('If set, missing CpG sites are not removed from the output'
                    ' They are assigned with a "-1" value.')
    min_cov_help = ('Minimal coverage to consider when computing beta values.'
                    ' Default is 1 (include all observations). '
                    ' Sites with less than MIN_COV coverage are considered'
                    ' as missing.')

    cli = argparse.ArgumentParser(description=main.__doc__)
    add = cli.add_argument

    add('beta_paths', nargs='+')
    add('-f', '--force',
        help='Overwrite existing files if existed',
        action='store_true')
    add('--keep_na', help=keep_na_help, action='store_true')
    add('-b', '--bedGraph',
        help='Keep (gzipped) bedGraphs as well as bigwigs',
        action='store_true')
    add('--dump_cov',
        help='Generate coverage bigiwig in addition to beta values bigwig',
        action='store_true')
    add('-c', '--min_cov', help=min_cov_help, default=1, type=int)
    add('--outdir', '-o', help='Output directory. [.]', default='.')

    add_GR_args(cli, bed_file=True)
    return cli.parse_args()
def parse_args():
    """Build the visualisation CLI and parse the command line.

    Options prefixed "pat"/"Pat vis" and "beta vis" in their help text are
    declared side by side on one parser. ``add_GR_args`` (defined elsewhere)
    is called last with ``required=True``.

    Returns:
        argparse.Namespace: the parsed command line.
    """
    # todo: separate args parsing for beta and pat
    cli = argparse.ArgumentParser(description=main.__doc__)
    add = cli.add_argument

    add('input_files',
        help='A pat.gz file or one or more beta files',
        nargs='+')
    add('-d', '--dists',
        help='print results with distances (kind of log scale)',
        action='store_true')
    add('-t', '--title', help='A text to be printed before the results.')
    add('-o', '--output', help='beta vis: save plot to file')

    # A bare "-b" selects default_blocks_path; omitting the flag yields False.
    blocks_help = ('Display blocks borders. If [-b] is specified with no '
                   'blocks path, default blocks are used.')
    add('-b', '--blocks_path',
        default=False,
        const=default_blocks_path,
        nargs='?',
        help=blocks_help)

    add('--no_color', help='Print without colors.', action='store_true')
    add('--strict',
        help=('Truncate reads that start/end outside the given region. '
              'Only relevant for pat files.'),
        action='store_true')
    add('--max_reps', '-m',
        help=('Pat vis: Display a read at most "max_reps" times, '
              'if it is repeating itself. [10]'),
        default=10,
        type=int)
    add('--min_len',
        help=('Pat vis: Display only reads covering at least MIN_LEN '
              'CpG sites [1]'),
        default=1,
        type=int)
    add('--color_scheme', '-cs',
        help='beta vis: Color scheme. Possible values: 16 or 256 [256]',
        default=256,
        type=int)
    add('--plot',
        help='beta vis: plot results in a heatmap.',
        action='store_true')
    add('--no_dense',
        help=('pat: Do not squeeze multiple reads to every line.\n'
              'Each read appears in a different line.'),
        action='store_true')

    add_GR_args(cli, required=True)
    parsed = cli.parse_args()
    return parsed
def parse_args():
    """Parse the command line: one or more beta files plus ``--plot``.

    ``add_GR_args`` (called with ``bed_file=True``) and
    ``add_multi_thread_args`` are defined elsewhere and register their own
    options on the same parser.

    Returns:
        argparse.Namespace: the parsed command line.
    """
    cli = argparse.ArgumentParser(description=main.__doc__)
    cli.add_argument('betas', help='one or more beta files', nargs='+')
    cli.add_argument('--plot',
                     help='Plot histogram of coverages',
                     action='store_true')
    add_GR_args(cli, bed_file=True)
    add_multi_thread_args(cli)
    return cli.parse_args()
def parse_args():
    """Parse the command line: beta files and a minimal-coverage threshold.

    ``add_GR_args`` (defined elsewhere) registers its own options after
    the ones declared here.

    Returns:
        argparse.Namespace: the parsed command line.
    """
    min_cov_help = ('Minimal coverage to consider. Sites with coverage '
                    'lower than this value are ignored')

    cli = argparse.ArgumentParser(description=main.__doc__)
    cli.add_argument('betas', nargs='+')
    cli.add_argument('--min_cov', '-c',
                     help=min_cov_help,
                     default=10,
                     type=int)
    add_GR_args(cli)
    return cli.parse_args()
def parse_args():
    """Parse the command line: beta files, ``--plot`` and ``-L/--bed_file``.

    ``add_GR_args`` (defined elsewhere) is called without ``bed_file``;
    the ``-L`` option is declared explicitly here afterwards.

    Returns:
        argparse.Namespace: the parsed command line.
    """
    cli = argparse.ArgumentParser(description=main.__doc__)
    add = cli.add_argument

    add('betas', help='one or more beta files', nargs='+')
    add('--plot', help='Plot histogram of coverages', action='store_true')
    add_GR_args(cli)
    add('-L', '--bed_file',
        help='Only output coverage overlapping the input BED FILE. ')

    return cli.parse_args()
def parse_args():
    """Build the argument parser for this tool and parse the command line.

    ``add_GR_args`` (defined elsewhere) is called with ``bed_file=True`` and
    ``no_anno=True``; the object it returns is extended with ``--site_file``.
    ``add_multi_thread_args`` (defined elsewhere) is applied last.

    Returns:
        argparse.Namespace: the parsed command line.
    """
    cli = argparse.ArgumentParser(description=main.__doc__)

    # The helper hands back the container that --site_file joins.
    gr_group = add_GR_args(cli, bed_file=True, no_anno=True)
    site_file_help = ('text file with a single CpG indexes column,'
                      ' or <startCpG, endCpG> columns.\n'
                      'if "-" is passed, the file is read from stdin.')
    gr_group.add_argument('--site_file', help=site_file_help)

    add = cli.add_argument
    add('--out_path', '-o', help='Output path for bed file [stdout]')
    add('-d', '--debug', action='store_true')
    add('-p', '--parsable',
        help='Output a parsing friendly format (only work with -r/-s flags)',
        action='store_true')
    add('--drop_empty',
        help='Drop empty regions (without CpGs)',
        action='store_true')
    add('-f', '--force',
        help='Overwrite existing files if existed',
        action='store_true')

    add_multi_thread_args(cli)
    return cli.parse_args()
def parse_args():
    """Parse the command line: beta paths, ``--force``, ``--debug``, ``--outdir``.

    ``add_GR_args`` (defined elsewhere) registers its own options after
    the ones declared here.

    Returns:
        argparse.Namespace: the parsed command line.
    """
    cli = argparse.ArgumentParser(description=main.__doc__)
    add = cli.add_argument

    add('beta_paths', nargs='+')
    add('-f', '--force',
        help='Overwrite existing files if existed',
        action='store_true')
    add('-d', '--debug', action='store_true')
    add('--outdir', '-o',
        help='Output directory. Default is current directory [.]',
        default='.')

    add_GR_args(cli)
    return cli.parse_args()
def parse_args():
    """Build the "view" CLI for a single input file and parse the command line.

    ``-o/--out_path`` is opened for writing by argparse itself
    (``argparse.FileType('w')``) and falls back to ``sys.stdout``.
    ``add_GR_args`` (defined elsewhere) registers its options right after
    the positional ``input_file``.

    Returns:
        argparse.Namespace: the parsed command line.
    """
    cli = argparse.ArgumentParser(description=main.__doc__)
    add = cli.add_argument

    add('input_file')
    add_GR_args(cli)

    add('-o', '--out_path',
        help='Output path. [stdout]',
        default=sys.stdout,
        type=argparse.FileType('w'))

    # todo: support unq too
    add('--sub_sample',
        help='pat: subsample from reads. Only supported for pat',
        metavar='(0.0, 1.0)',
        type=float)
    add('-L', '--bed_file',
        help='pat: Only output reads overlapping the input BED FILE')

    # todo: add fractions to trimmed reads (optional flag)
    strict_help = ('pat: Truncate reads that start/end outside the given '
                   'region. Only relevant if "region", "sites" or '
                   '"bed_file" flags are given.')
    add('--strict', help=strict_help, action='store_true')

    add('--inflate',
        help='unq: add CpG-Index column to the output',
        action='store_true')
    add('--awk_engine',
        help=('pat: use awk engine instead of python.\n'
              'Its saves RAM when dealing with large regions.'),
        action='store_true')
    add('--multiprocess',
        help=('pat: If bed file is specified, use multiple processors to '
              'read multiple.\n'
              'regions in parallel. Default number of processors: 16.'),
        default=16,
        type=int)
    add('--min_len',
        help='Pat: Display only reads covering at least MIN_LEN CpG sites [1]',
        default=1,
        type=int)

    return cli.parse_args()
def parse_args():
    """Build the argument parser for a single beta file and parse the command line.

    ``add_GR_args`` (defined elsewhere) is called last with
    ``bed_file=True`` and registers its own options.

    Returns:
        argparse.Namespace: the parsed command line.
    """
    min_cov_help = ('Minimal coverage to consider when computing beta values.'
                    ' Default is 1 (include all observations). '
                    ' Sites with less than MIN_COV coverage are considered'
                    ' as missing.'
                    ' Only relevant if --mean is specified.')

    cli = argparse.ArgumentParser(description=main.__doc__)
    add = cli.add_argument

    add('beta_path')
    add('-f', '--force',
        help='Overwrite existing files if existed',
        action='store_true')
    add('--keep_na',
        help='If set, missing CpG sites are not removed from the output',
        action='store_true')
    add('--mean',
        help=('Output a mean methylation value column instead of '
              '<meth,cov> columns'),
        action='store_true')
    add('-c', '--min_cov', help=min_cov_help, default=1, type=int)
    add('--outpath', '-o',
        help='Output path. [stdout]',
        default='/dev/stdout')

    add_GR_args(cli, bed_file=True)
    return cli.parse_args()
def parse_args():
    """Build the argument parser for a single bam file and parse the command line.

    ``-@/--threads`` defaults to ``multiprocessing.cpu_count()``, evaluated
    when the parser is built. ``add_GR_args`` (defined elsewhere) registers
    its options right after the positional ``bam_path``.

    Returns:
        argparse.Namespace: the parsed command line.
    """
    cli = argparse.ArgumentParser(description=main.__doc__)
    add = cli.add_argument

    add('bam_path')
    add_GR_args(cli)

    add('--out_dir', '-o', default='.')
    add('--debug', '-d', action='store_true')
    add('-@', '--threads',
        help='Number of threads to use (default: multiprocessing.cpu_count)',
        default=multiprocessing.cpu_count(),
        type=int)
    add('--test',
        help='Perform a test for the pipeline. Ignore other parameters.',
        action='store_true')

    return cli.parse_args()
def add_args():
    """Create the argument parser for a pat-file tool without parsing anything.

    ``add_GR_args`` (called with ``bed_file=True, required=True``) and
    ``add_multi_thread_args`` are defined elsewhere; they register their
    options right after the positional ``pat`` argument.

    Returns:
        argparse.ArgumentParser: the configured parser; the caller is
        responsible for calling ``parse_args`` on it.
    """
    cli = argparse.ArgumentParser(description=main.__doc__)
    add = cli.add_argument

    add('pat', help='The input pat file')
    add_GR_args(cli, bed_file=True, required=True)
    add_multi_thread_args(cli)

    add('--strict',
        help='Truncate reads that start/end outside the given region.',
        action='store_true')
    add('--min_len',
        help='Only use reads covering at least MIN_LEN CpG sites [1]',
        default=1,
        type=int)
    add('--out_file', '-o',
        help='Output file name in which to write results',
        default='-')
    add('--verbose', '-v', action='store_true')

    return cli
def parse_args():
    """Build the argument parser for the beta plotting tool and parse the command line.

    ``add_GR_args`` (defined elsewhere) registers its own options after
    the ones declared here.

    Returns:
        argparse.Namespace: the parsed command line.
    """
    outpath_help = ('Dump figure to this path (e.g., pdf/png). '
                    'If not specified, --show flag is set')
    min_cov_help = ('Minimal coverage to consider. Sites with coverage '
                    'lower than this value are ignored')

    cli = argparse.ArgumentParser(description=main.__doc__)
    add = cli.add_argument

    add('betas', nargs='+')
    add('--outpath', '-o', help=outpath_help)
    add('--show',
        help='Display the figures using matplotlib.pyplot.show.',
        action='store_true')
    add('--min_cov', '-c', help=min_cov_help, default=10, type=int)

    add_GR_args(cli)
    return cli.parse_args()
def parse_args():
    """Create the visualisation argument parser.

    Despite its name, this function does not parse the command line: it
    returns the configured parser. ``add_GR_args``, ``pat_args`` and
    ``beta_args`` (all defined elsewhere) each register further options.

    Returns:
        argparse.ArgumentParser: the configured, not yet parsed, parser.
    """
    # todo: separate args parsing for beta and pat
    cli = argparse.ArgumentParser(description=main.__doc__)
    add = cli.add_argument

    add('input_files',
        help='A pat.gz file or one or more beta files',
        nargs='+')
    add('-t', '--title', help='A text to be printed before the results.')

    # A bare "-b" stores True; omitting the flag stores False.
    blocks_help = ('Display blocks borders. If [-b] is specified with no '
                   'blocks path, default blocks are used.')
    add('-b', '--blocks_path',
        default=False,
        const=True,
        nargs='?',
        help=blocks_help)
    add('--no_color', help='Print without colors.', action='store_true')

    add_GR_args(cli, required=True, no_anno=True)
    pat_args(cli)
    beta_args(cli)
    return cli
def parse_args():
    """Build the argument parser for the block-segmentation tool and parse the command line.

    Exactly one of ``--betas`` / ``--beta_file`` must be supplied (they form
    a required mutually exclusive group). ``add_GR_args`` (called first, with
    ``bed_file=True``) and ``add_multi_thread_args`` (called last) are
    defined elsewhere.

    Returns:
        argparse.Namespace: the parsed command line.
    """
    cli = argparse.ArgumentParser(description=main.__doc__)
    add_GR_args(cli, bed_file=True)

    # Input: either a list of beta files or a single file (-F).
    source = cli.add_mutually_exclusive_group(required=True)
    source.add_argument('--betas', nargs='+')
    source.add_argument('--beta_file', '-F')

    add = cli.add_argument
    add('-c', '--chunk_size',
        help=f'Chunk size. Default {DEF_CHUNK} sites',
        default=DEF_CHUNK,
        type=int)
    add('-p', '--pcount',
        help="Pseudo counts of C's and T's in each block. Default 15",
        default=15,
        type=float)

    min_cpg_help = ('Minimal block size (in #sites) to output. Shorter '
                    'blocks will simply be ommited from output (equivalent '
                    'to set min_cpg to 1 and then filter output by length). '
                    'Default is 1')
    add('--min_cpg', help=min_cpg_help, default=1, type=int)
    add('--max_cpg',
        help='Maximal allowed block size (in #sites). Default is 1000',
        default=1000,
        type=int)
    add('--max_bp',
        help='Maximal allowed block size (in bp). Default is 2000',
        default=2000,
        type=int)

    # NOTE(review): the default is the sys.stdout object, while a value
    # given on the command line arrives as a str.
    add('-o', '--out_path', help='output path [stdout]', default=sys.stdout)

    add_multi_thread_args(cli)
    parsed = cli.parse_args()
    return parsed
def parse_args():
    """Build the argument parser for this tool and parse the command line.

    ``add_GR_args`` (defined elsewhere) registers its options first.
    ``-@/--threads`` defaults to ``multiprocessing.cpu_count()``, evaluated
    when the parser is built.

    Returns:
        argparse.Namespace: the parsed command line.
    """
    cli = argparse.ArgumentParser(description=main.__doc__)
    add_GR_args(cli)

    add = cli.add_argument
    # todo: allow bed file input
    add('-L', '--bed_path', help='convert all regions in a bed file')
    add('--out_path', '-o', help='Output path for bed file [stdout]')
    add('-d', '--debug', action='store_true')
    add('-f', '--force',
        help='Overwrite existing files if existed',
        action='store_true')
    add('-@', '--threads',
        help='Number of threads to use (default: multiprocessing.cpu_count)',
        default=multiprocessing.cpu_count(),
        type=int)

    return cli.parse_args()
def add_args():
    """Create the argument parser for the homogeneous-reads tool without parsing.

    ``add_GR_args`` and ``add_multi_thread_args`` (both defined elsewhere)
    register their own options after the ones declared here.

    Returns:
        argparse.ArgumentParser: the configured parser; the caller is
        responsible for calling ``parse_args`` on it.
    """
    homog_help = ("A fraction with which to determine homogenous reads. All reads with "
                  "methylation proportion >= [homog_prop] will be classified as highly "
                  "methylated reads while all reads with methylation proportion <= 1 - [homog_prop] "
                  "will be classified as mostly un-methylated reads.")
    min_cpg_help = ("The value of CpGs required per fragment (read pair combined number of CpGs). "
                    "default [1]")

    cli = argparse.ArgumentParser(description=main.__doc__)
    add = cli.add_argument

    add('bam', help='The full path of the bam file to process')
    # NOTE(review): no type= is given for homog_prop or --min_cpg, so
    # user-supplied values arrive as str (the --min_cpg default is int 1).
    add('homog_prop', help=homog_help)
    add('--min_cpg', default=1, help=min_cpg_help)

    add('--out_dir', '-o', default='.')
    add('--force', '-f',
        help='overwrite existing files if exists',
        action='store_true')
    add('--debug', '-d', action='store_true')
    add('--verbose', '-v', action='store_true')

    # samtools-view style filters; None when not given on the command line.
    add('-F', '--exclude_flags',
        default=None,
        type=int,
        help='flags to exclude from bam file (samtools view parameter)')
    add('-q', '--mapq',
        default=None,
        type=int,
        help=f'Minimal mapping quality (samtools view parameter)')

    add_GR_args(cli)
    add_multi_thread_args(cli)
    return cli
def parse_args():
    """Build the argument parser for the pat-mixing tool and parse the command line.

    ``--rates`` is mandatory. ``-p/--prefix`` and ``-o/--out_dir`` are
    mutually exclusive. ``add_GR_args`` (defined elsewhere) registers its
    options between ``--cov`` and ``--force``. ``-@/--threads`` defaults to
    ``multiprocessing.cpu_count()``, evaluated when the parser is built.

    Returns:
        argparse.Namespace: the parsed command line.
    """
    cov_help = ('Coverage of the output pat. '
                'Default the coverage of the file with the highest rate. '
                'Only supported if corresponding beta files are in the same '
                'directory with the pat files. '
                'Otherwise, they will be created.')
    # NOTE(review): the adjacent literals below join without a separating
    # space ("matters!Rate", "files,lowercased") in the rendered help.
    rates_help = ('Rates for each of the pat files. Note: the order matters!'
                  'Rate of for the last file may be omitted. '
                  'The rates will be adjusted s.t the output will be of the '
                  'requested coverage.')
    labels_help = ('labels for the mixed reads. '
                   'Default is the basenames of the pat files,'
                   'lowercased and trimmed by the first "-"')
    strict_help = ('Truncate reads that start/end outside the given region. '
                   'Only relevant if "region", "sites" or "bed_file" flags '
                   'are given.')

    cli = argparse.ArgumentParser(description=main.__doc__)
    add = cli.add_argument

    add('pat_files', help='Two or more pat files', nargs='+')
    add('-c', '--cov', help=cov_help, type=float)
    add_GR_args(cli)

    add('-f', '--force',
        help='Overwrite existing files if existed',
        action='store_true')
    add('--reps', help='nr or repetitions [1]', default=1, type=int)
    add('--rates',
        help=rates_help,
        required=True,
        nargs='+',
        metavar='[0.0, 1.0]',
        type=float)
    add('--labels', help=labels_help, nargs='+')
    add('-L', '--bed_file',
        help='Only output reads overlapping the input BED FILE. ')
    add('--strict', help=strict_help, action='store_true')

    # Output destination: a file prefix or a directory, never both.
    destination = cli.add_mutually_exclusive_group()
    destination.add_argument('-p', '--prefix', help='Prefix of output file.')
    destination.add_argument('-o', '--out_dir',
                             default='.',
                             help='Output directory [.]')

    add('-@', '--threads',
        help='Number of threads to use (default: multiprocessing.cpu_count)',
        default=multiprocessing.cpu_count(),
        type=int)

    return cli.parse_args()