def add_tfbscan_arguments(parser):
    """Configure ``parser`` with the command-line interface of TOBIAS TFBScan.

    Sets the help formatter and description, removes the most recently
    registered argument group from the help listing, and registers the
    "Required", "Optional" and "Run" argument groups.

    Args:
        parser: The argparse parser (or subparser) to configure. It is
            modified in place.

    Returns:
        The same ``parser`` object that was passed in.
    """
    parser.formatter_class = lambda prog: argparse.RawDescriptionHelpFormatter(
        prog, max_help_position=35, width=90)

    description = (
        "Find positions of Transcription Factor Binding Sites (TFBS) in FASTA sequences by scanning with motifs.\n\n"
        "Usage:\nTOBIAS TFBScan --motifs <motifs.txt> --fasta <genome.fa> \n\n"
        "By setting --outdir, the output files are:\n- <outdir>/<TF1>.bed\n- <outdir>/<TF2>.bed\n- (...)\n\n"
        "By setting --outfile, all TFBS are written to one file (with motif specified in the 4th column of the .bed)."
    )
    parser.description = format_help_description("TFBScan", description)

    # Drop the last-registered action group so it is not listed in the help
    # output (original note: "pop -h"; for a freshly created ArgumentParser
    # this is the built-in group holding -h/--help).
    parser._action_groups.pop()

    # NOTE(review): these are not marked required=True, so argparse itself does
    # not enforce them - presumably they are validated by the caller.
    required_arguments = parser.add_argument_group('Required arguments')
    required_arguments.add_argument(
        '-m', '--motifs', metavar="",
        help='File containing motifs in either MEME, PFM or JASPAR format')
    # The fasta may be a whole genome file or regions of interest in FASTA format.
    required_arguments.add_argument(
        '-f', '--fasta', metavar="",
        help='A fasta file of sequences to use for scanning motifs')

    # All other arguments are optional
    optional_arguments = parser.add_argument_group('Optional arguments')
    optional_arguments.add_argument(
        '-r', '--regions', metavar="",
        help='Subset scanning to regions of interest')
    optional_arguments.add_argument(
        '--outdir', metavar="",
        help='Output directory for TFBS sites in one file per motif (default: ./tfbscan_output/). NOTE: Select either --outdir or --outfile.',
        default=None)
    optional_arguments.add_argument(
        '--outfile', metavar="",
        help='Output file for TFBS sites joined in one bed-file (default: not set). NOTE: Select either --outdir or --outfile.',
        default=None)
    optional_arguments.add_argument(
        '--naming', metavar="",
        help="Naming convention for bed-ids and output files ('id', 'name', 'name_id', 'id_name') (default: 'name_id')",
        choices=["id", "name", "name_id", "id_name"],
        default="name_id")
    optional_arguments.add_argument(
        '--gc', metavar="",
        type=lambda x: restricted_float(x, 0, 1),
        help='Set the gc content for background regions (default: will be estimated from fasta)')
    optional_arguments.add_argument(
        '--pvalue', metavar="",
        type=lambda x: restricted_float(x, 0, 1),
        help='Set p-value for motif matches (default: 0.0001)',
        default=0.0001)
    optional_arguments.add_argument(
        '--keep-overlaps', action='store_true',
        help='Keep overlaps of same motifs (default: overlaps are resolved by keeping best-scoring site)')
    optional_arguments.add_argument(
        '--add-region-columns', action='store_true',
        help="Add extra information columns (starting from 4th column) from --regions to the output .bed-file(s) (default: off)")

    run_arguments = parser.add_argument_group('Run arguments')
    run_arguments.add_argument(
        '--split', metavar="<int>", type=int,
        help="Split of multiprocessing jobs (default: 100)", default=100)
    run_arguments.add_argument(
        '--cores', metavar="", type=int,
        help='Number of cores to use (default: 1)', default=1)
    # --debug is accepted but hidden from the help output.
    run_arguments.add_argument('--debug', action="store_true", help=argparse.SUPPRESS)

    # Register the logger options on the run group. Previously the optional
    # group was passed here while the result was bound to the run-group
    # variable, which placed the logger options under "Optional arguments".
    run_arguments = add_logger_args(run_arguments)

    return parser
def add_bindetect_arguments(parser):
    """Attach the TOBIAS BINDetect command-line options to ``parser``.

    The parser receives a raw-description help formatter, the BINDetect
    description text, and three argument groups ("Required arguments",
    "Optional arguments", "Run arguments"). The parser is changed in place.

    Args:
        parser: The argparse parser (or subparser) to populate.

    Returns:
        The ``parser`` object that was passed in.
    """

    def _formatter(prog):
        # Keep the description's explicit newlines; widen the help columns.
        return argparse.RawDescriptionHelpFormatter(prog, max_help_position=35, width=90)

    parser.formatter_class = _formatter

    help_text = "".join([
        "BINDetect takes motifs, signals (footprints) and genome as input to estimate bound transcription factor binding sites and differential binding between conditions. ",
        "The underlying method is a modified motif enrichment test to see which motifs have the largest differences in signal across input conditions. ",
        "The output is an in-depth overview of global changes as well as the individual binding site signal-differences.\n\n",
        "Usage:\nTOBIAS BINDetect --signals <bigwig1> (<bigwig2> (...)) --motifs <motifs.txt> --genome <genome.fasta> --peaks <peaks.bed>\n\n",
        "Output files:\n- <outdir>/<prefix>_figures.pdf\n- <outdir>/<prefix>_results.{txt,xlsx}\n- <outdir>/<prefix>_distances.txt\n",
        "- <outdir>/<TF>/<TF>_overview.{txt,xlsx} (per motif)\n- <outdir>/<TF>/beds/<TF>_all.bed (per motif)\n",
        "- <outdir>/<TF>/beds/<TF>_<condition>_bound.bed (per motif-condition pair)\n- <outdir>/<TF>/beds/<TF>_<condition>_unbound.bed (per motif-condition pair)\n\n",
    ])
    parser.description = format_help_description("BINDetect", help_text)

    # Remove the last-registered action group from the help listing
    # (original note: "pop -h").
    parser._action_groups.pop()

    naming_schemes = ["id", "name", "name_id", "id_name"]

    # --- Required inputs ---
    # (not flagged required=True here, so argparse does not enforce them)
    required_group = parser.add_argument_group('Required arguments')
    required_group.add_argument(
        '--signals',
        nargs="*",
        metavar="<bigwig>",
        help="Signal per condition (.bigwig format)",
    )
    required_group.add_argument(
        '--peaks',
        metavar="<bed>",
        help="Peaks.bed containing open chromatin regions across all conditions",
    )
    required_group.add_argument(
        '--motifs',
        nargs="*",
        metavar="<motifs>",
        help="Motif file(s) in pfm/jaspar/meme format",
    )
    required_group.add_argument(
        '--genome',
        metavar="<fasta>",
        help="Genome .fasta file",
    )

    # --- Optional settings ---
    optional_group = parser.add_argument_group('Optional arguments')
    optional_group.add_argument(
        '--cond-names',
        nargs="*",
        metavar="<name>",
        help="Names of conditions fitting to --signals (default: prefix of --signals)",
    )
    optional_group.add_argument(
        '--peak-header',
        metavar="<file>",
        help="File containing the header of --peaks separated by whitespace or newlines (default: peak columns are named \"_additional_<count>\")",
    )
    optional_group.add_argument(
        '--naming',
        metavar="<string>",
        choices=naming_schemes,
        default="name_id",
        help="Naming convention for TF output files ('id', 'name', 'name_id', 'id_name') (default: 'name_id')",
    )
    optional_group.add_argument(
        '--motif-pvalue',
        metavar="<float>",
        type=lambda value: restricted_float(value, 0, 1),
        default=0.0001,
        help="Set p-value threshold for motif scanning (default: 1e-4)",
    )
    optional_group.add_argument(
        '--bound-pvalue',
        metavar="<float>",
        type=lambda value: restricted_float(value, 0, 1),
        default=0.001,
        help="Set p-value threshold for bound/unbound split (default: 0.001)",
    )
    # NOTE: --volcano-diff-thresh and --volcano-p-thresh are not yet implemented.
    optional_group.add_argument(
        '--pseudo',
        metavar="<float>",
        type=float,
        default=None,
        help="Pseudocount for calculating log2fcs (default: estimated from data)",
    )
    optional_group.add_argument(
        '--time-series',
        action='store_true',
        help="Will only compare signals1<->signals2<->signals3 (...) in order of input, and skip all-against-all comparison.",
    )
    optional_group.add_argument(
        '--skip-excel',
        action='store_true',
        help="Skip creation of excel files - for large datasets, this will speed up BINDetect considerably",
    )

    # --- Run settings ---
    run_group = parser.add_argument_group("Run arguments")
    run_group.add_argument(
        '--outdir',
        metavar="<directory>",
        default="bindetect_output",
        help="Output directory to place TFBS/plots in (default: bindetect_output)",
    )
    # NOTE(review): --prefix is registered on the optional group even though it
    # sits between the run arguments; kept as-is so the help layout is unchanged.
    optional_group.add_argument(
        '--prefix',
        metavar="<prefix>",
        default="bindetect",
        help="Prefix for overview files in --outdir folder (default: bindetect)",
    )
    run_group.add_argument(
        '--cores',
        metavar="<int>",
        type=int,
        default=1,
        help="Number of cores to use for computation (default: 1)",
    )
    run_group.add_argument(
        '--split',
        metavar="<int>",
        type=int,
        default=100,
        help="Split of multiprocessing jobs (default: 100)",
    )
    # --debug is accepted but hidden from the help output.
    run_group.add_argument('--debug', action='store_true', help=argparse.SUPPRESS)

    # Logger options are added to the run group by the shared helper.
    run_group = add_logger_args(run_group)

    return parser
def add_aggregate_arguments(parser):
    """Configure ``parser`` with the command-line interface of TOBIAS PlotAggregate.

    Sets the help formatter and (currently empty) description, removes the
    most recently registered argument group from the help listing, and
    registers the "Input / output", "Plot" and "Run" argument groups.

    Args:
        parser: The argparse parser (or subparser) to configure. It is
            modified in place.

    Returns:
        The same ``parser`` object that was passed in.
    """
    parser.formatter_class = lambda prog: argparse.RawDescriptionHelpFormatter(
        prog, max_help_position=40, width=90)

    description = ""
    parser.description = format_help_description("PlotAggregate", description)

    # Drop the last-registered action group so it is not listed in the help
    # output (original note: "pop -h").
    parser._action_groups.pop()

    io_group = parser.add_argument_group('Input / output arguments')
    # --TFBS and --signals are described as required in their help text but
    # are not marked required=True; their default is None.
    io_group.add_argument(
        '--TFBS', metavar="<bed>", nargs="*",
        help="TFBS sites (*required)")
    io_group.add_argument(
        '--signals', metavar="<bigwig>", nargs="*",
        help="Signals in bigwig format (*required)")
    io_group.add_argument(
        '--regions', metavar="<bed>", nargs="*",
        help="Regions to overlap with TFBS (optional)", default=[])
    io_group.add_argument(
        '--whitelist', metavar="<bed>", nargs="*",
        help="Only plot sites overlapping whitelist (optional)", default=[])
    io_group.add_argument(
        '--blacklist', metavar="<bed>", nargs="*",
        help="Exclude sites overlapping blacklist (optional)", default=[])
    io_group.add_argument(
        '--output', metavar="",
        help="Path to output plot (default: TOBIAS_aggregate.pdf)",
        default="TOBIAS_aggregate.pdf")
    io_group.add_argument(
        '--output-txt', metavar="",
        help="Path to output file for aggregates in .txt-format (default: None)")

    plot_group = parser.add_argument_group('Plot arguments')
    plot_group.add_argument(
        '--title', metavar="",
        help="Title of plot (default: \"Aggregated signals\")",
        default="Aggregated signals")
    plot_group.add_argument(
        '--flank', metavar="",
        help="Flanking basepairs (+/-) to show in plot (counted from middle of the TFBS) (default: 60)",
        default=60, type=int)
    plot_group.add_argument(
        '--TFBS-labels', metavar="",
        help="Labels used for each TFBS file (default: prefix of each --TFBS)",
        nargs="*")
    plot_group.add_argument(
        '--signal-labels', metavar="",
        help="Labels used for each signal file (default: prefix of each --signals)",
        nargs="*")
    plot_group.add_argument(
        '--region-labels', metavar="",
        help="Labels used for each regions file (default: prefix of each --regions)",
        nargs="*")
    # The help text previously spelled the flag "--share_y", which is not an
    # option this parser defines; the example now uses the real flag name.
    plot_group.add_argument(
        '--share-y', metavar="",
        help="Share y-axis range across plots (none/signals/sites/both). Use \"--share-y signals\" if bigwig signals have similar ranges. Use \"--share-y sites\" if sites per bigwig are comparable, but bigwigs themselves aren't comparable (default: none)",
        choices=["none", "signals", "sites", "both"],
        default="none")

    # Signals / regions
    plot_group.add_argument(
        '--normalize', action='store_true',
        help="Normalize the aggregate signal(s) to be between 0-1 (default: the true range of values is shown)")
    plot_group.add_argument(
        '--negate', action='store_true',
        help="Negate overlap with regions")
    # Closing parenthesis added to the help text (it was unbalanced).
    plot_group.add_argument(
        '--smooth', metavar="<int>", type=int,
        help="Smooth output signal by taking the mean of <smooth> bp windows (default: 1 (no smooth))",
        default=1)
    plot_group.add_argument(
        '--log-transform', action="store_true",
        help="Log transform the signals before aggregation")
    plot_group.add_argument(
        '--plot-boundaries', action='store_true',
        help="Plot TFBS boundaries (Note: estimated from first region in each --TFBS)")
    plot_group.add_argument(
        '--signal-on-x', action='store_true',
        help="Show signals on x-axis and TFBSs on y-axis (default: signal is on y-axis)")
    # "%%" is required because argparse %-formats help strings.
    plot_group.add_argument(
        '--remove-outliers', metavar="<float>",
        help="Value between 0-1 indicating the percentile of regions to include, e.g. 0.99 to remove the sites with 1%% highest values (default: 1)",
        type=lambda x: restricted_float(x, 0, 1),
        default=1)

    # The run group only carries the options added by the shared logger helper.
    run_group = parser.add_argument_group("Run arguments")
    run_group = add_logger_args(run_group)

    return parser