def parse_arguments():
    """
    Create parser instance and parse command-line arguments passed to the
    pipeline.  (Fixes docstring typo "Creat".)

    :return argparse.Namespace: parsed arguments namespace
    """
    parser = VersionInHelpParser(prog="PEPATAC_collator",
                                 description='PEPATAC collator',
                                 version=__version__)
    # Pull in the standard pypiper/looper argument groups so the pipeline
    # integrates with looper submission and pypiper restart/recover flags.
    parser = pypiper.add_pypiper_args(parser, groups=['pypiper', 'looper'])
    parser.add_argument("-n", "--name",
                        help="Name of the project to use.", type=str)
    parser.add_argument("-r", "--results",
                        help="Output results sub directory path.", type=str)
    parser.add_argument("--skip-consensus", action='store_true',
                        dest="skip_consensus", default=False,
                        help="Do not calculate consensus peaks.")
    parser.add_argument("--skip-table", action='store_true',
                        dest="skip_table", default=False,
                        help="Do not calculate peak counts table.")
    args = parser.parse_args()
    return args
def _parse_cmdl(cmdl):
    """
    Build the argument parser for the GEO SRA downloader and parse tokens.

    :param list[str] cmdl: command-line arguments to parse
    :return argparse.Namespace: parsed arguments namespace
    """
    parser = ArgumentParser(description="Automatic GEO SRA data downloader")
    parser.add_argument(
        "-b", "--bamfolder", default=safe_echo("SRABAM"),
        help="Optional: Specify a location to store bam files "
        "[Default: $SRABAM:" + safe_echo("SRABAM") + "]")
    parser.add_argument(
        "-s", "--srafolder", default=safe_echo("SRARAW"),
        help="Optional: Specify a location to store sra files "
        "[Default: $SRARAW:" + safe_echo("SRARAW") + "]")
    # (Removed long-dead commented-out --picard option.)
    parser.add_argument(
        "-r", "--srr", required=True, nargs="+", help="SRR files")
    # Standard pypiper + config argument groups.
    parser = pypiper.add_pypiper_args(parser, groups=["pypiper", "config"])
    return parser.parse_args(cmdl)
def main():
    """Entry point for the Hi-C pipeline: parse args, build the sample, run process()."""
    # Parse command-line arguments
    parser = ArgumentParser(prog="hic-pipeline", description="Hi-C pipeline.")
    parser = arg_parser(parser)
    parser = pypiper.add_pypiper_args(
        parser, groups=["ngs", "looper", "resource", "pypiper"])
    args = parser.parse_args()

    # Read in yaml configs.  safe_load avoids arbitrary object construction
    # (yaml.load without an explicit Loader is unsafe and deprecated), and
    # the context manager closes the file handle.
    with open(args.sample_config, "r") as handle:
        series = pd.Series(yaml.safe_load(handle))

    # looper 0.6/0.7 compatibility:
    if "protocol" in series.index:
        key = "protocol"
    elif "library" in series.index:
        key = "library"
    else:
        raise KeyError(
            "Sample does not contain either a 'protocol' or 'library' attribute!")

    # Create Sample object
    if series[key] != "HiChIP":
        sample = HiCSample(series)
    else:
        sample = HiChIPSample(series)

    # A space-separated data path means several input files were merged.
    sample.merged = len(sample.data_path.split(" ")) > 1

    sample.prj = AttributeDict(sample.prj)
    sample.paths = AttributeDict(sample.paths.__dict__)

    # Check read type if not provided
    if not hasattr(sample, "ngs_inputs"):
        sample.ngs_inputs = [sample.data_source]
    if not hasattr(sample, "read_type"):
        sample.set_read_type()

    # Shorthand for read_type
    sample.paired = sample.read_type == "paired"

    # Set file paths
    sample.set_file_paths()
    # sample.make_sample_dirs()  # should be fixed to check if values of paths are strings and paths indeed

    # Start Pypiper object
    # Best practice is to name the pipeline with the name of the script;
    # or put the name in the pipeline interface.
    pipe_manager = pypiper.PipelineManager(
        name="hic", outfolder=sample.paths.sample_root, args=args)
    pipe_manager.config.tools.scripts_dir = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), "tools")

    # Start main function
    process(sample, pipe_manager, args)
def _parse_args(cmdl):
    """
    Parse command-line arguments for the pipeline.

    :param list[str] cmdl: command-line tokens to parse
    :return argparse.Namespace: parsed arguments namespace
    :raise ValueError: if a negative RRBS fill-in value is given
    :raise SystemExit: if no input file is provided
    """
    from argparse import ArgumentParser

    parser = ArgumentParser(description='Pipeline')

    # Standard pypiper arguments: pypiper options, looper connections, and
    # common options (all_args=True).  For details, see:
    # http://github.com/epigen/pypiper/command_line_args.md
    parser = pypiper.add_pypiper_args(parser, all_args=True)

    # Pipeline-specific arguments
    parser.add_argument("-t", "--trimgalore", dest="trimgalore",
                        action="store_true",
                        help='Use trimgalore instead of trimmomatic?')
    parser.add_argument("-e", "--epilog", dest='epilog', action="store_true",
                        help='Use epilog for meth calling?')
    parser.add_argument("--pdr", dest="pdr", action="store_true",
                        help='Calculate Proportion of Discordant Reads (PDR)?')
    parser.add_argument(
        "--rrbs-fill", dest="rrbs_fill", type=int, default=4,
        help="Number of bases from read end to regard as unreliable and ignore due to RRBS chemistry")
    parser.add_argument(
        "--dark-bases", type=int, default=None,
        help="Number of bases from to prepend to R1 from R2 for dark sequencing")

    opts = parser.parse_args(cmdl)

    if opts.rrbs_fill < 0:
        raise ValueError(
            "Negative RRBS fill-in value: {}".format(opts.rrbs_fill))

    # Translate pypiper method of read type specification into flag-like option.
    opts.paired_end = opts.single_or_paired == "paired"

    # Input is required.
    if not opts.input:
        parser.print_help()
        raise SystemExit

    return opts
def main():
    """Entry point for the ATAC-seq pipeline: parse args, build the sample, run process()."""
    # Parse command-line arguments
    parser = ArgumentParser(prog="atacseq-pipeline",
                            description="ATAC-seq pipeline.")
    parser = arg_parser(parser)
    parser = pypiper.add_pypiper_args(
        parser, groups=["ngs", "looper", "resource", "pypiper"])
    args = parser.parse_args()
    if args.sample_config is None or args.output_parent is None:
        parser.print_help()
        return 1

    # Read in yaml configs
    with open(args.sample_config, "r") as handle:
        series = pd.Series(yaml.safe_load(handle))
    series["sample_root"] = args.output_parent
    print(series)

    # Create Sample object
    if series["protocol"] != "DNase-seq":
        sample = ATACseqSample(series)
    else:
        sample = DNaseSample(series)
    print(sample)

    # Check if merged.
    # FIX: was `(type(x) == list) & (len(x) > 1)` -- isinstance is the
    # idiomatic type check, and short-circuiting `and` avoids evaluating
    # len() when data_source is not a list at all.
    sample.merged = isinstance(sample.data_source, list) \
        and len(sample.data_source) > 1

    sample.paths = AttributeDict(sample.__dict__)

    # Check read type if not provided
    if not hasattr(sample, "ngs_inputs"):
        sample.ngs_inputs = [sample.data_source]
    if not hasattr(sample, "read_type"):
        sample.set_read_type()

    # Shorthand for read_type
    sample.paired = sample.read_type == "paired"

    # Set file paths
    sample.set_file_paths()

    # Start Pypiper object
    # Best practice is to name the pipeline with the name of the script;
    # or put the name in the pipeline interface.
    pipe_manager = pypiper.PipelineManager(
        name="atacseq", outfolder=sample.sample_root, args=args)
    pipe_manager.config.tools.scripts_dir = pjoin(
        os.path.dirname(os.path.realpath(__file__)), "tools")

    # Start main function
    process(sample, pipe_manager, args)
def main():
    """Entry point for the ChIP-seq pipeline: parse args, build the sample, run process()."""
    parser = ArgumentParser(prog="chipseq-pipeline",
                            description="ChIP-seq pipeline.")
    parser = arg_parser(parser)
    parser = pypiper.add_pypiper_args(
        parser, groups=["ngs", "looper", "resource", "pypiper"])
    args = parser.parse_args()

    if args.sample_config is None:
        parser.print_help()
        return 1

    # Load the per-sample YAML config into a pandas Series.
    series = pd.Series(yaml.safe_load(open(args.sample_config, "r")))

    # Pick the Sample subclass matching the protocol.
    sample_cls = ChIPmentation if series["protocol"] == "ChIPmentation" \
        else ChIPseqSample
    sample = sample_cls(series)

    # A space-separated data source means several input files were merged.
    sample.merged = len(sample.data_source.split(" ")) > 1

    sample.prj = AttributeDict(sample.prj)
    sample.paths = AttributeDict(sample.paths.__dict__)

    # Derive NGS inputs / read type when not provided.
    if not hasattr(sample, "ngs_inputs"):
        sample.ngs_inputs = [sample.data_source]
    if not hasattr(sample, "read_type"):
        sample.set_read_type()

    # Shorthand flag for paired-end reads.
    sample.paired = sample.read_type == "paired"

    # Set file paths
    sample.set_file_paths(sample.prj)

    # Start Pypiper object.
    # Best practice is to name the pipeline with the name of the script;
    # or put the name in the pipeline interface.
    pipe_manager = pypiper.PipelineManager(
        name="chipseq", outfolder=sample.paths.sample_root, args=args)

    # Hand off to the main processing routine.
    process(sample, pipe_manager, args)
def main():
    """Entry point for the Kallisto pipeline: parse args, load configs, run process()."""
    # Parse command-line arguments
    parser = ArgumentParser(prog="rnaKallisto", description="Kallisto pipeline")
    parser = arg_parser(parser)
    parser = pypiper.add_pypiper_args(parser, all_args=True)
    args = parser.parse_args()

    # Read in yaml configs; safe_load avoids arbitrary object construction.
    with open(args.sample_config, "r") as handle:
        sample = AttributeDict(yaml.safe_load(handle))

    path_conf_file = os.path.join(os.path.dirname(__file__), args.config_file)
    # BUG FIX: the original read `yaml.load(open(path_conf_file), "r")`,
    # which passed the mode string "r" to yaml.load (as its Loader argument)
    # instead of to open().
    with open(path_conf_file, "r") as handle:
        pipeline_config = AttributeDict(yaml.safe_load(handle))

    # Start main function
    process(sample, pipeline_config, args)
def main():
    """Entry point for the STARR-seq pipeline: parse args, build the sample, run process()."""
    # Parse command-line arguments
    parser = ArgumentParser(prog="starrseq-pipeline",
                            description="STARR-seq pipeline.")
    parser = arg_parser(parser)
    parser = pypiper.add_pypiper_args(
        parser, groups=["ngs", "looper", "resource", "pypiper"])
    args = parser.parse_args()

    # Read in yaml configs; safe_load avoids arbitrary object construction
    # (yaml.load without an explicit Loader is unsafe and deprecated).
    with open(args.sample_config, "r") as handle:
        sample = STARRSeqSample(pd.Series(yaml.safe_load(handle)))

    # A space-separated data path means several input files were merged.
    sample.merged = len(sample.data_path.split(" ")) > 1

    sample.prj = AttributeDict(sample.prj)
    sample.paths = AttributeDict(sample.paths.__dict__)

    # Check read type if not provided
    if not hasattr(sample, "ngs_inputs"):
        sample.ngs_inputs = [sample.data_source]
    if not hasattr(sample, "read_type"):
        sample.set_read_type()

    # Shorthand for read_type
    sample.paired = sample.read_type == "paired"

    # Set file paths
    sample.set_file_paths()
    # sample.make_sample_dirs()  # should be fixed to check if values of paths are strings and paths indeed

    # Start Pypiper object
    # Best practice is to name the pipeline with the name of the script;
    # or put the name in the pipeline interface.
    pipe_manager = pypiper.PipelineManager(
        name="starrseq", outfolder=sample.paths.sample_root, args=args)
    pipe_manager.config.tools.scripts_dir = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), "tools")

    # Start main function
    process(sample, pipe_manager, args)
def main():
    """Entry point for the RNA-seq pipeline: parse args, build the sample, run process()."""
    # Parse command-line arguments
    parser = ArgumentParser(prog="rnaseq-pipeline",
                            description="RNA-seq pipeline.")
    parser = arg_parser(parser)
    parser = pypiper.add_pypiper_args(
        parser, groups=["ngs", "looper", "resource", "pypiper"])
    args = parser.parse_args()

    # Read in yaml configs; safe_load avoids arbitrary object construction
    # (yaml.load without an explicit Loader is unsafe and deprecated).
    with open(args.sample_config, "r") as handle:
        sample = RNASeqSample(pd.Series(yaml.safe_load(handle)))

    # A space-separated data path means several input files were merged.
    sample.merged = len(sample.data_path.split(" ")) > 1

    sample.prj = AttributeDict(sample.prj)
    sample.paths = AttributeDict(sample.paths.__dict__)

    # Check read type if not provided
    if not hasattr(sample, "ngs_inputs"):
        sample.ngs_inputs = [sample.data_source]
    if not hasattr(sample, "read_type"):
        sample.set_read_type()

    # Shorthand for read_type
    sample.paired = sample.read_type == "paired"

    # Set file paths
    sample.set_file_paths()
    # sample.make_sample_dirs()  # should be fixed to check if values of paths are strings and paths indeed

    # Start Pypiper object
    # Best practice is to name the pipeline with the name of the script;
    # or put the name in the pipeline interface.
    pipe_manager = pypiper.PipelineManager(
        name="rnaseq", outfolder=sample.paths.sample_root, args=args)
    pipe_manager.config.tools.scripts_dir = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), "tools")

    # Start main function
    process(sample, pipe_manager, args)
def main():
    """Entry point for the Drop-seq pipeline: parse args, load configs, run process()."""
    # Parse command-line arguments
    parser = ArgumentParser(prog="dropseq-pipeline",
                            description="Drop-seq pipeline.")
    parser = arg_parser(parser)
    parser = pypiper.add_pypiper_args(
        parser, groups=["ngs", "looper", "resource", "pypiper"])
    args = parser.parse_args()
    if args.sample_config is None:
        parser.print_help()
        return 1

    # Read in yaml configs; safe_load avoids arbitrary object construction
    # (yaml.load without an explicit Loader is unsafe and deprecated).
    with open(args.sample_config, "r") as handle:
        sample = AttributeDict(yaml.safe_load(handle))
    config_path = os.path.join(os.path.dirname(__file__), args.config_file)
    with open(config_path, "r") as handle:
        pipeline_config = AttributeDict(yaml.safe_load(handle))

    # Start main function
    process(sample, pipeline_config, args)
def main():
    """Entry point for the STARR-seq pipeline: parse args, build the sample, run process()."""
    # Parse command-line arguments
    parser = ArgumentParser(prog="starrseq-pipeline",
                            description="STARR-seq pipeline.")
    parser = arg_parser(parser)
    parser = pypiper.add_pypiper_args(parser, all_args=True)
    args = parser.parse_args()
    if args.sample_config is None:
        parser.print_help()
        return 1

    # Read in yaml config and create Sample object; safe_load avoids
    # arbitrary object construction (yaml.load without an explicit Loader
    # is unsafe and deprecated).
    with open(args.sample_config, "r") as handle:
        sample = STARRseqSample(pd.Series(yaml.safe_load(handle)))

    # A space-separated data source means several input files were merged.
    sample.merged = len(sample.data_source.split(" ")) > 1

    sample.prj = AttributeDict(sample.prj)
    sample.paths = AttributeDict(sample.paths.__dict__)

    # Shorthand for read_type
    sample.paired = sample.read_type == "paired"

    # Set file paths
    sample.set_file_paths()
    sample.make_sample_dirs()

    # Start Pypiper object
    # Best practice is to name the pipeline with the name of the script;
    # or put the name in the pipeline interface.
    pipe_manager = pypiper.PipelineManager(
        name="starrseq", outfolder=sample.paths.sample_root, args=args)

    # Start main function
    process(sample, pipe_manager, args)
def _parse_args(cmdl):
    """
    Parse command-line arguments.

    :param list[str] cmdl: command-line tokens to parse
    :return argparse.Namespace: parsed arguments namespace
    :raise SystemExit: if no input file is provided
    """
    from argparse import ArgumentParser

    parser = ArgumentParser(description='Pipeline')

    # Standard pypiper arguments: pypiper options, looper connections, and
    # common options (all_args=True).  For details, see:
    # http://github.com/epigen/pypiper/command_line_args.md
    parser = pypiper.add_pypiper_args(parser, all_args=True)

    parser.add_argument('-e', '--epilog', dest='epilog', action="store_true",
                        default=False, help='Use epilog for meth calling?')
    parser.add_argument(
        '--single2', dest='single2', action="store_true", default=False,
        help='Single secondary mode: any reads not mapping in paired-end mode will \
be aligned using single-end mode, and then analyzed. Only valid for \
paired-end mode. ')

    opts = parser.parse_args(cmdl)

    # Translate pypiper's read-type specification into a boolean flag.
    opts.paired_end = opts.single_or_paired == "paired"

    if not opts.input:
        parser.print_help()
        raise SystemExit

    return opts
def main():
    """Entry point for the amplicon pipeline: compute and save editing efficiency."""
    # Parse command-line arguments
    parser = ArgumentParser(prog="amplicon-pipeline",
                            description="Amplicon pipeline.")
    parser = arg_parser(parser)
    parser = pypiper.add_pypiper_args(parser, groups=["ngs", "looper", "pypiper"])
    args = parser.parse_args()
    print(args)

    print("Processing sample {}.".format(args.sample_name))

    output_folder = os.path.abspath(args.output_parent)

    # Create output directories if not existing.
    # BUG FIX: the original used `except OSError("...".format(path)):`,
    # which places an exception *instance* in the except clause.  That is
    # not a valid exception class, so a failing mkdir raised a confusing
    # TypeError instead of being handled.
    for path in [args.output_parent, output_folder]:
        if not os.path.exists(path):
            try:
                os.mkdir(path)
            except OSError as exc:
                raise OSError(
                    "Cannot create directory '{}'".format(path)) from exc

    # Count length of pattern matches
    sizes = count_sizes(fastq_file=args.input, amplicon=args.amplicon,
                        guide_rna=args.guide_rna)

    # Calculate efficiency: fraction of reads with a non-zero edit size.
    efficiency = (sizes[sizes.index != 0].sum() / float(sizes.sum())) * 100

    # Save
    with open(os.path.join(output_folder,
                           args.sample_name + ".efficiency.csv"),
              "w") as handle:
        handle.write("{},{}\n".format(args.sample_name, efficiency))

    print("Sample {} has an editing efficiency of {}.".format(
        args.sample_name, efficiency))

    print("Finished processing sample {}.".format(args.sample_name))
def build_argparser():
    """
    Build the argument parser for the read-counting pipeline.

    :return argparse.ArgumentParser: configured parser
    """
    parser = ArgumentParser(
        description="A pipeline to count the number of reads and file size. Accepts"
        " BAM, fastq, or fastq.gz files.")

    # Standard pypiper argument groups: "pypiper" adds all arguments that
    # pypiper uses, "common" adds --input and --sample-name, and the
    # explicit `args` list adds --output-parent and --config.  See the
    # pypiper docs (section "command-line arguments") for the full menu.
    parser = pypiper.add_pypiper_args(
        parser,
        groups=["pypiper", "common", "ngs", "logmuse"],
        args=["output-parent", "config"],
        required=['sample-name', 'output-parent'])

    # Pipeline-specific arguments would be added here.
    return parser
RNA NucSeq pipeline
"""
# NOTE(review): the two lines above are the tail of a module docstring whose
# opening quotes sit earlier in the file, outside this chunk.

from argparse import ArgumentParser
import os
import sys
import yaml
import subprocess
import re
import pypiper

# Argument Parsing
# #######################################################################################
# Build the command-line parser, pulling in the full set of standard pypiper
# arguments (all_args=True).
parser = ArgumentParser(description='Pypiper arguments.')
parser = pypiper.add_pypiper_args(parser, all_args=True)

# Add any pipeline-specific arguments
# -e/--ercc: name of the ERCC spike-in assembly (default "ERCC92").
parser.add_argument('-e', '--ercc',
                    default="ERCC92",
                    dest='ERCC_assembly',
                    type=str,
                    help='ERCC Assembly')
# -em/--ercc-mix: ERCC mix identifier.  NOTE(review): the default is the
# *string* "False", not the boolean False; downstream code presumably
# compares against that string -- confirm before changing.
parser.add_argument(
    '-em', '--ercc-mix',
    default="False",
    dest='ERCC_mix',
    help='ERCC mix. If False no ERCC analysis will be performed.')
def _parse_cmdl(cmdl):
    """
    Build the SRA data converter argument parser and parse the given tokens.

    :param list[str] cmdl: command-line arguments to parse
    :return argparse.Namespace: parsed arguments namespace
    """
    description = """
    The SRA data converter is a wrapper around sra-tools that provides
    convenience functions for converting or deleting sra data in various formats.
    """
    parser = ArgumentParser(description=description)
    # (Removed dead commented-out add_pypiper_args call; the live call is below.)
    parser.add_argument(
        "-m", "--mode", default="convert",
        choices=["convert", "delete_sra", "delete_bam", "delete_fq"],
        help="What do you want to do? Default: convert",
    )
    parser.add_argument(
        "-f", "--format", default="fastq", choices=["fastq", "bam"],
        help="Convert to what format? Default: fastq",
    )
    parser.add_argument(
        "-b", "--bamfolder", default=safe_echo("SRABAM"),
        help="Optional: Specify a location to store bam files "
        "[Default: $SRABAM:" + safe_echo("SRABAM") + "]",
    )
    parser.add_argument(
        "-q", "--fqfolder", default=safe_echo("SRAFQ"),
        help="Optional: Specify a location to store fastq files "
        "[Default: $SRAFQ:" + safe_echo("SRAFQ") + "]",
    )
    parser.add_argument(
        "-s", "--srafolder", default=safe_echo("SRARAW"),
        help="Optional: Specify a location to store pipeline output "
        "[Default: $SRARAW:" + safe_echo("SRARAW") + "]",
    )
    parser.add_argument(
        "--keep-sra", action="store_true", default=False,
        help="On convert mode, keep original sra data?",
    )
    parser.add_argument(
        "-S", "--sample-name", required=False, nargs="+",
        help="Name for sample to run", metavar="SAMPLE_NAME",
    )
    parser.add_argument("-r", "--srr", required=True, nargs="+",
                        help="SRR files")
    parser = pypiper.add_pypiper_args(parser, groups=["config", "logmuse"],
                                      args=["output-parent", "recover"])
    return parser.parse_args(cmdl)
import os.path
import sys
from subprocess import call
import subprocess
import re
# FIX: ArgumentParser is used below but was not imported in this block.
# (If it is imported elsewhere above this chunk, the re-import is harmless.)
from argparse import ArgumentParser
from datetime import datetime

import pypiper
import yaml

# Argument Parsing
# #######################################################################################
parser = ArgumentParser(description='Pypiper arguments.')
parser = pypiper.add_pypiper_args(parser, all_args=True)

# Add any pipeline-specific arguments
# (Removed long-dead commented-out options: -e/--ercc, -em/--ercc-mix,
#  -f filter, -cs/--core-seq.)

args = parser.parse_args()

# Translate pypiper's read-type specification into a boolean flag.
args.paired_end = args.single_or_paired == "paired"

###
def main():
    """Entry point for the ChIP-seq pipeline: process sample, then call peaks."""
    # Parse command-line arguments
    parser = ArgumentParser(prog="chipseq-pipeline",
                            description="ChIP-seq pipeline.")
    parser = arg_parser(parser)
    parser = pypiper.add_pypiper_args(parser, groups=["all"])
    args = parser.parse_args()
    if args.sample_config is None:
        parser.print_help()
        return 1

    # Read in yaml configs; safe_load avoids arbitrary object construction
    # (yaml.load without an explicit Loader is unsafe and deprecated).
    with open(args.sample_config, "r") as handle:
        series = pd.Series(yaml.safe_load(handle))

    # looper 0.6/0.7 compatibility:
    if "protocol" in series.index:
        key = "protocol"
    elif "library" in series.index:
        key = "library"
    else:
        raise KeyError(
            "Sample does not contain either a 'protocol' or 'library' attribute!")

    # Create Sample object
    if series[key] != "ChIPmentation":
        sample = ChIPseqSample(series)
    else:
        sample = ChIPmentation(series)

    # A space-separated data path means several input files were merged.
    sample.merged = len(sample.data_path.split(" ")) > 1

    sample.prj = AttributeDict(sample.prj)
    sample.paths = AttributeDict(sample.paths.__dict__)

    # Check read type if not provided
    if not hasattr(sample, "ngs_inputs"):
        sample.ngs_inputs = [sample.data_source]
    if not hasattr(sample, "read_type"):
        sample.set_read_type()

    # Shorthand for read_type
    sample.paired = sample.read_type == "paired"

    # Set file paths
    sample.set_file_paths()
    # sample.make_sample_dirs()  # should be fixed to check if values of paths are strings and paths indeed

    # Start Pypiper object
    # Best practice is to name the pipeline with the name of the script;
    # or put the name in the pipeline interface.
    pipe_manager = pypiper.PipelineManager(
        name="chipseq", outfolder=sample.paths.sample_root, args=args)

    # Start main function
    if not args.only_peaks:
        pipe_manager = process(sample, pipe_manager, args)
    else:
        print("Skipped processing sample '{}'.".format(sample.name))

    # If sample does not have "ctrl" attribute, finish processing it.
    if not hasattr(sample, "compare_sample"):
        pipe_manager.stop_pipeline()
        print("Finished processing sample '{}'.".format(sample.name))
        return

    # The pipeline will now wait for the comparison sample file to be completed
    pipe_manager._wait_for_file(
        sample.filtered.replace(sample.name, sample.compare_sample))

    # Start peak calling function
    call_peaks(sample, pipe_manager, args)
def main():
    """Entry point for the ChIP-seq pipeline: process sample, then call peaks."""
    # Parse command-line arguments
    parser = ArgumentParser(prog="chipseq-pipeline",
                            description="ChIP-seq pipeline.")
    parser = arg_parser(parser)
    parser = pypiper.add_pypiper_args(
        parser, groups=["ngs", "looper", "resource", "pypiper"])
    args = parser.parse_args()
    if args.sample_config is None:
        parser.print_help()
        return 1

    # Read in yaml configs; safe_load avoids arbitrary object construction
    # (yaml.load without an explicit Loader is unsafe and deprecated).
    with open(args.sample_config, "r") as handle:
        series = pd.Series(yaml.safe_load(handle))

    # looper 0.6/0.7 compatibility:
    if "protocol" in series.index:
        key = "protocol"
    elif "library" in series.index:
        key = "library"
    else:
        raise KeyError(
            "Sample does not contain either a 'protocol' or 'library' attribute!")

    # Create Sample object
    if series[key] != "ChIPmentation":
        sample = ChIPseqSample(series)
    else:
        sample = ChIPmentation(series)

    # A space-separated data path means several input files were merged.
    sample.merged = len(sample.data_path.split(" ")) > 1

    sample.prj = AttributeDict(sample.prj)
    sample.paths = AttributeDict(sample.paths.__dict__)

    # Check read type if not provided, or re-derive it when the provided
    # value is not one of the two valid options (folds the original
    # duplicated if/else branches into one condition).
    if not hasattr(sample, "ngs_inputs"):
        sample.ngs_inputs = [sample.data_source]
    if not hasattr(sample, "read_type") or \
            sample.read_type not in ['single', 'paired']:
        sample.set_read_type()

    # Shorthand for read_type
    sample.paired = sample.read_type == "paired"

    # Set file paths
    sample.set_file_paths()
    # sample.make_sample_dirs()  # should be fixed to check if values of paths are strings and paths indeed

    # Start Pypiper object
    # Best practice is to name the pipeline with the name of the script;
    # or put the name in the pipeline interface.
    pipe_manager = pypiper.PipelineManager(
        name="chipseq", outfolder=sample.paths.sample_root, args=args)

    # Start main function
    if not args.only_peaks:
        pipe_manager = process(sample, pipe_manager, args)
    else:
        print("Skipped processing sample '{}'.".format(sample.name))

    # If sample does not have "ctrl" attribute, finish processing it.
    if not hasattr(sample, "compare_sample"):
        pipe_manager.stop_pipeline()
        print("Finished processing sample '{}'.".format(sample.name))
        return

    # If compare_sample is empty string, finish processing.
    if sample.compare_sample == "":
        pipe_manager.stop_pipeline()
        print("Finished processing sample '{}'.".format(sample.name))
        return

    # The pipeline will now wait for the comparison sample file to be completed
    pipe_manager._wait_for_file(
        sample.filtered.replace(sample.name, sample.compare_sample))

    # Start peak calling function
    call_peaks(sample, pipe_manager, args)
import re
import pandas
import sys
import string
# FIX: argparse and os are used below but were not imported in this block.
# (If they are imported elsewhere above this chunk, the re-import is harmless.)
import argparse
import os

#' Part of the looper setup. We add two additional arguments to the parser, one is the sample id of
#' the currently processed sample and the second is the path to the bam file containing the mapped
#' reads (preferably with bsmap). These two arguments are passed through
#' config/pipeline_interface.yaml to map column names in the sample anntotation sheet to the name of
#' the argument here.
parser = argparse.ArgumentParser(description="Pipeline")
parser.add_argument("--sample_id", "-o", help="id of sample to be analyzed")
parser.add_argument("--bam_name", help="path to bam file of sample to be analyzed")
parser = pypiper.add_pypiper_args(parser, groups=["pypiper", "looper"])
args = parser.parse_args()

manager = pypiper.PipelineManager(name="HETEROGENEITY",
                                  outfolder=args.output_parent, args=args)
pipe_folder = os.path.dirname(sys.argv[0]) + "/"

#####################################################################################################
#' PART I: Preprocessing
#####################################################################################################
# Build the per-sample output directory with os.path.join instead of
# manual "/" concatenation.
sample_dir = os.path.join(args.output_parent, args.sample_id)
if not os.path.exists(sample_dir):
    os.makedirs(sample_dir)
sample_folder = sample_dir + "/"
def build_argparser():
    """
    Builds argument parser.

    Fixes two latent bugs: the ``-s/--genome-server`` help text had its two
    format() arguments swapped, and the ``-g/--genome`` requiredness used a
    substring test (``cmd in GETSEQ_CMD``) instead of equality.

    :return argparse.ArgumentParser
    """
    banner = "%(prog)s - reference genome asset manager"
    additional_description = "\nhttps://refgenie.databio.org"

    parser = VersionInHelpParser(prog="refgenie", version=__version__,
                                 description=banner,
                                 epilog=additional_description)

    subparsers = parser.add_subparsers(dest="command")

    def add_subparser(cmd, description):
        # One subparser per subcommand; `help` mirrors `description` so the
        # text appears both in the command list and in per-command help.
        return subparsers.add_parser(cmd, description=description,
                                     help=description)

    sps = {}
    for cmd, desc in SUBPARSER_MESSAGES.items():
        sps[cmd] = add_subparser(cmd, desc)
        # It's required for init
        sps[cmd].add_argument(
            '-c', '--genome-config', required=(cmd == INIT_CMD),
            dest="genome_config",
            help="Path to local genome configuration file. Optional if {} "
                 "environment variable is set."
                 .format(", ".join(refgenconf.CFG_ENV_VARS)))

    # FIX: format() arguments were swapped (the attribute name belongs in
    # the sentence, the default value at the end).
    sps[INIT_CMD].add_argument(
        '-s', '--genome-server', nargs='+', default=DEFAULT_SERVER,
        help="URL(s) to use for the {} attribute in config file. Default: {}."
             .format(CFG_SERVERS_KEY, DEFAULT_SERVER))

    # The build subcommand also accepts pypiper's recover/config/new-start args.
    sps[BUILD_CMD] = pypiper.add_pypiper_args(
        sps[BUILD_CMD], groups=None, args=["recover", "config", "new-start"])

    # Add any arguments specific to subcommands.
    sps[BUILD_CMD].add_argument(
        '--tag-description', required=False, default=None, type=str,
        help="Add tag level description (e.g. built with version 0.3.2).")
    sps[BUILD_CMD].add_argument(
        '--genome-description', required=False, default=None, type=str,
        help="Add genome level description (e.g. The mouse mitochondrial "
             "genome, released in Dec 2013).")
    sps[BUILD_CMD].add_argument(
        "-d", "--docker", action="store_true",
        help="Run all commands in the refgenie docker container.")
    sps[BUILD_CMD].add_argument(
        '--assets', nargs="+", action='append', required=False, default=None,
        help='Override the default genome, asset and tag of the parents'
             ' (e.g. fasta=hg38/fasta:default gtf=mm10/gencode_gtf:default).')
    sps[BUILD_CMD].add_argument(
        '--files', nargs="+", action='append', required=False, default=None,
        help='Provide paths to the required files (e.g. fasta=/path/to/file.fa.gz).')
    sps[BUILD_CMD].add_argument(
        '--params', nargs="+", action='append', required=False, default=None,
        help='Provide required parameter values (e.g. param1=value1).')
    sps[BUILD_CMD].add_argument(
        '-v', '--volumes', nargs="+", required=False, default=None,
        help='If using docker, also mount these folders as volumes.')
    sps[BUILD_CMD].add_argument(
        '-o', '--outfolder', dest='outfolder', required=False, default=None,
        help='Override the default path to genomes folder, which is the '
             'genome_folder attribute in the genome configuration file.')
    sps[BUILD_CMD].add_argument(
        "-q", "--requirements", action="store_true",
        help="Show the build requirements for the specified asset and exit.")
    sps[BUILD_CMD].add_argument(
        "-r", "--recipe", required=False, default=None, type=str,
        help="Provide a recipe to use.")

    # add 'genome' argument to many commands
    for cmd in [PULL_CMD, GET_ASSET_CMD, BUILD_CMD, INSERT_CMD, REMOVE_CMD,
                GETSEQ_CMD, TAG_CMD, ID_CMD]:
        # genome is not required for listing actions.
        # FIX: was `required=cmd in GETSEQ_CMD` -- a substring test on the
        # command name, not the intended equality comparison.
        sps[cmd].add_argument("-g", "--genome",
                              required=(cmd == GETSEQ_CMD),
                              help="Reference assembly ID, e.g. mm10.")

    for cmd in LIST_REMOTE_CMD, LIST_LOCAL_CMD:
        sps[cmd].add_argument("-g", "--genome", required=False, type=str,
                              nargs="*",
                              help="Reference assembly ID, e.g. mm10.")

    for cmd in [PULL_CMD, GET_ASSET_CMD, BUILD_CMD, INSERT_CMD, REMOVE_CMD,
                TAG_CMD, ID_CMD]:
        sps[cmd].add_argument(
            "asset_registry_paths", metavar="asset-registry-paths", type=str,
            nargs='+',
            help="One or more registry path strings that identify assets "
                 "(e.g. hg38/fasta or hg38/fasta:tag" +
                 (" or hg38/fasta.fai:tag)."
                  if cmd == GET_ASSET_CMD else ")."))

    for cmd in [PULL_CMD, REMOVE_CMD, INSERT_CMD]:
        sps[cmd].add_argument(
            "-f", "--force", action="store_true",
            help="Do not prompt before action, approve upfront.")

    sps[PULL_CMD].add_argument("-u", "--no-untar", action="store_true",
                               help="Do not extract tarballs.")

    sps[INSERT_CMD].add_argument("-p", "--path", required=True,
                                 help="Relative local path to asset.")

    sps[GETSEQ_CMD].add_argument(
        "-l", "--locus", required=True,
        help="Coordinates of desired sequence; e.g. 'chr1:50000-50200'.")

    sps[GET_ASSET_CMD].add_argument(
        "-e", "--check-exists", required=False, action="store_true",
        help="Whether the returned asset path should be checked for existence "
             "on disk.")

    group = sps[TAG_CMD].add_mutually_exclusive_group(required=True)
    group.add_argument("-t", "--tag", type=str,
                       help="Tag to assign to an asset.")
    group.add_argument("-d", "--default", action="store_true",
                       help="Set the selected asset tag as the default one.")

    sps[SUBSCRIBE_CMD].add_argument(
        "-r", "--reset", action="store_true",
        help="Overwrite the current list of server URLs.")

    for cmd in [SUBSCRIBE_CMD, UNSUBSCRIBE_CMD]:
        sps[cmd].add_argument(
            "-s", "--genome-server", nargs='+', required=True,
            help="One or more URLs to {action} the {key} attribute in config file."
                 .format(action="add to" if cmd == SUBSCRIBE_CMD
                         else "remove from",
                         key=CFG_SERVERS_KEY))

    return parser
def build_argparser():
    """
    Builds argument parser.

    :return argparse.ArgumentParser
    """
    banner = "%(prog)s - builds and manages reference genome assemblies"
    additional_description = "\nhttps://refgenie.databio.org"

    parser = _VersionInHelpParser(description=banner,
                                  epilog=additional_description)
    parser.add_argument("-V", "--version", action="version",
                        version="%(prog)s {v}".format(v=__version__))

    subparsers = parser.add_subparsers(dest="command")

    def add_subparser(cmd, description):
        # One subparser per subcommand; `help` mirrors `description` so the
        # text appears both in the command list and in per-command help.
        return subparsers.add_parser(cmd, description=description,
                                     help=description)

    subparser_messages = {
        INIT_CMD: "Initialize a genome configuration.",
        LIST_LOCAL_CMD: "List available local genomes.",
        LIST_REMOTE_CMD: "List available genomes and assets on server.",
        PULL_CMD: "Download assets.",
        BUILD_CMD: "Build genome assets.",
        GET_ASSET_CMD: "Get the path to a local asset.",
        INSERT_CMD: "Insert a local asset into the configuration file."
    }

    sps = {}
    for cmd, desc in subparser_messages.items():
        sps[cmd] = add_subparser(cmd, desc)
        # It's required for init
        sps[cmd].add_argument('-c', '--genome-config',
                              required=(cmd == INIT_CMD),
                              dest="genome_config",
                              help="Path to local genome configuration file.")

    sps[INIT_CMD].add_argument(
        '-s', '--genome-server', default=DEFAULT_SERVER,
        help="URL to use for the genome_server attribute in config file."
             " Defaults : {}".format(DEFAULT_SERVER))

    # The build subcommand also accepts pypiper's recover/config/new-start args.
    sps[BUILD_CMD] = pypiper.add_pypiper_args(
        sps[BUILD_CMD], groups=None, args=["recover", "config", "new-start"])

    # Add any arguments specific to subcommands.
    sps[BUILD_CMD].add_argument(
        "-d", "--docker", action="store_true",
        help="Run all commands in the refgenie docker container.")
    sps[BUILD_CMD].add_argument(
        '-v', '--volumes', nargs="+", required=False, default=None,
        help='If using docker, also mount these folders as volumes')
    sps[BUILD_CMD].add_argument(
        '-o', '--outfolder', dest='outfolder', required=False, default=None,
        help='Override the default path to genomes folder, which is the '
             'genome_folder attribute in the genome configuration file.')

    for cmd in [PULL_CMD, GET_ASSET_CMD, BUILD_CMD, INSERT_CMD]:
        sps[cmd].add_argument("-g", "--genome", required=True,
                              help="Reference assembly ID, e.g. mm10")
        sps[cmd].add_argument(
            "-a", "--asset", required=True, nargs='+',
            help="Name of one or more assets (keys in genome config file)")

    sps[PULL_CMD].add_argument("-u", "--no-untar", action="store_true",
                               help="Do not extract tarballs.")

    sps[INSERT_CMD].add_argument("-p", "--path", required=True,
                                 help="Relative path to asset")

    # Finally, arguments to the build command to give the files needed to do
    # the building. These should eventually move to a more flexible system that
    # doesn't require them to be hard-coded here in order to be recognized
    for arg in BUILD_SPECIFIC_ARGS:
        sps[BUILD_CMD].add_argument("--{arg}".format(arg=arg),
                                    required=False, help=SUPPRESS)
    # (Removed dead commented-out --fasta/--gtf argument definitions; they
    # are covered by BUILD_SPECIFIC_ARGS above.)

    return parser
# Imports: stdlib first, then third-party.
from argparse import ArgumentParser  # was missing; ArgumentParser is used below
import sys
import subprocess

import yaml

import pypiper

parser = ArgumentParser(
    description="A pipeline to count the number of reads and file size. Accepts"
                " BAM, fastq, or fastq.gz files.")

# First, add standard arguments from Pypiper.
# groups="pypiper" will add all the arguments that pypiper uses,
# and adding "common" adds arguments for --input and --sample-name
# and "output_parent". You can read more about your options for standard
# arguments in the pypiper docs (section "command-line arguments")
parser = pypiper.add_pypiper_args(parser,
                                  groups=["pypiper", "common", "ngs"],
                                  args=["output-parent", "config"],
                                  required=['sample-name', 'output-parent'])

# Add any pipeline-specific arguments if you like here.

args = parser.parse_args()

if not args.input or not args.output_parent:
    parser.print_help()
    # Exit with a non-zero status: required inputs are missing, so this is an
    # error path. (A bare `raise SystemExit` would exit with status 0, which
    # wrongly signals success to shells and schedulers.)
    raise SystemExit(1)

# Shorthand flag: True for paired-end data, False otherwise.
args.paired_end = args.single_or_paired == "paired"
def build_argparser():
    """
    Build the command-line argument parser for the refgenie CLI.

    Creates one subparser per entry in ``SUBPARSER_MESSAGES`` (plus nested
    ``alias`` subcommands from ``ALIAS_SUBPARSER_MESSAGES``) and attaches the
    shared config/locking options and all subcommand-specific options.

    :return argparse.ArgumentParser: configured top-level parser
    """
    banner = "%(prog)s - reference genome asset manager"
    additional_description = "\nhttps://refgenie.databio.org"
    parser = VersionInHelpParser(
        prog="refgenie",
        version=f"{__version__} | refgenconf {rgc_version}",
        description=banner,
        epilog=additional_description,
    )

    subparsers = parser.add_subparsers(dest="command")

    def add_subparser(cmd, msg, subparsers):
        # Register one subcommand on the given subparsers collection; the
        # custom formatter widens the help column for long option strings.
        return subparsers.add_parser(
            cmd,
            description=msg,
            help=msg,
            formatter_class=lambda prog: HelpFormatter(
                prog, max_help_position=40, width=90),
        )

    sps = {}
    for cmd, desc in SUBPARSER_MESSAGES.items():
        sps[cmd] = add_subparser(cmd, desc, subparsers)
        # alias is nested and alias subcommands require config path
        if cmd == ALIAS_CMD:
            continue
        # Shared options: config path (required for init only) and read-lock
        # opt-out.
        sps[cmd].add_argument(
            "-c",
            "--genome-config",
            required=(cmd == INIT_CMD),
            dest="genome_config",
            metavar="C",
            help="Path to local genome configuration file. Optional if {} "
                 "environment variable is set.".format(", ".join(CFG_ENV_VARS)),
        )
        sps[cmd].add_argument(
            "--skip-read-lock",
            required=False,
            action="store_true",
            help="Whether the config file should not be locked for reading",
        )

    # upgrade: upgrade config and alter file structure to the target version
    sps[UPGRADE_CMD].add_argument(
        "-v",
        "--target-version",
        required=True,
        metavar="V",
        help="Target config version for the upgrade.",
    )
    sps[UPGRADE_CMD].add_argument(
        "-f",
        "--force",
        action="store_true",
        help="Do not prompt before action, approve upfront.",
    )

    # init: options describing the config file to create.
    sps[INIT_CMD].add_argument(
        "-s",
        "--genome-server",
        nargs="+",
        default=[DEFAULT_SERVER],
        help=f"URL(s) to use for the {CFG_SERVERS_KEY} attribute in config "
             f"file. Default: {DEFAULT_SERVER}.",
    )
    sps[INIT_CMD].add_argument(
        "-f",
        "--genome-folder",
        help="Absolute path to parent folder refgenie-managed assets.",
    )
    sps[INIT_CMD].add_argument(
        "-a",
        "--genome-archive-folder",
        help="Absolute path to parent archive folder refgenie-managed assets; "
             "used by refgenieserver.",
    )
    sps[INIT_CMD].add_argument(
        "-b",
        "--genome-archive-config",
        help="Absolute path to desired archive config file; used by "
             "refgenieserver.",
    )
    sps[INIT_CMD].add_argument(
        "-u",
        "--remote-url-base",
        help="URL to use as an alternative, remote archive location; used by "
             "refgenieserver.",
    )
    sps[INIT_CMD].add_argument(
        "-j",
        "--settings-json",
        # typo fix: was "inialize"
        help="Absolute path to a JSON file with the key "
             "value pairs to initialize the configuration "
             "file with. Overwritten by itemized specifications.",
    )

    # Let pypiper add its standard recover/config/new-start flags to `build`.
    sps[BUILD_CMD] = pypiper.add_pypiper_args(
        sps[BUILD_CMD], groups=None, args=["recover", "config", "new-start"])

    # Add any arguments specific to subcommands.
    sps[BUILD_CMD].add_argument(
        "--tag-description",
        required=False,
        default=None,
        type=str,
        help="Add tag level description (e.g. built with version 0.3.2).",
    )
    sps[BUILD_CMD].add_argument(
        "--genome-description",
        required=False,
        default=None,
        type=str,
        help="Add genome level description (e.g. The mouse mitochondrial "
             "genome, released in Dec 2013).",
    )
    sps[BUILD_CMD].add_argument(
        "-d",
        "--docker",
        action="store_true",
        help="Run all commands in the refgenie docker container.",
    )
    sps[BUILD_CMD].add_argument(
        "--map",
        action="store_true",
        help="Run the map procedure: build assets and store the metadata in "
             "separate configs.",
    )
    sps[BUILD_CMD].add_argument(
        "--pull-parents",
        action="store_true",
        help="Automatically pull the default parent asset if required but not "
             "provided",
    )
    sps[BUILD_CMD].add_argument(
        "--preserve-map-configs",
        action="store_true",
        help="Do not remove the genome configuration files produced in the "
             "potential map step of building",
    )
    sps[BUILD_CMD].add_argument(
        "--reduce",
        action="store_true",
        help="Run the reduce procedure: gather the metadata produced with "
             "`refgenie build --map`.",
    )
    sps[BUILD_CMD].add_argument(
        "--assets",
        nargs="+",
        action="append",
        required=False,
        default=None,
        help="Override the default genome, asset and tag of the parents"
             " (e.g. fasta=hg38/fasta:default gtf=mm10/gencode_gtf:default).",
    )
    sps[BUILD_CMD].add_argument(
        "--files",
        nargs="+",
        action="append",
        required=False,
        default=None,
        help="Provide paths to the required files (e.g. "
             "fasta=/path/to/file.fa.gz).",
    )
    sps[BUILD_CMD].add_argument(
        "--params",
        nargs="+",
        action="append",
        required=False,
        default=None,
        help="Provide required parameter values (e.g. param1=value1).",
    )
    sps[BUILD_CMD].add_argument(
        "-v",
        "--volumes",
        nargs="+",
        required=False,
        default=None,
        help="If using docker, also mount these folders as volumes.",
    )
    sps[BUILD_CMD].add_argument(
        "-q",
        "--requirements",
        action="store_true",
        help="Show the build requirements for the specified asset and exit.",
    )
    sps[BUILD_CMD].add_argument(
        "-r",
        "--recipe",
        required=False,
        default=None,
        type=str,
        help="Provide a recipe to use.",
    )

    # alias: its own nested subcommand tree (set/get/remove).
    alias_subparser = sps[ALIAS_CMD]
    alias_subsubparsers = alias_subparser.add_subparsers(dest="subcommand")

    alias_sps = {}
    for cmd, desc in ALIAS_SUBPARSER_MESSAGES.items():
        alias_sps[cmd] = add_subparser(cmd, desc, alias_subsubparsers)
        alias_sps[cmd].add_argument(
            "-c",
            "--genome-config",
            required=False,
            dest="genome_config",
            metavar="C",
            help="Path to local genome configuration file. Optional if {} "
                 "environment variable is set.".format(", ".join(CFG_ENV_VARS)),
        )
        alias_sps[cmd].add_argument(
            "--skip-read-lock",
            required=False,
            action="store_true",
            help="Whether the config file should not be locked for reading",
        )

    alias_sps[ALIAS_SET_CMD].add_argument(
        "-a",
        "--aliases",
        metavar="A",
        required=False,
        default=None,
        type=str,
        nargs="+",
        help="Aliases to set; single if the digest is to be retrieved from "
             "the server.",
    )
    alias_sps[ALIAS_SET_CMD].add_argument(
        "-d",
        "--digest",
        metavar="D",
        required=False,
        type=str,
        help="Digest to set; leave out if the digest is to be retrieved from "
             "the server.",
    )
    alias_sps[ALIAS_SET_CMD].add_argument(
        "-r",
        "--reset",
        action="store_true",
        help="Whether all the aliases should be removed prior to setting new "
             "ones.",
    )
    alias_sps[ALIAS_SET_CMD].add_argument(
        "-f",
        "--force",
        action="store_true",
        help="Whether the action should be forced, if genome does not exist.",
    )
    alias_sps[ALIAS_REMOVE_CMD].add_argument(
        "-a",
        "--aliases",
        metavar="A",
        required=False,
        default=None,
        type=str,
        nargs="+",
        help="Aliases to remove.",
    )
    alias_sps[ALIAS_REMOVE_CMD].add_argument(
        "-d",
        "--digest",
        metavar="D",
        required=True,
        type=str,
        help="Digest to remove.",
    )
    alias_sps[ALIAS_GET_CMD].add_argument(
        "-a",
        "--aliases",
        metavar="A",
        required=False,
        type=str,
        nargs="+",
        help="Aliases to get the digests for.",
    )

    # compare: two positional genomes plus output-control flags.
    sps[COMPARE_CMD].add_argument(
        "genome1",
        metavar="GENOME1",
        type=str,
        nargs=1,
        help="First genome for compatibility check.",
    )
    sps[COMPARE_CMD].add_argument(
        "genome2",
        metavar="GENOME2",
        type=str,
        nargs=1,
        help="Second genome for compatibility check.",
    )
    sps[COMPARE_CMD].add_argument(
        "-e",
        "--no-explanation",
        action="store_true",
        help="Do not print compatibility code explanation.",
    )
    sps[COMPARE_CMD].add_argument(
        "-f",
        "--flag-meanings",
        action="store_true",
        help="Display compatibility flag meanings.",
    )

    # add 'genome' argument to many commands
    for cmd in [
        PULL_CMD,
        GET_ASSET_CMD,
        GET_REMOTE_ASSET_CMD,
        BUILD_CMD,
        INSERT_CMD,
        REMOVE_CMD,
        GETSEQ_CMD,
        TAG_CMD,
        ID_CMD,
    ]:
        # genome is not required for listing actions.
        # BUGFIX: was `cmd in GETSEQ_CMD`, a substring test on the command
        # name string that only worked because no other command name happens
        # to be a substring of it; equality is the intended check.
        sps[cmd].add_argument(
            "-g",
            "--genome",
            required=cmd == GETSEQ_CMD,
            metavar="G",
            help="Reference assembly ID, e.g. mm10.",
        )

    for cmd in LIST_REMOTE_CMD, LIST_LOCAL_CMD:
        sps[cmd].add_argument(
            "-g",
            "--genome",
            required=False,
            type=str,
            metavar="G",
            nargs="*",
            help="Reference assembly ID, e.g. mm10.",
        )

    # asset registry path positional for asset-oriented commands.
    for cmd in [
        PULL_CMD,
        GET_ASSET_CMD,
        GET_REMOTE_ASSET_CMD,
        BUILD_CMD,
        INSERT_CMD,
        REMOVE_CMD,
        TAG_CMD,
        ID_CMD,
    ]:
        build_arg_kwargs = dict(
            metavar="asset-registry-paths",
            type=str,
            nargs="+",
            help="One or more registry path strings that identify assets "
                 "(e.g. hg38/fasta or hg38/fasta:tag"
                 + (" or hg38/fasta.fai:tag)."
                    if cmd in [GET_ASSET_CMD, GET_REMOTE_ASSET_CMD]
                    else ")."),
        )
        # make asset-registry-path argument optional for build command
        # and require it manually in CLI when running a non-reduce build
        if cmd == BUILD_CMD:
            build_arg_kwargs.update({"nargs": "*", "default": None})
        sps[cmd].add_argument("asset_registry_paths", **build_arg_kwargs)

    sps[LIST_LOCAL_CMD].add_argument(
        "-r",
        "--recipes",
        action="store_true",
        help="List available recipes.",
    )

    for cmd in [REMOVE_CMD, INSERT_CMD]:
        sps[cmd].add_argument(
            "-f",
            "--force",
            action="store_true",
            help="Do not prompt before action, approve upfront.",
        )

    sps[REMOVE_CMD].add_argument(
        "-a",
        "--aliases",
        action="store_true",
        help="Remove the genome alias if last asset for that genome is "
             "removed.",
    )

    # pull: mutually exclusive prompt-handling flag pairs.
    force_group = sps[PULL_CMD].add_argument_group(
        title="Prompt handling",
        description="These flags configure the pull prompt responses.",
    )

    overwrite_group = force_group.add_mutually_exclusive_group()
    overwrite_group.add_argument(
        "--no-overwrite",
        action="store_true",
        help="Do not overwrite if asset exists.",
    )
    overwrite_group.add_argument(
        "--force-overwrite",
        action="store_true",
        help="Overwrite if asset exists.",
    )

    large_group = force_group.add_mutually_exclusive_group()
    large_group.add_argument(
        "--no-large",
        action="store_true",
        help="Do not pull archives over 5GB.",
    )
    large_group.add_argument(
        "--pull-large",
        action="store_true",
        help="Pull any archive, regardless of its size.",
    )

    force_group.add_argument(
        "--size-cutoff",
        type=float,
        default=10,
        metavar="S",
        help="Maximum archive file size to download with no confirmation "
             "required (in GB, default: 10)",
    )
    force_group.add_argument(
        "-b",
        "--batch",
        action="store_true",
        # typo fix: was "do no overwrite"
        help="Use batch mode: pull large archives, do not overwrite",
    )

    sps[INSERT_CMD].add_argument(
        "-p",
        "--path",
        required=True,
        metavar="P",
        help="Relative local path to asset.",
    )
    sps[INSERT_CMD].add_argument(
        "-s",
        "--seek-keys",
        required=False,
        type=str,
        metavar="S",
        help="""
        String representation of a JSON object with seek_keys,
        e.g. '{"seek_key1": "file.txt"}'
        """,
    )
    sps[GETSEQ_CMD].add_argument(
        "-l",
        "--locus",
        required=True,
        help="Coordinates of desired sequence; e.g. 'chr1:50000-50200'.",
    )
    sps[GET_ASSET_CMD].add_argument(
        "-e",
        "--check-exists",
        required=False,
        action="store_true",
        help="Whether the returned asset path should be checked for existence "
             "on disk.",
    )

    # tag: exactly one of --tag / --default must be supplied.
    sps[TAG_CMD].add_argument(
        "-f",
        "--force",
        action="store_true",
        help="Do not prompt before action, approve upfront.",
    )
    group = sps[TAG_CMD].add_mutually_exclusive_group(required=True)
    group.add_argument(
        "-t",
        "--tag",
        type=str,
        help="Tag to assign to an asset.",
    )
    group.add_argument(
        "-d",
        "--default",
        action="store_true",
        help="Set the selected asset tag as the default one.",
    )

    sps[SUBSCRIBE_CMD].add_argument(
        "-r",
        "--reset",
        action="store_true",
        help="Overwrite the current list of server URLs.",
    )
    for cmd in [SUBSCRIBE_CMD, UNSUBSCRIBE_CMD]:
        sps[cmd].add_argument(
            "-s",
            "--genome-server",
            nargs="+",
            required=True,
            metavar="S",
            help="One or more URLs to {action} the {key} attribute in config "
                 "file.".format(
                     action="add to" if cmd == SUBSCRIBE_CMD else "remove from",
                     key=CFG_SERVERS_KEY,
                 ),
        )

    for cmd in [LIST_REMOTE_CMD, GET_REMOTE_ASSET_CMD, POPULATE_REMOTE_CMD]:
        sps[cmd].add_argument(
            "-s",
            "--genome-server",
            nargs="+",
            required=False,
            metavar="S",
            help="One or more URLs to use. "
                 "This information will not persist in the genome config file.",
        )
        sps[cmd].add_argument(
            "-p",
            "--append-server",
            action="store_true",
            help="Whether the provided servers should be appended to the list.",
        )

    for cmd in [POPULATE_REMOTE_CMD, GET_REMOTE_ASSET_CMD]:
        sps[cmd].add_argument(
            "--remote-class",
            metavar="RC",
            type=str,
            default="http",
            help="Remote data provider class, e.g. 'http' or 's3'",
        )

    for cmd in [POPULATE_REMOTE_CMD, POPULATE_CMD]:
        sps[cmd].add_argument(
            "-f",
            "--file",
            metavar="F",
            help="File with registry paths to populate",
        )

    return parser
'specificity plots') parser.add_argument( "--bedbase-config", dest="bedbase_config", type=str, default=None, help="a path to the bedbase configuratiion file") parser.add_argument( "-y", "--sample-yaml", dest="sample_yaml", type=str, required=False, help="a yaml config file with sample attributes to pass on more metadata " "into the database") exclusive_group = parser.add_mutually_exclusive_group() exclusive_group.add_argument( '--no-db-commit', action='store_true', help='whether the JSON commit to the database should be skipped') exclusive_group.add_argument( '--just-db-commit', action='store_true', help='whether just to commit the JSON to the database') parser = pypiper.add_pypiper_args(parser, groups=["pypiper", "common", "looper", "ngs"]) args = parser.parse_args() bbc = bbconf.BedBaseConf(filepath=bbconf.get_bedbase_cfg(args.bedbase_config)) bed_digest = md5(open(args.bedfile, 'rb').read()).hexdigest() bedfile_name = os.path.split(args.bedfile)[1] # need to split twice since there are 2 exts fileid = os.path.splitext(os.path.splitext(bedfile_name)[0])[0] outfolder = os.path.abspath(os.path.join( bbc[CFG_PATH_KEY][CFG_BEDSTAT_OUTPUT_KEY], bed_digest)) json_file_path = os.path.abspath(os.path.join(outfolder, fileid + ".json")) if not args.just_db_commit: pm = pypiper.PipelineManager(name="bedstat-pipeline", outfolder=outfolder,
action="store", type=str, nargs="*", help= "path to the chain file(s) ffacilitating conversion from one assembly to the other" ) parser.add_argument( "-f", "--outfolder", type=str, required=True, help="path to folder where pipeline logs and lifted files will be stored") # add pypiper args parser = pypiper.add_pypiper_args(parser, groups=["pypiper"], required=["--bedfile", "--genome"]) args = parser.parse_args() # Set output folder logs_name = "bedlifter_logs" logs_dir = os.path.join(args.outfolder, logs_name) if not os.path.exists(logs_dir): print("bedlifter logs directory doesn't exist. Creating one...") os.makedirs(logs_dir) def main(): pm = pypiper.PipelineManager(name="bedlifter", outfolder=logs_dir,
__author__ = "Martin Jaeger" __copyright__ = "Copyright 2018, Martin Jaeger" __credits__ = [] __license__ = "GPL3" __version__ = "0.3" __maintainer__ = "Martin Jaeger" __email__ = "*****@*****.**" __status__ = "development" ######################## ### Argument Parsing ### ######################## parser = ArgumentParser(description='Pypiper arguments.') parser = pypiper.add_pypiper_args(parser, groups=["pypiper", "resource", "config"]) parser.add_argument( '-y', '--sample_yaml', dest='sample_config', help= 'yaml config file with sample attributes; this file will be generated by looper if submitting multiple jobs in parallel', type=str) parser.add_argument( '-i', '--input_file', dest='input_file', help= 'Path to input raw read BAM file(s). Space-separated paths will be merged before processing.', nargs='+',
from argparse import ArgumentParser import os, re import sys import os.path import subprocess import pypiper import yaml import shutil from datetime import datetime # Argument Parsing from yaml file # ####################################################################################### parser = ArgumentParser(description='Pipeline') parser = pypiper.add_pypiper_args( parser, groups=["config"], args=["sample-name", "recover", "new-start", "output-parent", "genome"]) #Add any pipeline-specific arguments parser.add_argument( '-I', '--input-dir', required=True, dest='input', type=str, help= "path to directory containing input bam files (and narrowpeak files if applicable) (required)" ) parser.add_argument('-gs', '--genome-size', default="hs",