def main(): parser = argparse.ArgumentParser() subparsers = parser.add_subparsers(dest='command') # Generate subparsers subparsers.add_parser('generate-config', help='Generates an editable config in the current working directory.') # Run subparser parser_run = subparsers.add_parser('run', help='Runs the ADAM preprocessing pipeline') parser_run.add_argument('--config', default='adam_preprocessing.config', type=str, help='Path to the (filled in) config file, generated with "generate-config". ' '\nDefault value: "%(default)s"') parser_run.add_argument('--sample', help='The S3 URL or local path to the input SAM or BAM file.' 'NOTE: unlike other pipelines, we do not support ftp://, gnos://, etc. schemes.') parser_run.add_argument('--output-dir', required=True, default=None, help='full path where final results will be output') parser_run.add_argument('-s', '--suffix', default='', help='Additional suffix to add to the names of the output files') Job.Runner.addToilOptions(parser_run) args = parser.parse_args() cwd = os.getcwd() if args.command == 'generate-config': generate_file(os.path.join(cwd, 'adam-preprocessing.config'), generate_config) # Pipeline execution elif args.command == 'run': require(os.path.exists(args.config), '{} not found. Please run ' 'generate-config'.format(args.config)) # Parse config parsed_config = {x.replace('-', '_'): y for x, y in yaml.load(open(args.config).read()).iteritems()} inputs = argparse.Namespace(**parsed_config) require(not (inputs.master_ip and inputs.num_nodes), 'Only one of master_ip and num_nodes can be provided.') if not hasattr(inputs, 'master_ip'): require(inputs.num_nodes > 1, 'num_nodes allocates one Spark/HDFS master and n-1 workers, and ' 'thus must be greater than 1. %d was passed.' % inputs.num_nodes) for arg in [inputs.dbsnp, inputs.memory]: require(arg, 'Required argument {} missing from config'.format(arg)) Job.Runner.startToil(Job.wrapJobFn(static_adam_preprocessing_dag, inputs, args.sample, args.output_dir), args)
def main(): """ GATK germline pipeline with variant filtering and annotation. """ # Define Parser object and add to jobTree parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawTextHelpFormatter) # Generate subparsers subparsers = parser.add_subparsers(dest='command') subparsers.add_parser('generate-config', help='Generates an editable config in the current working directory.') subparsers.add_parser('generate-manifest', help='Generates an editable manifest in the current working directory.') subparsers.add_parser('generate', help='Generates a config and manifest in the current working directory.') # Run subparser parser_run = subparsers.add_parser('run', help='Runs the GATK germline pipeline') parser_run.add_argument('--config', required=True, type=str, help='Path to the (filled in) config file, generated with ' '"generate-config".') parser_run.add_argument('--manifest', type=str, help='Path to the (filled in) manifest file, generated with ' '"generate-manifest".\nDefault value: "%(default)s".') parser_run.add_argument('--sample', default=None, nargs=2, type=str, help='Input sample identifier and BAM file URL or local path') parser_run.add_argument('--output-dir', default=None, help='Path/URL to output directory') parser_run.add_argument('-s', '--suffix', default=None, help='Additional suffix to add to the names of the output files') parser_run.add_argument('--preprocess-only', action='store_true', help='Only runs preprocessing steps') Job.Runner.addToilOptions(parser_run) options = parser.parse_args() cwd = os.getcwd() if options.command == 'generate-config' or options.command == 'generate': generate_file(os.path.join(cwd, 'config-toil-germline.yaml'), generate_config) if options.command == 'generate-manifest' or options.command == 'generate': generate_file(os.path.join(cwd, 'manifest-toil-germline.tsv'), generate_manifest) elif options.command == 'run': # Program checks for program in ['curl', 'docker']: require(next(which(program)), program + ' must be installed on every node.'.format(program)) require(os.path.exists(options.config), '{} not found. Please run "generate-config"'.format(options.config)) # Read sample manifest samples = [] if options.manifest: samples.extend(parse_manifest(options.manifest)) # Add BAM sample from command line if options.sample: uuid, url = options.sample # samples tuple: (uuid, url, paired_url, rg_line) # BAM samples should not have as paired URL or read group line samples.append(GermlineSample(uuid, url, None, None)) require(len(samples) > 0, 'No samples were detected in the manifest or on the command line') # Parse inputs inputs = {x.replace('-', '_'): y for x, y in yaml.load(open(options.config).read()).iteritems()} required_fields = {'genome_fasta', 'output_dir', 'run_bwa', 'sorted', 'snp_filter_annotations', 'indel_filter_annotations', 'preprocess', 'preprocess_only', 'run_vqsr', 'joint_genotype', 'run_oncotator', 'cores', 'file_size', 'xmx', 'suffix'} input_fields = set(inputs.keys()) require(input_fields > required_fields, 'Missing config parameters:\n{}'.format(', '.join(required_fields - input_fields))) if inputs['output_dir'] is None: inputs['output_dir'] = options.output_dir require(inputs['output_dir'] is not None, 'Missing output directory PATH/URL') if inputs['suffix'] is None: inputs['suffix'] = options.suffix if options.suffix else '' if inputs['preprocess_only'] is None: inputs['preprocess_only'] = options.preprocess_only if inputs['run_vqsr']: # Check that essential VQSR parameters are present vqsr_fields = {'g1k_snp', 'mills', 'dbsnp', 'hapmap', 'omni'} require(input_fields > vqsr_fields, 'Missing parameters for VQSR:\n{}'.format(', '.join(vqsr_fields - input_fields))) # Check that hard filtering parameters are present. If only running preprocessing steps, then we do # not need filtering information. elif not inputs['preprocess_only']: hard_filter_fields = {'snp_filter_name', 'snp_filter_expression', 'indel_filter_name', 'indel_filter_expression'} require(input_fields > hard_filter_fields, 'Missing parameters for hard filtering:\n{}'.format(', '.join(hard_filter_fields - input_fields))) # Check for falsey hard filtering parameters for hard_filter_field in hard_filter_fields: require(inputs[hard_filter_field], 'Missing %s value for hard filtering, ' 'got %s.' % (hard_filter_field, inputs[hard_filter_field])) # Set resource parameters inputs['xmx'] = human2bytes(inputs['xmx']) inputs['file_size'] = human2bytes(inputs['file_size']) inputs['cores'] = int(inputs['cores']) inputs['annotations'] = set(inputs['snp_filter_annotations'] + inputs['indel_filter_annotations']) # HaplotypeCaller test data for testing inputs['hc_output'] = inputs.get('hc_output', None) # It is a toil-scripts convention to store input parameters in a Namespace object config = argparse.Namespace(**inputs) root = Job.wrapJobFn(run_gatk_germline_pipeline, samples, config) Job.Runner.startToil(root, options)
def main(): """toil-signalAlign master script """ def parse_args(): parser = argparse.ArgumentParser(description=print_help.__doc__, formatter_class=argparse.RawTextHelpFormatter) subparsers = parser.add_subparsers(dest="command") # parsers for running the full pipeline run_parser = subparsers.add_parser("run", help="runs full workflow on a BAM") run_parser.add_argument('--config', default='config-toil-signalAlign.yaml', type=str, help='Path to the (filled in) config file, generated with "generate".') run_parser.add_argument('--manifest', default='manifest-toil-signalAlign.tsv', type=str, help='Path to the (filled in) manifest file, generated with "generate". ' '\nDefault value: "%(default)s".') subparsers.add_parser("generate", help="generates a config file for your run, do this first") # parsers for running the readstore pipeline readstore_parser = subparsers.add_parser("run-readstore", help="generates a readstore from a tar of .fast5s") readstore_parser.add_argument('--config', default='config-toil-signalAlign-readstore.yaml', type=str, help='Path to the (filled in) config file, generated with "generate".') readstore_parser.add_argument('--manifest', default='manifest-toil-signalAlign-readstore.tsv', type=str, help='Path to the (filled in) manifest file, generated with "generate". ' '\nDefault value: "%(default)s".') subparsers.add_parser("generate-readstore", help="generates a config file for making a readstore") Job.Runner.addToilOptions(run_parser) Job.Runner.addToilOptions(readstore_parser) return parser.parse_args() def exitBadInput(message=None): if message is not None: print(message, file=sys.stderr) sys.exit(1) if len(sys.argv) == 1: exitBadInput(print_help()) cwd = os.getcwd() args = parse_args() if args.command == "generate" or args.command == "generate-readstore": if args.command == "generate": config_filename = "config-toil-signalAlign.yaml" manifest_filename = "manifest-toil-signalAlign.tsv" else: config_filename = "config-toil-signalAlign-readstore.yaml" manifest_filename = "manifest-toil-signalAlign-readstore.tsv" configGenerator = partial(generateConfig, command=args.command) manifestGenerator = partial(generateManifest, command=args.command) try: config_path = os.path.join(cwd, config_filename) generate_file(config_path, configGenerator) except UserError: print("[toil-nanopore]NOTICE using existing config file {}".format(config_path)) pass try: manifest_path = os.path.join(cwd, manifest_filename) generate_file(manifest_path, manifestGenerator) except UserError: print("[toil-nanopore]NOTICE using existing manifest {}".format(manifest_path)) elif args.command == "run": require(os.path.exists(args.config), "{config} not found run generate".format(config=args.config)) # Parse config config = {x.replace('-', '_'): y for x, y in yaml.load(open(args.config).read()).iteritems()} samples = parseManifest(args.manifest) for sample in samples: with Toil(args) as toil: if not toil.options.restart: root_job = Job.wrapJobFn(signalAlignCheckInputJobFunction, config, sample) return toil.start(root_job) else: toil.restart() elif args.command == "run-readstore": require(os.path.exists(args.config), "{config} not found run generate-readstore".format(config=args.config)) # Parse config config = {x.replace('-', '_'): y for x, y in yaml.load(open(args.config).read()).iteritems()} samples = parseManifestReadstore(args.manifest) with Toil(args) as toil: if not toil.options.restart: root_job = Job.wrapJobFn(makeReadstoreJobFunction, config, samples) return toil.start(root_job) else: toil.restart()
def main(): """ Computational Genomics Lab, Genomics Institute, UC Santa Cruz Toil BWA pipeline Alignment of fastq reads via BWA-kit General usage: 1. Type "toil-bwa generate" to create an editable manifest and config in the current working directory. 2. Parameterize the pipeline by editing the config. 3. Fill in the manifest with information pertaining to your samples. 4. Type "toil-bwa run [jobStore]" to execute the pipeline. Please read the README.md located in the source directory or at: https://github.com/BD2KGenomics/toil-scripts/tree/master/src/toil_scripts/bwa_alignment Structure of the BWA pipeline (per sample) 0 --> 1 0 = Download sample 1 = Run BWA-kit =================================================================== :Dependencies: cURL: apt-get install curl Toil: pip install toil Docker: wget -qO- https://get.docker.com/ | sh Optional: S3AM: pip install --s3am (requires ~/.boto config file) Boto: pip install boto """ # Define Parser object and add to Toil parser = argparse.ArgumentParser(description=main.__doc__, formatter_class=argparse.RawTextHelpFormatter) subparsers = parser.add_subparsers(dest='command') # Generate subparsers subparsers.add_parser('generate-config', help='Generates an editable config in the current working directory.') subparsers.add_parser('generate-manifest', help='Generates an editable manifest in the current working directory.') subparsers.add_parser('generate', help='Generates a config and manifest in the current working directory.') # Run subparser parser_run = subparsers.add_parser('run', help='Runs the BWA alignment pipeline') group = parser_run.add_mutually_exclusive_group() parser_run.add_argument('--config', default='config-toil-bwa.yaml', type=str, help='Path to the (filled in) config file, generated with "generate-config".') group.add_argument('--manifest', default='manifest-toil-bwa.tsv', type=str, help='Path to the (filled in) manifest file, generated with "generate-manifest". ' '\nDefault value: "%(default)s".') group.add_argument('--sample', nargs='+', action=required_length(2, 3), help='Space delimited sample UUID and fastq files in the format: uuid url1 [url2].') # Print docstring help if no arguments provided if len(sys.argv) == 1: parser.print_help() sys.exit(1) Job.Runner.addToilOptions(parser_run) args = parser.parse_args() # Parse subparsers related to generation of config and manifest cwd = os.getcwd() if args.command == 'generate-config' or args.command == 'generate': generate_file(os.path.join(cwd, 'config-toil-bwa.yaml'), generate_config) if args.command == 'generate-manifest' or args.command == 'generate': generate_file(os.path.join(cwd, 'manifest-toil-bwa.tsv'), generate_manifest) # Pipeline execution elif args.command == 'run': require(os.path.exists(args.config), '{} not found. Please run generate-config'.format(args.config)) if not args.sample: args.sample = None require(os.path.exists(args.manifest), '{} not found and no sample provided. ' 'Please run "generate-manifest"'.format(args.manifest)) # Parse config parsed_config = {x.replace('-', '_'): y for x, y in yaml.load(open(args.config).read()).iteritems()} config = argparse.Namespace(**parsed_config) config.maxCores = int(args.maxCores) if args.maxCores else sys.maxint samples = [args.sample[0], args.sample[1:]] if args.sample else parse_manifest(args.manifest) # Sanity checks require(config.ref, 'Missing URL for reference file: {}'.format(config.ref)) require(config.output_dir, 'No output location specified: {}'.format(config.output_dir)) # Launch Pipeline Job.Runner.startToil(Job.wrapJobFn(download_reference_files, config, samples), args)
def test_generate_file(tmpdir): from toil_lib.files import generate_file work_dir = str(tmpdir) test_path = os.path.join(work_dir, 'test_file') generate_file(test_path, _generate_func) assert open(test_path).read().strip() == 'test'
def main(): """ This is a Toil pipeline used to perform alignment of fastqs. """ # Define Parser object and add to Toil if mock_mode(): usage_msg = 'You have the TOIL_SCRIPTS_MOCK_MODE environment variable set, so this pipeline ' \ 'will run in mock mode. To disable mock mode, set TOIL_SCRIPTS_MOCK_MODE=0' else: usage_msg = None parser = argparse.ArgumentParser(usage=usage_msg) subparsers = parser.add_subparsers(dest='command') subparsers.add_parser('generate-config', help='Generates an editable config in the current working directory.') subparsers.add_parser('generate-manifest', help='Generates an editable manifest in the current working directory.') subparsers.add_parser('generate', help='Generates a config and manifest in the current working directory.') # Run subparser parser_run = subparsers.add_parser('run', help='Runs the ADAM/GATK pipeline') default_config = 'adam-gatk-mock.config' if mock_mode() else 'adam-gatk.config' default_manifest = 'adam-gatk-mock-manifest.csv' if mock_mode() else 'adam-gatk-manifest.csv' parser_run.add_argument('--config', default=default_config, type=str, help='Path to the (filled in) config file, generated with "generate-config".') parser_run.add_argument('--manifest', default=default_manifest, type=str, help='Path to the (filled in) manifest file, generated with "generate-manifest". ' '\nDefault value: "%(default)s".') Job.Runner.addToilOptions(parser_run) args = parser.parse_args() cwd = os.getcwd() if args.command == 'generate-config' or args.command == 'generate': generate_file(os.path.join(cwd, default_config), generate_config) if args.command == 'generate-manifest' or args.command == 'generate': generate_file(os.path.join(cwd, default_manifest), generate_manifest) # Pipeline execution elif args.command == 'run': require(os.path.exists(args.config), '{} not found. Please run ' 'generate-config'.format(args.config)) if not hasattr(args, 'sample'): require(os.path.exists(args.manifest), '{} not found and no samples provided. Please ' 'run "generate-manifest"'.format(args.manifest)) # Parse config parsed_config = {x.replace('-', '_'): y for x, y in yaml.load(open(args.config).read()).iteritems()} inputs = argparse.Namespace(**parsed_config) # Parse manifest file uuid_list = [] with open(args.manifest) as f_manifest: for line in f_manifest: if not line.isspace() and not line.startswith('#'): uuid_list.append(line.strip()) inputs.sort = False if not inputs.dir_suffix: inputs.dir_suffix = '' if not inputs.s3_bucket: inputs.s3_bucket = '' if inputs.master_ip and inputs.num_nodes: raise ValueError("Exactly one of master_ip (%s) and num_nodes (%d) must be provided." % (inputs.master_ip, inputs.num_nodes)) if not hasattr(inputs, 'master_ip') and inputs.num_nodes <= 1: raise ValueError('num_nodes allocates one Spark/HDFS master and n-1 workers, and thus must be greater ' 'than 1. %d was passed.' % inputs.num_nodes) if (inputs.pipeline_to_run != "adam" and inputs.pipeline_to_run != "gatk" and inputs.pipeline_to_run != "both"): raise ValueError("pipeline_to_run must be either 'adam', 'gatk', or 'both'. %s was passed." % inputs.pipeline_to_run) Job.Runner.startToil(Job.wrapJobFn(sample_loop, uuid_list, inputs), args)
def main(): parser = argparse.ArgumentParser() subparsers = parser.add_subparsers(dest='command') # Generate subparsers subparsers.add_parser( 'generate-config', help='Generates an editable config in the current working directory.') # Run subparser parser_run = subparsers.add_parser( 'run', help='Runs the avocado variant calling pipeline') parser_run.add_argument( '--config', default='avocado.config', type=str, help= 'Path to the (filled in) config file, generated with "generate-config". ' '\nDefault value: "%(default)s"') parser_run.add_argument( '--sample', help='The S3 URL or local path to the input SAM or BAM file.' 'NOTE: unlike other pipelines, we do not support ftp://, gnos://, etc. schemes.' ) parser_run.add_argument( '--output-dir', required=True, default=None, help='full path where final results will be output') parser_run.add_argument( '-s', '--suffix', default='', help='Additional suffix to add to the names of the output files') Job.Runner.addToilOptions(parser_run) args = parser.parse_args() cwd = os.getcwd() if args.command == 'generate-config': generate_file(os.path.join(cwd, 'avocado.config'), generate_config) # Pipeline execution elif args.command == 'run': require( os.path.exists(args.config), '{} not found. Please run ' 'generate-config'.format(args.config)) # Parse config parsed_config = { x.replace('-', '_'): y for x, y in yaml.load(open(args.config).read()).iteritems() } inputs = argparse.Namespace(**parsed_config) require(not (inputs.master_ip and (inputs.num_nodes > 0)), 'Only one of master_ip and num_nodes can be provided.') if not hasattr(inputs, 'master_ip'): require( inputs.num_nodes > 1, 'num_nodes allocates one Spark/HDFS master and n-1 workers, and ' 'thus must be greater than 1. %d was passed.' % inputs.num_nodes) for arg in [inputs.memory]: require(arg, 'Required argument {} missing from config'.format(arg)) Job.Runner.startToil( Job.wrapJobFn(static_avocado_dag, inputs, args.sample, args.output_dir), args)
def main(): """ Computational Genomics Lab, Genomics Institute, UC Santa Cruz Toil BWA pipeline Alignment of fastq reads via BWA-kit General usage: 1. Type "toil-bwa generate" to create an editable manifest and config in the current working directory. 2. Parameterize the pipeline by editing the config. 3. Fill in the manifest with information pertaining to your samples. 4. Type "toil-bwa run [jobStore]" to execute the pipeline. Please read the README.md located in the source directory or at: https://github.com/BD2KGenomics/toil-scripts/tree/master/src/toil_scripts/bwa_alignment Structure of the BWA pipeline (per sample) 0 --> 1 0 = Download sample 1 = Run BWA-kit =================================================================== :Dependencies: cURL: apt-get install curl Toil: pip install toil Docker: wget -qO- https://get.docker.com/ | sh Optional: S3AM: pip install --s3am (requires ~/.boto config file) Boto: pip install boto """ # Define Parser object and add to Toil parser = argparse.ArgumentParser( description=main.__doc__, formatter_class=argparse.RawTextHelpFormatter) subparsers = parser.add_subparsers(dest='command') # Generate subparsers subparsers.add_parser( 'generate-config', help='Generates an editable config in the current working directory.') subparsers.add_parser( 'generate-manifest', help='Generates an editable manifest in the current working directory.' ) subparsers.add_parser( 'generate', help='Generates a config and manifest in the current working directory.' ) # Run subparser parser_run = subparsers.add_parser('run', help='Runs the BWA alignment pipeline') group = parser_run.add_mutually_exclusive_group() parser_run.add_argument( '--config', default='config-toil-bwa.yaml', type=str, help= 'Path to the (filled in) config file, generated with "generate-config".' ) group.add_argument( '--manifest', default='manifest-toil-bwa.tsv', type=str, help= 'Path to the (filled in) manifest file, generated with "generate-manifest". ' '\nDefault value: "%(default)s".') group.add_argument( '--sample', nargs='+', action=required_length(2, 3), help= 'Space delimited sample UUID and fastq files in the format: uuid url1 [url2].' ) # Print docstring help if no arguments provided if len(sys.argv) == 1: parser.print_help() sys.exit(1) Job.Runner.addToilOptions(parser_run) args = parser.parse_args() # Parse subparsers related to generation of config and manifest cwd = os.getcwd() if args.command == 'generate-config' or args.command == 'generate': generate_file(os.path.join(cwd, 'config-toil-bwa.yaml'), generate_config) if args.command == 'generate-manifest' or args.command == 'generate': generate_file(os.path.join(cwd, 'manifest-toil-bwa.tsv'), generate_manifest) # Pipeline execution elif args.command == 'run': require(os.path.exists(args.config), '{} not found. Please run generate-config'.format(args.config)) if not args.sample: args.sample = None require( os.path.exists(args.manifest), '{} not found and no sample provided. ' 'Please run "generate-manifest"'.format(args.manifest)) # Parse config parsed_config = { x.replace('-', '_'): y for x, y in yaml.load(open(args.config).read()).iteritems() } config = argparse.Namespace(**parsed_config) config.maxCores = int(args.maxCores) if args.maxCores else sys.maxint samples = [args.sample[0], args.sample[1:] ] if args.sample else parse_manifest(args.manifest) # Sanity checks require(config.ref, 'Missing URL for reference file: {}'.format(config.ref)) require(config.output_dir, 'No output location specified: {}'.format(config.output_dir)) # Launch Pipeline Job.Runner.startToil( Job.wrapJobFn(download_reference_files, config, samples), args)
def main(): def parse_args(): parser = argparse.ArgumentParser( description=print_help.__doc__, formatter_class=argparse.RawTextHelpFormatter) subparsers = parser.add_subparsers(dest="command") run_parser = subparsers.add_parser( "run", help="runs nanopore pipeline with config on samples in manifest") subparsers.add_parser( "generate", help= "generates config and manifest files for your run, do this first") run_parser.add_argument( "--config", default="config-toil-nanopore.yaml", type=str, help= 'Path to the (filled in) config file, generated with "generate".') run_parser.add_argument( '--manifest', default='manifest-toil-nanopore.tsv', type=str, help= 'Path to the (filled in) manifest file, generated with "generate". ' '\nDefault value: "%(default)s".') Job.Runner.addToilOptions(run_parser) return parser.parse_args() def exitBadInput(message=None): if message is not None: print(message, file=sys.stderr) sys.exit(1) if len(sys.argv) == 1: exitBadInput(print_help()) cwd = os.getcwd() args = parse_args() if args.command == "generate": try: config_path = os.path.join(cwd, "config-toil-nanopore.yaml") generate_file(config_path, generateConfig) except UserError: print("[toil-nanopore]NOTICE using existing config file {}".format( config_path)) pass try: manifest_path = os.path.join(cwd, "manifest-toil-nanopore.tsv") generate_file(manifest_path, generateManifest) except UserError: print("[toil-nanopore]NOTICE using existing manifest {}".format( manifest_path)) elif args.command == "run": require( os.path.exists(args.config), "{config} not found run generate-config".format( config=args.config)) # Parse config config = { x.replace('-', '_'): y for x, y in yaml.load(open(args.config).read()).iteritems() } samples = parseManifest(args.manifest) for sample in samples: with Toil(args) as toil: if not toil.options.restart: root_job = Job.wrapJobFn(marginAlignRootJobFunction, config, sample) return toil.start(root_job) else: toil.restart()
def main(): """ GATK germline pipeline with variant filtering and annotation. """ # Define Parser object and add to jobTree parser = argparse.ArgumentParser( description=__doc__, formatter_class=argparse.RawTextHelpFormatter) # Generate subparsers subparsers = parser.add_subparsers(dest='command') subparsers.add_parser( 'generate-config', help='Generates an editable config in the current working directory.') subparsers.add_parser( 'generate-manifest', help='Generates an editable manifest in the current working directory.' ) subparsers.add_parser( 'generate', help='Generates a config and manifest in the current working directory.' ) # Run subparser parser_run = subparsers.add_parser('run', help='Runs the GATK germline pipeline') parser_run.add_argument( '--config', required=True, type=str, help='Path to the (filled in) config file, generated with ' '"generate-config".') parser_run.add_argument( '--manifest', type=str, help='Path to the (filled in) manifest file, generated with ' '"generate-manifest".\nDefault value: "%(default)s".') parser_run.add_argument( '--sample', default=None, nargs=2, type=str, help='Input sample identifier and BAM file URL or local path') parser_run.add_argument('--output-dir', default=None, help='Path/URL to output directory') parser_run.add_argument( '-s', '--suffix', default=None, help='Additional suffix to add to the names of the output files') parser_run.add_argument('--preprocess-only', action='store_true', help='Only runs preprocessing steps') Job.Runner.addToilOptions(parser_run) options = parser.parse_args() cwd = os.getcwd() if options.command == 'generate-config' or options.command == 'generate': generate_file(os.path.join(cwd, 'config-toil-germline.yaml'), generate_config) if options.command == 'generate-manifest' or options.command == 'generate': generate_file(os.path.join(cwd, 'manifest-toil-germline.tsv'), generate_manifest) elif options.command == 'run': # Program checks for program in ['curl', 'docker']: require( next(which(program)), program + ' must be installed on every node.'.format(program)) require( os.path.exists(options.config), '{} not found. Please run "generate-config"'.format( options.config)) # Read sample manifest samples = [] if options.manifest: samples.extend(parse_manifest(options.manifest)) # Add BAM sample from command line if options.sample: uuid, url = options.sample # samples tuple: (uuid, url, paired_url, rg_line) # BAM samples should not have as paired URL or read group line samples.append(GermlineSample(uuid, url, None, None)) require( len(samples) > 0, 'No samples were detected in the manifest or on the command line') # Parse inputs inputs = { x.replace('-', '_'): y for x, y in yaml.load(open(options.config).read()).iteritems() } required_fields = { 'genome_fasta', 'output_dir', 'run_bwa', 'sorted', 'snp_filter_annotations', 'indel_filter_annotations', 'preprocess', 'preprocess_only', 'run_vqsr', 'joint_genotype', 'run_oncotator', 'cores', 'file_size', 'xmx', 'suffix' } input_fields = set(inputs.keys()) require( input_fields > required_fields, 'Missing config parameters:\n{}'.format(', '.join(required_fields - input_fields))) if inputs['output_dir'] is None: inputs['output_dir'] = options.output_dir require(inputs['output_dir'] is not None, 'Missing output directory PATH/URL') if inputs['suffix'] is None: inputs['suffix'] = options.suffix if options.suffix else '' if inputs['preprocess_only'] is None: inputs['preprocess_only'] = options.preprocess_only if inputs['run_vqsr']: # Check that essential VQSR parameters are present vqsr_fields = {'g1k_snp', 'mills', 'dbsnp', 'hapmap', 'omni'} require( input_fields > vqsr_fields, 'Missing parameters for VQSR:\n{}'.format( ', '.join(vqsr_fields - input_fields))) # Check that hard filtering parameters are present. If only running preprocessing steps, then we do # not need filtering information. elif not inputs['preprocess_only']: hard_filter_fields = { 'snp_filter_name', 'snp_filter_expression', 'indel_filter_name', 'indel_filter_expression' } require( input_fields > hard_filter_fields, 'Missing parameters for hard filtering:\n{}'.format( ', '.join(hard_filter_fields - input_fields))) # Check for falsey hard filtering parameters for hard_filter_field in hard_filter_fields: require( inputs[hard_filter_field], 'Missing %s value for hard filtering, ' 'got %s.' % (hard_filter_field, inputs[hard_filter_field])) # Set resource parameters inputs['xmx'] = human2bytes(inputs['xmx']) inputs['file_size'] = human2bytes(inputs['file_size']) inputs['cores'] = int(inputs['cores']) inputs['annotations'] = set(inputs['snp_filter_annotations'] + inputs['indel_filter_annotations']) # HaplotypeCaller test data for testing inputs['hc_output'] = inputs.get('hc_output', None) # It is a toil-scripts convention to store input parameters in a Namespace object config = argparse.Namespace(**inputs) root = Job.wrapJobFn(run_gatk_germline_pipeline, samples, config) Job.Runner.startToil(root, options)