def add_archive_command(cmdparser):
    """Register the 'archive' command and its options

    Arguments:
      cmdparser: command parser object; its 'add_command'
        method must return the parser for the new command
        (presumably a CommandParser instance -- confirm
        against the caller)

    """
    parser = cmdparser.add_command(
        'archive',
        help="Copy analyses to 'archive' area",
        usage="%prog archive [OPTIONS] [ANALYSIS_DIR]",
        version="%prog " + __version__,
        description=("Copy sequencing analysis data directory "
                     "ANALYSIS_DIR to 'archive' destination."))
    # Destination and classification of the archived data
    parser.add_option(
        '--archive_dir',
        action='store',
        dest='archive_dir',
        default=None,
        help=("specify top-level archive directory to copy data under. "
              "ARCHIVE_DIR can be a local directory, or a remote location in the "
              "form '[[user@]host:]directory'. Overrides the default settings."))
    parser.add_option(
        '--platform',
        action='store',
        dest='platform',
        default=None,
        help=("specify the platform e.g. 'hiseq', 'miseq' etc (overrides "
              "automatically determined platform, if any). Use 'other' for cases "
              "where the platform is unknown."))
    parser.add_option(
        '--year',
        action='store',
        dest='year',
        default=None,
        help="specify the year e.g. '2014' (default is the current year)")
    # Group and permissions: the defaults come from the 'archive'
    # section of the settings and are echoed in the help text
    group_from_settings = __settings.archive.group
    parser.add_option(
        '--group',
        action='store',
        dest='group',
        default=group_from_settings,
        help=("specify the name of group for the archived files NB only works "
              "when the archive is a local directory (default: %s)"
              % group_from_settings))
    chmod_from_settings = __settings.archive.chmod
    parser.add_option(
        '--chmod',
        action='store',
        dest='chmod',
        default=chmod_from_settings,
        help=("specify chmod operations for the archived files (default: "
              "%s)" % chmod_from_settings))
    parser.add_option(
        '--force',
        action='store_true',
        dest='force',
        default=False,
        help=("perform archiving operation even if key metadata items are "
              "not set"))
    # Options shared with other commands
    add_dry_run_option(parser)
    add_debug_option(parser)
def test_add_debug_option_with_argparse(self):
    """add_debug_option enables '--debug' with ArgumentParser

    Builds a bare ArgumentParser, registers the debug option on
    it and checks that parsing '--debug' sets 'debug' to a true
    value.
    """
    parser = ArgumentParser()
    add_debug_option(parser)
    namespace = parser.parse_args(['--debug'])
    self.assertTrue(namespace.debug)
def add_publish_qc_command(cmdparser):
    """Register the 'publish_qc' command and its options

    Arguments:
      cmdparser: command parser object; its 'add_command'
        method must return the parser for the new command

    """
    parser = cmdparser.add_command(
        'publish_qc',
        help="Copy QC reports to publication area",
        usage="%prog publish_qc [OPTIONS] [ANALYSIS_DIR]",
        description=(
            "Copy QC reports from ANALYSIS_DIR to local "
            "or remote directory (e.g. web server). By default existing "
            "QC reports will be copied without further checking; if no "
            "report is found then QC results will be verified and a "
            "report generated first."))
    # Options are declared as (flag, keywords) pairs and registered
    # in the order they are listed
    option_table = (
        ('--projects', {
            'action': 'store',
            'dest': 'project_pattern',
            'default': None,
            'help': ("simple wildcard-based pattern specifying a subset of projects "
                     "and samples to publish the QC for. PROJECT_PATTERN can specify a "
                     "single project, or a set of projects."),
        }),
        ('--qc_dir', {
            'action': 'store',
            'dest': 'qc_dir',
            'default': None,
            'help': ("specify target directory to copy QC reports to. QC_DIR can "
                     "be a local directory, or a remote location in the form "
                     "'[[user@]host:]directory'. Overrides the default settings."),
        }),
        ('--ignore-missing-qc', {
            'action': 'store_true',
            'dest': 'ignore_missing_qc',
            'default': False,
            'help': ("skip projects where QC results are missing or can't be verified, "
                     "or where reports can't be generated."),
        }),
        ('--regenerate-reports', {
            'action': 'store_true',
            'dest': 'regenerate_reports',
            'default': False,
            'help': "attempt to regenerate existing QC reports",
        }),
        ('--force', {
            'action': 'store_true',
            'dest': 'force',
            'default': False,
            'help': ("force generation of QC reports for all projects even "
                     "if verification has failed"),
        }),
    )
    for flag, keywords in option_table:
        parser.add_option(flag, **keywords)
    add_debug_option(parser)
def add_import_project_command(cmdparser):
    """Register the 'import_project' command

    The command takes no options of its own beyond the shared
    debug option.

    Arguments:
      cmdparser: command parser object; its 'add_command'
        method must return the parser for the new command

    """
    summary = "Import a project directory"
    usage = "%prog import_project [OPTIONS] [ANALYSIS_DIR] PROJECT_DIR"
    description = ("Copy a project directory PROJECT_DIR into "
                   "ANALYSIS_DIR.")
    parser = cmdparser.add_command('import_project',
                                   help=summary,
                                   usage=usage,
                                   description=description)
    add_debug_option(parser)
def test_add_debug_option(self):
    """add_debug_option enables '--debug'

    Uses an optparse OptionParser; the test is skipped when the
    OPTPARSE_AVAILABLE flag is false.
    """
    if not OPTPARSE_AVAILABLE:
        # Nothing to exercise without optparse
        raise unittest.SkipTest("'optparse' not available")
    parser = OptionParser()
    add_debug_option(parser)
    parsed_options, _ = parser.parse_args(['--debug'])
    self.assertTrue(parsed_options.debug)
def add_samplesheet_command(cmdparser):
    """Register the 'samplesheet' command and its options

    Arguments:
      cmdparser: command parser object; its 'add_command'
        method must return the parser for the new command

    """
    parser = cmdparser.add_command(
        'samplesheet',
        help="Sample sheet manipulation",
        usage="%prog samplesheet [OPTIONS] [ANALYSIS_DIR]",
        description="Query and manipulate sample sheets")
    # Single flag with both a short and a long form
    edit_help = ("bring up sample sheet file in an editor to make "
                 "changes manually")
    parser.add_option('-e', '--edit',
                      action='store_true',
                      dest='edit',
                      default=False,
                      help=edit_help)
    add_debug_option(parser)
def add_params_command(cmdparser):
    """Register the 'params' command and its options

    Arguments:
      cmdparser: command parser object; its 'add_command'
        method must return the parser for the new command

    """
    parser = cmdparser.add_command(
        'params',
        help="Query and change project parameters",
        usage="%prog params [OPTIONS] [ANALYSIS_DIR]",
        description=("Query and change processing parameters "
                     "and settings for ANALYSIS_DIR."))
    # '--set' uses the 'append' action so it can be repeated; the
    # values are collected under 'key_value' (None if never given)
    set_help = ("Set the value of a parameter. KEY_VALUE should be of the form "
                "'<param>=<value>'. Multiple --set options can be specified.")
    parser.add_option('--set',
                      action='append',
                      dest='key_value',
                      default=None,
                      help=set_help)
    add_debug_option(parser)
def add_readme_command(cmdparser):
    """Create a parser for the 'readme' command

    Registers the 'readme' command together with its '--init'
    and '--edit' flags and the shared debug option.

    Arguments:
      cmdparser: command parser object; its 'add_command'
        method must return the parser for the new command

    """
    # The description names the positional argument ANALYSIS_DIR so
    # that it agrees with the usage string
    p = cmdparser.add_command('readme',
                              help="Add or amend top-level README file",
                              usage="%prog readme [OPTIONS] [ANALYSIS_DIR]",
                              description="Add or amend a README file in the "
                              "analysis directory ANALYSIS_DIR.")
    p.add_option('--init', action='store_true', dest='init', default=False,
                 help="create a new README file")
    p.add_option('--edit', action='store_true', dest='edit', default=False,
                 help="bring up README file in an editor to make changes")
    add_debug_option(p)
def add_clone_command(cmdparser):
    """Create a parser for the 'clone' command

    Registers the 'clone' command together with its
    '--copy-fastqs' flag and the shared debug option.

    Arguments:
      cmdparser: command parser object; its 'add_command'
        method must return the parser for the new command

    """
    p = cmdparser.add_command('clone',
                              help="Make a copy of an analysis directory",
                              usage="%prog clone [OPTIONS] DIR CLONE_DIR",
                              description="Make a copy of an existing auto_processed analysis "
                              "directory DIR, in a new directory CLONE_DIR. The clone will "
                              "not include any project directories, but will copy the "
                              "projects.info file.")
    # The help text names the destination CLONE_DIR, matching the
    # usage string and the description
    p.add_option('--copy-fastqs', action='store_true', dest='copy_fastqs',
                 default=False,
                 help="Copy fastq.gz files from DIR into CLONE_DIR (default is to "
                 "make a link to the bcl-to-fastq directory)")
    add_debug_option(p)
def add_metadata_command(cmdparser):
    """Register the 'metadata' command and its options

    Arguments:
      cmdparser: command parser object; its 'add_command'
        method must return the parser for the new command

    """
    parser = cmdparser.add_command(
        'metadata',
        help="Query and update analysis metadata",
        usage="%prog metadata [OPTIONS] [ANALYSIS_DIR]",
        description=("Query and change metadata "
                     "associated with ANALYSIS_DIR."))
    # Repeatable '--set' (values accumulate under 'key_value')
    parser.add_option(
        '--set',
        action='append',
        dest='key_value',
        default=None,
        help=("Set the value of a metadata item. KEY_VALUE should be of the form "
              "'<param>=<value>'. Multiple --set options can be specified."))
    parser.add_option(
        '--update',
        action='store_true',
        dest='update',
        default=False,
        help=("Automatically update metadata items where possible "
              "(e.g. for older analyses which have old or missing metadata "
              "files)"))
    add_debug_option(parser)
def add_merge_fastq_dirs_command(cmdparser):
    """Register the 'merge_fastq_dirs' command and its options

    Arguments:
      cmdparser: command parser object; its 'add_command'
        method must return the parser for the new command

    """
    command_description = (
        "Automatically merge fastq directories from "
        "multiple bcl-to-fastq runs within ANALYSIS_DIR. Use this "
        "command if 'make_fastqs' step was run multiple times to "
        "process subsets of lanes.")
    parser = cmdparser.add_command(
        'merge_fastq_dirs',
        help="Combine bcl-to-fastq runs",
        usage="%prog merge_fastq_dirs [OPTIONS] [ANALYSIS_DIR]",
        description=command_description)
    # Note the option is stored under 'unaligned_dir', not under a
    # name derived from the flag
    parser.add_option(
        '--primary-unaligned-dir',
        action='store',
        dest='unaligned_dir',
        default='bcl2fastq',
        help=("merge fastqs from additional bcl-to-fastq directories into "
              "UNALIGNED_DIR. Original data will be moved out of the way first. "
              "Defaults to 'bcl2fastq'."))
    # Options shared with other commands
    add_dry_run_option(parser)
    add_debug_option(parser)
def add_report_command(cmdparser):
    """Register the 'report' command and its options

    Each reporting mode is a boolean flag which defaults to
    False and is stored under the same name as the flag.

    Arguments:
      cmdparser: command parser object; its 'add_command'
        method must return the parser for the new command

    """
    parser = cmdparser.add_command(
        'report',
        help="Generate reporting information",
        usage="%prog report [OPTIONS] [ANALYSIS_DIR]",
        description=("Report information on processed Illumina "
                     "sequence data in ANALYSIS_DIR."))
    # NOTE(review): the help texts for 'summary' ("full report")
    # and 'full' ("summary report") look like they might be swapped;
    # confirm against the reporting implementation before changing
    report_modes = (
        ('logging',
         "print short report suitable for logging file"),
        ('summary',
         "print full report suitable for bioinformaticians"),
        ('projects',
         "print tab-delimited line (one per project) suitable for "
         "injection into a spreadsheet"),
        ('full',
         "print summary report suitable for record-keeping"),
    )
    for mode, mode_help in report_modes:
        parser.add_option('--%s' % mode,
                          action='store_true',
                          dest=mode,
                          default=False,
                          help=mode_help)
    add_debug_option(parser)
def add_analyse_barcodes_command(cmdparser):
    """Create a parser for the 'analyse_barcodes' command

    Registers the 'analyse_barcodes' command with its analysis
    options, the shared runner and debug options, and a group of
    deprecated options which are still accepted.

    Arguments:
      cmdparser: command parser object; its 'add_command'
        method must return the parser for the new command

    """
    p = cmdparser.add_command('analyse_barcodes',
                              help="Analyse index (barcode) sequences",
                              usage="%prog analyse_barcodes [OPTIONS] [ANALYSIS_DIR]",
                              version="%prog "+__version__,
                              description="Analyse barcode sequences for fastq files "
                              "in specified lanes in ANALYSIS_DIR, and report the most "
                              "common barcodes found across all reads from each lane.")
    p.add_option('--unaligned-dir', action='store',
                 dest='unaligned_dir', default='bcl2fastq',
                 help="explicitly set the (sub)directory with bcl-to-fastq outputs")
    p.add_option('--lanes', action='store',
                 dest='lanes', default=None,
                 help="specify which lanes to analyse barcodes for (default is to do "
                 "analysis for all lanes).")
    p.add_option('--mismatches', action='store', dest='mismatches',
                 default=0, type='int',
                 help="maximum number of mismatches to use when grouping "
                 "similar barcodes (default is 0, i.e. no grouping)")
    # CUTOFF is a fraction of reads, so the default of 0.001 is 0.1%;
    # the percentage quoted in the help is derived from the default
    # so that the two cannot drift apart
    default_cutoff = 0.001
    p.add_option('--cutoff', action='store', dest='cutoff',
                 default=default_cutoff, type='float',
                 help="exclude barcodes with a smaller fraction of "
                 "associated reads than CUTOFF, e.g. '0.01' excludes "
                 "barcodes with < 1%% of reads (default is %g%%)"
                 % (default_cutoff*100.0))
    p.add_option('--sample-sheet', action="store",
                 dest="sample_sheet", default=None,
                 help="use an alternative sample sheet to the default "
                 "'custom_SampleSheet.csv' created on setup.")
    p.add_option('--barcode-analysis-dir', action="store",
                 dest="barcode_analysis_dir", default=None,
                 help="specify subdirectory where barcode analysis will "
                 "be performed and outputs will be written")
    add_runner_option(p)
    add_debug_option(p)
    # Deprecated options: still parsed so that existing command
    # lines keep working, but documented as having no effect
    deprecated = optparse.OptionGroup(p, 'Deprecated/defunct options')
    deprecated.add_option('--nprocessors', action='store',
                          dest='nprocessors', default=None, type='int',
                          help="does nothing; kept for backwards "
                          "compatibility only")
    # Note '--truncate' is stored under 'length'
    deprecated.add_option('--truncate', action='store',
                          dest='length', default=None, type='int',
                          help="does nothing; kept for backwards "
                          "compatibility only")
    p.add_option_group(deprecated)
def add_setup_command(cmdparser):
    """Register the 'setup' command and its options

    Arguments:
      cmdparser: command parser object; its 'add_command'
        method must return the parser for the new command

    """
    parser = cmdparser.add_command(
        'setup',
        help="Set up a new analysis directory",
        usage="%prog setup [OPTIONS] DIR",
        description=("Set up automatic processing of Illumina "
                     "sequencing data from DIR."))
    # All three options store a string and default to None; they are
    # declared as (flag, destination, help) and added in order
    string_options = (
        ('--sample-sheet', 'sample_sheet',
         "Copy sample sheet file from name and location SAMPLE_SHEET "
         "(default is to look for SampleSheet.csv inside DIR)"),
        ('--fastq-dir', 'fastq_dir',
         "Import fastq.gz files from FASTQ_DIR (which should be a "
         "subdirectory of DIR with the same structure as that produced "
         "by CASAVA/bcl2fastq i.e. 'Project_<name>/Sample_<name>/<fastq>')"),
        ('--analysis-dir', 'analysis_dir',
         "Make new directory called ANALYSIS_DIR (otherwise default is "
         "'DIR_analysis')"),
    )
    for flag, destination, option_help in string_options:
        parser.add_option(flag,
                          action='store',
                          dest=destination,
                          default=None,
                          help=option_help)
    add_debug_option(parser)
def add_update_fastq_stats_command(cmdparser):
    """Register the 'update_fastq_stats' command and its options

    Arguments:
      cmdparser: command parser object; its 'add_command'
        method must return the parser for the new command

    """
    parser = cmdparser.add_command(
        'update_fastq_stats',
        help="(Re)generate Fastq statistics",
        usage="%prog update_fastq_stats [OPTIONS] [ANALYSIS_DIR]",
        description=("(Re)generate statistics for fastq "
                     "files produced from 'make_fastqs'."))
    # Input directory
    parser.add_option(
        '--unaligned-dir',
        action='store',
        dest='unaligned_dir',
        default='bcl2fastq',
        help="explicitly set the (sub)directory with bcl-to-fastq outputs")
    # Output files
    parser.add_option(
        '--stats-file',
        action='store',
        dest='stats_file',
        default=None,
        help="specify output file for fastq statistics")
    parser.add_option(
        '--per-lane-stats-file',
        action='store',
        dest='per_lane_stats_file',
        default=None,
        help="specify output file for per-lane statistics")
    # Shared options; the value passed for the processors option is
    # read from the 'fastq_stats' section of the settings
    add_nprocessors_option(parser, __settings.fastq_stats.nprocessors)
    add_runner_option(parser)
    add_debug_option(parser)
def add_setup_analysis_dirs_command(cmdparser):
    """Register the 'setup_analysis_dirs' command and its options

    Arguments:
      cmdparser: command parser object; its 'add_command'
        method must return the parser for the new command

    """
    parser = cmdparser.add_command(
        'setup_analysis_dirs',
        help="Create project subdirectories",
        usage="%prog setup_analysis_dirs [OPTIONS] [ANALYSIS_DIR]",
        description=("Create analysis subdirectories for projects "
                     "defined in projects.info file in ANALYSIS_DIR."))
    # Every option is a boolean flag which is off by default; they
    # are declared as (flag, destination, help) and added in order
    flags = (
        ('--ignore-missing-metadata', 'ignore_missing_metadata',
         "force creation of project directories even if metadata is not "
         "set (default is to fail if metadata is missing)"),
        ('--short-fastq-names', 'short_fastq_names',
         "shorten fastq file names when copying or linking from project "
         "directory (default is to keep long names from bcl2fastq)"),
        ('--link-to-fastqs', 'link_to_fastqs',
         "create symbolic links to original fastqs from project directory "
         "(default is to make hard links)"),
    )
    for flag, destination, flag_help in flags:
        parser.add_option(flag,
                          action='store_true',
                          dest=destination,
                          default=False,
                          help=flag_help)
    add_debug_option(parser)
def add_config_command(cmdparser):
    """Register the 'config' command and its options

    Arguments:
      cmdparser: command parser object; its 'add_command'
        method must return the parser for the new command

    """
    parser = cmdparser.add_command(
        'config',
        help="Query and change global configuration",
        usage="%prog config [OPTIONS] [ANALYSIS_DIR]",
        description="Query and change global configuration.")
    parser.add_option(
        '--init',
        action='store_true',
        dest='init',
        default=False,
        help="Create a new configuration file from the sample.")
    # '--set' and '--add' are repeatable: each occurrence is appended
    # to the list held under the option's destination
    parser.add_option(
        '--set',
        action='append',
        dest='key_value',
        default=None,
        help=("Set the value of a parameter. KEY_VALUE should be of the form "
              "'<param>=<value>'. Multiple --set options can be specified."))
    parser.add_option(
        '--add',
        action='append',
        dest='new_section',
        default=None,
        help=("Add a new section called NEW_SECTION to the config. To add a "
              "new platform, use 'platform:NAME'. Multiple --add options can be "
              "specified."))
    add_debug_option(parser)
    # Options which are still accepted but documented as doing nothing
    defunct = optparse.OptionGroup(parser, 'Deprecated/defunct options')
    defunct.add_option(
        '--show',
        action='store_true',
        dest='show',
        default=False,
        help=("Show the values of parameters and settings (does "
              "nothing; use 'config' with no options to display settings)"))
    parser.add_option_group(defunct)
def add_run_qc_command(cmdparser):
    """Create a parser for the 'run_qc' command

    Registers the 'run_qc' command with its QC options, the
    shared runner, modulefiles and debug options, and a group of
    deprecated options which are still accepted.

    Arguments:
      cmdparser: command parser object; its 'add_command'
        method must return the parser for the new command

    """
    p = cmdparser.add_command('run_qc', help="Run QC procedures",
                              usage="%prog run_qc [OPTIONS] [ANALYSIS_DIR]",
                              description="Run QC procedures for sequencing projects in "
                              "ANALYSIS_DIR.")
    # Defaults which are also reported in the help text
    max_concurrent_jobs = __settings.general.max_concurrent_jobs
    fastq_screen_subset = 1000000
    p.add_option('--projects', action='store',
                 dest='project_pattern', default=None,
                 help="simple wildcard-based pattern specifying a subset of projects "
                 "and samples to run the QC on. PROJECT_PATTERN should be of the form "
                 "'pname[/sname]', where 'pname' specifies a project (or set of "
                 "projects) and 'sname' optionally specifies a sample (or set of "
                 "samples).")
    p.add_option('--fastq_screen_subset', action='store', dest='subset',
                 type='int', default=fastq_screen_subset,
                 help="specify size of subset of total reads to use for "
                 "fastq_screen (i.e. --subset option); (default %d, set to "
                 "0 to use all reads)" % fastq_screen_subset)
    # Explicit default so that the flag is False (rather than None)
    # when not supplied, like the other boolean flags
    p.add_option('--ungzip-fastqs', action='store_true', dest='ungzip_fastqs',
                 default=False,
                 help="create decompressed copies of fastq.gz files")
    p.add_option('--max-jobs', action='store',
                 dest='max_jobs', default=max_concurrent_jobs, type='int',
                 help="explicitly specify maximum number of concurrent QC jobs to run "
                 "(default %s, change in settings file)" % max_concurrent_jobs)
    add_runner_option(p)
    add_modulefiles_option(p)
    add_debug_option(p)
    # Deprecated options: still parsed so that existing command
    # lines keep working, but documented as having no effect
    deprecated = optparse.OptionGroup(p, 'Deprecated/defunct options')
    deprecated.add_option('--no-ungzip-fastqs', action='store_true',
                          dest='no_ungzip_fastqs', default=False,
                          help="don't create uncompressed copies of fastq.gz files "
                          "(does nothing; this is now the default, use --ungzip-fastqs "
                          "to turn on decompression)")
    p.add_option_group(deprecated)
def _lane_splitting_default_notes():
    """Describe the configured lane splitting defaults for the help text

    Reads the global 'bcl2fastq' setting and any per-platform
    'no_lane_splitting' overrides (platforms where the setting is
    None are ignored).

    Returns:
      Tuple (no_lane_splitting_note, use_lane_splitting_note);
      each note is either an empty string or a sentence prefixed
      with '. ' ready to be appended to an option's help.

    """
    # Platforms which explicitly turn lane splitting off or on
    no_lane_splitting_platforms = []
    use_lane_splitting_platforms = []
    for platform in __settings.platform:
        no_lane_splitting = __settings.platform[platform].no_lane_splitting
        if no_lane_splitting is None:
            continue
        if no_lane_splitting:
            no_lane_splitting_platforms.append(platform)
        else:
            use_lane_splitting_platforms.append(platform)
    # The global setting decides which behaviour is the general
    # default; platforms configured the other way are the exceptions
    no_lane_splitting_is_default = __settings.bcl2fastq.no_lane_splitting
    if no_lane_splitting_is_default:
        exceptions = use_lane_splitting_platforms
    else:
        exceptions = no_lane_splitting_platforms
    if exceptions:
        names = ', '.join(exceptions)
        general_note = "Used by default for all platforms except %s" % names
        exception_note = "Used by default for %s" % names
    else:
        general_note = "Default for all platforms"
        exception_note = ""
    if no_lane_splitting_is_default:
        no_note, use_note = general_note, exception_note
    else:
        no_note, use_note = exception_note, general_note
    # Non-empty notes are appended to the end of a help string
    if use_note:
        use_note = ". "+use_note
    if no_note:
        no_note = ". "+no_note
    return (no_note, use_note)


def _bcl2fastq_nprocessors_display():
    """Describe the configured number of processors for the help text

    Returns:
      String listing the per-platform 'nprocessors' settings
      (where set) followed by the general 'bcl2fastq' setting.

    """
    display = ["%s: %s" % (platform,
                           __settings.platform[platform].nprocessors)
               for platform in __settings.platform
               if __settings.platform[platform].nprocessors is not None]
    if display:
        display.append("other platforms: %s" %
                       __settings.bcl2fastq.nprocessors)
    else:
        display.append("%s" % __settings.bcl2fastq.nprocessors)
    return ', '.join(display)


def add_make_fastqs_command(cmdparser):
    """Create a parser for the 'make_fastqs' command

    Registers the 'make_fastqs' command with the shared general
    options plus option groups for primary data management,
    bcl-to-fastq conversion, statistics generation and
    deprecated options.

    Arguments:
      cmdparser: command parser object; its 'add_command'
        method must return the parser for the new command

    """
    p = cmdparser.add_command('make_fastqs', help="Run Fastq generation",
                              usage="%prog make_fastqs [OPTIONS] [ANALYSIS_DIR]",
                              description="Generate fastq files from raw bcl files "
                              "produced by Illumina sequencer within ANALYSIS_DIR.")
    # General options
    add_no_save_option(p)
    add_modulefiles_option(p)
    add_debug_option(p)
    # Primary data management
    primary_data = optparse.OptionGroup(p, 'Primary data management')
    primary_data.add_option('--only-fetch-primary-data', action='store_true',
                            dest='only_fetch_primary_data', default=False,
                            help="only fetch the primary data, don't perform any other "
                            "operations")
    primary_data.add_option('--skip-rsync', action='store_true',
                            dest='skip_rsync', default=False,
                            help="don't rsync the primary data at the beginning of processing")
    primary_data.add_option('--remove-primary-data', action='store_true',
                            dest='remove_primary_data', default=False,
                            help="Delete the primary data at the end of processing (default "
                            "is to keep data)")
    p.add_option_group(primary_data)
    # Options to control bcl2fastq
    bcl_to_fastq = optparse.OptionGroup(p, 'Bcl-to-fastq options')
    bcl_to_fastq.add_option('--skip-bcl2fastq', action='store_true',
                            dest='skip_bcl2fastq', default=False,
                            help="don't run the Fastq generation step")
    bcl_to_fastq.add_option('--output-dir', action='store',
                            dest='unaligned_dir', default=None,
                            help="explicitly set the output (sub)directory for bcl-to-fastq "
                            "conversion (overrides default)")
    bcl_to_fastq.add_option('--use-bases-mask', action="store",
                            dest="bases_mask", default=None,
                            help="explicitly set the bases-mask string to indicate how each "
                            "cycle should be used in the bcl-to-fastq conversion (overrides "
                            "default)")
    bcl_to_fastq.add_option('--sample-sheet', action="store",
                            dest="sample_sheet", default=None,
                            help="use an alternative sample sheet to the default "
                            "'custom_SampleSheet.csv' created on setup.")
    bcl_to_fastq.add_option('--ignore-missing-bcl', action='store_true',
                            dest='ignore_missing_bcl', default=False,
                            help="use the --ignore-missing-bcl option for bcl2fastq (treat "
                            "missing bcl files as no call)")
    bcl_to_fastq.add_option('--ignore-missing-stats', action='store_true',
                            dest='ignore_missing_stats', default=False,
                            help="use the --ignore-missing-stats option for bcl2fastq (fill "
                            "in with zeroes when *.stats files are missing)")
    bcl_to_fastq.add_option('--require-bcl2fastq-version', action='store',
                            dest='bcl2fastq_version', default=None,
                            help="explicitly specify version of bcl2fastq "
                            "software to use (e.g. '1.8.4' or '>=2.0').")
    # Use lane splitting
    # Determine defaults to report to user
    default_no_lane_splitting, default_use_lane_splitting = \
        _lane_splitting_default_notes()
    bcl_to_fastq.add_option('--no-lane-splitting', action='store_true',
                            dest='no_lane_splitting', default=False,
                            help="don't split the output FASTQ files by lane "
                            "(bcl2fastq v2 only; turn off using "
                            "--use-lane-splitting)%s" % default_no_lane_splitting)
    bcl_to_fastq.add_option('--use-lane-splitting', action='store_true',
                            dest='use_lane_splitting', default=False,
                            help="split the output FASTQ files by lane "
                            "(bcl2fastq v2 only; turn off using "
                            "--no-lane-splitting)%s" % default_use_lane_splitting)
    # Adapter trimming/masking options
    # These take an explicit integer type so that values given on the
    # command line are validated and have the same type as the defaults
    bcl_to_fastq.add_option('--minimum-trimmed-read-length', action="store",
                            dest="minimum_trimmed_read_length", default=35,
                            type='int',
                            help="Minimum read length after adapter "
                            "trimming. bcl2fastq trims the adapter from "
                            "the read down to this value; if there is more "
                            "adapter match below this length then those "
                            "bases are masked not trimmed (i.e. replaced "
                            "by N rather than removed) (default: 35)")
    bcl_to_fastq.add_option('--mask-short-adapter-reads', action="store",
                            dest="mask_short_adapter_reads", default=22,
                            type='int',
                            help="minimum length of unmasked bases that "
                            "a read can be after adapter trimming; reads "
                            "with fewer ACGT bases will be completely "
                            "masked with Ns (default: 22)")
    # Creation of empty fastqs
    bcl_to_fastq.add_option('--create-empty-fastqs', action='store_true',
                            dest='create_empty_fastqs', default=False,
                            help="create 'empty' FASTQ files which weren't "
                            "generated by bcl2fastq because they didn't "
                            "have any reads assigned at the demultiplexing "
                            "stage (NB bcl2fastq must have finished without "
                            "an error for this option to be applied)")
    # Number of processors: no single default is passed; instead the
    # per-platform settings are summarised for display
    default_nprocessors = _bcl2fastq_nprocessors_display()
    add_nprocessors_option(bcl_to_fastq, None,
                           default_display=default_nprocessors)
    add_runner_option(bcl_to_fastq)
    p.add_option_group(bcl_to_fastq)
    # Statistics
    statistics = optparse.OptionGroup(p, 'Statistics generation')
    statistics.add_option('--stats-file', action='store',
                          dest='stats_file', default=None,
                          help="specify output file for fastq statistics")
    statistics.add_option('--per-lane-stats-file', action='store',
                          dest='per_lane_stats_file', default=None,
                          help="specify output file for per-lane statistics")
    statistics.add_option('--no-stats', action='store_true',
                          dest='no_stats', default=False,
                          help="don't generate statistics file; use 'update_fastq_stats' "
                          "command to (re)generate statistics")
    p.add_option_group(statistics)
    # Deprecated options: still parsed so that existing command
    # lines keep working
    deprecated = optparse.OptionGroup(p, 'Deprecated/defunct options')
    deprecated.add_option('--keep-primary-data', action='store_true',
                          dest='keep_primary_data', default=False,
                          help="don't delete the primary data at the end of processing "
                          "(does nothing; primary data is kept by default unless "
                          "--remove-primary-data is specified)")
    deprecated.add_option('--generate-stats', action='store_true',
                          dest='generate_stats', default=False,
                          help="(re)generate statistics for fastq files (does nothing; "
                          "statistics are generated by default unless suppressed by "
                          "--no-stats)")
    deprecated.add_option('--report-barcodes', action='store_true',
                          dest='report_barcodes', default=False,
                          help="analyse and report barcode indices for all lanes after "
                          "generating fastq files (deprecated: use the "
                          "'analyse_barcodes' command instead)")
    deprecated.add_option('--barcodes-file', action='store',
                          dest='barcodes_file', default=None,
                          help="specify output file for barcode analysis report "
                          "(deprecated: use the 'analyse_barcodes' command instead)")
    p.add_option_group(deprecated)
####################################################################### # Main program ####################################################################### if __name__ == "__main__": # Set up command line parser p = CommandParser(description="Utility for managing processed and analysed Illumina " "sequence data in ANALYSIS_DIR", version="%prog "+get_version()) # Add info command p.add_command('info',help="Get information about ANALYSIS_DIR", usage="%prog info [OPTIONS] ANALYSIS_DIR", description="Report information on processed Illumina " "sequence data in ANALYSIS_DIR.") add_debug_option(p.parser_for('info')) # Add copy command p.add_command('copy',help="Copy fastqs from ANALYSIS_DIR", usage="%prog copy [OPTIONS] ANALYSIS_DIR DEST_DIR", description="Copy fastqs from ANALYSIS_DIR to DEST_DIR.") p.parser_for('copy').add_option('--projects',action='store',dest='projects',default=None, help="Restrict copying to projects matching the " "supplied pattern") p.parser_for('copy').add_option('--fastq-dir',action='store',dest='fastq_dir',default=None, help="Only copy fastqs from the specified FASTQ_DIR") add_dry_run_option(p.parser_for('copy')) add_debug_option(p.parser_for('copy')) # Process the command line cmd,options,args = p.parse_args() if len(args) < 1: p.error("Need to supply a directory to examine")