def main():
    """Parse the command line and run region coverage for the given BAMs.

    --samples and --bams are comma-separated lists that are paired
    positionally into BaseSample objects. The output and work directories
    are passed to safe_mkdir() before get_regions_coverage() is called.
    """
    info(' '.join(sys.argv))
    info()

    parser = OptionParser(
        usage='Usage: ' + basename(__file__) +
              ' --bed BED_file --samples Sample1,Sample2 '
              '--bams BAM_file1,BAM_file2 -g hg19 -o Output_directory '
              '--work-dir work_directory --chr chromosome')
    parser.add_option('-o', dest='output_dir')
    parser.add_option('--samples', dest='sample_names')
    parser.add_option('--bams', dest='bams')
    parser.add_option('--vcf', dest='vcf_fpath')
    parser.add_option('--chr', dest='chrom')
    parser.add_option('--bed', dest='bed', help='BED file.')
    parser.add_option('-g', '--genome', dest='chr_len_fpath',
                      help='File with chromosomes lengths.')
    parser.add_option('--work-dir', dest='work_dir', help='Work directory.')
    (opts, args) = parser.parse_args(sys.argv[1:])

    cnf = Config(opts.__dict__, determine_sys_cnf(opts), {})

    # Validate BEFORE splitting: a missing --samples/--bams would otherwise
    # surface as an AttributeError on None instead of the usage message.
    # NOTE(review): critical() is defined elsewhere and is assumed to abort.
    if not cnf.output_dir or not cnf.bams or not cnf.sample_names:
        critical(parser.usage)

    sample_names = cnf.sample_names.split(',')
    bam_fpaths = cnf.bams.split(',')
    # zip() would silently drop the unmatched tail, so reject uneven lists.
    if len(sample_names) != len(bam_fpaths):
        critical('Error: the number of sample names (' + str(len(sample_names)) +
                 ') does not match the number of BAM files (' +
                 str(len(bam_fpaths)) + ').')

    samples = [BaseSample(sample_name, None, bam=bam)
               for (sample_name, bam) in zip(sample_names, bam_fpaths)]

    safe_mkdir(cnf.output_dir)
    safe_mkdir(cnf.work_dir)
    get_regions_coverage(cnf, samples)
    info('Done.')
def main():
    """Parse the command line and split the sample BAMs around a VCF.

    --samples and --bams are comma-separated lists that are paired
    positionally into BaseSample objects, which are then passed to
    split_bams() together with the VCF path. When --features is omitted the
    value of cnf.genome.features is used instead.
    """
    info(' '.join(sys.argv))
    info()

    parser = OptionParser(
        usage='Usage: ' + basename(__file__) +
              ' --chr chr --vcf VCF_file --samples Sample1,Sample2 '
              '--bams BAM_file1,BAM_file2 -o Output_directory '
              '--features BED_file')
    add_cnf_t_reuse_prjname_donemarker_workdir_genome_debug(parser)
    parser.add_option('-o', dest='output_dir')
    parser.add_option('--samples', dest='sample_names')
    parser.add_option('--bams', dest='bams')
    parser.add_option('--vcf', dest='vcf_fpath')
    parser.add_option('--chr', dest='chrom')
    parser.add_option('--features', dest='features',
                      help='BED file with real CDS/Exon/Gene/Transcript regions with '
                           'annotations (default "features" is in system_config)')
    (opts, args) = parser.parse_args(sys.argv[1:])

    cnf = Config(opts.__dict__, determine_sys_cnf(opts), {})
    cnf.verbose = False

    # --samples and --bams are dereferenced below, so they are required too;
    # without this check a missing one fails with AttributeError on None.
    # NOTE(review): critical() is defined elsewhere and is assumed to abort.
    if (not cnf.output_dir or not cnf.vcf_fpath or not cnf.chrom
            or not cnf.sample_names or not cnf.bams):
        critical(parser.usage)

    cnf.features = cnf.features or cnf.genome.features

    sample_names = cnf.sample_names.split(',')
    bam_fpaths = cnf.bams.split(',')
    # zip() would silently drop the unmatched tail, so reject uneven lists.
    if len(sample_names) != len(bam_fpaths):
        critical('Error: the number of sample names (' + str(len(sample_names)) +
                 ') does not match the number of BAM files (' +
                 str(len(bam_fpaths)) + ').')

    samples = [BaseSample(sample_name, None, bam=bam)
               for (sample_name, bam) in zip(sample_names, bam_fpaths)]

    split_bams(cnf, samples, cnf.vcf_fpath)
    info('Done.')
def proc_args(argv):
    """Parse Seq2C command-line options and prepare the run configuration.

    Positional arguments are either a single sample-to-BAM table (any single
    argument that does not end with '.bam') or a list handed to find_bams().

    :param argv: accepted for interface compatibility; not read here, the
        parser works on sys.argv.
    :return: tuple (cnf, samples, target_bed, output_dir) where samples is
        sorted by key_to_sort() and target_bed is None when --bed is absent.
    """
    info(' '.join(sys.argv))
    info()

    # The usage line must name the real option (--controls).
    description = 'This script generates target QC reports for each BAM provided as an input. ' \
                  'Usage: ' + basename(__file__) + \
                  ' sample2bam.tsv --bed target.bed --controls sample1:sample2 -o results_dir'

    parser = OptionParser(description=description, usage=description)
    add_cnf_t_reuse_prjname_donemarker_workdir_genome_debug(parser)
    parser.add_option('-o', dest='output_dir', metavar='DIR',
                      default=join(os.getcwd(), 'seq2c'))
    parser.add_option('--bed', dest='bed', help='BED file to run Seq2C analysis')
    parser.add_option('-c', '--controls', dest='controls',
                      help='Optional control sample names for Seq2C. '
                           'For multiple controls, separate them using :')
    parser.add_option('--seq2c-opts', dest='seq2c_opts',
                      help='Options for the final lr2gene.pl script.')
    parser.add_option('--no-prep-bed', dest='prep_bed', help=SUPPRESS_HELP,
                      action='store_false', default=True)

    (opts, args) = parser.parse_args()
    logger.is_debug = opts.debug

    if len(args) == 0:
        parser.print_usage()
        sys.exit(1)

    if len(args) == 1 and not args[0].endswith('.bam'):
        # Single non-BAM argument: treat it as the sample-to-BAM table.
        sample_names, bam_fpaths = read_samples(
            verify_file(args[0], is_critical=True, description='Input sample2bam.tsv'))
        bam_by_sample = OrderedDict(zip(sample_names, bam_fpaths))
    else:
        bam_by_sample = find_bams(args)

    # No --bed given means the run is configured as WGS.
    run_cnf = determine_run_cnf(opts, is_wgs=not opts.__dict__.get('bed'))
    cnf = Config(opts.__dict__, determine_sys_cnf(opts), run_cnf)
    check_genome_resources(cnf)

    cnf.output_dir = adjust_path(cnf.output_dir)
    verify_dir(dirname(cnf.output_dir), is_critical=True)
    safe_mkdir(cnf.output_dir)

    if not cnf.project_name:
        cnf.project_name = basename(cnf.output_dir)
    info('Project name: ' + cnf.project_name)

    cnf.proc_name = 'Seq2C'
    set_up_dirs(cnf)

    samples = [
        source.TargQC_Sample(name=s_name, dirpath=join(cnf.output_dir, s_name), bam=bam_fpath)
        for s_name, bam_fpath in bam_by_sample.items()]
    info('Samples: ')
    for s in samples:
        info(' ' + s.name)
    samples.sort(key=lambda _s: _s.key_to_sort())

    target_bed = verify_bed(cnf.bed, is_critical=True) if cnf.bed else None

    if not cnf.only_summary:
        cnf.qsub_runner = adjust_system_path(cnf.qsub_runner)
        if not cnf.qsub_runner:
            critical('Error: qsub-runner is not provided in sys-config.')
        verify_file(cnf.qsub_runner, is_critical=True)

    return cnf, samples, target_bed, cnf.output_dir
def main():
    """Entry point of the read-preprocessing script.

    Parses the paired read paths (-1/-2), the sample name and the output
    directory, calls run_fastq() inside the workdir(cnf) context and logs
    the path of the results directory.
    """
    info(' '.join(sys.argv))
    info()

    parser = OptionParser(description='This script runs preprocessing.')
    option_specs = (
        (('-1',), dict(dest='left_reads_fpath', help='Left reads fpath')),
        (('-2',), dict(dest='right_reads_fpath', help='Right reads fpath')),
        (('--sample',), dict(dest='sample_name', help='Sample name')),
        (('-o',), dict(dest='output_dir', help='Output directory path')),
        (('--downsample-to',), dict(
            dest='downsample_to', default=None, type='int',
            help='Downsample reads to avoid excessive processing times with large files. '
                 'Default is 1 million. Set to 0 to turn off downsampling.')),
    )
    for flags, params in option_specs:
        parser.add_option(*flags, **params)
    add_cnf_t_reuse_prjname_donemarker_workdir_genome_debug(parser, threads=1)

    opts, args = parser.parse_args()
    logger.is_debug = opts.debug

    cnf = Config(opts.__dict__, determine_sys_cnf(opts), determine_run_cnf(opts))

    left_reads_fpath = verify_file(opts.left_reads_fpath, is_critical=True)
    right_reads_fpath = verify_file(opts.right_reads_fpath, is_critical=True)

    if opts.output_dir:
        output_dirpath = adjust_path(opts.output_dir)
    else:
        output_dirpath = critical('Please, specify output directory with -o')
    verify_dir(dirname(output_dirpath), description='output_dir', is_critical=True)

    with workdir(cnf):
        # Fall back to a name derived from the two read files.
        sample_name = cnf.sample_name or _get_sample_name(left_reads_fpath, right_reads_fpath)
        results_dirpath = run_fastq(
            cnf, sample_name, left_reads_fpath, right_reads_fpath,
            output_dirpath, downsample_to=cnf.downsample_to)

    verify_dir(results_dirpath, is_critical=True)

    info()
    info('*' * 70)
    info('Fastqc results:')
    info(' ' + results_dirpath)
def get_args():
    """Parse the Circos-plot command line and return the resulting Config."""
    parser = OptionParser(description=(
        'Plots a Circos plot given vardict variant file (with all dbSNP SNPs, not the PASS one), '
        'Seq2C CNV calls and Manta SVs.'))
    add_cnf_t_reuse_prjname_donemarker_workdir_genome_debug(parser, threads=1)

    # (flags, add_option keyword arguments), registered in this order.
    option_specs = (
        (('--bed',), dict(dest='bed_fpath', help='Path to BED file')),
        (('-v', '--mutations'), dict(dest='mutations_fpath', help='Path to VarDict.txt file')),
        (('-c', '--seq2c'), dict(dest='seq2c_tsv_fpath', help='Path to seq2c copy number file')),
        (('--sv',), dict(dest='sv_fpath', help='Path to Manta SV call vcf.gz file')),
        (('-s', '--sample'), dict(dest='sample',
                                  help='Identifier of sample in VarDict and Seq2c files')),
        (('-o', '--output-dir'), dict(dest='output_dir', default="./",
                                      help='Output directory. Defaults to ./')),
    )
    for flags, params in option_specs:
        parser.add_option(*flags, **params)

    opts, _positional = parser.parse_args()

    # Run config is resolved before the system config, as in the original flow.
    run_details = determine_run_cnf(opts)
    return Config(opts.__dict__, determine_sys_cnf(opts), run_details)
def main():
    """Sort the BED file given as the first positional argument.

    Prints help to stderr and exits with status 1 when no input BED is
    given; reports the usage through critical() when -o is missing.
    """
    usage_text = 'Usage: ' + basename(__file__) + ' -o Output_BED_file -g hg19 Input_BED_file'
    parser = OptionParser(usage=usage_text)
    parser.add_option('-o', '--output-bed', dest='output_fpath')
    parser.add_option('-g', '--genome', dest='genome')
    opts, positional = parser.parse_args(sys.argv[1:])

    if not positional:
        parser.print_help(file=sys.stderr)
        sys.exit(1)

    cnf = Config(opts.__dict__, determine_sys_cnf(opts), {})
    check_genome_resources(cnf)

    if not cnf.output_fpath:
        critical(parser.usage)

    input_bed_fpath = verify_bed(positional[0], is_critical=True)
    output_bed_fpath = adjust_path(cnf.output_fpath)
    sort_bed(cnf, input_bed_fpath, output_bed_fpath)
def _read_args(args_list):
    """Parse the BED-standardization command line.

    :param args_list: argument strings handed to OptionParser.parse_args().
    :return: tuple (input_bed_fpath, output_bed_fpath, work_dirpath, cnf);
        work_dirpath is a fresh directory created with tempfile.mkdtemp().

    Exits through the parser when the number of positional arguments is not
    exactly one, when -o is missing, or when both --output-hg and
    --output-grch are given.
    """
    # NOTE: options for key genes, approved genes, Ensembl/RefSeq reference
    # BEDs and the bedtools path were previously defined here but are disabled.
    options = [
        (['-o', '--output-bed'], dict(
            dest='output_fpath')
         ),
        (['--debug'], dict(
            dest='debug',
            help='run in a debug more (verbose output, keeping of temporary files)',
            default=False,
            action='store_true')
         ),
        (['--output-hg'], dict(
            dest='output_hg',
            help='output chromosome names in hg-style (chrM, chr1, .., chr22, chrX, chrY)',
            default=False,
            action='store_true')
         ),
        (['--output-grch'], dict(
            dest='output_grch',
            help='output chromosome names in GRCh-style (1, .., 22, X, Y, MT)',
            default=False,
            action='store_true')
         ),
        (['-g', '--genome'], dict(
            dest='genome',
            default='hg19')
         ),
    ]

    parser = OptionParser(
        usage='usage: %prog [options] Input_BED_file -o Standardized_BED_file',
        description='Scripts outputs a standardized version of input BED file. '
                    'Standardized BED: 1) has 4 or 8 fields (for BEDs with primer info);'
                    ' 2) has HGNC approved symbol in forth column if annotation is '
                    'possible and not_a_gene_X otherwise;'
                    ' 3) is sorted based on chromosome name -> start -> end;'
                    ' 4) has no duplicated regions (regions with the same chromosome, start and end), '
                    'the only exception is _CONTROL_ regions.')
    for args, kwargs in options:
        parser.add_option(*args, **kwargs)

    (opts, args) = parser.parse_args(args_list)
    if len(args) != 1:
        parser.print_help(file=sys.stderr)
        sys.exit(1)

    cnf = Config(opts.__dict__, determine_sys_cnf(opts), {})

    # Validate before creating the temporary directory so a rejected command
    # line leaves nothing behind.
    if not cnf.output_fpath:
        # Previously this surfaced as a failure while building the
        # 'Writing to: ' log line; report it as a usage error instead.
        parser.error('please specify the output BED file with -o')
    if cnf.output_grch and cnf.output_hg:
        # The two naming styles are mutually exclusive; stop instead of
        # logging the problem and carrying on.
        parser.error('you cannot specify --output-hg and --output-grch simultaneously!')

    # mkdtemp() creates the directory itself, so no extra mkdir is needed.
    work_dirpath = tempfile.mkdtemp()
    info('Creating a temporary working directory ' + work_dirpath)

    input_bed_fpath = abspath(args[0])
    info('Input: ' + input_bed_fpath)

    output_bed_fpath = adjust_path(cnf.output_fpath)
    info('Writing to: ' + output_bed_fpath)

    info()

    return input_bed_fpath, output_bed_fpath, work_dirpath, cnf
def proc_opts():
    """Parse preprocessing options and set up the work and log directories.

    Positional arguments are fastq paths; those for which verify_file()
    returns a false value are dropped. When no work directory is configured,
    one is placed under <output_dir>/work: either the 'latest' entry (with
    --reuse) or a new timestamped directory that 'latest' is re-pointed to.

    :return: tuple (cnf, output_dir, fastq_fpaths).
    """
    parser = OptionParser()
    add_cnf_t_reuse_prjname_donemarker_workdir_genome_debug(parser)
    option_specs = (
        (('--expose-only',), dict(dest='expose_to_ngs_server_only', action='store_true',
                                  default=False, help='Only add project to the webserver')),
        (('--no-expose',), dict(dest='expose', action='store_false', default=True,
                                help='Do not expose the reports')),
        (('-o',), dict(dest='output_dir')),
        (('--bed',), dict(dest='bed',
                          help='BED file to run targetSeq and Seq2C analysis on.')),
        (('--downsample-to',), dict(dest='downsample_to', type='int')),
    )
    for flags, params in option_specs:
        parser.add_option(*flags, **params)

    opts, args = parser.parse_args()
    logger.is_debug = opts.debug

    if not args:
        critical('Usage: ' + __file__ + ' *.fq.gz -o output_dir')

    # Keep only the inputs that pass verification.
    fastq_fpaths = []
    for raw_fpath in args:
        checked_fpath = verify_file(raw_fpath)
        if checked_fpath:
            fastq_fpaths.append(checked_fpath)
    info(str(len(fastq_fpaths)) + ' fastq files')

    run_cnf = determine_run_cnf(opts)
    cnf = Config(opts.__dict__, determine_sys_cnf(opts), run_cnf)

    cnf.output_dir = adjust_path(cnf.output_dir)
    info('Writing to ' + str(cnf.output_dir))

    cnf.project_name = cnf.project_name or 'preproc'

    if cnf.work_dir:
        # An explicitly supplied work directory switches debug on.
        cnf.debug = True
    else:
        work_root = join(cnf.output_dir, 'work')
        safe_mkdir(work_root)
        latest_link = join(work_root, 'latest')

        if cnf.reuse_intermediate:
            cnf.work_dir = latest_link
        else:
            timestamp = datetime.datetime.now().strftime("%Y-%b-%d_%H-%M")
            cnf.work_dir = join(work_root, timestamp)
            # Clear whatever currently sits at 'latest', then point it at
            # the new directory by its relative name.
            if islink(latest_link):
                os.remove(latest_link)
            if isdir(latest_link):
                shutil.rmtree(latest_link)
            if not exists(latest_link):
                os.symlink(basename(cnf.work_dir), latest_link)

    cnf.work_dir = adjust_path(cnf.work_dir)
    safe_mkdir(cnf.work_dir)

    cnf.log_dir = join(cnf.work_dir, 'log')
    safe_mkdir(cnf.log_dir)
    set_up_log(cnf)

    # Best effort: make the work directory group-writable.
    try:
        subprocess.call(['chmod', '-R', 'g+w', cnf.work_dir])
    except OSError:
        err(traceback.format_exc())

    if cnf.samplesheet:
        cnf.samplesheet = verify_file(cnf.samplesheet, is_critical=True)

    info(' '.join(sys.argv))
    info()
    info('Created a temporary working directory: ' + cnf.work_dir)
    if cnf.project_name:
        info('Project name: ' + cnf.project_name)
    if cnf.samplesheet:
        info('Using custom sample sheet ' + cnf.samplesheet)

    check_genome_resources(cnf)
    check_system_resources(cnf, optional=['fastq'])

    return cnf, cnf.output_dir, fastq_fpaths
def _read_args(args_list):
    """Parse the BED-standardization command line.

    :param args_list: argument strings handed to OptionParser.parse_args().
    :return: tuple (input_bed_fpath, output_bed_fpath, work_dirpath, cnf);
        work_dirpath is a fresh directory created with tempfile.mkdtemp().

    Exits through the parser when the number of positional arguments is not
    exactly one, when -o is missing, or when both --output-hg and
    --output-grch are given.
    """
    # NOTE: options for key genes, approved genes, Ensembl/RefSeq reference
    # BEDs and the bedtools path were previously defined here but are disabled.
    options = [
        (['-o', '--output-bed'], dict(
            dest='output_fpath')
         ),
        (['--debug'], dict(
            dest='debug',
            help='run in a debug more (verbose output, keeping of temporary files)',
            default=False,
            action='store_true')
         ),
        (['--output-hg'], dict(
            dest='output_hg',
            help='output chromosome names in hg-style (chrM, chr1, .., chr22, chrX, chrY)',
            default=False,
            action='store_true')
         ),
        (['--output-grch'], dict(
            dest='output_grch',
            help='output chromosome names in GRCh-style (1, .., 22, X, Y, MT)',
            default=False,
            action='store_true')
         ),
        (['-g', '--genome'], dict(
            dest='genome',
            default='hg19')
         ),
    ]

    parser = OptionParser(
        usage='usage: %prog [options] Input_BED_file -o Standardized_BED_file',
        description='Scripts outputs a standardized version of input BED file. '
                    'Standardized BED: 1) has 4 or 8 fields (for BEDs with primer info);'
                    ' 2) has HGNC approved symbol in forth column if annotation is '
                    'possible and not_a_gene_X otherwise;'
                    ' 3) is sorted based on chromosome name -> start -> end;'
                    ' 4) has no duplicated regions (regions with the same chromosome, start and end), '
                    'the only exception is _CONTROL_ regions.')
    for args, kwargs in options:
        parser.add_option(*args, **kwargs)

    (opts, args) = parser.parse_args(args_list)
    if len(args) != 1:
        parser.print_help(file=sys.stderr)
        sys.exit(1)

    cnf = Config(opts.__dict__, determine_sys_cnf(opts), {})

    # Validate before creating the temporary directory so a rejected command
    # line leaves nothing behind.
    if not cnf.output_fpath:
        # Previously this surfaced as a failure while building the
        # 'Writing to: ' log line; report it as a usage error instead.
        parser.error('please specify the output BED file with -o')
    if cnf.output_grch and cnf.output_hg:
        # The two naming styles are mutually exclusive; stop instead of
        # logging the problem and carrying on.
        parser.error('you cannot specify --output-hg and --output-grch simultaneously!')

    # mkdtemp() creates the directory itself, so no extra mkdir is needed.
    work_dirpath = tempfile.mkdtemp()
    info('Creating a temporary working directory ' + work_dirpath)

    input_bed_fpath = abspath(args[0])
    info('Input: ' + input_bed_fpath)

    output_bed_fpath = adjust_path(cnf.output_fpath)
    info('Writing to: ' + output_bed_fpath)

    info()

    return input_bed_fpath, output_bed_fpath, work_dirpath, cnf
def get_args():
    """Parse the variant-filtering command line.

    The first positional argument is the vcf2txt.pl output file; --genome
    and -o are mandatory.

    :return: tuple (cnf, vcf2txt_res_fpath).
    """
    info(' '.join(sys.argv))
    info()

    parser = OptionParser(description=(
        'The program will filter the VarDict output after vcf2txt.pl to '
        'candidate interpretable mutations, somatic or germline.'))
    add_cnf_t_reuse_prjname_donemarker_workdir_genome_debug(parser, threads=1)

    # (flags, add_option keyword arguments), registered in this order.
    option_specs = (
        (('-o',), dict(dest='output_file')),
        (('--o-all-transcripts',), dict(dest='all_transcripts_output_file')),
        (('--o-fm',), dict(dest='fm_output_file')),
        (('--o-reject',), dict(dest='rejected_output_file')),
        (('--cohort-freqs',), dict(dest='cohort_freqs_fpath')),
        (('--transcripts',), dict(dest='transcripts_fpath')),
        (('-D', '--min-depth'), dict(dest='filt_depth', type='int',
                                     help='The minimum total depth')),
        (('-V', '--min-vd'), dict(dest='min_vd', type='int',
                                  help='The minimum reads supporting variant')),
        (('--gmaf',), dict(
            dest='min_gmaf', type='float',
            help="When the GMAF is greater than specified, it's considered common SNP and filtered out.")),
        (('-f', '--min-freq'), dict(
            dest='min_freq', type='float',
            help='The minimum allele frequency for regular variants.')),
        (('-F', '--min-freq-hs', '--act-min-freq'), dict(
            dest='act_min_freq', type='float',
            help='The minimum allele frequency hotspot somatic mutations, typically lower then -f. '
                 'Default: 0.01 or half -f, whichever is less')),
        (('-N', '--keep-utr-intronic'), dict(
            dest='keep_utr_intronic', action='store_true',
            help='Keep all intronic and UTR in the output, but will be set as "unknown".')),
        (('-p', '--platform'), dict(
            dest='platform',
            help='The platform, such as WXS, WGS, RNA-Seq, VALIDATION, etc. No Default. '
                 "Used for output in FM's format")),
    )
    for flags, params in option_specs:
        parser.add_option(*flags, **params)

    parser.set_usage('Usage: ' + __file__ + ' vcf2txt_res_fpath [opts] -o output_fpath')

    opts, positional = parser.parse_args()
    if not positional:
        critical('Provide the first argument - output from vcf2txt.pl')

    logger.is_debug = opts.debug

    vcf2txt_res_fpath = verify_file(positional[0], is_critical=True)

    run_details = determine_run_cnf(opts)
    cnf = Config(opts.__dict__, determine_sys_cnf(opts), run_details)

    if not cnf.genome:
        critical('Please, specify the --genome option (e.g. --genome hg19)')
    check_genome_resources(cnf)

    if not cnf.output_file:
        critical('Please, specify the output fpath with -o')

    info()
    return cnf, vcf2txt_res_fpath
def main():
    """Entry point of the TargQC script: validate inputs and run the report.

    Positional arguments are BAM paths. Duplicates are removed while keeping
    the command-line order, each BAM is checked with verify_bam(), and the
    run is handed to run_targqc(). When a report path comes back it is sent
    out through send_email().
    """
    info(' '.join(sys.argv))
    info()

    description = 'This script generates target QC reports for each BAM provided as an input.'
    parser = OptionParser(description=description)
    add_cnf_t_reuse_prjname_donemarker_workdir_genome_debug(parser, threads=1)
    parser.add_option('--work-dir', dest='work_dir', metavar='DIR')
    parser.add_option('--log-dir', dest='log_dir')
    parser.add_option('--only-summary', dest='only_summary', action='store_true')
    parser.add_option('-o', dest='output_dir', metavar='DIR',
                      default=join(os.getcwd(), 'targetqc'))
    parser.add_option('--reannotate', dest='reannotate', action='store_true', default=False,
                      help='re-annotate BED file with gene names')
    parser.add_option('--dedup', dest='dedup', action='store_true', default=False,
                      help='count duplicates in coverage metrics')
    parser.add_option('--bed', dest='bed',
                      help='BED file to run targetSeq and Seq2C analysis on.')
    parser.add_option('--exons', '--exome', '--features', dest='features',
                      help='Annotated CDS/Exon/Gene/Transcripts BED file to make '
                           'targetSeq exon/amplicon regions reports.')

    (opts, args) = parser.parse_args()
    logger.is_debug = opts.debug

    if len(args) == 0:
        critical('No BAMs provided to input.')

    # De-duplicate while preserving the order given on the command line;
    # going through a set would make the processing order arbitrary.
    bam_fpaths = []
    seen_fpaths = set()
    for arg in args:
        fpath = abspath(arg)
        if fpath not in seen_fpaths:
            seen_fpaths.add(fpath)
            bam_fpaths.append(fpath)

    bad_bam_fpaths = [fpath for fpath in bam_fpaths if not verify_bam(fpath)]
    if bad_bam_fpaths:
        critical('BAM files cannot be found, empty or not BAMs: ' + ', '.join(bad_bam_fpaths))

    # No --bed given means the run is configured as WGS.
    run_cnf = determine_run_cnf(opts, is_wgs=not opts.__dict__.get('bed'))
    cnf = Config(opts.__dict__, determine_sys_cnf(opts), run_cnf)

    if not cnf.project_name:
        cnf.project_name = basename(cnf.output_dir)
    info('Project name: ' + cnf.project_name)

    cnf.proc_name = 'TargQC'
    set_up_dirs(cnf)

    check_genome_resources(cnf)

    verify_bed(cnf.bed, is_critical=True)
    bed_fpath = adjust_path(cnf.bed)
    info('Using amplicons/capture panel ' + bed_fpath)

    # Prefer the explicitly given features BED over the genome default.
    features_bed_fpath = adjust_path(cnf.features) if cnf.features \
        else adjust_path(cnf.genome.features)
    info('Features: ' + features_bed_fpath)

    genes_fpath = None
    if cnf.genes:
        genes_fpath = adjust_path(cnf.genes)
        info('Custom genes list: ' + genes_fpath)

    if not cnf.only_summary:
        cnf.qsub_runner = adjust_system_path(cnf.qsub_runner)
        if not cnf.qsub_runner:
            critical('Error: qsub-runner is not provided in sys-config.')
        verify_file(cnf.qsub_runner, is_critical=True)

    info('*' * 70)
    info()

    targqc_html_fpath = run_targqc(cnf, cnf.output_dir, bam_fpaths, bed_fpath,
                                   features_bed_fpath, genes_fpath)
    if targqc_html_fpath:
        send_email(cnf, 'TargQC report for ' + cnf.project_name + ':\n ' + targqc_html_fpath)
def read_opts_and_cnfs(extra_opts, key_for_sample_name=None, required_keys=None,
                       file_keys=None, dir_keys=None, description='',
                       extra_msg=None, proc_name=None, fpath_for_sample_name=None,
                       main_output_is_file=False, main_output_is_dir=True):
    """Build the option parser shared by the pipeline scripts and return a Config.

    :param extra_opts: script-specific options as (flags, add_option kwargs)
        pairs; registered before the common options. Not modified.
    :param key_for_sample_name: cnf key whose value (a file path) provides the
        sample name when --sample is not given.
    :param required_keys: option dests that must be present; they are listed
        under "Required options" in the usage text.
    :param file_keys: dests to check as files; only required ones are checked.
    :param dir_keys: dests to check as directories; only required ones are checked.
    :param description: parser description.
    :param extra_msg: accepted for interface compatibility; not used here.
    :param proc_name: fallback for cnf.proc_name when --proc-name is not given.
    :param fpath_for_sample_name: explicit path to derive the sample name from;
        takes precedence over key_for_sample_name.
    :param main_output_is_file: make -o the output file (and --output-dir the directory).
    :param main_output_is_dir: make -o the output directory (and --output-file the file).
    :return: the populated Config object.
    """
    # None stands for "empty" so that no list object is shared between calls.
    required_keys = list(required_keys) if required_keys else []
    file_keys = list(file_keys) if file_keys else []
    dir_keys = list(dir_keys) if dir_keys else []

    # Work on a copy: extending extra_opts in place would leak the common
    # options into the caller's list.
    options = list(extra_opts)

    output_file_kwargs = dict(dest='output_file', metavar='FILE', help='Output file')
    output_dir_kwargs = dict(
        dest='output_dir', metavar='DIR',
        help='Output directory (or directory name in case of bcbio final dir)',
        default=os.getcwd())
    if main_output_is_file:
        options += [(['-o', '--output-file'], output_file_kwargs)]
        options += [(['--output-dir'], output_dir_kwargs)]
    elif main_output_is_dir:
        options += [(['-o', '--output-dir'], output_dir_kwargs)]
        options += [(['--output-file'], output_file_kwargs)]

    options += [
        (['-s', '--sample', '--name'], dict(
            dest='sample',
            metavar='NAME',
            help='Sample name (default is part of name of the first parameter prior to the first - or .')),
        (['-c', '--caller'], dict(
            dest='caller',
            metavar='CALLER_NAME',
            help='Variant caller name (default is part of name of the first parameter between the first - and following .')),
        (['-t', '--nt', '--threads'], dict(
            dest='threads',
            type='int',
            help='Number of threads')),
        (['--clean'], dict(  # do not keep work directory
            dest='keep_intermediate',
            action='store_false',
            help=SUPPRESS_HELP)),
        (['--debug'], dict(
            dest='debug',
            action='store_true',
            default=False,
            help=SUPPRESS_HELP)),
        (['--reuse'], dict(
            dest='reuse_intermediate',
            help='reuse intermediate non-empty files in the work dir from previous run',
            action='store_true')),
        (['--sys-cnf'], dict(
            dest='sys_cnf',
            metavar='SYS_CNF.yaml',
            help='System configuration file with paths to external tools and genome resources. The default is '
                 '(see default one %s)' % defaults['sys_cnf'])),
        (['--run-cnf'], dict(
            dest='run_cnf',
            metavar='RUN_CNF.yaml',
            default=defaults['run_cnf_exome_seq'],
            help='Customised run details: list of annotations/QC metrics/databases/filtering criteria. '
                 'The default is %s' % defaults['run_cnf_exome_seq'])),
        (['--transcripts'], dict(dest='transcripts_fpath')),
        (['--work-dir'], dict(dest='work_dir', metavar='DIR', help=SUPPRESS_HELP)),
        (['--log-dir'], dict(dest='log_dir', metavar='DIR', help=SUPPRESS_HELP)),
        (['--proc-name'], dict(dest='proc_name', help=SUPPRESS_HELP)),
        (['--project-name'], dict(dest='project_name')),
        (['--no-check'], dict(dest='no_check', action='store_true', help=SUPPRESS_HELP)),
        (['-g', '--genome'], dict(dest='genome')),
        (['--email'], dict(dest='email', help=SUPPRESS_HELP)),
        (['--done-marker'], dict(dest='done_marker', help=SUPPRESS_HELP)),
    ]

    parser = OptionParser(description=description)
    for args, kwargs in options:
        parser.add_option(*args, **kwargs)

    # Append the flags of all required options to the usage text.
    req_keys_usage = ''
    if required_keys:
        req_keys_usage = '\nRequired options:'
    for args, kwargs in options:
        try:
            if kwargs['dest'] in required_keys:
                req_keys_usage += '\n ' + '/'.join(args)
        except KeyError:
            # An option declared without 'dest': log it and keep going.
            err(format_exc())
    parser.set_usage(parser.get_usage() + req_keys_usage)

    (opts, args) = parser.parse_args()
    logger.is_debug = opts.debug

    # No 'bed' option value means the run is configured as WGS.
    run_cnf = determine_run_cnf(opts, is_wgs=not opts.__dict__.get('bed'))
    cnf = Config(opts.__dict__, determine_sys_cnf(opts), run_cnf)

    errors = check_keys_presence(cnf, required_keys)
    if errors:
        parser.print_help()
        critical(errors)

    file_keys = [k for k in file_keys if k in required_keys]
    dir_keys = [k for k in dir_keys if k in required_keys]
    errors = check_dirs_and_files(cnf, file_keys, dir_keys)
    if errors:
        critical(errors)

    if cnf.sample:
        cnf.sample = remove_quotes(cnf.sample)
    else:
        if not fpath_for_sample_name:
            if not key_for_sample_name:
                critical('Error: --sample must be provided in options.')
            fpath_for_sample_name = cnf[key_for_sample_name]
        if not fpath_for_sample_name:
            critical('Error: --sample or ' + (str(key_for_sample_name)) +
                     ' must be provided in options.')
        # Derive the name from the resolved path, so that an explicitly
        # passed fpath_for_sample_name is honoured (and works without a key).
        key_fname = basename(fpath_for_sample_name)
        cnf.sample = key_fname.split('.')[0]

    if cnf.caller:
        cnf.caller = remove_quotes(cnf.caller)
    else:
        cnf.caller = None

    cnf.proc_name = cnf.proc_name or proc_name
    set_up_dirs(cnf)
    info(' '.join(sys.argv))
    info()

    return cnf
def proc_args(argv):
    """Parse the variant post-processing command line.

    Positional arguments are passed to read_samples() together with the
    caller name to obtain a sample-to-VCF mapping.

    :param argv: accepted for interface compatibility; not read here, the
        parser works on sys.argv.
    :return: tuple (cnf, samples) with samples sorted by key_to_sort().
    """
    info(' '.join(sys.argv))
    info()

    parser = OptionParser(
        description='This script generates target QC reports for each BAM provided as an input.')
    add_cnf_t_reuse_prjname_donemarker_workdir_genome_debug(parser)

    # (flags, add_option keyword arguments), registered in this order.
    option_specs = (
        (('--log-dir',), dict(dest='log_dir')),
        (('--is-wgs',), dict(dest='is_wgs', action='store_true', default=False,
                             help='whole genome sequencing')),
        (('--is-deep-seq',), dict(dest='is_deep_seq', action='store_true', default=False,
                                  help='deep targeted sequencing')),
        (('--only-summary',), dict(dest='only_summary', action='store_true')),
        (('-o',), dict(dest='output_dir', metavar='DIR',
                       default=join(os.getcwd(), 'targetqc'))),
        (('-c', '--caller'), dict(dest='caller')),
        (('--qc',), dict(dest='qc', action='store_true', default=False)),
        (('--no-qc',), dict(dest='qc', action='store_false', default=False)),
        (('--qc-caption',), dict(dest='qc_caption', help=SUPPRESS_HELP)),
        (('--no-tsv',), dict(dest='tsv', action='store_false', default=True,
                             help=SUPPRESS_HELP)),
    )
    for flags, params in option_specs:
        parser.add_option(*flags, **params)

    opts, positional = parser.parse_args()
    logger.is_debug = opts.debug

    if not positional:
        critical('No vcf files provided to input.')

    run_details = determine_run_cnf(opts, is_targetseq=opts.is_deep_seq, is_wgs=opts.is_wgs)
    cnf = Config(opts.__dict__, determine_sys_cnf(opts), run_details)

    vcf_fpath_by_sample = read_samples(positional, cnf.caller)
    info()

    if not cnf.project_name:
        cnf.project_name = basename(cnf.output_dir)
    info('Project name: ' + cnf.project_name)

    cnf.proc_name = 'Variants'
    set_up_dirs(cnf)
    # Logged a second time, now after set_up_dirs() has run.
    info(' '.join(sys.argv))

    unsorted_samples = []
    for s_name, vcf_fpath in vcf_fpath_by_sample.items():
        unsorted_samples.append(
            source.VarSample(s_name, join(cnf.output_dir, s_name), vcf=vcf_fpath))
    samples = sorted(unsorted_samples, key=lambda sample: sample.key_to_sort())

    check_genome_resources(cnf)

    if not cnf.only_summary:
        cnf.qsub_runner = adjust_system_path(cnf.qsub_runner)
        if not cnf.qsub_runner:
            critical('Error: qsub-runner is not provided is sys-config.')
        verify_file(cnf.qsub_runner, is_critical=True)

    return cnf, samples