def parse_args(args):
    """Pull the settings this command needs out of the parsed arguments.

    Args:
        args: mapping from option / positional names to their values.

    Returns:
        Tuple ``(num_threads, outdir, compress, phred, trimmomatic_args,
        input_files)`` where ``input_files`` is a list holding one path
        (single file) or two paths (when ``<in2.fastq>`` was given).

    Raises:
        CannotContinueException: if ``--phred`` is not '33' or '64'.
    """
    # A second FASTQ selects the two-file form; otherwise use the single
    # positional. Every path goes through expanduser so "~" is resolved.
    if args['<in2.fastq>']:
        fastq_keys = ('<in1.fastq>', '<in2.fastq>')
    else:
        fastq_keys = ('<input.fastq>',)
    input_files = [os.path.expanduser(args[key]) for key in fastq_keys]

    num_threads = int(args['--threads'])
    compress = args['--compress']
    outdir = args_to_out_dir(args)

    # Validate the raw string first, then convert it.
    phred_opt = args['--phred']
    if phred_opt not in ('33', '64'):
        raise CannotContinueException(
            """Phred score {} is not supported.""".format(phred_opt))
    phred = int(phred_opt)

    # Peel quote characters off both ends of the pass-through option string.
    raw_passthrough = args['--trimmomatic']
    trimmomatic_args = raw_passthrough.strip("'").strip('"').strip("'")

    return (num_threads, outdir, compress, phred, trimmomatic_args,
            input_files)
def parse_args(args):
    """Validate the parsed arguments and return the values this step uses.

    Args:
        args: mapping from option / positional names to their values.

    Returns:
        Tuple ``(kit, store, outdir, input_file, debug_switch, dump_rg_db,
        dump_loc_db)``.

    Raises:
        CannotContinueException: if ``--kit`` is not ``KIT_BIOO`` or
            ``--store`` is not one of ``STORE_OPTION_LMDB`` /
            ``STORE_OPTION_MEMORY``.
    """
    debug_switch = args['--debug-switch']
    dump_rg_db = args['--debug-dump-rg-db']
    dump_loc_db = args['--debug-dump-loc-db']

    # Convert ~ to real path
    input_file = os.path.expanduser(args['<alignment-file>'])

    # Which kit? Only one is accepted, so reject anything else up front.
    kit = args['--kit']
    if kit != KIT_BIOO:
        raise CannotContinueException(
            """Kit {} is not supported.""".format(kit))

    # NOTE: '--threads' used to be read into a local here that nothing
    # consumed (it is not part of the returned tuple); that dead lookup has
    # been removed.

    # Which store to use
    store = args['--store']
    if store not in (STORE_OPTION_LMDB, STORE_OPTION_MEMORY):
        raise CannotContinueException("""Store {} is not supported.""".format(
            store))

    outdir = args_to_out_dir(args)

    return (kit, store, outdir, input_file, debug_switch, dump_rg_db,
            dump_loc_db)
def parse_args(args):
    """Turn the parsed arguments into the values this command works with.

    Args:
        args: mapping from option / positional names to their values.

    Returns:
        Tuple ``(write_func, outdir, compress, opt_trimlog, input_files)``.
        ``write_func`` is a ``functools.partial`` that opens an output for
        writing; ``input_files`` is a 1- or 2-tuple of paths;
        ``opt_trimlog`` is the ``-l`` value, or ``None`` when it is falsy.
    """
    # Resolve "~" in the input paths. A second FASTQ means PE mode.
    if args['<in2.fastq>']:
        input_files = (os.path.expanduser(args['<in1.fastq>']),
                       os.path.expanduser(args['<in2.fastq>']))
    else:
        input_files = (os.path.expanduser(args['<input.fastq>']), )

    # Pick the output writer: pigzwrite bound to the thread count only when
    # more than one thread was requested, pigz is found by which(), and
    # compression is on; in every other case a plain text-mode open().
    compress = args['--compress']
    num_threads = int(args['--threads'])
    use_pigz = num_threads > 1 and which('pigz') and compress
    if not use_pigz:
        write_func = functools.partial(open, mode='w')
    else:
        write_func = functools.partial(pigzwrite, num_threads)

    outdir = args_to_out_dir(args)

    # Optional trimlog: normalise any falsy value to None.
    opt_trimlog = args['-l'] or None

    return (write_func, outdir, compress, opt_trimlog, input_files)
def parse_args(args):
    """Validate the parsed arguments and return the values this step uses.

    Most switches are negative flags (``--no-...``) on the command line and
    are inverted here so that the returned booleans read positively.

    Args:
        args: mapping from option / positional names to their values.

    Returns:
        Tuple ``(kit, store, outdir, input_file, paired,
        build_read_and_loc_dbs, reject_umi_errors, correct_umis,
        write_dedupped_sam, write_flagged_sam, write_dup_only_sam,
        write_dup_group_sam_like, write_umi_error_rejects,
        write_sam_headers, random_seed, debug_switch, dump_rg_db,
        dump_loc_db, dump_dup_group_db, dump_dup_db, dump_umi_error_db)``.

    Raises:
        CannotContinueException: if UMI correction is requested for a kit
            other than ``KIT_BIOO``, if both rejecting and correcting bad
            UMIs are requested, or if ``--store`` names an unsupported
            store.
    """
    debug_switch = args['--debug-switch']
    dump_rg_db = args['--dump-rg-db']
    dump_loc_db = args['--dump-loc-db']
    dump_dup_group_db = args['--dump-dup-group-db']
    dump_dup_db = args['--dump-dup-db']
    dump_umi_error_db = args['--dump-umi-error-db']
    random_seed = args['--random-seed']

    write_dedupped_sam = not args['--no-write-dedupped-sam']
    write_flagged_sam = args['--write-flagged-sam']
    write_dup_only_sam = not args['--no-write-dup-sam']
    write_dup_group_sam_like = not args['--no-write-dup-group-file']
    write_sam_headers = not args['--no-write-sam-headers']
    paired = not args['--unpaired']
    reject_umi_errors = not args['--keep-bad-umis']
    correct_umis = args['--correct-umis']
    build_read_and_loc_dbs = not args['--debug-no-build-read-and-loc-dbs']

    # Convert ~ to real path
    input_file = os.path.expanduser(args['<alignment-file>'])

    # Which kit? Compared case-insensitively by lower-casing the value.
    kit = str(args['--kit']).lower()
    write_umi_error_rejects = (kit == KIT_BIOO)

    if correct_umis and kit != KIT_BIOO:
        raise CannotContinueException(
            """Cannot correct UMIs when kit is not Bioo.""")

    if reject_umi_errors and correct_umis:
        raise CannotContinueException(
            "Doesn't make sense to reject and *also* correct erroneous UMIs!!"
            " If passing --correct, you must also pass --keep-bad-umis.")

    # Which store to use: fall back to the in-memory store when the option
    # was not given. Identity test, since None is a singleton.
    store = args['--store']
    if store is None:
        store = STORE_OPTION_MEMORY
    elif store not in (STORE_OPTION_LMDB, STORE_OPTION_MEMORY):
        raise CannotContinueException("""Store {} is not supported.""".format(
            store))

    outdir = args_to_out_dir(args)

    return (kit, store, outdir, input_file, paired, build_read_and_loc_dbs,
            reject_umi_errors, correct_umis, write_dedupped_sam,
            write_flagged_sam, write_dup_only_sam, write_dup_group_sam_like,
            write_umi_error_rejects, write_sam_headers, random_seed,
            debug_switch, dump_rg_db, dump_loc_db, dump_dup_group_db,
            dump_dup_db, dump_umi_error_db)
def parse_args(args):
    """Turn the parsed arguments into the values this command works with.

    The read-file entries of ``args`` are overwritten in place with their
    "~"-expanded paths before ``args_to_out_dir(args)`` is called.

    Args:
        args: mutable mapping from option / positional names to values.

    Returns:
        Tuple ``(fp_split_qf_umi_anno_raw, fp_write, outdir, phred,
        min_umi_qual, umi_qf_win_size, write_rejects_files,
        delete_temp_files_upon_failure, barcode_list_file,
        barcode_reads_file, umi_reads_file, reads_files)``.

    Raises:
        ArgumentException: if ``--phred`` is not '33' or '64'.
    """
    umi_reads_file = os.path.expanduser(args['<in.umi.fq>'])
    barcode_reads_file = os.path.expanduser(args['<in.barcode.fq>'])
    barcode_list_file = os.path.expanduser(args['<barcode_file>'])

    # Command-line flags are negative; flip them into positive booleans.
    delete_temp_files_upon_failure = not args['--no-delete-tmp-files']
    write_rejects_files = not args['--no-write-rejects']

    min_umi_qual = int(args['--umi-min-qual'])
    umi_qf_win_size = int(args['--umi-qf-win-size'])

    # phred: validate the raw string before converting it
    if args['--phred'] not in ('33', '64'):
        raise ArgumentException("ERR212: --phred can be only '33' or '64'.")
    phred = int(args['--phred'])

    # Paired or single end? A second reads file selects PE, otherwise SR.
    paired = bool(args['<in.R2.fq>'])
    read_keys = ('<in.R1.fq>', '<in.R2.fq>') if paired else ('<in.fq>',)

    reads_files = []
    for key in read_keys:
        # Convert ~ to real path, storing the result back into args.
        args[key] = os.path.expanduser(args[key])
        reads_files.append(args[key])

    fp_write = functools.partial(open, mode='w')

    # Function used for annotation, chosen by PE / SR mode.
    fp_split_qf_umi_anno_raw = (split_qf_umi_anno_raw_pe if paired
                                else split_qf_umi_anno_raw_sr)

    outdir = args_to_out_dir(args)

    return (fp_split_qf_umi_anno_raw, fp_write, outdir, phred, min_umi_qual,
            umi_qf_win_size, write_rejects_files,
            delete_temp_files_upon_failure, barcode_list_file,
            barcode_reads_file, umi_reads_file, reads_files)
def parse_args(args):
    """Turn the parsed arguments into the values this command works with.

    Args:
        args: mapping from option / positional names to their values.

    Returns:
        Tuple ``(compress, outdir, cutadapt_args, adapters, input_files)``.
        ``input_files`` and ``adapters`` each hold two entries when two
        FASTQ files were given, and one entry otherwise.

    Raises:
        ArgumentException: if two FASTQ files were given but either
            ``--adapter1`` or ``--adapter2`` is falsy.
    """
    if args['<in2.fastq>']:
        # Two FASTQ files: resolve "~" in both and insist on both adapters.
        input_files = [os.path.expanduser(args['<in1.fastq>']),
                       os.path.expanduser(args['<in2.fastq>'])]
        if not (args['--adapter1'] and args['--adapter2']):
            raise ArgumentException("""Error: --adapter1 and --adapter2 are required if running in paired-end mode (i.e. when you give two FASTQ files). """)
        adapters = [args['--adapter1'], args['--adapter2']]
    else:
        # One FASTQ file: only the first adapter is used. A stray --adapter2
        # is not rejected here; the previous note on this code said such a
        # check only becomes possible if the --adapter options lose their
        # defaults.
        input_files = [os.path.expanduser(args['<input.fastq>'])]
        adapters = [args['--adapter1']]

    outdir = args_to_out_dir(args)

    # Peel quote characters off both ends of the pass-through option string.
    raw_passthrough = args['--cutadapt']
    cutadapt_args = raw_passthrough.strip("'").strip('"').strip("'")

    # compress output?
    compress = args['--compress']

    return (compress, outdir, cutadapt_args, adapters, input_files)
def parse_args(args):
    """Work out input mode, helper functions and options from the arguments.

    The input entries of ``args`` are rewritten in place: paths are
    "~"-expanded, and when the single positional turns out to be a BAM it is
    moved from ``'<input.fastq>'`` to ``'<input.bam>'``.

    Args:
        args: mutable mapping from option / positional names to values.

    Returns:
        Tuple ``(fp_extract_umi, fp_anno, fp_write, outdir, compress,
        force_paired, input_files)``. The three ``fp_*`` entries are
        callables chosen from kit, pairing, file type and compression
        settings.

    Raises:
        CannotContinueException: if ``--kit`` is not ``KIT_BIOO``.
        ControlFlowException: if the detected file type is neither 'FASTQ'
            nor 'BAM' (should be unreachable).
    """
    # Paired or single end? FASTQ or BAM?
    if args['<in2.fastq>']:
        # mode = FASTQ, PE
        paired = True
        filetype = 'FASTQ'
        # Convert ~ to real path
        args['<in1.fastq>'] = os.path.expanduser(args['<in1.fastq>'])
        args['<in2.fastq>'] = os.path.expanduser(args['<in2.fastq>'])
        input_files = [args['<in1.fastq>'], args['<in2.fastq>']]
    else:
        # Convert ~ to real path
        args['<input.fastq>'] = os.path.expanduser(args['<input.fastq>'])
        # Note: If you write the following for docopt:
        #     dupliganger remove-umi [options] <input.fastq>
        #     dupliganger remove-umi [options] <input.bam>
        # then it will always populate <input.fastq> and never populate
        # <input.bam>, hence the somewhat confusing names going on down
        # below...
        if is_bam(args['<input.fastq>']):
            # It is a bam, so hack docopt a bit
            args['<input.bam>'] = args['<input.fastq>']
            args['<input.fastq>'] = None
            paired = bool(is_paired_bam(args['<input.bam>']))
            filetype = 'BAM'
            input_files = [args['<input.bam>']]
        else:
            # mode = FASTQ, SR
            paired = False
            filetype = 'FASTQ'
            input_files = [args['<input.fastq>']]

    # What fp_extract_umi to use?
    kit = args['--kit']
    if kit != KIT_BIOO:
        raise CannotContinueException(
            """Kit {} is not supported.""".format(kit))
    if paired:
        fp_extract_umi = extract_paired_umis_bioo
    else:
        fp_extract_umi = extract_single_umi_bioo

    # Figure out which function to use to write to output file.
    num_threads = int(args['--threads'])
    compress = args['--compress']
    if num_threads > 1 and which('pigz') and compress:
        # return a partial for pigzwrite
        # TODO: Maybe alter this to num_threads/2 if --paired-end?
        fp_write = functools.partial(pigzwrite, num_threads)
    elif which('gzip') and compress:
        # return a partial for gzwrite
        fp_write = gzwrite
    else:
        fp_write = functools.partial(open, mode='w')

    # Return an appropriate function pointer for annotation.
    if filetype == 'FASTQ':
        if paired:
            fp_anno = create_annotated_files_from_fastq
        else:
            fp_anno = create_annotated_file_from_fastq
    elif filetype == 'BAM':
        if paired:
            fp_anno = create_annotated_files_from_bam
        else:
            fp_anno = create_annotated_file_from_bam
    else:
        raise ControlFlowException("""ERR213: Not possible to be here.""")

    outdir = args_to_out_dir(args)

    force_paired = args['--force-paired']
    if force_paired and not paired:
        # The message template has a '{}' slot that was previously never
        # filled in, so a literal "{}" was printed. Fill it with the input
        # file name(s) so the user can see which input the warning is about.
        sys.stderr.write(
            "WARNING: Passed --force-paired but {} appears not to be "
            "paired-end.\n".format(", ".join(input_files)))

    return (fp_extract_umi, fp_anno, fp_write, outdir, compress,
            force_paired, input_files)