Exemplo n.º 1
0
def parse_args(args):
    """Extract the settings this command needs from the parsed arguments.

    Args:
        args: Mapping from option/positional names (e.g. ``'--threads'``,
            ``'<in1.fastq>'``) to their parsed values.

    Returns:
        The tuple ``(num_threads, outdir, compress, phred, trimmomatic_args,
        input_files)``. ``input_files`` is a list with one path, or two when
        ``<in2.fastq>`` is given; ``phred`` is returned as an int.

    Raises:
        CannotContinueException: If ``--phred`` is neither ``'33'`` nor
            ``'64'``.
    """
    # A second FASTQ file selects the two-file form of the command line.
    if args['<in2.fastq>']:
        fastq_keys = ('<in1.fastq>', '<in2.fastq>')
    else:
        fastq_keys = ('<input.fastq>',)

    # expanduser converts a leading ~ into the real path.
    input_files = [os.path.expanduser(args[key]) for key in fastq_keys]

    num_threads = int(args['--threads'])
    compress = args['--compress']
    outdir = args_to_out_dir(args)

    supported_phreds = ('33', '64')
    phred_text = args['--phred']
    if phred_text not in supported_phreds:
        message = "Phred score {} is not supported.".format(phred_text)
        raise CannotContinueException(message)
    phred = int(phred_text)

    # Peel off surrounding quotes in the same order as always: ', then ",
    # then ' again (covers a value wrapped as '"..."').
    trimmomatic_args = args['--trimmomatic']
    for quote_char in ("'", '"', "'"):
        trimmomatic_args = trimmomatic_args.strip(quote_char)

    return (num_threads, outdir, compress, phred, trimmomatic_args,
            input_files)
def parse_args(args):
    """Validate and unpack the parsed command line arguments.

    Args:
        args: Mapping from option/positional names (e.g. ``'--kit'``,
            ``'<alignment-file>'``) to their parsed values.

    Returns:
        The tuple ``(kit, store, outdir, input_file, debug_switch,
        dump_rg_db, dump_loc_db)``.

    Raises:
        CannotContinueException: If ``--kit`` is not ``KIT_BIOO``, or if
            ``--store`` is neither ``STORE_OPTION_LMDB`` nor
            ``STORE_OPTION_MEMORY``.
    """
    debug_switch = args['--debug-switch']
    dump_rg_db = args['--debug-dump-rg-db']
    dump_loc_db = args['--debug-dump-loc-db']

    # Convert ~ to real path
    input_file = os.path.expanduser(args['<alignment-file>'])

    # Which kit? KIT_BIOO is the only value accepted here.
    kit = args['--kit']
    if kit != KIT_BIOO:
        raise CannotContinueException(
            """Kit {} is not supported.""".format(kit))

    # Which store to use
    store = args['--store']
    if store not in (STORE_OPTION_LMDB, STORE_OPTION_MEMORY):
        raise CannotContinueException(
            """Store {} is not supported.""".format(store))

    outdir = args_to_out_dir(args)

    return (kit, store, outdir, input_file, debug_switch, dump_rg_db,
            dump_loc_db)
def parse_args(args):
    """Extract input paths, the output writer and related options.

    Args:
        args: Mapping from option/positional names (e.g. ``'--compress'``,
            ``'<in1.fastq>'``) to their parsed values.

    Returns:
        The tuple ``(write_func, outdir, compress, opt_trimlog,
        input_files)``. ``input_files`` is a tuple of one path, or two when
        ``<in2.fastq>`` is given. ``opt_trimlog`` is the ``-l`` value, or
        None when that value is falsy.
    """
    # PE mode when a second FASTQ is supplied, single-file mode otherwise.
    if args['<in2.fastq>']:
        fastq_keys = ('<in1.fastq>', '<in2.fastq>')
    else:
        fastq_keys = ('<input.fastq>',)

    # expanduser converts a leading ~ into the real path.
    input_files = tuple(os.path.expanduser(args[key]) for key in fastq_keys)

    # Choose the callable used to open output files: pigzwrite is used only
    # when more than one thread is requested, which('pigz') finds something
    # and compression is on; otherwise fall back to open(..., mode='w').
    compress = args['--compress']
    num_threads = int(args['--threads'])
    use_pigz = num_threads > 1 and which('pigz') and compress
    write_func = (functools.partial(pigzwrite, num_threads) if use_pigz
                  else functools.partial(open, mode='w'))

    outdir = args_to_out_dir(args)

    # Optional trimlog: any falsy value is normalised to None.
    opt_trimlog = args['-l'] or None

    return (write_func, outdir, compress, opt_trimlog, input_files)
Exemplo n.º 4
0
def parse_args(args):
    """Validate and unpack the parsed command line arguments.

    Args:
        args: Mapping from option/positional names (e.g. ``'--kit'``,
            ``'<alignment-file>'``) to their parsed values.

    Returns:
        The tuple ``(kit, store, outdir, input_file, paired,
        build_read_and_loc_dbs, reject_umi_errors, correct_umis,
        write_dedupped_sam, write_flagged_sam, write_dup_only_sam,
        write_dup_group_sam_like, write_umi_error_rejects,
        write_sam_headers, random_seed, debug_switch, dump_rg_db,
        dump_loc_db, dump_dup_group_db, dump_dup_db, dump_umi_error_db)``.

    Raises:
        CannotContinueException: If ``--correct-umis`` is given with a kit
            other than ``KIT_BIOO``, if ``--correct-umis`` is given without
            ``--keep-bad-umis``, or if ``--store`` is set to a value other
            than ``STORE_OPTION_LMDB`` / ``STORE_OPTION_MEMORY``.
    """
    debug_switch = args['--debug-switch']
    dump_rg_db = args['--dump-rg-db']
    dump_loc_db = args['--dump-loc-db']
    dump_dup_group_db = args['--dump-dup-group-db']
    dump_dup_db = args['--dump-dup-db']
    dump_umi_error_db = args['--dump-umi-error-db']

    random_seed = args['--random-seed']

    # Most outputs are switched off by a --no-write-* flag, so those flags
    # are inverted here; only the flagged SAM is switched on by its flag.
    write_dedupped_sam = not args['--no-write-dedupped-sam']
    write_flagged_sam = args['--write-flagged-sam']
    write_dup_only_sam = not args['--no-write-dup-sam']
    write_dup_group_sam_like = not args['--no-write-dup-group-file']
    write_sam_headers = not args['--no-write-sam-headers']

    paired = not args['--unpaired']
    reject_umi_errors = not args['--keep-bad-umis']
    correct_umis = args['--correct-umis']
    build_read_and_loc_dbs = not args['--debug-no-build-read-and-loc-dbs']

    # Convert ~ to real path
    input_file = os.path.expanduser(args['<alignment-file>'])

    # Which kit? The name is compared case-insensitively (lowercased first).
    kit = str(args['--kit']).lower()
    write_umi_error_rejects = kit == KIT_BIOO

    if correct_umis and kit != KIT_BIOO:
        raise CannotContinueException(
            """Cannot correct UMIs when kit is not Bioo.""")

    if reject_umi_errors and correct_umis:
        raise CannotContinueException(
            "Doesn't make sense to reject and *also* correct erroneous UMIs!!"
            " If passing --correct, you must also pass --keep-bad-umis.")

    # Which store to use: default to the in-memory store when none is given.
    store = args['--store']
    if store is None:
        store = STORE_OPTION_MEMORY
    elif store not in (STORE_OPTION_LMDB, STORE_OPTION_MEMORY):
        raise CannotContinueException(
            """Store {} is not supported.""".format(store))

    outdir = args_to_out_dir(args)

    return (kit, store, outdir, input_file, paired, build_read_and_loc_dbs,
            reject_umi_errors, correct_umis, write_dedupped_sam,
            write_flagged_sam, write_dup_only_sam, write_dup_group_sam_like,
            write_umi_error_rejects, write_sam_headers, random_seed,
            debug_switch, dump_rg_db, dump_loc_db, dump_dup_group_db,
            dump_dup_db, dump_umi_error_db)
Exemplo n.º 5
0
def parse_args(args):
    """Extract file paths, thresholds and helper functions from the arguments.

    Note:
        ``args`` is modified in place: the read-file entries that are used
        (``<in.R1.fq>``/``<in.R2.fq>``, or ``<in.fq>``) are overwritten with
        their ``os.path.expanduser`` result.

    Args:
        args: Mapping from option/positional names (e.g. ``'--phred'``,
            ``'<in.umi.fq>'``) to their parsed values.

    Returns:
        The tuple ``(fp_split_qf_umi_anno_raw, fp_write, outdir, phred,
        min_umi_qual, umi_qf_win_size, write_rejects_files,
        delete_temp_files_upon_failure, barcode_list_file,
        barcode_reads_file, umi_reads_file, reads_files)``.

    Raises:
        ArgumentException: If ``--phred`` is neither ``'33'`` nor ``'64'``.
    """
    expand = os.path.expanduser  # converts a leading ~ into the real path

    umi_reads_file = expand(args['<in.umi.fq>'])
    barcode_reads_file = expand(args['<in.barcode.fq>'])
    barcode_list_file = expand(args['<barcode_file>'])

    # Both switches are phrased negatively on the command line.
    delete_temp_files_upon_failure = not args['--no-delete-tmp-files']
    write_rejects_files = not args['--no-write-rejects']

    min_umi_qual = int(args['--umi-min-qual'])
    umi_qf_win_size = int(args['--umi-qf-win-size'])

    phred_text = args['--phred']
    if phred_text not in ('33', '64'):
        raise ArgumentException("ERR212: --phred can be only '33' or '64'.")
    phred = int(phred_text)

    # A second reads file means paired-end (PE); otherwise single-read (SR).
    paired = bool(args['<in.R2.fq>'])
    read_keys = ('<in.R1.fq>', '<in.R2.fq>') if paired else ('<in.fq>',)

    reads_files = []
    for key in read_keys:
        # Store the expanded path back into args, then collect it.
        args[key] = expand(args[key])
        reads_files.append(args[key])

    fp_write = functools.partial(open, mode='w')

    # Pick the annotation function matching the input mode.
    fp_split_qf_umi_anno_raw = (split_qf_umi_anno_raw_pe if paired
                                else split_qf_umi_anno_raw_sr)

    outdir = args_to_out_dir(args)

    return (fp_split_qf_umi_anno_raw, fp_write, outdir, phred, min_umi_qual,
            umi_qf_win_size, write_rejects_files,
            delete_temp_files_upon_failure, barcode_list_file,
            barcode_reads_file, umi_reads_file, reads_files)
def parse_args(args):
    """Extract input files, adapters and pass-through options.

    Args:
        args: Mapping from option/positional names (e.g. ``'--adapter1'``,
            ``'<in1.fastq>'``) to their parsed values.

    Returns:
        The tuple ``(compress, outdir, cutadapt_args, adapters,
        input_files)``. ``adapters`` and ``input_files`` are lists with one
        entry each, or two each when ``<in2.fastq>`` is given.

    Raises:
        ArgumentException: If two FASTQ files are given but ``--adapter1``
            or ``--adapter2`` is missing/empty.
    """
    paired_end = bool(args['<in2.fastq>'])

    if paired_end:
        # expanduser converts a leading ~ into the real path.
        input_files = [os.path.expanduser(args[key])
                       for key in ('<in1.fastq>', '<in2.fastq>')]
        if not (args['--adapter1'] and args['--adapter2']):
            raise ArgumentException("""Error:

                    --adapter1 and --adapter2 are required if running in
                    paired-end mode (i.e. when you give two FASTQ files).

                    """)
        adapters = [args['--adapter1'], args['--adapter2']]
    else:
        input_files = [os.path.expanduser(args['<input.fastq>'])]
        # NOTE: a stray --adapter2 is deliberately not rejected in this
        # branch. Per the original author's note, such a check only becomes
        # useful if the default is ever removed from the adapter options.
        adapters = [args['--adapter1']]

    outdir = args_to_out_dir(args)

    # Peel off surrounding quotes in the same order as always: ', then ",
    # then ' again (covers a value wrapped as '"..."').
    cutadapt_args = args['--cutadapt']
    for quote_char in ("'", '"', "'"):
        cutadapt_args = cutadapt_args.strip(quote_char)

    # Compress output?
    compress = args['--compress']

    return (compress, outdir, cutadapt_args, adapters, input_files)
Exemplo n.º 7
0
def parse_args(args):
    """Work out the input mode and helper functions from the arguments.

    The input is treated as paired-end FASTQ when ``<in2.fastq>`` is given.
    Otherwise the single input is checked with ``is_bam``: a BAM is paired
    or not according to ``is_paired_bam``, and anything else is handled as
    single-read FASTQ.

    Note:
        ``args`` is modified in place: input paths are replaced by their
        ``os.path.expanduser`` result and, for BAM input, the path is moved
        from ``args['<input.fastq>']`` (which is set to None) to
        ``args['<input.bam>']``.

    Args:
        args: Mapping from option/positional names (e.g. ``'--kit'``,
            ``'<input.fastq>'``) to their parsed values.

    Returns:
        The tuple ``(fp_extract_umi, fp_anno, fp_write, outdir, compress,
        force_paired, input_files)``.

    Raises:
        CannotContinueException: If ``--kit`` is not ``KIT_BIOO``.
        ControlFlowException: If the detected file type is neither FASTQ
            nor BAM (not expected to happen).
    """

    # Paired or single end? FASTQ or BAM?
    if args['<in2.fastq>']:
        # mode = FASTQ, PE
        paired = True
        filetype = 'FASTQ'
        # Convert ~ to real path
        args['<in1.fastq>'] = os.path.expanduser(args['<in1.fastq>'])
        args['<in2.fastq>'] = os.path.expanduser(args['<in2.fastq>'])
        input_files = [args['<in1.fastq>'], args['<in2.fastq>']]
    else:
        # Convert ~ to real path
        args['<input.fastq>'] = os.path.expanduser(args['<input.fastq>'])

        # Note: If you write the following for docopt:
        #   dupliganger remove-umi [options] <input.fastq>
        #   dupliganger remove-umi [options] <input.bam>
        # then it will always populate <input.fastq> and never populate
        # <input.bam>, hence the somewhat confusing names going on down
        # below...
        if is_bam(args['<input.fastq>']):
            # It is a bam, so hack docopt a bit
            args['<input.bam>'] = args['<input.fastq>']
            args['<input.fastq>'] = None
            paired = bool(is_paired_bam(args['<input.bam>']))
            filetype = 'BAM'
            input_files = [args['<input.bam>']]
        else:
            # mode = FASTQ, SR
            paired = False
            filetype = 'FASTQ'
            input_files = [args['<input.fastq>']]

    # What fp_extract_umi to use?
    kit = args['--kit']
    if kit != KIT_BIOO:
        raise CannotContinueException(
            """Kit {} is not supported.""".format(kit))
    if paired:
        fp_extract_umi = extract_paired_umis_bioo
    else:
        fp_extract_umi = extract_single_umi_bioo

    # Figure out which function to use to write to output file.
    num_threads = int(args['--threads'])
    compress = args['--compress']
    if num_threads > 1 and which('pigz') and compress:
        # return a partial for pigzwrite
        # TODO: Maybe alter this to num_threads/2 if --paired-end?
        fp_write = functools.partial(pigzwrite, num_threads)
    elif which('gzip') and compress:
        # return a partial for gzwrite
        fp_write = gzwrite
    else:
        fp_write = functools.partial(open, mode='w')

    # Return an appropriate function pointer for annotation.
    if filetype == 'FASTQ':
        if paired:
            fp_anno = create_annotated_files_from_fastq
        else:
            fp_anno = create_annotated_file_from_fastq
    elif filetype == 'BAM':
        if paired:
            fp_anno = create_annotated_files_from_bam
        else:
            fp_anno = create_annotated_file_from_bam
    else:
        raise ControlFlowException("""ERR213: Not possible to be here.""")

    outdir = args_to_out_dir(args)

    force_paired = args['--force-paired']
    if force_paired and not paired:
        # When the input is not paired, input_files holds exactly one path
        # (single-read FASTQ or unpaired BAM); name it in the warning. The
        # placeholder was previously left unformatted and printed as "{}".
        sys.stderr.write(
            "WARNING: Passed --force-paired but {} appears not to be "
            "paired-end.\n".format(input_files[0]))

    return (fp_extract_umi, fp_anno, fp_write, outdir, compress, force_paired,
            input_files)