Example #1
def get_parser():
    parser = argparse.ArgumentParser(
        description='Map sequence to current trace using squiggle ' +
        'predictor model',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    add_common_command_args(
        parser, "limit jobs output recursive version".split())

    parser.add_argument(
        '--back_prob', default=1e-15, metavar='probability',
        type=proportion, help='Probability of backwards move')
    parser.add_argument(
        '--input_strand_list', default=None, action=FileExists,
        help='Strand summary file containing subset')
    parser.add_argument(
        '--localpen', default=None, type=Maybe(NonNegative(float)),
        help='Penalty for staying in start and end states, or None to ' +
        'disable them')
    parser.add_argument(
        '--minscore', default=None, type=Maybe(NonNegative(float)),
        help='Minimum score for matching')
    parser.add_argument(
        '--trim', default=(200, 10), nargs=2, type=NonNegative(int),
        metavar=('beginning', 'end'),
        help='Number of samples to trim off start and end')

    parser.add_argument(
        'model', action=FileExists, help='Model file')
    parser.add_argument(
        'references', action=FileExists, help='Fasta file')
    parser.add_argument(
        'read_dir', action=FileExists,
        help='Directory for fast5 reads')

    return parser
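
The parsers in these examples lean on small validating type factories from taiyaki.cmdargs: Positive, NonNegative, Maybe and proportion. The sketch below is an assumption about their behaviour, not taiyaki's actual implementation; each wraps a base converter and rejects out-of-range values at parse time.

import argparse

def Positive(basetype):
    # Sketch: accept only values strictly greater than zero.
    def parse(value):
        x = basetype(value)
        if x <= 0:
            raise argparse.ArgumentTypeError(
                '{} is not positive'.format(value))
        return x
    return parse

def NonNegative(basetype):
    # Sketch: accept only values greater than or equal to zero.
    def parse(value):
        x = basetype(value)
        if x < 0:
            raise argparse.ArgumentTypeError(
                '{} is negative'.format(value))
        return x
    return parse

def Maybe(inner):
    # Sketch: like inner, but the literal string 'None' parses to None.
    def parse(value):
        return None if value == 'None' else inner(value)
    return parse

def proportion(value):
    # Sketch: a float constrained to the closed interval [0, 1].
    x = float(value)
    if not 0.0 <= x <= 1.0:
        raise argparse.ArgumentTypeError(
            '{} is not a proportion'.format(value))
    return x

Under these semantics, an option such as --gradient_cap_fraction in Example #2 can be given the literal None to disable capping, exactly as its help text suggests.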
Example #2
def get_parser():
    parser = argparse.ArgumentParser(
        description='Train a flip-flop neural network',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    add_common_command_args(
        parser, """adam alphabet device eps limit niteration
        outdir overwrite quiet save_every version
        weight_decay""".split())

    parser.add_argument('--batch_size',
                        default=128,
                        metavar='chunks',
                        type=Positive(int),
                        help='Number of chunks to run in parallel')
    parser.add_argument(
        '--gradient_cap_fraction',
        default=0.05,
        metavar='f',
        type=Maybe(NonNegative(float)),
        help='Cap L2 norm of gradient so that a fraction f of gradients ' +
        'are capped. Use --gradient_cap_fraction None for no capping.')
    parser.add_argument('--lr_max',
                        default=4.0e-3,
                        metavar='rate',
                        type=Positive(float),
                        help='Initial learning rate')
    parser.add_argument('--size',
                        default=96,
                        metavar='neurons',
                        type=Positive(int),
                        help='Base layer size for model')
    parser.add_argument('--seed',
                        default=None,
                        metavar='integer',
                        type=Positive(int),
                        help='Set random number seed')
    parser.add_argument('--stride',
                        default=2,
                        metavar='samples',
                        type=Positive(int),
                        help='Stride for model')
    parser.add_argument('--winlen',
                        default=19,
                        type=Positive(int),
                        help='Length of window over data')

    parser.add_argument('model',
                        action=FileExists,
                        help='File to read python model description from')
    parser.add_argument('chunks',
                        action=FileExists,
                        help='file containing chunks')
    parser.add_argument('reference',
                        action=FileExists,
                        help='file containing fasta reference')

    return parser
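
add_common_command_args, called at the top of every example, attaches a requested subset of shared options to the parser. A hypothetical sketch of the dispatch idea follows; the real helper in taiyaki.common_cmdargs covers many more option names, and the defaults shown here are illustrative only.

import argparse

def add_common_command_args(parser, arglist):
    # Sketch: add only the requested shared options to the parser.
    if 'limit' in arglist:
        parser.add_argument('--limit', default=None, type=int,
                            help='Limit number of reads to process')
    if 'jobs' in arglist:
        parser.add_argument('--jobs', default=1, type=int,
                            help='Number of threads to use')
    if 'output' in arglist:
        parser.add_argument('--output', default=None,
                            help='Write output to file, default stdout')
    if 'version' in arglist:
        parser.add_argument('--version', action='version',
                            version='sketch 0.0')  # illustrative string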
Example #3
def get_parser():
    parser = argparse.ArgumentParser(
        description='Dump JSON representation of model',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    add_common_command_args(parser, ["output"])

    parser.add_argument('model', action=FileExists, help='Model checkpoint')

    return parser
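
Each get_parser() above is consumed by a thin entry point. A minimal, illustrative sketch of that pattern (this main is hypothetical, not taiyaki's code):

def main():
    # Build the parser and validate the command line in one step.
    args = get_parser().parse_args()
    print(args.model)  # act on the validated arguments here

if __name__ == '__main__':
    main()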
Example #4
def get_parser():
    """Get argparser object.

    Returns:
        :argparse:`ArgumentParser` : the argparser object
    """
    parser = argparse.ArgumentParser(
        description="Prepare data for model training and save to hdf5 file " +
        "by remapping with flip-flop model",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    add_common_command_args(
        parser, ('alphabet input_folder input_strand_list jobs limit ' +
                 'overwrite recursive version').split())

    parser.add_argument('--localpen',
                        metavar='penalty',
                        default=0.0,
                        type=float,
                        help='Penalty for local mapping')
    parser.add_argument(
        '--max_read_length',
        metavar='bases',
        default=None,
        type=Maybe(int),
        help='Don\'t attempt remapping for reads longer than this')
    parser.add_argument('--mod',
                        nargs=3,
                        metavar=('mod_base', 'canonical_base',
                                 'mod_long_name'),
                        default=[],
                        action='append',
                        help='Modified base description')
    parser.add_argument(
        '--batch_format',
        action='store_true',
        help='Output batched mapped signal file format. This can ' +
        'significantly improve I/O performance and use less disk space. ' +
        'An entire batch must be loaded into memory in order to access ' +
        'any read, potentially increasing RAM requirements.')

    parser.add_argument('input_per_read_params',
                        action=FileExists,
                        help='Input per read parameter .tsv file')
    parser.add_argument('output', help='Output HDF5 file')
    parser.add_argument('model', action=FileExists, help='Taiyaki model file')
    parser.add_argument(
        'references',
        action=FileExists,
        help='Single fasta file containing references for each read')

    return parser
Example #5
def get_parser():
    parser = argparse.ArgumentParser(
        description="Basecall reads using a taiyaki model",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    add_common_command_args(
        parser, """alphabet device input_folder
        input_strand_list jobs limit output quiet
        recursive version""".split())

    parser.add_argument(
        '--beam', default=None, metavar=('width', 'guided'), nargs=2,
        type=(int, bool), action=ParseToNamedTuple,
        help='Use beam search decoding')
    parser.add_argument(
        "--chunk_size", type=Positive(int), metavar="blocks",
        default=basecall_helpers._DEFAULT_CHUNK_SIZE,
        help="Size of signal chunks sent to GPU is chunk_size * model stride")
    parser.add_argument(
        '--fastq', default=False, action=AutoBool,
        help='Write output in fastq format (default is fasta)')
    parser.add_argument(
        "--max_concurrent_chunks", type=Positive(int), default=128,
        help="Maximum number of chunks to call at "
        "once. Lower values will consume less (GPU) RAM.")
    parser.add_argument(
        "--overlap", type=NonNegative(int), metavar="blocks",
        default=basecall_helpers._DEFAULT_OVERLAP,
        help="Overlap between signal chunks sent to GPU")
    parser.add_argument(
        '--posterior', default=True, action=AutoBool,
        help='Use posterior-viterbi decoding')
    parser.add_argument(
        "--qscore_offset", type=float, default=0.0,
        help="Offset to apply to q scores in fastq (after scale)")
    parser.add_argument(
        "--qscore_scale", type=float, default=1.0,
        help="Scaling factor to apply to q scores in fastq")
    parser.add_argument(
        '--reverse', default=False, action=AutoBool,
        help='Reverse sequences in output')
    parser.add_argument(
        '--scaling', action=FileExists, default=None,
        help='Path to TSV containing per-read scaling params')
    parser.add_argument(
        '--temperature', default=1.0, type=float,
        help='Scaling factor applied to network outputs before decoding')
    parser.add_argument(
        "model", action=FileExists,
        help="Model checkpoint file to use for basecalling")

    return parser
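
The --beam option above combines nargs=2, a pair of metavar names and a tuple of types via the ParseToNamedTuple action. The sketch below is an assumption about how such an action can work, not the taiyaki.cmdargs implementation: it pockets the type tuple, converts each token with the matching entry, and stores the result as a namedtuple.

import argparse
from collections import namedtuple

class ParseToNamedTuple(argparse.Action):
    # Sketch: store nargs values as a namedtuple whose field names come
    # from metavar and whose field types come from the tuple passed as
    # type= (popped here so argparse does not apply it per token).
    def __init__(self, **kwargs):
        self.types = kwargs.pop('type')
        super().__init__(**kwargs)

    def __call__(self, parser, namespace, values, option_string=None):
        Value = namedtuple(self.dest, self.metavar)
        # Note: bool('False') is True, so a stricter boolean parser is
        # needed for fields like 'guided' in practice.
        setattr(namespace, self.dest,
                Value(*(conv(v) for conv, v in zip(self.types, values))))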
Example #6
def get_parser():
    parser = argparse.ArgumentParser(
        description='Upgrade mapped signal HDF5 file',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    add_common_command_args(parser, ['version'])

    parser.add_argument('input',
                        action=FileExists,
                        help='Mapped signal to read from')
    parser.add_argument('output',
                        action=FileAbsent,
                        help='Name for output upgraded mapped signal file')

    return parser
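
Example #6 validates its positional paths with the FileExists and FileAbsent actions. A minimal sketch of that pattern, assuming they simply check the path at parse time and store it:

import argparse
import os

class FileExists(argparse.Action):
    # Sketch: reject paths that do not exist, then store the value.
    def __call__(self, parser, namespace, values, option_string=None):
        if not os.path.exists(values):
            parser.error('File/path "{}" does not exist.'.format(values))
        setattr(namespace, self.dest, values)

class FileAbsent(argparse.Action):
    # Sketch: reject paths that already exist (output must be fresh).
    def __call__(self, parser, namespace, values, option_string=None):
        if os.path.exists(values):
            parser.error('File/path "{}" already exists.'.format(values))
        setattr(namespace, self.dest, values)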
Example #7
def get_parser():
    parser = argparse.ArgumentParser()

    add_common_command_args(parser,
                            ('input_folder input_strand_list limit output ' +
                             'recursive version jobs').split())

    parser.add_argument('--trim',
                        default=(200, 50),
                        nargs=2,
                        type=NonNegative(int),
                        metavar=('beginning', 'end'),
                        help='Number of samples to trim off start and end')

    return parser
Example #8
def get_parser():
    """Get argparser object.

    Returns:
        :argparse:`ArgumentParser` : the argparser object
    """
    parser = argparse.ArgumentParser(
        description='Predict squiggle from sequence',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    add_common_command_args(parser, "output version".split())

    parser.add_argument('model', action=FileExists, help='Model file')
    parser.add_argument('input', action=FileExists, help='Fasta file')

    return parser
Example #9
def get_parser():
    parser = argparse.ArgumentParser(
        description='Extract reference sequence for each read from a SAM ' +
        'alignment file',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    add_common_command_args(parser, ["output"])

    parser.add_argument('--complement',
                        default=False,
                        action=AutoBool,
                        help='Complement all reference sequences')
    parser.add_argument('--input_strand_list',
                        default=None,
                        action=FileExists,
                        help='Strand summary file containing subset')
    parser.add_argument(
        '--min_coverage',
        metavar='proportion',
        default=0.6,
        type=proportion,
        help='Ignore reads with alignments shorter than min_coverage * ' +
        'read length')
    parser.add_argument(
        '--pad',
        type=int,
        default=0,
        help='Number of bases by which to pad reference sequence')
    parser.add_argument('--reverse',
                        default=False,
                        action=AutoBool,
                        help='Reverse all reference sequences (for RNA)')
    parser.add_argument(
        'reference',
        action=FileExists,
        help="Genomic references that reads were aligned against")
    parser.add_argument(
        'input',
        metavar='input.sam',
        nargs='+',
        help="SAM or BAM file(s) containing read alignments to reference")

    return parser
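
Flags such as --reverse and --complement above use the AutoBool action, which conventionally registers a paired --no_<opt> flag alongside the positive form. A rough sketch of that behaviour; an assumption, since taiyaki's version may differ in detail:

import argparse

class AutoBool(argparse.Action):
    # Sketch: one declaration yields paired --opt / --no_opt flags.
    def __init__(self, option_strings, dest, default=None,
                 required=False, help=None):
        opt = option_strings[0]
        super().__init__(option_strings=[opt, '--no_' + opt[2:]],
                         dest=dest, nargs=0, default=default,
                         required=required, help=help)

    def __call__(self, parser, namespace, values, option_string=None):
        # The flag actually used decides the boolean value.
        setattr(namespace, self.dest,
                not option_string.startswith('--no_'))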
Example #10

COMPATIBLE_LAYERS = set((
    'convolution',
    'LSTM',
    'GruMod',
    'reverse',
    'GlobalNormTwoState',
    'GlobalNormTwoStateCatMod'))


parser = argparse.ArgumentParser(
    description='Convert JSON representation of model to pytorch checkpoint ' +
    'for use within taiyaki/megalodon.',
    formatter_class=argparse.ArgumentDefaultsHelpFormatter)
add_common_command_args(
    parser, ["output"])

parser.add_argument(
    'json_model', action=FileExists, help='JSON model with params')


def set_params_gru(layer, params_name, jsn_params, layer_params):
    # convert from guppy format back to pytorch format
    if re.search('weight_ih', params_name) and 'iW' in jsn_params:
        jsn_layer_params = torch.Tensor(np.concatenate([
            jsn_params['iW'][1], jsn_params['iW'][0], jsn_params['iW'][2]]))
    elif re.search('weight_hh', params_name) and 'sW' in jsn_params:
        jsn_layer_params = torch.Tensor(np.concatenate([
            jsn_params['sW'][1], jsn_params['sW'][0], jsn_params['sW'][2]]))
    elif re.search('bias_ih', params_name) and 'b' in jsn_params:
        jsn_layer_params = torch.Tensor(np.concatenate([
Example #11
import time

import torch
from torch.optim.lr_scheduler import CosineAnnealingLR

from taiyaki import ctc, flipflopfings, helpers
from taiyaki.cmdargs import FileExists, Positive
from taiyaki.common_cmdargs import add_common_command_args


# This is here, not in main to allow documentation to be built
parser = argparse.ArgumentParser(
    description='Train a flip-flop neural network',
    formatter_class=argparse.ArgumentDefaultsHelpFormatter)

add_common_command_args(parser, """adam alphabet device eps limit niteration
    outdir overwrite quiet save_every version""".split())

parser.add_argument('--batch_size', default=128, metavar='chunks',
                    type=Positive(int), help='Number of chunks to run in parallel')
parser.add_argument('--lr_max', default=4.0e-3, metavar='rate',
                    type=Positive(float), help='Initial learning rate')
parser.add_argument('--size', default=96, metavar='neurons',
                    type=Positive(int), help='Base layer size for model')
parser.add_argument('--seed', default=None, metavar='integer', type=Positive(int),
                    help='Set random number seed')
parser.add_argument('--stride', default=2, metavar='samples', type=Positive(int),
                    help='Stride for model')
parser.add_argument('--winlen', default=19, type=Positive(int),
                    help='Length of window over data')

parser.add_argument('model', action=FileExists,
Example #12
import torch

from taiyaki import (alphabet, chunk_selection, constants, ctc, flipflopfings,
                     helpers, layers, mapped_signal_files, maths, optim)
from taiyaki.cmdargs import AutoBool, FileExists, Maybe, NonNegative, Positive
from taiyaki.common_cmdargs import add_common_command_args
from taiyaki.constants import DOTROWLENGTH

# This is here, not in main to allow documentation to be built
parser = argparse.ArgumentParser(
    description='Train flip-flop neural network',
    formatter_class=argparse.ArgumentDefaultsHelpFormatter)

add_common_command_args(
    parser, """adam device eps filter_max_dwell filter_mean_dwell limit
    lr_cosine_iters niteration outdir overwrite quiet save_every
    sample_nreads_before_filtering version weight_decay""".split())

parser.add_argument('--chunk_len_min',
                    default=2000,
                    metavar='samples',
                    type=Positive(int),
                    help='Min length of each chunk in samples' +
                    ' (chunk lengths are random between min and max)')
parser.add_argument('--chunk_len_max',
                    default=4000,
                    metavar='samples',
                    type=Positive(int),
                    help='Max length of each chunk in samples ' +
                    '(chunk lengths are random between min and max)')
Example #13
import numpy as np
import os
import sys

from taiyaki import alphabet, bio, fast5utils, helpers, prepare_mapping_funcs
from taiyaki.cmdargs import FileExists, Maybe
from taiyaki.common_cmdargs import add_common_command_args
from taiyaki.iterators import imap_mp

program_description = "Prepare data for model training and save to hdf5 file by remapping with flip-flop model"
parser = argparse.ArgumentParser(
    description=program_description,
    formatter_class=argparse.ArgumentDefaultsHelpFormatter)

add_common_command_args(
    parser,
    'alphabet input_folder input_strand_list jobs limit overwrite recursive version'
    .split())

parser.add_argument('--localpen',
                    metavar='penalty',
                    default=0.0,
                    type=float,
                    help='Penalty for local mapping')
parser.add_argument('--max_read_length',
                    metavar='bases',
                    default=None,
                    type=Maybe(int),
                    help='Don\'t attempt remapping for reads longer than this')
parser.add_argument('--mod',
                    nargs=3,
                    metavar=('base', 'canonical', 'name'),
Example #14
from taiyaki import (chunk_selection, ctc, flipflopfings, helpers,
                     mapped_signal_files, optim)
from taiyaki import __version__
from taiyaki.cmdargs import FileExists, Positive
from taiyaki.common_cmdargs import add_common_command_args
from taiyaki.constants import DOTROWLENGTH

# This is here, not in main to allow documentation to be built
parser = argparse.ArgumentParser(
    description='Train a flip-flop neural network',
    formatter_class=argparse.ArgumentDefaultsHelpFormatter)

add_common_command_args(
    parser, """adam chunk_logging_threshold device filter_max_dwell
    filter_mean_dwell limit lr_cosine_iters niteration overwrite quiet
    save_every sample_nreads_before_filtering version weight_decay""".split())

parser.add_argument(
    '--chunk_len_min',
    default=2000,
    metavar='samples',
    type=Positive(int),
    help='Min length of each chunk in samples (chunk lengths are random ' +
    'between min and max)')
parser.add_argument(
    '--chunk_len_max',
    default=4000,
    metavar='samples',
Example #15
#!/usr/bin/env python3
import argparse
from Bio import SeqIO
import numpy as np
import torch

from taiyaki import helpers, squiggle_match
from taiyaki.cmdargs import FileExists
from taiyaki.common_cmdargs import add_common_command_args

parser = argparse.ArgumentParser(
    description='Predict squiggle from sequence',
    formatter_class=argparse.ArgumentDefaultsHelpFormatter)

add_common_command_args(parser, "output version".split())

parser.add_argument('model', action=FileExists, help='Model file')
parser.add_argument('input', action=FileExists, help='Fasta file')


def main():
    args = parser.parse_args()

    predict_squiggle = helpers.load_model(args.model)

    with helpers.open_file_or_stdout(args.output) as fh:
        for seq in SeqIO.parse(args.input, 'fasta'):
            seqstr = str(seq.seq)
            embedded_seq_numpy = np.expand_dims(
                squiggle_match.embed_sequence(seqstr), axis=1)
            embedded_seq_torch = torch.tensor(embedded_seq_numpy,
Example #16
import os
import sys

from ont_fast5_api import fast5_interface
from taiyaki.cmdargs import NonNegative
from taiyaki.common_cmdargs import add_common_command_args
import taiyaki.fast5utils as fast5utils
from taiyaki.helpers import open_file_or_stdout
from taiyaki.iterators import imap_mp
from taiyaki.maths import med_mad
from taiyaki.signal import Signal

parser = argparse.ArgumentParser()

add_common_command_args(
    parser,
    'input_folder input_strand_list limit output recursive version jobs'.split())

parser.add_argument('--trim',
                    default=(200, 50),
                    nargs=2,
                    type=NonNegative(int),
                    metavar=('beginning', 'end'),
                    help='Number of samples to trim off start and end')


def one_read_shift_scale(read_tuple):
    read_filename, read_id = read_tuple

    try:
Example #17
#!/usr/bin/env python3
import argparse

from taiyaki import common_cmdargs, fast5utils, helpers, squiggle_match
from taiyaki.cmdargs import (display_version_and_exit, FileExists, Maybe,
                             NonNegative, Positive, proportion)
from taiyaki.iterators import imap_mp
from taiyaki import __version__

parser = argparse.ArgumentParser(
    description='Map sequence to current trace using squiggle predictor model',
    formatter_class=argparse.ArgumentDefaultsHelpFormatter)

common_cmdargs.add_common_command_args(parser, "limit jobs version".split())

parser.add_argument('--back_prob',
                    default=1e-15,
                    metavar='probability',
                    type=proportion,
                    help='Probability of backwards move')
parser.add_argument('--input_strand_list',
                    default=None,
                    action=FileExists,
                    help='Strand summary file containing subset')
parser.add_argument(
    '--localpen',
    default=None,
    type=Maybe(NonNegative(float)),
    help='Penalty for staying in start and end states, or None to ' +
    'disable them')
parser.add_argument('--minscore',
Example #18
#!/usr/bin/env python3
import argparse

from taiyaki import fast5utils, helpers, squiggle_match
from taiyaki.cmdargs import (FileExists, Maybe, NonNegative, proportion)
from taiyaki.common_cmdargs import add_common_command_args
from taiyaki.iterators import imap_mp


parser = argparse.ArgumentParser(
    description='Map sequence to current trace using squiggle predictor model',
    formatter_class=argparse.ArgumentDefaultsHelpFormatter)


add_common_command_args(parser, "limit jobs output recursive version".split())

parser.add_argument('--back_prob', default=1e-15, metavar='probability',
                    type=proportion, help='Probability of backwards move')
parser.add_argument('--input_strand_list', default=None, action=FileExists,
                    help='Strand summary file containing subset')
parser.add_argument('--localpen', default=None, type=Maybe(NonNegative(float)),
                    help='Penalty for staying in start and end states, or None to disable them')
parser.add_argument('--minscore', default=None, type=Maybe(NonNegative(float)),
                    help='Minimum score for matching')
parser.add_argument('--trim', default=(200, 10), nargs=2, type=NonNegative(int),
                    metavar=('beginning', 'end'), help='Number of samples to trim off start and end')
parser.add_argument('model', action=FileExists, help='Model file')
parser.add_argument('references', action=FileExists, help='Fasta file')
parser.add_argument('read_dir', action=FileExists, help='Directory for fast5 reads')

Example #19
from taiyaki.decode import flipflop_make_trans, flipflop_viterbi
from taiyaki.flipflopfings import extract_mod_weights, nstate_flipflop, path_to_str
from taiyaki.helpers import (guess_model_stride, load_model,
                             open_file_or_stdout, Progress)
from taiyaki.maths import med_mad
from taiyaki.prepare_mapping_funcs import get_per_read_params_dict_from_tsv
from taiyaki.signal import Signal

STITCH_BEFORE_VITERBI = False

parser = argparse.ArgumentParser(
    description="Basecall reads using a taiyaki model",
    formatter_class=argparse.ArgumentDefaultsHelpFormatter)

add_common_command_args(
    parser, """alphabet device input_folder
                        input_strand_list limit output quiet
                        recursive version""".split())

parser.add_argument(
    "--chunk_size",
    type=Positive(int),
    metavar="blocks",
    default=basecall_helpers._DEFAULT_CHUNK_SIZE,
    help="Size of signal chunks sent to GPU is chunk_size * model stride")
parser.add_argument('--fastq',
                    default=False,
                    action=AutoBool,
                    help='Write output in fastq format (default is fasta)')
parser.add_argument("--max_concurrent_chunks",
                    type=Positive(int),
                    default=128,
Example #20
#!/usr/bin/env python3
# Combine mapped-read files in HDF5 format into a single file

import argparse
from taiyaki import mapped_signal_files
from taiyaki.cmdargs import Positive
from taiyaki.common_cmdargs import add_common_command_args

parser = argparse.ArgumentParser(
    description='Combine HDF5 mapped-read files into a single file')

add_common_command_args(parser, ['version'])
parser.add_argument('output', help='Output filename')
parser.add_argument('input', nargs='+', help='One or more input files')

# To convert to any new mapped read format (e.g. mapped_signal_files.SQL)
# we should be able to just change MAPPED_READ_CLASS and MAPPED_WRITE_CLASS
# to the new classes.
MAPPED_READ_CLASS = mapped_signal_files.HDF5Reader
MAPPED_WRITE_CLASS = mapped_signal_files.HDF5Writer


def main():
    args = parser.parse_args()

    with MAPPED_READ_CLASS(args.input[0]) as hin:
        #  Copy alphabet and modification information from first file
        in_alphabet, in_collapse_alphabet, in_mod_long_names \
            = hin.get_alphabet_information()
        args.alphabet = in_alphabet
        args.collapse_alphabet = in_collapse_alphabet
        args.mod_long_names = in_mod_long_names
Example #21
#!/usr/bin/env python
import argparse
from taiyaki.iterators import imap_mp
import os
import sys
from taiyaki.cmdargs import FileExists
import taiyaki.common_cmdargs as common_cmdargs
from taiyaki import fast5utils, helpers, prepare_mapping_funcs, variables

program_description = "Prepare data for model training and save to hdf5 file by remapping with flip-flop model"
parser = argparse.ArgumentParser(
    description=program_description,
    formatter_class=argparse.ArgumentDefaultsHelpFormatter)

common_cmdargs.add_common_command_args(
    parser,
    'device input_folder input_strand_list jobs limit overwrite version'.split())
default_alphabet_str = variables.DEFAULT_ALPHABET.decode("utf-8")
parser.add_argument('--alphabet',
                    default=default_alphabet_str,
                    help='Alphabet for basecalling. Defaults to ' +
                    default_alphabet_str)
parser.add_argument('--collapse_alphabet',
                    default=default_alphabet_str,
                    help='Collapsed alphabet for basecalling. Defaults to ' +
                    default_alphabet_str)
parser.add_argument('input_per_read_params',
                    action=FileExists,
                    help='Input per read parameter .tsv file')
parser.add_argument('output', help='Output HDF5 file')
parser.add_argument('model', action=FileExists, help='Taiyaki model file')
Example #22
def get_parser():
    parser = argparse.ArgumentParser(
        description='Train a model to predict ionic current levels ' +
        'from sequence',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    add_common_command_args(
        parser, """adam device eps filter_max_dwell filter_mean_dwell limit
        niteration outdir overwrite quiet reverse save_every
        sample_nreads_before_filtering version weight_decay""".split())

    parser.add_argument('--batch_size',
                        default=100,
                        metavar='chunks',
                        type=Positive(int),
                        help='Number of chunks to run in parallel')
    parser.add_argument('--back_prob',
                        default=1e-15,
                        metavar='probability',
                        type=proportion,
                        help='Probability of backwards move')
    parser.add_argument('--depth',
                        metavar='layers',
                        default=4,
                        type=Positive(int),
                        help='Number of residual convolution layers')
    parser.add_argument(
        '--drop_slip',
        default=5,
        type=Maybe(Positive(int)),
        metavar='length',
        help='Drop chunks with slips greater than given length (None = off)')
    parser.add_argument(
        '--filter_path_buffer',
        default=1.1,
        metavar='ratio',
        type=float,
        help='Drop chunks with small ratio of signal length to bases * ' +
        'model stride, which would restrict potential CTC paths.')
    parser.add_argument(
        '--filter_min_pass_fraction',
        default=0.5,
        metavar='fraction',
        type=Maybe(Positive(float)),
        help='Halt if fraction of chunks passing tests is less than this')
    parser.add_argument('--full_filter_status',
                        default=False,
                        action=AutoBool,
                        help='Output full chunk filtering statistics. ' +
                        'Default: only proportion of filtered chunks.')
    parser.add_argument(
        '--input_strand_list',
        default=None,
        action=FileExists,
        help='Strand summary file containing column read_id. Filenames in ' +
        'file are ignored.')
    parser.add_argument(
        '--lr_decay',
        default=5000,
        metavar='n',
        type=Positive(float),
        help='Learning rate for batch i is lr_max / (1.0 + i / n)')
    parser.add_argument('--lr_max',
                        default=1.0e-4,
                        metavar='rate',
                        type=Positive(float),
                        help='Max (and starting) learning rate')
    parser.add_argument('--sd',
                        default=0.5,
                        metavar='value',
                        type=Positive(float),
                        help='Standard deviation to initialise with')
    parser.add_argument('--seed',
                        default=None,
                        metavar='integer',
                        type=Positive(int),
                        help='Set random number seed')
    parser.add_argument('--size',
                        metavar='n',
                        default=32,
                        type=Positive(int),
                        help='Size of layers in convolution network')
    parser.add_argument('--target_len',
                        metavar='n',
                        default=300,
                        type=Positive(int),
                        help='Target length of sequence')
    parser.add_argument('--winlen',
                        metavar='n',
                        default=9,
                        type=Positive(int),
                        help='Window for convolution network')
    parser.add_argument('input',
                        action=FileExists,
                        help='HDF5 file containing mapped reads')

    return parser