示例#1
0
def get_parser():
    """Build the argument parser for mapping sequence to current trace.

    The shared options (limit, jobs, output, recursive, version) are
    registered through add_common_command_args; the script-specific
    options and the three positional arguments (model, references,
    read_dir) are declared here.

    Returns:
        argparse.ArgumentParser: the configured parser.
    """
    parser = argparse.ArgumentParser(
        description='Map sequence to current trace using squiggle ' +
        'predictor model',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    add_common_command_args(
        parser, "limit jobs output recursive version".split())

    # Script-specific optional arguments.
    parser.add_argument(
        '--back_prob', default=1e-15, metavar='probability',
        type=proportion, help='Probability of backwards move')
    parser.add_argument(
        '--input_strand_list', default=None, action=FileExists,
        help='Strand summary file containing subset')
    parser.add_argument(
        '--localpen', default=None, type=Maybe(NonNegative(float)),
        help='Penalty for staying in start and end states, or None to ' +
        'disable them')
    parser.add_argument(
        '--minscore', default=None, type=Maybe(NonNegative(float)),
        help='Minimum score for matching')
    parser.add_argument(
        '--trim', default=(200, 10), nargs=2, type=NonNegative(int),
        metavar=('beginning', 'end'),
        help='Number of samples to trim off start and end')

    # Positional arguments, in the order they must appear on the
    # command line.
    parser.add_argument(
        'model', action=FileExists, help='Model file')
    parser.add_argument(
        'references', action=FileExists, help='Fasta file')
    parser.add_argument(
        'read_dir', action=FileExists,
        help='Directory for fast5 reads')

    # Hand the configured parser back; without this the function
    # implicitly returned None.
    return parser
示例#2
0
def get_parser():
    """Create the command-line parser for flip-flop network training.

    Options shared with other scripts are registered through
    add_common_command_args; the training-specific options and the three
    positional inputs (model, chunks, reference) are declared here.

    Returns:
        argparse.ArgumentParser: the configured parser.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description='Train a flip-flop neural network')

    shared_options = [
        'adam', 'alphabet', 'device', 'eps', 'limit', 'niteration',
        'outdir', 'overwrite', 'quiet', 'save_every', 'version',
        'weight_decay']
    add_common_command_args(parser, shared_options)

    # Script-specific optional arguments.
    parser.add_argument(
        '--batch_size', type=Positive(int), default=128, metavar='chunks',
        help='Number of chunks to run in parallel')
    parser.add_argument(
        '--gradient_cap_fraction', type=Maybe(NonNegative(float)),
        default=0.05, metavar='f',
        help='Cap L2 norm of gradient so that a fraction f of gradients '
        'are capped. Use --gradient_cap_fraction None for no capping.')
    parser.add_argument(
        '--lr_max', type=Positive(float), default=4.0e-3, metavar='rate',
        help='Initial learning rate')
    parser.add_argument(
        '--size', type=Positive(int), default=96, metavar='neurons',
        help='Base layer size for model')
    parser.add_argument(
        '--seed', type=Positive(int), default=None, metavar='integer',
        help='Set random number seed')
    parser.add_argument(
        '--stride', type=Positive(int), default=2, metavar='samples',
        help='Stride for model')
    parser.add_argument(
        '--winlen', type=Positive(int), default=19,
        help='Length of window over data')

    # Positional arguments, in command-line order.
    parser.add_argument(
        'model', action=FileExists,
        help='File to read python model description from')
    parser.add_argument(
        'chunks', action=FileExists, help='file containing chunks')
    parser.add_argument(
        'reference', action=FileExists,
        help='file containing fasta reference')

    return parser
示例#3
0
def get_parser():
    """Create the command-line parser for basecalling with a taiyaki model.

    Shared options come from add_common_command_args; decoding, chunking
    and output-format options plus the positional model file are added
    here.

    Returns:
        argparse.ArgumentParser: the configured parser.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="Basecall reads using a taiyaki model")

    shared_options = [
        'alphabet', 'device', 'input_folder', 'input_strand_list', 'jobs',
        'limit', 'output', 'quiet', 'recursive', 'version']
    add_common_command_args(parser, shared_options)

    # NOTE(review): the second --beam field is converted with bool; how
    # ParseToNamedTuple applies it to the raw string is not visible here.
    parser.add_argument('--beam',
                        nargs=2,
                        metavar=('width', 'guided'),
                        type=(int, bool),
                        action=ParseToNamedTuple,
                        default=None,
                        help='Use beam search decoding')
    parser.add_argument('--chunk_size',
                        metavar='blocks',
                        type=Positive(int),
                        default=basecall_helpers._DEFAULT_CHUNK_SIZE,
                        help='Size of signal chunks sent to GPU is '
                        'chunk_size * model stride')
    parser.add_argument('--fastq',
                        action=AutoBool,
                        default=False,
                        help='Write output in fastq format '
                        '(default is fasta)')
    parser.add_argument('--max_concurrent_chunks',
                        type=Positive(int),
                        default=128,
                        help='Maximum number of chunks to call at once. '
                        'Lower values will consume less (GPU) RAM.')
    parser.add_argument('--overlap',
                        metavar='blocks',
                        type=NonNegative(int),
                        default=basecall_helpers._DEFAULT_OVERLAP,
                        help='Overlap between signal chunks sent to GPU')
    parser.add_argument('--posterior',
                        action=AutoBool,
                        default=True,
                        help='Use posterior-viterbi decoding')
    parser.add_argument('--qscore_offset',
                        type=float,
                        default=0.0,
                        help='Offset to apply to q scores in fastq '
                        '(after scale)')
    parser.add_argument('--qscore_scale',
                        type=float,
                        default=1.0,
                        help='Scaling factor to apply to q scores in fastq')
    parser.add_argument('--reverse',
                        action=AutoBool,
                        default=False,
                        help='Reverse sequences in output')
    parser.add_argument('--scaling',
                        action=FileExists,
                        default=None,
                        help='Path to TSV containing per-read scaling params')
    parser.add_argument('--temperature',
                        type=float,
                        default=1.0,
                        help='Scaling factor applied to network outputs '
                        'before decoding')

    # The model checkpoint is the only positional declared here.
    parser.add_argument('model',
                        action=FileExists,
                        help='Model checkpoint file to use for basecalling')

    return parser
def get_parser():
    """Create a parser with the shared read-input options plus --trim.

    Returns:
        argparse.ArgumentParser: the configured parser.
    """
    parser = argparse.ArgumentParser()

    shared_options = [
        'input_folder', 'input_strand_list', 'limit', 'output',
        'recursive', 'version', 'jobs']
    add_common_command_args(parser, shared_options)

    # Two non-negative integers: samples removed from the start and
    # from the end.
    parser.add_argument(
        '--trim', nargs=2, metavar=('beginning', 'end'),
        type=NonNegative(int), default=(200, 50),
        help='Number of samples to trim off start and end')

    return parser
def get_parser():
    """Build the argument parser for merging mapped-signal files.

    The parser takes one positional output filename, a required and
    repeatable ``--input`` option (each use supplies a filename and a
    read count), and several optional switches controlling loading,
    random selection and output format.

    Returns:
        argparse.ArgumentParser: the configured parser.
    """
    parser = argparse.ArgumentParser(
        description='Combine mapped-signal files into a single file. ' +
        'Checks that alphabets are compatible.')
    parser.add_argument('output', help='Output filename')

    # action='append' with nargs=2 collects one [filename, num_reads]
    # pair per occurrence of --input.
    parser.add_argument(
        '--input',
        required=True,
        nargs=2,
        action='append',
        metavar=('mapped_signal_file', 'num_reads'),
        help='Mapped signal filename and the number of reads to merge from ' +
        'this file. Specify "None" to merge all reads from a file.')
    parser.add_argument(
        '--load_in_mem',
        action=AutoBool,
        default=True,
        help='Load each input file into memory before processing. ' +
        'Potentially large increase in speed but also increased memory usage')
    parser.add_argument(
        '--seed',
        type=Maybe(NonNegative(int)),
        default=None,
        help='Seed for randomly selected reads when limits are set ' +
        '(default random seed)')
    parser.add_argument(
        '--allow_mod_merge',
        action='store_true',
        help='Allow merging of data sets with different modified bases. ' +
        'While alphabets may differ, incompatible alphabets are not allowed ' +
        '(e.g. same single letter code used for different canonical bases).')
    # Help text corrected: the original read "in order access any read
    # potentailly increasing".
    parser.add_argument(
        '--batch_format',
        action='store_true',
        help='Output batched mapped signal file format. This can ' +
        'significantly improve I/O performance and use less ' +
        'disk space. An entire batch must be loaded into memory in order ' +
        'to access any read, potentially increasing RAM requirements.')

    return parser
示例#6
0
def add_common_command_args(parser, arglist):
    """Register the requested shared command-line arguments on a parser.

    Args:
        parser: argparse parser object that receives the arguments.
        arglist: collection of keys, e.g. ['input_strand_list', 'jobs'],
            naming which of the arguments below should be added. Keys
            that match none of the checks below add nothing.

    This function does not cover every command-line argument used in the
    package: it holds only those shared by more than one script with the
    same defaults.

    Optional arguments are handled first, positional ones afterwards.
    """
    # ------------------------------------------------------------------
    # Optional arguments
    # ------------------------------------------------------------------

    if 'adam' in arglist:
        parser.add_argument(
            '--adam', nargs=2, metavar=('beta1', 'beta2'),
            type=NonNegative(float), default=[0.9, 0.999],
            help='Parameters beta1, beta2 for Exponential Decay '
            'Adaptive Momentum')

    if 'alphabet' in arglist:
        parser.add_argument(
            '--alphabet', default=DEFAULT_ALPHABET,
            help='Canonical base alphabet')

    if 'device' in arglist:
        parser.add_argument(
            '--device', action=DeviceAction, default='cpu',
            help='Integer specifying which GPU to use, or "cpu" to use '
            'CPU only. Other accepted formats: "cuda" (use default GPU), '
            '"cuda:2" or "cuda2" (use GPU 2).')

    if 'eps' in arglist:
        parser.add_argument(
            '--eps', type=Positive(float), default=1e-6,
            metavar='adjustment',
            help='Small value to stabilise optimiser')

    if 'filter_max_dwell' in arglist:
        parser.add_argument(
            '--filter_max_dwell', type=Maybe(Positive(float)),
            default=10.0, metavar='multiple',
            help='Drop chunks with max dwell more than multiple of median '
            '(over chunks)')

    if 'filter_mean_dwell' in arglist:
        parser.add_argument(
            '--filter_mean_dwell', type=Maybe(Positive(float)),
            default=3.0, metavar='radius',
            help='Drop chunks with mean dwell more than radius deviations '
            'from the median (over chunks)')

    if 'input_strand_list' in arglist:
        parser.add_argument(
            '--input_strand_list', action=FileExists, default=None,
            help='Strand list TSV file with columns filename_fast5 or '
            'read_id or both')

    if 'jobs' in arglist:
        parser.add_argument(
            '--jobs', type=Positive(int), default=1, metavar='n',
            help='Number of threads to use when processing data')

    if 'limit' in arglist:
        parser.add_argument(
            '--limit', type=Maybe(Positive(int)), default=None,
            help='Limit number of reads to process')

    if 'niteration' in arglist:
        parser.add_argument(
            '--niteration', type=Positive(int), default=50000,
            metavar='batches',
            help='Maximum number of batches to train for')

    if 'outdir' in arglist:
        parser.add_argument(
            '--outdir', default='training',
            help='Output directory, created when run.')

    if 'output' in arglist:
        parser.add_argument(
            '--output', action=FileAbsent, default=None,
            metavar='filename',
            help='Write output to file')

    if 'overwrite' in arglist:
        parser.add_argument(
            '--overwrite', action=AutoBool, default=False,
            help='Whether to overwrite any output files')

    if 'quiet' in arglist:
        parser.add_argument(
            '--quiet', action=AutoBool, default=False,
            help="Don't print progress information to stdout")

    if 'recursive' in arglist:
        parser.add_argument(
            '--recursive', action=AutoBool, default=True,
            help='Search for fast5s recursively within input_folder. '
            'Otherwise only search first level.')

    if 'sample_nreads_before_filtering' in arglist:
        parser.add_argument(
            '--sample_nreads_before_filtering', type=NonNegative(int),
            default=1000, metavar='n',
            help='Sample n reads to decide on bounds for filtering before '
            'training. Set to 0 to do all.')

    if 'save_every' in arglist:
        parser.add_argument(
            '--save_every', type=Positive(int), default=5000, metavar='x',
            help='Save model every x batches')

    if 'version' in arglist:
        parser.add_argument(
            '--version', nargs=0, action=display_version_and_exit,
            metavar=__version__,
            help='Display version information.')

    if 'weight_decay' in arglist:
        parser.add_argument(
            '--weight_decay', type=NonNegative(float), default=0.0,
            metavar='penalty',
            help='Adam weight decay (L2 normalisation penalty)')

    # ------------------------------------------------------------------
    # Positional arguments
    # ------------------------------------------------------------------

    if 'input_folder' in arglist:
        parser.add_argument(
            'input_folder', action=FileExists,
            help='Directory containing single or multi-read fast5 files')
示例#7
0
def get_train_flipflop_parser():
    """Build the argument parser for flip-flop network training.

    Options are organised into argument groups (model, training, data,
    compute, output, modified base, miscellaneous) so that the help
    output is sectioned. Two positional arguments, ``model`` and
    ``input``, are added directly to the parser.

    Returns:
        argparse.ArgumentParser: the configured parser.
    """
    parser = argparse.ArgumentParser(
        description='Train flip-flop neural network',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    mdl_grp = parser.add_argument_group('Model Arguments')
    mdl_grp.add_argument(
        '--size', default=384, metavar='neurons',
        type=Positive(int), help='Base layer size for model')
    mdl_grp.add_argument(
        '--stride', default=5, metavar='samples',
        type=Positive(int), help='Stride for model')
    mdl_grp.add_argument(
        '--winlen', default=19, type=Positive(int),
        help='Length of window over data')

    trn_grp = parser.add_argument_group('Training Arguments')
    trn_grp.add_argument(
        '--adam', nargs=2, metavar=('beta1', 'beta2'),
        default=[0.9, 0.999], type=NonNegative(float),
        help='Parameters beta1, beta2 for Exponential Decay ' +
        'Adaptive Momentum')
    trn_grp.add_argument(
        '--eps', default=1e-6, metavar='adjustment',
        type=Positive(float), help='Small value to stabilise optimiser')
    trn_grp.add_argument(
        '--niteration', metavar='batches', type=Positive(int),
        default=150000, help='Maximum number of batches to train for')
    trn_grp.add_argument(
        '--weight_decay', default=0.01, metavar='penalty',
        type=NonNegative(float),
        help='Adam weight decay (L2 normalisation penalty)')
    trn_grp.add_argument(
        '--gradient_clip_num_mads', default=0, metavar='num_MADs',
        type=Maybe(NonNegative(float)),
        help='Clip gradients (by value) at num_MADs above the median of ' +
        'the last 1000 parameter gradient maximums. Gradient threshold ' +
        'values are computed for each parameter group independently. Use ' +
        '"--gradient_clip_num_mads None" for no clipping.')
    trn_grp.add_argument(
        '--lr_max', default=4.0e-3, metavar='rate', type=Positive(float),
        help='Max learning rate, reached at --warmup_batches iterations.')
    trn_grp.add_argument(
        '--lr_min', default=1.0e-4, metavar='rate', type=Positive(float),
        help='Min (starting and final) learning rate')
    trn_grp.add_argument(
        '--seed', default=None, metavar='integer', type=Positive(int),
        help='Set random number seed')
    trn_grp.add_argument(
        '--sharpen', default=(1.0, 1.0, 25000), nargs=3,
        metavar=('min', 'max', 'niter'), action=ParseToNamedTuple,
        type=(Positive(float), Positive(float), Positive(int)),
        help='Increase sharpening factor linearly from "min" to ' +
        '"max" over "niter" iterations')
    trn_grp.add_argument(
        '--warmup_batches', type=int, default=200,
        help='Over first n batches, increase learning rate like cosine.')
    # No default is given for the next two options, so they are None
    # unless set on the command line.
    trn_grp.add_argument(
        '--lr_warmup', metavar='rate', type=Positive(float),
        help='Start learning rate for warmup. Defaults to lr_min.')
    trn_grp.add_argument(
        '--min_momentum', type=Positive(float),
        help='Min momentum in cycling. default = Adam beta1, no cycling')

    data_grp = parser.add_argument_group('Data Arguments')
    data_grp.add_argument(
        '--filter_max_dwell', default=10.0, metavar='multiple',
        type=Maybe(Positive(float)),
        help='Drop chunks with max dwell more than multiple of median ' +
        '(over chunks)')
    data_grp.add_argument(
        '--filter_mean_dwell', default=3.0, metavar='radius',
        type=Maybe(Positive(float)),
        help='Drop chunks with mean dwell more than radius deviations ' +
        'from the median (over chunks)')
    data_grp.add_argument(
        '--filter_min_pass_fraction', default=0.5, metavar='fraction',
        type=Maybe(Positive(float)),
        help='Halt if fraction of chunks passing tests is less than this')
    data_grp.add_argument(
        '--filter_path_buffer', default=1.1, metavar='ratio',
        type=Bounded(float, lower=1.0),
        help='Drop chunks with small ratio of signal length to bases * ' +
        'model stride, which would restrict potential CTC paths. Must be ' +
        'greater than 1.0.')
    data_grp.add_argument(
        '--limit', default=None, type=Maybe(Positive(int)),
        help='Limit number of reads to process')
    data_grp.add_argument(
        '--reverse', default=False, action=AutoBool,
        help='Reverse input sequence and current')
    data_grp.add_argument(
        '--sample_nreads_before_filtering', metavar='n',
        type=NonNegative(int), default=100000,
        help='Sample n reads to decide on bounds for filtering before ' +
        'training. Set to 0 to do all.')
    data_grp.add_argument(
        '--chunk_len_min', default=3000, metavar='samples', type=Positive(int),
        help='Min length of each chunk in samples (chunk lengths are ' +
        'random between min and max)')
    data_grp.add_argument(
        '--chunk_len_max', default=8000, metavar='samples', type=Positive(int),
        help='Max length of each chunk in samples (chunk lengths are ' +
        'random between min and max)')
    # Help text corrected: it is the reporting strands (not the training
    # strands) that are held out by default.
    data_grp.add_argument(
        '--include_reporting_strands', default=False, action=AutoBool,
        help='Include reporting strands in training. Default: Hold ' +
        'reporting strands out of training.')
    data_grp.add_argument(
        '--input_strand_list', default=None, action=FileExists,
        help='Strand summary file containing column read_id. Filenames in ' +
        'file are ignored.')
    data_grp.add_argument(
        '--min_sub_batch_size', default=128, metavar='chunks',
        type=Positive(int),
        help='Number of chunks to run in parallel per sub-batch for ' +
        'chunk_len = chunk_len_max. Actual length of sub-batch used is ' +
        '(min_sub_batch_size * chunk_len_max / chunk_len).')
    # metavar corrected from the copy-pasted 'sub_batches': this value
    # is a percentage of reads.
    data_grp.add_argument(
        '--reporting_percent_reads', default=1, metavar='percent',
        type=Positive(float),
        help='Percent of reads to use for std loss reporting')
    data_grp.add_argument(
        '--reporting_strand_list', action=FileExists,
        help='Strand summary file containing column read_id. All other ' +
        'fields are ignored. If not provided reporting strands will be ' +
        'randomly selected.')
    data_grp.add_argument(
        '--reporting_sub_batches', default=100, metavar='sub_batches',
        type=Positive(int),
        help='Number of sub-batches to use for std loss reporting')
    data_grp.add_argument(
        '--standardize', default=True, action=AutoBool,
        help='Standardize currents for each read')
    data_grp.add_argument(
        '--sub_batches', default=1, metavar='sub_batches', type=Positive(int),
        help='Number of sub-batches per batch')

    cmp_grp = parser.add_argument_group('Compute Arguments')
    cmp_grp.add_argument(
        '--device', default='cpu', action=DeviceAction,
        help='Integer specifying which GPU to use, or "cpu" to use CPU only. '
        'Other accepted formats: "cuda" (use default GPU), "cuda:2" '
        'or "cuda2" (use GPU 2).')
    # Argument local_rank is used only when the script is run in
    # multi-GPU mode using torch.distributed.launch (see the README), so
    # it is hidden from the help output.
    cmp_grp.add_argument(
        '--local_rank', type=int, default=None, help=argparse.SUPPRESS)

    out_grp = parser.add_argument_group('Output Arguments')
    out_grp.add_argument(
        '--full_filter_status', default=False, action=AutoBool,
        help='Output full chunk filtering statistics. Default: only ' +
        'proportion of filtered chunks.')
    out_grp.add_argument(
        '--outdir', default='training',
        help='Output directory, created when run.')
    out_grp.add_argument(
        '--overwrite', default=False, action=AutoBool,
        help='Whether to overwrite any output files')
    out_grp.add_argument(
        '--quiet', default=False, action=AutoBool,
        help="Don't print progress information to stdout")
    out_grp.add_argument(
        '--save_every', metavar='x', type=Positive(int), default=2500,
        help='Save model every x batches')

    mod_grp = parser.add_argument_group('Modified Base Arguments')
    mod_grp.add_argument(
        '--mod_factor', default=(8.0, 1.0, 50000), nargs=3,
        metavar=('start', 'final', 'niter'), action=ParseToNamedTuple,
        type=(Positive(float), Positive(float), Positive(int)),
        help='Relative weight applied to modified base transitions in ' +
        'loss/gradient compared to canonical transitions. Larger values ' +
        'increase the effective modified base learning rate. Scale factor ' +
        'linearly from "start" to "final" over first "niter" iterations')
    mod_grp.add_argument(
        '--mod_prior_factor', type=float,
        help='Exponential factor applied to prior mod weights estimated ' +
        'from training data. Intended to balance modified base scores. ' +
        'Default: no mod prior')
    mod_grp.add_argument(
        '--num_mod_weight_reads', type=int, default=5000,
        help='Number of reads to sample to compute the modified base prior ' +
        'weights from the training data.')

    # Group title corrected: the original had a double space.
    misc_grp = parser.add_argument_group('Miscellaneous Arguments')
    misc_grp.add_argument(
        '--version', nargs=0, action=display_version_and_exit,
        metavar=__version__,
        help='Display version information.')

    # Positional arguments, added outside any group.
    parser.add_argument(
        'model', action=FileExists,
        help='File to read python model (or checkpoint) from')
    parser.add_argument(
        'input', action=FileExists,
        help='file containing mapped reads')

    return parser
示例#8
0
# Output-format, chunking and q-score options for the basecaller parser.
parser.add_argument(
    '--fastq', action=AutoBool, default=False,
    help='Write output in fastq format (default is fasta)')
parser.add_argument(
    '--max_concurrent_chunks', type=Positive(int), default=128,
    help='Maximum number of chunks to call at once. '
    'Lower values will consume less (GPU) RAM.')
parser.add_argument(
    '--modified_base_output', action=FileAbsent, default=None,
    metavar='mod_basecalls.hdf5',
    help='Output filename for modified base output.')
parser.add_argument(
    '--overlap', type=NonNegative(int), metavar='blocks',
    default=basecall_helpers._DEFAULT_OVERLAP,
    help='Overlap between signal chunks sent to GPU')
parser.add_argument(
    '--qscore_offset', type=float, default=0.0,
    help='Offset to apply to q scores in fastq (after scale)')
parser.add_argument(
    '--qscore_scale', type=float, default=1.0,
    help='Scaling factor to apply to q scores in fastq')
parser.add_argument(
    '--reverse', action=AutoBool, default=False,
    help='Reverse sequences in output')
示例#9
0
                    ' (chunk lengths are random between min and max)')
parser.add_argument(
    '--chunk_len_max', type=Positive(int), default=4000, metavar='samples',
    help='Max length of each chunk in samples '
    '(chunk lengths are random between min and max)')
parser.add_argument(
    '--full_filter_status', action=AutoBool, default=False,
    help='Output full chunk filtering statistics. '
    'Default: only proportion of filtered chunks.')
parser.add_argument(
    '--gradient_cap_fraction', type=Maybe(NonNegative(float)),
    default=0.05, metavar='f',
    help='Cap L2 norm of gradient so that a fraction f of gradients are '
    'capped. Use --gradient_cap_fraction None for no capping.')
parser.add_argument(
    '--input_strand_list', action=FileExists, default=None,
    help='Strand summary file containing column read_id. '
    'Filenames in file are ignored.')
# local_rank is only used when the script is run in multi-GPU mode via
# torch.distributed.launch (see the README); it is hidden from --help.
parser.add_argument(
    '--local_rank', type=int, default=None, help=argparse.SUPPRESS)
parser.add_argument('--lr_cosine_iters',
示例#10
0
# Module-level switch; nothing in the visible code reads it.
STITCH_BEFORE_VITERBI = False


# Command-line interface for the basecaller, built at import time.
parser = argparse.ArgumentParser(
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    description="Basecall reads using a taiyaki model")

add_common_command_args(
    parser,
    ['device', 'input_folder', 'input_strand_list', 'limit', 'output',
     'quiet', 'recursive', 'version'])

parser.add_argument(
    '--alphabet', default=DEFAULT_ALPHABET,
    help='Alphabet used by basecaller')
parser.add_argument(
    '--chunk_size', type=Positive(int),
    default=basecall_helpers._DEFAULT_CHUNK_SIZE,
    help='Size of signal chunks sent to GPU')
parser.add_argument(
    '--overlap', type=NonNegative(int),
    default=basecall_helpers._DEFAULT_OVERLAP,
    help='Overlap between signal chunks sent to GPU')
parser.add_argument(
    '--modified_base_output', action=FileAbsent, default=None,
    help='Output filename for modified base output.')
parser.add_argument(
    'model', action=FileExists,
    help='Model checkpoint file to use for basecalling')


def med_mad_norm(x, dtype='f4'):
    """Shift and scale an array by the two values from med_mad.

    The pair returned by ``med_mad(x)`` is used as (centre, scale),
    presumably the median and MAD of ``x``; the centre is subtracted,
    the result is divided by the scale, and the outcome is cast to
    ``dtype``.
    """
    centre, spread = med_mad(x)
    return ((x - centre) / spread).astype(dtype)

示例#11
0
def add_common_command_args(parser, arglist):
    """Add the requested shared command-line arguments to a parser.

    Args:
        parser: argparse parser object that receives the arguments.
        arglist: collection of keys, such as
            ['input_strand_list', 'jobs'], selecting which of the
            arguments below are added. Keys that match none of the
            checks below add nothing.

    Note that not all command line args used in the package are
    included in this func: only those that are used by more than
    one script and which have the same defaults.

    Also note that some args are positional and some are optional.
    The optional ones are listed first below.
    """

    ############################################################################
    #
    # Optional arguments
    #
    ############################################################################

    if 'adam' in arglist:
        # Help text corrected: the original spelled it "Momementum".
        parser.add_argument(
            '--adam',
            nargs=3,
            metavar=('rate', 'decay1', 'decay2'),
            default=(1e-3, 0.9, 0.999),
            type=(NonNegative(float), NonNegative(float), NonNegative(float)),
            action=ParseToNamedTuple,
            help='Parameters for Exponential Decay Adaptive Momentum')

    if 'chunk_logging_threshold' in arglist:
        parser.add_argument(
            '--chunk_logging_threshold',
            default=10.0,
            metavar='multiple',
            type=NonNegative(float),
            help=
            'If loss > (threshold * smoothed loss) for a batch, then log chunks to '
            +
            'output/chunklog.tsv. Set to zero to log all, including rejected chunks'
        )

    if 'device' in arglist:
        parser.add_argument(
            '--device',
            default='cpu',
            action=DeviceAction,
            help=
            'Integer specifying which GPU to use, or "cpu" to use CPU only. '
            'Other accepted formats: "cuda" (use default GPU), "cuda:2" '
            'or "cuda2" (use GPU 2).')

    if 'filter_max_dwell' in arglist:
        parser.add_argument(
            '--filter_max_dwell',
            default=10.0,
            metavar='multiple',
            type=Maybe(Positive(float)),
            help=
            'Drop chunks with max dwell more than multiple of median (over chunks)'
        )

    if 'filter_mean_dwell' in arglist:
        parser.add_argument(
            '--filter_mean_dwell',
            default=3.0,
            metavar='radius',
            type=Maybe(Positive(float)),
            help=
            'Drop chunks with mean dwell more than radius deviations from the median (over chunks)'
        )

    if 'input_strand_list' in arglist:
        parser.add_argument('--input_strand_list',
                            default=None,
                            action=FileExists,
                            help='Strand summary file containing subset')

    if 'jobs' in arglist:
        parser.add_argument(
            '--jobs',
            default=1,
            metavar='n',
            type=Positive(int),
            help='Number of threads to use when processing data')

    if 'limit' in arglist:
        parser.add_argument('--limit',
                            default=None,
                            type=Maybe(Positive(int)),
                            help='Limit number of reads to process')

    if 'lrdecay' in arglist:
        parser.add_argument(
            '--lrdecay',
            default=5000,
            metavar='n',
            type=Positive(float),
            help='Learning rate for batch i is adam.rate / (1.0 + i / n)')

    if 'niteration' in arglist:
        parser.add_argument('--niteration',
                            metavar='batches',
                            type=Positive(int),
                            default=50000,
                            help='Maximum number of batches to train for')

    if 'overwrite' in arglist:
        parser.add_argument('--overwrite',
                            default=False,
                            action=AutoBool,
                            help='Whether to overwrite any output files')

    if 'quiet' in arglist:
        parser.add_argument('--quiet',
                            default=False,
                            action=AutoBool,
                            help="Don't print progress information to stdout")

    if 'sample_nreads_before_filtering' in arglist:
        parser.add_argument(
            '--sample_nreads_before_filtering',
            metavar='n',
            type=NonNegative(int),
            default=1000,
            help=
            'Sample n reads to decide on bounds for filtering before training. Set to 0 to do all.'
        )

    if 'save_every' in arglist:
        parser.add_argument('--save_every',
                            metavar='x',
                            type=Positive(int),
                            default=5000,
                            help='Save model every x batches')

    if 'version' in arglist:
        parser.add_argument('--version',
                            nargs=0,
                            action=display_version_and_exit,
                            metavar=__version__,
                            help='Display version information.')

    if 'weight_decay' in arglist:
        parser.add_argument(
            '--weight_decay',
            default=0.0,
            metavar='penalty',
            type=NonNegative(float),
            help='Adam weight decay (L2 normalisation penalty)')

    ############################################################################
    #
    # Positional arguments
    #
    ############################################################################

    if 'input_folder' in arglist:
        parser.add_argument(
            'input_folder',
            action=FileExists,
            help='Directory containing single-read fast5 files')
示例#12
0
# Shared options first, then the options specific to this script.
common_cmdargs.add_common_command_args(parser, ['limit', 'jobs', 'version'])

parser.add_argument(
    '--back_prob', type=proportion, default=1e-15, metavar='probability',
    help='Probability of backwards move')
parser.add_argument(
    '--input_strand_list', action=FileExists, default=None,
    help='Strand summary file containing subset')
parser.add_argument(
    '--localpen', type=Maybe(NonNegative(float)), default=None,
    help='Penalty for staying in start and end states, '
    'or None to disable them')
parser.add_argument(
    '--minscore', type=Maybe(NonNegative(float)), default=None,
    help='Minimum score for matching')
parser.add_argument(
    '--trim', nargs=2, type=NonNegative(int), default=(200, 10),
    metavar=('beginning', 'end'),
    help='Number of samples to trim off start and end')

# Positional arguments, in command-line order.
parser.add_argument(
    'model', action=FileExists, help='Model file')
parser.add_argument(
    'references', action=FileExists, help='Fasta file')
parser.add_argument('read_dir',
示例#13
0
from taiyaki.common_cmdargs import add_common_command_args
from taiyaki.iterators import imap_mp


# Command-line interface for the squiggle mapping script, built at
# import time.
parser = argparse.ArgumentParser(
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    description='Map sequence to current trace using squiggle '
    'predictor model')


add_common_command_args(
    parser, ['limit', 'jobs', 'output', 'recursive', 'version'])

parser.add_argument(
    '--back_prob', type=proportion, default=1e-15, metavar='probability',
    help='Probability of backwards move')
parser.add_argument(
    '--input_strand_list', action=FileExists, default=None,
    help='Strand summary file containing subset')
parser.add_argument(
    '--localpen', type=Maybe(NonNegative(float)), default=None,
    help='Penalty for staying in start and end states, '
    'or None to disable them')
parser.add_argument(
    '--minscore', type=Maybe(NonNegative(float)), default=None,
    help='Minimum score for matching')
parser.add_argument(
    '--trim', nargs=2, type=NonNegative(int), default=(200, 10),
    metavar=('beginning', 'end'),
    help='Number of samples to trim off start and end')

# Positional arguments, in command-line order.
parser.add_argument(
    'model', action=FileExists, help='Model file')
parser.add_argument(
    'references', action=FileExists, help='Fasta file')
parser.add_argument(
    'read_dir', action=FileExists, help='Directory for fast5 reads')


def main():
    args = parser.parse_args()

    worker_kwarg_names = ['back_prob', 'localpen', 'minscore', 'trim']