Пример #1
0
def build_parser(parser):
    """Register this subcommand's command-line arguments on ``parser``.

    ``fastq`` is an optional positional that falls back to stdin and
    ``-o/--out`` falls back to stdout. ``-n/--no-header`` stores ``False``
    into ``show_header``, which is ``True`` otherwise.
    """
    add = parser.add_argument

    # input and output streams
    add('fastq', nargs='?', default=sys.stdin, type=Opener('r'),
        help='fastq-sanger file with phred scores')
    add('-o', '--out', default=sys.stdout, type=Opener('w'),
        help='csv-format file containing stats for each read.')

    # processing / formatting options
    add('-l', '--limit', type=int, metavar='N',
        help='Limit number of sequences read from input to N')
    add('-e', '--extra-fields', type=str,
        help="extra fields for csv file in form 'name1:val1,name2:val2'")
    add('-n', '--no-header',
        action='store_false', default=True, dest='show_header')
Пример #2
0
def build_parser(parser):
    """Register this subcommand's command-line arguments on ``parser``.

    Args:
        parser: object exposing ``add_argument`` (presumably an
            ``argparse.ArgumentParser`` or subparser -- confirm with caller).

    Both positionals (``raw_reads``, ``readmap``) are required. ``--no-align``
    stores ``False`` into ``align``, which defaults to ``True``.
    """
    # NOTE(review): the help text mentions a stdin default, but this
    # positional has no ``nargs='?'``/``default`` here -- confirm intent.
    parser.add_argument('raw_reads',
                        type=lambda f: fastalite(Opener()(f)),
                        help="""input fasta file containing original
                        clustered reads (default stdin).""")
    parser.add_argument('readmap',
                        type=Opener('r'),
                        help="""output of `bioy denoise --readmap`
                        (csv file with columns readname,clustername)""")
    parser.add_argument('-r',
                        '--rlefile',
                        type=Csv2Dict('name',
                                      'rle',
                                      fieldnames=['name', 'rle']),
                        help="""An optional file containing run
                        length encoding for infile (.csv.bz2)""")
    parser.add_argument('-d', '--outdir', help='output directory', default='.')
    parser.add_argument('--pattern',
                        help="""A regular expression matching cluster names""")
    parser.add_argument('-N',
                        '--sample',
                        type=int,
                        default=100,
                        metavar='N',
                        # fixed typo: "inculde" -> "include"
                        help='include no more than N reads [%(default)s]')
    parser.add_argument('--name-suffix',
                        help='string to insert into name before .fasta',
                        default='aln')
    parser.add_argument('--no-align',
                        action='store_false',
                        dest='align',
                        default=True)
Пример #3
0
def build_parser(parser):
    """Register this subcommand's command-line arguments on ``parser``.

    Args:
        parser: object exposing ``add_argument`` (presumably an
            ``argparse.ArgumentParser`` or subparser -- confirm with caller).

    ``tree`` is an optional positional that falls back to stdin. ``--out``
    is kept as a plain path string (default ``/dev/stdout``); it is not
    opened here.
    """
    parser.add_argument('tree',
                        default=sys.stdin,
                        nargs='?',
                        type=Opener('r'),
                        # previous help text described fasta sequences,
                        # which did not match this argument
                        help='input tree file (format set by --tree-type)')
    parser.add_argument('--info',
                        type=Opener('r'),
                        metavar='CSV',
                        help="""selectively replace leaves from a two column
                     csv [seqname,*newname*] *optional column*""")
    parser.add_argument('-o',
                        '--out',
                        default='/dev/stdout',
                        help='output file [%(default)s]')
    parser.add_argument('--add-prefix',
                        default='',
                        metavar='PRE',
                        help='prepend a prefix string to all names')
    parser.add_argument('--add-suffix',
                        default='',
                        metavar='SUF',
                        help='append a suffix string to all names')
    parser.add_argument('--tree-type',
                        default='newick',
                        help='tree type to parse')
    # NOTE(review): the original help text was cut off mid-sentence
    # ("remove a word from a "); wording below assumes it applies to the
    # same names as --add-prefix/--add-suffix -- confirm against the action.
    parser.add_argument('--remove-word',
                        metavar='REGEX',
                        help='remove a word matching REGEX from names')
Пример #4
0
def build_parser(parser):
    """Register this subcommand's command-line arguments on ``parser``.

    Args:
        parser: object exposing ``add_argument`` (presumably an
            ``argparse.ArgumentParser`` or subparser -- confirm with caller).

    The left/right primer options come in parallel pairs (``-aligns``,
    ``-range``, ``-zscore``, ``-expr``). ``-o/--fasta-out`` falls back to
    stdout.
    """
    parser.add_argument('fasta',
                        type=lambda f: fastalite(Opener()(f)),
                        help='input fasta file')
    parser.add_argument('-l',
                        '--left-aligns',
                        type=Opener(),
                        help='left primer ssearch36 alignment results')
    parser.add_argument('-r',
                        '--right-aligns',
                        type=Opener(),
                        help='right primer ssearch36 alignment results')
    parser.add_argument('--left-range',
                        metavar='START,STOP',
                        help='Range of acceptable left primer start positions')
    parser.add_argument('--left-zscore',
                        metavar='VALUE',
                        type=float,
                        help='Min acceptable left primer z-score')
    parser.add_argument('--right-range',
                        metavar='START,STOP',
                        help=('Range of acceptable right '
                              'primer start positions'))
    parser.add_argument('--right-zscore',
                        metavar='VALUE',
                        type=float,
                        help='Min acceptable right primer z-score')
    parser.add_argument('--left-expr',
                        help=('python expression defining '
                              'criteria for keeping left primer'))
    # help previously said "left primer" here (copy-paste from --left-expr)
    parser.add_argument('--right-expr',
                        help=('python expression defining criteria '
                              'for keeping right primer'))
    parser.add_argument('-o',
                        '--fasta-out',
                        type=Opener('w'),
                        default=sys.stdout,
                        help='trimmed fasta output file')
    parser.add_argument('--rle',
                        type=Csv2Dict('name',
                                      'rle',
                                      fieldnames=['name', 'rle']),
                        help='rle input file (required if --rle-out)')
    # the output path is opened for writing and wrapped in a DictWriter
    # at argument-parsing time
    parser.add_argument(
        '--rle-out',
        type=lambda f: DictWriter(Opener('w')(f), fieldnames=['name', 'rle']),
        help='trimmed rle output file')
    parser.add_argument('-i',
                        '--include-primer',
                        action='store_true',
                        default=False,
                        help='Include primer in trimmed sequence')
    parser.add_argument('--keep-all-seqs',
                        action='store_true',
                        help='keep seqs that outside the trimming thresholds')
Пример #5
0
def build_parser(parser):
    """Attach this subcommand's arguments to ``parser``.

    ``alignments`` is an optional positional (stdin when omitted) and
    ``-o/--out`` defaults to stdout. ``--no-header`` stores ``False`` into
    ``header``; ``-r/--rlefile`` accepts one or more values.
    """
    add = parser.add_argument

    # streams
    add('alignments', default=sys.stdin, type=Opener('r'), nargs='?',
        help='ssearch -m 10 formatted file')
    add('-o', '--out', default=sys.stdout, type=Opener('w'),
        help='(default csv)-formatted output')

    # output shaping
    add('-p', '--print-one', default=False, action='store_true',
        help='pretty print first alignment and exit')
    add('-f', '--fieldnames', type=lambda names: names.split(','),
        help='comma-delimited list of field names to include in output')
    add('--limit', type=int, metavar='N',
        help='Print no more than N alignments')
    add('--no-header', dest='header', action='store_false')

    # decoding and filtering
    rle_table = Csv2Dict(index='name', value='rle', fieldnames=['name', 'rle'])
    add('-r', '--rlefile', type=rle_table, nargs='+',
        help='CSV file containing run-length encoding')
    add('--min-zscore', default=None, type=float, metavar='X',
        help='Exclude alignments with z-score < X')
    add('-a', '--top-alignment', default=False, action='store_true',
        help="""By default, return all alignments;
                  provide this option to include
                  only the top entry per query.""")
    add('-e', '--extra-fields',
        help="extra fields for csv file in form 'name1:val1,name2:val2'")
    add('-d', '--with-diff', action='store_true', default=False,
        help="""add fields 'q_diff' and 't_diff' containing
            aligned substrings with mismatches in lowercase""")
Пример #6
0
def build_parser(parser):
    """Attach the input/output arguments of this subcommand to ``parser``.

    ``infile`` is an optional positional defaulting to stdin;
    ``-o/--outfile`` defaults to stdout.
    """
    add = parser.add_argument

    add('infile', type=Opener('r'), default=sys.stdin, nargs='?',
        help='File containing stdout of call to cmalign')
    add('-o', '--outfile', type=Opener('w'), default=sys.stdout,
        help='Output file in csv format (default is stdout)')
Пример #7
0
def build_parser(parser):
    """Attach this subcommand's arguments to ``parser``.

    ``csv`` is an optional positional (stdin when omitted); ``-o/--out``
    defaults to stdout; ``--split-column`` is a free-form string.
    """
    add = parser.add_argument

    add('csv', default=sys.stdin, nargs='?', type=Opener(),
        help='input tsv file')
    add('-o', '--out', type=Opener('w'), default=sys.stdout,
        help='csv file')
    add('--split-column', help='column:delimiter:*newcolumns')
Пример #8
0
def build_parser(parser):
    """Attach this subcommand's arguments to ``parser``.

    ``seqs`` and ``rle`` are required positionals; ``-o/--outfile`` defaults
    to stdout.
    """
    add = parser.add_argument

    # the fasta path is opened and handed to fastalite while parsing
    add('seqs',
        type=lambda path: fastalite(Opener()(path), readfile=False),
        help='Input fasta file')
    add('rle', type=Opener(),
        help='csv file (may be bzip encoded) containing columns "name","rle"')
    add('-o', '--outfile', type=Opener('w'), default=sys.stdout,
        help='Name of output file')
Пример #9
0
def build_parser(parser):
    """Attach this subcommand's arguments to ``parser``.

    ``query`` and ``library`` are required positionals kept as plain
    strings. ``-o/--out`` defaults to stdout and ``--no-header`` stores
    ``False`` into ``header`` (default ``True``).
    """
    add = parser.add_argument

    # required inputs
    add('query', help='input fasta query file')
    add('library', help='input fasta library file to search against')

    # outputs
    add('-o', '--out', type=Opener('w'), default=sys.stdout,
        help='tabulated ssearch results')
    add('--no-header', dest='header', action='store_false', default=True,
        help='no header')

    # search parameters
    add('--all-alignments', action='store_true',
        help='maximum number of alignments to keep default = 1')
    add('-g', '--gap-extension-penalty', default='4',
        help='gap extension penalty default = %(default)s')
    add('-f', '--gap-open-penalty', default='12',
        help='gap open penalty default = %(default)s')
    add('-a', '--full-sequences', default=False, action='store_true',
        help='return full sequences in alignment')
    add('-O', '--out-raw', type=Opener('w'),
        help='return raw ssearch output')

    # post-processing
    rle_table = Csv2Dict(index='name', value='rle', fieldnames=['name', 'rle'])
    add('--decode', type=rle_table, help='Decode alignment')
    add('--fieldnames', type=lambda names: names.split(','),
        help='comma-delimited list of field names to include in output')
    add('--min-zscore', default=0, type=float, metavar='X',
        help='Exclude alignments with z-score < X')
Пример #10
0
def build_parser(parser):
    """Attach this subcommand's arguments to ``parser``.

    ``csv`` is an optional positional (stdin when omitted), ``--columns``
    defaults to ``'1,2'`` and ``--out`` defaults to stdout.
    """
    add = parser.add_argument

    add('csv', nargs='?', default=sys.stdin, metavar='FILE', type=Opener(),
        help='A csv file with at least one column')
    add('--columns', default='1,2',
        help='Comma-delimited list of column names or numbers')
    add('--out', metavar='FILE', type=Opener('w'), default=sys.stdout,
        help='output fasta file')
Пример #11
0
def build_parser(parser):
    """Attach this subcommand's arguments to ``parser``.

    ``fasta`` and ``specimen_map`` are required positionals; ``--outdir``
    defaults to the current directory (``'.'``).
    """
    add = parser.add_argument

    add('fasta', metavar='FILE', type=Opener(), help='input fasta')
    add('specimen_map', metavar='CSV', type=Opener(),
        help='columns: readname, specimen')
    add('--outdir', metavar='DIR',
        help=('output folder for specimen fasta files, '
              'name being specimen.fasta.bz2'),
        default='.')
Пример #12
0
def build_parser(parser):
    """Attach this subcommand's arguments to ``parser``.

    ``fasta`` is an optional positional (stdin when omitted); ``--get`` may
    be repeated and its values accumulate in a list; ``--out`` defaults to
    stdout.
    """
    add = parser.add_argument

    add('fasta', nargs='?', default=sys.stdin, metavar='FILE', type=Opener(),
        help='A fasta file')
    add('--get', action='append', help='columname[:newname]')
    add('--out', metavar='FILE', type=Opener('w'), default=sys.stdout,
        help='output csv file columns: [id,description,seq]')
Пример #13
0
def build_parser(parser):
    """Register this subcommand's command-line arguments on ``parser``.

    Args:
        parser: object exposing ``add_argument`` (presumably an
            ``argparse.ArgumentParser`` or subparser -- confirm with caller).

    ``sseqids`` is an optional positional (stdin when omitted),
    ``-o/--outfasta`` defaults to stdout and ``-e/--email`` is required.
    """
    parser.add_argument('sseqids', nargs='?', type=Opener('r'),
                        default=sys.stdin,
                        help='csv input file, each line containing gb,seq_start,seq_stop')
    parser.add_argument('-o', '--outfasta',
                        type=Opener('w'),
                        default=sys.stdout,
                        help='multi-fasta, one sequence for each provided identifier')
    parser.add_argument('-i', '--seqinfo',
                        type=Opener('w'),
                        help="optionally output seqinfo for each sequence : {}".format(FETCH_HEADERS))
    # Without an explicit action this option consumed the next command-line
    # token as its value (possibly swallowing the positional input file);
    # make it the boolean flag its help text describes.
    parser.add_argument('-n', '--no-header',
                        action='store_true',
                        help="suppress seqinfo header")
    parser.add_argument('-e', '--email', required=True,
                        help="users of NCBI Entrez API should provide email.  if usage is excessive, ncbi may block access to its API")
Пример #14
0
def build_parser(parser):
    """Register this subcommand's command-line arguments on ``parser``.

    Args:
        parser: object exposing ``add_argument`` (presumably an
            ``argparse.ArgumentParser`` or subparser -- confirm with caller).

    ``infile`` is an optional positional (stdin when omitted).
    ``-c/--no-compress`` stores ``False`` into ``compress``, which defaults
    to ``True``.
    """
    parser.add_argument('infile',
                        default=sys.stdin,
                        nargs='?',
                        type=Opener(),
                        help='input csv file')
    parser.add_argument(
        '-o',
        '--outfile',
        help='HDF5 file [use basename of input file by default]')
    parser.add_argument('-d',
                        '--outdir',
                        help="""Optional output directory. Ignored
                        if -o/--outfile is specified.""")
    parser.add_argument('--fieldnames',
                        help='comma-delimited list of field names.')
    parser.add_argument('-H',
                        '--no-header',
                        action='store_true',
                        default=False,
                        help="""indicate that the input file has no
                        header row. Uses value of --fieldnames if provided.""")
    # fixed typo in help: "identifing" -> "identifying"
    parser.add_argument('-k',
                        '--key',
                        default='data',
                        help="""A label identifying this table in the
                        data store "[%(default)s]" """)
    parser.add_argument('-c',
                        '--no-compress',
                        action='store_false',
                        default=True,
                        dest='compress',
                        help="""Don't compress data store.""")
Пример #15
0
def build_parser(parser):
    """Attach this subcommand's arguments to ``parser``.

    ``taxids`` is a required positional parsed into a ``set`` by splitting
    on commas; ``taxonomy`` is an optional positional (stdin when omitted);
    ``-o/--out`` defaults to stdout.
    """
    add = parser.add_argument

    add('taxids', type=lambda names: set(names.split(',')),
        help='comma delimited list of column names to group columns')
    add('taxonomy', metavar='CSV', default=sys.stdin, nargs='?',
        type=Opener(), help='input classify csv file')
    add('-o', '--out', type=Opener('w'), default=sys.stdout,
        help="""csv with columns
            [parent_name,parent_id,parent_rank,tax_name,tax_id,rank]""")
Пример #16
0
def build_parser(parser):
    """Attach this subcommand's arguments to ``parser``.

    ``infiles`` takes zero or more values and defaults to ``[sys.stdin]``;
    ``-o/--outfile`` defaults to stdout. The ``-r/--rlefile`` path is opened
    for writing and wrapped in a ``DictWriter`` while parsing.
    """
    add = parser.add_argument

    add('infiles', nargs='*', default=[sys.stdin], type=Opener(),
        help='Input fasta file')
    add('-o', '--outfile', type=Opener('w'), default=sys.stdout,
        help='Name of output file; default: %(default)s')
    add('-r', '--rlefile',
        type=lambda path: DictWriter(Opener('w')(path),
                                     fieldnames=['name', 'rle']),
        help="""Name of output file for run length encoding; default is to
                      append .csv.bz2 to --outfile basename.""")
Пример #17
0
def build_parser(parser):
    """Register this subcommand's command-line arguments on ``parser``.

    Args:
        parser: object exposing ``add_argument`` (presumably an
            ``argparse.ArgumentParser`` or subparser -- confirm with caller).

    ``fasta`` is an optional positional (stdin when omitted) and
    ``-o/--out`` defaults to stdout.
    """
    parser.add_argument('fasta', default=sys.stdin, type=Opener('r'),
                        nargs='?', help='sequences in fasta format')
    parser.add_argument('-o', '--out', default=sys.stdout, type=Opener('w'),
                        help='output file (default stdout)')
    parser.add_argument('--matrix-out', type=Opener('w'),
                        help="""median score of pairwise alignments""")
    parser.add_argument('--primary-group', metavar='COLUMN_NAME',
                        help="""column in split_info to use for grouping""")
    # metavar previously misspelled as 'COLUN_NAME'
    parser.add_argument('--secondary-group', metavar='COLUMN_NAME',
                        help="""column in split_info to use for grouping if
                                  primary_group is undefined for a given row""")
    parser.add_argument('--split-info', metavar='FILE', type=Opener(),
                        help="""csv file containing column "seqname" plus another
                                  column for grouping sequences prior to deduplication """)
    parser.add_argument('-d', '--distance', action='store_true', default=False,
                        help='Calculate distance rather than identity.')
Пример #18
0
def build_parser(parser):
    """Attach this subcommand's arguments to ``parser``.

    ``clusters`` is the only positional. ``-o/--out`` defaults to stdout;
    the other output options have no default.
    """
    add = parser.add_argument

    # inputs
    add('clusters', type=Opener(),
        help='Clusters file (output of "usearch -uc")')
    add('--fasta-in', type=lambda path: fastalite(Opener()(path)),
        help='input fasta file containing original clustered reads')
    add('--fasta-out', type=Opener('w'),
        help='Output fasta containing centroids')
    add('-g', '--groups', metavar='FILE', type=Opener(),
        help="""An optional file defining groups for partitioning
        input reads. If provided, cluster weights will be normalized
        to proportionally represent each group. File is a headerless
        csv with columns "seqname","group" (.csv.bz2)""")
    add('--min-clust-size', type=int, default=1, help='[%(default)s]')

    # outputs
    add('-o', '--out', type=Opener('w'), default=sys.stdout,
        help='Output file with columns (readname,centroidname)')
    add('--specimenmap', type=Opener('w'),
        help='Output file with columns (clustername,samplename)')
    add('--specimen', metavar='SAMPLENAME',
        help='provides samplename for mapfile')
    add('-w', '--weights', type=Opener('w'),
        help='Output file with columns (clustername,weight)')
Пример #19
0
def build_parser(parser):
    """Register this subcommand's command-line arguments on ``parser``.

    Args:
        parser: object exposing ``add_argument`` (presumably an
            ``argparse.ArgumentParser`` or subparser -- confirm with caller).

    ``csv`` is an optional positional (stdin when omitted) and ``-o/--out``
    defaults to stdout.
    """
    parser.add_argument('csv',
                        default=sys.stdin,
                        nargs='?',
                        type=Opener(),
                        help='input tsv file')
    # Default to the already-open stdout stream rather than the path
    # '/dev/stdout': argparse runs ``type`` on string defaults, so the path
    # was re-opened in 'w' mode, which does not exist on every platform and
    # can truncate a file that stdout is being appended to.
    parser.add_argument('-o', '--out',
                        type=Opener('w'),
                        default=sys.stdout,
                        help='csv file')
    parser.add_argument('--value',
                        help='value to pivot on for each row and column')
    parser.add_argument('--rows',
                        help='comma delimited list of values')
    parser.add_argument('--cols',
                        help='comma delimited list of values')
    parser.add_argument('--fill-value',
                        help='if no pivot value')
Пример #20
0
def build_parser(parser):
    """Attach this subcommand's arguments to ``parser``.

    ``seqs`` is a required positional kept as a plain string. Both
    ``-o/--out-fasta`` and ``--out`` default to stdout.
    """
    add = parser.add_argument

    # inputs
    add('seqs', help='Input fasta file')
    add('--is-file', action='store_true',
        help='if input is a fasta file. Note: Output will also be fasta')
    add('--rlefile', type=Opener(),
        help='csv file (may be bzip encoded) containing columns "name","rle"')

    # outputs
    add('-O', '--out-rle', type=Opener('w'), help='reversed rlefile')
    add('-o', '--out-fasta', type=Opener('w'), default=sys.stdout,
        help='Name of fasta output file')
    add('--out', type=Opener('w'), default=sys.stdout,
        help='Name of output file')
Пример #21
0
def build_parser(parser):
    """Attach this subcommand's arguments to ``parser``.

    ``infile`` is an optional positional (stdin when omitted) and
    ``-o/--out`` defaults to stdout. Numeric options ``--position-freq`` and
    ``--min-zscore`` are parsed as floats.
    """
    add = parser.add_argument

    # inputs
    add('infile', type=Opener(), nargs='?', default=sys.stdin,
        help=('csv file with ssearch36 columns '
              '[q_name,q_seq,t_name,t_seq,q_al_start,q_al_stop,'
              't_al_start,t_al_stop,t_sq_len,sw_zscore]'))
    add('-i', '--info', type=Opener(), metavar='CSV',
        help='info file mapping seqname to tax_id')
    add('-t', '--taxonomy', metavar='CSV', type=Opener(),
        help='taxonomy file mapping tax_id to taxonomy')

    # output
    add('-o', '--out', metavar='CSV', type=Opener('w'), default=sys.stdout,
        help='csv output of bases {tax_id, species, positions,,}')

    # aggregation / thresholds
    add('-r', '--rank', default='species',
        help='Aggregate primer stats by specified rank. [%(default)s]')
    add('-f', '--position-freq', metavar='FLOAT', default=0.05, type=float,
        help='Minimum base frequency reported for a position [%(default)s]')
    add('-z', '--min-zscore', type=float,
        help='Minimum z-score value to include alignment in base count.',
        default=0)
Пример #22
0
def build_parser(parser):
    """Attach this subcommand's arguments to ``parser``.

    ``query`` and ``library`` are required positionals kept as plain
    strings. ``-o/--out`` defaults to stdout, ``--no-header`` stores
    ``False`` into ``header`` and ``--strand`` is restricted to
    ``forward`` or ``both``.
    """
    add = parser.add_argument

    # required inputs
    add('query', help='input fasta query file')
    add('library', help='input fasta library file to search against')

    # output
    add('-o', '--out', type=Opener('w'), default=sys.stdout,
        help='tabulated ssearch results')
    add('--no-header', dest='header', action='store_false', default=True,
        help='no header')

    # search parameters
    add('--all-alignments', action='store_true',
        help='maximum number of alignments to keep [1]')
    add('-g', '--gap-extension-penalty', default='4',
        help='gap extension penalty [%(default)s]')
    add('-f', '--gap-open-penalty', default='12',
        help='gap open penalty [%(default)s]')
    add('-a', '--full-sequences', default=False, action='store_true',
        help='return full sequences in alignment')

    # post-processing
    rle_table = Csv2Dict(index='name', value='rle', fieldnames=['name', 'rle'])
    add('--decode', type=rle_table, help='Decode alignment')
    add('--fieldnames',
        default=('q_name,t_name,sw_zscore,sw_overlap,'
                 'q_al_start,q_al_stop,sw_ident,qcovs,sw_frame'),
        type=lambda names: names.split(','),
        help='comma-delimited list of field names to include in output')
    add('-z', '--statistical-calculation', default='1',
        help="""built in statistical calculation
                      of E values for sequences. [%(default)s]""")
    add('--min-zscore', default=0, type=float, metavar='X',
        help='Exclude alignments with z-score < X [%(default)s]')
    add('--strand', default='forward', choices=['forward', 'both'])
Пример #23
0
def build_parser(parser):
    """Attach this subcommand's arguments to ``parser``.

    ``taxonomy`` and ``thresholds`` are required positionals kept as plain
    strings; ``-o/--out`` defaults to stdout.
    """
    add = parser.add_argument

    # mandatory inputs
    add('taxonomy', help='must have tax_id column and rank columns')
    add('thresholds', help='with required columns tax_id, low, target_rank')

    # shared output option
    add('-o', '--out', default=sys.stdout, type=Opener('w'), metavar='FILE',
        help="Classification results.")
Пример #24
0
def build_parser(parser):
    """Register this subcommand's command-line arguments on ``parser``.

    Args:
        parser: object exposing ``add_argument`` (presumably an
            ``argparse.ArgumentParser`` or subparser -- confirm with caller).

    ``fasta`` is a positional kept as a plain string; ``-o/--out`` defaults
    to stdout. ``--strand`` and ``-r/--remote-database`` are restricted to
    fixed choices.
    """
    # NOTE(review): ``default`` has no effect on a positional without
    # ``nargs='?'`` -- confirm whether '-' was meant to be a real default.
    parser.add_argument('fasta', default='-', help='input fasta file')
    parser.add_argument(
        '-o',
        '--out',
        type=Opener('w'),
        default=sys.stdout,
        help='tabulated BLAST results with the following default headers {}'.
        format(BLAST_HEADER_DEFAULT))
    parser.add_argument('-d',
                        '--database',
                        help='blast database path for local blasts')
    parser.add_argument(
        '-r',
        '--remote-database',
        choices=['nt', 'nr'],
        help='type of remote database to use, if remote flag provided')
    parser.add_argument(
        '--limit',
        type=int,
        help='maximum number of query sequences to read from the alignment')
    parser.add_argument('--header', action='store_true', help='output header')
    parser.add_argument(
        '--strand',
        default='plus',
        choices=['plus', 'minus', 'both'],
        help="""query strand(s) to search against database/subject.
                      default = %(default)s""")
    parser.add_argument(
        '--id',
        default='90',
        help='minimum identity for accepted values [%(default)s]')
    parser.add_argument(
        '--max', help='maximum number of alignments to keep default = (all)')
    parser.add_argument('-n',
                        '--dry-run',
                        action='store_true',
                        help='print blast command and exit')
    parser.add_argument('--nohits', action='store_true', help='')
    # No default is set for this option, so the former "[%(default)s]"
    # suffix rendered as "[None]" in --help output; drop it.
    parser.add_argument(
        '--coverage',
        type=float,
        help='minimum coverage for accepted values')
    parser.add_argument('--outfmt',
                        default=BLAST_FORMAT_DEFAULT,
                        help='A comma delimited list of field names to output')
    parser.add_argument('--remote',
                        action='store_true',
                        help='execute query on remote NCBI server')
Пример #25
0
def build_parser(parser):
    """Attach this subcommand's arguments to ``parser``.

    Both positionals are optional: ``infile`` falls back to stdin and
    ``outfile`` to stdout.
    """
    add = parser.add_argument

    # streams
    add('infile', type=Opener(), nargs='?', default=sys.stdin,
        help='input fasta file (default %(default)s).')
    add('outfile', type=Opener('w'), default=sys.stdout, nargs='?',
        help='Output fasta file.')

    # options
    add('-r', '--rlefile', type=Opener(),
        help=('An optional file containing run length encoding '
              'for infile (.json.bz2)'))
    add('-n', '--seqname',
        help=('Name for the output sequence. Default '
              'basename(infile).replace(".fasta","") if infile '
              'is provided, otherwise "consensus"'))
    add('--gaps', action='store_true',
        help='retain gaps in consensus sequence')
Пример #26
0
def build_parser(parser):
    """Attach this subcommand's arguments to ``parser``.

    ``fasta`` is a required positional, opened and passed to ``fastalite``
    while parsing; ``-o/--out`` defaults to stdout.
    """
    add = parser.add_argument

    add('fasta', type=lambda path: fastalite(opener(path)),
        help='input file containing raw reads')
    add('--sample-id', help='sample id to pull reads for')
    add('--map-file',
        type=Csv2Dict(value='sample_id',
                      fieldnames=['sequence_id', 'sample_id']),
        help='csv(.bz2) file containing sequence_id,sample_id in the rows.')
    add('-o', '--out', type=Opener('w'), default=sys.stdout,
        help='fasta output file')
Пример #27
0
def build_parser(parser):
    """Register this subcommand's command-line arguments on ``parser``.

    Args:
        parser: object exposing ``add_argument`` (presumably an
            ``argparse.ArgumentParser`` or subparser -- confirm with caller).

    ``aligns`` is an optional positional (stdin when omitted) and
    ``-o/--out`` defaults to stdout. ``-m/--homopolymer-matrix`` is stored
    under ``matrix``.
    """
    parser.add_argument('aligns',
                        nargs='?',
                        default=sys.stdin,
                        type=Opener('r'),
                        help='csvfile of ssearch results')
    parser.add_argument(
        '-o',
        '--out',
        default=sys.stdout,
        type=Opener('w'),
        help='csv file tallying each error category for each read')
    # the path is opened for writing and wrapped in a writer at parse time
    parser.add_argument(
        '-m',
        '--homopolymer-matrix',
        dest='matrix',
        type=lambda f: writer(Opener('w')(f)),
        help='csv file containing transition matrix of homopolymer lengths')
    parser.add_argument(
        '-M',
        '--homopolymer-max',
        default=6,
        type=int,
        help='csv homopolymer length above which counts are binned')
    # fixed typo in help: "reults" -> "results"
    parser.add_argument('--step',
                        action='store_true',
                        help='step through results (for debugging)')
    # NOTE(review): the default dict object is shared by every parse that
    # omits this option; consumers should not mutate it in place.
    parser.add_argument(
        '-f',
        '--extra-fields',
        type=lambda f: parse_extras(f),
        default={},
        help="extra fields for csv file in form 'name1:val1,name2:val2'")
    parser.add_argument('--output-alignment',
                        action='store_true',
                        help='Include the actual alignment in csv output')
Пример #28
0
def build_parser(parser):
    """Add the csv field add/rename arguments to ``parser``.

    The optional positional ``infile`` is converted by ``Opener()`` and falls
    back to ``sys.stdin``. ``--outfile`` is kept as a plain string with
    ``'-'`` as its default.

    ``parser`` only needs an ``add_argument`` method; presumably it is an
    ``argparse.ArgumentParser`` or a sub-parser -- confirm against the caller.
    """
    add = parser.add_argument

    add('infile',
        nargs='?',
        type=Opener(),
        default=sys.stdin,
        help='input csv file')
    # Unlike the input, no opener is applied to the output argument here.
    add('-o', '--outfile',
        help='output csv file',
        default='-')

    # No type converter is given, so both field specs are stored as the raw
    # strings typed on the command line.
    add('-a', '--add',
        help="new fields for csv file in form 'name1:val1,name2:val2,...'",
        metavar='FIELD_SPEC')
    add('-r', '--rename',
        help="fields to rename in the format 'from1:to1,from2:to2,...'",
        metavar='FIELD_SPEC')

    add('-i', '--inplace',
        default=False,
        action='store_true',
        help='modify input file in place [%(default)s]')
Пример #29
0
def build_parser(parser):
    """Register the sequence-deduplication arguments on ``parser``.

    The positional ``sequences`` input is optional and falls back to
    ``sys.stdin``; ``--out`` falls back to ``sys.stdout``. The remaining
    output options (``--out-map``, ``--out-weights``, ``--out-info``) have no
    default and are ``None`` unless given.

    ``parser`` only needs an ``add_argument`` method; presumably it is an
    ``argparse.ArgumentParser`` or a sub-parser -- confirm against the caller.
    """
    # nargs='?' is what makes the stdin fallback work: argparse ignores
    # ``default`` on a plain (required) positional, so without it the
    # argument had to be supplied every time and sys.stdin was never used.
    parser.add_argument('sequences',
                        nargs='?',
                        type=Opener(),
                        default=sys.stdin,
                        help='input fasta file')
    parser.add_argument('-i',
                        '--split-info',
                        metavar='FILE',
                        type=Opener('rU'),
                        help="""csv file containing column "seqname" plus
                                  another column for grouping sequences prior to deduplication"""
                        )
    # Column names used to group sequences; the secondary column is the
    # fallback when the primary one is undefined for a row (per the help).
    parser.add_argument(
        '--primary-group',
        metavar='COLUMN_NAME',
        help='column in split_info to use for grouping [default %(default)s]',
        default='species')
    parser.add_argument('--secondary-group',
                        metavar='COLUMN_NAME',
                        help="""column in split_info to use for grouping
                                  if primary_group is undefined
                                  for a given row [default %(default)s]""",
                        default='tax_id')
    # Optional outputs: each path is opened for writing when supplied.
    parser.add_argument(
        '--out-map',
        metavar='FILE',
        type=Opener('w'),
        help='map file of sequences from (kept_seq_id,orig_seq_i)')
    parser.add_argument('--out-weights',
                        metavar='FILE',
                        help='weight file for each kept sequence',
                        type=Opener('w'))
    parser.add_argument('-O',
                        '--out-info',
                        metavar='FILE',
                        type=Opener('w'),
                        help='deduplicate seq info file')
    parser.add_argument('-o',
                        '--out',
                        metavar='FILE',
                        type=Opener('w'),
                        default=sys.stdout,
                        help='deduplicated sequences in fasta format')
Пример #30
0
def build_parser(parser):
    """Register the blast-classification arguments on ``parser``.

    ``--seq-info`` and ``--taxonomy`` are required. The positional
    ``blast_file`` is optional and falls back to ``sys.stdin``; ``--out``
    falls back to ``sys.stdout``. Arguments are registered in the order they
    appear below, which is also the order argparse lists them in ``--help``.

    ``parser`` only needs an ``add_argument`` method; presumably it is an
    ``argparse.ArgumentParser`` or a sub-parser -- confirm against the caller.
    """
    parser.add_argument(
        'blast_file',
        nargs='?',
        default=sys.stdin,
        type=Opener('r'),
        help='CSV tabular blast file of query and subject hits')
    parser.add_argument(
        '--all-one-group',
        dest='all_one_group',
        action='store_true',
        help="""If --map is not provided, the default behavior is to treat
                    all reads as one group; use this option to treat
                    each read as a separate group [%(default)s]""")
    parser.add_argument(
        '-a',
        '--asterisk',
        default=100,
        metavar='PERCENT',
        type=float,
        help='Next to any species above a certain threshold [%(default)s]')
    parser.add_argument('--copy-numbers',
                        metavar='CSV',
                        type=Opener(),
                        help='columns: tax_id, median')
    parser.add_argument(
        '-c',
        '--coverage',
        default=95,
        metavar='PERCENT',
        type=float,
        help='percent of alignment coverage of blast result [%(default)s]')
    parser.add_argument(
        '--details-identity',
        metavar='PERCENT',
        help='Minimum identity to include blast hits in details file',
        type=float,
        default=90)
    parser.add_argument(
        '--details-full',
        action='store_true',
        help='do not limit out_details to only larget cluster per assignment')
    # Collect the tax_id values themselves. DictReader yields dict rows, which
    # are unhashable and cannot be set members, and ``fieldnames`` must be a
    # sequence of column names (a bare string is split into characters).
    # NOTE(review): because fieldnames is supplied, a header line in the file
    # (if any) is read as data -- confirm the expected file layout.
    parser.add_argument('--exclude-by-taxid',
                        metavar='CSV',
                        type=lambda f: set(row['tax_id'] for row in DictReader(
                            opener(f), fieldnames=['tax_id'])),
                        default={},
                        help='column: tax_id')
    # action='append' collects every occurrence of the option into a list.
    parser.add_argument(
        '--group-def',
        metavar='INT',
        action='append',
        default=[],
        help="""define a group threshold for a particular rank overriding
                      --target-max-group-size. example: genus:2""")
    parser.add_argument('--group-label',
                        metavar='LABEL',
                        default='all',
                        help='Single group label for reads')
    parser.add_argument(
        '-o',
        '--out',
        default=sys.stdout,
        type=Opener('w'),
        metavar='CSV',
        help="""columns: specimen, max_percent, min_percent, max_coverage,
                      min_coverage, assignment_id, assignment, clusters, reads,
                      pct_reads, corrected, pct_corrected, target_rank, hi, low, tax_ids"""
    )
    parser.add_argument('-m',
                        '--map',
                        metavar='CSV',
                        type=Opener(),
                        default={},
                        help='columns: name, specimen')
    parser.add_argument(
        '--max-ambiguous',
        metavar='INT',
        default=3,
        type=int,
        help='Maximum ambiguous count in reference sequences [%(default)s]')
    parser.add_argument(
        '--max-identity',
        default=100,
        metavar='PERCENT',
        type=float,
        help='maximum identity threshold for accepting matches [<= %(default)s]'
    )
    parser.add_argument(
        '--min-cluster-size',
        default=0,
        metavar='INT',
        type=int,
        help='minimum cluster size to include in classification output')
    parser.add_argument(
        '--min-identity',
        default=99,
        metavar='PERCENT',
        type=float,
        help='minimum identity threshold for accepting matches [> %(default)s]'
    )
    parser.add_argument(
        '-s',
        '--seq-info',
        required=True,
        metavar='CSV',
        type=Opener(),
        help='seq info file(s) to match sequence ids to taxids [%(default)s]')
    parser.add_argument(
        '-t',
        '--taxonomy',
        required=True,
        metavar='CSV',
        type=Csv2Dict('tax_id'),
        help='tax table of taxids and species names [%(default)s]')
    # The detail output is wrapped in a DictWriter at parse time;
    # extrasaction='ignore' drops any row keys not listed in fieldnames.
    parser.add_argument(
        '-O',
        '--out-detail',
        type=lambda f: DictWriter(
            opener(f, 'w'),
            extrasaction='ignore',
            fieldnames=[
                'specimen', 'assignment', 'assignment_id', 'qseqid', 'sseqid',
                'pident', 'coverage', 'ambig_count', 'accession', 'tax_id',
                'tax_name', 'target_rank', 'rank', 'hi', 'low'
            ]),
        metavar='CSV',
        help="""columns: specimen, assignment, assignment_id,
                      qseqid, sseqid, pident, coverage, ambig_count,
                      accession, tax_id, tax_name, target_rank, rank, hi, low"""
    )
    parser.add_argument('--target-max-group-size',
                        metavar='INTEGER',
                        default=3,
                        type=int,
                        help="""group multiple target-rank assignments that
                      excede a threshold to a higher rank [%(default)s]""")
    parser.add_argument(
        '--target-rank',
        metavar='RANK',
        help='Rank at which to classify. Default: "%(default)s"',
        default='species')
    parser.add_argument('-w',
                        '--weights',
                        metavar='CSV',
                        type=Opener(),
                        help='columns: name, weight')
    # csv.Sniffer.has_header is *not* reliable enough, so the presence of a
    # header row has to be declared explicitly by the user.
    parser.add_argument('--has-header',
                        action='store_true',
                        help='specify this if blast data has a header')