def subparser(subparsers):
    """Register the ``gather`` command and declare its arguments.

    ``subparsers`` must provide ``add_parser``; the parser it hands back is
    populated in place and nothing is returned.
    """
    cmd = subparsers.add_parser('gather')
    add = cmd.add_argument

    # Positional arguments.
    add('query', help='query signature')
    add('databases', nargs='+', help='signatures/SBTs to search')

    # Verbosity switches.
    add('-q', '--quiet', action='store_true',
        help='suppress non-error output')
    add('-d', '--debug', action='store_true')

    # Result selection and output destinations.
    add('-n', '--num-results', metavar='N', type=int, default=None,
        help='number of results to report '
             '(default: terminate at --threshold-bp)')
    add('--traverse-directory', action='store_true',
        help='search all signatures underneath directories')
    add('-o', '--output', metavar='FILE',
        help='output CSV containing matches to this file')
    add('--save-matches', metavar='FILE',
        help='save the matched signatures from the database to the '
             'specified file')
    add('--threshold-bp', metavar='REAL', type=float, default=5e4,
        help='reporting threshold (in bp) for estimated overlap with '
             'remaining query (default=50kb)')
    add('--output-unassigned', metavar='FILE',
        help='output unassigned portions of the query as a signature to the '
             'specified file')

    # Query preparation.
    add('--scaled', metavar='FLOAT', type=float, default=0,
        help='downsample query to the specified scaled factor')
    add('--ignore-abundance', action='store_true',
        help='do NOT use k-mer abundances if present')
    add('--md5', default=None,
        help='select the signature with this md5 as query')

    # Helpers defined outside this block contribute further arguments.
    add_ksize_arg(cmd, 31)
    add_moltype_args(cmd)
def subparser(subparsers):
    """Register the ``categorize`` command and declare its arguments.

    ``subparsers`` must provide ``add_parser``; the parser it hands back is
    populated in place and nothing is returned.
    """
    cmd = subparsers.add_parser('categorize')
    add = cmd.add_argument

    # Positional arguments.
    add('sbt_name', help='name of SBT to load')
    add('queries', nargs='+', help='list of signatures to categorize')

    add('-q', '--quiet', action='store_true',
        help='suppress non-error output')
    add_ksize_arg(cmd, 31)
    add('--threshold', type=float, default=0.08,
        help='minimum threshold for reporting matches; default=0.08')
    add('--traverse-directory', action='store_true')
    add('--ignore-abundance', action='store_true',
        help='do NOT use k-mer abundances if present')
    add_moltype_args(cmd)

    # TODO: help messages in these
    add('--csv', help='output summary CSV to this file')
    add('--load-csv', default=None)
def subparser(subparsers):
    """Register the ``watch`` command and declare its arguments.

    ``subparsers`` must provide ``add_parser``; the parser it hands back is
    populated in place and nothing is returned.
    """
    cmd = subparsers.add_parser('watch')
    add = cmd.add_argument

    # Positional arguments; the input file is optional and falls back to
    # the /dev/stdin path.
    add('sbt_name', help='name of SBT to search')
    add('inp_file', nargs='?', default='/dev/stdin')

    add('-q', '--quiet', action='store_true',
        help='suppress non-error output')
    add('-o', '--output', help='save signature generated from data here')
    add('--threshold', metavar='T', type=float, default=0.05,
        help='minimum threshold for matches (default=0.05)')
    add('--input-is-protein', action='store_true',
        help='Consume protein sequences - no translation needed')
    add_moltype_args(cmd)

    # argparse expands %(default)i in the help text with the default value.
    add('-n', '--num-hashes', type=int, default=500,
        help='number of hashes to use in each sketch (default: %(default)i)')
    add('--name', type=str, default='stdin',
        help='name to use for generated signature')
    add_ksize_arg(cmd, 31)
def subparser(subparsers):
    """Register the ``filter`` command and declare its arguments.

    ``subparsers`` must provide ``add_parser``; the parser it hands back is
    populated in place and nothing is returned.
    """
    cmd = subparsers.add_parser('filter')
    add = cmd.add_argument

    add('signatures', nargs='+')
    add('-q', '--quiet', action='store_true',
        help='suppress non-error output')
    add('-o', '--output', metavar='FILE',
        help='output signature to this file (default stdout)')

    # Signature selection by substring.
    add('--md5', type=str, default=None,
        help='select signatures whose md5 contains this substring')
    add('--name', type=str, default=None,
        help='select signatures whose name contains this substring')

    # Abundance bounds; the upper bound defaults to None.
    add('-m', '--min-abundance', type=int, default=1,
        help='keep hashes >= this minimum abundance')
    add('-M', '--max-abundance', type=int, default=None,
        help='keep hashes <= this maximum abundance')

    add_ksize_arg(cmd, 31)
    add_moltype_args(cmd)
def subparser(subparsers):
    """Register the ``search`` command and declare its arguments.

    ``subparsers`` must provide ``add_parser``; the parser it hands back is
    populated in place and nothing is returned.
    """
    cmd = subparsers.add_parser('search')
    add = cmd.add_argument

    # Positional arguments.
    add('query', help='query signature')
    add('databases', nargs='+', help='signatures/SBTs to search')

    add('--traverse-directory', action='store_true',
        help='search all signatures underneath directories')
    add('-q', '--quiet', action='store_true',
        help='suppress non-error output')

    # Match reporting.
    add('--threshold', metavar='T', type=float, default=0.08,
        help='minimum threshold for reporting matches; default=0.08')
    add('--save-matches', metavar='FILE',
        help='output matching signatures to the specified file')
    add('--best-only', action='store_true',
        help='report only the best match (with greater speed)')
    add('-n', '--num-results', metavar='N', type=int, default=3,
        help='number of results to report')

    # Comparison mode.
    add('--containment', action='store_true',
        help='evaluate containment rather than similarity')
    add('--ignore-abundance', action='store_true',
        help='do NOT use k-mer abundances if present; note: has no effect if '
             '--containment is specified')
    add('--scaled', metavar='FLOAT', type=float, default=0,
        help='downsample query to this scaled factor (yields greater speed)')

    add('-o', '--output', metavar='FILE',
        help='output CSV containing matches to this file')
    add('--md5', default=None,
        help='select the signature with this md5 as query')

    add_ksize_arg(cmd, 31)
    add_moltype_args(cmd)
def subparser(subparsers):
    """Register the SBT ``index`` command and declare its arguments.

    ``subparsers`` must provide ``add_parser``; the parser it hands back is
    populated in place and nothing is returned.  The parser's description
    and usage text come from the module-level ``__doc__`` and ``usage``.
    """
    cmd = subparsers.add_parser('index', description=__doc__, usage=usage)
    add = cmd.add_argument

    # Positional arguments.
    add('sbt_name',
        help='name to save index into; .sbt.zip or .sbt.json file')
    add('signatures', nargs='+', help='signatures to load into SBT')

    add('--from-file',
        help='a file containing a list of signatures file to load')
    add('-q', '--quiet', action='store_true',
        help='suppress non-error output')
    add_ksize_arg(cmd, 31)

    add('-d', '--n_children', metavar='D', type=int, default=2,
        help='number of children for internal nodes; default=2')
    add('--traverse-directory', action='store_true',
        help='load all signatures underneath any directories')
    add('--append', action='store_true', default=False,
        help='add signatures to an existing SBT')
    add('-x', '--bf-size', metavar='S', type=float, default=1e5,
        help='Bloom filter size used for internal nodes')
    add('-f', '--force', action='store_true',
        help='try loading all files with --traverse-directory')
    add('-s', '--sparseness', metavar='FLOAT', type=float, default=.0,
        help='What percentage of internal nodes will not be saved; ranges '
             'from 0.0 (save all nodes) to 1.0 (no nodes saved)')
    add('--scaled', metavar='FLOAT', type=float, default=0,
        help='downsample signatures to the specified scaled factor')

    add_moltype_args(cmd)
def subparser(subparsers):
    """Register the ``overlap`` command and declare its arguments.

    ``subparsers`` must provide ``add_parser``; the parser it hands back is
    populated in place and nothing is returned.
    """
    cmd = subparsers.add_parser('overlap')

    # Two required positionals, declared in order.
    for positional in ('signature1', 'signature2'):
        cmd.add_argument(positional)

    cmd.add_argument('-q', '--quiet', action='store_true',
                     help='suppress non-error output')

    add_ksize_arg(cmd, 31)
    add_moltype_args(cmd)
def subparser(subparsers):
    """Register the ``export`` command and declare its arguments.

    ``subparsers`` must provide ``add_parser``; the parser it hands back is
    populated in place and nothing is returned.
    """
    cmd = subparsers.add_parser('export')
    add = cmd.add_argument

    add('filename')
    add('-q', '--quiet', action='store_true',
        help='suppress non-error output')
    add('-o', '--output', metavar='FILE',
        help='output signature to this file (default stdout)')

    add_ksize_arg(cmd, 31)
    add_moltype_args(cmd)
def subparser(subparsers):
    """Register the ``index`` command that builds a database from a taxonomy
    spreadsheet plus signatures, and declare its arguments.

    ``subparsers`` must provide ``add_parser``; the parser it hands back is
    populated in place and nothing is returned.
    """
    cmd = subparsers.add_parser('index')
    add = cmd.add_argument

    # Positional arguments.
    add('csv', help='taxonomy spreadsheet')
    add('lca_db_out', help='output database name')
    add('signatures', nargs='+', help='one or more sourmash signatures')

    add('--scaled', metavar='S', default=10000, type=float)
    add_ksize_arg(cmd, 31)
    add_moltype_args(cmd)

    # Verbosity switches.
    add('-q', '--quiet', action='store_true',
        help='suppress non-error output')
    add('-d', '--debug', action='store_true',
        help='output debugging output')

    # Spreadsheet layout.
    add('-C', '--start-column', metavar='C', default=2, type=int,
        help='column at which taxonomic assignments start; default=2')
    add('--tabs', action='store_true',
        help='input spreadsheet is tab-delimited; default is commas')
    add('--no-headers', action='store_true',
        help='no headers present in taxonomy spreadsheet')
    add('--split-identifiers', action='store_true',
        help='split names in signatures on whitespace and period')

    add('-f', '--force', action='store_true')
    add('--traverse-directory', action='store_true',
        help='load all signatures underneath directories')
    add('--report', help='output a report on anomalies, if any')
    add('--require-taxonomy', action='store_true',
        help='ignore signatures with no taxonomy entry')
def subparser(subparsers):
    """Register the ``compare`` command and declare its arguments.

    ``subparsers`` must provide ``add_parser``; the parser it hands back is
    populated in place and nothing is returned.
    """
    cmd = subparsers.add_parser('compare')
    add = cmd.add_argument

    # Zero or more signatures may be given positionally.
    add('signatures', nargs='*', default=[],
        help='list of signatures to compare')
    add('-q', '--quiet', action='store_true',
        help='suppress non-error output')

    # Unlike sibling commands, no explicit default is passed to the
    # k-size helper here.
    add_ksize_arg(cmd)
    add_moltype_args(cmd)

    add('-o', '--output', metavar='F',
        help='file to which output will be written; default is terminal '
             '(standard output)')
    add('--ignore-abundance', action='store_true',
        help='do NOT use k-mer abundances even if present')
    add('--containment', action='store_true',
        help='calculate containment instead of similarity')
    add('--traverse-directory', action='store_true',
        help='compare all signatures underneath directories')
    add('--from-file',
        help='a file containing a list of signatures file to compare')
    add('-f', '--force', action='store_true',
        help='continue past errors in file loading')
    add('--csv', metavar='F',
        help='write matrix to specified file in CSV format (with column '
             'headers)')
    add('-p', '--processes', metavar='N', type=int, default=None,
        help='Number of processes to use to calculate similarity')
def subparser(subparsers):
    """Register the ``merge`` command and declare its arguments.

    ``subparsers`` must provide ``add_parser``; the parser it hands back is
    populated in place and nothing is returned.
    """
    cmd = subparsers.add_parser('merge')
    add = cmd.add_argument

    add('signatures', nargs='+')
    add('-q', '--quiet', action='store_true',
        help='suppress non-error output')
    add('-o', '--output', metavar='FILE',
        help='output signature to this file (default stdout)')
    add('--flatten', action='store_true',
        help='remove abundances from all signatures')

    add_ksize_arg(cmd, 31)
    add_moltype_args(cmd)
def subparser(subparsers):
    """Register the ``rename`` command and declare its arguments.

    ``subparsers`` must provide ``add_parser``; the parser it hands back is
    populated in place and nothing is returned.
    """
    cmd = subparsers.add_parser('rename')
    add = cmd.add_argument

    # Positionals: one or more signature files, then the name.
    add('sigfiles', nargs='+')
    add('name')

    add('-q', '--quiet', action='store_true',
        help='suppress non-error output')
    add('-d', '--debug', action='store_true',
        help='print debugging output')
    add('-o', '--output', metavar='FILE', help='output to this file')

    add_ksize_arg(cmd, 31)
    add_moltype_args(cmd)
def subparser(subparsers):
    """Register the ``intersect`` command and declare its arguments.

    ``subparsers`` must provide ``add_parser``; the parser it hands back is
    populated in place and nothing is returned.
    """
    cmd = subparsers.add_parser('intersect')
    add = cmd.add_argument

    add('signatures', nargs='+')
    add('-q', '--quiet', action='store_true',
        help='suppress non-error output')
    add('-o', '--output', metavar='FILE',
        help='output signature to this file (default stdout)')
    add('-A', '--abundances-from', metavar='FILE',
        help='intersect with & take abundances from this signature')

    add_ksize_arg(cmd, 31)
    add_moltype_args(cmd)
def subparser(subparsers):
    """Register the ``downsample`` command and declare its arguments.

    ``subparsers`` must provide ``add_parser``; the parser it hands back is
    populated in place and nothing is returned.
    """
    cmd = subparsers.add_parser('downsample')
    add = cmd.add_argument

    add('signatures', nargs='+')

    # Two integer targets, both defaulting to 0.
    add('--scaled', type=int, default=0,
        help='scaled value to downsample to')
    add('--num', metavar='N', type=int, default=0,
        help='num value to downsample to')

    add('-q', '--quiet', action='store_true',
        help='suppress non-error output')
    add('-o', '--output', metavar='FILE',
        help='output signature to this file (default stdout)')

    add_ksize_arg(cmd, 31)
    add_moltype_args(cmd)
def subparser(subparsers):
    """Register the ``extract`` command and declare its arguments.

    ``subparsers`` must provide ``add_parser``; the parser it hands back is
    populated in place and nothing is returned.
    """
    cmd = subparsers.add_parser('extract')
    add = cmd.add_argument

    add('signatures', nargs='+')
    add('-q', '--quiet', action='store_true',
        help='suppress non-error output')
    add('-o', '--output', metavar='FILE',
        help='output signature to this file (default stdout)')

    # Signature selection by substring.
    add('--md5', default=None,
        help='select signatures whose md5 contains this substring')
    add('--name', default=None,
        help='select signatures whose name contains this substring')

    add_ksize_arg(cmd, 31)
    add_moltype_args(cmd)
def subparser(subparsers):
    """Register the ``multigather`` command and declare its arguments.

    ``subparsers`` must provide ``add_parser``; the parser it hands back is
    populated in place and nothing is returned.
    """
    cmd = subparsers.add_parser('multigather')
    add = cmd.add_argument

    # Queries and databases are options here, not positionals.  Both use
    # action='append' with a multi-value nargs, so every occurrence of the
    # flag appends its own list of values.
    add('--query', nargs='*', default=[], action='append',
        help='query signature')
    add('--query-from-file',
        help='file containing list of signature files to query')
    add('--db', nargs='+', action='append',
        help='signatures/SBTs to search')

    # Verbosity switches.
    add('-q', '--quiet', action='store_true',
        help='suppress non-error output')
    add('-d', '--debug', action='store_true')

    add('--traverse-directory', action='store_true',
        help='search all signatures underneath directories')
    add('--threshold-bp', metavar='REAL', type=float, default=5e4,
        help='threshold (in bp) for reporting results (default=50,000)')
    add('--scaled', metavar='FLOAT', type=float, default=0,
        help='downsample query to the specified scaled factor')
    add('--ignore-abundance', action='store_true',
        help='do NOT use k-mer abundances if present')

    add_ksize_arg(cmd, 31)
    add_moltype_args(cmd)
def main():
    """Parse the command line and hand the parsed arguments to ``abundhist``.

    Returns whatever ``abundhist`` returns.
    """
    parser = argparse.ArgumentParser()
    add = parser.add_argument

    add('signatures', nargs='+')
    add('-q', '--quiet', action='store_true',
        help='suppress non-error output')

    # Output files.
    add('-o', '--output', metavar='FILE',
        help='output histogram to this file (in CSV format)')
    add('--abundances', metavar='FILE',
        help='output hashes and abundances to this file (in CSV format)')

    # Signature selection by substring.
    add('--md5', default=None,
        help='select signatures whose md5 contains this substring')
    add('--name', default=None,
        help='select signatures whose name contains this substring')

    # Histogram range and binning.
    add('--max', type=int, default=None,
        help='max value for histogram range (default none)')
    add('--min', type=int, default=None,
        help='min value for histogram range (default none)')
    add('--bins', type=int, default=10,
        help='number of bins (default 10)')

    add_ksize_arg(parser, 31)
    add_moltype_args(parser)

    # With no explicit argument list, argparse reads sys.argv.
    parsed = parser.parse_args()
    return abundhist(parsed)
def subparser(subparsers):
    """Register the ``dump`` command and declare its arguments.

    ``subparsers`` must provide ``add_parser``; the parser it hands back is
    populated in place and nothing is returned.
    """
    cmd = subparsers.add_parser('dump')
    cmd.add_argument('filenames', nargs='+')
    add_ksize_arg(cmd, 31)