示例#1
0
def list_ranks_db(rank_file=None, debug=False):
    '''return the taxonomic ranks reported by `taxonkit filter --list-ranks`

    Parameters
    ----------
    rank_file : str, default None
        Specify the location of the rank definition and order file; by default, taxonkit uses
        `~/taxonkit/ranks.txt`
    debug : bool, default False
        Print debugging output, e.g., system calls to `taxonkit`

    Returns
    -------
    list
        A list of taxonomic ranks, parsed from the standard output of the `taxonkit` call.

    >>> import pytaxonkit
    >>> ranks = pytaxonkit.list_ranks_db()
    >>> ranks[:5]
    ['superkingdom', 'kingdom', 'subkingdom', 'superphylum', 'phylum']
    '''
    command = ['taxonkit', 'filter', '--list-ranks']
    if rank_file:  # pragma: no cover
        command += ['--rank-file', rank_file]
    if debug:
        log(*command)
    # No input is needed for --list-ranks; an empty string is sent so stdin is closed cleanly.
    process = Popen(command, stdin=PIPE, stdout=PIPE, stderr=PIPE, universal_newlines=True)
    stdout, _stderr = process.communicate(input='')
    # The output is read as a single-column, header-less table named 'Rank'.
    table = pandas.read_csv(StringIO(stdout), header=None, names=['Rank'], index_col=False)
    return pylist(table['Rank'])
示例#2
0
def test_list_take(xs: PyList[int], x: int):
    """Check ``take(x)`` on a frozen list built from ``xs`` against the slice ``xs[:x]``.

    A ``ValueError`` raised inside the ``try`` body is accepted only when ``x`` is larger
    than ``len(xs)``.
    """
    try:
        taken: FrozenList[int] = frozenlist.of_seq(xs).take(x)
        # The comparison stays inside the ``try`` so its exception handling is unchanged.
        assert pylist(taken) == xs[:x]
    except ValueError:
        assert x > len(xs)
示例#3
0
def test_list_slice(xs: PyList[int], x: int, y: int):
    """Check that slicing a frozen list built from ``xs`` with ``[x:y]`` matches ``xs[x:y]``."""
    frozen: FrozenList[int] = frozenlist.of_seq(xs)
    sliced: FrozenList[int] = frozen[x:y]

    # Compare as plain Python lists so only the element sequence matters.
    assert pylist(sliced) == xs[x:y]
示例#4
0
def filter(ids, threads=None, equal_to=None, higher_than=None, lower_than=None,
           discard_norank=False, save_predictable=False, discard_root=False, root_taxid=None,
           blacklist=None, rank_file=None, debug=False):
    '''filter taxids by taxonomic rank (or a range of ranks)

    Executes the `taxonkit filter` command to include or exclude taxa at the specified ranks.

    Parameters
    ----------
    ids : list or iterable
        A list of taxids (ints or strings are ok)
    threads : int
        Override the default taxonkit threads setting
    equal_to : str or list, default None
        Keep only taxa at the specified rank(s); can be a string or a list of strings
    higher_than : str, default None
        Keep only taxa ranked higher than the specified rank
    lower_than : str, default None
        Keep only taxa ranked lower than the specified rank
    discard_norank : bool, default False
        Discard generic ranks without an explicit ranking order ("no rank" and "clade")
    save_predictable : bool, default False
        When `discard_norank=True`, do not discard some special ranks without order where the rank
        of the closest higher node is still lower than rank cutoff
    discard_root : bool, default False
        Discard root taxon
    root_taxid : int or str
        override taxid of the root taxon
    blacklist : str or list of strs
        A rank, or a list of ranks, to exclude; a string is passed to taxonkit unchanged
    rank_file : str, default None
        Specify the location of the rank definition and order file; by default, taxonkit uses
        `~/taxonkit/ranks.txt`
    debug : bool, default False
        Print debugging output, e.g., system calls to `taxonkit`

    Returns
    -------
    list
        A list of taxids passing the specified filters.

    Raises
    ------
    ValueError
        If both `higher_than` and `lower_than` are specified.

    >>> import pytaxonkit
    >>> taxids = [131567, 2, 1783257, 74201, 203494, 48461, 1647988, 239934, 239935, 349741]
    >>> pytaxonkit.filter(taxids, blacklist=['family', 'species'])
    [131567, 2, 1783257, 74201, 203494, 48461, 239934, 349741]
    >>> pytaxonkit.filter(taxids, lower_than='genus')
    [131567, 1783257, 239935, 349741]
    '''
    if higher_than is not None and lower_than is not None:
        raise ValueError('cannot specify "higher_than" and "lower_than" simultaneously')
    # taxonkit reads one taxid per line from standard input.
    idlist = '\n'.join(map(str, ids))
    arglist = ['taxonkit', 'filter']
    if threads:
        arglist.extend(('--threads', validate_threads(threads)))
    if equal_to:
        if isinstance(equal_to, (pylist, tuple)):
            equal_to = ','.join(equal_to)
        arglist.extend(['--equal-to', equal_to])
    if higher_than:
        arglist.extend(['--higher-than', higher_than])
    if lower_than:
        arglist.extend(['--lower-than', lower_than])
    if discard_norank:
        arglist.append('--discard-noranks')
    if save_predictable:
        arglist.append('--save-predictable-norank')
    if discard_root:  # pragma: no cover
        arglist.append('--discard-root')
    if blacklist:
        # Only join real collections: joining a bare string would split it into single
        # characters ('family' -> 'f,a,m,i,l,y') and silently blacklist nonsense ranks.
        if not isinstance(blacklist, str):
            blacklist = ','.join(blacklist)
        arglist.extend(['--black-list', blacklist])
    if root_taxid:  # pragma: no cover
        arglist.extend(['--root-taxid', str(root_taxid)])
    if rank_file:  # pragma: no cover
        arglist.extend(['--rank-file', rank_file])
    if debug:
        log(*arglist)
    proc = Popen(arglist, stdin=PIPE, stdout=PIPE, stderr=PIPE, universal_newlines=True)
    out, _ = proc.communicate(input=idlist)
    # The output is read as a single-column, header-less table named 'TaxID'.
    data = pandas.read_csv(StringIO(out), header=None, names=['TaxID'], index_col=False)
    return pylist(data.TaxID)
示例#5
0
def test_list_skip_last(xs: PyList[int], x: int):
    """Check ``skip_last(x)`` on a frozen list built from ``xs`` against the slice ``xs[:-x]``.

    Note that for ``x == 0`` the Python slice ``xs[:-0]`` is ``xs[:0]``, i.e. an empty list,
    so that is the expectation this test encodes for a zero count.
    """
    source: FrozenList[int] = frozenlist.of_seq(xs)
    remaining: FrozenList[int] = source.skip_last(x)
    assert pylist(remaining) == xs[:-x]
示例#6
0
def test_list_take_last(xs: PyList[int], x: int):
    """Check ``take_last(x)`` on a frozen list built from ``xs`` against the slice ``xs[-x:]``.

    Note that for ``x == 0`` the Python slice ``xs[-0:]`` is ``xs[0:]``, i.e. the whole list,
    so that is the expectation this test encodes for a zero count.
    """
    source: FrozenList[int] = frozenlist.of_seq(xs)
    tail: FrozenList[int] = source.take_last(x)
    assert pylist(tail) == xs[-x:]