示例#1
0
def para(
    factor_by_capture,
    factor_by_field,
    factor_by_table,
    keep,
    keep_regex,
    default,
    min_tips,
    proportion,
    scale,
    number,
    seed,
    newick,
    zero,
    tree,
):
    """
    Paraphyletic sampling. Walk the tree from the root towards the tips,
    collecting monophyletic subtrees as they are met and descending into the
    polyphyletic ones (those whose leaves carry more than one factor). When a
    tip is reached, or a monophyletic child with a factor different from the
    collected subtrees shows up, sample from every tip in the collected
    subtrees and start a new collection holding that child.

    At least one of --proportion, --scale or --number must be given. The
    sampled tree is printed through sf.newick when `newick` is set and
    through sf.nexus otherwise. The `zero` argument is accepted but is not
    used by this function.
    """

    import smot.algorithm as alg

    # Without any sampling size there is nothing to do
    if not proportion and not scale and not number:
        die("Please add either a --proportion or --scale or --number option")

    parsed = read_tree(tree)

    # Annotate the tips with factors before sampling
    factor_options = {
        "factor_by_capture": factor_by_capture,
        "factor_by_field": factor_by_field,
        "factor_by_table": factor_by_table,
        "default": default,
    }
    parsed.tree = factorTree(node=parsed.tree, **factor_options)

    sampling_options = {
        "keep": keep,
        "keep_regex": keep_regex,
        "proportion": proportion,
        "scale": scale,
        "number": number,
        "minTips": min_tips,
        "seed": seed,
    }
    parsed.tree = alg.sampleParaphyletic(parsed.tree, **sampling_options)

    serialize = sf.newick if newick else sf.nexus
    print(serialize(parsed))
示例#2
0
def prop(
    factor_by_capture,
    factor_by_field,
    factor_by_table,
    keep,
    keep_regex,
    default,
    min_tips,
    proportion,
    scale,
    number,
    seed,
    newick,
    zero,
    tree,
):
    """
    Proportional sampling. From every subtree that is monophyletic with
    respect to the factors, randomly keep a fraction p of the tips (p between
    0 and 1, given by --proportion), while retaining no fewer than N tips per
    branch (--min-tips).

    At least one of --proportion, --scale or --number must be given. The
    sampled tree is printed through sf.newick when `newick` is set and
    through sf.nexus otherwise. The `zero` argument is accepted but is not
    used by this function.
    """

    import smot.algorithm as alg

    # Refuse to run when no sampling size was requested
    if not proportion and not scale and not number:
        die("Please add either a --proportion or --scale or --number option")

    parsed = read_tree(tree)

    # Factor annotation comes first; the sampler works on the factored tree
    parsed.tree = factorTree(
        node=parsed.tree,
        **{
            "factor_by_capture": factor_by_capture,
            "factor_by_field": factor_by_field,
            "factor_by_table": factor_by_table,
            "default": default,
        },
    )

    sampler_kwargs = dict(
        keep=keep,
        keep_regex=keep_regex,
        proportion=proportion,
        scale=scale,
        number=number,
        minTips=min_tips,
        seed=seed,
    )
    parsed.tree = alg.sampleProportional(parsed.tree, **sampler_kwargs)

    if not newick:
        print(sf.nexus(parsed))
    else:
        print(sf.newick(parsed))
示例#3
0
def colorBranches(is_para, factor_by_capture, factor_by_field, factor_by_table,
                  colormap, tree):
    """
    Color the branches of a tree by factor and print the result via sf.nexus.

    The tree is read, annotated with factors (factorTree) and factor counts
    (alg.setFactorCounts), then colored with alg.colorPara when `is_para` is
    set and with alg.colorMono otherwise.

    `colormap` is an optional path to a TAB-delimited, two-column file mapping
    a factor name to a hexadecimal color. Colors are upper-cased and a leading
    "#" is added when missing, so both "AA10FF" and "#aa10ff" are accepted;
    after normalization a color must be exactly 7 characters long. Blank lines
    are ignored. When no colormap is given, colors are picked by
    chooseColorScheme from the sorted factor names found in the tree.
    """
    import smot.algorithm as alg

    tree = read_tree(tree)

    tree.tree = factorTree(
        node=tree.tree,
        factor_by_capture=factor_by_capture,
        factor_by_field=factor_by_field,
        factor_by_table=factor_by_table,
    )
    tree.tree = alg.setFactorCounts(tree.tree)

    if colormap:
        _colormap = dict()
        with open(colormap, "r") as handle:
            for line in handle:
                line = line.strip()
                if not line:
                    # blank lines carry no mapping
                    continue
                try:
                    clade, color = line.split("\t")
                except ValueError:
                    die("Invalid color map: expected TAB-delimited, two-column file"
                        )
                color = color.strip().upper()
                if not color.startswith("#"):
                    color = "#" + color
                # Validate the normalized color, not the raw field, so that
                # colors written without "#" are accepted
                if len(color) != 7:
                    die('Expected colors in hexadecimal (e.g., "#AA10FF")')
                _colormap[clade.strip()] = color
    else:
        factors = sorted(tree.tree.data.factorCount.keys())
        _colormap = chooseColorScheme(factors)

    if is_para:
        tree.tree = alg.colorPara(tree.tree, colormap=_colormap)
    else:
        tree.tree = alg.colorMono(tree.tree, colormap=_colormap)

    print(sf.nexus(tree))
示例#4
0
def rm_color(newick, tree):
    """
    Strip every color annotation from a tree and print what remains.

    The tree's `colmap` is reset to an empty dict and the "!color" entry is
    deleted from the `form` of every node's data that has one. Output goes
    through sf.newick when `newick` is set and through sf.nexus otherwise.
    """
    import smot.algorithm as alg

    parsed = read_tree(tree)
    parsed.colmap = dict()

    def _strip_color(data):
        # Nothing to remove when there is no form or no color entry in it
        if not data.form or "!color" not in data.form:
            return data
        del data.form["!color"]
        return data

    parsed.tree = alg.treemap(parsed.tree, _strip_color)

    writer = sf.newick if newick else sf.nexus
    print(writer(parsed))
示例#5
0
def equal(
    factor_by_capture,
    factor_by_field,
    factor_by_table,
    keep,
    default,
    max_tips,
    zero,
    newick,
    tree,
):
    """
    Equal sampling. Starting at the root, check at every node whether each
    subtree holds a single factor. Subtrees that are not monophyletic are
    entered recursively; from a monophyletic subtree at most N tips (the
    --max-tips argument) are selected. Which tips are chosen is deterministic
    but depends on the order of the leaves. Sampling a subtree means taking an
    equal number of tips from each of its descendent subtrees, recursively,
    all the way to the tips. The downsampled subtree therefore reflects the
    depth of the tree but not its breadth: with N=6 and a split into one
    subtree of 3 tips and one of 300 tips, 3 tips are still taken from each
    branch.

    The result is printed through sf.newick when `newick` is set and through
    sf.nexus otherwise. The `zero` argument is accepted but is not used by
    this function.
    """

    import smot.algorithm as alg

    parsed = read_tree(tree)

    # Attach factors to the tips so that monophyly can be decided
    factor_options = dict(
        factor_by_capture=factor_by_capture,
        factor_by_field=factor_by_field,
        factor_by_table=factor_by_table,
        default=default,
    )
    parsed.tree = factorTree(node=parsed.tree, **factor_options)

    parsed.tree = alg.sampleContext(parsed.tree, keep=keep, maxTips=max_tips)

    emit = sf.newick if newick else sf.nexus
    print(emit(parsed))
示例#6
0
def tipsed(pattern, replacement, newick, tree):
    """
    Run a regular-expression search and replace over the labels in a tree.

    `pattern` is compiled once and every non-empty node label has its matches
    substituted with `replacement`. The edited tree is printed through
    sf.newick when `newick` is set and through sf.nexus otherwise.
    """

    import smot.algorithm as alg
    import re

    # Compile up front so that an invalid pattern fails before the tree is read
    compiled = re.compile(pattern)

    def _relabel(data):
        # Nodes without a label are passed through untouched
        if not data.label:
            return data
        data.label = compiled.sub(replacement, data.label)
        return data

    parsed = read_tree(tree)
    parsed.tree = alg.treemap(parsed.tree, _relabel)

    render = sf.newick if newick else sf.nexus
    print(render(parsed))
示例#7
0
def grep(pattern, tree, invert_match, perl, newick, file):
    """
    Prune a tree to preserve only the tips that match a pattern.

    How `pattern` is interpreted depends on the flags:

      * `file` set: `pattern` is the path of a file holding one literal
        pattern per line; a tip matches when it contains any of them. Blank
        lines are ignored (an empty pattern would match every tip).
      * `perl` set: `pattern` is a regular expression searched in the tip.
      * otherwise: `pattern` is a literal substring.

    With `invert_match` the selection is reversed in all three modes, so only
    tips that do NOT match are preserved. Internal nodes are never removed by
    the cut itself; alg.clean is applied to the result afterwards. The tree is
    printed through sf.newick when `newick` is set and through sf.nexus
    otherwise.
    """

    import smot.algorithm as alg
    import re

    if file:
        with open(pattern, "r") as f:
            patterns = [p for p in (line.strip() for line in f) if p]

        def is_match(s):
            return any(p in s for p in patterns)

    elif perl:
        regex = re.compile(pattern)

        def is_match(s):
            return regex.search(s) is not None

    else:

        def is_match(s):
            return pattern in s

    # Apply the inversion once, after the matcher is chosen, so that it is
    # honoured for file-based patterns as well
    if invert_match:

        def matcher(s):
            return not is_match(s)

    else:
        matcher = is_match

    def fun_(node):
        return [
            kid for kid in node.kids
            if (not kid.data.isLeaf or matcher(kid.data.label))
        ]

    tree = read_tree(tree)
    tree.tree = alg.clean(alg.treecut(tree.tree, fun_))

    if newick:
        print(sf.newick(tree))
    else:
        print(sf.nexus(tree))
示例#8
0
def leaf(pattern, perl, tree):
    """
    Assign colors to the tips of a tree.

    smot color -p "swine" "#FFA500" -p "2020-" "#00FF00" my.tre > color.tre

    `pattern` is a sequence of (pattern, color) pairs. Each pair colors every
    tip it matches, by regular-expression search when `perl` is set and by
    substring test otherwise; pairs are applied in order, so a later pair
    overrides an earlier one on the same tip. The colors are recorded in the
    tree's `colmap` and the tree is printed through sf.nexus.
    """
    import smot.algorithm as alg
    import re

    parsed = read_tree(tree)
    tip_labels = alg.tips(parsed.tree)

    for needle, colour in pattern:
        if perl:
            compiled = re.compile(needle)
            hits = [tip for tip in tip_labels if re.search(compiled, tip)]
        else:
            hits = [tip for tip in tip_labels if needle in tip]
        for tip in hits:
            parsed.colmap[tip] = colour

    print(sf.nexus(parsed))
示例#9
0
def filter_cmd(
    # conditions
    all_match,
    some_match,
    none_match,
    larger_than,
    smaller_than,
    # actions
    remove,
    color,
    sample,
    replace,
    # factor methods
    factor_by_capture,
    factor_by_field,
    factor_by_table,
    default,
    # phylogenetic options
    patristic,
    seed,
    # boilerplate
    newick,
    tree,
):
    """
    An advanced tool for performing actions (remove, color, sample, or
    replace) on monophyletic groups that meet specified conditions (all-match,
    some-match, etc.).

    Conditions (all that are given must hold for a group to be selected):
      * larger_than / smaller_than: strict bounds on the number of tips
      * all_match: every pattern is found in every tip
      * some_match: every pattern is found in at least one tip
      * none_match: no pattern is found in any tip
    Patterns are regular expressions searched in the tip labels.

    Actions (the first one given, in this order, is used): `remove`, `color`
    (passed to alg.colorTree), `sample` (a proportion passed to
    alg.sampleProportional), `replace` (a (pattern, replacement) pair applied
    to non-empty labels). If no action is given the command stops with an
    error message.

    The tree is passed through alg.filterMono and alg.clean and printed
    through sf.newick when `newick` is set and through sf.nexus otherwise.
    """
    import smot.algorithm as alg
    import re

    tree = read_tree(tree)
    tree.tree = factorTree(
        node=tree.tree,
        factor_by_capture=factor_by_capture,
        factor_by_field=factor_by_field,
        factor_by_table=factor_by_table,
        default=default,
        patristic=patristic,
    )

    def condition(node):
        tips = alg.tips(node)
        # Each test applies only when its option was supplied
        if larger_than and not len(tips) > larger_than:
            return False
        if smaller_than and not len(tips) < smaller_than:
            return False
        if all_match and not all(
                re.search(pat, tip) for pat in all_match for tip in tips):
            return False
        if some_match and not all(
                any(re.search(pat, tip) for tip in tips)
                for pat in some_match):
            return False
        if none_match and any(
                re.search(pat, tip) for pat in none_match for tip in tips):
            return False
        return True

    if remove:

        def action(x):
            return None

    elif color:

        def action(x):
            return alg.colorTree(x, color)

    elif sample:

        def action(x):
            return alg.sampleProportional(x,
                                          proportion=sample,
                                          scale=None,
                                          minTips=3,
                                          keep_regex="",
                                          seed=seed)

    elif replace:

        def _fun(d):
            # Nodes without a label would make re.sub raise a TypeError
            if d.label:
                d.label = re.sub(replace[0], replace[1], d.label)
            return d

        def action(x):
            return alg.treemap(x, _fun)

    else:
        # Without this branch `action` would be unbound below
        die("Please specify an action (remove, color, sample, or replace)")

    tree.tree = alg.filterMono(tree.tree, condition=condition, action=action)
    tree.tree = alg.clean(tree.tree)

    if newick:
        print(sf.newick(tree))
    else:
        print(sf.nexus(tree))
示例#10
0
def factor(
    method,
    factor_by_capture,
    factor_by_field,
    factor_by_table,
    default,
    impute,
    patristic,
    newick,
    tree,
):
    """
    Impute factors, annotate tips with them, and/or tabulate them. With
    --impute, missing factors inside monophyletic branches are filled in,
    which helps, for instance, to infer clades from a handful of references
    in a tree. Three modes exist: 'table' prints a TAB-delimited table of tip
    names and factors, 'prepend' puts the factor in front of the tip label
    (separated by '|'), and 'append' puts it after the label.

    The mode name is compared case-insensitively, and any mode other than
    'table' or 'prepend' behaves as 'append'. Tips without a factor get
    `default`. In the labelling modes the tree is printed through sf.newick
    when `newick` is set and through sf.nexus otherwise.
    """

    import smot.algorithm as alg

    parsed = read_tree(tree)
    parsed.tree = factorTree(
        node=parsed.tree,
        factor_by_capture=factor_by_capture,
        factor_by_field=factor_by_field,
        factor_by_table=factor_by_table,
        default=default,
        impute=impute,
        patristic=patristic,
    )

    mode = method.lower()

    # 'table': one TAB-delimited row per tip, holding the label and the
    # (possibly imputed) factor
    if mode == "table":

        def _collect(rows, data):
            if data.isLeaf:
                shown = default if data.factor is None else data.factor
                rows.append(f"{data.label}\t{shown}")
            return rows

        for row in alg.treefold(parsed.tree, _collect, []):
            print(row)
        return

    # otherwise: write the factor into the tip labels and print the tree
    def _relabel(data):
        if not data.isLeaf:
            return data
        if data.factor is None:
            data.factor = default
        if mode == "prepend":
            data.label = f"{data.factor}|{data.label}"
        else:
            data.label = f"{data.label}|{data.factor}"
        return data

    parsed.tree = alg.treemap(parsed.tree, _relabel)

    output = sf.newick if newick else sf.nexus
    print(output(parsed))