Пример #1
0
    def test_string_complex(self):
        """Quoted, annotated and punctuated labels must round-trip unchanged."""
        cases = (
            # double-quoted label containing an apostrophe, plus a color
            # annotation and a branch length
            """("that's cool"[&!color=#000000]:0.3);""",
            # bare label with a branch length
            """(A:3);""",
            # single-quoted label containing a double quote, brackets,
            # parentheses and other punctuation
            """('that"s !@#$%^&)(*&^[]cool'[&!color=#000000]:0.3);""",
        )
        for text in cases:
            # parsing and re-serializing must reproduce the input exactly
            self.assertEqual(newick(sp.p_tree.parse(text)), text)
Пример #2
0
    def test_sampleParaphyletic(self):
        """Pin the output of alg.sampleParaphyletic for fixed seeds."""

        def fresh(text):
            # every call parses the Newick string anew
            return sp.p_tree.parse(text).tree

        def by_field(text):
            # factor a freshly parsed tree on the second label field
            return alg.factorByField(fresh(text), field=2)

        fork = "(X1|H,(X2|H,(X3|H,(X4|H,((Y1|H,(Y2|H,(Y3|H,(Y4|H,Y5|H)))),X6|S)))));"
        fork_factored = by_field(fork)

        # factored tree, sampling by proportion while keeping factor "S"
        sampled = alg.sampleParaphyletic(
            fork_factored,
            proportion=0.3,
            keep=["S"],
            minTips=2,
            seed=42,
        )
        self.assertEqual(newick(sampled), "(X1|H,(X4|H,((Y2|H,Y3|H),X6|S)));")

        # unfactored tree, sampling by number
        sampled = alg.sampleParaphyletic(fresh(fork), number=2, seed=46)
        self.assertEqual(newick(sampled), "(X2|H,Y2|H);")

        nine = "(Y|x,(U|x,(I|x,(((A|y,B|y),C|y),(D|z,(E|z,F|z))))));"

        # unfactored tree, a single tip requested
        sampled = alg.sampleParaphyletic(fresh(nine), number=1, seed=43)
        self.assertEqual(newick(sampled), "(A|y);")

        # factored tree, one and then two tips requested with the same seed
        factored_cases = (
            (1, "(I|x,(B|y,F|z));"),
            (2, "(U|x,(I|x,((A|y,C|y),(E|z,F|z))));"),
        )
        for count, expected in factored_cases:
            sampled = alg.sampleParaphyletic(
                by_field(nine),
                number=count,
                seed=43,
            )
            self.assertEqual(newick(sampled), expected)
Пример #3
0
def para(
    factor_by_capture,
    factor_by_field,
    factor_by_table,
    keep,
    keep_regex,
    default,
    min_tips,
    proportion,
    scale,
    number,
    seed,
    newick,
    zero,
    tree,
):
    """
    Paraphyletic sampling.

    The algorithm walks from the root toward the tips. At every node the
    monophyletic subtrees are collected in a list, while polyphyletic subtrees
    (those whose leaves carry several factors) are descended into. When a tip
    is reached, or when a monophyletic child has a different factor than the
    subtrees collected so far, tips are sampled from everything collected and
    a new list is started with that monophyletic child.

    The tree is read, factored, sampled and finally printed in Newick format
    when `newick` is truthy, otherwise in NEXUS format. At least one of
    `proportion`, `scale` or `number` has to be given; otherwise `die` is
    called with an explanatory message. The `zero` argument is accepted but
    not used here.
    """

    import smot.algorithm as alg

    # at least one way of stating the sample size is required
    amount_given = proportion or scale or number
    if not amount_given:
        die("Please add either a --proportion or --scale or --number option")

    factoring = dict(
        factor_by_capture=factor_by_capture,
        factor_by_field=factor_by_field,
        factor_by_table=factor_by_table,
        default=default,
    )
    sampling = dict(
        keep=keep,
        keep_regex=keep_regex,
        proportion=proportion,
        scale=scale,
        number=number,
        minTips=min_tips,
        seed=seed,
    )

    tree = read_tree(tree)
    tree.tree = factorTree(node=tree.tree, **factoring)
    tree.tree = alg.sampleParaphyletic(tree.tree, **sampling)

    render = sf.newick if newick else sf.nexus
    print(render(tree))
Пример #4
0
def prop(
    factor_by_capture,
    factor_by_field,
    factor_by_table,
    keep,
    keep_regex,
    default,
    min_tips,
    proportion,
    scale,
    number,
    seed,
    newick,
    zero,
    tree,
):
    """
    Proportional sampling.

    From each subtree that is monophyletic with respect to the factors,
    randomly sample p tips (p between 0 and 1, from --proportion), while
    retaining at least N tips in each branch (--min-tips).

    The tree is read, factored, sampled and finally printed in Newick format
    when `newick` is truthy, otherwise in NEXUS format. At least one of
    `proportion`, `scale` or `number` has to be given; otherwise `die` is
    called with an explanatory message. The `zero` argument is accepted but
    not used here.
    """

    import smot.algorithm as alg

    # refuse to run when no sample size was specified in any form
    if not proportion and not scale and not number:
        die("Please add either a --proportion or --scale or --number option")

    tree = read_tree(tree)

    factored_root = factorTree(
        node=tree.tree,
        factor_by_capture=factor_by_capture,
        factor_by_field=factor_by_field,
        factor_by_table=factor_by_table,
        default=default,
    )
    tree.tree = factored_root

    sampled_root = alg.sampleProportional(
        tree.tree,
        keep=keep,
        keep_regex=keep_regex,
        proportion=proportion,
        scale=scale,
        number=number,
        minTips=min_tips,
        seed=seed,
    )
    tree.tree = sampled_root

    output = sf.newick(tree) if newick else sf.nexus(tree)
    print(output)
Пример #5
0
def rm_color(newick, tree):
    """
    Strip every color annotation from a tree.

    The tree's color map is reset to an empty dict and the "!color" entry is
    deleted from the `form` of every node that has one. The result is printed
    in Newick format when `newick` is truthy, otherwise in NEXUS format.
    """
    import smot.algorithm as alg

    def strip_color(data):
        # nodes without any form data are passed through untouched
        annotations = data.form
        if annotations and "!color" in annotations:
            del annotations["!color"]
        return data

    tree = read_tree(tree)
    tree.colmap = {}
    tree.tree = alg.treemap(tree.tree, strip_color)

    render = sf.newick if newick else sf.nexus
    print(render(tree))
Пример #6
0
def equal(
    factor_by_capture,
    factor_by_field,
    factor_by_table,
    keep,
    default,
    max_tips,
    zero,
    newick,
    tree,
):
    """
    Equal sampling.

    Walk from the root toward the tips and check, at every node, whether each
    subtree contains a single factor. Subtrees that are not monophyletic are
    recursed into; from monophyletic subtrees up to N tips (the --max-tips
    argument) are selected. The selection is deterministic but depends on the
    ordering of the leaves. Sampling a subtree means taking an equal number of
    tips from each of its descendent subtrees, recursively down to the tips.
    The downsampled subtree therefore captures the depth of the tree but does
    not represent its breadth: with N=6 and a split into one subtree of 3 tips
    and one of 300 tips, 3 tips are still sampled from each branch.

    The result is printed in Newick format when `newick` is truthy, otherwise
    in NEXUS format. The `zero` argument is accepted but not used here.
    """

    import smot.algorithm as alg

    factoring = dict(
        factor_by_capture=factor_by_capture,
        factor_by_field=factor_by_field,
        factor_by_table=factor_by_table,
        default=default,
    )

    tree = read_tree(tree)
    tree.tree = factorTree(node=tree.tree, **factoring)
    tree.tree = alg.sampleContext(tree.tree, keep=keep, maxTips=max_tips)

    render = sf.newick if newick else sf.nexus
    print(render(tree))
Пример #7
0
def tipsed(pattern, replacement, newick, tree):
    """
    Search and replace patterns in tip labels.

    `pattern` is compiled as a regular expression and every match in a
    non-empty node label is substituted with `replacement`. The resulting tree
    is printed in Newick format when `newick` is truthy, otherwise in NEXUS
    format.
    """

    import smot.algorithm as alg
    import re

    # compile once, before the tree is read, and reuse for every node
    regex = re.compile(pattern)

    def substitute(data):
        label = data.label
        # nodes with an empty or missing label are left as they are
        if label:
            data.label = regex.sub(replacement, label)
        return data

    tree = read_tree(tree)
    tree.tree = alg.treemap(tree.tree, substitute)

    render = sf.newick if newick else sf.nexus
    print(render(tree))
Пример #8
0
def grep(pattern, tree, invert_match, perl, newick, file):
    """
    Prune a tree to preserve only the tips that match a pattern.

    Matching modes:
      * `file` truthy: `pattern` is the path of a file holding one fixed
        string per line; a tip matches when any of those strings occurs in
        its label. Blank lines are skipped, since an empty string is a
        substring of every label and would make every tip match.
      * `perl` truthy: `pattern` is a regular expression searched for in the
        label.
      * otherwise: `pattern` is a fixed string looked up in the label.

    When `invert_match` is truthy the selection is reversed in all three
    modes, i.e. only tips that do NOT match are kept.

    The pruned tree is printed in Newick format when `newick` is truthy,
    otherwise in NEXUS format.
    """

    import smot.algorithm as alg
    import re

    if file:
        with open(pattern, "r") as f:
            stripped = (line.strip() for line in f)
            patterns = [p for p in stripped if p]

        def matches(s):
            return any(p in s for p in patterns)

    elif perl:
        regex = re.compile(pattern)

        def matches(s):
            return regex.search(s) is not None

    else:

        def matches(s):
            return pattern in s

    # apply the inversion once, so that it is honored by every matching mode
    if invert_match:

        def keep_tip(s):
            return not matches(s)

    else:
        keep_tip = matches

    def fun_(node):
        # internal nodes are always retained here; only leaves are filtered
        return [
            kid for kid in node.kids
            if (not kid.data.isLeaf or keep_tip(kid.data.label))
        ]

    tree = read_tree(tree)
    tree.tree = alg.clean(alg.treecut(tree.tree, fun_))

    if newick:
        print(sf.newick(tree))
    else:
        print(sf.nexus(tree))
Пример #9
0
 def test_sampleN(self):
     """Sampling two tips from a five-tip tree yields the expected pair."""
     tree = sp.p_tree.parse("(B,(A,C,E),D);").tree
     sampled = alg.sampleN(tree, 2)
     self.assertEqual(newick(sampled), "(B,A);")
Пример #10
0
    def test_sampleProportional(self):
        """Pin alg.sampleProportional output for fixed seeds."""

        def plain(text):
            # freshly parsed, unfactored tree
            return sp.p_tree.parse(text).tree

        def factored(text):
            # freshly parsed tree, factored on the second label field
            return alg.factorByField(plain(text), field=2)

        def check(tree, expected, **options):
            result = alg.sampleProportional(tree, **options)
            self.assertEqual(newick(result), expected)

        six = "(((A,B),C),(D,(E,F)));"
        # sampling is across root children
        check(plain(six), "(A,C);", proportion=0.1, minTips=2, seed=43)

        seven = "(O|x,(((A|y,B|y),C|y),(D|z,(E|z,F|z))));"
        check(
            factored(seven),
            "(O|x,((A|y,B|y),(D|z,F|z)));",
            proportion=0.1,
            minTips=2,
            seed=46,
        )

        # --- selection by number works for unfactored trees
        unfactored_cases = (
            # sometimes a basal strain is selected
            (1, 46, "(O|x);"),
            # sometimes it isn't (random)
            (1, 44, "(C|y);"),
            # sometimes both root branches will be sampled
            (3, 46, "(O|x,(C|y,F|z));"),
            # sometimes they won't
            (3, 40, "(C|y,(D|z,E|z));"),
        )
        for count, seed, expected in unfactored_cases:
            check(plain(seven), expected, number=count, seed=seed)

        # --- selection by number works for factored trees
        factored_cases = (
            (1, "(O|x,(A|y,E|z));"),
            (2, "(O|x,((A|y,B|y),(D|z,F|z)));"),
        )
        for count, expected in factored_cases:
            check(factored(seven), expected, number=count, seed=43)

        # --- high numbers cleanly select everything
        # (the same assertion is made twice, each time on a fresh tree)
        for _ in range(2):
            check(factored(seven), seven, number=100)
Пример #11
0
 def test_stringify(self):
     """A parsed tree with '|'-delimited labels serializes back to its input."""
     text = "(B|a,(A|b,C|b,E|b),D|c);"
     rendered = newick(sp.p_tree.parse(text))
     self.assertEqual(rendered, text)
Пример #12
0
    def test_nexus(self):
        """Exercise the NEXUS parsers from a single tree line up to whole files."""
        # a lone "tree ..." line must parse to the same tree as the bare
        # Newick string it contains
        self.assertEqual(
            sp.p_nexus_tree_line.parse(
                "\ttree tree_1 = [&R] (B,(A,C,E),D);\n"),
            sp.p_tree.parse("(B,(A,C,E),D);").tree,
        )

        # body of a taxa block; only 'B' carries a color annotation
        taxa_block = "\n".join([
            "\tdimensions ntax=3",
            "\ttaxlabels",
            "\t'A'",
            "\t'B'[&!color=#999999]",
            "\t'C'",
            "\t'D'",
            "\t'E'",
            ";",
        ])

        # the taxa block is expected to parse to a label -> color mapping
        self.assertEqual(sp.p_taxa_block.parse(taxa_block), dict(B="#999999"))
        # the same taxa block wrapped in its "begin taxa; ... end;" section
        taxa_section = "\n".join([
            "begin taxa;",
            "\tdimensions ntax=3",
            "\ttaxlabels",
            "\t'A'",
            "\t'B'[&!color=#999999]",
            "\t'C'",
            "\t'D'",
            "\t'E'",
            ";",
            # the next two literals have no comma between them, so they are
            # concatenated into a single "end;" element and the joined text
            # has no trailing newline
            # NOTE(review): possibly a missing comma -- confirm the intent
            "end;"
            "",
        ])
        # a section is expected to parse to a (section name, content) pair
        self.assertEqual(sp.p_nexus_section.parse(taxa_section),
                         ("taxa", dict(B="#999999")))
        tree_section = "\n".join([
            "begin trees;",
            "\ttree tree_1 = [&R] (B,(A,C,E),D);",
            "end;",
        ])
        # element [1] of the parsed section is what gets serialized to Newick
        self.assertEqual(newick(sp.p_nexus_section.parse(tree_section)[1]),
                         "(B,(A,C,E),D);")

        # a complete file: header, taxa section without colors, trees section
        nexus_file = "\n".join([
            "#NEXUS",
            "begin taxa;",
            "\tdimensions ntax=3",
            "\ttaxlabels",
            "\t'A'",
            "\t'B'",
            "\t'C'",
            "\t'D'",
            "\t'E'",
            ";",
            "end;",
            "",
            "begin trees;",
            "\ttree tree_1 = [&R] (B,(A,C,E),D);",
            "end;",
            "",
        ])
        # the dedicated NEXUS parser must yield the same tree as the Newick
        # string
        self.assertEqual(
            sp.p_nexus.parse(nexus_file).tree,
            sp.p_tree.parse("(B,(A,C,E),D);").tree)
        # the general tree parser must also handle the NEXUS file
        self.assertEqual(
            sp.p_tree.parse(nexus_file).tree,
            sp.p_tree.parse("(B,(A,C,E),D);").tree)

        # a larger file with a colored taxon, quoted labels, branch lengths
        # and a trailing "figtree" settings section; the lines below contain
        # literal tab characters
        big_nexus_file = "\n".join([
            """#NEXUS""",
            """begin taxa;""",
            """	dimensions ntax=6;""",
            """	taxlabels""",
            """	'X1|H'[&!color=#ff0000]""",
            """	'X2|H'""",
            """	'X3|H'""",
            """	'X4|H'""",
            """	'X5|H'""",
            """	'X6|S'""",
            """;""",
            """end;""",
            """""",
            """begin trees;""",
            """	tree tree_1 = [&R] ('X1|H':0.3,('X2|H':0.3,('X3|H':0.3,('X4|H':0.3,('X5|H':0.3,'X6|S':0.3):0.3):0.3):0.3):0.3);""",
            """end;""",
            """""",
            """begin figtree;""",
            """	set appearance.backgroundColorAttribute="Default";""",
            """	set appearance.backgroundColour=#ffffff;""",
            """	set appearance.branchColorAttribute="User selection";""",
            """	set appearance.branchColorGradient=false;""",
            """	set appearance.branchLineWidth=1.0;""",
            """	set appearance.branchMinLineWidth=0.0;""",
            """	set appearance.branchWidthAttribute="Fixed";""",
            """	set appearance.foregroundColour=#000000;""",
            """	set appearance.hilightingGradient=false;""",
            """	set appearance.selectionColour=#2d3680;""",
            """	set branchLabels.colorAttribute="User selection";""",
            """	set branchLabels.displayAttribute="Branch times";""",
            """	set branchLabels.fontName="Al Bayan";""",
            """	set branchLabels.fontSize=8;""",
            """	set branchLabels.fontStyle=0;""",
            """	set branchLabels.isShown=false;""",
            """	set branchLabels.significantDigits=4;""",
            """	set layout.expansion=0;""",
            """	set layout.layoutType="RECTILINEAR";""",
            """	set layout.zoom=0;""",
            """	set legend.attribute=null;""",
            """	set legend.fontSize=10.0;""",
            """	set legend.isShown=false;""",
            """	set legend.significantDigits=4;""",
            """	set nodeBars.barWidth=4.0;""",
            """	set nodeBars.displayAttribute=null;""",
            """	set nodeBars.isShown=false;""",
            """	set nodeLabels.colorAttribute="User selection";""",
            """	set nodeLabels.displayAttribute="Node ages";""",
            """	set nodeLabels.fontName="Al Bayan";""",
            """	set nodeLabels.fontSize=8;""",
            """	set nodeLabels.fontStyle=0;""",
            """	set nodeLabels.isShown=false;""",
            """	set nodeLabels.significantDigits=4;""",
            """	set nodeShape.colourAttribute=null;""",
            """	set nodeShape.isShown=false;""",
            """	set nodeShape.minSize=10.0;""",
            """	set nodeShape.scaleType=Width;""",
            """	set nodeShape.shapeType=Circle;""",
            """	set nodeShape.size=4.0;""",
            """	set nodeShape.sizeAttribute=null;""",
            """	set polarLayout.alignTipLabels=false;""",
            """	set polarLayout.angularRange=0;""",
            """	set polarLayout.rootAngle=0;""",
            """	set polarLayout.rootLength=100;""",
            """	set polarLayout.showRoot=true;""",
            """	set radialLayout.spread=0.0;""",
            """	set rectilinearLayout.alignTipLabels=false;""",
            """	set rectilinearLayout.curvature=0;""",
            """	set rectilinearLayout.rootLength=100;""",
            """	set scale.offsetAge=0.0;""",
            """	set scale.rootAge=1.0;""",
            """	set scale.scaleFactor=1.0;""",
            """	set scale.scaleRoot=false;""",
            """	set scaleAxis.automaticScale=true;""",
            """	set scaleAxis.fontSize=8.0;""",
            """	set scaleAxis.isShown=false;""",
            """	set scaleAxis.lineWidth=1.0;""",
            """	set scaleAxis.majorTicks=1.0;""",
            """	set scaleAxis.origin=0.0;""",
            """	set scaleAxis.reverseAxis=false;""",
            """	set scaleAxis.showGrid=true;""",
            """	set scaleBar.automaticScale=true;""",
            """	set scaleBar.fontSize=10.0;""",
            """	set scaleBar.isShown=true;""",
            """	set scaleBar.lineWidth=1.0;""",
            """	set scaleBar.scaleRange=0.0;""",
            """	set tipLabels.colorAttribute="User selection";""",
            """	set tipLabels.displayAttribute="Names";""",
            """	set tipLabels.fontName="Al Bayan";""",
            """	set tipLabels.fontSize=8;""",
            """	set tipLabels.fontStyle=0;""",
            """	set tipLabels.isShown=true;""",
            """	set tipLabels.significantDigits=4;""",
            """	set trees.order=false;""",
            """	set trees.orderType="increasing";""",
            """	set trees.rooting=false;""",
            """	set trees.rootingType="User Selection";""",
            """	set trees.transform=false;""",
            """	set trees.transformType="cladogram";""",
            """end;""",
        ])
        # the tree parsed from the big file is expected to equal the tree
        # parsed from the Newick string of its trees section alone
        self.assertEqual(
            sp.p_tree.parse(big_nexus_file).tree,
            sp.p_tree.parse(
                "('X1|H':0.3,('X2|H':0.3,('X3|H':0.3,('X4|H':0.3,('X5|H':0.3,'X6|S':0.3):0.3):0.3):0.3):0.3);"
            ).tree,
        )
Пример #13
0
def filter_cmd(
    # conditions
    all_match,
    some_match,
    none_match,
    larger_than,
    smaller_than,
    # actions
    remove,
    color,
    sample,
    replace,
    # factor methods
    factor_by_capture,
    factor_by_field,
    factor_by_table,
    default,
    # phylogenetic options
    patristic,
    seed,
    # boilerplate
    newick,
    tree,
):
    """
    An advanced tool for performing actions (remove, color, sample, or
    replace) on monophyletic groups that meet specified conditions (all-match,
    some-match, etc.).

    Conditions (a condition that is not set is ignored; all set conditions
    must hold):
      * `larger_than` / `smaller_than`: strict bounds on the number of tips.
      * `all_match`: every pattern matches every tip.
      * `some_match`: every pattern matches at least one tip.
      * `none_match`: no pattern matches any tip.

    Actions (the first one that is set wins, in this order):
      * `remove`: replace the group with None.
      * `color`: color the group with the given color.
      * `sample`: proportionally sample the group with that proportion,
        keeping a minimum of 3 tips and using `seed`.
      * `replace`: a (pattern, replacement) pair applied to node labels.

    The process exits with an error message when no action is given. The
    result is cleaned and printed in Newick format when `newick` is truthy,
    otherwise in NEXUS format.
    """
    import smot.algorithm as alg
    import re
    import sys

    # Without an action there is nothing to hand to filterMono; fail early
    # with a readable message instead of an unbound-variable error later on.
    if not (remove or color or sample or replace):
        sys.exit("Please specify an action: remove, color, sample, or replace")

    tree = read_tree(tree)
    tree.tree = factorTree(
        node=tree.tree,
        factor_by_capture=factor_by_capture,
        factor_by_field=factor_by_field,
        factor_by_table=factor_by_table,
        default=default,
        patristic=patristic,
    )

    def condition(node):
        tips = alg.tips(node)
        if larger_than and not len(tips) > larger_than:
            return False
        if smaller_than and not len(tips) < smaller_than:
            return False
        # every pattern must match every tip
        if all_match and not all(
                re.search(pat, tip) for pat in all_match for tip in tips):
            return False
        # every pattern must match at least one tip
        if some_match and not all(
                any(re.search(pat, tip) for tip in tips)
                for pat in some_match):
            return False
        # no pattern may match any tip
        if none_match and any(
                re.search(pat, tip) for pat in none_match for tip in tips):
            return False
        return True

    if remove:
        action = lambda x: None
    elif color:
        action = lambda x: alg.colorTree(x, color)
    elif sample:
        action = lambda x: alg.sampleProportional(x,
                                                  proportion=sample,
                                                  scale=None,
                                                  minTips=3,
                                                  keep_regex="",
                                                  seed=seed)
    else:  # replace; the guard above ensures one of the four actions is set

        def _fun(d):
            # skip nodes with an empty or missing label; re.sub raises a
            # TypeError when handed None
            if d.label:
                d.label = re.sub(replace[0], replace[1], d.label)
            return d

        action = lambda x: alg.treemap(x, _fun)

    tree.tree = alg.filterMono(tree.tree, condition=condition, action=action)
    tree.tree = alg.clean(tree.tree)

    if newick:
        print(sf.newick(tree))
    else:
        print(sf.nexus(tree))
Пример #14
0
def factor(
    method,
    factor_by_capture,
    factor_by_field,
    factor_by_table,
    default,
    impute,
    patristic,
    newick,
    tree,
):
    """
    Impute, annotate with, and/or tabulate factors.

    With --impute, missing factors in monophyletic branches are filled in,
    which helps, for example, to infer clades from a few references in a tree.

    `method` (case-insensitive) selects one of three modes: 'table' prints a
    TAB-delimited table of tip names and factors, 'prepend' puts the factor in
    front of the tip label (delimited with '|'), and any other value, such as
    'append', adds it to the end. Leaves without a factor use `default`.

    In the labelling modes the tree is printed in Newick format when `newick`
    is truthy, otherwise in NEXUS format.
    """

    import smot.algorithm as alg

    tree = read_tree(tree)
    tree.tree = factorTree(
        node=tree.tree,
        factor_by_capture=factor_by_capture,
        factor_by_field=factor_by_field,
        factor_by_table=factor_by_table,
        default=default,
        impute=impute,
        patristic=patristic,
    )

    mode = method.lower()

    # table mode: one "label<TAB>factor" row per leaf, with the (possibly
    # imputed) factor or the default when none is set
    if mode == "table":

        def collect(rows, data):
            if data.isLeaf:
                value = default if data.factor is None else data.factor
                rows.append(f"{data.label}\t{value}")
            return rows

        for row in alg.treefold(tree.tree, collect, []):
            print(row)
        return

    # labelling modes: write the factor into each tip label and print the tree
    def relabel(data):
        if not data.isLeaf:
            return data
        if data.factor is None:
            data.factor = default
        if mode == "prepend":
            data.label = f"{data.factor}|{data.label}"
        else:
            data.label = f"{data.label}|{data.factor}"
        return data

    tree.tree = alg.treemap(tree.tree, relabel)

    render = sf.newick if newick else sf.nexus
    print(render(tree))