def test_collapse():
    """Collapse two explicitly named clades and check the leaf counts."""
    # Close the input deterministically, even if parsing raises: an
    # unfinished fileinput.input() leaves the module-global state active and
    # later fileinput.input() calls fail with RuntimeError.
    with fileinput.input("tests/treefiles/basic.trees") as lines:
        trees = list(NewickParser().consume(lines))
    clades = {"left": ("A", "B", "C"), "right": ("D", "E", "F")}
    collapsed = Collapse(clades).consume(trees)
    # These groups are monophyletic in the first 5 of the 6 basic trees, so
    # those trees collapse to 2 leaves and the last one keeps all 6.
    for n, t in enumerate(collapsed):
        assert len(t.get_leaves()) == (2 if n < 5 else 6)
def test_length():
    """Every value yielded by Length must be a non-negative float."""
    # The whole pipeline is consumed inside the ``with`` block (consume() may
    # read its input lazily), so the file is closed even when an assertion
    # fails part-way through. A half-read fileinput.input() would otherwise
    # make later fileinput.input() calls raise RuntimeError.
    with fileinput.input("tests/treefiles/basic.trees") as lines:
        trees = NewickParser().consume(lines)
        for length in Length().consume(trees):
            # isinstance() is the idiomatic type check; float subclasses
            # are accepted as well.
            assert isinstance(length, float)
            assert length >= 0.0
def test_file_collapse():
    """Collapse clades listed in an argument file and check leaf counts."""
    # Close the input deterministically, even if parsing raises: an
    # unfinished fileinput.input() leaves the module-global state active and
    # later fileinput.input() calls fail with RuntimeError.
    with fileinput.input("tests/treefiles/basic.trees") as lines:
        trees = list(NewickParser().consume(lines))
    collapsed = Collapse(filename="tests/argfiles/collapse.txt").consume(trees)
    # These groups are monophyletic in the first 5 of the 6 basic trees, so
    # those trees collapse to 2 leaves and the last one keeps all 6.
    for n, t in enumerate(collapsed):
        assert len(t.get_leaves()) == (2 if n < 5 else 6)
def test_height():
    """Every value yielded by Height must be a non-negative float."""
    # The whole pipeline is consumed inside the ``with`` block (consume() may
    # read its input lazily), so the file is closed even when an assertion
    # fails part-way through. A half-read fileinput.input() would otherwise
    # make later fileinput.input() calls raise RuntimeError.
    with fileinput.input("tests/treefiles/basic.trees") as lines:
        trees = NewickParser().consume(lines)
        for height in Height().consume(trees):
            # isinstance() is the idiomatic type check; float subclasses
            # are accepted as well.
            assert isinstance(height, float)
            assert height >= 0.0
def test_uniq():
    """Uniq must reduce the basic trees to their distinct topologies."""
    # Close the input deterministically, even if parsing raises: an
    # unfinished fileinput.input() leaves the module-global state active and
    # later fileinput.input() calls fail with RuntimeError.
    with fileinput.input("tests/treefiles/basic.trees") as lines:
        trees = list(NewickParser().consume(lines))
    uniq = Uniq().consume(trees)
    # The 6 basic trees comprise 5 unique topologies.
    # This is a pretty weak test, but...
    assert sum(1 for _ in uniq) == 5
def test_file_prune():
    """Pruning with a taxa file must remove A, B and C from every tree."""
    # Consume the pipeline inside the ``with`` block so the input is closed
    # even if an assertion fails mid-stream; a half-read fileinput.input()
    # would make later fileinput.input() calls raise RuntimeError.
    with fileinput.input("tests/treefiles/basic.trees") as lines:
        trees = NewickParser().consume(lines)
        pruned = Prune(filename="tests/argfiles/taxa_abc.txt").consume(trees)
        for t in pruned:
            leaves = t.get_leaf_names()
            assert not any(x in leaves for x in ("A", "B", "C"))
def test_file_subtree():
    """Subtree driven by a taxa file must yield the expected leaf counts."""
    expected_taxa = (3, 3, 3, 3, 3, 6)
    # Consume the pipeline inside the ``with`` block so the input is closed
    # even if an assertion fails mid-stream; a half-read fileinput.input()
    # would make later fileinput.input() calls raise RuntimeError.
    with fileinput.input("tests/treefiles/basic.trees") as lines:
        trees = NewickParser().consume(lines)
        subtrees = Subtree(filename="tests/argfiles/taxa_abc.txt").consume(trees)
        # zip() stops at the shorter input, so only trees that are actually
        # produced get compared against an expected count.
        for t, n in zip(subtrees, expected_taxa):
            assert len(t.get_leaves()) == n
def test_prune():
    """Pruning taxon A must remove it and leave B-F in every tree."""
    # Consume the pipeline inside the ``with`` block so the input is closed
    # even if an assertion fails mid-stream; a half-read fileinput.input()
    # would make later fileinput.input() calls raise RuntimeError.
    with fileinput.input("tests/treefiles/basic.trees") as lines:
        trees = NewickParser().consume(lines)
        pruned = Prune(["A"]).consume(trees)
        for t in pruned:
            leaves = t.get_leaf_names()
            assert "A" not in leaves
            assert all(x in leaves for x in ("B", "C", "D", "E", "F"))
def test_inverse_prune():
    """Inverse pruning on A, B must keep only those two taxa."""
    # Consume the pipeline inside the ``with`` block so the input is closed
    # even if an assertion fails mid-stream; a half-read fileinput.input()
    # would make later fileinput.input() calls raise RuntimeError.
    with fileinput.input("tests/treefiles/basic.trees") as lines:
        trees = NewickParser().consume(lines)
        pruned = Prune(["A", "B"], inverse=True).consume(trees)
        for t in pruned:
            leaves = t.get_leaf_names()
            assert all(x in leaves for x in ("A", "B"))
            assert not any(x in leaves for x in ("C", "D", "E", "F"))
def test_subtree():
    """Subtree built from a command-line style argument string."""
    subtree = Subtree.init_from_args("A,B,C")
    expected_taxa = (3, 3, 3, 3, 3, 6)
    # Consume the pipeline inside the ``with`` block so the input is closed
    # even if an assertion fails mid-stream; a half-read fileinput.input()
    # would make later fileinput.input() calls raise RuntimeError.
    with fileinput.input("tests/treefiles/basic.trees") as lines:
        trees = NewickParser().consume(lines)
        subtrees = subtree.consume(trees)
        # zip() stops at the shorter input, so only trees that are actually
        # produced get compared against an expected count.
        for t, n in zip(subtrees, expected_taxa):
            assert len(t.get_leaves()) == n
def test_identity():
    """Make sure scaling with a factor of 1.0 changes nothing."""
    # Close the input deterministically, even if parsing raises: an
    # unfinished fileinput.input() leaves the module-global state active and
    # later fileinput.input() calls fail with RuntimeError.
    with fileinput.input("tests/treefiles/basic.trees") as lines:
        trees = list(NewickParser().consume(lines))
    unscaled_trees = Scale(1.0).consume(trees)
    # Compare the serialised form of each tree before and after scaling.
    for t1, t2 in zip(trees, unscaled_trees):
        assert t1.write() == t2.write()
def test_annotation_prune():
    """Prune by annotation (f1 == "0") and check that A, B, C are gone."""
    # Consume the pipeline inside the ``with`` block so the input is closed
    # even if an assertion fails mid-stream; a half-read fileinput.input()
    # would make later fileinput.input() calls raise RuntimeError.
    with fileinput.input("tests/treefiles/basic.trees") as lines:
        trees = NewickParser().consume(lines)
        annotated = Annotate(
            filename="tests/argfiles/annotation.csv", key="taxon"
        ).consume(trees)
        pruned = Prune(attribute="f1", value="0").consume(annotated)
        for t in pruned:
            leaves = t.get_leaf_names()
            assert not any(x in leaves for x in ("A", "B", "C"))
def test_clades():
    """Every node of a Support-processed tree carries a support in [0, 1]."""
    # Consume the pipeline inside the ``with`` block so the input is closed
    # even if an assertion fails mid-stream; a half-read fileinput.input()
    # would make later fileinput.input() calls raise RuntimeError.
    with fileinput.input("tests/treefiles/basic.trees") as lines:
        trees = NewickParser().consume(lines)
        supported = Support(filename="/dev/null").consume(trees)
        for t in supported:
            for node in t.traverse():
                assert hasattr(node, "support")
                # isinstance() is the idiomatic type check; float
                # subclasses are accepted as well.
                assert isinstance(node.support, float)
                assert 0 <= node.support <= 1
def test_plot(dummy=False):
    """Smoke test: run Plot over the basic trees, writing to a temp file.

    ``dummy`` is forwarded unchanged to ``Plot``.
    """
    # The ``with`` block replaces the trailing lines.close(), which was
    # skipped whenever plotting raised. That left the module-global fileinput
    # state active, so later fileinput.input() calls could raise RuntimeError.
    with fileinput.input("tests/treefiles/basic.trees") as lines:
        trees = NewickParser().consume(lines)
        with tempfile.NamedTemporaryFile() as fp:
            plot = Plot(dummy=dummy, output=fp.name, height=600, width=800)
            # Drain the pipeline; the test only checks that nothing raises.
            for _ in plot.consume(trees):
                pass
def test_categorical_annotation():
    """Smoke test: run ``clades`` over categorically annotated trees."""
    # This is just to make sure the clade probability calculator doesn't
    # erroneously try to calculate means etc. of categorical annotations.
    #
    # Consume the pipeline inside the ``with`` block so the input is closed
    # even if the pipeline raises; a half-read fileinput.input() would make
    # later fileinput.input() calls raise RuntimeError.
    with fileinput.input("tests/treefiles/basic.trees") as lines:
        trees = NewickParser().consume(lines)
        pipeline = build_pipeline(
            "annotate -f tests/argfiles/categorical_annotation.csv -k taxon | clades",
            trees)
        # Drain the pipeline; the test passes if nothing raises.
        for _ in pipeline:
            pass
def test_annotation_subtree():
    """Subtree selected by annotation must yield the expected leaf counts."""
    expected_taxa = (3, 3, 3, 3, 3, 6)
    # Consume the pipeline inside the ``with`` block so the input is closed
    # even if an assertion fails mid-stream; a half-read fileinput.input()
    # would make later fileinput.input() calls raise RuntimeError.
    with fileinput.input("tests/treefiles/basic.trees") as lines:
        trees = NewickParser().consume(lines)
        subtrees = build_pipeline(
            "annotate -f tests/argfiles/annotation.csv -k taxon | subtree --attribute f1 --value 0",
            trees)
        # zip() stops at the shorter input, so only trees that are actually
        # produced get compared against an expected count.
        for t, n in zip(subtrees, expected_taxa):
            assert len(t.get_leaves()) == n
def test_scale():
    """Scaling by a factor must scale each tree's height by that factor."""
    import math

    scale_factor = 0.42
    # Close the input deterministically, even if parsing raises: an
    # unfinished fileinput.input() leaves the module-global state active and
    # later fileinput.input() calls fail with RuntimeError.
    with fileinput.input("tests/treefiles/basic.trees") as lines:
        trees = list(NewickParser().consume(lines))
    # Heights must be recorded before the scaled pipeline is consumed.
    old_heights = [t.get_farthest_leaf()[1] for t in trees]
    scaled = Scale(scale_factor).consume(trees)
    new_heights = [t.get_farthest_leaf()[1] for t in scaled]
    for old, new in zip(old_heights, new_heights):
        # 0.42 is not exactly representable in binary floating point, so the
        # scaled height may differ from ``old * scale_factor`` by rounding
        # error; compare with a tolerance rather than with ``==``.
        assert math.isclose(new, old * scale_factor)
def test_rename_from_file():
    """Renaming from a file must replace A with X and keep B-F."""
    # Consume the pipeline inside the ``with`` block so the input is closed
    # even if an assertion fails mid-stream; a half-read fileinput.input()
    # would make later fileinput.input() calls raise RuntimeError.
    with fileinput.input("tests/treefiles/basic.trees") as lines:
        trees = NewickParser().consume(lines)
        renamed = Rename(filename="tests/argfiles/rename.txt").consume(trees)
        for t in renamed:
            leaves = t.get_leaf_names()
            assert "A" not in leaves
            assert "X" in leaves
            assert all(x in leaves for x in ("B", "C", "D", "E", "F"))
def test_stat():
    """Stat must report 6 trees, 6 taxa and a consistent topology count."""
    stat = Stat()
    # Consume the pipeline inside the ``with`` block so the input is closed
    # even if the pipeline raises; a half-read fileinput.input() would make
    # later fileinput.input() calls raise RuntimeError.
    with fileinput.input("tests/treefiles/basic.trees") as lines:
        trees = NewickParser().consume(lines)
        # The counters are checked only after the whole stream is drained.
        for _ in stat.consume(trees):
            pass
    assert stat.tree_count == 6
    assert stat.taxa_count == 6
    assert stat.topology_count <= stat.tree_count
def test_annotate(treefile, argfilepath):
    """Annotated trees must expose f1, f2 and f3 on every leaf.

    ``treefile`` and ``argfilepath`` are supplied by the caller (presumably
    pytest fixtures defined outside this file).

    NOTE(review): a second ``test_annotate`` is defined further down in this
    file. If both stay in the same module the later definition rebinds the
    name, so only that one would be collected.
    """
    parsed = NewickParser().consume(treefile('basic.trees'))
    annotator = Annotate(filename=argfilepath("annotation.csv"), key="taxon")
    for tree in annotator.consume(parsed):
        # Serialising with an empty feature list must not raise.
        tree.write(features=[])
        for leaf in tree.get_leaves():
            for attr in ("f1", "f2", "f3"):
                assert hasattr(leaf, attr)
def test_attribute_collapse():
    """Collapse clades defined by the f1 annotation and check leaf counts."""
    # Close the input deterministically, even if parsing raises: an
    # unfinished fileinput.input() leaves the module-global state active and
    # later fileinput.input() calls fail with RuntimeError.
    with fileinput.input("tests/treefiles/basic.trees") as lines:
        trees = list(NewickParser().consume(lines))
    annotated = Annotate("tests/argfiles/annotation.csv", "taxon").consume(trees)
    # f1 in the annotations applied above corresponds to the same left/right
    # split as the other collapse tests.
    collapsed = Collapse(attribute="f1").consume(annotated)
    # These groups are monophyletic in the first 5 of the 6 basic trees, so
    # those trees collapse to 2 leaves and the last one keeps all 6.
    for n, t in enumerate(collapsed):
        assert len(t.get_leaves()) == (2 if n < 5 else 6)
def test_plot_annotated(dummy=False):
    """Smoke test: plot annotated trees coloured by the f1 attribute.

    ``dummy`` is forwarded unchanged to ``Plot``.
    """
    # The ``with`` block replaces the trailing lines.close(), which was
    # skipped whenever plotting raised. That left the module-global fileinput
    # state active, so later fileinput.input() calls could raise RuntimeError.
    with fileinput.input("tests/treefiles/basic.trees") as lines:
        trees = NewickParser().consume(lines)
        # NOTE(review): ``--f`` (double dash) differs from the ``-f`` used by
        # the other pipelines in this file; presumably the option parser
        # accepts it as an abbreviated long option - confirm.
        annotated_trees = build_pipeline(
            "annotate --f tests/argfiles/annotation.csv -k taxon",
            source=trees)
        with tempfile.NamedTemporaryFile() as fp:
            plot = Plot(output=fp.name, attribute="f1", dummy=dummy)
            # Drain the pipeline; the test only checks that nothing raises.
            for _ in plot.consume(annotated_trees):
                pass
def test_monophyletic_dedupe():
    """Dedupe must leave exactly one leaf per taxon in every tree."""
    taxa = ("A", "B", "C", "E", "F")
    # Close the input deterministically, even if parsing raises: an
    # unfinished fileinput.input() leaves the module-global state active and
    # later fileinput.input() calls fail with RuntimeError.
    with fileinput.input("tests/treefiles/monophyletic_dupe_taxa.trees") as lines:
        trees = list(NewickParser().consume(lines))
    # Sanity check: every input tree has at least one taxon that does not
    # occur exactly once.
    for t in trees:
        leaves = t.get_leaf_names()
        assert not all(leaves.count(x) == 1 for x in taxa)
    deduped = Dedupe().consume(trees)
    for t in deduped:
        leaves = t.get_leaf_names()
        assert all(leaves.count(x) == 1 for x in taxa)
def test_annotate():
    """Annotated trees must expose f1, f2 and f3 on every leaf.

    NOTE(review): another ``test_annotate`` (taking ``treefile`` and
    ``argfilepath``) is defined earlier in this file. If both stay in the
    same module this later definition rebinds the name, so the earlier one
    would never be collected.
    """
    # Consume the pipeline inside the ``with`` block so the input is closed
    # even if an assertion fails mid-stream; a half-read fileinput.input()
    # would make later fileinput.input() calls raise RuntimeError.
    with fileinput.input("tests/treefiles/basic.trees") as lines:
        trees = NewickParser().consume(lines)
        annotated = Annotate("tests/argfiles/annotation.csv", "taxon").consume(trees)
        for t in annotated:
            # Serialising with an empty feature list must not raise.
            t.write(features=[])
            for leaf in t.get_leaves():
                assert hasattr(leaf, "f1")
                assert hasattr(leaf, "f2")
                assert hasattr(leaf, "f3")
def test_rename_with_remove():
    """Rename with ``remove=True`` must leave only the renamed taxa."""
    mapping = {"A": "X", "B": "Y", "C": "Z"}
    # Consume the pipeline inside the ``with`` block so the input is closed
    # even if an assertion fails mid-stream; a half-read fileinput.input()
    # would make later fileinput.input() calls raise RuntimeError.
    with fileinput.input("tests/treefiles/basic.trees") as lines:
        trees = NewickParser().consume(lines)
        renamed = Rename(mapping, remove=True).consume(trees)
        for t in renamed:
            leaves = t.get_leaf_names()
            assert all(x in leaves for x in ("X", "Y", "Z"))
            assert not any(
                x in leaves for x in ("A", "B", "C", "D", "E", "F"))
def test_dedupe():
    """Dedupe must drop the duplicated A leaf from every tree."""
    # Close the input deterministically, even if parsing raises: an
    # unfinished fileinput.input() leaves the module-global state active and
    # later fileinput.input() calls fail with RuntimeError.
    with fileinput.input("tests/treefiles/duplicate_taxa.trees") as lines:
        trees = list(NewickParser().consume(lines))
    # Sanity check on the fixture: 6 leaves, with A present twice.
    for t in trees:
        orig_leaves = t.get_leaf_names()
        assert len(orig_leaves) == 6
        assert orig_leaves.count("A") == 2
        assert all(orig_leaves.count(x) == 1 for x in ("B", "C", "E", "F"))
    deduped = Dedupe().consume(trees)
    for t in deduped:
        leaves = t.get_leaf_names()
        assert len(leaves) == 5
        assert all(leaves.count(x) == 1 for x in ("A", "B", "C", "E", "F"))
def test_min_med_max_uniq():
    """Uniq lengths must satisfy min <= median <= max, tree by tree."""
    # Close the input deterministically, even if parsing raises: an
    # unfinished fileinput.input() leaves the module-global state active and
    # later fileinput.input() calls fail with RuntimeError.
    with fileinput.input("tests/treefiles/basic.trees") as lines:
        trees = list(NewickParser().consume(lines))
    # The three pipelines are built in the original order and are only
    # consumed, in lockstep, by the zip() below.
    min_uniq = Uniq(lengths="min").consume(trees)
    min_lengths = Length().consume(min_uniq)
    med_uniq = Uniq(lengths="median").consume(trees)
    med_lengths = Length().consume(med_uniq)
    max_uniq = Uniq(lengths="max").consume(trees)
    max_lengths = Length().consume(max_uniq)
    # Descriptive names replace the easily confused ``l`` / ``L`` pair.
    for shortest, median, longest in zip(min_lengths, med_lengths, max_lengths):
        assert shortest <= median <= longest
def test_pipeline():
    """Silly long pipeline to stress test build_pipeline."""
    # Consume the pipeline inside the ``with`` block so the input is closed
    # even if an assertion fails mid-stream; a half-read fileinput.input()
    # would make later fileinput.input() calls raise RuntimeError.
    with fileinput.input("tests/treefiles/basic.trees") as lines:
        trees = NewickParser().consume(lines)
        output = build_pipeline(
            "cat -s 2 | rename -f tests/argfiles/rename.txt | prune X,B | dedupe | uniq | support --sort | stat",
            source=trees)
        for t in output:
            leaves = t.get_leaf_names()
            # No taxon may appear more than once.
            assert all(leaves.count(x) == 1 for x in leaves)
            # A, X and B must be absent from the output.
            assert "A" not in leaves
            assert "X" not in leaves
            assert "B" not in leaves
            assert all(x in leaves for x in ("C", "D", "E", "F"))
def test_roundtrip():
    """Make sure scaling by x and then 1/x changes nothing."""
    # Close the input deterministically, even if parsing raises: an
    # unfinished fileinput.input() leaves the module-global state active and
    # later fileinput.input() calls fail with RuntimeError.
    with fileinput.input("tests/treefiles/basic.trees") as lines:
        trees = list(NewickParser().consume(lines))

    # Exact equality is intentional: multiplying by 2.0 and then by 0.5 is
    # lossless in binary floating point (barring overflow/underflow).
    heights = Height().consume(trees)
    scaled_heights = build_pipeline("scale -s 2.0 | scale -s 0.5 | height", trees)
    for before, after in zip(heights, scaled_heights):
        assert before == after

    lengths = Length().consume(trees)
    scaled_lengths = build_pipeline("scale -s 2.0 | scale -s 0.5 | length", trees)
    for before, after in zip(lengths, scaled_lengths):
        assert before == after
def test_extract_annotations(treefile, argfilepath):
    """Annotate the basic trees, extract the annotations to CSV, check them.

    ``treefile`` and ``argfilepath`` are supplied by the caller (presumably
    pytest fixtures defined outside this file).
    """
    trees = list(NewickParser().consume(treefile('basic.trees')))
    with tempfile.NamedTemporaryFile(mode="r") as fp:
        command = "annotate -f {0} -k taxon | annotate --extract -f {1}".format(
            argfilepath('annotation.csv'), fp.name)
        # Drain the pipeline; the extraction step writes to ``fp.name``.
        for _ in build_pipeline(command, trees):
            pass

        # Read back what the pipeline wrote through the still-open handle.
        fp.seek(0)
        reader = csv.DictReader(fp)
        columns = reader.fieldnames
        for field in ("f1", "f2", "f3"):
            assert field in columns
        assert "tree_number" not in columns

        # Only rows for taxon A are checked against known values.
        for row in reader:
            if row["name"] != "A":
                continue
            assert row["f1"] == "0"
            assert row["f2"] == "1"
            assert row["f3"] == "1"