def _flag_diff_leaves(node, attr, diff, usecolor):
    """Return a ladderized copy of *node* with leaves renamed for display.

    The copy is rebuilt from ``node.write(features=[attr])``. Every leaf is
    named after its *attr* value; names found in *diff* get a " ***" suffix
    and, when *usecolor* is true, are wrapped with ``color(..., "red")``.
    """
    clone = Tree(node.write(features=[attr]))
    clone.ladderize()
    for leaf in clone.iter_leaves():
        leaf.name = getattr(leaf, attr)
        if leaf.name in diff:
            leaf.name += " ***"
            if usecolor:
                leaf.name = color(leaf.name, "red")
    return clone


def _clip_to_right_margin(drawing, maxwidth):
    """Drop the left part of an ASCII drawing wider than *maxwidth*.

    Mimics a view scrolled to the right margin: every line loses the same
    number of leading characters. Drawings that already fit are returned
    untouched.
    """
    lines = drawing.split("\n")
    width = max([len(line) for line in lines])
    if width <= maxwidth:
        return drawing
    start = width - maxwidth
    return "\n".join([line[start + 1:] for line in lines])


def show_difftable_topo(difftable, attr1, attr2, usecolor=False):
    """Print a table with the ASCII topology of every mismatching node pair.

    difftable -- iterable of 6-item rows ``(dist, side1, side2, diff, n1, n2)``;
                 an empty table prints nothing.
    attr1     -- attribute used to name the leaves of each ``n1`` node.
    attr2     -- attribute used to name the leaves of each ``n2`` node.
    usecolor  -- when true, leaves listed in ``diff`` are colored red.

    Rows are shown from the largest to the smallest distance, followed by a
    log line with the summed distance and the number of rows.
    """
    if not difftable:
        return

    maxcolwidth = 80
    showtable = []
    total_dist = 0

    # Sort on the distance only: comparing whole rows would fall through to
    # comparing sets and tree nodes whenever two distances are equal.
    for dist, side1, side2, diff, n1, n2 in sorted(
            difftable, key=lambda row: row[0], reverse=True):
        total_dist += dist

        tree1 = _flag_diff_leaves(n1, attr1, diff, usecolor)
        tree2 = _flag_diff_leaves(n2, attr2, diff, usecolor)
        topo1 = _clip_to_right_margin(
            tree1.get_ascii(show_internal=False, compact=False), maxcolwidth)
        topo2 = _clip_to_right_margin(
            tree2.get_ascii(show_internal=False, compact=False), maxcolwidth)

        showtable.append([
            "%0.2g" % dist,
            "%d vs %d tips\n(%d diffs)" % (len(side1), len(side2), len(diff)),
            topo1,
            topo2,
        ])

    print_table(showtable,
                header=["Dist", "#diffs", "Tree1", "Tree2"],
                max_col_width=maxcolwidth,
                wrap_style="wrap",
                row_line=True)

    log.info("Total euclidean distance:\t%0.4f\tMismatching nodes:\t%d" %
             (total_dist, len(difftable)))
def _flag_diff_leaves(node, attr, diff, usecolor):
    """Return a ladderized copy of *node* with leaves renamed for display.

    The copy is rebuilt from ``node.write(features=[attr])``. Every leaf is
    named after its *attr* value; names found in *diff* get a " ***" suffix
    and, when *usecolor* is true, are wrapped with ``color(..., "red")``.
    """
    clone = Tree(node.write(features=[attr]))
    clone.ladderize()
    for leaf in clone.iter_leaves():
        leaf.name = getattr(leaf, attr)
        if leaf.name in diff:
            leaf.name += " ***"
            if usecolor:
                leaf.name = color(leaf.name, "red")
    return clone


def _clip_to_right_margin(drawing, maxwidth):
    """Drop the left part of an ASCII drawing wider than *maxwidth*.

    Mimics a view scrolled to the right margin: every line loses the same
    number of leading characters. Drawings that already fit are returned
    untouched.
    """
    lines = drawing.split("\n")
    width = max([len(line) for line in lines])
    if width <= maxwidth:
        return drawing
    start = width - maxwidth
    return "\n".join([line[start + 1:] for line in lines])


def show_difftable_topo(difftable, attr1, attr2, usecolor=False):
    """Print a table with the ASCII topology of every mismatching node pair.

    difftable -- iterable of 6-item rows ``(dist, side1, side2, diff, n1, n2)``;
                 an empty table prints nothing.
    attr1     -- attribute used to name the leaves of each ``n1`` node.
    attr2     -- attribute used to name the leaves of each ``n2`` node.
    usecolor  -- when true, leaves listed in ``diff`` are colored red.

    Rows are shown from the largest to the smallest distance, followed by a
    log line with the summed distance and the number of rows.
    """
    if not difftable:
        return

    maxcolwidth = 80
    showtable = []
    total_dist = 0

    # Sort on the distance only: comparing whole rows would fall through to
    # comparing sets and tree nodes whenever two distances are equal.
    for dist, side1, side2, diff, n1, n2 in sorted(
            difftable, key=lambda row: row[0], reverse=True):
        total_dist += dist

        tree1 = _flag_diff_leaves(n1, attr1, diff, usecolor)
        tree2 = _flag_diff_leaves(n2, attr2, diff, usecolor)
        topo1 = _clip_to_right_margin(
            tree1.get_ascii(show_internal=False, compact=False), maxcolwidth)
        topo2 = _clip_to_right_margin(
            tree2.get_ascii(show_internal=False, compact=False), maxcolwidth)

        showtable.append([
            "%0.2g" % dist,
            "%d vs %d tips\n(%d diffs)" % (len(side1), len(side2), len(diff)),
            topo1,
            topo2,
        ])

    print_table(showtable,
                header=["Dist", "#diffs", "Tree1", "Tree2"],
                max_col_width=maxcolwidth,
                wrap_style="wrap",
                row_line=True)

    log.info("Total euclidean distance:\t%0.4f\tMismatching nodes:\t%d" %
             (total_dist, len(difftable)))
def _root_on_outgroup(tree, outgroup):
    """Root *tree* on *outgroup*, a non-empty list of node names.

    Several names are resolved to their common ancestor; a single name is
    looked up with ``search_nodes`` and its first match is used.
    """
    if len(outgroup) > 1:
        anchor = tree.get_common_ancestor(outgroup)
    else:
        anchor = tree.search_nodes(name=outgroup[0])[0]
    tree.set_outgroup(anchor)


def main(argv):
    """Compare every target tree against a reference tree and report distances.

    ``argv`` is the list of command-line arguments handed to argparse. One
    row per target tree is written, either as tab-delimited text to the
    ``-o`` file or as a fixed-width table on standard output. Values that
    could not be computed for a tree are reported as -1.

    Exits with status 1 when both positional target trees and
    ``--targets_file`` are given.
    """
    parser = argparse.ArgumentParser(
        description=__DESCRIPTION__,
        formatter_class=argparse.RawDescriptionHelpFormatter)

    parser.add_argument("target_trees", metavar='target_trees', type=str, nargs="*",
                        help='a list of target tree files')

    parser.add_argument("--targets_file", dest="targets_file", type=str,
                        help="""path to a file containing target trees, one per line""")

    parser.add_argument("-o", dest="output", type=str,
                        help="""Path to the tab delimited report file""")

    parser.add_argument("-r", dest="reftree", type=str, required=True,
                        help="""Reference tree""")

    parser.add_argument("--outgroup", dest="outgroup", nargs="+",
                        help="""outgroup used to root reference and target trees before distance computation""")

    parser.add_argument("--expand_polytomies", dest="polytomies", action="store_true",
                        help="""expand politomies if necessary""")

    parser.add_argument("--unrooted", dest="unrooted", action="store_true",
                        help="""compare trees as unrooted""")

    parser.add_argument("--min_support", dest="min_support", type=float, default=0.0,
                        help=("min support value for branches to be counted in the distance computation (RF, treeko and refTree/targeGene compatibility)"))

    parser.add_argument("--extract_species", dest="extract_species", action="store_true",
                        help=("When used, reference tree is assumed to contain species names, "
                              "while target trees as expected to be gene trees. "
                              "Species name will be extracted from gene tree nodes and treeko "
                              "will be used if duplication events are found."))

    parser.add_argument("--spname_delimiter", dest="spname_delimiter", type=str, default="_",
                        help=("species code delimiter in node names"))

    parser.add_argument("--spname_field", dest="spname_field", type=int, default=-1,
                        help=("position of the species code extracted from node names. -1 = last field"))

    parser.add_argument("--collateral", dest="collateral", action='store_true',
                        help=(""))

    parser.add_argument("--ref_attr", dest="ref_attr", type=str,
                        help=("attribute in ref tree used as leaf name"))

    parser.add_argument("--target_attr", dest="target_attr", type=str,
                        help=("attribute in target tree used as leaf name"))

    args = parser.parse_args(argv)

    # Single-argument print(...) and explicit write() calls behave the same
    # on Python 2 and Python 3.
    print(__DESCRIPTION__)

    if args.targets_file and args.target_trees:
        sys.stderr.write('The use of targets_file and targets at the same time is not supported.\n')
        sys.exit(1)

    if args.targets_file:
        # tree_iterator items are unpacked below as (seedid, tree) pairs.
        target_trees = tree_iterator(args.targets_file)
    else:
        target_trees = args.target_trees

    t = Tree(args.reftree)

    if args.ref_attr:
        for lf in t.iter_leaves():
            lf._origname = lf.name
            if args.ref_attr not in lf.features:
                # Show the offending leaf before getattr() fails on it.
                print(lf)
            lf.name = getattr(lf, args.ref_attr)

    if args.outgroup:
        _root_on_outgroup(t, args.outgroup)

    reftree_len = len(t)
    reftree_edges = (reftree_len * 2) - 2

    HEADER = ("target tree", 'dups', 'subtrees', 'used trees', 'treeko', "RF", "maxRF", 'normRF',
              "%reftree", "%genetree", "avgSize", "minSize", "common tips", "refSize", "targetSize")
    COL_WIDTHS = [20, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7]

    def species_of(name):
        # Species code = the configured field of the delimiter-split node name.
        return name.split(args.spname_delimiter)[args.spname_field]

    report = open(args.output, "w") if args.output else None
    try:
        if report is not None:
            report.write('# ' + ctime() + '\n')
            report.write('# ' + ' '.join(sys.argv) + '\n')
            report.write('#' + '\t'.join(HEADER) + '\n')
        else:
            print('# ' + ctime())
            print('# ' + ' '.join(sys.argv))
            print_table([HEADER], fix_col_width=COL_WIDTHS, wrap_style='wrap')

        for counter, tfile in enumerate(target_trees):
            if args.targets_file:
                seedid, tfile = tfile
            else:
                seedid = None

            if args.extract_species:
                tt = PhyloTree(tfile, sp_naming_function=species_of)
            else:
                tt = Tree(tfile)

            if args.target_attr:
                for lf in tt.iter_leaves():
                    lf._origname = lf.name
                    lf.name = getattr(lf, args.target_attr)

            if args.outgroup:
                _root_on_outgroup(tt, args.outgroup)

            if args.target_trees:
                fname = os.path.basename(tfile)
            else:
                fname = '%05d' % counter

            # Per-tree defaults; -1 means "not computed". common_names is
            # reset here too, so a tree skipped by the subtree limit below
            # cannot raise NameError or reuse the previous tree's value.
            min_size, avg_size = -1, -1
            total_rf, max_rf, norm_rf = -1, -1, -1
            treeko_d = -1
            ref_branches_in_target, target_branches_in_ref = -1, -1
            target_tree_len = -1
            used_subtrees = -1
            common_names = -1

            if args.extract_species:
                ntrees, ndups, sp_trees = tt.get_speciation_trees(
                    autodetect_duplications=True, newick_only=True)

                # Trees yielding 1000 or more subtrees are reported with the
                # defaults only (presumably to bound the running time).
                if ntrees < 1000:
                    all_rf = []
                    all_max_rf = []
                    tree_sizes = []
                    ref_found = []
                    target_found = []
                    common_names = 0

                    for subtree_nw in sp_trees:
                        if seedid and not args.collateral and (seedid not in subtree_nw):
                            continue
                        subtree = PhyloTree(subtree_nw, sp_naming_function=species_of)

                        # Only necessary if the RF function is going to filter
                        # by support value. It slows down the analysis.
                        if args.min_support:
                            subtree_content = subtree.get_cached_content(store_attr='name')
                            for n in subtree.traverse():
                                if n.children:
                                    n.support = tt.get_common_ancestor(subtree_content[n]).support

                        rf, maxr, common, p1, p2, d1, d2 = t.robinson_foulds(
                            subtree,
                            expand_polytomies=args.polytomies,
                            unrooted_trees=args.unrooted,
                            attr_t2='species',
                            min_support_t2=args.min_support)

                        if maxr > 0 and p1 and p2:
                            all_rf.append(rf)
                            tree_sizes.append(len(common))
                            all_max_rf.append(maxr)
                            common_names = max(common_names, len(common))

                            ref_found.append(float(len(p2 & p1)) / reftree_edges)

                            valid_target = p2 - d2
                            # valid edges in target that are not leaves
                            p2bis = set([p for p in valid_target
                                         if len(p[0]) > 1 and len(p[1]) > 1])
                            if p2bis:
                                incompatible_target_branches = float(len(valid_target - p1))
                                target_found.append(
                                    1 - (incompatible_target_branches / len(valid_target)))

                    if all_rf:
                        # Treeko speciation distance: size-weighted mean of the
                        # normalised RF values of the used subtrees.
                        alld = [rf_i / float(max_i) for rf_i, max_i in zip(all_rf, all_max_rf)]
                        a = numpy.sum([d * size for d, size in zip(alld, tree_sizes)])
                        b = float(numpy.sum(tree_sizes))
                        treeko_d = a / b
                        total_rf = numpy.mean(all_rf)
                        norm_rf = numpy.mean(alld)
                        max_rf = numpy.max(all_max_rf)
                        ref_branches_in_target = numpy.mean(ref_found)
                        target_branches_in_ref = numpy.mean(target_found) if target_found else -1
                        target_tree_len = numpy.mean(tree_sizes)
                        used_subtrees = len(all_rf)
            else:
                target_tree_len = len(tt)
                ndups, ntrees, used_subtrees = 0, 1, 1
                treeko_d = -1
                total_rf, max_rf, common, p1, p2, d1, d2 = tt.robinson_foulds(
                    t, expand_polytomies=args.polytomies, unrooted_trees=args.unrooted)
                common_names = len(common)
                if max_rf:
                    norm_rf = total_rf / float(max_rf)
                if p1 and p2:
                    sizes = [len(p) for p in p2 ^ p1]
                    if sizes:
                        avg_size = sum(sizes) / float(len(sizes))
                        min_size = min(sizes)
                    else:
                        min_size, avg_size = 0, 0
                    ref_branches_in_target = float(len(p2 & p1)) / reftree_edges
                else:
                    ref_branches_in_target = 0.0
                    target_branches_in_ref = 0.0
                    min_size, avg_size = -1, -1

            if report is not None:
                fields = (fname, ndups, ntrees, used_subtrees, treeko_d, total_rf, max_rf, norm_rf,
                          ref_branches_in_target, target_branches_in_ref,
                          avg_size, min_size, common_names, reftree_len, target_tree_len)
                report.write('\t'.join([str(value) for value in fields]) + '\n')
            else:
                fields = (fname[-30:], ndups, ntrees, used_subtrees, treeko_d, total_rf, max_rf, norm_rf,
                          '%0.4f' % ref_branches_in_target, '%0.4f' % target_branches_in_ref,
                          avg_size, min_size, common_names, reftree_len, target_tree_len)
                print_table([[istr(value) for value in fields]],
                            fix_col_width=COL_WIDTHS, wrap_style='cut')
    finally:
        # Close the report even when a target tree fails to load or compare.
        if report is not None:
            report.close()
def main(argv):
    """Diff every target tree against the reference tree and print a report.

    ``argv`` is the list of command-line arguments handed to argparse; the
    parsed namespace is stored in the module-level ``args``. The kind of
    report is selected with ``--report`` (topology, diffs, diffs_tab or
    summary).
    """
    global args

    parser = argparse.ArgumentParser(
        description=__DESCRIPTION__,
        formatter_class=argparse.RawDescriptionHelpFormatter)

    parser.add_argument("target_trees", type=str, nargs="+",
                        help='a list of target tree files')

    parser.add_argument("-r", dest='reftree', type=str,
                        help='The reference tree to compare with')

    parser.add_argument("--ref_attr", dest="ref_attr", default="name",
                        help=("Defines the attribute in REFERENCE tree that will be used"
                              " to perform the comparison"))

    parser.add_argument("--target_attr", dest="target_attr", default="name",
                        help=("Defines the attribute in TARGET tree that will be used"
                              " to perform the comparison"))

    # store_false: passing --fullsearch turns matrix reduction off below.
    parser.add_argument("--fullsearch", dest="fullsearch", action="store_false",
                        help=("Enable this option if duplicated attributes (i.e. name)"
                              " exist in reference or target trees."))

    parser.add_argument("--quite", dest="quite", action="store_true",
                        help="Do not show process information")

    parser.add_argument("--report", dest="report",
                        choices=["topology", "diffs", "diffs_tab", "summary"],
                        default="topology",
                        help="Different format for the comparison results")

    parser.add_argument("--ncbi", dest="ncbi", action="store_true",
                        help="If enabled, it will use the ETE ncbi_taxonomy module to for ncbi taxid translation")

    parser.add_argument("--color", dest="color", action="store_true",
                        help="If enabled, it will use colors in some of the report")

    args = parser.parse_args(argv)

    # NOTE(review): the original also bound a local ``log = logging`` here; it
    # was never read in this function, so it has been dropped.
    if args.quite:
        logging.basicConfig(format='%(message)s', level=logging.WARNING)
    else:
        logging.basicConfig(format='%(message)s', level=logging.INFO)

    t1 = Tree(args.reftree)

    if args.ncbi:
        from common import ncbi
        ncbi.connect_database()

    for ttree in args.target_trees:
        t2 = Tree(ttree)

        if args.ncbi:
            taxids = set([getattr(leaf, args.ref_attr) for leaf in t1.iter_leaves()])
            taxids.update([getattr(leaf, args.target_attr) for leaf in t2.iter_leaves()])
            taxid2name = ncbi.get_taxid_translator(taxids)
            for leaf in t1.get_leaves() + t2.get_leaves():
                try:
                    leaf.name = taxid2name.get(int(leaf.name), leaf.name)
                except ValueError:
                    # Non-numeric names (e.g. already translated) are kept.
                    pass

        difftable = treediff(t1, t2, args.ref_attr, args.target_attr,
                             reduce_matrix=args.fullsearch)

        if args.report == "topology":
            show_difftable_topo(difftable, args.ref_attr, args.target_attr,
                                usecolor=args.color)
        elif args.report == "diffs":
            show_difftable(difftable)
        elif args.report == "diffs_tab":
            show_difftable_tab(difftable)
        elif args.report == "summary":
            # Only the first two values of the result are needed here.
            rf, rf_max = t1.robinson_foulds(t2, attr_t1=args.ref_attr,
                                            attr_t2=args.target_attr)[:2]
            show_difftable_summary(difftable, rf, rf_max)
def main(argv):
    """Diff every target tree against the reference tree and print a report.

    ``argv`` is the list of command-line arguments handed to argparse; the
    parsed namespace is stored in the module-level ``args``. The kind of
    report is selected with ``--report`` (topology, diffs, diffs_tab or
    summary).
    """
    global args

    parser = argparse.ArgumentParser(
        description=__DESCRIPTION__,
        formatter_class=argparse.RawDescriptionHelpFormatter)

    parser.add_argument("target_trees", type=str, nargs="+",
                        help="a list of target tree files")

    parser.add_argument("-r", dest="reftree", type=str,
                        help="The reference tree to compare with")

    parser.add_argument("--ref_attr", dest="ref_attr", default="name",
                        help=("Defines the attribute in REFERENCE tree that will be used"
                              " to perform the comparison"))

    parser.add_argument("--target_attr", dest="target_attr", default="name",
                        help=("Defines the attribute in TARGET tree that will be used"
                              " to perform the comparison"))

    # store_false: passing --fullsearch turns matrix reduction off below.
    parser.add_argument("--fullsearch", dest="fullsearch", action="store_false",
                        help=("Enable this option if duplicated attributes (i.e. name)"
                              " exist in reference or target trees."))

    parser.add_argument("--quite", dest="quite", action="store_true",
                        help="Do not show process information")

    parser.add_argument("--report", dest="report",
                        choices=["topology", "diffs", "diffs_tab", "summary"],
                        default="topology",
                        help="Different format for the comparison results")

    parser.add_argument("--ncbi", dest="ncbi", action="store_true",
                        help="If enabled, it will use the ETE ncbi_taxonomy module to for ncbi taxid translation")

    parser.add_argument("--color", dest="color", action="store_true",
                        help="If enabled, it will use colors in some of the report")

    args = parser.parse_args(argv)

    # NOTE(review): the original also bound a local ``log = logging`` here; it
    # was never read in this function, so it has been dropped.
    if args.quite:
        logging.basicConfig(format="%(message)s", level=logging.WARNING)
    else:
        logging.basicConfig(format="%(message)s", level=logging.INFO)

    t1 = Tree(args.reftree)

    if args.ncbi:
        from common import ncbi
        ncbi.connect_database()

    for ttree in args.target_trees:
        t2 = Tree(ttree)

        if args.ncbi:
            taxids = set([getattr(leaf, args.ref_attr) for leaf in t1.iter_leaves()])
            taxids.update([getattr(leaf, args.target_attr) for leaf in t2.iter_leaves()])
            taxid2name = ncbi.get_taxid_translator(taxids)
            for leaf in t1.get_leaves() + t2.get_leaves():
                try:
                    leaf.name = taxid2name.get(int(leaf.name), leaf.name)
                except ValueError:
                    # Non-numeric names (e.g. already translated) are kept.
                    pass

        difftable = treediff(t1, t2, args.ref_attr, args.target_attr,
                             reduce_matrix=args.fullsearch)

        if args.report == "topology":
            show_difftable_topo(difftable, args.ref_attr, args.target_attr,
                                usecolor=args.color)
        elif args.report == "diffs":
            show_difftable(difftable)
        elif args.report == "diffs_tab":
            show_difftable_tab(difftable)
        elif args.report == "summary":
            # Only the first two values of the result are needed here.
            rf, rf_max = t1.robinson_foulds(t2, attr_t1=args.ref_attr,
                                            attr_t2=args.target_attr)[:2]
            show_difftable_summary(difftable, rf, rf_max)
def _root_on_outgroup(tree, outgroup):
    """Root *tree* on *outgroup*, a non-empty list of node names.

    Several names are resolved to their common ancestor; a single name is
    looked up with ``search_nodes`` and its first match is used.
    """
    if len(outgroup) > 1:
        anchor = tree.get_common_ancestor(outgroup)
    else:
        anchor = tree.search_nodes(name=outgroup[0])[0]
    tree.set_outgroup(anchor)


def main(argv):
    """Compare every target tree against a reference tree and report distances.

    ``argv`` is the list of command-line arguments handed to argparse. One
    row per target tree is written, either as tab-delimited text to the
    ``-o`` file or as a fixed-width table on standard output. Values that
    could not be computed for a tree are reported as -1.

    Exits with status 1 when both positional target trees and
    ``--targets_file`` are given.
    """
    parser = argparse.ArgumentParser(
        description=__DESCRIPTION__,
        formatter_class=argparse.RawDescriptionHelpFormatter)

    parser.add_argument("target_trees", metavar='target_trees', type=str, nargs="*",
                        help='a list of target tree files')

    parser.add_argument("--targets_file", dest="targets_file", type=str,
                        help="""path to a file containing target trees, one per line""")

    parser.add_argument("-o", dest="output", type=str,
                        help="""Path to the tab delimited report file""")

    parser.add_argument("-r", dest="reftree", type=str, required=True,
                        help="""Reference tree""")

    parser.add_argument("--outgroup", dest="outgroup", nargs="+",
                        help="""outgroup used to root reference and target trees before distance computation""")

    parser.add_argument("--expand_polytomies", dest="polytomies", action="store_true",
                        help="""expand politomies if necessary""")

    parser.add_argument("--unrooted", dest="unrooted", action="store_true",
                        help="""compare trees as unrooted""")

    parser.add_argument("--min_support", dest="min_support", type=float, default=0.0,
                        help=("min support value for branches to be counted in the distance computation (RF, treeko and refTree/targeGene compatibility)"))

    parser.add_argument("--extract_species", dest="extract_species", action="store_true",
                        help=("When used, reference tree is assumed to contain species names, "
                              "while target trees as expected to be gene trees. "
                              "Species name will be extracted from gene tree nodes and treeko "
                              "will be used if duplication events are found."))

    parser.add_argument("--spname_delimiter", dest="spname_delimiter", type=str, default="_",
                        help=("species code delimiter in node names"))

    parser.add_argument("--spname_field", dest="spname_field", type=int, default=-1,
                        help=("position of the species code extracted from node names. -1 = last field"))

    parser.add_argument("--collateral", dest="collateral", action='store_true',
                        help=(""))

    parser.add_argument("--ref_attr", dest="ref_attr", type=str,
                        help=("attribute in ref tree used as leaf name"))

    parser.add_argument("--target_attr", dest="target_attr", type=str,
                        help=("attribute in target tree used as leaf name"))

    args = parser.parse_args(argv)

    # Single-argument print(...) and explicit write() calls behave the same
    # on Python 2 and Python 3.
    print(__DESCRIPTION__)

    if args.targets_file and args.target_trees:
        sys.stderr.write('The use of targets_file and targets at the same time is not supported.\n')
        sys.exit(1)

    if args.targets_file:
        # tree_iterator items are unpacked below as (seedid, tree) pairs.
        target_trees = tree_iterator(args.targets_file)
    else:
        target_trees = args.target_trees

    t = Tree(args.reftree)

    if args.ref_attr:
        for lf in t.iter_leaves():
            lf._origname = lf.name
            if args.ref_attr not in lf.features:
                # Show the offending leaf before getattr() fails on it.
                print(lf)
            lf.name = getattr(lf, args.ref_attr)

    if args.outgroup:
        _root_on_outgroup(t, args.outgroup)

    reftree_len = len(t)
    reftree_edges = (reftree_len * 2) - 2

    HEADER = ("target tree", 'dups', 'subtrees', 'used trees', 'treeko', "RF", "maxRF", 'normRF',
              "%reftree", "%genetree", "avgSize", "minSize", "common tips", "refSize", "targetSize")
    COL_WIDTHS = [20, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7]

    def species_of(name):
        # Species code = the configured field of the delimiter-split node name.
        return name.split(args.spname_delimiter)[args.spname_field]

    report = open(args.output, "w") if args.output else None
    try:
        if report is not None:
            report.write('# ' + ctime() + '\n')
            report.write('# ' + ' '.join(sys.argv) + '\n')
            report.write('#' + '\t'.join(HEADER) + '\n')
        else:
            print('# ' + ctime())
            print('# ' + ' '.join(sys.argv))
            print_table([HEADER], fix_col_width=COL_WIDTHS, wrap_style='wrap')

        for counter, tfile in enumerate(target_trees):
            if args.targets_file:
                seedid, tfile = tfile
            else:
                seedid = None

            if args.extract_species:
                tt = PhyloTree(tfile, sp_naming_function=species_of)
            else:
                tt = Tree(tfile)

            if args.target_attr:
                for lf in tt.iter_leaves():
                    lf._origname = lf.name
                    lf.name = getattr(lf, args.target_attr)

            if args.outgroup:
                _root_on_outgroup(tt, args.outgroup)

            if args.target_trees:
                fname = os.path.basename(tfile)
            else:
                fname = '%05d' % counter

            # Per-tree defaults; -1 means "not computed". common_names is
            # reset here too, so a tree skipped by the subtree limit below
            # cannot raise NameError or reuse the previous tree's value.
            min_size, avg_size = -1, -1
            total_rf, max_rf, norm_rf = -1, -1, -1
            treeko_d = -1
            ref_branches_in_target, target_branches_in_ref = -1, -1
            target_tree_len = -1
            used_subtrees = -1
            common_names = -1

            if args.extract_species:
                ntrees, ndups, sp_trees = tt.get_speciation_trees(
                    autodetect_duplications=True, newick_only=True)

                # Trees yielding 1000 or more subtrees are reported with the
                # defaults only (presumably to bound the running time).
                if ntrees < 1000:
                    all_rf = []
                    all_max_rf = []
                    tree_sizes = []
                    ref_found = []
                    target_found = []
                    common_names = 0

                    for subtree_nw in sp_trees:
                        if seedid and not args.collateral and (seedid not in subtree_nw):
                            continue
                        subtree = PhyloTree(subtree_nw, sp_naming_function=species_of)

                        # Only necessary if the RF function is going to filter
                        # by support value. It slows down the analysis.
                        if args.min_support:
                            subtree_content = subtree.get_cached_content(store_attr='name')
                            for n in subtree.traverse():
                                if n.children:
                                    n.support = tt.get_common_ancestor(subtree_content[n]).support

                        rf, maxr, common, p1, p2, d1, d2 = t.robinson_foulds(
                            subtree,
                            expand_polytomies=args.polytomies,
                            unrooted_trees=args.unrooted,
                            attr_t2='species',
                            min_support_t2=args.min_support)

                        if maxr > 0 and p1 and p2:
                            all_rf.append(rf)
                            tree_sizes.append(len(common))
                            all_max_rf.append(maxr)
                            common_names = max(common_names, len(common))

                            ref_found.append(float(len(p2 & p1)) / reftree_edges)

                            valid_target = p2 - d2
                            # valid edges in target that are not leaves
                            p2bis = set([p for p in valid_target
                                         if len(p[0]) > 1 and len(p[1]) > 1])
                            if p2bis:
                                incompatible_target_branches = float(len(valid_target - p1))
                                target_found.append(
                                    1 - (incompatible_target_branches / len(valid_target)))

                    if all_rf:
                        # Treeko speciation distance: size-weighted mean of the
                        # normalised RF values of the used subtrees.
                        alld = [rf_i / float(max_i) for rf_i, max_i in zip(all_rf, all_max_rf)]
                        a = numpy.sum([d * size for d, size in zip(alld, tree_sizes)])
                        b = float(numpy.sum(tree_sizes))
                        treeko_d = a / b
                        total_rf = numpy.mean(all_rf)
                        norm_rf = numpy.mean(alld)
                        max_rf = numpy.max(all_max_rf)
                        ref_branches_in_target = numpy.mean(ref_found)
                        target_branches_in_ref = numpy.mean(target_found) if target_found else -1
                        target_tree_len = numpy.mean(tree_sizes)
                        used_subtrees = len(all_rf)
            else:
                target_tree_len = len(tt)
                ndups, ntrees, used_subtrees = 0, 1, 1
                treeko_d = -1
                total_rf, max_rf, common, p1, p2, d1, d2 = tt.robinson_foulds(
                    t, expand_polytomies=args.polytomies, unrooted_trees=args.unrooted)
                common_names = len(common)
                if max_rf:
                    norm_rf = total_rf / float(max_rf)
                if p1 and p2:
                    sizes = [len(p) for p in p2 ^ p1]
                    if sizes:
                        avg_size = sum(sizes) / float(len(sizes))
                        min_size = min(sizes)
                    else:
                        min_size, avg_size = 0, 0
                    ref_branches_in_target = float(len(p2 & p1)) / reftree_edges
                else:
                    ref_branches_in_target = 0.0
                    target_branches_in_ref = 0.0
                    min_size, avg_size = -1, -1

            if report is not None:
                fields = (fname, ndups, ntrees, used_subtrees, treeko_d, total_rf, max_rf, norm_rf,
                          ref_branches_in_target, target_branches_in_ref,
                          avg_size, min_size, common_names, reftree_len, target_tree_len)
                report.write('\t'.join([str(value) for value in fields]) + '\n')
            else:
                fields = (fname[-30:], ndups, ntrees, used_subtrees, treeko_d, total_rf, max_rf, norm_rf,
                          '%0.4f' % ref_branches_in_target, '%0.4f' % target_branches_in_ref,
                          avg_size, min_size, common_names, reftree_len, target_tree_len)
                print_table([[istr(value) for value in fields]],
                            fix_col_width=COL_WIDTHS, wrap_style='cut')
    finally:
        # Close the report even when a target tree fails to load or compare.
        if report is not None:
            report.close()