Python Tree.compare примеры использования

Язык программирования: Python

Пространство имен/Пакет: ete2

Класс/Тип: Tree

Метод/Функция: compare

Примеров на hotexamples.com: 6

Python Tree.compare - 6 примеров найдено. Это лучшие примеры Python кода для ete2.Tree.compare, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

Tree(30)

add_child(27)

add_feature(10)

add_features(4)

compare(3)

iter_leaf_names(3)

get_monophyletic(2)

add_face(1)

add_sister(1)

alias(1)

__len__(1)

accno(1)

convert_to_ultrametric(1)

copy(1)

count(1)

get_closest_leaf(1)

is_root(1)

common_name(1)

Пример #1

Показать файл

def tree_prop(tree, tree_file_name):
    """Return the fraction of trees in a Newick file that match ``tree``.

    Every non-blank line of ``tree_file_name`` is parsed with ``Tree`` and
    compared with ``tree``.  Two trees count as the same when the
    ``'norm_rf'`` (normalized Robinson-Foulds) value returned by
    ``Tree.compare`` is exactly 0.0.

    Args:
        tree: reference tree, passed as the first argument of
            ``Tree.compare``.
        tree_file_name: path of a text file holding one Newick tree per line.

    Returns:
        float: number of matching trees divided by the number of trees read,
        or 0.0 when the file contains no trees at all.
    """
    matches = 0
    total = 0
    # The context manager closes the file even when parsing a tree raises.
    with open(tree_file_name, 'r') as tree_file:
        for line in tree_file:
            if not line.strip():
                # Blank lines (e.g. a trailing newline) are not trees.
                continue
            total += 1
            # I use Robinson-Foulds metric to find the same trees.
            if Tree.compare(tree, Tree(line))['norm_rf'] == 0.0:
                matches += 1
    if total == 0:
        # Nothing was read: report 0.0 instead of dividing by zero.
        return 0.0
    return matches / float(total)

Пример #2

Показать файл

Файл: treecall.py Проект: anderspitman/treecall

def compare_main(args):
    """Compare tree topologies against a reference tree.

    Args:
        args.tree (iterable of str): input tree(s), in Newick format
        args.ref (str): reference tree, in Newick format

    Prints (one tab-separated line on stdout per compatible input tree):
        tree
        result['norm_rf']: normalized robinson-foulds distance (from 0 to 1)
        result['ref_edges_in_source']: compatibility score of the target tree with respect to the source tree (how many edges in reference are found in the source)
        result['source_edges_in_ref']: compatibility score of the source tree with respect to the reference tree (how many edges in source are found in the reference)
        dstat: sum of absolute differences between the two matrices over the
            positive entries of the reference matrix, divided by the number
            of those entries
        rstat: geometric mean, over the same entries, of the ratio between
            corresponding values, each ratio inverted when above 1 so that
            it never exceeds 1

    ``args`` itself and all warnings are written to stderr.  A tree whose
    matrix shape differs from the reference matrix is reported there and
    skipped.
    """
    print(args, file=sys.stderr)
    ref_tree = Tree(args.ref)
    ref_tree_leafnames = [leaf.name for leaf in ref_tree.get_leaves()]
    # One fixed leaf-name -> index mapping, taken from the reference tree,
    # is handed to tree2adjacency for every tree so indices stay consistent.
    leaf_idx = {name: i for i, name in enumerate(ref_tree_leafnames)}

    # matrix of "distances" for ref (node counts)
    ref_am = tree2adjacency(ref_tree, leaf_idx)
    for f in args.tree:
        tree = Tree(f)
        tree_leafnames = [leaf.name for leaf in tree.get_leaves()]
        if set(tree_leafnames) != set(ref_tree_leafnames):
            # Warning only: processing of this tree continues.
            print('leaf names are not the same', file=sys.stderr)
        # matrix of "distances" for comparison
        am = tree2adjacency(tree, leaf_idx)
        if ref_am.shape != am.shape:
            print('%s incompatible with %s' % (f, args.ref), file=sys.stderr)
            continue

        # Only positions with a positive reference value take part.
        k = ref_am > 0
        n_pairs = k.sum()

        diff = np.abs(ref_am - am)
        dstat = diff[k].sum()/n_pairs

        ratio = am[k]/ref_am[k]
        ratio[ratio>1] = 1.0/ratio[ratio>1]
        # Geometric mean computed in log space: multiplying many values <= 1
        # directly underflows to 0.0 once the matrices get large.
        if n_pairs:
            rstat = np.exp(np.log(ratio).sum()/n_pairs)
        else:
            # Empty product; the direct formula also evaluates to 1.0 here.
            rstat = 1.0

        result = ref_tree.compare(tree, unrooted=True)  #comparison calculated by ete2

        # <tree>,<norm_rf>,<ref_edge_in_tree>,<tree_edge_in_ref>,<diff_adj>,<ratio_adj>
        print('%s\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f' % (f, result['norm_rf'], result['ref_edges_in_source'], result['source_edges_in_ref'], dstat, rstat))

Пример #3

Показать файл

Файл: treecall.py Проект: rachelss/treecall

def compare_main(args):
    """Compare tree topologies against a reference tree.

    Args:
        args.tree (iterable of str): input tree(s), in Newick format
        args.ref (str): reference tree, in Newick format

    Prints (one tab-separated line on stdout per compatible input tree):
        tree
        result['norm_rf']: normalized robinson-foulds distance (from 0 to 1)
        result['ref_edges_in_source']: compatibility score of the target tree with respect to the source tree (how many edges in reference are found in the source)
        result['source_edges_in_ref']: compatibility score of the source tree with respect to the reference tree (how many edges in source are found in the reference)
        dstat: sum of absolute differences between the two matrices over the
            positive entries of the reference matrix, divided by the number
            of those entries
        rstat: geometric mean, over the same entries, of the ratio between
            corresponding values, each ratio inverted when above 1 so that
            it never exceeds 1

    ``args`` itself and all warnings are written to stderr.  A tree whose
    matrix shape differs from the reference matrix is reported there and
    skipped.
    """
    print(args, file=sys.stderr)
    ref_tree = Tree(args.ref)
    ref_tree_leafnames = [node.name for node in ref_tree.get_leaves()]
    # The reference tree defines the leaf-name -> index mapping that is
    # passed to tree2adjacency for every tree, keeping indices consistent.
    leaf_idx = {name: pos for pos, name in enumerate(ref_tree_leafnames)}

    # matrix of "distances" for ref (node counts)
    ref_am = tree2adjacency(ref_tree, leaf_idx)
    for f in args.tree:
        tree = Tree(f)
        tree_leafnames = [node.name for node in tree.get_leaves()]
        if set(tree_leafnames) != set(ref_tree_leafnames):
            # Warning only: processing of this tree continues.
            print('leaf names are not the same', file=sys.stderr)
        # matrix of "distances" for comparison
        am = tree2adjacency(tree, leaf_idx)
        if ref_am.shape != am.shape:
            print('%s incompatible with %s' % (f, args.ref), file=sys.stderr)
            continue

        # Restrict both statistics to positive reference entries.
        k = ref_am > 0
        n_pairs = k.sum()

        diff = np.abs(ref_am - am)
        dstat = diff[k].sum()/n_pairs

        ratio = am[k]/ref_am[k]
        ratio[ratio>1] = 1.0/ratio[ratio>1]
        # Summing logarithms instead of multiplying the ratios keeps the
        # geometric mean from underflowing to 0.0 on large matrices.
        if n_pairs:
            rstat = np.exp(np.log(ratio).sum()/n_pairs)
        else:
            # Empty product; the direct formula also evaluates to 1.0 here.
            rstat = 1.0

        result = ref_tree.compare(tree, unrooted=True)  #comparison calculated by ete2

        # <tree>,<norm_rf>,<ref_edge_in_tree>,<tree_edge_in_ref>,<diff_adj>,<ratio_adj>
        print('%s\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f' % (f, result['norm_rf'], result['ref_edges_in_source'], result['source_edges_in_ref'], dstat, rstat))

Пример #4

Показать файл

Файл: treecall.py Проект: denovogear/treecall

def compare_main(args):
    """Compare each tree in ``args.tree`` with the reference ``args.ref``.

    ``args`` is echoed to stderr.  For every input tree a matrix is built
    with ``tree2adjacency`` (defined elsewhere) and compared with the matrix
    of the reference tree.  If the two shapes differ, a message is written
    to stderr and the tree is skipped.  Otherwise one tab-separated line is
    printed to stdout:

        <tree>  <norm_rf>  <ref_edges_in_source>  <source_edges_in_ref>  <dstat>  <rstat>

    where the three middle values come from
    ``ref_tree.compare(tree, unrooted=True)`` and

        dstat: sum of absolute matrix differences over the positive entries
            of the reference matrix, divided by the number of those entries
        rstat: geometric mean, over the same entries, of the ratio between
            corresponding values, each ratio inverted when above 1 so that
            it never exceeds 1

    Args:
        args: object with attributes ``ref`` (reference tree) and ``tree``
            (iterable of input trees); each value is passed to ``Tree``.
    """
    print(args, file=sys.stderr)
    ref_tree = Tree(args.ref)
    ref_am = tree2adjacency(ref_tree)
    for f in args.tree:
        tree = Tree(f)
        am = tree2adjacency(tree)
        if ref_am.shape != am.shape:
            print('%s incompatible with %s' % (f, args.ref), file=sys.stderr)
            continue

        # Only positions with a positive reference value take part.
        k = ref_am > 0
        n_pairs = k.sum()

        diff = np.abs(ref_am - am)
        dstat = diff[k].sum()/n_pairs

        ratio = am[k]/ref_am[k]
        ratio[ratio>1] = 1.0/ratio[ratio>1]
        # Geometric mean computed in log space: the direct product of many
        # values <= 1 underflows to 0.0 once the matrices get large.
        if n_pairs:
            rstat = np.exp(np.log(ratio).sum()/n_pairs)
        else:
            # Empty product; the direct formula also evaluates to 1.0 here.
            rstat = 1.0

        result = ref_tree.compare(tree, unrooted=True)

        # <tree>,<norm_rf>,<ref_edge_in_tree>,<tree_edge_in_ref>,<diff_adj>,<ratio_adj>
        print('%s\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f' % (f, result['norm_rf'], result['ref_edges_in_source'], result['source_edges_in_ref'], dstat, rstat))

Пример #5

Показать файл

    for grn in range(len(groupNames)):
        GSIind = gsi(t, indNames[grn])
        GSIvalues.append(GSIind)
        GSIlists[grn].append(float(GSIind))

    # write output
    treeNameP = treeName.rstrip()  # remove \n from a string
    GSIvaluesP = '\t'.join(str(e) for e in GSIvalues)
    outputFilet.write("%s\t%s\n" % (treeNameP, GSIvaluesP))

    # count topologies
    if not topologies:
        topologies.append(t)
        treeProb = tree_prop(t, args.tree)
        treePlist.append(treeProb)
    elif not any(Tree.compare(t, tt)['norm_rf'] == 0.0 for tt in topologies):
        topologies.append(t)
        treeProb = tree_prop(t, args.tree)
        treePlist.append(treeProb)

    # track the progress:
    counter += 1
    if counter % 100 == 0:
        print str(counter), "trees processed"

# calculate the GSI Total (formula 5 in Cummings et al. 2008)

# print topologies # for debugging
for top, p in zip(topologies, treePlist):
    for grnt in range(len(groupNames)):
        gsit = gsi(top, indNames[grnt])

Пример #6

Показать файл

Файл: ete_compare.py Проект: gregcaporaso/ete

def run(args):
    """Compare every source tree with every reference tree and print results.

    Each name from ``args.src_tree_iterator`` and ``args.ref_trees`` is
    loaded with ``Tree`` and the pair is compared with ``Tree.compare``.

    Output depends on the flags read from ``args``:
      * ``show_mismatches`` / ``show_matches`` / ``show_edges``: print the
        selected edge sets, one line for the source and one for the
        reference tree.
      * otherwise: print one row of statistics per pair, tab-separated when
        ``taboutput`` is set, else formatted with ``print_table``.

    NOTE(review): this is Python 2 code (print statements); ``re``,
    ``shorten_str`` and ``as_str`` are not defined in the visible code and
    are presumably provided at module level.
    """
    from ete2 import Tree
    from ete2.utils import print_table
    
    def iter_differences(set1, set2, unrooted=False):
        """Yield ``(s1, pairs)`` for each ``s1`` in ``set1``.

        ``pairs`` holds ``(d, r1)`` for every ``r1`` in ``set2`` whose
        distance ``d`` to ``s1`` is below 1.

        NOTE(review): not called in the visible part of ``run``, and
        ``euc_dist`` / ``euc_dist_unrooted`` are not defined in view.
        """
        for s1 in set1:
            pairs = []
            for r1 in set2:
                if unrooted:
                    d = euc_dist_unrooted(s1, r1)
                else:
                    d = euc_dist(s1, r1)
                if d < 1:
                    pairs.append((d,r1))
            yield s1, pairs

    
    # Column widths for print_table: two wide name columns, eight narrow ones.
    col_sizes = [25, 25] + [8] * 8

    header = ['source', 'ref', 'eff.size', 'nRF',
              'RF', 'maxRF', "%src_branches",
              "%ref_branches", "subtrees", "treekoD" ]

    # The header is printed once, and not at all in the edge-listing modes.
    if args.taboutput:
        print '# ' + '\t'.join(header)
    elif args.show_mismatches or args.show_matches or args.show_edges:
        pass
    else: 
        print_table([header,
                     ["=========================="] * 10],
                    fix_col_width=col_sizes, wrap_style="cut")
    

    for stree_name in args.src_tree_iterator:
        stree = Tree(stree_name)

        # Parses attrs if necessary
        src_tree_attr = args.src_tree_attr
        if args.src_attr_parser:
            # Store the first regex group of the configured attribute on each
            # leaf as '_tempattr' and compare on that attribute instead.
            for leaf in stree:
                leaf.add_feature('_tempattr', re.search(
                    args.src_attr_parser, getattr(leaf, args.src_tree_attr)).groups()[0])
            src_tree_attr = '_tempattr'
  
        for rtree_name in args.ref_trees:
            # The reference tree is loaded again for every source tree.
            rtree = Tree(rtree_name)

            # Parses attrs if necessary
            ref_tree_attr = args.ref_tree_attr
            if args.ref_attr_parser:
                # Same '_tempattr' extraction as for the source tree.
                for leaf in rtree:
                    leaf.add_feature('_tempattr', re.search(
                        args.ref_attr_parser, getattr(leaf, args.ref_tree_attr)).groups()[0])
                ref_tree_attr = '_tempattr'

            r = stree.compare(rtree, 
                              ref_tree_attr=ref_tree_attr,
                              source_tree_attr=src_tree_attr,
                              min_support_ref=args.min_support_ref,
                              min_support_source = args.min_support_src,
                              unrooted=args.unrooted,
                              has_duplications=False)


                
            if args.show_mismatches or args.show_matches or args.show_edges:
                # Choose which edges to list; the flags are checked in this
                # order, so show_mismatches wins when several are set.
                if args.show_mismatches:
                    # Edges present in one tree but not in the other.
                    src = r['source_edges'] - r['ref_edges']
                    ref = r['ref_edges'] - r['source_edges']
                elif args.show_matches:
                    # Edges shared by both trees.
                    src = r['source_edges'] & r['ref_edges']
                    ref = r['ref_edges'] & r['source_edges']
                elif args.show_edges:
                    src = r['source_edges']
                    ref = r['ref_edges']

                if args.unrooted:
                    # Each edge is indexed as a pair x[0], x[1]; the two
                    # halves are printed separated by '|'.
                    for tag, part in [("src: %s"%stree_name, src), ("ref: %s"%rtree_name, ref)]:
                        print "%s\t%s" %(tag, '\t'.join(
                            map(lambda x: '%s|%s' %(','.join(x[0]), ','.join(x[1])), part)))
                else:
                    # Each edge is joined directly into one comma-separated field.
                    for tag, part in [("src: %s"%stree_name, src), ("ref: %s"%rtree_name, ref)]:
                        print "%s\t%s" %(tag, '\t'.join([','.join(p) for p in part]))
            else:
                # Statistics row; values are listed in the same order as the
                # names in `header`.
                data = [shorten_str(stree_name,25),
                        shorten_str(rtree_name,25),
                        r['effective_tree_size'],
                        r['norm_rf'], 
                        r['rf'], r['max_rf'],
                        r["source_edges_in_ref"],
                        r["ref_edges_in_source"],
                        r['source_subtrees'],
                        r['treeko_dist']]
                if args.taboutput:                    
                    print '\t'.join(map(str, data))
                else:    
                    print_table([map(as_str, data)],
                                fix_col_width = col_sizes, wrap_style='cut')