Пример #1
0
def sort_order(records):
    """Return the record ids ordered by category, then by descending value.

    records maps an id to a sequence whose first item is the value to sort
    on (stored in LengthsAndIds) and whose remaining items, taken as a
    tuple, must be one of the category keys registered below; an unknown
    key raises KeyError.

    Ids are emitted one category tip at a time, following the left-to-right
    tip order of the category tree.  Within a tip the (value, id) pairs are
    sorted and then reversed, so the largest value comes first.
    """
    tree = DndParser("(((nosp,sp)named,notnamed)inpref,\
                       ((nosp,sp)named,notnamed)outpref);")

    # every tip of the category tree is a bucket of (value, id) pairs
    for bucket in tree.tips():
        bucket.LengthsAndIds = []

    inpref = tree.Children[0]
    outpref = tree.Children[1]
    lookup = {
        ('named_isolate', True, True): inpref.Children[0].Children[0],
        ('named_isolate', True, False): inpref.Children[0].Children[1],
        ('clone', True, False): inpref.Children[1],
        ('named_isolate', False, True): outpref.Children[0].Children[0],
        ('named_isolate', False, False): outpref.Children[0].Children[1],
        ('clone', False, False): outpref.Children[1],
    }

    for rec_id, fields in records.items():
        category = tuple(fields[1:])
        lookup[category].LengthsAndIds.append((fields[0], rec_id))

    order = []
    # tips go left->right
    for bucket in tree.tips():
        ranked = sorted(bucket.LengthsAndIds)
        ranked.reverse()
        for _, rec_id in ranked:
            order.append(rec_id)

    return order
Пример #2
0
def load_tree(input, tipname_map, verbose=False):
    """Returns a tree decorated with helper attrs

    input: a TreeNode instance (used as is) or anything DndParser accepts.
    tipname_map: mapping of tip name -> consensus list.
    verbose: if True, print progress and diagnostic messages.

    Helper attrs include Consensus, TipStart, TipStop and, on nontips,
    Bootstrap. Nontips and tips that do not have consensus information will
    have [None] * len(RANK_ORDER) set as Consensus.

    A nontip whose Name parses as a float gets that value as Bootstrap and
    its Name reset to None; otherwise Bootstrap is None and Name is kept.
    Single quotes are stripped from tip names only after the tipname_map
    lookup, so the map must be keyed on the names as parsed.
    """
    if verbose:
        print("loading tree...")
    if isinstance(input, TreeNode):
        tree = input
    else:
        tree = DndParser(input)

    n_ranks = len(RANK_ORDER)

    for idx, tip in enumerate(tree.tips()):
        tip.TipStart = idx
        tip.TipStop = idx

        consensus = tipname_map.get(tip.Name)
        if consensus is None:
            # size the placeholder from RANK_ORDER rather than a literal 7 so
            # it always matches what nontips receive below
            if verbose:
                print("No consensus for %s" % tip.Name)
            consensus = [None] * n_ranks
        tip.Consensus = consensus

    for node in tree.postorder(include_self=True):
        if node.istip():
            continue
        # children were visited first (postorder), so their spans are set
        node.TipStart = node.Children[0].TipStart
        node.TipStop = node.Children[-1].TipStop
        node.Consensus = [None] * n_ranks

        if node.Name is None:
            node.Bootstrap = None
        else:
            try:
                node.Bootstrap = float(node.Name)
                node.Name = None
            except (TypeError, ValueError):
                # the name is a real label, not a support value
                if verbose:
                    print("Could not save bootstrap %s, node is root: %s" %
                          (node.Name, str(node.Parent is None)))
                node.Bootstrap = None

    for tip in tree.tips():
        if tip.Name:
            tip.Name = tip.Name.replace("'","")
    return tree
Пример #3
0
def build_tree_from_alignment(aln, moltype, best_tree=False, params=None):
    """Returns a tree from Alignment object aln, built with Clustalw.

    aln: an cogent.core.alignment.Alignment object, or data that can be used
    to build one.

    moltype: cogent.core.moltype.MolType object; must compare equal to DNA,
    RNA or PROTEIN, otherwise ValueError is raised.

    best_tree: if True (default:False), uses a slower but more accurate
    algorithm to build the tree (bootstrap, seed and bootlabels are switched
    on unless the matching key is already present in params).

    params: dict of parameters to pass in to the Clustal app controller.
    When given, a "-type" entry is written into it.

    The result is the tree parsed from the controller's "Tree" output with
    PhyloNode as constructor; tip names are mapped back to the original
    sequence identifiers.
    """
    # Create instance of app controller, enable tree, disable alignment
    app = Clustalw(InputHandler="_input_as_multiline_string", params=params, WorkingDir="/tmp")
    app.Parameters["-align"].off()

    # Set params to empty dict if None.
    if params is None:
        params = {}

    # NOTE(review): "-type" is stored in params after the controller was
    # built from it; whether Clustalw still picks it up depends on how the
    # controller keeps params -- confirm.
    if moltype == DNA or moltype == RNA:
        params["-type"] = "d"
    elif moltype == PROTEIN:
        params["-type"] = "p"
    else:
        raise ValueError("moltype must be DNA, RNA, or PROTEIN")

    # best_tree -> bootstrap
    if best_tree:
        if "-bootstrap" not in params:
            app.Parameters["-bootstrap"].on(1000)
        if "-seed" not in params:
            app.Parameters["-seed"].on(randint(0, 1000))
        if "-bootlabels" not in params:
            app.Parameters["-bootlabels"].on("nodes")
    else:
        app.Parameters["-tree"].on()

    # Setup mapping. Clustalw clips identifiers. We will need to remap them.
    seq_collection = SequenceCollection(aln)
    int_map, int_keys = seq_collection.getIntMap()
    int_map = SequenceCollection(int_map)

    # Collect result
    result = app(int_map.toFasta())

    try:
        # Build tree
        tree = DndParser(result["Tree"].read(), constructor=PhyloNode)
        for node in tree.tips():
            node.Name = int_keys[node.Name]
    finally:
        # Clean up even when parsing or renaming fails
        result.cleanUp()

    return tree
Пример #4
0
def build_tree_from_alignment(aln, moltype, best_tree=False, params=None):
    """Returns a tree from alignment, built with FastTree.

    aln: object providing getIntMap().

    moltype: must compare equal to DNA, RNA or PROTEIN; it selects the
    '-nt' setting. Any other value raises ValueError.

    best_tree: if True, '-slow' is switched on.

    params: optional dict of parameters for the FastTree app controller.
    It is copied, so the caller's dict is left untouched.

    The result is the tree parsed from the controller's 'Tree' output with
    PhyloNode as constructor; tip names are mapped back to the original
    identifiers.
    """
    # private copy: the settings added below must not leak to the caller
    params = dict(params) if params is not None else {}

    if moltype == DNA or moltype == RNA:
        params['-nt'] = True
    elif moltype == PROTEIN:
        params['-nt'] = False
    else:
        raise ValueError(
                "FastTree does not support moltype: %s" % moltype.label)

    if best_tree:
        params['-slow'] = True

    #Create mapping between abbreviated IDs and full IDs
    int_map, int_keys = aln.getIntMap()
    #Create SequenceCollection from int_map.
    int_map = SequenceCollection(int_map, MolType=moltype)

    app = FastTree(params=params)

    # NOTE(review): sibling wrappers call result.cleanUp(); this one does
    # not -- confirm whether FastTree results need it.
    result = app(int_map.toFasta())
    tree = DndParser(result['Tree'].read(), constructor=PhyloNode)
    #remap tip names
    for tip in tree.tips():
        tip.Name = int_keys[tip.Name]

    return tree
Пример #5
0
def build_tree_from_alignment(aln, moltype, best_tree=False, params=None):
    """Returns a tree from alignment, built with FastTree.

    aln: object providing getIntMap().

    moltype: must compare equal to DNA, RNA or PROTEIN; it selects the
    "-nt" setting. Any other value raises ValueError.

    best_tree: if True, "-slow" is switched on.

    params: optional dict of parameters for the FastTree app controller.
    It is copied, so the caller's dict is left untouched.

    The result is the tree parsed from the controller's "Tree" output with
    PhyloNode as constructor; tip names are mapped back to the original
    identifiers.
    """
    # private copy: the settings added below must not leak to the caller
    params = {} if params is None else dict(params)

    if moltype == DNA or moltype == RNA:
        params["-nt"] = True
    elif moltype == PROTEIN:
        params["-nt"] = False
    else:
        raise ValueError("FastTree does not support moltype: %s" % moltype.label)

    if best_tree:
        params["-slow"] = True

    # Create mapping between abbreviated IDs and full IDs
    int_map, int_keys = aln.getIntMap()
    # Create SequenceCollection from int_map.
    int_map = SequenceCollection(int_map, MolType=moltype)

    app = FastTree(params=params)

    # NOTE(review): sibling wrappers call result.cleanUp(); this one does
    # not -- confirm whether FastTree results need it.
    result = app(int_map.toFasta())
    tree = DndParser(result["Tree"].read(), constructor=PhyloNode)
    # remap tip names
    for tip in tree.tips():
        tip.Name = int_keys[tip.Name]

    return tree
Пример #6
0
 def test_score_tree(self):
     """score_tree should give the expected fmeasure score for the tree"""
     # Nodes carry RankNames / RankNameScores, tips carry Consensus;
     # decorate_ntips is run before scoring.
     tree = DndParser("(((a,b),(c,d))e,(f,g),h)i;")
     tree.RankNames = ['i',None,None,None] # contributes 1.0 * 6
     tree.RankNameScores = [1.0,None,None,None]

     clade_e = tree.Children[0]
     clade_fg = tree.Children[1]
     clade_e.RankNames = [None,'e','foo',None] # contributes 0.5 * 3, 0.6 * 3
     clade_e.RankNameScores = [None, 0.5, 0.6, None]

     # unnamed nodes: each gets its own all-None list
     for unnamed in (clade_e.Children[0], clade_e.Children[1], clade_fg):
         unnamed.RankNames = [None] * 7
     clade_fg.RankNameScores = [None] * 7

     # consensus per tip, in tip order
     consensus_by_tip = [
         [None] * 7,
         [1,3,None,None],
         [2,4,5,None],
         [None,1,None,None],
         [None,1,None,None],
         [2,None,3,None],
         [None,4,None,None],
     ]
     tips = tree.tips()
     for position, consensus in enumerate(consensus_by_tip):
         tips[position].Consensus = consensus

     decorate_ntips(tree)

     # each named rank adds score * weight; the weights (6, 3, 3) are
     # presumably the tip counts set by decorate_ntips -- confirm
     weighted_sum = (1.0 * 6) + (0.5 * 3) + (0.6 * 3)
     exp = weighted_sum / (6 + 3 + 3)
     obs = score_tree(tree)
     self.assertEqual(obs, exp)
Пример #7
0
def bootstrap_tree_from_alignment(aln, seed=None, num_trees=None, params=None):
    """Build a tree with bootstrap support values from aln using Clustalw.

    aln: an cogent.core.alignment.Alignment object, or data that can be used
    to build one.

    seed: an integer seed value; only applied when '-seed' is not already
    switched on through params. If it is None a random integer in
    0-1000 is drawn.

    num_trees: an integer, number of trees to bootstrap against; only
    applied when '-bootstrap' is not already switched on through params.
    Defaults to 1000.

    params: dict of parameters to pass in to the Clustal app controller.

    Returns the tree parsed from the controller's 'Tree' output with
    PhyloNode as constructor; tip names are mapped back to the original
    sequence identifiers.
    """
    # Controller with alignment and plain tree building disabled
    clustal = Clustalw(InputHandler='_input_as_multiline_string',
                       params=params, WorkingDir='/tmp')
    clustal.Parameters['-align'].off()
    clustal.Parameters['-tree'].off()

    # Only fill in the options the caller did not switch on through params
    if clustal.Parameters['-bootstrap'].isOff():
        num_trees = 1000 if num_trees is None else num_trees
        clustal.Parameters['-bootstrap'].on(num_trees)

    if clustal.Parameters['-seed'].isOff():
        if seed is None:
            seed = randint(0,1000)
        clustal.Parameters['-seed'].on(seed)

    if clustal.Parameters['-bootlabels'].isOff():
        clustal.Parameters['-bootlabels'].on("node")

    # Clustalw clips identifiers, so run on integer names and remap later
    short_names, original_names = SequenceCollection(aln).getIntMap()
    renamed_seqs = SequenceCollection(short_names)

    run = clustal(renamed_seqs.toFasta())

    tree = DndParser(run['Tree'].read(), constructor=PhyloNode)
    for tip in tree.tips():
        tip.Name = original_names[tip.Name]

    run.cleanUp()

    return tree
Пример #8
0
def bootstrap_tree_from_alignment(aln, seed=None, num_trees=None, params=None):
    """Return a bootstrapped tree for aln, computed by Clustalw.

    aln: an cogent.core.alignment.Alignment object, or data that can be used
    to build one.

    seed: an integer seed value, used only if '-seed' is still off after
    the controller was built from params; a random integer in 0-1000 is
    drawn when it is None.

    num_trees: an integer, number of trees to bootstrap against, used only
    if '-bootstrap' is still off after the controller was built from
    params; 1000 when None.

    params: dict of parameters to pass in to the Clustal app controller.

    The returned tree is parsed from the controller's 'Tree' output with
    PhyloNode as constructor and has its tips renamed to the original
    sequence identifiers.
    """
    # Build the controller; no alignment, no plain tree
    controller = Clustalw(InputHandler='_input_as_multiline_string',
                          params=params,
                          WorkingDir='/tmp')
    for flag in ('-align', '-tree'):
        controller.Parameters[flag].off()

    # Defaults are applied only to options not switched on through params
    if controller.Parameters['-bootstrap'].isOff():
        if num_trees is None:
            num_trees = 1000
        controller.Parameters['-bootstrap'].on(num_trees)

    if controller.Parameters['-seed'].isOff():
        seed = randint(0, 1000) if seed is None else seed
        controller.Parameters['-seed'].on(seed)

    if controller.Parameters['-bootlabels'].isOff():
        controller.Parameters['-bootlabels'].on("node")

    # Clustalw clips identifiers: feed it integer names, restore afterwards
    collection = SequenceCollection(aln)
    int_named, name_of = collection.getIntMap()
    fasta_input = SequenceCollection(int_named).toFasta()

    output = controller(fasta_input)

    tree = DndParser(output['Tree'].read(), constructor=PhyloNode)
    for tip in tree.tips():
        tip.Name = name_of[tip.Name]

    output.cleanUp()

    return tree
Пример #9
0
def build_tree_from_alignment(aln, moltype, best_tree=False, params={}):
    """Returns a tree from Alignment object aln, built with RAxML.

    aln: an object with a toPhylip() method, or data that Alignment() can
    be built from.

    moltype: cogent.core.moltype.MolType object; only consulted when params
    has no '-m' entry, in which case it must compare equal to DNA, RNA or
    PROTEIN (ValueError otherwise).

    best_tree: best_tree support is currently not implemented; passing a
    true value raises NotImplementedError.

    params: dict of parameters to pass in to the RAxML app controller. It is
    copied before use, so neither the caller's dict nor the shared default
    is modified. The "-w", "-n", "-k", "-p" and "-x" entries are always
    overwritten.

    The result is the tree parsed from the controller's 'Bootstrap' output
    with PhyloNode as constructor; tip names are mapped back through the
    map returned by aln.toPhylip().
    """
    if best_tree:
        raise NotImplementedError

    # Private copy. The original mutated the shared default dict, so a model
    # chosen for one call ("-m") silently stuck for every later call.
    params = dict(params) if params else {}

    if '-m' not in params:
        if moltype == DNA or moltype == RNA:
            # in version 7.2.3, GTRMIX is no longer supported but says GTRCAT
            # behaves like GTRMIX (http://www.phylo.org/tools/raxmlhpc2.html)
            params["-m"] = 'GTRGAMMA'
        elif moltype == PROTEIN:
            # NOTE(review): 'matrixName' looks like an unfilled placeholder
            # for a substitution matrix -- confirm the intended model name.
            params["-m"] = 'PROTGAMMAmatrixName'
        else:
            raise ValueError("Moltype must be either DNA, RNA, or PROTEIN")

    if not hasattr(aln, 'toPhylip'):
        aln = Alignment(aln)
    seqs, align_map = aln.toPhylip()

    # generate temp filename for output
    params["-w"] = "/tmp/"
    params["-n"] = get_tmp_filename().split("/")[-1]
    params["-k"] = True
    params["-p"] = randint(1, 100000)
    params["-x"] = randint(1, 100000)

    ih = '_input_as_multiline_string'

    raxml_app = Raxml(params=params,
                      InputHandler=ih,
                      WorkingDir=None,
                      SuppressStderr=True,
                      SuppressStdout=True)

    raxml_result = raxml_app(seqs)

    try:
        tree = DndParser(raxml_result['Bootstrap'], constructor=PhyloNode)

        for node in tree.tips():
            node.Name = align_map[node.Name]
    finally:
        # remove the run's output files even if parsing/renaming fails
        raxml_result.cleanUp()

    return tree
Пример #10
0
def build_tree_from_alignment(aln, moltype, best_tree=False, params={}):
    """Returns a tree from Alignment object aln, built with RAxML.

    aln: an object with a toPhylip() method, or data that Alignment() can
    be built from.

    moltype: cogent.core.moltype.MolType object; only consulted when params
    has no '-m' entry, in which case it must compare equal to DNA, RNA or
    PROTEIN (ValueError otherwise).

    best_tree: best_tree support is currently not implemented; passing a
    true value raises NotImplementedError.

    params: dict of parameters to pass in to the RAxML app controller. A
    copy is used internally, so the caller's dict and the shared default
    stay unmodified. The "-w", "-n", "-k", "-p" and "-x" entries are always
    overwritten in that copy.

    The result is the tree parsed from the controller's 'Bootstrap' output
    with PhyloNode as constructor; tip names are mapped back through the
    map returned by aln.toPhylip().
    """
    if best_tree:
        raise NotImplementedError

    # Never write into the default/caller dict: entries such as "-m" would
    # otherwise persist into unrelated later calls.
    params = dict(params) if params else {}

    if '-m' not in params:
        if moltype == DNA or moltype == RNA:
            # in version 7.2.3, GTRMIX is no longer supported but says GTRCAT
            # behaves like GTRMIX (http://www.phylo.org/tools/raxmlhpc2.html)
            params["-m"] = 'GTRGAMMA'
        elif moltype == PROTEIN:
            # NOTE(review): 'matrixName' looks like an unfilled placeholder
            # for a substitution matrix -- confirm the intended model name.
            params["-m"] = 'PROTGAMMAmatrixName'
        else:
            raise ValueError("Moltype must be either DNA, RNA, or PROTEIN")

    if not hasattr(aln, 'toPhylip'):
        aln = Alignment(aln)
    seqs, align_map = aln.toPhylip()

    # generate temp filename for output
    params["-w"] = "/tmp/"
    params["-n"] = get_tmp_filename().split("/")[-1]
    params["-k"] = True
    params["-p"] = randint(1,100000)
    params["-x"] = randint(1,100000)

    ih = '_input_as_multiline_string'

    raxml_app = Raxml(params=params,
                      InputHandler=ih,
                      WorkingDir=None,
                      SuppressStderr=True,
                      SuppressStdout=True)

    raxml_result = raxml_app(seqs)

    try:
        tree = DndParser(raxml_result['Bootstrap'], constructor=PhyloNode)

        for node in tree.tips():
            node.Name = align_map[node.Name]
    finally:
        # remove the run's output files even if parsing/renaming fails
        raxml_result.cleanUp()

    return tree
Пример #11
0
def assign_tax_labels_to_tree(tree, std):
    """Append a taxonomy string to every tip label of a tree.

        tree : newick string
        std : output from shorten_taxonomy_strings; indexed by tip label
            (surrounding single quotes removed)

    Each tip is renamed to '<label>_<taxonomy>'. Returns the parsed tree.
    """
    parsed = DndParser(tree, PhyloNode)
    for tip in parsed.tips():
        # drop single quotes wrapped around the name, if any
        bare_label = tip.Name.strip('\'')
        taxonomy = std[bare_label]
        tip.Name = str(bare_label) + '_' + taxonomy
    return parsed
def assign_tax_labels_to_tree(tree,std):
    """Relabel the tips of a tree as '<label>_<taxonomy>'.

        tree : newick string
        std : output from shorten_taxonomy_strings; looked up with the tip
            label after stripping surrounding single quotes

    Returns the tree parsed with PhyloNode as constructor.
    """
    tree_nodes = DndParser(tree, PhyloNode)
    tips = tree_nodes.tips()
    for current in tips:
        # names may arrive wrapped in literal single quotes
        unquoted = current.Name.strip('\'')
        suffix = std[unquoted]
        relabelled = str(unquoted) + '_' + suffix
        current.Name = relabelled
    return tree_nodes
Пример #13
0
def remove_taxonomy(tree, regex_string):
    """Removes text matching a regex from every tip label of a tree

        tree : input handed to DndParser (PhyloNode constructor), e.g. a
            newick string
        regex_string : regular expression; each match is deleted from the
            tip name

    Surrounding single quotes are stripped from a name before the regex is
    applied. Returns the parsed tree with the rewritten tip names.
    """
    tree_nodes = DndParser(tree, PhyloNode)
    # the pattern is identical for every tip, so compile it once
    pattern = re.compile(regex_string)
    for node in tree_nodes.tips():
        label = node.Name.strip('\'')  # in case there are actual quotes
        node.Name = pattern.sub('', label)
    return tree_nodes
def remove_taxonomy(tree, regex_string):
    """Deletes every regex match from the tip labels of a tree

        tree : input handed to DndParser (PhyloNode constructor), e.g. a
            newick string
        regex_string : regular expression whose matches are removed from
            each tip name

    Single quotes around a name are stripped before the substitution.
    Returns the parsed tree with the rewritten tip names.
    """
    tree_nodes = DndParser(tree, PhyloNode)
    # hoisted out of the loop: one compile serves all tips
    p = re.compile(regex_string)
    for node in tree_nodes.tips():
        label = node.Name.strip('\'') # in case there are actual quotes
        node.Name = p.sub('', label)
    return tree_nodes
Пример #15
0
def build_tree_from_distance_matrix(matrix, best_tree=False, params={}, working_dir="/tmp"):
    """Returns a tree from a distance matrix, built with Clearcut.

    matrix: a square Dict2D object (cogent.util.dict2d)

    best_tree: if True (default:False), uses a slower but more accurate
    algorithm to build the tree (switches "-N" on).

    params: dict of parameters to pass in to the Clearcut app controller.
    It is copied before "--out" is added, so the caller's dict and the
    shared default are not modified.

    working_dir: directory used for the temporary output file and as the
    controller's WorkingDir.

    The result is the tree parsed from the controller's "Tree" output with
    PhyloNode as constructor; tip names are mapped back to the original
    matrix keys.
    """
    # private copy: never write into the mutable default / caller's dict
    params = dict(params) if params else {}
    params["--out"] = get_tmp_filename(working_dir)

    # Create instance of app controller, enable tree, disable alignment
    app = Clearcut(
        InputHandler="_input_as_multiline_string",
        params=params,
        WorkingDir=working_dir,
        SuppressStdout=True,
        SuppressStderr=True,
    )
    # Turn off input as alignment
    app.Parameters["-a"].off()
    # Input is a distance matrix
    app.Parameters["-d"].on()

    if best_tree:
        app.Parameters["-N"].on()

    # Turn the dict2d object into the expected input format
    matrix_input, int_keys = _matrix_input_from_dict2d(matrix)

    # Collect result
    result = app(matrix_input)

    try:
        # Build tree
        tree = DndParser(result["Tree"].read(), constructor=PhyloNode)

        # reassign to original names
        for node in tree.tips():
            node.Name = int_keys[node.Name]
    finally:
        # Clean up even when parsing or renaming fails
        result.cleanUp()

    return tree
Пример #16
0
 def test_shuffle_tipnames(self):
     """shuffle_tipnames should return copy of tree w/ labels permuted"""
     #Note: this should never fail but is technically still stochastic
     #5! is 120 so repeating 5 times should fail about 1 in 10^10.
     for _ in range(5):
         try:
             t = DndParser(self.t_str)
             result = shuffle_tipnames(t)
             orig_names = [n.Name for n in t.tips()]
             new_names = [n.Name for n in result.tips()]
             self.assertIsPermutation(orig_names, new_names)
             # one successful permutation is enough
             return
         except AssertionError:
             # try again with a fresh tree
             continue
     # call-form raise: valid on both Python 2 and Python 3
     raise AssertionError("Produced same permutation in 5 tries: broken?")
Пример #17
0
 def test_shuffle_tipnames(self):
     """shuffle_tipnames should return copy of tree w/ labels permuted"""
     # Stochastic in principle: with 5! = 120 orderings, five failures in a
     # row should happen about 1 time in 10^10.
     attempts_left = 5
     while attempts_left > 0:
         attempts_left -= 1
         try:
             original = DndParser(self.t_str)
             shuffled = shuffle_tipnames(original)
             names_before = [tip.Name for tip in original.tips()]
             names_after = [tip.Name for tip in shuffled.tips()]
             self.assertIsPermutation(names_before, names_after)
         except AssertionError:
             # this attempt failed the permutation check; try again
             pass
         else:
             return
     raise AssertionError("Produced same permutation in 5 tries: broken?")
Пример #18
0
def convert_tree_tips(align_map,tree_fp):
    """ rename the starting tree to correspond to the new phylip names,
        which are assigned to each sequence

        align_map : dict; every tip name in the tree must be one of its
            values, and the tip is renamed to the matching key
        tree_fp : path of the tree file to read

        Returns the tree parsed with PhyloNode as constructor. A tip name
        that is not a value of align_map raises KeyError.
    """
    # flip key value pairs
    tree_tip_to_seq_name = dict((v, k) for k, v in align_map.items())

    # parse inside a with-block so the file handle is always closed
    with open(tree_fp) as open_tree:
        tree = DndParser(open_tree, constructor=PhyloNode)

    # change the tip labels to phylip labels
    for node in tree.tips():
        node.Name = tree_tip_to_seq_name[node.Name]

    return tree
Пример #19
0
def wagner_for_picrust(tree_path,
                       trait_table_path,
                       gain=None,
                       max_paralogs=None,
                       HALT_EXEC=False):
    '''Runs count application controller given path of tree and trait table and returns a Table

    tree_path: path of the tree file; passed to Count and parsed again here
        to obtain the number of tips.
    trait_table_path: path of a tab-separated trait table with a header.
    gain: if truthy, value for Count's '-gain' parameter.
    max_paralogs: if truthy, value for Count's '-max_paralogs' parameter.
    HALT_EXEC: forwarded to the Count controller.

    Returns the parsed Count output, transposed with 'nodes' as the new
    column name.
    '''
    #initialize Count app controller
    count = Count(HALT_EXEC=HALT_EXEC)

    #set the parameters
    if gain:
        count.Parameters['-gain'].on(gain)
    if max_paralogs:
        count.Parameters['-max_paralogs'].on(max_paralogs)

    ###Have to manipulate the trait table some. Need to transpose it and strip ids surrounded in quotes.
    table = LoadTable(filename=trait_table_path, header=True, sep='\t')

    #get the first column (containing row ids)
    genome_ids = table.getRawData(table.Header[0])
    #remove single quotes from the id if they exist
    genome_ids = [str(genome_id).strip('\'') for genome_id in genome_ids]
    #transpose the matrix
    table = table.transposed(new_column_name=table.Header[0])
    #Change the headers
    table = table.withNewHeader(table.Header[1:], genome_ids)
    #write the modified table to a tmp file
    tmp_table_path = get_tmp_filename()
    table.writeToFile(tmp_table_path, sep='\t')

    try:
        #Run Count here
        result = count(data=(tree_path, tmp_table_path))
    finally:
        #Remove tmp file, also when Count fails
        remove(tmp_table_path)

    #parse the tree only to count its tips; close the file right away
    with open(tree_path) as tree_file:
        tree = DndParser(tree_file)

    #parse the results into a Cogent Table
    asr_table = parse_wagner_parsimony_output(result["StdOut"].readlines(),
                                              remove_num_tips=len(tree.tips()))

    #transpose the table
    asr_table = asr_table.transposed(new_column_name='nodes')

    return asr_table
Пример #20
0
def build_tree_from_distance_matrix(matrix, best_tree=False, params={},
    working_dir='/tmp'):
    """Returns a tree from a distance matrix, built with Clearcut.

    matrix: a square Dict2D object (cogent.util.dict2d)

    best_tree: if True (default:False), uses a slower but more accurate
    algorithm to build the tree (switches '-N' on).

    params: dict of parameters to pass in to the Clearcut app controller.
    A copy receives the '--out' entry, so the caller's dict and the shared
    default are left unmodified.

    working_dir: directory used for the temporary output file and as the
    controller's WorkingDir.

    The result is the tree parsed from the controller's 'Tree' output with
    PhyloNode as constructor; tip names are mapped back to the original
    matrix keys.
    """
    # do not mutate the mutable default / caller's dict
    params = dict(params) if params else {}
    params['--out'] = get_tmp_filename(working_dir)

    # Create instance of app controller, enable tree, disable alignment
    app = Clearcut(InputHandler='_input_as_multiline_string', params=params,
                   WorkingDir=working_dir, SuppressStdout=True,
                   SuppressStderr=True)
    #Turn off input as alignment
    app.Parameters['-a'].off()
    #Input is a distance matrix
    app.Parameters['-d'].on()

    if best_tree:
        app.Parameters['-N'].on()

    # Turn the dict2d object into the expected input format
    matrix_input, int_keys = _matrix_input_from_dict2d(matrix)

    # Collect result
    result = app(matrix_input)

    try:
        # Build tree
        tree = DndParser(result['Tree'].read(), constructor=PhyloNode)

        # reassign to original names
        for node in tree.tips():
            node.Name = int_keys[node.Name]
    finally:
        # Clean up even when parsing or renaming fails
        result.cleanUp()

    return tree
Пример #21
0
def build_tree_from_alignment(aln, moltype, best_tree=False, params=None):
    """Build a tree from aln with the Muscle app controller.

    aln: a cogent.core.alignment.Alignment object, or data that can be used
    to build one.

    moltype: cogent.core.moltype.MolType object; its label is passed as
    '-seqtype'.

    best_tree: unsupported (not read by this function)

    params: dict of parameters to pass in to the Muscle app controller.

    Returns the tree parsed from the controller's 'Tree1Out' output with
    PhyloNode as constructor; tip names are mapped back to the original
    sequence identifiers.
    """
    # Controller set up for clustering with the first tree written to a
    # temporary file in its working directory
    muscle = Muscle(InputHandler='_input_as_multiline_string',
                    params=params,
                    WorkingDir='/tmp')
    muscle.Parameters['-cluster'].on()
    muscle.Parameters['-tree1'].on(get_tmp_filename(muscle.WorkingDir))
    muscle.Parameters['-seqtype'].on(moltype.label)

    # Run on abbreviated integer ids; keep the map back to the full ids
    original_seqs = SequenceCollection(aln, MolType=moltype)
    abbreviated, full_id_of = original_seqs.getIntMap()
    abbreviated_seqs = SequenceCollection(abbreviated,MolType=moltype)

    run = muscle(abbreviated_seqs.toFasta())

    tree = DndParser(run['Tree1Out'].read(), constructor=PhyloNode)
    for leaf in tree.tips():
        leaf.Name = full_id_of[leaf.Name]

    run.cleanUp()

    return tree
Пример #22
0
def build_tree_from_alignment(aln, moltype, best_tree=False, params=None):
    """Build a tree from aln with the Muscle app controller ('-clusteronly').

    aln: a cogent.core.alignment.Alignment object, or data that can be used
    to build one.

    moltype: cogent.core.moltype.MolType object; its label is passed as
    '-seqtype'.

    best_tree: unsupported (not read by this function)

    params: dict of parameters to pass in to the Muscle app controller.

    Returns the tree parsed from the controller's 'Tree1Out' output with
    PhyloNode as constructor; tip names are mapped back to the original
    sequence identifiers.
    """
    controller = Muscle(InputHandler='_input_as_multiline_string',
                        params=params,
                        WorkingDir='/tmp')

    # cluster only, and write the first tree to a temp file in WorkingDir
    controller.Parameters['-clusteronly'].on()
    controller.Parameters['-tree1'].on(get_tmp_filename(controller.WorkingDir))
    controller.Parameters['-seqtype'].on(moltype.label)

    # swap the full ids for abbreviated ones while the program runs
    seqs = SequenceCollection(aln, MolType=moltype)
    short_id_seqs, short_to_full = seqs.getIntMap()
    fasta_text = SequenceCollection(short_id_seqs,MolType=moltype).toFasta()

    outcome = controller(fasta_text)

    tree = DndParser(outcome['Tree1Out'].read(), constructor=PhyloNode)
    for tip_node in tree.tips():
        tip_node.Name = short_to_full[tip_node.Name]

    outcome.cleanUp()

    return tree
Пример #23
0
 def test_decorate_ntips(self):
     """decorate_ntips should set the expected NumTips on each node"""
     tree = DndParser("(((a,b)c,(d,e,f)g)h,(i,j)k)l;")

     tip_named = {}
     for tip in tree.tips():
         tip_named[tip.Name] = tip

     # tip 'e' is all None; the expected counts below (6 at the root for 7
     # tips) suggest such tips are not counted -- confirm in decorate_ntips
     consensus_by_name = (
         ('a', [1,2,3,4,5,6,7]),
         ('b', [None,None,None,5,None,None,None]),
         ('d', [1,2,3,4,5,6,8]),
         ('e', [None, None,None,None,None,None,None]),
         ('f', [1,2,3,4,5,6,8]),
         ('i', [1,2,3,4,5,6,8]),
         ('j', [1,2,3,4,5,6,8]),
     )
     for name, consensus in consensus_by_name:
         tip_named[name].Consensus = consensus

     decorate_ntips(tree)

     left = tree.Children[0]
     right = tree.Children[1]
     self.assertEqual(tree.NumTips, 6)
     self.assertEqual(left.NumTips, 4)
     self.assertEqual(right.NumTips, 2)
     self.assertEqual(left.Children[0].NumTips, 2)
     self.assertEqual(left.Children[1].NumTips, 2)
Пример #24
0
def wagner_for_picrust(tree_path,trait_table_path,gain=None,max_paralogs=None,HALT_EXEC=False):
    '''Runs count application controller given path of tree and trait table and returns a Table

    tree_path: path of the tree file; handed to Count and also parsed here
        to obtain the number of tips.
    trait_table_path: path of a tab-separated trait table with a header.
    gain: if truthy, value for Count's '-gain' parameter.
    max_paralogs: if truthy, value for Count's '-max_paralogs' parameter.
    HALT_EXEC: forwarded to the Count controller.

    Returns the parsed Count output, transposed with 'nodes' as the new
    column name.
    '''
    #initialize Count app controller
    count=Count(HALT_EXEC=HALT_EXEC)

    #set the parameters
    if gain:
        count.Parameters['-gain'].on(gain)
    if max_paralogs:
        count.Parameters['-max_paralogs'].on(max_paralogs)

    ###Have to manipulate the trait table some. Need to transpose it and strip ids surrounded in quotes.
    table = LoadTable(filename=trait_table_path,header=True,sep='\t')

    #get the first column (containing row ids)
    genome_ids = table.getRawData(table.Header[0])
    #remove single quotes from the id if they exist
    genome_ids=[str(raw_id).strip('\'') for raw_id in genome_ids]
    #transpose the matrix
    table = table.transposed(new_column_name=table.Header[0])
    #Change the headers
    table=table.withNewHeader(table.Header[1:],genome_ids)
    #write the modified table to a tmp file
    tmp_table_path =get_tmp_filename()
    table.writeToFile(tmp_table_path,sep='\t')

    try:
        #Run Count here
        result = count(data=(tree_path,tmp_table_path))
    finally:
        #Remove tmp file, also when Count fails
        remove(tmp_table_path)

    #the tree is only needed for its tip count; close the file afterwards
    with open(tree_path) as tree_file:
        tree=DndParser(tree_file)

    #parse the results into a Cogent Table
    asr_table= parse_wagner_parsimony_output(result["StdOut"].readlines(),remove_num_tips=len(tree.tips()))

    #transpose the table
    asr_table = asr_table.transposed(new_column_name='nodes')

    return asr_table
Пример #25
0
def build_tree_from_alignment(aln, moltype, best_tree=False, params={},
    working_dir='/tmp'):
    """Returns a tree from Alignment object aln, built with Clearcut.

    aln: an cogent.core.alignment.Alignment object, or data that can be used
    to build one.
        -  Clearcut only accepts aligned sequences.  Alignment object used to
        handle unaligned sequences.

    moltype: a cogent.core.moltype object.
        - NOTE: If moltype = RNA, we must convert to DNA since Clearcut v1.0.8
        gives incorrect results if RNA is passed in.  'U' is treated as an
        incorrect character and is excluded from distance calculations.

    best_tree: if True (default:False), uses a slower but more accurate
    algorithm to build the tree (switches '-N' on).

    params: dict of parameters to pass in to the Clearcut app controller.
    A copy receives the '--out' entry, so the caller's dict and the shared
    default are left unmodified.

    working_dir: directory used for the temporary output file and as the
    controller's WorkingDir.

    The result is the tree parsed from the controller's 'Tree' output with
    PhyloNode as constructor; tip names are mapped back to the original
    sequence identifiers.
    """
    # do not mutate the mutable default / caller's dict
    params = dict(params) if params else {}
    params['--out'] = get_tmp_filename(working_dir)

    # Create instance of app controller, enable tree, disable alignment
    app = Clearcut(InputHandler='_input_as_multiline_string', params=params,
                   WorkingDir=working_dir, SuppressStdout=True,
                   SuppressStderr=True)
    #Input is an alignment
    app.Parameters['-a'].on()
    #Turn off input as distance matrix
    app.Parameters['-d'].off()

    #If moltype = RNA, we must convert to DNA.
    if moltype == RNA:
        moltype = DNA

    if best_tree:
        app.Parameters['-N'].on()

    #Turn on correct moltype
    moltype_string = moltype.label.upper()
    app.Parameters[MOLTYPE_MAP[moltype_string]].on()

    # Setup mapping. Clearcut clips identifiers. We will need to remap them.
    # Clearcut only accepts aligned sequences.  Let Alignment object handle
    # unaligned sequences.
    seq_aln = Alignment(aln,MolType=moltype)
    #get int mapping
    int_map, int_keys = seq_aln.getIntMap()
    #create new Alignment object with int_map
    int_map = Alignment(int_map)

    # Collect result
    result = app(int_map.toFasta())

    try:
        # Build tree
        tree = DndParser(result['Tree'].read(), constructor=PhyloNode)
        for node in tree.tips():
            node.Name = int_keys[node.Name]
    finally:
        # Clean up even when parsing or renaming fails
        result.cleanUp()

    return tree
Пример #26
0
    def test_unifrac_make_subtree(self):
        """fast_unifrac distances should not depend on make_subtree

        Environment M contains only a tip (m) that is not in the tree, and
        tips j and k are in no environment, so one clade is missing
        entirely. The expected values were calculated by hand. The test
        also checks that the tree is still valid after being passed to
        fast_unifrac.
        """
        t1 = DndParser('((a:1,b:2):4,((c:3, (j:1,k:2)mt:17),(d:1,e:1):2):3)',
                       UniFracTreeNode) # note c,j is len 0 node
        #           /-------- /-a
        # ---------|          \-b
        #          |          /-------- /-c
        #           \--------|          \mt------ /-j
        #                    |                    \-k
        #                     \-------- /-d
        #                               \-e
        #

        env_str = """
        a   A   1
        a   C   2
        b   A   1
        b   B   1
        c   B   1
        d   B   3
        e   C   1
        m   M   88"""
        env_counts = count_envs(env_str.splitlines())

        # NOTE(review): 10/16 etc. are fractions only under true division;
        # confirm the module enables it (or runs on Python 3).
        expected = (array(
            [[0,10/16, 8/13],
            [10/16,0,8/17],
            [8/13,8/17,0]]),['A','B','C'])

        for subtree_flag in (False, True):
            outcome = fast_unifrac(t1,env_counts,make_subtree=subtree_flag)
            self.assertFloatEqual(outcome['distance_matrix'], expected)

        # changing tree topology relative to c,j tips shouldn't change anything
        t2 = DndParser('((a:1,b:2):4,((c:2, (j:1,k:2)mt:17):1,(d:1,e:1):2):3)',
                       UniFracTreeNode)
        for subtree_flag in (False, True):
            outcome = fast_unifrac(t2,env_counts,make_subtree=subtree_flag)
            self.assertFloatEqual(outcome['distance_matrix'], expected)

        # ensure we haven't meaningfully changed the tree
        # by passing it to unifrac: compare t1 with a freshly parsed copy
        t3 = DndParser('((a:1,b:2):4,((c:3, (j:1,k:2)mt:17),(d:1,e:1):2):3)',
                       UniFracTreeNode) # note c,j is len 0 node
        t1_tips = sorted([tip.Name for tip in t1.tips()])
        t3_tips = sorted([tip.Name for tip in t3.tips()])
        self.assertEqual(t1_tips, t3_tips)

        fresh_j = t3.getNodeMatchingName('j')
        fresh_b = t3.getNodeMatchingName('b')
        used_j = t1.getNodeMatchingName('j')
        used_b = t1.getNodeMatchingName('b')
        self.assertFloatEqual(used_j.distance(used_b), fresh_j.distance(fresh_b))
Пример #27
0
def main():
  """Midpoint-reroot a Newick tree and write the result to stdout.

  Expects exactly one command-line argument: the path of a file holding the
  tree to reroot. The tree is parsed with DndParser, the pair of tips that
  are farthest apart is located, and the tree is written back out in Newick
  form with a new root placed halfway along the path between those tips.

  Exits through opt_parser.error() when the argument count is wrong, the
  file does not exist, or no two tips are separated by a positive distance.
  """
  usage = "%prog [options] tree_to_midpoint_reroot"
  opt_parser = OptionParser(usage=usage)
  (options, args) = opt_parser.parse_args()

  if len(args) != 1:
    opt_parser.error('Incorrect number of arguments')
  if not os.path.exists(args[0]):
    opt_parser.error('Tree file %s not found' % args[0])

  # try/finally so the handle is released even if the read fails
  f = open(args[0])
  try:
    tree_string = f.read()
  finally:
    f.close()

  unrooted_tree = DndParser(tree_string, PhyloNode)
  breadth_first_visit_order, visit_order_of_node, branch_length_of, \
      child_visit_orders_of, num_nodes \
      = get_breadth_first_visit_order(unrooted_tree)

  # We will refer to the node objects by their index in the visit order

  tip_node_objects = unrooted_tree.tips()
  num_tips = len(tip_node_objects)

  # We will refer to the tip objects by their index in the tip_node_objects
  # list

  # -1 marks "no path from this node to this tip recorded yet".
  distance_from_node_to_tip = numpy.empty((num_nodes, num_tips))
  distance_from_node_to_tip.fill(-1.0)
  # The stepping stones are node indices and are used to index arrays and
  # the visit-order sequence below, so they must be stored as integers.
  stepping_stone_from_node_to_tip = numpy.empty((num_nodes, num_tips),
                                                dtype=int)
  stepping_stone_from_node_to_tip.fill(-1)
  tips_connected_to_node = {}
  for node in xrange(num_nodes):
    tips_connected_to_node[node] = set()

  # Every tip is at distance zero from itself.
  for tip in xrange(num_tips):
    tip_as_node = visit_order_of_node[tip_node_objects[tip]]
    distance_from_node_to_tip[tip_as_node,tip] = 0.0
    stepping_stone_from_node_to_tip[tip_as_node,tip] = tip_as_node
    tips_connected_to_node[tip_as_node].add(tip)

  # First pass, in reverse visit order: push each child's tip distances up
  # to its parent, remembering the child as the next step toward the tip.
  for parent in reversed(xrange(num_nodes)):
    for child in child_visit_orders_of[parent]:
      child_to_parent_distance = branch_length_of[child]
      for tip in tips_connected_to_node[child]:
        tip_distance_to_child = distance_from_node_to_tip[child, tip]
        tip_to_parent_distance_through_child \
          = tip_distance_to_child + child_to_parent_distance
        if tip in tips_connected_to_node[parent]:
          tip_distance_to_parent = distance_from_node_to_tip[parent, tip]
          if tip_to_parent_distance_through_child < tip_distance_to_parent:
            distance_from_node_to_tip[parent, tip] \
                = tip_to_parent_distance_through_child
            stepping_stone_from_node_to_tip[parent, tip] = child
        else:
          distance_from_node_to_tip[parent, tip] \
              = tip_to_parent_distance_through_child
          stepping_stone_from_node_to_tip[parent, tip] = child
          tips_connected_to_node[parent].add(tip)

  # Second pass, in visit order: push each parent's tip distances down to
  # its children, so every node also knows the tips reached via its parent.
  for parent in xrange(num_nodes):
    for child in child_visit_orders_of[parent]:
      child_to_parent_distance = branch_length_of[child]
      for tip in tips_connected_to_node[parent]:
        tip_distance_to_parent = distance_from_node_to_tip[parent, tip]
        tip_to_child_distance_through_parent \
          = tip_distance_to_parent + child_to_parent_distance
        if tip in tips_connected_to_node[child]:
          tip_distance_to_child = distance_from_node_to_tip[child, tip]
          if tip_to_child_distance_through_parent < tip_distance_to_child:
            distance_from_node_to_tip[child, tip] \
                = tip_to_child_distance_through_parent
            stepping_stone_from_node_to_tip[child, tip] = parent
        else:
          distance_from_node_to_tip[child, tip] \
              = tip_to_child_distance_through_parent
          stepping_stone_from_node_to_tip[child, tip] = parent
          tips_connected_to_node[child].add(tip)

  # Find the pair of tips with the largest tip-to-tip distance.
  max_distance = 0.0
  max_tip0 = None
  max_tip1 = None

  for tip0 in xrange(num_tips):
    tip0_as_node = visit_order_of_node[tip_node_objects[tip0]]
    for tip1 in xrange(num_tips):
      tip0_tip1_distance = distance_from_node_to_tip[tip0_as_node,tip1]
      if tip0_tip1_distance > max_distance:
        max_distance = tip0_tip1_distance
        max_tip0 = tip0
        max_tip1 = tip1

  if max_tip0 is None:
    # Nothing exceeded the initial 0.0, so there is no span to bisect.
    opt_parser.error(
      'Tree has no pair of tips separated by a positive distance')

  # Walk from tip0 toward tip1 one stepping stone at a time until the
  # remaining distance to tip1 is no longer more than half the span; the
  # last edge crossed is the one that contains the midpoint.
  midpoint_distance = max_distance / 2
  tip0 = max_tip0
  tip1 = max_tip1
  node_closer_to_tip0 = visit_order_of_node[tip_node_objects[tip0]]
  node_closer_to_tip1 = node_closer_to_tip0
  distance_to_tip1 = distance_from_node_to_tip[node_closer_to_tip0, tip1]
  node_even_closer_to_tip1 \
      = stepping_stone_from_node_to_tip[node_closer_to_tip0, tip1]
  previous_distance_to_tip1 = distance_to_tip1
  while distance_to_tip1 > midpoint_distance:
    node_closer_to_tip0 = node_closer_to_tip1
    node_closer_to_tip1 = node_even_closer_to_tip1
    previous_distance_to_tip1 = distance_to_tip1
    distance_to_tip1 = distance_from_node_to_tip[node_closer_to_tip1, tip1]
    node_even_closer_to_tip1 \
        = stepping_stone_from_node_to_tip[node_closer_to_tip1, tip1]

  # Work out which end of the midpoint edge is the parent and how far the
  # new root sits from each end.
  node_object_closer_to_tip0 = breadth_first_visit_order[node_closer_to_tip0]
  node_object_closer_to_tip1 = breadth_first_visit_order[node_closer_to_tip1]
  if node_object_closer_to_tip1 == node_object_closer_to_tip0._parent:
    theParent = node_object_closer_to_tip1
    theChild = node_object_closer_to_tip0
    distance_from_new_root_to_parent \
      = midpoint_distance - distance_to_tip1
    distance_from_new_root_to_child \
      = previous_distance_to_tip1 - midpoint_distance
  elif node_object_closer_to_tip0 == node_object_closer_to_tip1._parent:
    theParent = node_object_closer_to_tip0
    theChild = node_object_closer_to_tip1
    distance_from_new_root_to_parent \
      = previous_distance_to_tip1 - midpoint_distance
    distance_from_new_root_to_child \
      = midpoint_distance - distance_to_tip1
  else:
    # Should never get here
    raise AssertionError('Adjacent nodes on maximum span not parent-child')

  sys.stdout.write('(')
  # omit the branch length from theChild to its parent, since this is the
  # branch being broken in two
  sys.stdout.write(':'.join(
    theChild.getNewick(with_distances=True,semicolon=False).split(':')[:-1]))
  sys.stdout.write(":%g" % distance_from_new_root_to_child)
  sys.stdout.write(',')
  def print_rotated_node(child, parent, distance_from_parent_to_new_parent):
    """Write `parent` as a subtree that hangs below `child`'s side.

    Emits parent's other children unchanged, then recurses upward so that
    parent's own parent becomes one of its children, and finally writes
    parent's name (if any) and the given branch length.
    """
    sys.stdout.write('(')
    sys.stdout.write(','.join([other_child.getNewick(with_distances=True,
                                                    semicolon=False)
                                for other_child in parent.Children
                                if other_child != child]))
    if parent._parent:
      sys.stdout.write(',')
      print_rotated_node(parent, parent._parent, parent.params['length'])
    sys.stdout.write(')')
    if parent.Name:
      sys.stdout.write(parent.Name)
    sys.stdout.write(":%g" % distance_from_parent_to_new_parent)
  print_rotated_node(theChild, theParent, distance_from_new_root_to_parent)
  sys.stdout.write(');\n')
Пример #28
0
class fast_tree_tests(TestCase):
    """Tests of top-level functions"""
    def setUp(self):
        """Define a couple of standard trees"""
        self.t1 = DndParser('(((a,b),c),(d,e))', UniFracTreeNode)
        self.t2 = DndParser('(((a,b),(c,d)),(e,f))', UniFracTreeNode)
        self.t3 = DndParser('(((a,b,c),(d)),(e,f))', UniFracTreeNode)
        self.t4 = DndParser('((c)b,((f,g,h)e,i)d)', UniFracTreeNode)
        self.t4.Name = 'a'
        self.t_str = '((a:1,b:2):4,(c:3,(d:1,e:1):2):3)'

        self.t = DndParser(self.t_str, UniFracTreeNode)
        self.env_str = """
a   A   1
a   C   2
b   A   1
b   B   1
c   B   1
d   B   3
e   C   1"""
        self.env_counts = count_envs(self.env_str.splitlines())
        self.node_index, self.nodes = index_tree(self.t)
        self.count_array, self.unique_envs, self.env_to_index, \
            self.node_to_index = index_envs(self.env_counts, self.node_index)
        self.branch_lengths = get_branch_lengths(self.node_index)

        self.old_t_str = '((org1:0.11,org2:0.22,(org3:0.12,org4:0.23)g:0.33)b:0.2,(org5:0.44,org6:0.55)c:0.3,org7:0.4)'

        self.old_t = DndParser(self.old_t_str, UniFracTreeNode)
        self.old_env_str = """
org1    env1    1
org1    env2    1
org2    env2    1
org3    env2    1
org4    env3    1
org5    env1    1
org6    env1    1
org7    env3    1
"""
        self.old_env_counts = count_envs(self.old_env_str.splitlines())
        self.old_node_index, self.old_nodes = index_tree(self.old_t)
        self.old_count_array, self.old_unique_envs, self.old_env_to_index, \
            self.old_node_to_index = index_envs(self.old_env_counts, self.old_node_index)
        self.old_branch_lengths = get_branch_lengths(self.old_node_index)

    def test_traverse(self):
        """traverse should work iterative or recursive"""
        stti = self.t4.traverse
        stt = self.t4.traverse_recursive
        obs = [i.Name for i in stt(self_before=False, self_after=False)]
        exp = [i.Name for i in stti(self_before=False, self_after=False)]
        self.assertEqual(obs, exp)
        obs = [i.Name for i in stt(self_before=True, self_after=False)]
        exp = [i.Name for i in stti(self_before=True, self_after=False)]
        self.assertEqual(obs, exp)
        obs = [i.Name for i in stt(self_before=False, self_after=True)]
        exp = [i.Name for i in stti(self_before=False, self_after=True)]
        self.assertEqual(obs, exp)
        obs = [i.Name for i in stt(self_before=True, self_after=True)]
        exp = [i.Name for i in stti(self_before=True, self_after=True)]
        self.assertEqual(obs, exp)

    def test_count_envs(self):
        """count_envs should return correct counts from lines"""
        envs = """
a   A   3   some other junk
a   B 
a   C   1
b   A   2

skip
c   B
d
b   A   99
"""
        result = count_envs(envs.splitlines())
        self.assertEqual(result, \
            {'a':{'A':3,'B':1,'C':1},'b':{'A':99},'c':{'B':1}})

    def test_sum_env_dict(self):
        """sum_env_dict should return correct counts from env_dict"""
        envs = """
a   A   3   some other junk
a   B 
a   C   1
b   A   2

skip
c   B
d
b   A   99
"""
        result = count_envs(envs.splitlines())
        sum_ = sum_env_dict(result)
        self.assertEqual(sum_, 105)

    def test_index_envs(self):
        """index_envs should map envs and taxa onto indices"""
        self.assertEqual(self.unique_envs, ['A', 'B', 'C'])
        self.assertEqual(self.env_to_index, {'A': 0, 'B': 1, 'C': 2})
        self.assertEqual(self.node_to_index, {
            'a': 0,
            'b': 1,
            'c': 4,
            'd': 2,
            'e': 3
        })
        self.assertEqual(self.count_array, \
            array([[1,0,2],[1,1,0],[0,3,0],[0,0,1], \
            [0,1,0],[0,0,0],[0,0,0],[0,0,0],[0,0,0]]))

    def test_get_branch_lengths(self):
        """get_branch_lengths should make array of branch lengths from index"""
        result = get_branch_lengths(self.node_index)
        self.assertEqual(result, array([1, 2, 1, 1, 3, 2, 4, 3, 0]))

    def test_env_unique_fraction(self):
        """should report unique fraction of bl in each env """
        # model tree (self.t) with the env_str environments
        cur_count_array = self.count_array.copy()
        bound_indices = bind_to_array(self.nodes, cur_count_array)
        bool_descendants(bound_indices)
        env_bl_sums, env_bl_ufracs = env_unique_fraction(
            self.branch_lengths, cur_count_array)
        # env A has 0 unique bl, B has 4, C has 1
        self.assertEqual(env_bl_sums, [0, 4, 1])
        self.assertEqual(env_bl_ufracs, [0, 4 / 17.0, 1 / 17.0])

        # second tree (self.old_t) with the old_env_str environments
        cur_count_array = self.old_count_array.copy()
        bound_indices = bind_to_array(self.old_nodes, cur_count_array)
        bool_descendants(bound_indices)

        env_bl_sums, env_bl_ufracs = env_unique_fraction(
            self.old_branch_lengths, cur_count_array)
        # unique bl per env (env1, env2, env3) out of a total of 2.9
        self.assertEqual(env_bl_sums, [1.29, 0.33999999999999997, 0.63])
        self.assertEqual(env_bl_ufracs,
                         [1.29 / 2.9, 0.33999999999999997 / 2.9, 0.63 / 2.9])

    def test_index_tree(self):
        """index_tree should produce correct index and node map"""
        #test for first tree: contains singleton outgroup
        t1 = self.t1
        id_1, child_1 = index_tree(t1)
        nodes_1 = [n._leaf_index for n in t1.traverse(self_before=False, \
            self_after=True)]
        self.assertEqual(nodes_1, [0, 1, 2, 3, 6, 4, 5, 7, 8])
        self.assertEqual(child_1, [(2, 0, 1), (6, 2, 3), (7, 4, 5), (8, 6, 7)])
        #test for second tree: strictly bifurcating
        t2 = self.t2
        id_2, child_2 = index_tree(t2)
        nodes_2 = [n._leaf_index for n in t2.traverse(self_before=False, \
            self_after=True)]
        self.assertEqual(nodes_2, [0, 1, 4, 2, 3, 5, 8, 6, 7, 9, 10])
        self.assertEqual(child_2, [(4, 0, 1), (5, 2, 3), (8, 4, 5), (9, 6, 7),
                                   (10, 8, 9)])
        #test for third tree: contains trifurcation and single-child parent
        t3 = self.t3
        id_3, child_3 = index_tree(t3)
        nodes_3 = [n._leaf_index for n in t3.traverse(self_before=False, \
            self_after=True)]
        self.assertEqual(nodes_3, [0, 1, 2, 4, 3, 5, 8, 6, 7, 9, 10])
        self.assertEqual(child_3, [(4, 0, 2), (5, 3, 3), (8, 4, 5), (9, 6, 7),
                                   (10, 8, 9)])

    def test_bind_to_array(self):
        """bind_to_array should return correct array ranges"""
        a = reshape(arange(33), (11, 3))
        id_, child = index_tree(self.t3)
        bindings = bind_to_array(child, a)
        self.assertEqual(len(bindings), 5)
        self.assertEqual(bindings[0][0], a[4])
        self.assertEqual(bindings[0][1], a[0:3])
        self.assertEqual(bindings[0][1].shape, (3, 3))
        self.assertEqual(bindings[1][0], a[5])
        self.assertEqual(bindings[1][1], a[3:4])
        self.assertEqual(bindings[1][1].shape, (1, 3))
        self.assertEqual(bindings[2][0], a[8])
        self.assertEqual(bindings[2][1], a[4:6])
        self.assertEqual(bindings[2][1].shape, (2, 3))
        self.assertEqual(bindings[3][0], a[9])
        self.assertEqual(bindings[3][1], a[6:8])
        self.assertEqual(bindings[3][1].shape, (2, 3))
        self.assertEqual(bindings[4][0], a[10])
        self.assertEqual(bindings[4][1], a[8:10])
        self.assertEqual(bindings[4][1].shape, (2, 3))

    def test_bind_to_parent_array(self):
        """bind_to_parent_array should bind tree to array correctly"""
        a = reshape(arange(33), (11, 3))
        index_tree(self.t3)
        bindings = bind_to_parent_array(self.t3, a)
        self.assertEqual(len(bindings), 10)
        self.assertEqual(bindings[0][0], a[8])
        self.assertEqual(bindings[0][1], a[10])
        self.assertEqual(bindings[1][0], a[4])
        self.assertEqual(bindings[1][1], a[8])
        self.assertEqual(bindings[2][0], a[0])
        self.assertEqual(bindings[2][1], a[4])
        self.assertEqual(bindings[3][0], a[1])
        self.assertEqual(bindings[3][1], a[4])
        self.assertEqual(bindings[4][0], a[2])
        self.assertEqual(bindings[4][1], a[4])
        self.assertEqual(bindings[5][0], a[5])
        self.assertEqual(bindings[5][1], a[8])
        self.assertEqual(bindings[6][0], a[3])
        self.assertEqual(bindings[6][1], a[5])
        self.assertEqual(bindings[7][0], a[9])
        self.assertEqual(bindings[7][1], a[10])
        self.assertEqual(bindings[8][0], a[6])
        self.assertEqual(bindings[8][1], a[9])
        self.assertEqual(bindings[9][0], a[7])
        self.assertEqual(bindings[9][1], a[9])

    def test_delete_empty_parents(self):
        """delete_empty_parents should remove empty parents from bound indices"""
        id_to_node, node_first_last = index_tree(self.t)
        bound_indices = bind_to_array(node_first_last, self.count_array[:,
                                                                        0:1])
        bool_descendants(bound_indices)
        self.assertEqual(len(bound_indices), 4)
        deleted = delete_empty_parents(bound_indices)
        self.assertEqual(len(deleted), 2)
        for d in deleted:
            self.assertEqual(d[0][0], 1)

    def test_traverse_reduce(self):
        """traverse_reduce should reduce array in traversal order."""
        id_, child = index_tree(self.t3)
        a = zeros((11, 3)) + 99  #fill with junk
        bindings = bind_to_array(child, a)
        #load in leaf envs
        a[0] = a[1] = a[2] = a[7] = [0, 1, 0]
        a[3] = [1, 0, 0]
        a[6] = [0, 0, 1]
        f = logical_or.reduce
        traverse_reduce(bindings, f)
        self.assertEqual(a,\
            array([[0,1,0],[0,1,0],[0,1,0],[1,0,0],[0,1,0],[1,0,0],\
            [0,0,1],[0,1,0],[1,1,0],[0,1,1],[1,1,1]])
        )
        f = sum
        traverse_reduce(bindings, f)
        self.assertEqual( a, \
            array([[0,1,0],[0,1,0],[0,1,0],[1,0,0],[0,3,0],[1,0,0],\
            [0,0,1],[0,1,0],[1,3,0],[0,1,1],[1,4,1]])
        )

    def test_bool_descendants(self):
        """bool_descendants should be true if any descendant true"""
        #self.t3 = DndParser('(((a,b,c),(d)),(e,f))', UniFracTreeNode)
        id_, child = index_tree(self.t3)
        a = zeros((11, 3)) + 99  #fill with junk
        bindings = bind_to_array(child, a)
        #load in leaf envs
        a[0] = a[1] = a[2] = a[7] = [0, 1, 0]
        a[3] = [1, 0, 0]
        a[6] = [0, 0, 1]
        bool_descendants(bindings)
        self.assertEqual(a, \
            array([[0,1,0],[0,1,0],[0,1,0],[1,0,0],[0,1,0],[1,0,0],\
            [0,0,1],[0,1,0],[1,1,0],[0,1,1],[1,1,1]])
        )

    def test_sum_descendants(self):
        """sum_descendants should sum total descendants w/ each state"""
        id_, child = index_tree(self.t3)
        a = zeros((11, 3)) + 99  #fill with junk
        bindings = bind_to_array(child, a)
        #load in leaf envs
        a[0] = a[1] = a[2] = a[7] = [0, 1, 0]
        a[3] = [1, 0, 0]
        a[6] = [0, 0, 1]
        sum_descendants(bindings)
        self.assertEqual(a, \
            array([[0,1,0],[0,1,0],[0,1,0],[1,0,0],[0,3,0],[1,0,0],\
            [0,0,1],[0,1,0],[1,3,0],[0,1,1],[1,4,1]])
        )

    def test_fitch_descendants(self):
        """fitch_descendants should assign states by fitch parsimony, ret. #"""
        id_, child = index_tree(self.t3)
        a = zeros((11, 3)) + 99  #fill with junk
        bindings = bind_to_array(child, a)
        #load in leaf envs
        a[0] = a[1] = a[2] = a[7] = [0, 1, 0]
        a[3] = [1, 0, 0]
        a[6] = [0, 0, 1]
        changes = fitch_descendants(bindings)
        self.assertEqual(changes, 2)
        self.assertEqual(a, \
            array([[0,1,0],[0,1,0],[0,1,0],[1,0,0],[0,1,0],[1,0,0],\
            [0,0,1],[0,1,0],[1,1,0],[0,1,1],[0,1,0]])
        )

    def test_fitch_descendants_missing_data(self):
        """fitch_descendants should work with missing data"""
        #tree and envs for testing missing values
        t_str = '(((a:1,b:2):4,(c:3,d:1):2):1,(e:2,f:1):3);'
        env_str = """a   A
b   B
c   D
d   C
e   C
f   D"""
        t = DndParser(t_str, UniFracTreeNode)
        node_index, nodes = index_tree(t)
        env_counts = count_envs(env_str.split('\n'))

        count_array, unique_envs, env_to_index, node_to_index = \
            index_envs(env_counts, node_index)

        branch_lengths = get_branch_lengths(node_index)
        #test just the AB pair
        ab_counts = count_array[:, 0:2]
        bindings = bind_to_array(nodes, ab_counts)
        changes = fitch_descendants(bindings, counter=FitchCounter)
        self.assertEqual(changes, 1)
        orig_result = ab_counts.copy()
        #check that the original Fitch counter gives the expected
        #incorrect parsimony result
        changes = fitch_descendants(bindings, counter=FitchCounterDense)
        self.assertEqual(changes, 5)
        new_result = ab_counts.copy()
        #check that the two versions fill the array with the same values
        self.assertEqual(orig_result, new_result)

    def test_tip_distances(self):
        """tip_distances should set tips to correct distances."""
        t = self.t
        bl = self.branch_lengths.copy()[:, newaxis]
        bindings = bind_to_parent_array(t, bl)
        tips = []
        for n in t.traverse(self_before=False, self_after=True):
            if not n.Children:
                tips.append(n._leaf_index)
        tip_distances(bl, bindings, tips)
        self.assertEqual(bl, array([5, 6, 6, 6, 6, 0, 0, 0, 0])[:, newaxis])

    def test_permute_selected_rows(self):
        """permute_selected_rows should switch just the selected rows in a"""
        orig = reshape(arange(8), (4, 2))
        new = orig.copy()
        fake_permutation = lambda a: range(a)[::-1]  #reverse order
        permute_selected_rows([0, 2], orig, new, fake_permutation)
        self.assertEqual(new, array([[4, 5], [2, 3], [0, 1], [6, 7]]))
        #make sure we didn't change orig
        self.assertEqual(orig, reshape(arange(8), (4, 2)))

    def test_prep_items_for_jackknife(self):
        """prep_items_for_jackknife should expand indices of repeated counts"""
        a = array([0, 1, 0, 1, 2, 0, 3])
        #          0 1 2 3 4 5 6
        result = prep_items_for_jackknife(a)
        exp = array([1, 3, 4, 4, 6, 6, 6])
        self.assertEqual(result, exp)

    def test_jackknife_bool(self):
        """jackknife_bool should make a vector with right number of nonzeros"""
        fake_permutation = lambda a: range(a)[::-1]  #reverse order
        orig_vec = array([0, 0, 1, 0, 1, 1, 0, 1, 1])
        orig_items = flatnonzero(orig_vec)
        result = jackknife_bool(orig_items, 3, len(orig_vec), fake_permutation)
        self.assertEqual(result, array([0, 0, 0, 0, 0, 1, 0, 1, 1]))
        #returns the original if trying to take too many
        self.assertEqual(jackknife_bool(orig_items, 20, len(orig_vec)), \
            orig_vec)

    def test_jackknife_int(self):
        """jackknife_int should make a vector with right counts"""
        orig_vec = array([0, 2, 1, 0, 3, 1])
        orig_items = array([1, 1, 2, 4, 4, 4, 5])
        #                   0 1 2 3 4 5 6
        fake_permutation = lambda a: a == 7 and array([4, 6, 3, 1, 2, 6, 5])
        result = jackknife_int(orig_items, 4, len(orig_vec), fake_permutation)
        self.assertEqual(result, array([0, 1, 0, 0, 2, 1]))
        #returns the original if trying to take too many
        self.assertEqual(jackknife_int(orig_items, 20, len(orig_vec)), \
            orig_vec)

    def test_jackknife_array(self):
        """jackknife_array should make a new array with right counts"""

        orig_vec1 = array([0, 2, 2, 3, 1])
        orig_vec2 = array([2, 2, 1, 2, 2])
        test_array = array([orig_vec1, orig_vec2])

        # TODO: use a deterministic fake permutation; with the real random
        # permutation only the row/column totals can be checked.

        new_mat1 = jackknife_array(test_array,
                                   1,
                                   axis=1,
                                   jackknife_f=jackknife_int,
                                   permutation_f=permutation)
        self.assertEqual(new_mat1.sum(axis=0), [1, 1, 1, 1, 1])

        new_mat2 = jackknife_array(test_array,
                                   2,
                                   axis=1,
                                   jackknife_f=jackknife_int,
                                   permutation_f=permutation)
        self.assertEqual(new_mat2.sum(axis=0), [2, 2, 2, 2, 2])

        new_mat3 = jackknife_array(test_array,
                                   2,
                                   axis=0,
                                   jackknife_f=jackknife_int,
                                   permutation_f=permutation)
        self.assertEqual(new_mat3.sum(axis=1), [2, 2])

        # test that you get orig mat back if too many
        self.assertEqual(jackknife_array(test_array, 20, axis=1), test_array)

    def test_unifrac(self):
        """unifrac should return correct results for model tree"""
        m = array([[1,0,1],[1,1,0],[0,1,0],[0,0,1],[0,1,0],[0,1,1],[1,1,1],\
            [0,1,1],[1,1,1]])
        bl = self.branch_lengths
        self.assertEqual(unifrac(bl, m[:, 0], m[:, 1]), 10 / 16.0)
        self.assertEqual(unifrac(bl, m[:, 0], m[:, 2]), 8 / 13.0)
        self.assertEqual(unifrac(bl, m[:, 1], m[:, 2]), 8 / 17.0)

    def test_unnormalized_unifrac(self):
        """unnormalized unifrac should return correct results for model tree"""
        m = array([[1,0,1],[1,1,0],[0,1,0],[0,0,1],[0,1,0],[0,1,1],[1,1,1],\
            [0,1,1],[1,1,1]])
        bl = self.branch_lengths
        self.assertEqual(unnormalized_unifrac(bl, m[:, 0], m[:, 1]), 10 / 17.)
        self.assertEqual(unnormalized_unifrac(bl, m[:, 0], m[:, 2]), 8 / 17.)
        self.assertEqual(unnormalized_unifrac(bl, m[:, 1], m[:, 2]), 8 / 17.)

    def test_PD(self):
        """PD should return correct results for model tree"""
        m = array([[1,0,1],[1,1,0],[0,1,0],[0,0,1],[0,1,0],[0,1,1],[1,1,1],\
            [0,1,1],[1,1,1]])
        bl = self.branch_lengths
        self.assertEqual(PD(bl, m[:, 0]), 7)
        self.assertEqual(PD(bl, m[:, 1]), 15)
        self.assertEqual(PD(bl, m[:, 2]), 11)

    def test_G(self):
        """G should return correct results for model tree"""
        m = array([[1,0,1],[1,1,0],[0,1,0],[0,0,1],[0,1,0],[0,1,1],[1,1,1],\
            [0,1,1],[1,1,1]])
        bl = self.branch_lengths
        self.assertEqual(G(bl, m[:, 0], m[:, 0]), 0)
        self.assertEqual(G(bl, m[:, 0], m[:, 1]), 1 / 16.0)
        self.assertEqual(G(bl, m[:, 1], m[:, 0]), 9 / 16.0)

    def test_unnormalized_G(self):
        """unnormalized_G should return correct results for model tree"""
        m = array([[1,0,1],[1,1,0],[0,1,0],[0,0,1],[0,1,0],[0,1,1],[1,1,1],\
            [0,1,1],[1,1,1]])
        bl = self.branch_lengths
        self.assertEqual(unnormalized_G(bl, m[:, 0], m[:, 0]), 0 / 17.)
        self.assertEqual(unnormalized_G(bl, m[:, 0], m[:, 1]), 1 / 17.)
        self.assertEqual(unnormalized_G(bl, m[:, 1], m[:, 0]), 9 / 17.)

    def test_unifrac_matrix(self):
        """unifrac_matrix should return correct results for model tree"""
        m = array([[1,0,1],[1,1,0],[0,1,0],[0,0,1],[0,1,0],[0,1,1],[1,1,1],\
            [0,1,1],[1,1,1]])
        bl = self.branch_lengths
        result = unifrac_matrix(bl, m)
        self.assertEqual(result, array([[0, 10/16.,8/13.],[10/16.,0,8/17.],\
            [8/13.,8/17.,0]]))
        #should work if we tell it the measure is asymmetric
        result = unifrac_matrix(bl, m, is_symmetric=False)
        self.assertEqual(result, array([[0, 10/16.,8/13.],[10/16.,0,8/17.],\
            [8/13.,8/17.,0]]))
        #should work if the measure really is asymmetric
        result = unifrac_matrix(bl,
                                m,
                                metric=unnormalized_G,
                                is_symmetric=False)
        self.assertEqual(result, array([[0, 1/17.,2/17.],[9/17.,0,6/17.],\
            [6/17.,2/17.,0]]))
        #should also match web site calculations
        envs = self.count_array
        bound_indices = bind_to_array(self.nodes, envs)
        bool_descendants(bound_indices)
        result = unifrac_matrix(bl, envs)
        # 8/17 rounds to 0.4706; the expected matrix is symmetric
        exp = array([[0, 0.6250, 0.6154], [0.6250, 0, \
            0.4706], [0.6154, 0.4706, 0]])
        # assertTrue rather than a bare assert, which python -O would strip
        self.assertTrue((abs(result - exp)).max() < 0.001)

    def test_unifrac_vector(self):
        """unifrac_vector should return correct results for model tree"""
        m = array([[1,0,1],[1,1,0],[0,1,0],[0,0,1],[0,1,0],[0,1,1],[1,1,1],\
            [0,1,1],[1,1,1]])
        bl = self.branch_lengths
        result = unifrac_vector(bl, m)
        self.assertFloatEqual(result, array([10. / 17, 6. / 17, 7. / 17]))

    def test_PD_vector(self):
        """PD_vector should return correct results for model tree"""
        m = array([[1,0,1],[1,1,0],[0,1,0],[0,0,1],[0,1,0],[0,1,1],[1,1,1],\
            [0,1,1],[1,1,1]])
        bl = self.branch_lengths
        result = PD_vector(bl, m)
        self.assertFloatEqual(result, array([7, 15, 11]))

    def test_weighted_unifrac_matrix(self):
        """weighted unifrac matrix should return correct results for model tree"""
        #should match web site calculations
        envs = self.count_array
        bound_indices = bind_to_array(self.nodes, envs)
        sum_descendants(bound_indices)
        bl = self.branch_lengths
        tip_indices = [n._leaf_index for n in self.t.tips()]
        result = weighted_unifrac_matrix(bl, envs, tip_indices)
        exp = array([[0, 9.1, 4.5], [9.1, 0, \
            6.4], [4.5, 6.4, 0]])
        # assertTrue rather than a bare assert, which python -O would strip
        self.assertTrue((abs(result - exp)).max() < 0.001)
        #should work with branch length corrections
        td = bl.copy()[:, newaxis]
        tip_bindings = bind_to_parent_array(self.t, td)
        tip_distances(td, tip_bindings, tip_indices)
        result = weighted_unifrac_matrix(bl,
                                         envs,
                                         tip_indices,
                                         bl_correct=True,
                                         tip_distances=td)
        exp = array([[0, 9.1/11.5, 4.5/(10.5+1./3)], [9.1/11.5, 0, \
            6.4/(11+1./3)], [4.5/(10.5+1./3), 6.4/(11+1./3), 0]])
        self.assertTrue((abs(result - exp)).max() < 0.001)

    def test_weighted_unifrac_vector(self):
        """weighted_unifrac_vector should return correct results for model tree"""
        envs = self.count_array
        bound_indices = bind_to_array(self.nodes, envs)
        sum_descendants(bound_indices)
        bl = self.branch_lengths
        tip_indices = [n._leaf_index for n in self.t.tips()]
        result = weighted_unifrac_vector(bl, envs, tip_indices)
        self.assertFloatEqual(
            result[0],
            sum([
                abs(1. / 2 - 2. / 8) * 1,
                abs(1. / 2 - 1. / 8) * 2,
                abs(0 - 1. / 8) * 3,
                abs(0 - 3. / 8) * 1,
                abs(0 - 1. / 8) * 1,
                abs(0 - 4. / 8) * 2,
                abs(2. / 2 - 3. / 8) * 4,
                abs(0. - 5. / 8) * 3.
            ]))

        self.assertFloatEqual(
            result[1],
            sum([
                abs(0 - .6) * 1,
                abs(.2 - .2) * 2,
                abs(.2 - 0) * 3,
                abs(.6 - 0) * 1,
                abs(0 - .2) * 1,
                abs(.6 - .2) * 2,
                abs(.2 - .8) * 4,
                abs(.8 - .2) * 3
            ]))

        self.assertFloatEqual(
            result[2],
            sum([
                abs(2. / 3 - 1. / 7) * 1,
                abs(0 - 2. / 7) * 2,
                abs(0 - 1. / 7) * 3,
                abs(0 - 3. / 7) * 1,
                abs(1. / 3 - 0) * 1,
                abs(1. / 3 - 3. / 7) * 2,
                abs(2. / 3 - 3. / 7) * 4,
                abs(1. / 3 - 4. / 7) * 3
            ]))
Пример #29
0
    def test_unifrac_make_subtree(self):
        """fast_unifrac distances should be the same with or without make_subtree

        Environment M lists only a tip that is not in the tree, tips j and k
        are in no environment, and one clade is missing entirely. The
        expected values were calculated by hand. Afterwards we also check
        that the tree passed in is still a valid tree.
        """
        base_newick = '((a:1,b:2):4,((c:3, (j:1,k:2)mt:17),(d:1,e:1):2):3)'
        t1 = DndParser(base_newick, UniFracTreeNode) # note c,j is len 0 node
        #           /-------- /-a
        # ---------|          \-b
        #          |          /-------- /-c
        #           \--------|          \mt------ /-j
        #                    |                    \-k
        #                     \-------- /-d
        #                               \-e
        #

        env_str = """
        a   A   1
        a   C   2
        b   A   1
        b   B   1
        c   B   1
        d   B   3
        e   C   1
        m   M   88"""
        env_counts = count_envs(env_str.splitlines())

        # hand-calculated distance matrix and its environment labels
        expected = (array(
            [[0,10/16, 8/13],
            [10/16,0,8/17],
            [8/13,8/17,0]]),['A','B','C'])

        def check_both_modes(tree):
            """Compare the distance matrix for `tree` in both subtree modes."""
            for subtree_flag in (False, True):
                observed = fast_unifrac(tree, env_counts,
                                        make_subtree=subtree_flag)
                self.assertFloatEqual(observed['distance_matrix'], expected)

        check_both_modes(t1)

        # changing tree topology relative to c,j tips shouldn't change anything
        t2 = DndParser('((a:1,b:2):4,((c:2, (j:1,k:2)mt:17):1,(d:1,e:1):2):3)', \
            UniFracTreeNode)
        check_both_modes(t2)

        # ensure we haven't meaningfully changed the tree
        # by passing it to unifrac: compare against a fresh parse of the
        # same newick string
        t3 = DndParser(base_newick, UniFracTreeNode) # note c,j is len 0 node
        t1_tips = sorted(tip.Name for tip in t1.tips())
        t3_tips = sorted(tip.Name for tip in t3.tips())
        self.assertEqual(t1_tips, t3_tips)

        dist_in_t1 = t1.getNodeMatchingName('j').distance(
            t1.getNodeMatchingName('b'))
        dist_in_t3 = t3.getNodeMatchingName('j').distance(
            t3.getNodeMatchingName('b'))
        self.assertFloatEqual(dist_in_t1, dist_in_t3)
Пример #30
0
def build_tree_from_alignment(aln, moltype, best_tree=False, params=None):
    """Returns a tree from Alignment object aln.

    aln: an cogent.core.alignment.Alignment object, or data that can be used
    to build one.

    moltype: cogent.core.moltype.MolType object. Must compare equal to DNA,
    RNA or PROTEIN.

    best_tree: if True (default:False), switches on bootstrapping (1000
    replicates, random seed, bootstrap labels on nodes) for every one of
    those options that is not already present in params. If False, the
    plain '-tree' option is switched on instead.

    params: dict of parameters to pass in to the Clustal app controller.
    The dict is copied, so the caller's object is never modified.

    The result is the tree produced by DndParser (constructor=PhyloNode)
    from the application's 'Tree' output, with the original sequence names
    restored on the tips.

    Raises ValueError if moltype is not DNA, RNA or PROTEIN.
    """
    # Validate the molecule type first so that a bad call fails before any
    # application controller is created.
    if moltype == DNA or moltype == RNA:
        seq_type = 'd'
    elif moltype == PROTEIN:
        seq_type = 'p'
    else:
        raise ValueError("moltype must be DNA, RNA, or PROTEIN")

    # Work on a private copy: never mutate the dict handed in by the caller.
    params = dict(params) if params else {}
    # '-type' has to be in params *before* the controller is built from it.
    # NOTE(review): presumably the controller applies params when it is
    # constructed, in which case setting '-type' afterwards (as this function
    # used to do) never reached the application -- confirm against Clustalw.
    params['-type'] = seq_type

    # Create instance of app controller, enable tree, disable alignment
    app = Clustalw(InputHandler='_input_as_multiline_string', params=params,
                   WorkingDir='/tmp')
    app.Parameters['-align'].off()

    # best_tree -> bootstrap
    if best_tree:
        if '-bootstrap' not in params:
            app.Parameters['-bootstrap'].on(1000)
        if '-seed' not in params:
            app.Parameters['-seed'].on(randint(0, 1000))
        if '-bootlabels' not in params:
            app.Parameters['-bootlabels'].on('nodes')
    else:
        app.Parameters['-tree'].on()

    # Setup mapping. Clustalw clips identifiers. We will need to remap them.
    seq_collection = SequenceCollection(aln)
    int_map, int_keys = seq_collection.getIntMap()
    renamed_seqs = SequenceCollection(int_map)

    # Collect result
    result = app(renamed_seqs.toFasta())

    try:
        # Build tree and put the original identifiers back on the tips
        tree = DndParser(result['Tree'].read(), constructor=PhyloNode)
        for node in tree.tips():
            node.Name = int_keys[node.Name]
    finally:
        # Clean up even when reading or parsing the tree fails
        result.cleanUp()

    return tree
Пример #31
0
class fast_tree_tests(TestCase):
    """Tests of top-level functions"""

    def setUp(self):
        """Define a couple of standard trees"""
        self.t1 = DndParser('(((a,b),c),(d,e))', UniFracTreeNode)
        self.t2 = DndParser('(((a,b),(c,d)),(e,f))', UniFracTreeNode)
        self.t3 = DndParser('(((a,b,c),(d)),(e,f))', UniFracTreeNode)
        self.t4 = DndParser('((c)b,((f,g,h)e,i)d)', UniFracTreeNode)
        self.t4.Name = 'a'
        self.t_str = '((a:1,b:2):4,(c:3,(d:1,e:1):2):3)'

        #model tree with branch lengths, plus its env counts and indices
        self.t = DndParser(self.t_str, UniFracTreeNode)
        self.env_str = """
a   A   1
a   C   2
b   A   1
b   B   1
c   B   1
d   B   3
e   C   1"""
        self.env_counts = count_envs(self.env_str.splitlines())
        self.node_index, self.nodes = index_tree(self.t)
        self.count_array, self.unique_envs, self.env_to_index, \
            self.node_to_index = index_envs(self.env_counts, self.node_index)
        self.branch_lengths = get_branch_lengths(self.node_index)

        #presence/absence of envs A, B, C (columns) at each indexed node
        #(rows) of the model tree; built fresh for every test
        self.env_presence = array([[1,0,1],[1,1,0],[0,1,0],[0,0,1],[0,1,0],
            [0,1,1],[1,1,1],[0,1,1],[1,1,1]])

        #second ("old") tree with non-integer branch lengths
        self.old_t_str = '((org1:0.11,org2:0.22,(org3:0.12,org4:0.23)g:0.33)b:0.2,(org5:0.44,org6:0.55)c:0.3,org7:0.4)'
        self.old_t = DndParser(self.old_t_str, UniFracTreeNode)
        self.old_env_str = """
org1    env1    1
org1    env2    1
org2    env2    1
org3    env2    1
org4    env3    1
org5    env1    1
org6    env1    1
org7    env3    1
"""
        self.old_env_counts = count_envs(self.old_env_str.splitlines())
        self.old_node_index, self.old_nodes = index_tree(self.old_t)
        self.old_count_array, self.old_unique_envs, self.old_env_to_index, \
            self.old_node_to_index = index_envs(self.old_env_counts,
                self.old_node_index)
        self.old_branch_lengths = get_branch_lengths(self.old_node_index)

    def _t3_env_bindings(self):
        """Returns (array, bindings) for t3 with the leaf envs loaded.

        Rows of internal nodes are left filled with junk (99), so a test can
        tell that the function under test really overwrote them.
        """
        id_, child = index_tree(self.t3)
        a = zeros((11,3)) + 99    #fill with junk
        bindings = bind_to_array(child, a)
        #load in leaf envs
        a[0] = a[1] = a[2] = a[7] = [0,1,0]
        a[3] = [1,0,0]
        a[6] = [0,0,1]
        return a, bindings

    def test_traverse(self):
        """traverse should work iterative or recursive"""
        stti = self.t4.traverse
        stt = self.t4.traverse_recursive
        #every combination of visiting a node before/after its children
        for before, after in ((False, False), (True, False), (False, True),
                              (True, True)):
            obs = [i.Name for i in stt(self_before=before, self_after=after)]
            exp = [i.Name for i in stti(self_before=before, self_after=after)]
            self.assertEqual(obs, exp)

    def test_count_envs(self):
        """count_envs should return correct counts from lines"""
        envs = """
a   A   3   some other junk
a   B 
a   C   1
b   A   2

skip
c   B
d
b   A   99
"""
        result = count_envs(envs.splitlines())
        self.assertEqual(result,
            {'a':{'A':3,'B':1,'C':1},'b':{'A':99},'c':{'B':1}})

    def test_sum_env_dict(self):
        """sum_env_dict should return correct counts from env_dict"""
        envs = """
a   A   3   some other junk
a   B 
a   C   1
b   A   2

skip
c   B
d
b   A   99
"""
        result = count_envs(envs.splitlines())
        sum_ = sum_env_dict(result)
        self.assertEqual(sum_, 105)

    def test_index_envs(self):
        """index_envs should map envs and taxa onto indices"""
        self.assertEqual(self.unique_envs, ['A','B','C'])
        self.assertEqual(self.env_to_index, {'A':0, 'B':1, 'C':2})
        self.assertEqual(self.node_to_index,{'a':0, 'b':1, 'c':4, 'd':2, 'e':3})
        self.assertEqual(self.count_array,
            array([[1,0,2],[1,1,0],[0,3,0],[0,0,1],
            [0,1,0],[0,0,0],[0,0,0],[0,0,0],[0,0,0]]))

    def test_get_branch_lengths(self):
        """get_branch_lengths should make array of branch lengths from index"""
        result = get_branch_lengths(self.node_index)
        self.assertEqual(result, array([1,2,1,1,3,2,4,3,0]))

    def test_env_unique_fraction(self):
        """should report unique fraction of bl in each env """
        #model tree: total branch length is 17
        cur_count_array = self.count_array.copy()
        bound_indices = bind_to_array(self.nodes, cur_count_array)
        bool_descendants(bound_indices)
        env_bl_sums, env_bl_ufracs = env_unique_fraction(self.branch_lengths,
            cur_count_array)
        # env A has 0 unique bl, B has 4, C has 1
        self.assertEqual(env_bl_sums, [0,4,1])
        self.assertFloatEqual(env_bl_ufracs, [0,4/17.0,1/17.0])

        #old tree: total branch length is 2.9
        cur_count_array = self.old_count_array.copy()
        bound_indices = bind_to_array(self.old_nodes, cur_count_array)
        bool_descendants(bound_indices)
        env_bl_sums, env_bl_ufracs = env_unique_fraction(
            self.old_branch_lengths, cur_count_array)
        # expected unique bl per env: 1.29, 0.34 and 0.63. These are sums of
        # non-integer branch lengths, so compare with a float tolerance
        # rather than exactly.
        self.assertFloatEqual(env_bl_sums, [1.29, 0.34, 0.63])
        self.assertFloatEqual(env_bl_ufracs, [1.29/2.9, 0.34/2.9, 0.63/2.9])

    def test_index_tree(self):
        """index_tree should produce correct index and node map"""
        #test for first tree: contains singleton outgroup
        t1 = self.t1
        id_1, child_1 = index_tree(t1)
        nodes_1 = [n._leaf_index for n in t1.traverse(self_before=False,
            self_after=True)]
        self.assertEqual(nodes_1, [0,1,2,3,6,4,5,7,8])
        self.assertEqual(child_1, [(2,0,1),(6,2,3),(7,4,5),(8,6,7)])
        #test for second tree: strictly bifurcating
        t2 = self.t2
        id_2, child_2 = index_tree(t2)
        nodes_2 = [n._leaf_index for n in t2.traverse(self_before=False,
            self_after=True)]
        self.assertEqual(nodes_2, [0,1,4,2,3,5,8,6,7,9,10])
        self.assertEqual(child_2, [(4,0,1),(5,2,3),(8,4,5),(9,6,7),(10,8,9)])
        #test for third tree: contains trifurcation and single-child parent
        t3 = self.t3
        id_3, child_3 = index_tree(t3)
        nodes_3 = [n._leaf_index for n in t3.traverse(self_before=False,
            self_after=True)]
        self.assertEqual(nodes_3, [0,1,2,4,3,5,8,6,7,9,10])
        self.assertEqual(child_3, [(4,0,2),(5,3,3),(8,4,5),(9,6,7),(10,8,9)])

    def test_bind_to_array(self):
        """bind_to_array should return correct array ranges"""
        a = reshape(arange(33), (11,3))
        id_, child = index_tree(self.t3)
        bindings = bind_to_array(child, a)
        #expected (node row, first child row, stop row) for each binding
        expected = [(4,0,3), (5,3,4), (8,4,6), (9,6,8), (10,8,10)]
        self.assertEqual(len(bindings), 5)
        for binding, (node, start, stop) in zip(bindings, expected):
            self.assertEqual(binding[0], a[node])
            self.assertEqual(binding[1], a[start:stop])
            self.assertEqual(binding[1].shape, (stop - start, 3))

    def test_bind_to_parent_array(self):
        """bind_to_parent_array should bind tree to array correctly"""
        a = reshape(arange(33), (11,3))
        index_tree(self.t3)
        bindings = bind_to_parent_array(self.t3, a)
        #expected (child row, parent row) for each binding
        expected = [(8,10), (4,8), (0,4), (1,4), (2,4), (5,8), (3,5),
                    (9,10), (6,9), (7,9)]
        self.assertEqual(len(bindings), 10)
        for binding, (child_row, parent_row) in zip(bindings, expected):
            self.assertEqual(binding[0], a[child_row])
            self.assertEqual(binding[1], a[parent_row])

    def test_delete_empty_parents(self):
        """delete_empty_parents should remove empty parents from bound indices"""
        id_to_node, node_first_last = index_tree(self.t)
        #only the first env column is bound
        bound_indices = bind_to_array(node_first_last, self.count_array[:,0:1])
        bool_descendants(bound_indices)
        self.assertEqual(len(bound_indices), 4)
        deleted = delete_empty_parents(bound_indices)
        self.assertEqual(len(deleted), 2)
        for d in deleted:
            self.assertEqual(d[0][0], 1)

    def test_traverse_reduce(self):
        """traverse_reduce should reduce array in traversal order."""
        a, bindings = self._t3_env_bindings()
        f = logical_or.reduce
        traverse_reduce(bindings, f)
        self.assertEqual(a,
            array([[0,1,0],[0,1,0],[0,1,0],[1,0,0],[0,1,0],[1,0,0],
            [0,0,1],[0,1,0],[1,1,0],[0,1,1],[1,1,1]])
        )
        #second pass over the same array, now summing instead of or-ing
        f = sum
        traverse_reduce(bindings, f)
        self.assertEqual(a,
            array([[0,1,0],[0,1,0],[0,1,0],[1,0,0],[0,3,0],[1,0,0],
            [0,0,1],[0,1,0],[1,3,0],[0,1,1],[1,4,1]])
        )

    def test_bool_descendants(self):
        """bool_descendants should be true if any descendant true"""
        #self.t3 = DndParser('(((a,b,c),(d)),(e,f))', UniFracTreeNode)
        a, bindings = self._t3_env_bindings()
        bool_descendants(bindings)
        self.assertEqual(a,
            array([[0,1,0],[0,1,0],[0,1,0],[1,0,0],[0,1,0],[1,0,0],
            [0,0,1],[0,1,0],[1,1,0],[0,1,1],[1,1,1]])
        )

    def test_sum_descendants(self):
        """sum_descendants should sum total descendants w/ each state"""
        a, bindings = self._t3_env_bindings()
        sum_descendants(bindings)
        self.assertEqual(a,
            array([[0,1,0],[0,1,0],[0,1,0],[1,0,0],[0,3,0],[1,0,0],
            [0,0,1],[0,1,0],[1,3,0],[0,1,1],[1,4,1]])
        )

    def test_fitch_descendants(self):
        """fitch_descendants should assign states by fitch parsimony, ret. #"""
        a, bindings = self._t3_env_bindings()
        changes = fitch_descendants(bindings)
        self.assertEqual(changes, 2)
        self.assertEqual(a,
            array([[0,1,0],[0,1,0],[0,1,0],[1,0,0],[0,1,0],[1,0,0],
            [0,0,1],[0,1,0],[1,1,0],[0,1,1],[0,1,0]])
        )

    def test_fitch_descendants_missing_data(self):
        """fitch_descendants should work with missing data"""
        #tree and envs for testing missing values
        t_str = '(((a:1,b:2):4,(c:3,d:1):2):1,(e:2,f:1):3);'
        env_str = """a   A
b   B
c   D
d   C
e   C
f   D"""
        t = DndParser(t_str, UniFracTreeNode)
        node_index, nodes = index_tree(t)
        env_counts = count_envs(env_str.split('\n'))

        count_array, unique_envs, env_to_index, node_to_index = \
            index_envs(env_counts, node_index)

        #test just the AB pair
        ab_counts = count_array[:, 0:2]
        bindings = bind_to_array(nodes, ab_counts)
        changes = fitch_descendants(bindings, counter=FitchCounter)
        self.assertEqual(changes, 1)
        orig_result = ab_counts.copy()
        #check that the original Fitch counter gives the expected
        #incorrect parsimony result
        changes = fitch_descendants(bindings, counter=FitchCounterDense)
        self.assertEqual(changes, 5)
        new_result = ab_counts.copy()
        #check that the two versions fill the array with the same values
        self.assertEqual(orig_result, new_result)

    def test_tip_distances(self):
        """tip_distances should set tips to correct distances."""
        t = self.t
        bl = self.branch_lengths.copy()[:,newaxis]
        bindings = bind_to_parent_array(t, bl)
        tips = []
        for n in t.traverse(self_before=False, self_after=True):
            if not n.Children:
                tips.append(n._leaf_index)
        tip_distances(bl, bindings, tips)
        self.assertEqual(bl, array([5,6,6,6,6,0,0,0,0])[:,newaxis])

    def test_permute_selected_rows(self):
        """permute_selected_rows should switch just the selected rows in a"""
        orig = reshape(arange(8),(4,2))
        new = orig.copy()
        fake_permutation = lambda a: range(a)[::-1] #reverse order
        permute_selected_rows([0,2], orig, new, fake_permutation)
        self.assertEqual(new,  array([[4,5],[2,3],[0,1],[6,7]]))
        #make sure we didn't change orig
        self.assertEqual(orig, reshape(arange(8), (4,2)))

    def test_prep_items_for_jackknife(self):
        """prep_items_for_jackknife should expand indices of repeated counts"""
        a = array([0,1,0,1,2,0,3])
        #          0 1 2 3 4 5 6
        result = prep_items_for_jackknife(a)
        exp = array([1,3,4,4,6,6,6])
        self.assertEqual(result, exp)

    def test_jackknife_bool(self):
        """jackknife_bool should make a vector with right number of nonzeros"""
        fake_permutation = lambda a: range(a)[::-1] #reverse order
        orig_vec = array([0,0,1,0,1,1,0,1,1])
        orig_items = flatnonzero(orig_vec)
        length = len(orig_vec)
        result = jackknife_bool(orig_items, 3, length, fake_permutation)
        self.assertEqual(result, array([0,0,0,0,0,1,0,1,1]))
        #returns the original if trying to take too many
        self.assertEqual(jackknife_bool(orig_items, 20, length), orig_vec)

    def test_jackknife_int(self):
        """jackknife_int should make a vector with right counts"""
        orig_vec = array([0,2,1,0,3,1])
        orig_items = array([1,1,2,4,4,4,5])
        #                   0 1 2 3 4 5 6
        #only answers for the expected size (7); anything else yields False
        fake_permutation = lambda a: a == 7 and array([4,6,3,1,2,6,5])
        result = jackknife_int(orig_items, 4, len(orig_vec), fake_permutation)
        self.assertEqual(result, array([0,1,0,0,2,1]))
        #returns the original if trying to take too many
        self.assertEqual(jackknife_int(orig_items, 20, len(orig_vec)),
            orig_vec)

    def test_jackknife_array(self):
        """jackknife_array should make a new array with right counts"""
        orig_vec1 = array([0,2,2,3,1])
        orig_vec2 = array([2,2,1,2,2])
        test_array = array([orig_vec1, orig_vec2])

        # TODO: test with a fake permutation; with the real (random) one only
        # the totals along the jackknifed axis can be checked.
        new_mat1 = jackknife_array(test_array, 1, axis=1,
            jackknife_f=jackknife_int, permutation_f=permutation)
        self.assertEqual(new_mat1.sum(axis=0), [1,1,1,1,1])

        new_mat2 = jackknife_array(test_array, 2, axis=1,
            jackknife_f=jackknife_int, permutation_f=permutation)
        self.assertEqual(new_mat2.sum(axis=0), [2,2,2,2,2])

        new_mat3 = jackknife_array(test_array, 2, axis=0,
            jackknife_f=jackknife_int, permutation_f=permutation)
        self.assertEqual(new_mat3.sum(axis=1), [2,2])

        # test that you get orig mat back if too many
        self.assertEqual(jackknife_array(test_array, 20, axis=1), test_array)

    def test_unifrac(self):
        """unifrac should return correct results for model tree"""
        m = self.env_presence
        bl = self.branch_lengths
        self.assertEqual(unifrac(bl, m[:,0], m[:,1]), 10/16.0)
        self.assertEqual(unifrac(bl, m[:,0], m[:,2]), 8/13.0)
        self.assertEqual(unifrac(bl, m[:,1], m[:,2]), 8/17.0)

    def test_unnormalized_unifrac(self):
        """unnormalized unifrac should return correct results for model tree"""
        m = self.env_presence
        bl = self.branch_lengths
        self.assertEqual(unnormalized_unifrac(bl, m[:,0], m[:,1]), 10/17.)
        self.assertEqual(unnormalized_unifrac(bl, m[:,0], m[:,2]), 8/17.)
        self.assertEqual(unnormalized_unifrac(bl, m[:,1], m[:,2]), 8/17.)

    def test_PD(self):
        """PD should return correct results for model tree"""
        m = self.env_presence
        bl = self.branch_lengths
        self.assertEqual(PD(bl, m[:,0]), 7)
        self.assertEqual(PD(bl, m[:,1]), 15)
        self.assertEqual(PD(bl, m[:,2]), 11)

    def test_G(self):
        """G should return correct results for model tree"""
        m = self.env_presence
        bl = self.branch_lengths
        self.assertEqual(G(bl, m[:,0], m[:,0]), 0)
        self.assertEqual(G(bl, m[:,0], m[:,1]), 1/16.0)
        self.assertEqual(G(bl, m[:,1], m[:,0]), 9/16.0)

    def test_unnormalized_G(self):
        """unnormalized_G should return correct results for model tree"""
        m = self.env_presence
        bl = self.branch_lengths
        self.assertEqual(unnormalized_G(bl, m[:,0], m[:,0]), 0/17.)
        self.assertEqual(unnormalized_G(bl, m[:,0], m[:,1]), 1/17.)
        self.assertEqual(unnormalized_G(bl, m[:,1], m[:,0]), 9/17.)

    def test_unifrac_matrix(self):
        """unifrac_matrix should return correct results for model tree"""
        m = self.env_presence
        bl = self.branch_lengths
        result = unifrac_matrix(bl, m)
        self.assertEqual(result, array([[0, 10/16.,8/13.],[10/16.,0,8/17.],
            [8/13.,8/17.,0]]))
        #should work if we tell it the measure is asymmetric
        result = unifrac_matrix(bl, m, is_symmetric=False)
        self.assertEqual(result, array([[0, 10/16.,8/13.],[10/16.,0,8/17.],
            [8/13.,8/17.,0]]))
        #should work if the measure really is asymmetric
        result = unifrac_matrix(bl,m,metric=unnormalized_G,is_symmetric=False)
        self.assertEqual(result, array([[0, 1/17.,2/17.],[9/17.,0,6/17.],
            [6/17.,2/17.,0]]))
        #should also match web site calculations
        envs = self.count_array
        bound_indices = bind_to_array(self.nodes, envs)
        bool_descendants(bound_indices)
        result = unifrac_matrix(bl, envs)
        #symmetric matrix: 8/17 rounds to 0.4706 on both sides of the diagonal
        exp = array([[0, 0.6250, 0.6154], [0.6250, 0, 0.4706],
            [0.6154, 0.4706, 0]])
        #assertTrue rather than a bare assert, which is stripped under -O
        self.assertTrue((abs(result - exp)).max() < 0.001)

    def test_unifrac_vector(self):
        """unifrac_vector should return correct results for model tree"""
        m = self.env_presence
        bl = self.branch_lengths
        result = unifrac_vector(bl, m)
        self.assertFloatEqual(result, array([10./17,6./17,7./17]))

    def test_PD_vector(self):
        """PD_vector should return correct results for model tree"""
        m = self.env_presence
        bl = self.branch_lengths
        result = PD_vector(bl, m)
        self.assertFloatEqual(result, array([7,15,11]))

    def test_weighted_unifrac_matrix(self):
        """weighted unifrac matrix should return correct results for model tree"""
        #should match web site calculations
        envs = self.count_array
        bound_indices = bind_to_array(self.nodes, envs)
        sum_descendants(bound_indices)
        bl = self.branch_lengths
        tip_indices = [n._leaf_index for n in self.t.tips()]
        result = weighted_unifrac_matrix(bl, envs, tip_indices)
        exp = array([[0, 9.1, 4.5], [9.1, 0, 6.4], [4.5, 6.4, 0]])
        #assertTrue rather than a bare assert, which is stripped under -O
        self.assertTrue((abs(result - exp)).max() < 0.001)
        #should work with branch length corrections
        td = bl.copy()[:,newaxis]
        tip_bindings = bind_to_parent_array(self.t, td)
        tip_distances(td, tip_bindings, tip_indices)
        result = weighted_unifrac_matrix(bl, envs, tip_indices, bl_correct=True,
            tip_distances=td)
        exp = array([[0, 9.1/11.5, 4.5/(10.5+1./3)], [9.1/11.5, 0,
            6.4/(11+1./3)], [4.5/(10.5+1./3), 6.4/(11+1./3), 0]])
        self.assertTrue((abs(result - exp)).max() < 0.001)

    def test_weighted_unifrac_vector(self):
        """weighted_unifrac_vector should return correct results for model tree"""
        envs = self.count_array
        bound_indices = bind_to_array(self.nodes, envs)
        sum_descendants(bound_indices)
        bl = self.branch_lengths
        tip_indices = [n._leaf_index for n in self.t.tips()]
        result = weighted_unifrac_vector(bl, envs, tip_indices)
        #each term below is abs(difference of two fractions) * a weight,
        #one term per non-root node of the model tree
        self.assertFloatEqual(result[0], sum([
            abs(1./2 - 2./8)*1,
            abs(1./2 - 1./8)*2,
            abs(0 - 1./8)*3,
            abs(0 - 3./8)*1,
            abs(0 - 1./8)*1,
            abs(0 - 4./8)*2,
            abs(2./2 - 3./8)*4,
            abs(0. - 5./8)*3.]))

        self.assertFloatEqual(result[1], sum([
            abs(0-.6)*1,
            abs(.2-.2)*2,
            abs(.2-0)*3,
            abs(.6-0)*1,
            abs(0-.2)*1,
            abs(.6-.2)*2,
            abs(.2-.8)*4,
            abs(.8-.2)*3]))

        self.assertFloatEqual(result[2], sum([
            abs(2./3-1./7)*1,
            abs(0-2./7)*2,
            abs(0-1./7)*3,
            abs(0-3./7)*1,
            abs(1./3-0)*1,
            abs(1./3-3./7)*2,
            abs(2./3-3./7)*4,
            abs(1./3-4./7)*3]))
Пример #32
0
def build_tree_from_alignment(aln, moltype, best_tree=False, params=None,
    working_dir='/tmp'):
    """Returns a tree from Alignment object aln.

    aln: an cogent.core.alignment.Alignment object, or data that can be used
    to build one.
        -  Clearcut only accepts aligned sequences.  Alignment object used to
        handle unaligned sequences.

    moltype: a cogent.core.moltype object.
        - NOTE: If moltype = RNA, we must convert to DNA since Clearcut v1.0.8
        gives incorrect results if RNA is passed in.  'U' is treated as an
        incorrect character and is excluded from distance calculations.

    best_tree: if True (default:False), uses a slower but more accurate
    algorithm to build the tree (switches on the '-N' option).

    params: dict of parameters to pass in to the Clearcut app controller.
    The dict is copied, so neither the caller's object nor a shared default
    is ever modified. Any '--out' entry is replaced by a fresh temporary
    filename inside working_dir.

    working_dir: directory used for the temporary output file and as the
    working directory of the app controller.

    The result is the tree produced by DndParser (constructor=PhyloNode)
    from the application's 'Tree' output, with the original sequence names
    restored on the tips.
    """
    # Work on a private copy. The previous signature used a mutable default
    # ({}) and wrote '--out' into it, so state leaked between calls and into
    # the caller's dict.
    params = dict(params) if params else {}
    params['--out'] = get_tmp_filename(working_dir)

    # Create instance of app controller, enable tree, disable alignment
    app = Clearcut(InputHandler='_input_as_multiline_string', params=params,
                   WorkingDir=working_dir, SuppressStdout=True,
                   SuppressStderr=True)
    #Input is an alignment
    app.Parameters['-a'].on()
    #Turn off input as distance matrix
    app.Parameters['-d'].off()

    #If moltype = RNA, we must convert to DNA.
    if moltype == RNA:
        moltype = DNA

    if best_tree:
        app.Parameters['-N'].on()

    #Turn on correct moltype
    moltype_string = moltype.label.upper()
    app.Parameters[MOLTYPE_MAP[moltype_string]].on()

    # Setup mapping. Clearcut clips identifiers. We will need to remap them.
    # Clearcut only accepts aligned sequences.  Let Alignment object handle
    # unaligned sequences.
    seq_aln = Alignment(aln, MolType=moltype)
    #get int mapping
    int_map, int_keys = seq_aln.getIntMap()
    #create new Alignment object with int_map
    renamed_aln = Alignment(int_map)

    # Collect result
    result = app(renamed_aln.toFasta())

    try:
        # Build tree and put the original identifiers back on the tips
        tree = DndParser(result['Tree'].read(), constructor=PhyloNode)
        for node in tree.tips():
            node.Name = int_keys[node.Name]
    finally:
        # Clean up even when reading or parsing the tree fails
        result.cleanUp()

    return tree