def testAsDefaultUnrootedFalse(self):
    ri = RootingInterpreter(default_as_unrooted=False)
    self.assertEqual(ri._as_rooted, None)
    self.assertEqual(ri._default_as_rooted, True)

def testAsDefaultRootedTrue(self):
    ri = RootingInterpreter(default_as_rooted=True)
    self.assertEqual(ri._as_rooted, None)
    self.assertEqual(ri._default_as_rooted, True)

def testAsUnrootedTrue(self):
    ri = RootingInterpreter(as_unrooted=True)
    self.assertEqual(ri._as_rooted, False)
    self.assertEqual(ri._default_as_rooted, False)

def testAsRootedFalse(self):
    ri = RootingInterpreter(as_rooted=False)
    self.assertEqual(ri._as_rooted, False)
    self.assertEqual(ri._default_as_rooted, False)

def testDefault(self):
    ri = RootingInterpreter()
    self.assertEqual(ri._as_rooted, None)
    self.assertEqual(ri._default_as_rooted, False)
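These tests pin down the precedence rule: an explicit as_rooted/as_unrooted flag fixes _as_rooted immediately (and doubles as the fallback), while the default_* flags only set the fallback used when a tree carries no rooting comment. A minimal sketch of that mapping, assuming nothing beyond the constructor behavior the tests exercise:

ri = RootingInterpreter(as_unrooted=True)
assert ri._as_rooted is False            # explicit flag decides up front
assert ri._default_as_rooted is False    # ...and doubles as the fallback

ri = RootingInterpreter(default_as_rooted=True)
assert ri._as_rooted is None             # undecided until a rooting comment is seen
assert ri._default_as_rooted is True     # fallback when no comment is present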
def tree_from_token_stream(stream_tokenizer, **kwargs):
    """
    Processes a (SINGLE) TREE statement. Assumes that the input stream is
    located at the beginning of the statement (i.e., the first non-comment
    token should be the opening parenthesis of the tree definition).

    The str_to_taxon kwarg (if used) must supply the StrToTaxon interface.
    """
    translate_dict = kwargs.get("translate_dict", None)
    encode_splits = kwargs.get("encode_splits", False)
    rooting_interpreter = kwargs.get("rooting_interpreter",
                                     RootingInterpreter(**kwargs))
    finish_node_func = kwargs.get("finish_node_func", None)
    edge_len_type = kwargs.get("edge_len_type", float)
    taxon_set = kwargs.get("taxon_set", None)
    suppress_internal_node_taxa = kwargs.get("suppress_internal_node_taxa",
                                             False)
    store_tree_weights = kwargs.get("store_tree_weights", False)
    extract_comment_metadata = kwargs.get('extract_comment_metadata', False)
    case_sensitive_taxon_labels = kwargs.get('case_sensitive_taxon_labels',
                                             False)
    allow_repeated_use = kwargs.get('allow_repeated_use', False)
    stream_tokenizer_extract_comment_metadata_setting = \
            stream_tokenizer.extract_comment_metadata
    stream_tokenizer.extract_comment_metadata = extract_comment_metadata
    if taxon_set is None:
        taxon_set = dataobject.TaxonSet()
    tree = dataobject.Tree(taxon_set=taxon_set)

    stream_tokenizer.tree_rooting_comment = None  # clear previous comment
    stream_tokenizer.clear_comment_metadata()
    token = stream_tokenizer.read_next_token()
    if not token:
        return None
    tree.is_rooted = rooting_interpreter.interpret_as_rooted(
        stream_tokenizer.tree_rooting_comment)

    if store_tree_weights and stream_tokenizer.tree_weight_comment is not None:
        # Tree weight comments take the form "&W n/d" (e.g., "&W 1/3"): take
        # the token after the space and evaluate it as a float ratio.
        try:
            weight_expression = stream_tokenizer.tree_weight_comment.split(
                ' ')[1]
            tree.weight = eval("/".join(
                ["float(%s)" % cv for cv in weight_expression.split('/')]))
        except (IndexError, ValueError):
            # malformed weight comment: leave the default tree weight in place
            pass
        stream_tokenizer.tree_weight_comment = None

    if encode_splits:
        if len(taxon_set) == 0:
            raise Exception("When encoding splits on a tree as it is being parsed, a "
                + "fully pre-populated TaxonSet object must be specified using the 'taxon_set' keyword " \
                + "to avoid taxon/split bitmask values changing as new Taxon objects are created " \
                + "and added to the TaxonSet.")
        if tree.is_rooted:
            tree.split_edges = {}
        else:
            atb = taxon_set.all_taxa_bitmask()
            d = containers.NormalizedBitmaskDict(mask=atb)
            tree.split_edges = d
        split_map = tree.split_edges

    stt = kwargs.get('str_to_taxon')
    if stt is None:
        stt = StrToTaxon(taxon_set,
                         translate_dict,
                         allow_repeated_use=allow_repeated_use,
                         case_sensitive=case_sensitive_taxon_labels)

    tree.seed_node = dataobject.Node()
    curr_node = tree.seed_node
    if encode_splits:
        curr_node.edge.split_bitmask = 0L

    ### NHX format support ###
    def store_node_comments(active_node):
        if stream_tokenizer.comments:
            active_node.comments.extend(stream_tokenizer.comments)

    def store_comment_metadata(target):
        if extract_comment_metadata:
            if stream_tokenizer.has_comment_metadata():
                comment_metadata = stream_tokenizer.comment_metadata
                try:
                    target.comment_metadata.update(comment_metadata)
                except AttributeError:
                    target.comment_metadata = comment_metadata
                stream_tokenizer.clear_comment_metadata()
            elif not hasattr(target, "comment_metadata"):
                target.comment_metadata = {}

    # store and clear comments
    tree.comments = stream_tokenizer.comments
    stream_tokenizer.clear_comments()
    store_comment_metadata(tree)

    while True:
        # a ';' token (or end of the stream) terminates the tree statement
        if not token or token == ';':
            if curr_node is not tree.seed_node:
                raise stream_tokenizer.data_format_error(
                    "Unbalanced parentheses -- not enough ')' characters found in tree description"
                )
            if encode_splits:
                split_map[curr_node.edge.split_bitmask] = curr_node.edge
            break
        # '(' opens a new clade: create a child node and descend into it
        if token == '(':
            if not curr_node.parent_node:
                if curr_node.child_nodes():
                    raise stream_tokenizer.data_format_error(
                        "Unexpected '(' after the tree description.  Expecting a label for the root or a ;"
                    )
            tmp_node = dataobject.Node()
            if encode_splits:
                tmp_node.edge.split_bitmask = 0L
            curr_node.add_child(tmp_node)
            curr_node = tmp_node
            token = stream_tokenizer.read_next_token()
            store_node_comments(curr_node)
            store_comment_metadata(curr_node)
        # ',' separates siblings: finish curr_node and start the next child
        elif token == ',':
            tmp_node = dataobject.Node()
            if curr_node.is_leaf() and not curr_node.taxon:
                raise stream_tokenizer.data_format_error(
                    "Missing taxon specifier in a tree -- found either a '(,' or ',,' construct."
                )
            p = curr_node.parent_node
            if not p:
                raise stream_tokenizer.data_format_error(
                    "Comma found one the 'outside' of a newick tree description"
                )
            if encode_splits:
                tmp_node.edge.split_bitmask = 0L
                e = curr_node.edge
                u = e.split_bitmask
                split_map[u] = e
                p.edge.split_bitmask |= u
            if finish_node_func is not None:
                finish_node_func(curr_node, tree)
            p.add_child(tmp_node)
            curr_node = tmp_node
            token = stream_tokenizer.read_next_token()
            store_node_comments(curr_node)
            store_comment_metadata(curr_node)
        else:
            # ')' closes the current clade: finalize curr_node and ascend to its parent
            if token == ')':
                if curr_node.is_leaf() and not curr_node.taxon:
                    raise stream_tokenizer.data_format_error(
                        "Missing taxon specifier in a tree -- found either a '(,' or ',,' construct."
                    )
                p = curr_node.parent_node
                if not p:
                    raise stream_tokenizer.data_format_error(
                        "Unbalanced parentheses -- too many ')' characters found in tree description"
                    )
                if encode_splits:
                    e = curr_node.edge
                    u = e.split_bitmask
                    p.edge.split_bitmask |= u
                    split_map[u] = curr_node.edge
                if finish_node_func is not None:
                    finish_node_func(curr_node, tree)
                curr_node = p
            else:
                # any other token is a label: a taxon for leaf nodes, a node label otherwise
                is_leaf = curr_node.is_leaf()
                if is_leaf:
                    if curr_node.taxon:
                        raise stream_tokenizer.data_format_error(
                            "Multiple labels found for the same leaf (taxon '%s' and label '%s')"
                            % (str(curr_node.taxon), token))
                    try:
                        t = stt_require_taxon(stt, label=token)
                    except StrToTaxon.MultipleTaxonUseError as e:
                        raise stream_tokenizer.data_format_error(e.msg)
                else:
                    if curr_node.label:
                        raise stream_tokenizer.data_format_error(
                            "Multiple labels found for the same internal node (label '%s' and label '%s')"
                            % (curr_node.label, token))
                    if suppress_internal_node_taxa:
                        t = None
                    else:
                        try:
                            t = stt.get_taxon(label=token)
                        except StrToTaxon.MultipleTaxonUseError as e:
                            raise stream_tokenizer.data_format_error(e.msg)
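
For context, a minimal driver sketch for tree_from_token_stream. The NexusTokenizer name and its construction from a file-like stream are assumptions about the surrounding module, not shown in this excerpt; any object exposing the tokenizer interface used above (read_next_token(), comments, tree_rooting_comment, tree_weight_comment, and so on) would serve:

from cStringIO import StringIO

stream = StringIO("[&R] ((A:1.0,B:1.0):0.5,C:1.5);")
tokenizer = NexusTokenizer(stream)  # assumed tokenizer implementation
tree = tree_from_token_stream(tokenizer)
if tree is not None:
    print tree.is_rooted  # True: the [&R] comment was interpreted as rooted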