Пример #1
0
def write_newick_ott(out, ott, ott_id2children, root_ott_id, label_style,
                     prune_flags):
    '''Writes the subtree rooted at `root_ott_id` to `out` in newick form.

    `out` is an output stream (only its `write` method is used)
    `ott` is an OTT instance used for translating labels
    `ott_id2children` is a dict mapping an OTT ID to the IDs of its children
    `root_ott_id` is the root of the subtree to write.
    `label_style` is a facet of OTULabelStyleEnum
    `prune_flags` is a set strings (flags) or OTTFlagUnion instance or None

    If the root itself matches `prune_flags`, nothing is written at all.
    Otherwise the tree is emitted wrapped in one extra pair of parentheses
    (the root is treated as the only child of an unlabelled node) and is
    terminated with ';'.
    '''
    flag_union = prune_flags
    if flag_union is not None:
        if not isinstance(flag_union, OTTFlagUnion):
            flag_union = ott.convert_flag_string_set_to_union(flag_union)
        if ott.has_flag_set_key_intersection(root_ott_id, flag_union):
            return
    # `pending` is a depth-first work list. A bare ID means "enter this
    # node"; a 1-tuple holding an ID means "all of this node's children
    # have been written, now write its label".
    pending = [root_ott_id]
    # IDs that start a sibling group (so no ',' is written before them)...
    group_openers = set(pending)
    # ...and IDs that end one (so ')' is written after them).
    group_closers = set(pending)
    out.write('(')
    while pending:
        entry = pending.pop()
        if isinstance(entry, tuple):
            node_id = entry[0]
        else:
            node_id = entry
            kids = ott_id2children[node_id]
            if flag_union is not None:
                kids = [
                    kid for kid in kids
                    if not ott.has_flag_set_key_intersection(kid, flag_union)
                ]
            if node_id in group_openers:
                group_openers.remove(node_id)
            else:
                out.write(',')
            if kids:
                out.write('(')
                group_openers.add(kids[0])
                group_closers.add(kids[-1])
                # revisit this node after its children have been handled
                pending.append((node_id, ))
                # reversed so that pop() yields the children in order
                pending.extend(reversed(kids))
                continue
        out.write(quote_newick_name(ott.get_label(node_id, label_style)))
        if node_id in group_closers:
            out.write(')')
            group_closers.remove(node_id)
    out.write(';')
Пример #2
0
def write_newick_ott(out, ott, ott_id2children, root_ott_id, label_style, prune_flags):
    '''Serializes the taxonomy below `root_ott_id` as newick onto `out`.

    `out` is an output stream
    `ott` is an OTT instance used for translating labels
    `ott_id2children` is a dict mapping an OTT ID to the IDs of its children
    `root_ott_id` is the root of the subtree to write.
    `label_style` is a facet of OTULabelStyleEnum
    `prune_flags` is a set strings (flags) or OTTFlagUnion instance or None

    Returns without writing anything when the root matches `prune_flags`.
    The output is enclosed in an additional outer pair of parentheses and
    ends with ';'.
    '''
    union = prune_flags
    if union is not None:
        if not isinstance(union, OTTFlagUnion):
            union = ott.convert_flag_string_set_to_union(union)
        if ott.has_flag_set_key_intersection(root_ott_id, union):
            return

    def _surviving_children(parent_id):
        # children of parent_id, minus any that match the prune flags
        found = ott_id2children[parent_id]
        if union is None:
            return found
        return [c for c in found if not ott.has_flag_set_key_intersection(c, union)]

    todo = [root_ott_id]
    starts_clade = set(todo)  # IDs written without a preceding ','
    ends_clade = set(todo)    # IDs followed by a closing ')'
    out.write('(')
    while todo:
        item = todo.pop()
        leaving = isinstance(item, tuple)  # a tuple signals exiting a node
        current = item[0] if leaving else item
        if not leaving:
            offspring = _surviving_children(current)
            try:
                starts_clade.remove(current)
            except KeyError:
                # not the first of its siblings, so it needs a separator
                out.write(',')
            if offspring:
                out.write('(')
                starts_clade.add(offspring[0])
                ends_clade.add(offspring[-1])
                todo.append((current,))
                todo.extend(reversed(offspring))
                continue
        label = ott.get_label(current, label_style)
        out.write(quote_newick_name(label))
        if current in ends_clade:
            out.write(')')
            ends_clade.remove(current)
    out.write(';')
Пример #3
0
def write_newick_ott(out,
                     ott,
                     ott_id2children,
                     root_ott_id,
                     label_style,
                     prune_flags,
                     create_log_dict=False):
    """Write the subtree rooted at `root_ott_id` to `out` in newick form.

    `out` is an output stream
    `ott` is an OTT instance used for translating labels
    `ott_id2children` is a dict mapping an OTT ID to the IDs of its children
    `root_ott_id` is the root of the subtree to write.
    `label_style` is a facet of OTULabelStyleEnum
    `prune_flags` is a set strings (flags) or OTTFlagUnion instance or None
    if `create_log_dict` is True, a dict will be returned that contains statistics
        about the pruning.

    Nothing is written to `out` when the root itself matches `prune_flags`.

    Returns None unless `create_log_dict` is True. The returned dict has
    the keys 'version', 'flags_to_prune', 'pruned', 'num_tips',
    'num_pruned_anc_nodes', 'num_nodes', 'num_non_leaf_nodes',
    'num_non_leaf_nodes_with_multiple_children' and 'num_monotypic_nodes'.
    'pruned' maps a comma-separated list of the flags that caused a prune
    to a dict listing the pruned IDs.
    """
    # create to_prune_fsi_set a set of flag set indices to prune...
    if prune_flags:
        flags_to_prune_list = list(prune_flags)
        to_prune_fsi_set = ott.convert_flag_string_set_to_union(
            flags_to_prune_list)
    else:
        flags_to_prune_list = []
        to_prune_fsi_set = None
    flags_to_prune_set = frozenset(flags_to_prune_list)
    # pfd: flag set key -> sorted list of the requested flags found in it
    pfd = {}
    # pruned_dict: flag set key -> dict holding the list of pruned IDs
    pruned_dict = {}
    log_dict = None
    if create_log_dict:
        log_dict = {
            'version': ott.version,
            'flags_to_prune': flags_to_prune_list
        }
        fsi_to_str_flag_set = {}
        for k, v in dict(ott.flag_set_id_to_flag_set).items():
            fsi_to_str_flag_set[k] = frozenset(v)
        if to_prune_fsi_set:
            for f in to_prune_fsi_set.keys():
                s = fsi_to_str_flag_set[f]
                pfd[f] = sorted(flags_to_prune_set.intersection(s))
    num_tips = 0
    num_pruned_anc_nodes = 0
    num_nodes = 0
    num_monotypic_nodes = 0
    if to_prune_fsi_set and ott.has_flag_set_key_intersection(
            root_ott_id, to_prune_fsi_set):
        # entire taxonomy is pruned off
        if log_dict is not None:
            fsi = ott.get_flag_set_key(root_ott_id)
            pruned_dict[fsi] = {'': [root_ott_id]}
        num_pruned_anc_nodes += 1
    else:
        stack = [root_ott_id]
        # IDs that are written without a preceding ','
        first_children = set(stack)
        # IDs that are followed by a closing ')'
        last_children = set()
        while stack:
            ott_id = stack.pop()
            if isinstance(ott_id, tuple):
                ott_id = ott_id[0]
            else:
                num_nodes += 1
                children = ott_id2children[ott_id]
                if to_prune_fsi_set is not None:
                    c = []
                    for child_id in children:
                        if ott.has_flag_set_key_intersection(
                                child_id, to_prune_fsi_set):
                            if log_dict is not None:
                                fsi = ott.get_flag_set_key(child_id)
                                fd = pruned_dict.get(fsi)
                                if fd is None:
                                    pruned_dict[fsi] = {
                                        'anc_ott_id_pruned': [child_id]
                                    }
                                else:
                                    fd['anc_ott_id_pruned'].append(child_id)
                            num_pruned_anc_nodes += 1
                        else:
                            c.append(child_id)
                    children = c
                # Tally the node's degree whether or not pruning is active;
                # otherwise the tip/monotypic counts stay at 0 when no
                # prune flags are supplied.
                nc = len(children)
                if nc == 1:
                    num_monotypic_nodes += 1
                elif nc == 0:
                    num_tips += 1
                if ott_id not in first_children:
                    out.write(',')
                else:
                    first_children.remove(ott_id)
                if children:
                    out.write('(')
                    first_children.add(children[0])
                    last_children.add(children[-1])
                    stack.append(
                        (ott_id, ))  # a tuple will signal exiting a node...
                    # reversed so that pop() visits the children in order
                    stack.extend(reversed(children))
                    continue
            n = ott.get_label(ott_id, label_style)
            n = quote_newick_name(n)
            out.write(n)
            if ott_id in last_children:
                out.write(')')
                last_children.remove(ott_id)
        out.write(';')
    if create_log_dict:
        pruned_log = {}
        for fsi, obj in pruned_dict.items():
            f = pfd[fsi]
            obj['flags_causing_prune'] = f
            nk = ','.join(f)
            prev = pruned_log.get(nk)
            if prev is None:
                pruned_log[nk] = obj
            else:
                # Different flag set keys can reduce to the same list of
                # requested flags; merge their ID lists rather than letting
                # the later entry replace the earlier one.
                for key, ids in obj.items():
                    if key != 'flags_causing_prune':
                        prev.setdefault(key, []).extend(ids)
        log_dict['pruned'] = pruned_log
        log_dict['num_tips'] = num_tips
        log_dict['num_pruned_anc_nodes'] = num_pruned_anc_nodes
        log_dict['num_nodes'] = num_nodes
        log_dict['num_non_leaf_nodes'] = num_nodes - num_tips
        log_dict[
            'num_non_leaf_nodes_with_multiple_children'] = num_nodes - num_tips - num_monotypic_nodes
        log_dict['num_monotypic_nodes'] = num_monotypic_nodes
    return log_dict
Пример #4
0
    # Relabel a newick tree: every "ott<digits>" label that directly follows
    # '(', ',' or ')' is replaced by a name looked up in OTT, and the result
    # is written to 'ottnamelabelledtree.tre'.
    # NOTE(review): this fragment uses the Python 2 print statement.
    # read pruned labelled synthesis tree
    # group 1 = the preceding delimiter, group 2 = 'ott', group 3 = the digits
    ottpattern = re.compile(r"([(,)])(ott)(\d+)")
    # NOTE(review): mrcapattern is compiled but not used in the visible code.
    mrcapattern = re.compile(r"([(,)])mrcaott(\d+)ott(\d+)")
    # index in `newick` just past the last match copied to the output
    pos = 0
    ott = OTT(ott_dir=args.ott_dir)
    # load up the OTT dictionary...
    d = ott.ott_id_to_names
    # NOTE(review): outfile is not managed by a `with`, so it stays open if
    # an exception is raised before outfile.close() below.
    outfile = codecs.open('ottnamelabelledtree.tre', 'w', encoding='utf-8')
    with open(args.newick_file, 'r') as f:
        newick = f.read()
        for m in re.finditer(ottpattern, newick):
            #print m.group(1),m.group(2),m.group(3)
            ottid = int(m.group(3))
            ottresults = d[ottid]
            ottname = ottresults
            # when the lookup yields a tuple, only its first element is used
            if isinstance(ottresults, tuple):
                ottname = ottresults[0]
            print m.group(3), ottname
            # copy the text between the previous match and this one unchanged
            skippedchars = newick[pos:m.start()]
            outfile.write(skippedchars)
            # re-emit the delimiter that the pattern consumed
            outfile.write(m.group(1))
            if args.keep_ottids:
                label = quote_newick_name('{} ott{}'.format(ottname, ottid))
            else:
                label = quote_newick_name('{}'.format(ottname))
            outfile.write(label)
            pos = m.end()
        # copy whatever follows the final match
        outfile.write(newick[pos:])
    # NOTE(review): redundant, the `with` block above has already closed f.
    f.close()
    outfile.close()
Пример #5
0
def write_newick_ott(out,
                     ott,
                     ott_id2children,
                     root_ott_id,
                     label_style,
                     prune_flags,
                     create_log_dict=False):
    """Write the subtree rooted at `root_ott_id` to `out` in newick form.

    `out` is an output stream
    `ott` is an OTT instance used for translating labels
    `ott_id2children` is a dict mapping an OTT ID to the IDs of its children
    `root_ott_id` is the root of the subtree to write.
    `label_style` is a facet of OTULabelStyleEnum
    `prune_flags` is a set strings (flags) or OTTFlagUnion instance or None
    if `create_log_dict` is True, a dict will be returned that contains statistics
        about the pruning.

    Nothing is written to `out` when the root itself matches `prune_flags`.

    Returns None unless `create_log_dict` is True. The returned dict has
    the keys 'version', 'flags_to_prune', 'pruned', 'num_tips',
    'num_pruned_anc_nodes', 'num_nodes', 'num_non_leaf_nodes',
    'num_non_leaf_nodes_with_multiple_children' and 'num_monotypic_nodes'.
    'pruned' maps a comma-separated list of the flags that caused a prune
    to a dict listing the pruned IDs.
    """
    # create to_prune_fsi_set a set of flag set indices to prune...
    if prune_flags:
        flags_to_prune_list = list(prune_flags)
        to_prune_fsi_set = ott.convert_flag_string_set_to_union(flags_to_prune_list)
    else:
        flags_to_prune_list = []
        to_prune_fsi_set = None
    flags_to_prune_set = frozenset(flags_to_prune_list)
    # pfd: flag set key -> sorted list of the requested flags found in it
    pfd = {}
    # pruned_dict: flag set key -> dict holding the list of pruned IDs
    pruned_dict = {}
    log_dict = None
    if create_log_dict:
        log_dict = {'version': ott.version, 'flags_to_prune': flags_to_prune_list}
        fsi_to_str_flag_set = {}
        for k, v in dict(ott.flag_set_id_to_flag_set).items():
            fsi_to_str_flag_set[k] = frozenset(v)
        if to_prune_fsi_set:
            for f in to_prune_fsi_set.keys():
                s = fsi_to_str_flag_set[f]
                pfd[f] = sorted(flags_to_prune_set.intersection(s))
    num_tips = 0
    num_pruned_anc_nodes = 0
    num_nodes = 0
    num_monotypic_nodes = 0
    if to_prune_fsi_set and ott.has_flag_set_key_intersection(root_ott_id, to_prune_fsi_set):
        # entire taxonomy is pruned off
        if log_dict is not None:
            fsi = ott.get_flag_set_key(root_ott_id)
            pruned_dict[fsi] = {'': [root_ott_id]}
        num_pruned_anc_nodes += 1
    else:
        stack = [root_ott_id]
        # IDs that are written without a preceding ','
        first_children = set(stack)
        # IDs that are followed by a closing ')'
        last_children = set()
        while stack:
            ott_id = stack.pop()
            if isinstance(ott_id, tuple):
                ott_id = ott_id[0]
            else:
                num_nodes += 1
                children = ott_id2children[ott_id]
                if to_prune_fsi_set is not None:
                    c = []
                    for child_id in children:
                        if ott.has_flag_set_key_intersection(child_id, to_prune_fsi_set):
                            if log_dict is not None:
                                fsi = ott.get_flag_set_key(child_id)
                                fd = pruned_dict.get(fsi)
                                if fd is None:
                                    pruned_dict[fsi] = {'anc_ott_id_pruned': [child_id]}
                                else:
                                    fd['anc_ott_id_pruned'].append(child_id)
                            num_pruned_anc_nodes += 1
                        else:
                            c.append(child_id)
                    children = c
                # Tally the node's degree whether or not pruning is active;
                # otherwise the tip/monotypic counts stay at 0 when no
                # prune flags are supplied.
                nc = len(children)
                if nc == 1:
                    num_monotypic_nodes += 1
                elif nc == 0:
                    num_tips += 1
                if ott_id not in first_children:
                    out.write(',')
                else:
                    first_children.remove(ott_id)
                if children:
                    out.write('(')
                    first_children.add(children[0])
                    last_children.add(children[-1])
                    stack.append((ott_id,))  # a tuple will signal exiting a node...
                    # reversed so that pop() visits the children in order
                    stack.extend(reversed(children))
                    continue
            n = ott.get_label(ott_id, label_style)
            n = quote_newick_name(n)
            out.write(n)
            if ott_id in last_children:
                out.write(')')
                last_children.remove(ott_id)
        out.write(';')
    if create_log_dict:
        pruned_log = {}
        for fsi, obj in pruned_dict.items():
            f = pfd[fsi]
            obj['flags_causing_prune'] = f
            nk = ','.join(f)
            prev = pruned_log.get(nk)
            if prev is None:
                pruned_log[nk] = obj
            else:
                # Different flag set keys can reduce to the same list of
                # requested flags; merge their ID lists rather than letting
                # the later entry replace the earlier one.
                for key, ids in obj.items():
                    if key != 'flags_causing_prune':
                        prev.setdefault(key, []).extend(ids)
        log_dict['pruned'] = pruned_log
        log_dict['num_tips'] = num_tips
        log_dict['num_pruned_anc_nodes'] = num_pruned_anc_nodes
        log_dict['num_nodes'] = num_nodes
        log_dict['num_non_leaf_nodes'] = num_nodes - num_tips
        log_dict['num_non_leaf_nodes_with_multiple_children'] = num_nodes - num_tips - num_monotypic_nodes
        log_dict['num_monotypic_nodes'] = num_monotypic_nodes
    return log_dict