Exemplo n.º 1
0
def CreatePhyloGeneticTree(inputfile, outputfile, size):
    """Render the tree stored in ``inputfile`` to an image file.

    Only the first line of ``inputfile`` is read; it is handed to ``Tree``
    as the tree description. The tree is re-rooted on its midpoint
    outgroup before it is rendered.

    Args:
        inputfile: Path of the text file whose first line holds the tree.
        outputfile: Destination of the rendered image; converted with
            ``str()`` before being passed to ``render``.
        size: Width of the rendered image, in pixels.

    Raises:
        IndexError: If ``inputfile`` contains no lines.
    """
    # The context manager closes the handle even when reading fails.
    with open(inputfile, "r") as f:
        data = f.readlines()[0]

    tree = Tree(data)
    tree.set_outgroup(tree.get_midpoint_outgroup())

    ts = TreeStyle()
    ts.show_leaf_name = True
    ts.show_branch_length = False
    ts.show_branch_support = False
    ts.optimal_scale_level = "mid"

    # Hand the style configured above to the renderer so the settings in
    # this function actually take effect.
    tree.render(str(outputfile), w=size, units="px", tree_style=ts)
Exemplo n.º 2
0
def draw_ete3_tree(organism, snplist, tree_file_name, config, c):
    '''Draws a phylogenetic tree using ETE3

    Keyword arguments:
    organism -- the organism of which to make a tree
    snplist -- mapping of SNP (node) name to state: 0 draws the node grey
               with dashed lines (SNP missing due to a gap), 1 draws it as
               a larger green node; any other state keeps the default red
    tree_file_name -- path of the rendered output file
    config -- configuration mapping; a truthy config["dev"] prints the
              output file name
    c -- forwarded unchanged to tree_to_newick

    '''
    newick = tree_to_newick(organism, config, c)
    tree = Tree(newick, format=1)
    # Distance from the root to its farthest leaf, truncated to an int; it
    # drives the width of the rendered image below.
    tree_depth = int(tree.get_distance(tree.get_farthest_leaf()[0]))
    for n in tree.traverse():
        # Nodes are set to red colour
        nstyle = NodeStyle()
        nstyle["fgcolor"] = "#BE0508"
        nstyle["size"] = 10
        nstyle["vt_line_color"] = "#000000"
        nstyle["hz_line_color"] = "#000000"
        nstyle["vt_line_type"] = 0
        nstyle["hz_line_type"] = 0
        nstyle["vt_line_width"] = 2
        nstyle["hz_line_width"] = 2

        # Direct lookup by node name instead of scanning every SNP for
        # every node; nodes without an entry get state None.
        state = snplist.get(n.name)
        if state == 0:
            # If the SNP is missing due to a gap, make it grey
            nstyle["fgcolor"] = "#DDDDDD"
            nstyle["size"] = 10
            nstyle["vt_line_color"] = "#DDDDDD"
            nstyle["hz_line_color"] = "#DDDDDD"
            nstyle["vt_line_type"] = 1
            nstyle["hz_line_type"] = 1
        elif state == 1:
            nstyle["fgcolor"] = "#99FF66"
            nstyle["size"] = 15
            nstyle["vt_line_color"] = "#000000"
            nstyle["hz_line_color"] = "#000000"
            nstyle["vt_line_type"] = 0
            nstyle["hz_line_type"] = 0

        n.set_style(nstyle)

    ts = TreeStyle()
    ts.show_leaf_name = False  # Leaf names are added by the layout function
    ts.show_scale = False  # Do not show the scale
    # NOTE(review): `self` is not a parameter of this function, so this line
    # raises NameError unless a module-level `self` exists. Confirm whether
    # this function is meant to be a method or should reference a
    # module-level CanSNPer_tree_layout.
    ts.layout_fn = self.CanSNPer_tree_layout  # Use the custom layout
    ts.optimal_scale_level = 'full'  # Fully expand the branches of the tree
    if config["dev"]:
        print("#[DEV] Tree file: %s" % tree_file_name)
    # ETE3's render() names its width parameter `w`; it has no `width`
    # keyword.
    tree.render(tree_file_name, tree_style=ts, w=tree_depth * 500)
Exemplo n.º 3
0
    return mappedCols


## ETE3 TREE-VIZ FUNCTIONS ##

# basic tree style
tree_style = TreeStyle()
tree_style.show_leaf_name = False
tree_style.show_branch_length = False
tree_style.draw_guiding_lines = True
tree_style.complete_branch_lines_when_necessary = True

# make tree grow upward
tree_style.rotation = 270
# and make it appear ultrametric (which it is!)
tree_style.optimal_scale_level = "full"

# internal node style: black, with no visible node marker
nstyle = NodeStyle()
nstyle["fgcolor"] = "black"
nstyle["size"] = 0

# terminal node style: configured on nstyle_L itself (the assignments used
# to target nstyle a second time, leaving nstyle_L at the NodeStyle defaults)
nstyle_L = NodeStyle()
nstyle_L["fgcolor"] = "black"
nstyle_L["size"] = 0


### Draw a tree with nodes color-coded and labeled by trait value
def traitTree(traits, mapper, outDir):
    ### Take dict of traits and [R,G,B]-returning function
def bub_tree(tree, fasta, outfile1, root, types, c_dict, show, size, colours,
             field1, field2, scale, multiplier, dna):
    """
    Style every node of ``tree``, add a colour legend and render it.

    Leaf names are split on "_"; the time point and (optionally) the
    frequency are read from the resulting fields. A name needs at least four
    fields to be accepted; otherwise the leaf is assigned the time point
    'zero' and the base node size.

    :param tree: tree object from ete
    :param fasta: mapping of leaf name to sequence (from the fasta file used
        to make the tree), or None to draw no sequence faces
    :param outfile1: path of the rendered image, passed to ``tree.render``
    :param root: sequence name to use as root, or None to keep the rooting
    :param types: tree type: circular (c) or rectangle (r)
    :param c_dict: dictionary mapping time point to colour; must contain
        'zero' if any leaf name fails to parse
    :param show: show the tree in a gui; only the value ``True`` enables it
    :param size: scale the terminal nodes by frequency information; only the
        value ``True`` enables it
    :param colours: when equal to 3, no custom colour tables are passed to
        the sequence faces
    :param field1: index of the name field that contains the size/frequency
        value
    :param field2: index of the name field that contains the time point
    :param scale: how much to scale the x axis
    :param multiplier: factor applied to the frequency when sizing leaves;
        None selects 500
    :param dna: true/false, is sequence a DNA sequence?
    :return: None, renders the tree to ``outfile1``
    """
    mult = 500 if multiplier is None else multiplier

    dna_prot, bg_c, fg_c = _bub_tree_palettes(dna)
    if colours == 3:
        bg_c = None
        fg_c = None

    tstyle = TreeStyle()
    tstyle.force_topology = False
    tstyle.mode = types
    tstyle.scale = scale
    tstyle.min_leaf_separation = 0
    tstyle.optimal_scale_level = 'full'  # 'mid'
    if types == 'c':
        tstyle.root_opening_factor = 0.25

    tstyle.draw_guiding_lines = False
    tstyle.guiding_lines_color = 'slateblue'
    tstyle.show_leaf_name = False
    tstyle.allow_face_overlap = True
    tstyle.show_branch_length = False
    tstyle.show_branch_support = False

    if root is not None:
        tree.set_outgroup(root)

    # Distinct (time point, colour) pairs seen on leaves; feeds the legend.
    time_col = set()
    for node in tree.traverse():
        if not node.is_leaf():
            # Internal nodes: near-invisible marker, thick branch lines.
            nstyle = NodeStyle()
            nstyle["size"] = 0.1
            nstyle["hz_line_width"] = 10
            nstyle["vt_line_width"] = 10
            node.set_style(nstyle)
            continue

        fields, time_point = _bub_tree_parse_leaf_name(node.name, field2)
        if fields is None:
            print("Incorrect name format for ", node.name)

        s = 20
        if size is True:
            try:
                # `fields` is None when the name could not be parsed, so an
                # unparsable name never contributes a bogus frequency.
                s = 20 + float(fields[field1]) * mult
            except (IndexError, TypeError, ValueError):
                print("No frequency information for ", node.name)

        colour = c_dict[time_point]
        time_col.add((time_point, colour))

        nstyle = NodeStyle()
        nstyle["fgcolor"] = colour
        nstyle["size"] = s
        nstyle["hz_line_width"] = 10
        nstyle["vt_line_width"] = 10
        nstyle["hz_line_color"] = colour
        nstyle["vt_line_color"] = 'black'
        nstyle["hz_line_type"] = 0
        nstyle["vt_line_type"] = 0
        if root is not None and node.name == root:
            # place holder in case you want to do something with the root leaf
            print('root is ', node.name)
        else:
            nstyle["shape"] = "circle"
        node.set_style(nstyle)

        if fasta is not None:
            seq = fasta[str(node.name)]
            seqFace = SequenceFace(seq,
                                   seqtype=dna_prot,
                                   fsize=10,
                                   fg_colors=fg_c,
                                   bg_colors=bg_c,
                                   codon=None,
                                   col_w=40,
                                   alt_col_w=3,
                                   special_col=None,
                                   interactive=True)
            # Attach the face to this leaf directly rather than searching
            # the whole tree by name for every leaf.
            node.add_face(seqFace, 0, "aligned")

    tree.ladderize()

    legendkey = sorted(time_col)
    # Trailing blank white entry, kept from the original legend layout.
    legendkey.append(('', 'white'))

    for tm, clr in legendkey:
        tstyle.legend.add_face(faces.CircleFace(30, clr), column=0)
        tstyle.legend.add_face(faces.TextFace('\t' + tm,
                                              ftype='Arial',
                                              fsize=60,
                                              fgcolor='black',
                                              tight_text=True),
                               column=1)
    if show is True:
        tree.show(tree_style=tstyle)

    tree.render(outfile1, dpi=600, tree_style=tstyle)


def _bub_tree_palettes(dna):
    """Return (seqtype, background colours, foreground colours) for bub_tree."""
    if dna:
        seqtype = 'dna'
        bg_c = {
            'A': 'green',
            'C': 'blue',
            'G': 'black',
            'T': 'red',
            '-': 'grey',
            'X': 'white',
        }
        fg_c = dict.fromkeys(bg_c, 'black')
    else:
        seqtype = 'aa'
        bg_c = {
            'K': '#145AFF',
            'R': '#145AFF',
            'H': '#8282D2',
            'E': '#E60A0A',
            'D': '#E60A0A',
            'N': '#00DCDC',
            'Q': '#00DCDC',
            'S': '#FA9600',
            'T': '#FA9600',
            'L': '#0F820F',
            'I': '#0F820F',
            'V': '#0F820F',
            'Y': '#3232AA',
            'F': '#3232AA',
            'W': '#B45AB4',
            'C': '#E6E600',
            'M': '#E6E600',
            'A': '#C8C8C8',
            'G': '#EBEBEB',
            'P': '#DC9682',
            '-': 'grey',
            'X': 'white',
        }
        fg_c = dict.fromkeys(bg_c, 'black')
        # Gaps are drawn grey-on-grey for amino-acid sequences only.
        fg_c['-'] = 'grey'
    # Unknown residues are white-on-white in both alphabets.
    fg_c['X'] = 'white'
    return seqtype, bg_c, fg_c


def _bub_tree_parse_leaf_name(name, time_field):
    """Split a leaf name on "_"; return (fields, time point) or (None, 'zero')."""
    try:
        fields = name.split("_")
        time_point = fields[time_field]
    except (AttributeError, IndexError, TypeError):
        return None, 'zero'
    # A valid name must also provide the fourth field (index 3).
    if len(fields) < 4:
        return None, 'zero'
    return fields, time_point
Exemplo n.º 5
0
    # Attach a float `weight` feature to the node; nodes without an entry in
    # tree_weights get 0.0. The builtin float is used because the np.float
    # alias was removed in NumPy 1.24.
    if n.name in tree_weights:
        n.add_features(weight=float(tree_weights[n.name]))
    else:
        n.add_features(weight=0.0)

# Checking order: print each node's weight and name in traversal order
for n in test.traverse():
    print(n.weight, n.name)  # .get_leaf_names()

# Create an empty TreeStyle
ts = TreeStyle()

# Set our custom layout function
ts.layout_fn = layout

# Draw a rectangular tree
ts.mode = "r"

# We will add node names manually
ts.show_leaf_name = False
# Hide branch data
ts.show_branch_length = False
ts.show_branch_support = False
# ETE3 documents "mid" and "full" as the values of optimal_scale_level;
# "mid" is the documented default.
ts.optimal_scale_level = "mid"
ts.aligned_table_style = True
# ts.complete_branch_lines_when_necessary = True

# test.render("circle_map.genomic.pdf", w=10000, dpi=1000, tree_style=ts)
# test.render("circle_map.up_families.M-P.pdf", w=10000, dpi=1000, tree_style=ts)
test.render("circle_map.pdf", w=10000, dpi=1000, tree_style=ts)
Exemplo n.º 6
0
    L = len(seq_record)  # length of alignment (nt)

# Load the tree, root it on the "germline" node, pass it through
# repair_multifurcations and sort its branches.
T = Tree(infile_tree, format=1)
T.set_outgroup("germline")
repair_multifurcations(T, L)
# delete_long_branches(T, 0.2)
T.ladderize()

# Disabled alternatives kept for reference:
# T.dist = 0.0 # set germline distance to 0
# T.write(format=1, outfile=tree_file_name+".multifurc")

# Style for the whole figure.
ts = TreeStyle()
ts.rotation = 0
ts.scale = 500
ts.optimal_scale_level = "full"

# No leaf names, branch lengths, branch supports or guiding lines.
ts.show_leaf_name = ts.show_branch_length = False
ts.show_branch_support = ts.draw_guiding_lines = False

# Same 50-unit margin on all four sides.
ts.margin_left = ts.margin_right = ts.margin_top = ts.margin_bottom = 50

# Circular-layout options, currently disabled:
# ts.mode = "c"
# ts.arc_start = 180 # -180
# ts.arc_span = 180 # 359
# ts.root_opening_factor = 0.75

# Location of the CSV named as the sequence-uid -> isotype map, and the
# (initially empty) dictionary declared for it.
path_to_sequence_string_uid_to_isotype_map = "/Users/lime/Dropbox/quake/Bcell/selection/figures/treePlots/v2/Bcell_flu_high_res.sequences.isotypeDict.V6_Full.csv"
sequence_string_uid_to_isotype = {}
Exemplo n.º 7
0
        new_l['unsat'] += l[col]

# Add a 'genus' column by applying get_genus to every '*Tax ID' value.
new_l['genus'] = [get_genus(tax_id) for tax_id in new_l['*Tax ID']]

# One dict of per-genus column means for each genus, keyed by genus.
# NOTE(review): on pandas >= 2.0, mean() raises if non-numeric columns are
# present -- confirm the frame's columns or the pinned pandas version.
lookup = {
    row['genus']: dict(row)
    for _, row in new_l.groupby('genus').mean().reset_index().iterrows()
}

hug_tree = Tree(HUG_TREE)

# Keep only the leaves whose name also appears in the 'genus' column.
shared_genus = set(hug_tree.get_leaf_names()) & set(new_l['genus'])
hug_tree.prune(shared_genus)

# Circular layout, fully scaled branches, leaf names hidden.
ts = TreeStyle()
ts.mode = "c"
ts.optimal_scale_level = 'full'
ts.min_leaf_separation = 2
ts.show_leaf_name = False

for n in hug_tree.traverse():
    nstyle = NodeStyle()
    if n.is_leaf():
        r = logt(lookup[n.name]['br'])
        g = lookup[n.name]['unsat']
        b = lookup[n.name]['long']
        bright_color = '#{0:02x}{1:02x}{2:02x}'.format(int(r * 2.55),
                                                       int(g * 2.55),
                                                       int(b * 2.55))
        color = '#{0:02x}{1:02x}{2:02x}'.format(
            int(r) + 155,
            int(g) + 155,
def plot_phylogeny(
    data: RawData,
    out: str = 'phylogeny.pdf',
    live_color: str = 'green',
    dead_color: str = 'black',
    ignore_color: str = 'lightgray',
    time_range: Tuple[float, float] = (0, 1)
) -> Tuple[TreeNode, pd.DataFrame]:
    '''Render the phylogenetic tree of an experiment, colored by cell fate.

    Args:
        data: The simulation data, keyed by timepoint.
        out: Path to the output file. File type will be inferred from
            the file name.
        live_color: Color for nodes representing cells that survive
            until division.
        dead_color: Color for nodes representing cells that die.
        ignore_color: Color for nodes outside the time range considered.
        time_range: Tuple specifying the range of times to consider.
            Range values are specified as fractions of the final
            timepoint.

    Returns:
        A tuple of the rendered tree and a DataFrame with one row per
        agent seen inside the time range: column ``agents`` holds the
        agent ID and column ``survival`` is 0 if the agent was marked dead
        inside the range and 1 otherwise.
    '''
    every_agent: Set[str] = set()
    died: Set[str] = set()
    in_window: Set[str] = set()
    final_time = max(data.keys())

    for timepoint, snapshot in data.items():
        agents = get_in(snapshot, AGENTS_PATH)
        assert agents is not None
        every_agent.update(agents.keys())

        # Snapshots outside the requested window only contribute agent IDs.
        if not (
            time_range[0] * final_time
            <= timepoint
            <= time_range[1] * final_time
        ):
            continue

        in_window.update(agents.keys())
        for agent_id, agent_state in agents.items():
            if get_in(agent_state, PATH_TO_DEAD, False):
                died.add(agent_id)

    # Exactly one tree is expected from the collected agent IDs.
    forest = make_ete_trees(every_agent)
    assert len(forest) == 1
    tree = forest[0]

    # Figure-wide style: circular layout, no scale bar, no leaf names.
    tstyle = TreeStyle()
    tstyle.mode = 'c'
    tstyle.scale = None
    tstyle.optimal_scale_level = 'full'  # Avoid artificial branches
    tstyle.show_scale = False
    tstyle.show_leaf_name = False

    # Legend: one colored circle plus its label per row.
    legend_rows = (
        ('Die', dead_color),
        ('Survive', live_color),
        ('Divided Before Antibiotics Appeared', ignore_color),
    )
    for text, swatch in legend_rows:
        tstyle.legend.add_face(CircleFace(5, swatch), column=0)
        tstyle.legend.add_face(TextFace(' ' + text, ftype=FONT), column=1)

    # Per-node style: color encodes the fate of the cell.
    for node in tree.traverse():
        if node.name not in in_window:
            fate_color = ignore_color
        elif node.name in died:
            fate_color = dead_color
        else:
            fate_color = live_color

        nstyle = NodeStyle()
        nstyle['fgcolor'] = fate_color
        nstyle['size'] = 5
        nstyle['vt_line_width'] = 1
        nstyle['hz_line_width'] = 1
        node.set_style(nstyle)

    tree.render(out, tree_style=tstyle, w=400)

    # Survival table, one row per agent seen inside the window.
    agents_col = list(in_window)
    survive_col = [0 if agent in died else 1 for agent in agents_col]
    df = pd.DataFrame({'agents': agents_col, 'survival': survive_col})
    return tree, df