Exemplo n.º 1
0
def _get_motif_tree(tree, data, circle=True, vmin=None, vmax=None):
    """Build an ete3 tree whose branch colour and width encode ``data``.

    Every node is styled from the mean of the ``data`` entries selected by
    the names of the leaves below it: the mean picks the branch colour on a
    diverging "RdBu_r" scale and, scaled so that the overall maximum maps to
    25, the branch width (never thinner than 5).

    Parameters
    ----------
    tree :
        Anything accepted by ``ete3.Tree``.
    data :
        Object indexed with a list of leaf names and exposing ``.values``
        (presumably a pandas Series/DataFrame keyed by motif name -- confirm
        against the caller).
    circle : bool
        If true, draw the tree in circular mode as a half circle.
    vmin, vmax : float, optional
        Limits of the colour scale.  Whichever is ``None`` is derived from
        the first percentile in 90..100 of ``data.values`` that is positive
        (``vmin`` becomes its negative).

    Returns
    -------
    (Tree, TreeStyle)
        The styled tree and the tree style to render it with.

    Notes
    -----
    Prints a message and exits the process if ete3 is not installed.
    """
    try:
        from ete3 import Tree, NodeStyle, TreeStyle
    except ImportError:
        print("Please install ete3 to use this functionality")
        sys.exit(1)

    t = Tree(tree)

    # Determine cutoff for color scale.  Only ``None`` means "not supplied":
    # an explicit limit of 0 is legitimate and must not be overridden, which
    # a plain truthiness test would do.
    if vmin is None or vmax is None:
        for i in range(90, 101):
            minmax = np.percentile(data.values, i)
            if minmax > 0:
                break
        if vmin is None:
            vmin = -minmax
        if vmax is None:
            vmax = minmax

    norm = Normalize(vmin=vmin, vmax=vmax, clip=True)
    mapper = cm.ScalarMappable(norm=norm, cmap="RdBu_r")

    # Width scaling factor: the largest value in ``data`` maps to 25.
    m = 25 / data.values.max()

    for node in t.traverse("levelorder"):
        val = data[[l.name for l in node.get_leaves()]].values.mean()
        style = NodeStyle()
        style["size"] = 0  # hide the node marker itself

        color = to_hex(mapper.to_rgba(val))
        style["hz_line_color"] = color
        style["vt_line_color"] = color

        v = max(np.abs(m * val), 5)
        style["vt_line_width"] = v
        style["hz_line_width"] = v

        node.set_style(style)

    ts = TreeStyle()

    ts.layout_fn = _tree_layout
    ts.show_leaf_name = False
    ts.show_scale = False
    ts.branch_vertical_margin = 10

    if circle:
        ts.mode = "c"
        ts.arc_start = 180  # 0 degrees = 3 o'clock
        ts.arc_span = 180

    return t, ts
Exemplo n.º 2
0
def _get_motif_tree(tree, data, circle=True, vmin=None, vmax=None):
    """Build an ete3 tree whose branch colour and width encode ``data``.

    Every node is styled from the mean of the ``data`` entries selected by
    the names of the leaves below it: the mean picks the branch colour on a
    diverging "RdBu_r" scale and, scaled so that the overall maximum maps to
    25, the branch width (never thinner than 5).

    Parameters
    ----------
    tree :
        Anything accepted by ``ete3.Tree``.
    data :
        Object indexed with a list of leaf names and exposing ``.values``
        (presumably a pandas Series/DataFrame keyed by motif name -- confirm
        against the caller).
    circle : bool
        If true, draw the tree in circular mode as a half circle.
    vmin, vmax : float, optional
        Limits of the colour scale.  Whichever is ``None`` is derived from
        the first percentile in 90..100 of ``data.values`` that is positive
        (``vmin`` becomes its negative).

    Returns
    -------
    (Tree, TreeStyle)
        The styled tree and the tree style to render it with.

    Notes
    -----
    Prints a message and exits the process if ete3 is not installed.
    """
    try:
        from ete3 import Tree, NodeStyle, TreeStyle
    except ImportError:
        print("Please install ete3 to use this functionality")
        sys.exit(1)

    t = Tree(tree)

    # Determine cutoff for color scale.  Only ``None`` means "not supplied":
    # an explicit limit of 0 is legitimate and must not be overridden, which
    # a plain truthiness test would do.
    if vmin is None or vmax is None:
        for i in range(90, 101):
            minmax = np.percentile(data.values, i)
            if minmax > 0:
                break
        if vmin is None:
            vmin = -minmax
        if vmax is None:
            vmax = minmax

    norm = Normalize(vmin=vmin, vmax=vmax, clip=True)
    mapper = cm.ScalarMappable(norm=norm, cmap="RdBu_r")

    # Width scaling factor: the largest value in ``data`` maps to 25.
    m = 25 / data.values.max()

    for node in t.traverse("levelorder"):
        val = data[[l.name for l in node.get_leaves()]].values.mean()
        style = NodeStyle()
        style["size"] = 0  # hide the node marker itself

        color = to_hex(mapper.to_rgba(val))
        style["hz_line_color"] = color
        style["vt_line_color"] = color

        v = max(np.abs(m * val), 5)
        style["vt_line_width"] = v
        style["hz_line_width"] = v

        node.set_style(style)

    ts = TreeStyle()

    ts.layout_fn = _tree_layout
    ts.show_leaf_name = False
    ts.show_scale = False
    ts.branch_vertical_margin = 10

    if circle:
        ts.mode = "c"
        ts.arc_start = 180  # 0 degrees = 3 o'clock
        ts.arc_span = 180

    return t, ts
Exemplo n.º 3
0
def plot_tree(tree, save=False, path=''):
    """Display ``tree`` as a full circle with leaf names shown.

    When ``save`` is true the tree is first rendered to ``path`` with the
    same style; the interactive viewer is opened in either case.
    """
    circular = TreeStyle()
    circular.mode = "c"
    circular.arc_span = 360
    circular.arc_start = -180
    circular.show_leaf_name = True

    # Optionally write the image before opening the viewer.
    if save:
        tree.render(file_name=path, tree_style=circular)
    tree.show(tree_style=circular)
Exemplo n.º 4
0
def set_default_TreeStyle(tree, draw_nodes):
    """Create the default half-circle ``TreeStyle`` and return it with ``tree``.

    Parameters
    ----------
    tree :
        Returned unchanged as the second element of the result.
    draw_nodes :
        Not used by this function; kept so existing callers keep working.

    Returns
    -------
    (TreeStyle, tree)
    """
    ts = TreeStyle()

    # Geometry: circular mode, half circle.
    ts.mode = "c"
    ts.arc_start = -180
    ts.arc_span = 180
    ts.root_opening_factor = 1  # was assigned twice with the same value
    ts.min_leaf_separation = 10
    ts.force_topology = False
    ts.complete_branch_lines_when_necessary = True

    # Labels: branch support only.
    ts.show_branch_length = False
    ts.show_branch_support = True
    ts.show_leaf_name = False

    return ts, tree
Exemplo n.º 5
0
def plot_newick(aug_cluster_list, mode='c', db=-1):
    """Convert a cluster list to Newick and open it in the ete3 viewer.

    Parameters
    ----------
    aug_cluster_list :
        Sized sequence handed to ``convert_to_newick`` /
        ``convert_to_newick_db`` together with its last index.
    mode : str
        ete3 ``TreeStyle.mode``; the default "c" draws a circular tree.
    db :
        When greater than 0, ``convert_to_newick_db`` is used and receives
        this value as its third argument; otherwise ``convert_to_newick``.

    The tree is only displayed interactively; nothing is written to disk
    (the original contained an unreachable ``if False:`` render call).
    """
    tree_style = TreeStyle()
    tree_style.mode = mode  # draw tree in circular mode by default
    tree_style.scale = 20
    tree_style.arc_span = 360

    last_index = len(aug_cluster_list) - 1
    if db > 0:
        newick = convert_to_newick_db(aug_cluster_list, last_index, db)
    else:
        newick = convert_to_newick(aug_cluster_list, last_index)
    # Terminate the Newick string with a zero-length root branch.
    newick = newick + ':0;'

    t = Tree(newick, format=1)
    t.show(tree_style=tree_style)
Exemplo n.º 6
0
    def visualize(self, group1=None, group2=None):
        """Open ``self.tree`` in the ete3 viewer, coloured by cluster.

        Leaves get a background colour from their ``clustering`` index and a
        foreground colour from their ``treated`` index.  Nodes that carry an
        ``is_cluster_root`` feature are collapsed and labelled with their
        ``n_datapoints``.

        Parameters
        ----------
        group1, group2 : sized collection, optional
            If at least one is non-empty, every cluster root is labelled
            with the sizes of both groups.  A group that is not supplied
            counts as size 0 (previously ``len(None)`` raised ``TypeError``
            when only one group was passed).
        """
        import matplotlib
        import matplotlib.pyplot as plt

        # annotate the cluster roots with the group sizes
        if group1 or group2:
            n_group1 = len(group1) if group1 is not None else 0
            n_group2 = len(group2) if group2 is not None else 0
            # NOTE(review): the label is the same for every cluster root; the
            # original comment ("count downstream conditions in the leafs")
            # suggests per-cluster counts were intended -- confirm.
            label = f"Group1: {n_group1}// Group2:{n_group2}"
            for cluster_root in self.cluster_roots:
                cluster_root.add_face(TextFace(label),
                                      column=0,
                                      position="branch-right")

        # The colour tables do not depend on the node, so build them once
        # instead of on every call of the layout function.
        cmap_cluster = plt.cm.tab10(
            np.linspace(0, 1, len(self.cluster_roots)))
        cmap_treated = plt.cm.viridis(np.linspace(0, 1, 2))

        def _custom_layout(node):
            if node.is_leaf():
                c_cluster = matplotlib.colors.rgb2hex(
                    cmap_cluster[node.clustering, :])
                c_treat = matplotlib.colors.rgb2hex(
                    cmap_treated[node.treated, :])
                node.img_style["fgcolor"] = c_treat
                node.img_style["bgcolor"] = c_cluster

            if 'is_cluster_root' in node.features:
                c_cluster = matplotlib.colors.rgb2hex(
                    cmap_cluster[node.is_cluster_root, :])
                node.img_style["bgcolor"] = c_cluster
                # Collapse the cluster: do not draw anything below its root.
                node.img_style["draw_descendants"] = False
                node.add_face(TextFace(f"#data:{node.n_datapoints}"),
                              column=0,
                              position="branch-right")

        ts = TreeStyle()
        ts.mode = "r"
        ts.show_leaf_name = False
        # NOTE(review): the arc settings presumably only matter in circular
        # mode ("c"); kept as in the original.
        ts.arc_start = -180  # 0 degrees = 3 o'clock
        ts.arc_span = 270
        ts.layout_fn = _custom_layout
        self.tree.show(tree_style=ts)
Exemplo n.º 7
0
from ete3 import Tree, TreeStyle

# Read the Newick text.  The context manager closes the file even when
# reading fails (the original open()/close() pair leaked it in that case).
with open("gisaid_cov2020_sequences_filtered_8312_2.nw") as file:
    tree = file.read()

t = Tree(tree)
# NOTE(review): populate() grows the loaded tree with 30 randomly generated
# leaves; confirm this is intended for real sequence data.
t.populate(30)

# Half-circle layout with leaf names shown.
ts = TreeStyle()
ts.show_leaf_name = True
ts.mode = "c"
ts.arc_start = -180
ts.arc_span = 180
t.show(tree_style=ts)
Exemplo n.º 8
0
# Define the tree style used for the rectangular drawing.
# Relies on names defined outside this snippet: TreeStyle, TextFace,
# filename, t, locdata and colors.
ts = TreeStyle()
ts.show_leaf_name = True
ts.min_leaf_separation = 1  # NOTE: overridden by the 0.5 assignment below
#ts.show_branch_length = True
ts.show_branch_support = True
ts.scale =  200 # 200 pixels per branch length unit
ts.min_leaf_separation = 0.5
ts.branch_vertical_margin = 0 # no extra pixels between adjacent branches
ts.title.add_face(TextFace(filename, fsize=20), column=0)
# Alternative circular style; it is configured here but not used in the
# visible part of the script.
circular_style = TreeStyle()
circular_style.mode = "c" # draw tree in circular mode
circular_style.scale = 100
circular_style.arc_start = -90 # 0 degrees = 3 o'clock
circular_style.arc_span = 330

pruned = []
# Walk the tree: collect every leaf in `pruned` and colour it by the value
# that `locdata` holds for the leaf name.
for node in t.traverse():
    if node.is_leaf():
        pruned.append(node)
        # Leaves missing from locdata are labelled "no pred"; labels missing
        # from colors are drawn in black.
        local = locdata.get(node.name, "no pred")
        leafcolor = colors.get(local, "black")
        node.add_features(color=leafcolor)
        node.img_style["size"] = 0
        node.img_style["fgcolor"] = leafcolor
        node.img_style["vt_line_color"] = leafcolor
        node.img_style["hz_line_color"] = leafcolor
        node.img_style["hz_line_width"] = 2
        #node.name = tag_replace(node.name) #make s
Exemplo n.º 9
0
    def Surviving_Phylogenetic_Tree(self):
        """Show the clones in ``Tumour_Evolution`` as a half-circle tree.

        The keys of the module-level ``Tumour_Evolution`` mapping are clone
        IDs.  The entry "P-0:0" is dropped; the remaining IDs are expected to
        look like ``PC<n>[,<n>...]-<time>:<...>``, where each extra
        ``,<n>`` group is one level deeper in the lineage.  First-level
        clones are attached to a node named "P" with length ``100 - time``;
        deeper clones are attached to the clone of the previous level that
        shares their prefix, with length ``|time(first-level ancestor) -
        time(clone)|``.  The tree is then opened in the ete3 viewer.

        Raises
        ------
        ValueError
            If "P-0:0" is missing, or if some ID can never match the
            expected pattern (the original looped forever in that case).
        IndexError
            If there is no clone besides "P-0:0".
        """
        # Copy the keys into a list: on Python 3 ``dict.keys()`` is a view
        # without ``remove``, and the list is consumed destructively below.
        C_IDs = list(Tumour_Evolution.keys())
        C_IDs.remove("P-0:0")

        base_regex = 'PC[0-9]+'
        branch = ',[0-9]+'

        # Bucket the IDs by lineage depth: Phylo_Struct[d] holds the IDs
        # whose prefix has exactly d ",<n>" groups.
        Phylo_Struct = []
        depth = 0
        while C_IDs:
            # A pattern with ``depth`` branch groups needs at least ``depth``
            # commas; once that exceeds every remaining ID nothing can match
            # any more, so fail instead of looping forever.
            if depth > max(l.count(',') for l in C_IDs):
                raise ValueError("Unrecognised clone IDs: %r" % (C_IDs,))

            searchRegex = re.compile(base_regex + branch * depth + '-.*')
            matches = [
                m.group(0) for l in C_IDs for m in [searchRegex.search(l)]
                if m
            ]
            for m in matches:
                C_IDs.remove(m)

            Phylo_Struct.append(matches)
            depth += 1

        # Translate the buckets into (parent, child, length) edges.
        initial_step = Phylo_Struct.pop(0)
        initial_step = natsorted(initial_step, key=lambda y: y.lower())

        Phylogeny = []
        ID_time = dict()  # first-level prefix ("PC<n>") -> time of that clone
        for clone in initial_step:
            parent_str = re.search(base_regex, clone)
            clone_year = int(clone.split("-", 1)[1].split(":")[0])
            ID_time[parent_str.group(0)] = clone_year
            Phylogeny.append(("P", clone, 100 - clone_year))  ## Year length

        ## Generating Phylogenetic Tree
        # At level k the prefix pattern has k branch groups, so a clone and
        # its parent from the previous level yield the same prefix.
        for level, step in enumerate(Phylo_Struct):
            prefix_regex = base_regex + branch * level
            step = natsorted(step, key=lambda y: y.lower())
            for clone in step:
                clone_str = re.search(prefix_regex, clone)
                for _parent in initial_step:
                    parent_str = re.search(prefix_regex, _parent)
                    if clone_str.group(0) == parent_str.group(0):
                        main_parent = re.search(base_regex, clone)
                        clone_year = int(clone.split("-", 1)[1].split(":")[0])
                        Phylogeny.append(
                            (_parent, clone,
                             abs(ID_time[main_parent.group(0)] -
                                 clone_year)))  ## year length normalised

            # The clones of this level are the candidate parents of the next.
            initial_step = step

        t = Tree.from_parent_child_table(Phylogeny)
        ts = TreeStyle()
        ts.show_leaf_name = True

        ts.mode = "c"
        ts.arc_start = -180  # 0 degrees = 3 o'clock
        ts.arc_span = 180

        t.show(tree_style=ts)
Exemplo n.º 10
0
    def plot_tree(self,
                  output_filepath,
                  time_series_info=None,
                  tree_style='horizontal_right',
                  leaf_size_map_to=None,
                  show_leaf_names=False,
                  color_branches_by=None,
                  line_width=1,
                  start_color='red',
                  end_color='purple',
                  ladderize=True):
        """
        Plot the phylogenetic tree as a dendrogram using the ete3 package.

        output_filepath - Path to the output image file.
        time_series_info - Where the time-point information can be found (if
            at all) in each element ID. Acceptable values are:
            None - (default) There is no time information in the tree.
            'start_of_id' - The time info is at the very beginning of each
                element ID, separated by a '_'. For example
                '45.3_blahblahblah' has a time point of 45.3.
            It is passed to 'add_time_info' when no time points are known yet.
        tree_style - The style of tree plotting. Acceptable values are:
            'half_circle' - Tree is plotted as a half circle.
            'horizontal_right' - (default) The default ete3 layout is used.
            Any other value is treated like 'horizontal_right'.
        leaf_size_map_to - If defined (default None), the attribute of the
            leaf node that the leaf circle size maps to. Acceptable values:
            None - No leaf size info.
            'count' - radius proportional to the log of the 'count' attribute.
            'freq' - radius proportional to the 'freq' attribute.
            Any other non-empty value raises ValueError.
        show_leaf_names - If True (default False), plot the leaf names.
        color_branches_by - If defined (default None), how the leaf branches
            are colored. Acceptable values are:
            None - No branch coloring; leaf branches are black.
            'time_point' - Color by the 'time_point' attribute of each leaf.
                The time points must be known, either because
                'add_time_info' was run before or because
                'time_series_info' is given here.
            any other string - Name of a leaf attribute whose value is
                mapped to the branch color.
        line_width - Controls the line width. Default 1.
        start_color - Name of the first color of the gradient (a string
            understood by 'Color' from the 'colour' module).
        end_color - Name of the last color of the gradient. The colors used
            for the distinct attribute values span the range from
            'start_color' to 'end_color'.
        ladderize - If True (default), ladderize the tree before rendering.
            This flag was previously ignored and the tree always ladderized.
        """
        # Reject an unsupported size mapping up front; it used to fail with
        # an unbound 'radius' in the middle of styling.
        if leaf_size_map_to and leaf_size_map_to not in ('count', 'freq'):
            raise ValueError(
                "leaf_size_map_to must be None, 'count' or 'freq', got %r" %
                (leaf_size_map_to, ))

        if not self.time_points and time_series_info:
            self.add_time_info(time_series_info=time_series_info)

        # Build the attribute value -> hex color lookup, if desired.
        value_to_color = None
        if color_branches_by:
            start_color = Color(start_color)
            end_color = Color(end_color)
            # Time points may also come from an earlier add_time_info() call.
            # Requiring 'time_series_info' here sent that case down the
            # generic path and then failed with a NameError during styling.
            if color_branches_by == 'time_point' and (time_series_info
                                                      or self.time_points):
                color_keys = sorted(self.time_points)
            else:
                # Check whether the attribute already exists in the tree
                # data (only the first leaf is inspected); if not, add it.
                for leaf in self.tree:
                    if color_branches_by not in leaf.features:
                        self.add_attribute_to_leaves(
                            attribute_name=color_branches_by)
                    break
                color_keys = sorted(
                    self.extra_leaf_features[color_branches_by])
            gradient = list(start_color.range_to(end_color, len(color_keys)))
            value_to_color = {
                key: shade.hex_l
                for key, shade in zip(color_keys, gradient)
            }
        else:
            default_color = Color('black').hex_l

        # Set node styles. Sizes are scaled by the distance to the farthest
        # leaf (the divergence of the tree).
        _, size_to_tree_size_scaler = self.tree.get_farthest_leaf()
        scaled_line_width = size_to_tree_size_scaler * 10 * line_width
        for node in self.tree.traverse():
            node_style = NodeStyle()
            # Only leaf nodes get a color and, optionally, a sized circle.
            if node.is_leaf():
                if value_to_color is not None:
                    color = value_to_color[getattr(node, color_branches_by)]
                else:
                    color = default_color
                node_style['hz_line_color'] = color
                node_style['vt_line_color'] = color
                if leaf_size_map_to:
                    if leaf_size_map_to == 'count':
                        radius = size_to_tree_size_scaler * 100 * math.log(
                            node.count)
                    else:  # 'freq', validated above
                        radius = size_to_tree_size_scaler * 100 * node.freq
                    c = CircleFace(radius=radius, color=color, style='circle')
                    c.opacity = 0.3
                    node.add_face(c, 0, position='branch-right')
                    # The circle face replaces the default node marker.
                    node_style['size'] = 0
            node_style['hz_line_width'] = scaled_line_width
            node_style['vt_line_width'] = scaled_line_width
            node.set_style(node_style)

        # Set tree style.
        tree_steeze = TreeStyle()
        if tree_style == 'half_circle':
            tree_steeze.mode = 'c'
            tree_steeze.arc_start = -180
            tree_steeze.arc_span = 180
        tree_steeze.show_leaf_name = bool(show_leaf_names)

        if ladderize:
            self.tree.ladderize()
        self.tree.render(output_filepath,
                         w=700,
                         h=700,
                         units='mm',
                         tree_style=tree_steeze)
Exemplo n.º 11
0
def main(args):
    """Render the phylogenetic tree described by ``args`` to ``args.output``.

    Attributes read from ``args``: ``tree``, ``alignment``, ``highlight_new``,
    ``hide_annotations``, ``positions``, ``legend``, ``circular`` and
    ``output``.  The tree is rooted on 'EM_079422', ladderized and drawn with
    the module-level ``layout`` function; the legend is built from the
    module-level ``samples`` mapping.
    """
    if args.alignment:
        t = PhyloTree(args.tree, alignment=args.alignment, alg_format='fasta')
    else:
        t = PhyloTree(args.tree)

    if args.highlight_new:
        # NOTE(review): the result was stored in an unused local; the call is
        # kept in case read_runs validates the file or has side effects.
        read_runs(args.highlight_new)

    t.set_outgroup('EM_079422')
    t.ladderize()

    ts = TreeStyle()
    ts.show_leaf_name = False
    ts.show_branch_support = False
    ts.layout_fn = layout

    # Thick lines on the root node.  set_style() replaces the node's style,
    # so both widths go into ONE NodeStyle; the original applied two styles
    # in sequence and the second call discarded the horizontal width.
    root_style = NodeStyle()
    root_style["hz_line_width"] = 8
    root_style["vt_line_width"] = 4
    t.set_style(root_style)

    # Header: one bold title per aligned annotation column (columns 1..5).
    if not args.hide_annotations:
        header_titles = ('Sample identifier', 'Prefecture', 'Sous-prefecture',
                         'Village', 'Sample received')
        for column, title in enumerate(header_titles, start=1):
            ts.aligned_header.add_face(
                MyTextFace(title, fstyle='Bold', fsize=8, tight_text=False),
                column=column)

    if args.positions:
        positions = read_positions(args.positions)

        alg_header = RulerFace(
            positions,
            col_width=11,
            height=0,  # set to 0 if dont want to use values
            kind="stick",
            hlines=[0],
            hlines_col=["white"],  # trick to hide hz line
        )

        ts.aligned_header.add_face(alg_header, 6)

    # Legend: one coloured circle plus label per prefecture, sorted by name.
    if args.legend:
        legend = {s['prefec']: s['prefec__colour'] for s in samples.values()}
        for p in sorted(legend):
            ts.legend.add_face(CircleFace(4, legend[p]), column=0)
            ts.legend.add_face(MyTextFace(p, fsize=6, tight_text=False),
                               column=1)
        ts.legend_position = 1

    if args.circular:
        ts.mode = "c"
        ts.arc_start = -180  # 0 degrees = 3 o'clock
        ts.arc_span = 180

    t.render(args.output, tree_style=ts, w=1024)
Exemplo n.º 12
0
# (substring looked for in the UniProt description, label used in the leaf
# caption, background colour).  Checked in this order; the first hit wins.
# The trailing spaces in two of the substrings are part of the match.
_ENZYME_CLASSES = (
    ('Berberine', 'BBE-like', "#e0f3f8"),
    ('Inactive tetrahydrocannabinolic acid synthase', 'Inactive THCAS',
     "#4393c3"),
    ('Tetrahydrocannabinolic acid synthase ', 'THCAS', "#4393c3"),
    ('Cannabidiolic acid synthase-like', 'CBDAS-like', "#92c5de"),
    ('Cannabidiolic acid synthase ', 'CBDAS', "#92c5de"),
)


def _annotate_leaf(leaf, species, color, top_hits):
    """Colour one leaf and, if its top hit is classified, caption and shade it."""
    style = NodeStyle()
    style["fgcolor"] = color
    style["size"] = 0
    style["vt_line_color"] = color
    style["hz_line_color"] = color
    style["vt_line_width"] = 2
    style["hz_line_width"] = 2
    style["vt_line_type"] = 0  # 0 solid, 1 dashed, 2 dotted
    style["hz_line_type"] = 0
    leaf.set_style(style)

    if leaf.name not in top_hits:
        return

    uniprot_id, uniprot_desc, _e_value, _bit_score = top_hits[leaf.name]
    for needle, label, bgcolor in _ENZYME_CLASSES:
        if needle in uniprot_desc:
            caption = species + ', ' + label + ', ' + leaf.name
            leaf.add_face(TextFace(caption, fgcolor=color, fsize=12),
                          column=0,
                          position='branch-right')
            style["bgcolor"] = bgcolor
            leaf.set_style(style)
            return

    # Top hit present but not one of the known enzyme classes: report it.
    print(leaf.name, uniprot_id, uniprot_desc)


def search(tree, topHit1, topHit2):
    """Style the hop-vs-cannabis tree and render it to SVG and PDF.

    Leaves whose name contains "F" are treated as H. lupulus genes and
    looked up in ``topHit1``; all other leaves are treated as C. sativa
    genes and looked up in ``topHit2``.  Both mappings go from leaf name to
    a 4-tuple ``(uniprot_id, description, e_value, bit_score)``.  A leaf
    whose description matches a known enzyme class gets a caption and a
    background colour; an unmatched top hit is printed.

    The tree is written to "hop_vs_can_tree_v5.svg" and
    "hop_vs_can_tree_v5.pdf" in the current directory.

    The C. sativa fallback used to print ``uniprotID1``/``uniprotDesc1``,
    i.e. stale hop values or a NameError; it now prints the leaf's own hit.
    """
    for node in tree.traverse():
        node.img_style['size'] = 0
        if not node.is_leaf():
            continue
        if "F" in node.name:
            _annotate_leaf(node, 'H. lupulus', "#a50026", topHit1)
        else:
            _annotate_leaf(node, 'C. sativa', "#313695", topHit2)

    ts = TreeStyle()
    ts.mode = "c"
    ts.arc_start = -180
    ts.arc_span = 180

    ts.branch_vertical_margin = 10
    ts.scale = 225
    ts.show_leaf_name = False
    ts.show_scale = False
    tree.render("hop_vs_can_tree_v5.svg", tree_style=ts, w=600)
    tree.render("hop_vs_can_tree_v5.pdf", tree_style=ts, w=600)
Exemplo n.º 13
0
def deepbiome_draw_phylogenetic_tree(
        log,
        network_info,
        path_info,
        num_classes,
        file_name="%%inline",
        img_w=500,
        branch_vertical_margin=20,
        arc_start=0,
        arc_span=360,
        node_name_on=True,
        name_fsize=10,
        tree_weight_on=True,
        tree_weight=None,
        tree_level_list=['Genus', 'Family', 'Order', 'Class', 'Phylum'],
        weight_opacity=0.4,
        weight_max_radios=10,
        phylum_background_color_on=True,
        phylum_color=[],
        phylum_color_legend=False,
        show_covariates=True,
        verbose=True):
    """
    Draw phylogenetic tree

    Builds an ete3 tree level by level (starting from the last entry of
    `tree_level_list` directly under the root) and renders it in circular
    mode, optionally decorating each node with a circle whose radius and
    color reflect the corresponding reference weight.

    Parameters
    ----------
    log (logging instance) :
        python logging instance for logging
    network_info (dictionary) :
        python dictionary with network_information
    path_info (dictionary):
        python dictionary with path_information
    num_classes (int):
        number of classes for the network. 0 for regression, 1 for binary classification.
    file_name (str):
        name of the figure for save.
        - "*.png", "*.jpg"
        - "%%inline" for notebook inline output.
        default="%%inline"
    img_w (int):
        image width (pt)
        default=500
    branch_vertical_margin (int):
        vertical margin for branch
        default=20
    arc_start (int):
        angle that arc start
        default=0
    arc_span (int):
        total amount of angle for the arc span
        default=360
    node_name_on (boolean):
        show the name of the last leaf node if True
        default=True
    name_fsize (int):
        font size for the name of the last leaf node
        default=10
    tree_weight_on (boolean):
        show the amount and the direction of the weight for each edge in the tree by circle size and color.
        default=True
    tree_weight (sequence of weight tables):
        reference tree weights. Although the default is None, a value is
        required in practice: `tree_weight[-1].columns` is read
        unconditionally, so passing None raises a TypeError. Each entry must
        expose `.columns` / `.index` (presumably pandas DataFrames -- confirm
        against the caller).
        default=None
    tree_level_list (list):
        name of each level of the given reference tree weights
        default=['Genus', 'Family', 'Order', 'Class', 'Phylum']
    weight_opacity  (float):
        opacity for weight circle
        default= 0.4
    weight_max_radios (int):
        maximum radius for weight circle
        default= 10
    phylum_background_color_on (boolean):
        show the background color for each phylum based on `phylum_color`.
        default= True
    phylum_color (list):
        specify the list of background colors for phylum level. If `phylum_color` is empty, it will arbitrarily assign the color for each phylum.
        default= []
    phylum_color_legend (boolean):
        show the legend for the background colors for phylum level
        default= False
    show_covariates (boolean):
        show the effect of the covariates
        default= True
    verbose (boolean):
        show the log if True
        default=True

    Returns
    -------
    Whatever `Tree.render(file_name=..., w=img_w, tree_style=...)` returns
    for the assembled tree.

    Examples
    --------
    Draw phylogenetic tree

    deepbiome_draw_phylogenetic_tree(log, network_info, path_info, num_classes, file_name = "%%inline")
    """

    os.environ[
        'QT_QPA_PLATFORM'] = 'offscreen'  # for tree figure (https://github.com/etetoolkit/ete/issues/381)
    reader_class = getattr(readers,
                           network_info['model_info']['reader_class'].strip())
    reader = reader_class(log, path_info, verbose=verbose)
    data_path = path_info['data_info']['data_path']
    try:
        # Preferred layout: a directory of count files plus a list naming them.
        count_path = path_info['data_info']['count_path']
        x_list = np.array(
            pd.read_csv(path_info['data_info']['count_list_path'],
                        header=None).iloc[:, 0])
        x_path = np.array([
            '%s/%s' % (count_path, file_entry) for file_entry in x_list
            if '.csv' in file_entry
        ])
    except Exception:
        # Best-effort fallback to the single `x_path` file. `Exception`
        # (rather than a bare except) keeps KeyboardInterrupt / SystemExit
        # from being silently swallowed here.
        x_path = np.array(
            ['%s/%s' % (data_path, path_info['data_info']['x_path'])])

    # Only the first input file is read; it provides the covariate metadata
    # used below.
    reader.read_dataset(x_path[0], None, 0)

    network_class = getattr(
        build_network, network_info['model_info']['network_class'].strip())
    network = network_class(network_info,
                            path_info,
                            log,
                            fold=0,
                            num_classes=num_classes,
                            tree_level_list=tree_level_list,
                            is_covariates=reader.is_covariates,
                            covariate_names=reader.covariate_names,
                            verbose=False)

    # `phylum_color` is only rebound (never mutated), so the shared default
    # list in the signature is not modified by this function.
    if len(phylum_color) == 0:
        colors_name = list(mcolors.CSS4_COLORS.keys())
        if reader.is_covariates and show_covariates:
            phylum_column = 'Phylum_with_covariates'
        else:
            phylum_column = 'Phylum'
        phylum_color = np.random.choice(
            colors_name,
            network.phylogenetic_tree_info[phylum_column].unique().shape[0])

    basic_st = NodeStyle()
    basic_st['size'] = weight_max_radios * 0.5
    basic_st['shape'] = 'circle'
    basic_st['fgcolor'] = 'black'

    t = Tree()
    root_st = NodeStyle()
    root_st["size"] = 0
    t.set_style(root_st)

    tree_node_dict = {}
    tree_node_dict['root'] = t

    # First layer: the last level of `tree_level_list` hangs directly off
    # the root, one child per column of the last weight table.
    lower_class = tree_level_list[-1]
    lower_layer_names = tree_weight[-1].columns.to_list()

    layer_tree_node_dict = {}
    phylum_color_dict = {}
    for j, val in enumerate(lower_layer_names):
        # add_child returns the node it created, so no name lookup is needed.
        leaf_t = t.add_child(name=val)
        if lower_class == 'Phylum' and phylum_background_color_on:
            phylum_st = copy.deepcopy(basic_st)
            phylum_st["bgcolor"] = phylum_color[j]
            phylum_color_dict[val] = phylum_color[j]
            leaf_t.set_style(phylum_st)
        else:
            leaf_t.set_style(basic_st)
        layer_tree_node_dict[val] = leaf_t
    tree_node_dict[lower_class] = layer_tree_node_dict
    upper_class = lower_class
    upper_layer_names = lower_layer_names

    # Remaining layers, walking `tree_level_list` from back to front.
    for i in range(len(tree_level_list) - 1):
        lower_class = tree_level_list[-2 - i]
        if upper_class == 'Disease' and not show_covariates:
            lower_layer_names = network.phylogenetic_tree_info[
                lower_class].unique()
        else:
            lower_layer_names = tree_weight[-i - 1].index.to_list()

        if tree_weight_on:
            # Same for every node of this level, so compute it once here.
            # np.array copies, so the caller's weight table is not rescaled.
            # NOTE(review): scaling uses the signed maximum, not the maximum
            # absolute value -- confirm that is intended.
            edge_weights = np.array(tree_weight[-1 - i])
            edge_weights *= (weight_max_radios / np.max(edge_weights))

        layer_tree_node_dict = {}
        for val in upper_layer_names:
            parent_t = tree_node_dict[upper_class][val]
            if upper_class == 'Disease':
                child_class = lower_layer_names
            else:
                child_class = network.phylogenetic_tree_info[lower_class][
                    network.phylogenetic_tree_info[upper_class] ==
                    val].unique()

            for k, child_val in enumerate(child_class):
                leaf_t = parent_t.add_child(name=child_val)
                if lower_class == 'Phylum' and phylum_background_color_on:
                    # Color index is the child's position under its parent.
                    phylum_st = copy.deepcopy(basic_st)
                    phylum_st["bgcolor"] = phylum_color[k]
                    phylum_color_dict[child_val] = phylum_color[k]
                    leaf_t.set_style(phylum_st)
                else:
                    leaf_t.set_style(basic_st)
                if tree_weight_on:
                    if upper_class == 'Disease':
                        upper_num = 0
                    else:
                        upper_num = network.phylogenetic_tree_dict[
                            upper_class][val]
                    if (upper_class == 'Disease' and reader.is_covariates
                            and show_covariates):
                        lower_key = '%s_with_covariates' % lower_class
                    else:
                        lower_key = lower_class
                    lower_num = network.phylogenetic_tree_dict[lower_key][
                        child_val]
                    leaf_t.add_features(weight=edge_weights[lower_num,
                                                            upper_num])
                layer_tree_node_dict[child_val] = leaf_t
        tree_node_dict[lower_class] = layer_tree_node_dict
        upper_class = lower_class
        upper_layer_names = lower_layer_names

    def layout(node):
        """Per-node layout hook: draw the weight circle and the leaf name."""
        if "weight" in node.features:
            # Circle face sized by the node's "weight" feature; blue for
            # positive weights, red otherwise.
            # NOTE(review): the radius is the signed weight, so non-positive
            # weights pass a non-positive radius to CircleFace -- confirm
            # whether abs() was intended.
            color = "RoyalBlue" if node.weight > 0 else "Red"
            C = CircleFace(radius=node.weight, color=color, style="circle")
            # Make the circle transparent
            C.opacity = weight_opacity
            # And place it as a float face over the tree
            faces.add_face_to_node(C, node, 0, position="float")

        if node_name_on and node.is_leaf():
            # Add node name to leaf nodes
            N = AttrFace("name", fsize=name_fsize, fgcolor="black")
            faces.add_face_to_node(N, node, 0)

    ts = TreeStyle()

    ts.show_leaf_name = False
    ts.mode = "c"
    ts.arc_start = arc_start
    ts.arc_span = arc_span
    ts.layout_fn = layout
    ts.branch_vertical_margin = branch_vertical_margin
    ts.show_scale = False

    if phylum_color_legend:
        # One legend row per colored phylum, in sorted name order.
        for phylum_name in np.sort(list(phylum_color_dict.keys())):
            color_name = phylum_color_dict[phylum_name]
            ts.legend.add_face(CircleFace(weight_max_radios, color_name),
                               column=0)
            ts.legend.add_face(TextFace(" %s" % phylum_name, fsize=name_fsize),
                               column=1)

    return t.render(file_name=file_name, w=img_w, tree_style=ts)


# #########################################################################################################################
# if __name__ == "__main__":
#     argdict = argv_parse(sys.argv)
#     try: gpu_memory_fraction = float(argdict['gpu_memory_fraction'][0])
#     except: gpu_memory_fraction = None
#     try: max_queue_size=int(argdict['max_queue_size'][0])
#     except: max_queue_size=10
#     try: workers=int(argdict['workers'][0])
#     except: workers=1
#     try: use_multiprocessing=argdict['use_multiprocessing'][0]=='True'
#     except: use_multiprocessing=False

#     ### Logger ############################################################################################
#     logger = logging_daily.logging_daily(argdict['log_info'][0])
#     logger.reset_logging()
#     log = logger.get_logging()
#     log.setLevel(logging_daily.logging.INFO)

#     log.info('Argument input')
#     for argname, arg in argdict.items():
#         log.info('    {}:{}'.format(argname,arg))

#     ### Configuration #####################################################################################
#     config_data = configuration.Configurator(argdict['path_info'][0], log)
#     config_data.set_config_map(config_data.get_section_map())
#     config_data.print_config_map()

#     config_network = configuration.Configurator(argdict['network_info'][0], log)
#     config_network.set_config_map(config_network.get_section_map())
#     config_network.print_config_map()

#     path_info = config_data.get_config_map()
#     network_info = config_network.get_config_map()
#     test_evaluation, train_evaluation, network = deepbiome_train(log, network_info, path_info, number_of_fold=20)
Exemplo n.º 14
0
def build_colorful_tree(newick, filename=""):
    """
    Render a circular tree from a Newick string, with a colour legend and a
    background colour per sample category.

    Every node that has exactly one leaf beneath it is coloured according
    to the first category substring found in that leaf's name; names that
    match none of the "normal" categories get the "CLL" style. The figure
    is written to `filename`.

    Requires ete3 (and its rendering prerequisites) to be installed; this
    is not general-purpose code.
    """
    from ete3 import Tree, TreeStyle, CircleFace, TextFace, NodeStyle

    tree = Tree(newick)

    # (substring searched for in the leaf name, legend caption, colour),
    # listed in matching / legend order. The last entry is the fallback.
    categories = [
        ("normal_B", "Normal B-cell", "MediumSeaGreen"),
        ("NormalBCD19pcell", "Normal B CD19pcell", "SeaGreen"),
        ("NormalBCD19pCD27pcell", "Normal B CD19pCD27pcell", "ForestGreen"),
        ("NormalBCD19pCD27mcell", "Normal B CD19pCD27mcell", "Green"),
        ("CLL", "CLL all-batches", "RoyalBlue"),
    ]

    # Tree style: full circle starting at 9 o'clock (0 degrees = 3 o'clock).
    ts = TreeStyle()
    ts.show_leaf_name = True
    ts.mode = "c"
    ts.arc_start = -180
    ts.force_topology = True
    ts.arc_span = 360

    # Legend: a colour swatch in column 0 and its caption in column 1.
    # Only the first row carries a top margin.
    for row, (_, caption, colour) in enumerate(categories):
        swatch = CircleFace(30, colour)
        label = TextFace(caption, fsize=64)
        label.margin_right = 100
        if row == 0:
            swatch.margin_top = 1000
            label.margin_top = 1000
        ts.legend.add_face(swatch, column=0)
        ts.legend.add_face(label, column=1)

    # One node style per category, differing only in background colour.
    styles = {
        key: NodeStyle(bgcolor=colour,
                       hz_line_color="Black",
                       vt_line_color="Black")
        for key, _, colour in categories
    }
    substring_keys = [key for key, _, _ in categories[:-1]]

    for node in tree.traverse("postorder"):
        leaf_names = node.get_leaf_names()
        if len(leaf_names) != 1:
            continue
        name = leaf_names[0]
        matched = next((key for key in substring_keys if key in name), "CLL")
        node.set_style(styles[matched])

    tree.render(filename, w=10, dpi=600, units='in', tree_style=ts)
Exemplo n.º 15
0
def draw_tree(the_tree, colour, back_color, label, out_file, the_scale, extend,
              bootstrap, group_file, grid_options, the_table, pres_abs,
              circular):
    t = Tree(the_tree, quoted_node_names=True)
    #    t.ladderize()
    font_size = 8
    font_type = 'Heveltica'
    font_gap = 3
    font_buffer = 10
    o = t.get_midpoint_outgroup()
    t.set_outgroup(o)
    the_leaves = []
    for leaves in t.iter_leaves():
        the_leaves.append(leaves)
    groups = {}
    num = 0
    # set cutoff value for clades as 1/20th of the distance between the furthest two branches
    # assign nodes to groups
    last_node = None
    ca_list = []
    if not group_file is None:
        style = NodeStyle()
        style['size'] = 0
        style["vt_line_color"] = '#000000'
        style["hz_line_color"] = '#000000'
        style["vt_line_width"] = 1
        style["hz_line_width"] = 1
        for n in t.traverse():
            n.set_style(style)
        with open(group_file) as f:
            group_dict = {}
            for line in f:
                group_dict[line.split()[0]] = line.split()[1]
        for node in the_leaves:
            i = node.name
            for j in group_dict:
                if j in i:
                    if group_dict[j] in groups:
                        groups[group_dict[j]].append(i)
                    else:
                        groups[group_dict[j]] = [i]
        coloured_nodes = []
        for i in groups:
            the_col = i
            style = NodeStyle()
            style['size'] = 0
            style["vt_line_color"] = the_col
            style["hz_line_color"] = the_col
            style["vt_line_width"] = 2
            style["hz_line_width"] = 2
            if len(groups[i]) == 1:
                ca = t.search_nodes(name=groups[i][0])[0]
                ca.set_style(style)
                coloured_nodes.append(ca)
            else:
                ca = t.get_common_ancestor(groups[i])
                ca.set_style(style)
                coloured_nodes.append(ca)
                tocolor = []
                for j in ca.children:
                    tocolor.append(j)
                while len(tocolor) > 0:
                    x = tocolor.pop(0)
                    coloured_nodes.append(x)
                    x.set_style(style)
                    for j in x.children:
                        tocolor.append(j)
            ca_list.append((ca, the_col))
        if back_color:
            # for each common ancestor node get it's closest common ancestor neighbour and find the common ancestor of those two nodes
            # colour the common ancestor then add it to the group - continue until only the root node is left
            while len(ca_list) > 1:
                distance = float('inf')
                for i, col1 in ca_list:
                    for j, col2 in ca_list:
                        if not i is j:
                            parent = t.get_common_ancestor(i, j)
                            getit = True
                            the_dist = t.get_distance(i, j)
                            if the_dist <= distance:
                                distance = the_dist
                                the_i = i
                                the_j = j
                                the_i_col = col1
                                the_j_col = col2
                ca_list.remove((the_i, the_i_col))
                ca_list.remove((the_j, the_j_col))
                rgb1 = strtorgb(the_i_col)
                rgb2 = strtorgb(the_j_col)
                rgb3 = ((rgb1[0] + rgb2[0]) / 2, (rgb1[1] + rgb2[1]) / 2,
                        (rgb1[2] + rgb2[2]) / 2)
                new_col = colorstr(rgb3)
                new_node = t.get_common_ancestor(the_i, the_j)
                the_col = new_col
                style = NodeStyle()
                style['size'] = 0
                style["vt_line_color"] = the_col
                style["hz_line_color"] = the_col
                style["vt_line_width"] = 2
                style["hz_line_width"] = 2
                new_node.set_style(style)
                coloured_nodes.append(new_node)
                ca_list.append((new_node, new_col))
                for j in new_node.children:
                    tocolor.append(j)
                while len(tocolor) > 0:
                    x = tocolor.pop(0)
                    if not x in coloured_nodes:
                        coloured_nodes.append(x)
                        x.set_style(style)
                        for j in x.children:
                            tocolor.append(j)
    elif colour:
        distances = []
        for node1 in the_leaves:
            for node2 in the_leaves:
                if node1 != node2:
                    distances.append(t.get_distance(node1, node2))
        distances.sort()
        clade_cutoff = distances[len(distances) / 4]
        for node in the_leaves:
            i = node.name
            if not last_node is None:
                if t.get_distance(node, last_node) <= clade_cutoff:
                    groups[group_num].append(i)
                else:
                    groups[num] = [num, i]
                    group_num = num
                    num += 1
            else:
                groups[num] = [num, i]
                group_num = num
                num += 1
            last_node = node
        for i in groups:
            num = groups[i][0]
            h = num * 360 / len(groups)
            the_col = hsl_to_str(h, 0.5, 0.5)
            style = NodeStyle()
            style['size'] = 0
            style["vt_line_color"] = the_col
            style["hz_line_color"] = the_col
            style["vt_line_width"] = 2
            style["hz_line_width"] = 2
            if len(groups[i]) == 2:
                ca = t.search_nodes(name=groups[i][1])[0]
                ca.set_style(style)
            else:
                ca = t.get_common_ancestor(groups[i][1:])
                ca.set_style(style)
                tocolor = []
                for j in ca.children:
                    tocolor.append(j)
                while len(tocolor) > 0:
                    x = tocolor.pop(0)
                    x.set_style(style)
                    for j in x.children:
                        tocolor.append(j)
            ca_list.append((ca, h))
        # for each common ancestor node get it's closest common ancestor neighbour and find the common ancestor of those two nodes
        # colour the common ancestor then add it to the group - continue until only the root node is left
        while len(ca_list) > 1:
            distance = float('inf')
            got_one = False
            for i, col1 in ca_list:
                for j, col2 in ca_list:
                    if not i is j:
                        parent = t.get_common_ancestor(i, j)
                        getit = True
                        for children in parent.children:
                            if children != i and children != j:
                                getit = False
                                break
                        if getit:
                            the_dist = t.get_distance(i, j)
                            if the_dist <= distance:
                                distance = the_dist
                                the_i = i
                                the_j = j
                                the_i_col = col1
                                the_j_col = col2
                                got_one = True
            if not got_one:
                break
            ca_list.remove((the_i, the_i_col))
            ca_list.remove((the_j, the_j_col))
            new_col = (the_i_col + the_j_col) / 2
            new_node = t.get_common_ancestor(the_i, the_j)
            the_col = hsl_to_str(new_col, 0.5, 0.3)
            style = NodeStyle()
            style['size'] = 0
            style["vt_line_color"] = the_col
            style["hz_line_color"] = the_col
            style["vt_line_width"] = 2
            style["hz_line_width"] = 2
            new_node.set_style(style)
            ca_list.append((new_node, new_col))
    # if you just want a black tree
    else:
        style = NodeStyle()
        style['size'] = 0
        style["vt_line_color"] = '#000000'
        style["hz_line_color"] = '#000000'
        style["vt_line_width"] = 1
        style["hz_line_width"] = 1
        for n in t.traverse():
            n.set_style(style)
    color_list = [(240, 163, 255), (0, 117, 220), (153, 63, 0), (76, 0, 92),
                  (25, 25, 25), (0, 92, 49), (43, 206, 72), (255, 204, 153),
                  (128, 128, 128), (148, 255, 181), (143, 124, 0),
                  (157, 204, 0), (194, 0, 136), (0, 51, 128), (255, 164, 5),
                  (255, 168, 187), (66, 102, 0), (255, 0, 16), (94, 241, 242),
                  (0, 153, 143), (224, 255, 102), (116, 10, 255), (153, 0, 0),
                  (255, 255, 128), (255, 255, 0), (255, 80, 5), (0, 0, 0),
                  (50, 50, 50)]
    up_to_colour = {}
    ts = TreeStyle()
    column_list = []
    width_dict = {}
    if not grid_options is None:
        colour_dict = {}
        type_dict = {}
        min_val_dict = {}
        max_val_dict = {}
        leaf_name_dict = {}
        header_count = 0
        the_columns = {}
        if grid_options == 'auto':
            with open(the_table) as f:
                headers = f.readline().rstrip().split('\t')[1:]
                for i in headers:
                    the_columns[i] = [i]
                    type_dict[i] = 'colour'
                    colour_dict[i] = {'empty': '#FFFFFF'}
                    width_dict[i] = 20
                    up_to_colour[i] = 0
                    column_list.append(i)
        else:
            with open(grid_options) as g:
                for line in g:
                    if line.startswith('H'):
                        name, type, width = line.rstrip().split('\t')[1:]
                        if name in the_columns:
                            the_columns[name].append(name + '_' +
                                                     str(header_count))
                        else:
                            the_columns[name] = [
                                name + '_' + str(header_count)
                            ]
                        width = int(width)
                        name = name + '_' + str(header_count)
                        header_count += 1
                        colour_dict[name] = {'empty': '#FFFFFF'}
                        type_dict[name] = type
                        width_dict[name] = width
                        column_list.append(name)
                        up_to_colour[name] = 0
                        min_val_dict[name] = float('inf')
                        max_val_dict[name] = 0
                    elif line.startswith('C'):
                        c_name, c_col = line.rstrip().split('\t')[1:]
                        if not c_col.startswith('#'):
                            c_col = colorstr(map(int, c_col.split(',')))
                        colour_dict[name][c_name] = c_col
        val_dict = {}
        with open(the_table) as f:
            headers = f.readline().rstrip().split('\t')[1:]
            column_no = {}
            for num, i in enumerate(headers):
                if i in the_columns:
                    column_no[num] = i
            for line in f:
                name = line.split('\t')[0]
                leaf_name = None
                for n in t.traverse():
                    if n.is_leaf():
                        if name.split('.')[0] in n.name:
                            leaf_name = n.name
                if leaf_name is None:
                    continue
                else:
                    leaf_name_dict[leaf_name] = name
                vals = line.rstrip().split('\t')[1:]
                if name in val_dict:
                    sys.exit('Duplicate entry found in table.')
                else:
                    val_dict[name] = {}
                for num, val in enumerate(vals):
                    if num in column_no and val != '':
                        for q in the_columns[column_no[num]]:
                            column_name = q
                            if type_dict[column_name] == 'colour':
                                val_dict[name][column_name] = val
                                if not val in colour_dict[column_name]:
                                    colour_dict[column_name][val] = colorstr(
                                        color_list[up_to_colour[column_name] %
                                                   len(color_list)])
                                    up_to_colour[column_name] += 1
                            elif type_dict[column_name] == 'text':
                                val_dict[name][column_name] = val
                            elif type_dict[column_name] == 'colour_scale_date':
                                year, month, day = val.split('-')
                                year, month, day = int(year), int(month), int(
                                    day)
                                the_val = datetime.datetime(
                                    year, month, day, 0, 0,
                                    0) - datetime.datetime(
                                        1970, 1, 1, 0, 0, 0)
                                val_dict[name][
                                    column_name] = the_val.total_seconds()
                                if the_val.total_seconds(
                                ) < min_val_dict[column_name]:
                                    min_val_dict[
                                        column_name] = the_val.total_seconds()
                                if the_val.total_seconds(
                                ) > max_val_dict[column_name]:
                                    max_val_dict[
                                        column_name] = the_val.total_seconds()
                            elif type_dict[column_name] == 'colour_scale':
                                the_val = float(val)
                                val_dict[name][column_name] = the_val
                                if the_val < min_val_dict[column_name]:
                                    min_val_dict[column_name] = the_val
                                if the_val > max_val_dict[column_name]:
                                    max_val_dict[column_name] = the_val
                            else:
                                sys.exit('Unknown column type')
        if not out_file is None:
            new_desc = open(out_file + '.new_desc', 'w')
        else:
            new_desc = open('viridis.new_desc', 'w')
        ts.legend_position = 3
        leg_column = 0
        for num, i in enumerate(column_list):
            nameF = TextFace(font_gap * ' ' + i.rsplit('_', 1)[0] +
                             ' ' * font_buffer,
                             fsize=font_size,
                             ftype=font_type,
                             tight_text=True)
            nameF.rotation = -90
            ts.aligned_header.add_face(nameF, column=num + 1)
            new_desc.write('H\t' + i.rsplit('_', 1)[0] + '\t' + type_dict[i] +
                           '\t' + str(width_dict[i]) + '\n')
            x = num * 200
            if type_dict[i] == 'colour':
                ts.legend.add_face(TextFace(
                    font_gap * ' ' + i.rsplit('_', 1)[0] + ' ' * font_buffer,
                    fsize=font_size,
                    ftype=font_type,
                    tight_text=True),
                                   column=leg_column + 1)
                ts.legend.add_face(RectFace(width_dict[i], 20, '#FFFFFF',
                                            '#FFFFFF'),
                                   column=leg_column)
                for num2, j in enumerate(colour_dict[i]):
                    new_desc.write('C\t' + j + '\t' + colour_dict[i][j] + '\n')
                    ts.legend.add_face(TextFace(font_gap * ' ' + j +
                                                ' ' * font_buffer,
                                                fsize=font_size,
                                                ftype=font_type,
                                                tight_text=True),
                                       column=leg_column + 1)
                    ts.legend.add_face(RectFace(width_dict[i], 20,
                                                colour_dict[i][j],
                                                colour_dict[i][j]),
                                       column=leg_column)
                leg_column += 2
            elif type_dict[i] == 'colour_scale':
                ts.legend.add_face(TextFace(
                    font_gap * ' ' + i.rsplit('_', 1)[0] + ' ' * font_buffer,
                    fsize=font_size,
                    ftype=font_type,
                    tight_text=True),
                                   column=leg_column + 1)
                ts.legend.add_face(RectFace(width_dict[i], 20, '#FFFFFF',
                                            '#FFFFFF'),
                                   column=leg_column)
                for num2 in range(11):
                    y = num2 * 20 + 30
                    val = (max_val_dict[i] - min_val_dict[i]) * num2 / 10.0
                    h = val / (max_val_dict[i] - min_val_dict[i]) * 270
                    s = 0.5
                    l = 0.5
                    colour = hsl_to_str(h, s, l)
                    ts.legend.add_face(TextFace(font_gap * ' ' + str(val) +
                                                ' ' * font_buffer,
                                                fsize=font_size,
                                                ftype=font_type,
                                                tight_text=True),
                                       column=leg_column + 1)
                    ts.legend.add_face(RectFace(width_dict[i], 20, colour,
                                                colour),
                                       column=leg_column)
                leg_column += 2
            elif type_dict[i] == 'colour_scale_date':
                ts.legend.add_face(TextFace(
                    font_gap * ' ' + i.rsplit('_', 1)[0] + ' ' * font_buffer,
                    fsize=font_size,
                    ftype=font_type,
                    tight_text=True),
                                   column=leg_column + 1)
                ts.legend.add_face(RectFace(width_dict[i], 20, '#FFFFFF',
                                            '#FFFFFF'),
                                   column=leg_column)
                for num2 in range(11):
                    y = num2 * 20 + 30
                    val = (max_val_dict[i] - min_val_dict[i]) * num2 / 10.0
                    h = val / (max_val_dict[i] - min_val_dict[i]) * 360
                    s = 0.5
                    l = 0.5
                    colour = hsl_to_str(h, s, l)
                    days = str(int(val / 60 / 60 / 24)) + ' days'
                    ts.legend.add_face(TextFace(font_gap * ' ' + days +
                                                ' ' * font_buffer,
                                                fsize=font_size,
                                                ftype=font_type,
                                                tight_text=True),
                                       column=leg_column + 1)
                    ts.legend.add_face(RectFace(width_dict[i], 20, colour,
                                                colour),
                                       column=leg_column)
                leg_column += 2
            for n in t.traverse():
                if n.is_leaf():
                    name = leaf_name_dict[n.name]
                    if i in val_dict[name]:
                        val = val_dict[name][i]
                    else:
                        val = 'empty'
                    if type_dict[i] == 'colour':
                        n.add_face(RectFace(width_dict[i], 20,
                                            colour_dict[i][val],
                                            colour_dict[i][val]),
                                   column=num + 1,
                                   position="aligned")
                    elif type_dict[i] == 'colour_scale' or type_dict[
                            i] == 'colour_scale_date':
                        if val == 'empty':
                            colour = '#FFFFFF'
                        else:
                            h = (val - min_val_dict[i]) / (
                                max_val_dict[i] - min_val_dict[i]) * 360
                            s = 0.5
                            l = 0.5
                            colour = hsl_to_str(h, s, l)
                        n.add_face(RectFace(width_dict[i], 20, colour, colour),
                                   column=num + 1,
                                   position="aligned")
                    elif type_dict[i] == 'text':
                        n.add_face(TextFace(font_gap * ' ' + val +
                                            ' ' * font_buffer,
                                            fsize=font_size,
                                            ftype=font_type,
                                            tight_text=True),
                                   column=num + 1,
                                   position="aligned")
    if not pres_abs is None:
        starting_col = len(column_list) + 1
        subprocess.Popen('makeblastdb -out tempdb -dbtype prot -in ' +
                         pres_abs[0],
                         shell=True).wait()
        folder = pres_abs[1]
        len_dict = {}
        gene_list = []
        ts.legend.add_face(TextFace(font_gap * ' ' + 'Gene present/absent' +
                                    ' ' * font_buffer,
                                    fsize=font_size,
                                    ftype=font_type,
                                    tight_text=True),
                           column=starting_col + 1)
        ts.legend.add_face(RectFace(20, 20, '#FFFFFF', '#FFFFFF'),
                           column=starting_col)
        ts.legend.add_face(TextFace(font_gap * ' ' + 'Gene present/absent' +
                                    ' ' * font_buffer,
                                    fsize=font_size,
                                    ftype=font_type,
                                    tight_text=True),
                           column=starting_col + 1)
        ts.legend.add_face(RectFace(20, 20, "#5ba965", "#5ba965"),
                           column=starting_col)
        ts.legend.add_face(TextFace(font_gap * ' ' + 'Gene present/absent' +
                                    ' ' * font_buffer,
                                    fsize=font_size,
                                    ftype=font_type,
                                    tight_text=True),
                           column=starting_col + 1)
        ts.legend.add_face(RectFace(20, 20, "#cb5b4c", "#cb5b4c"),
                           column=starting_col)
        with open(pres_abs[0]) as f:
            for line in f:
                if line.startswith('>'):
                    name = line.split()[0][1:]
                    gene_list.append(name)
                    len_dict[name] = 0
                    nameF = TextFace(font_gap * ' ' + name + ' ' * font_buffer,
                                     fsize=font_size,
                                     ftype=font_type,
                                     tight_text=True)
                    nameF.rotation = -90
                    ts.aligned_header.add_face(nameF,
                                               column=starting_col +
                                               len(gene_list) - 1)
                else:
                    len_dict[name] += len(line.rstrip())
        min_length = 0.9
        min_ident = 90
        for n in t.iter_leaves():
            the_name = n.name
            if the_name[0] == '"' and the_name[-1] == '"':
                the_name = the_name[1:-1]
            if the_name.endswith('.ref'):
                the_name = the_name[:-4]
            if not os.path.exists(folder + '/' + the_name):
                for q in os.listdir(folder):
                    if q.startswith(the_name):
                        the_name = q
            if not os.path.exists(the_name + '.blast'):
                subprocess.Popen(
                    'blastx -query ' + folder + '/' + the_name +
                    ' -db tempdb -outfmt 6 -num_threads 24 -out ' + the_name +
                    '.blast',
                    shell=True).wait()
            gotit = set()
            with open(the_name + '.blast') as b:
                for line in b:
                    query, subject, ident, length = line.split()[:4]
                    ident = float(ident)
                    length = int(length)
                    if ident >= min_ident and length >= min_length * len_dict[
                            subject]:
                        gotit.add(subject)
            for num, i in enumerate(gene_list):
                if i in gotit:
                    colour = "#5ba965"
                else:
                    colour = "#cb5b4c"
                n.add_face(RectFace(20, 20, colour, colour),
                           column=num + starting_col,
                           position="aligned")
        # for num, i in enumerate(gene_list):
        #     x = (starting_col + num) * 200
        #     svg.writeString(i, x+50, 20, 12)
        #     y = 30
        #     svg.drawOutRect(x + 50, y, 12, 12, strtorgb('#5ba965'), strtorgb('#5ba965'), lt=0)
        #     svg.writeString('present', x + 70, y + 12, 12)
        #     y = 50
        #     svg.drawOutRect(x + 50, y, 12, 12, strtorgb('#cb5b4c'), strtorgb('#cb5b4c'), lt=0)
        #     svg.writeString('absent', x + 70, y + 12, 12)

    # Set these to False if you don't want bootstrap/distance values
    ts.show_branch_length = label
    ts.show_branch_support = bootstrap
    ts.show_leaf_name = False
    for node in t.traverse():
        if node.is_leaf():
            node.add_face(AttrFace("name",
                                   fsize=font_size,
                                   ftype=font_type,
                                   tight_text=True,
                                   fgcolor='black'),
                          column=0,
                          position="aligned")

    ts.margin_left = 20
    ts.margin_right = 100
    ts.margin_top = 20
    ts.margin_bottom = 20
    if extend:
        ts.draw_guiding_lines = True
    ts.scale = the_scale
    if not circular is None:
        ts.mode = "c"
        ts.arc_start = 0
        ts.arc_span = 360
    if out_file is None:
        t.show(tree_style=ts)
    else:
        t.render(out_file, w=210, units='mm', tree_style=ts)
Exemplo n.º 16
0
	else:
		#We're at the root node
		new_node.dist = 0

	cur_node_id = str(current_bud_row.OrgID)

	for idx, new_row in saved_pop_hosts[saved_pop_hosts.ParentID.eq(cur_node_id)].iterrows():	
		build_tree_recursive(new_row, new_node)

	return new_node


# Script driver: build the host phylogeny, draw it, then save it as Newick.

# The root is the row whose ParentID is the literal string "(none)".
# NOTE(review): assumes saved_pop_hosts is a pandas DataFrame and that exactly
# one row matches, so .squeeze() yields a single row object -- confirm.
root_node_row = saved_pop_hosts[saved_pop_hosts.ParentID == "(none)"].squeeze()
print("Building Tree")
build_tree_recursive(root_node_row, host_phylo)


print("Drawing Tree")
# Circular layout limited to a 180-degree arc, leaf names shown.
ts = TreeStyle()
ts.show_leaf_name = True
ts.mode = "c"
ts.arc_start = -180 # 0 degrees = 3 o'clock
ts.arc_span = 180
# Writes the image to "tree.png" relative to the current working directory.
host_phylo.render("tree.png", tree_style=ts)

print("Saving Tree")
# Write the tree in Newick format (ete3 format code 1) to "avida_tree.nw".
host_phylo.write(format=1, outfile="avida_tree.nw")

Exemplo n.º 17
0
# Show the current tree `t` with leaf names, branch lengths and support values.
# NOTE(review): `t` is not created in this cell; it must already exist.
ts = TreeStyle()
ts.show_leaf_name = True
ts.show_branch_length = True
ts.show_branch_support = True
t.show(tree_style=ts)


# %% Circular tree drawn on a 180-degree arc (random 30-leaf tree)
from ete3 import Tree, TreeStyle
t = Tree()
t.populate(30)
ts = TreeStyle()
ts.show_leaf_name = True
ts.mode = "c"
ts.arc_start = -180 # 0 degrees = 3 o'clock
ts.arc_span = 180
t.show(tree_style=ts)

# %% Circular tree drawn on a 120-degree arc (custom five-leaf Newick tree)
from ete3 import Tree, TreeStyle
t = Tree( "(sweetpotato,(hotpepper,(eggplant,(potato,tomato))));" )

ts = TreeStyle()
ts.show_leaf_name = True
ts.mode = "c"
# Arc starts at -60 degrees and spans 120 degrees, i.e. from -60 to +60.
ts.arc_start = -60 # 0 degrees = 3 o'clock
ts.arc_span = 120
t.show(tree_style=ts)


# %%Circular TRee in 180 DEgrees MYVERSION
Exemplo n.º 18
0
def main(args):
    """Build, style and render the phylogenetic tree described by ``args``.

    The tree is read from ``args.tree`` (together with the FASTA alignment
    ``args.alignment`` when one is given), rooted on the leaf named
    ``EM_079422``, ladderized and rendered to ``args.output`` with ``w=1024``.

    Optional decorations, each switched by an attribute of ``args``:

    * ``hide_annotations`` -- when false, five bold column titles are added
      to the aligned header in columns 1-5.
    * ``positions`` -- a ``RulerFace`` built from ``read_positions`` is added
      to the aligned header in column 6.
    * ``legend`` -- one coloured circle plus text label per distinct
      ``prefec`` value found in the module-level ``samples`` mapping.
    * ``circular`` -- draw the tree in circular mode on a 180-degree arc.

    The per-node layout is delegated to the module-level ``layout`` function.
    """
    if args.alignment:
        t = PhyloTree(args.tree, alignment=args.alignment, alg_format='fasta')
    else:
        t = PhyloTree(args.tree)

    if args.highlight_new:
        # NOTE(review): the returned value is not used anywhere in this
        # function; the call is kept in case read_runs validates the file or
        # has side effects -- confirm whether `layout` is meant to see it.
        runs = read_runs(args.highlight_new)

    t.set_outgroup('EM_079422')
    t.ladderize()

    ts = TreeStyle()
    ts.show_leaf_name = False
    ts.show_branch_support = False
    ts.layout_fn = layout

    # set_style() replaces the node's whole style, so both line widths must
    # live in ONE NodeStyle. Applying two separate styles one after the other
    # (as was done before) silently discarded the horizontal width.
    root_style = NodeStyle()
    root_style["hz_line_width"] = 8
    root_style["vt_line_width"] = 4
    t.set_style(root_style)

    # header
    if not args.hide_annotations:
        header_titles = (
            'Sample identifier',
            'Prefecture',
            'Sous-prefecture',
            'Village',
            'Sample received',
        )
        for column, title in enumerate(header_titles, start=1):
            ts.aligned_header.add_face(
                MyTextFace(title, fstyle='Bold', fsize=8, tight_text=False),
                column=column)

    if args.positions:
        positions = read_positions(args.positions)

        alg_header = RulerFace(positions,
                               col_width=11,
                               height=0,  # set to 0 if dont want to use values
                               kind="stick",
                               hlines=[0],
                               hlines_col=["white"],  # trick to hide hz line
                               )

        ts.aligned_header.add_face(alg_header, 6)

    # legend
    if args.legend:
        # Collapse the samples to one colour per prefecture; later samples
        # overwrite earlier ones that share a prefecture name.
        legend = {}
        for s in samples.values():
            legend[s['prefec']] = s['prefec__colour']
        for p in sorted(legend):
            ts.legend.add_face(CircleFace(4, legend[p]), column=0)
            ts.legend.add_face(MyTextFace(p, fsize=6, tight_text=False), column=1)
        ts.legend_position = 1

    if args.circular:
        ts.mode = "c"
        ts.arc_start = -180  # 0 degrees = 3 o'clock
        ts.arc_span = 180

#    t.show(tree_style=ts)
    t.render(args.output, tree_style=ts, w=1024)
Exemplo n.º 19
0
# Kernel (similarity) matrix between the graphs in GL['graph'], labelled on
# both axes with `label`.
GK = pd.DataFrame(kernel.fit_transform(GL['graph'].values))
GK.columns = GK.index = label

# Use 1 - K as the measure of distance.
# NOTE(review): no ids are passed to DistanceMatrix even though GK was just
# labelled -- confirm the resulting leaf names are the intended ones.
DM_GK = DistanceMatrix(1 - GK.values)

# Build the GK tree: nj() is asked for a string result, which is then parsed
# by Tree().
sktree = nj(DM_GK, result_constructor=str)
GK_tree = Tree(sktree)
GK_tree.name = 'AGORA network similarity tree'
# Tree style: full-circle layout with leaf names shown.
ts = TreeStyle()
ts.show_leaf_name = True
ts.mode = "c"
ts.arc_start = -180
ts.arc_span = 360

# Plot tree (rendering is currently disabled).
#GK_tree.render(file_name='/home/acabbia/Documents/Muscle_Model/GSMM-distance/figures/GK_tree_AGORA.png', tree_style=ts)
#GK_tree.show(tree_style=ts)

#%%
####
# MAKE JACCARD TREE
###

# Make binary matrices (reaction, metabolite and gene matrices) for the model
# library, using the reference SBML model loaded from ref_model_file.
ref_model = cobra.io.read_sbml_model(ref_model_file)
reactions_matrix, metabolite_matrix, gene_matrix = make_binary_mat(
    model_library_folder, ref_model)
Exemplo n.º 20
0
    def plot(self, placement, togjson, outdir, cfg):
        """
        Plot every placement in the tree.

        For each (LCA, HPA) pair in ``placement`` one circular tree image is
        rendered to ``outdir`` as ``tree_<n>.png`` (n counts from 0). The
        image shows all placed nodes (names starting with ``PTHR``) coloured
        by posterior probability, plus the LCA and HPA nodes highlighted.

        Args:
            placement: mapping with parallel ``"LCA"`` and ``"HPA"``
                sequences; every entry provides the node name under
                ``"node"``.
            togjson: handed to ``self.loadInfo``; the loaded info is indexed
                by node name and must provide ``"post_prob"`` for every
                ``PTHR`` node.
            outdir: directory the PNG files are written to.
            cfg: mapping providing ``"minPlacementLikelyhood"``, the lower
                bound used to rescale posterior probabilities.

        Side effects: sets the ``QT_QPA_PLATFORM`` environment variable to
        ``offscreen`` and restyles every node of ``self.t`` in place.
        """
        from ete3 import NodeStyle, TreeStyle
        from ete3 import CircleFace, TextFace, RectFace

        logging.debug("Plotting trees now")
        # with no X display this needs to be set
        os.environ["QT_QPA_PLATFORM"] = "offscreen"
        info = self.loadInfo(togjson)

        # One shared NodeStyle per distinct colour, created on first use.
        nodeStyles = defaultdict(NodeStyle)

        # Number of colour bars in the legend gradient (p = 1.00 ... 0.01).
        gradient_steps = 100

        for no, (LCAp, HPAp) in enumerate(zip(placement["LCA"],
                                              placement["HPA"])):

            plotpath = os.path.join(outdir, f"tree_{no}.png")

            # NOTE: this is the shared tree itself, not a copy. That is safe
            # only because every node gets a style assigned in the loop
            # below, so nothing leaks over from the previous placement.
            t = self.t

            LCA = LCAp["node"]
            HPA = HPAp["node"]
            # define basic tree style
            ts = TreeStyle()
            # hide leaf names
            ts.show_leaf_name = False
            ts.root_opening_factor = 1
            # circular tree
            ts.mode = "c"
            ts.rotation = 210
            ts.arc_start = 0  # 0 degrees = 3 o'clock
            ts.arc_span = 350

            highlightsize = 80
            nodesize = 10

            # define styles for special nodes
            # at the moment hard coded, but could be made accessible to the
            # user

            # LCA style
            LCAstyle = NodeStyle()
            LCAstyle["fgcolor"] = "#33a02c"
            LCAstyle["bgcolor"] = "#b2df8a"
            LCAstyle["size"] = highlightsize

            # HPA style
            HPAstyle = NodeStyle()
            HPAstyle["fgcolor"] = "#1f78b4"
            HPAstyle["bgcolor"] = "#a6cee3"
            HPAstyle["size"] = highlightsize

            # default node
            defaultStyle = NodeStyle()
            defaultStyle["fgcolor"] = "gray"
            defaultStyle["size"] = nodesize

            # add legend
            ts.legend_position = 1
            ts.legend.add_face(CircleFace(40, LCAstyle["fgcolor"]), column=1)
            ts.legend.add_face(TextFace("LCA", fsize=50), column=2)
            ts.legend.add_face(CircleFace(40, HPAstyle["fgcolor"]), column=1)
            ts.legend.add_face(TextFace("HPA", fsize=50), column=2)
            ts.legend.add_face(TextFace("p = 1", fsize=50), column=1)
            # Step with integers and derive p from them: repeatedly
            # subtracting 0.01 from a float accumulates rounding error, so
            # the number of bars would depend on where the drift lands
            # around zero.
            for step in range(gradient_steps, 0, -1):
                p = step / gradient_steps
                bar_colour = p_to_color(p)
                temp_face = RectFace(60,
                                     10,
                                     fgcolor=bar_colour,
                                     bgcolor=bar_colour)
                # negative margin makes consecutive bars overlap
                temp_face.margin_top = -4
                ts.legend.add_face(temp_face, column=1)
            ts.legend.add_face(TextFace(f"p = {cfg['minPlacementLikelyhood']}",
                                        fsize=50),
                               column=1)

            # add highlights for each placed protein
            for n in t.traverse():
                if n.name.startswith("PTHR"):
                    # rescale the posterior probability from
                    # [minPlacementLikelyhood, 1] to [0, 1]
                    x = (info[n.name]["post_prob"] -
                         cfg["minPlacementLikelyhood"]) / (
                             1 - cfg["minPlacementLikelyhood"])
                    # orange to purple gradient from 0 to 1 posterior
                    # probability
                    he = p_to_color(x)
                    nodeStyles[he]["bgcolor"] = he
                    # define back color of locations
                    n.set_style(nodeStyles[he])

                elif n.name == LCA:
                    n.set_style(LCAstyle)
                elif n.name == HPA:
                    n.set_style(HPAstyle)
                else:
                    n.set_style(defaultStyle)

            # plot to disk
            _ = t.render(plotpath, w=320, units="mm", tree_style=ts)
Exemplo n.º 21
0
def plot_phylo(nw_tree,
               out_name,
               parenthesis_classif=True,
               show_support=False,
               radial_mode=False,
               root=False):
    """Render a Newick tree to ``out_name`` with coloured, italic leaf names.

    Args:
        nw_tree: Newick string or file, parsed with ete3 ``format=0``.
        out_name: output image path handed to ``Tree.render`` (``w=900``).
        parenthesis_classif: when true, a classification label is extracted
            from every leaf name (second-to-last ``_``-separated field, minus
            its last character) and each label gets its own colour; leaves
            without a usable label are drawn in black.
        show_support: forwarded to ``TreeStyle.show_branch_support``. When
            false, nodes whose support is below 1 are drawn as size-4 red
            dots; all other nodes are drawn with size 0.
        radial_mode: draw the tree in circular mode (full 360-degree arc).
        root: when true, the tree is first rooted at its midpoint.

    NOTE(review): the tree is always re-rooted on the leaf named
    ``'Bacillus subtilis'`` afterwards, which overrides midpoint rooting --
    confirm this hard-coded outgroup is intended.
    """
    from ete3 import Tree, AttrFace, TreeStyle, NodeStyle, TextFace
    import orthogroup2phylogeny_best_refseq_uniprot_hity

    ete2_tree = Tree(nw_tree, format=0)
    if root:
        R = ete2_tree.get_midpoint_outgroup()
        # and set it as tree outgroup
        ete2_tree.set_outgroup(R)
    ete2_tree.set_outgroup('Bacillus subtilis')
    ete2_tree.ladderize()

    # Both mappings stay empty when classification colouring is off, which
    # makes every lookup below fall back to black.
    name2classif = {}
    classif2col = {}
    if parenthesis_classif:
        print('parenthesis_classif!')
        for lf in ete2_tree.iter_leaves():
            print(lf)
            try:
                classif = lf.name.split('_')[-2][0:-1]
            except IndexError:
                # leaf name has no '_' separated classification field
                continue
            print('classif', classif)
            name2classif[lf.name] = classif
        classif_list = list(set(name2classif.values()))
        classif2col = dict(
            zip(
                classif_list,
                orthogroup2phylogeny_best_refseq_uniprot_hity.
                get_spaced_colors(len(classif_list))))

    for lf in ete2_tree.iter_leaves():
        ff = AttrFace("name", fsize=12, fstyle='italic')
        ff.fgcolor = classif2col.get(name2classif.get(lf.name), 'black')
        lf.add_face(ff, column=0)

    # Style every node once. This traversal used to run inside the leaf loop
    # above, redoing identical work for each leaf.
    if not show_support:
        print('support')
    for n in ete2_tree.traverse():
        if not show_support:
            print(n.support)
        low_support = (not show_support) and float(n.support) < 1
        nstyle = NodeStyle()
        nstyle["fgcolor"] = "red"
        nstyle["size"] = 4 if low_support else 0
        n.set_style(nstyle)

    ts = TreeStyle()
    ts.show_leaf_name = False
    ts.show_branch_support = show_support

    if radial_mode:
        ts.mode = "c"
        ts.arc_start = -90
        ts.arc_span = 360
    ts.tree_width = 370
    ts.complete_branch_lines_when_necessary = True
    ete2_tree.render(out_name, tree_style=ts, w=900)
Exemplo n.º 22
0
# Finish configuring `style2` (created before this fragment): circular node
# shape and thick, dashed, dark-blue branch lines.
style2["shape"] = "circle"
style2["vt_line_color"] = "#0000aa"
style2["hz_line_color"] = "#0000aa"
style2["vt_line_width"] = 5
style2["hz_line_width"] = 5
style2["vt_line_type"] = 1  # 0 solid, 1 dashed, 2 dotted
style2["hz_line_type"] = 1
# Every leaf is given the same style object.
for l in t.iter_leaves():
    l.img_style = style2

# Circular layout on a 350-degree arc starting at 180 degrees; the calls that
# would display the tree are currently disabled.
ts = TreeStyle()
ts.show_leaf_name = True
#ts.rotation = 90
ts.mode = 'c'
ts.arc_start = 180
ts.arc_span = 350
#t.show(tree_style=ts)
#t.show()

# Second example: a fresh random 8-leaf tree and a new leaf style with a dark
# red node, green vertical lines and red horizontal lines (both dashed).
t = Tree()
t.populate(8)
style2 = NodeStyle()
style2["fgcolor"] = "darkred"
style2["shape"] = "circle"
style2["vt_line_color"] = "green"
style2["hz_line_color"] = "red"
style2["vt_line_width"] = 5
style2["hz_line_width"] = 5
style2["vt_line_type"] = 1  # 0 solid, 1 dashed, 2 dotted
style2["hz_line_type"] = 1
for l in t.iter_leaves():
Exemplo n.º 23
0
# --- First figure: rectangular tree, leaves listed in `strnr` drawn in red ---
ts = TreeStyle()
ts.show_leaf_name = True
ts.scale = 800

for n in t.traverse():
    if not n.is_leaf():
        continue
    leaf_style = n.img_style
    if n.name in strnr:
        leaf_style["fgcolor"] = "red"
    # every leaf gets the enlarged node marker, highlighted or not
    leaf_style["size"] = 10

t.render("plots/tree_ete.pdf", w=300, tree_style=ts)

# --- Second figure: near-full-circle tree of the accessory-gene Newick file ---
t2 = Tree("data/accessory_binary_genes.fa.newick")

ts2 = TreeStyle()
ts2.show_leaf_name = False
ts2.mode = "c"
ts2.arc_start = -180  # 0 degrees = 3 o'clock
ts2.arc_span = 359

for n in t2.traverse():
    if not n.is_leaf():
        continue
    # "DTU" leaves get a red background ...
    if "DTU" in n.name:
        print(n.name)
        n.img_style["bgcolor"] = "red"
    # ... and "CEB"/"ERR" leaves a blue one; a leaf matching both is printed
    # twice and ends up blue because this check runs last.
    if any(tag in n.name for tag in ("CEB", "ERR")):
        print(n.name)
        n.img_style["bgcolor"] = "blue"

t2.render("plots/tree_ete2.png", w=1200, tree_style=ts2)
Exemplo n.º 24
0
def generateFigure(PF, sample, rank, input_file, output_base_name, file_type, plot_l1, scaling, output_dpi):
    """Render the taxonomy tree for one sample and, optionally, an L1 plot.

    Args:
        PF: profile object; this function uses its ``get_all_tax_ids``,
            ``layout``, ``ground_truth_dict``,
            ``ground_truth_tax_id_to_percentage`` and
            ``profile_tax_id_to_percentage`` members.
        sample: sample identifier passed to ``PF.get_all_tax_ids`` and used
            in the tree file name.
        rank: taxonomic rank used as ``rank_limit`` for the topology and to
            select the leaves entering the L1 plot.
        input_file: path of the profile; its base name up to the first ``.``
            is shown as the tool name.
        output_base_name: prefix of both output file names.
        file_type: output file extension, without the dot.
        plot_l1: when true, also write the true-vs-predicted scatter plot.
        scaling: currently unused by this function.
        output_dpi: resolution passed to both the tree render and ``savefig``.

    Writes ``{output_base_name}_tree_{rank}_{sample}.{file_type}`` and, when
    ``plot_l1`` is set, ``{output_base_name}_L1_{rank}.{file_type}``.

    Raises:
        SystemExit: with status 1 when the topology cannot be built.
    """
    # Make the ETE3 tree
    try:
        tree = ncbi.get_topology(PF.get_all_tax_ids(sample), rank_limit=rank)
    except Exception:
        # Catch only real errors: a bare ``except`` would also turn Ctrl-C
        # into this misleading message. Raise SystemExit directly because the
        # ``exit`` helper is only injected by the ``site`` module.
        logging.getLogger('Tampa').critical("Input format not compatible.")
        raise SystemExit(1)

    ts = TreeStyle()
    ts.layout_fn = PF.layout
    ts.mode = "c"
    ts.show_leaf_name = False
    ts.show_branch_length = False
    ts.show_branch_support = False
    ts.min_leaf_separation = 10
    ts.arc_span = 360
    #ts.legend.add_face(CircleFace(100, "#1b9e77", label="Predicted"), column=0)
    #ts.legend.add_face(CircleFace(100, '#d95f02', label="True"), column=1)
    # add white space to move the legend closer: two rows of white circles
    # across columns 2, 1, 0
    for _ in range(2):
        for column in (2, 1, 0):
            ts.legend.add_face(CircleFace(65, "#FFFFFF"), column=column)

    # add the legend
    legend_fs = 50
    C1 = CircleFace(100, "#1b9e77")
    C1.hz_align = True
    ts.legend.add_face(C1, column=0)
    T1 = TextFace("Predicted", fsize=legend_fs)
    T1.hz_align = True
    ts.legend.add_face(T1, column=0)

    # the "True" entry is shown only when a ground truth was loaded
    if len(PF.ground_truth_dict) > 0:
        C2 = CircleFace(100, "#d95f02")
        C2.hz_align = True
        ts.legend.add_face(C2, column=1)
        T2 = TextFace("True", fsize=legend_fs)
        T2.hz_align = True
        ts.legend.add_face(T2, column=1)

    tool_label = f"Tool: {os.path.basename(input_file).split('.')[0]}"
    T3 = TextFace(tool_label, fsize=legend_fs)
    T3.hz_align = True
    ts.legend.add_face(T3, column=0)
    ts.allow_face_overlap = False  # this lets me mess a bit with font size and face size without the interaction of the two
    tree_output_file = f"{output_base_name}_tree_{rank}_{sample}.{file_type}"
    tree.render(tree_output_file, h=5.2, w=5, tree_style=ts, units="in", dpi=output_dpi)


    if plot_l1:

        # if you asked for L1 too, then plot that
        true_abundance_at_rank = []
        predicted_abundance_at_rank = []
        for node in tree.get_leaves():
            if node.rank != rank:
                continue
            tax_id = str(node.taxid)
            # percentages are converted to fractions; missing taxa count as 0
            if tax_id in PF.ground_truth_tax_id_to_percentage:
                true_abundance_at_rank.append(PF.ground_truth_tax_id_to_percentage[tax_id] / 100.)
            else:
                true_abundance_at_rank.append(0)
            if tax_id in PF.profile_tax_id_to_percentage:
                predicted_abundance_at_rank.append(PF.profile_tax_id_to_percentage[tax_id] / 100.)
            else:
                predicted_abundance_at_rank.append(0)

        data = np.zeros((len(true_abundance_at_rank), 2))
        data[:, 0] = np.array(true_abundance_at_rank)
        data[:, 1] = np.array(predicted_abundance_at_rank)

        df = pd.DataFrame(data, columns=['True', 'Predicted'])
        # g = seaborn.FacetGrid(df, height=6)
        ax = seaborn.scatterplot(x='True', y='Predicted', data=df, color='b', s=55)
        eps = 1
        ax.set_aspect('equal')
        max_val = np.max(data) + eps
        ax.set_xlim(-.5, max_val)
        ax.set_ylim(-.5, max_val)
        ax.set_xbound(-.5, max_val)
        ax.set_ybound(-.5, max_val)

        #plt.figure(figsize=(6,6))
        # diagonal y = x: points on it are perfect predictions
        plt.plot(np.linspace(0, max_val, 100), np.linspace(0, max_val, 100), color='k')

        # red vertical segment from each point to the diagonal
        for (x, y) in zip(true_abundance_at_rank, predicted_abundance_at_rank):
            low, high = (y, x) if x > y else (x, y)
            if high > low:
                ax.vlines(x, low, high, colors='r')
        plt.title(tool_label)
        plt.tight_layout()
        # NOTE(review): unlike the tree file, this name does not include
        # `sample`, so successive samples write to the same path -- confirm.
        l1_out_file = f"{output_base_name}_L1_{rank}.{file_type}"
        plt.savefig(l1_out_file, dpi=output_dpi)