Пример #1
0
class bayesianptp:
	"""Run MCMC on multiple trees.

	The trees are read from a NEXUS file through NexusReader, or from any
	other file through self.raxmlTreeParser, and kept as a list in
	self.trees.
	"""

	def __init__(self, filename, ftype = "nexus", reroot = False, method = "H1", seed = 1234, thinning = 100, sampling = 10000, burnin = 0.1, firstktrees = 0, taxa_order = None):
		"""Load the trees from filename and record the run settings.

		filename    -- path of the tree file.
		ftype       -- "nexus" reads the file with NexusReader; any other
		               value hands the file to self.raxmlTreeParser.
		reroot      -- stored on the instance; reset by remove_outgroups().
		method, seed, thinning, sampling, burnin -- stored unchanged.
		firstktrees -- when 0 < firstktrees <= number of trees, only the
		               first firstktrees trees are kept.
		taxa_order  -- optional list of taxon names; when empty or None the
		               leaf names of the first tree are used.
		"""
		self.method = method
		self.seed = seed
		self.thinning = thinning
		self.sampling = sampling
		self.burnin = burnin
		self.firstktrees = firstktrees
		if ftype == "nexus":
			self.nexus = NexusReader(filename)
			self.nexus.blocks['trees'].detranslate()
			self.trees = self.nexus.trees.trees
		else:
			self.trees = self.raxmlTreeParser(filename)

		if 0 < self.firstktrees <= len(self.trees):
			self.trees = self.trees[:self.firstktrees]

		# Keep a private copy: remove_outgroups() edits self.taxa_order in
		# place and must never change the list owned by the caller.
		if taxa_order:
			self.taxa_order = list(taxa_order)
		else:
			self.taxa_order = Tree(self.trees[0]).get_leaf_names()
		self.numtaxa = len(self.taxa_order)
		self.numtrees = len(self.trees)
		self.reroot = reroot


	def remove_outgroups(self, ognames, remove = False, output = ""):
		"""Reroot every tree on the outgroup and optionally remove the outgroup taxa.

		ognames -- list of outgroup taxon names (at least one name).
		remove  -- when True, the names are dropped from self.taxa_order and
		           every tree is pruned to the remaining taxa.
		output  -- when remove is True and output is not "", the processed
		           trees are written to this path, one tree per line.

		A ValueError raised while processing (for example by list.remove()
		for a name missing from self.taxa_order) is printed together with a
		hint and the program stops through sys.exit().
		"""
		self.reroot = False
		try:
			if remove:
				for og in ognames:
					self.taxa_order.remove(og)
				self.numtaxa = len(self.taxa_order)
			for i, newick in enumerate(self.trees):
				t = Tree(newick)
				if len(ognames) < 2:
					t.set_outgroup(ognames[0])
				else:
					# Several outgroup taxa: root on their common ancestor,
					# unless that ancestor already is the root.
					ancestor = t.get_common_ancestor(ognames)
					if not t == ancestor:
						t.set_outgroup(ancestor)
				if remove:
					t.prune(self.taxa_order, preserve_branch_length=True)
				self.trees[i] = t.write()
			if remove and output != "":
				with open(output, "w") as fout:
					for t in self.trees:
						fout.write(t + "\n")
		except ValueError as e:
			print(e)
			print("")
			print("")
			print("Somthing is wrong with the input outgroup names")
			print("")
			print("Quiting .....")
			sys.exit()
Пример #2
0
    def __init__(self,
                 filename,
                 ftype="nexus",
                 reroot=False,
                 method="H1",
                 seed=1234,
                 thinning=100,
                 sampling=10000,
                 burnin=0.1,
                 firstktrees=0,
                 taxa_order=None):
        """Load the trees from filename and record the run settings.

        filename    -- path of the tree file.
        ftype       -- "nexus" reads the file with NexusReader; any other
                       value hands the file to self.raxmlTreeParser.
        reroot, method, seed, thinning, sampling, burnin -- stored unchanged.
        firstktrees -- when 0 < firstktrees <= number of trees, only the
                       first firstktrees trees are kept.
        taxa_order  -- optional list of taxon names; when empty or None the
                       leaf names of the first tree are used.
        """
        self.method = method
        self.seed = seed
        self.thinning = thinning
        self.sampling = sampling
        self.burnin = burnin
        self.firstktrees = firstktrees
        if ftype == "nexus":
            self.nexus = NexusReader(filename)
            self.nexus.blocks['trees'].detranslate()
            self.trees = self.nexus.trees.trees
        else:
            self.trees = self.raxmlTreeParser(filename)

        if 0 < self.firstktrees <= len(self.trees):
            self.trees = self.trees[:self.firstktrees]

        # Store a private copy so that in-place edits of self.taxa_order can
        # never leak into the caller's list (or into a shared default).
        if taxa_order:
            self.taxa_order = list(taxa_order)
        else:
            self.taxa_order = Tree(self.trees[0]).get_leaf_names()
        self.numtaxa = len(self.taxa_order)
        self.numtrees = len(self.trees)
        self.reroot = reroot
Пример #3
0
class bootstrap_ptp:
    """Run MCMC on multiple trees

    The trees are read from a NEXUS file through NexusReader, or from any
    other file through self.raxmlTreeParser, and kept as a list in
    self.trees.
    """
    def __init__(self,
                 filename,
                 ftype="nexus",
                 reroot=False,
                 method="H1",
                 firstktrees=0):
        """Load the trees from filename.

        filename    -- path of the tree file.
        ftype       -- "nexus" reads the file with NexusReader; any other
                       value hands the file to self.raxmlTreeParser.
        reroot      -- stored on the instance; reset by remove_outgroups().
        method      -- stored unchanged.
        firstktrees -- when 0 < firstktrees <= number of trees, only the
                       first firstktrees trees are kept.
        """
        self.method = method
        self.firstktrees = firstktrees
        if ftype == "nexus":
            self.nexus = NexusReader(filename)
            self.nexus.blocks['trees'].detranslate()
            self.trees = self.nexus.trees.trees
        else:
            self.trees = self.raxmlTreeParser(filename)

        if 0 < self.firstktrees <= len(self.trees):
            self.trees = self.trees[:self.firstktrees]

        # Taxon order is taken from the first tree.
        self.taxa_order = Tree(self.trees[0]).get_leaf_names()
        self.numtaxa = len(self.taxa_order)
        self.numtrees = len(self.trees)
        self.reroot = reroot

    def remove_outgroups(self, ognames, remove=False):
        """Reroot every tree on the outgroup and optionally remove the outgroup taxa.

        ognames -- list of outgroup taxon names (at least one name).
        remove  -- when True, the names are dropped from self.taxa_order and
                   every tree is pruned to the remaining taxa.

        A ValueError raised while processing (for example by list.remove()
        for a name missing from self.taxa_order) is printed together with a
        hint and the program stops through sys.exit().
        """
        self.reroot = False
        try:
            if remove:
                for og in ognames:
                    self.taxa_order.remove(og)
                self.numtaxa = len(self.taxa_order)
            for i, newick in enumerate(self.trees):
                t = Tree(newick)
                if len(ognames) < 2:
                    t.set_outgroup(ognames[0])
                else:
                    # Several outgroup taxa: root on their common ancestor,
                    # unless that ancestor already is the root.
                    ancestor = t.get_common_ancestor(ognames)
                    if not t == ancestor:
                        t.set_outgroup(ancestor)
                if remove:
                    t.prune(self.taxa_order, preserve_branch_length=True)
                self.trees[i] = t.write()
        except ValueError as e:
            print(e)
            print(
                "\n Somthing is wrong with the input outgroup names \n Quiting ..."
            )
            sys.exit()
Пример #4
0
	def __init__(self, tree, sp_rate = 0, fix_sp_rate = False, max_iters = 20000, min_br = 0.0001):
		"""Build the working tree and reset the search state.

		tree        -- tree description handed to Tree(tree, format = 1).
		sp_rate     -- kept as self.fix_spe.
		fix_sp_rate -- kept as self.fix_spe_rate.
		max_iters   -- kept as self.max_num_search.
		min_br      -- kept as self.min_brl.
		"""
		# Tree set-up: resolve polytomies recursively, zero the root branch.
		self.tree = Tree(tree, format = 1)
		self.tree.resolve_polytomy(recursive=True)
		self.tree.dist = 0.0

		# Model parameters.
		self.min_brl = min_br
		self.fix_spe = sp_rate
		self.fix_spe_rate = fix_sp_rate

		# Best result so far: nothing has been evaluated yet.
		self.max_setting = None
		self.max_logl = float("-inf")

		# null_logl starts at 0.0 before null_model() is run.
		self.null_logl = 0.0
		self.null_model()

		# Search bookkeeping.
		self.max_num_search = max_iters
		self.setting_set = set()
		self.counter = 0
		self.species_list = None
Пример #5
0
	def __init__(self, tree, start_config = None, reroot = False, startmethod = "H0", min_br = 0.0001, seed = 1234, thinning = 100, sampling = 10000, burning = 0.1, taxa_order = None):
		"""Set up the sampler state for one tree.

		tree         -- tree description; searched with exponential_mixture
		                when start_config is None, otherwise parsed with
		                Tree(tree, format = 1).
		start_config -- starting delimitation setting, or None to derive one
		                with exponential_mixture.search(strategy = startmethod).
		reroot       -- forwarded to exponential_mixture.search().
		startmethod  -- search strategy used for the starting setting.
		min_br       -- stored as self.min_br.
		seed         -- seed of the private random.Random instance.
		thinning, sampling, burning -- stored unchanged.
		taxa_order   -- optional list of taxon names; when empty or None the
		                leaf names of the tree are used.
		"""
		if start_config is None:
			me = exponential_mixture(tree= tree)
			me.search(strategy = startmethod, reroot = reroot)
			me.count_species(print_log = False, pv = 0.0)
			self.tree = me.tree
			self.current_setting = me.max_setting
		else:
			self.current_setting = start_config
			self.tree = Tree(tree, format = 1)
		self.burning = burning
		self.last_setting = self.current_setting
		self.current_logl = self.current_setting.get_log_l()
		self.last_logl = self.last_setting.get_log_l()
		self.min_br = min_br
		self.rand_nr = random.Random()
		self.rand_nr.seed(seed)
		self.thinning = thinning
		self.sampling = sampling
		if taxa_order:
			self.taxaorder = taxa_order
		else:
			self.taxaorder = self.tree.get_leaf_names()
		self.numtaxa = len(self.taxaorder)
		self.partitions = []
		self.llhs = []
		self.nsplit = 0
		self.nmerge = 0
		# Remember the ML partition.
		self.maxllh = self.current_logl
		_, spe = self.current_setting.output_species(taxa_order = self.taxaorder)
		self.maxpar = spe
		self.max_setting = self.current_setting
		# Record all delimitation settings for plotting; this could consume
		# a lot of memory.
		self.settings = []
Пример #6
0
class bootstrap_ptp:
    """Run MCMC on multiple trees

    The trees are read from a NEXUS file through NexusReader, or from any
    other file through self.raxmlTreeParser, and kept as a list in
    self.trees.
    """

    def __init__(self, filename, ftype="nexus", reroot=False, method="H1", firstktrees=0):
        """Load the trees from filename.

        filename    -- path of the tree file.
        ftype       -- "nexus" reads the file with NexusReader; any other
                       value hands the file to self.raxmlTreeParser.
        reroot      -- stored on the instance; reset by remove_outgroups().
        method      -- stored unchanged.
        firstktrees -- when 0 < firstktrees <= number of trees, only the
                       first firstktrees trees are kept.
        """
        self.method = method
        self.firstktrees = firstktrees
        if ftype == "nexus":
            self.nexus = NexusReader(filename)
            self.nexus.blocks["trees"].detranslate()
            self.trees = self.nexus.trees.trees
        else:
            self.trees = self.raxmlTreeParser(filename)

        if 0 < self.firstktrees <= len(self.trees):
            self.trees = self.trees[: self.firstktrees]

        # Taxon order is taken from the first tree.
        self.taxa_order = Tree(self.trees[0]).get_leaf_names()
        self.numtaxa = len(self.taxa_order)
        self.numtrees = len(self.trees)
        self.reroot = reroot

    def remove_outgroups(self, ognames, remove=False):
        """Reroot every tree on the outgroup and optionally remove the outgroup taxa.

        ognames -- list of outgroup taxon names (at least one name).
        remove  -- when True, the names are dropped from self.taxa_order and
                   every tree is pruned to the remaining taxa.

        A ValueError raised while processing (for example by list.remove()
        for a name missing from self.taxa_order) is printed together with a
        hint and the program stops through sys.exit().
        """
        self.reroot = False
        try:
            if remove:
                for og in ognames:
                    self.taxa_order.remove(og)
                self.numtaxa = len(self.taxa_order)
            for i, newick in enumerate(self.trees):
                t = Tree(newick)
                if len(ognames) < 2:
                    t.set_outgroup(ognames[0])
                else:
                    # Several outgroup taxa: root on their common ancestor,
                    # unless that ancestor already is the root.
                    ancestor = t.get_common_ancestor(ognames)
                    if not t == ancestor:
                        t.set_outgroup(ancestor)
                if remove:
                    t.prune(self.taxa_order, preserve_branch_length=True)
                self.trees[i] = t.write()
        except ValueError as e:
            print(e)
            print("\n Somthing is wrong with the input outgroup names \n Quiting ...")
            sys.exit()
Пример #7
0
 def __init__(self,
              tree,
              start_config=None,
              reroot=False,
              startmethod="H0",
              min_br=0.0001,
              seed=1234,
              thinning=100,
              sampling=10000,
              burning=0.1,
              taxa_order=None):
     """Set up the sampler state for one tree.

     tree         -- tree description; searched with exponential_mixture
                     when start_config is None, otherwise parsed with
                     Tree(tree, format=1).
     start_config -- starting delimitation setting, or None to derive one
                     with exponential_mixture.search(strategy=startmethod).
     reroot       -- forwarded to exponential_mixture.search().
     startmethod  -- search strategy used for the starting setting.
     min_br       -- stored as self.min_br.
     seed         -- seed of the private random.Random instance.
     thinning, sampling, burning -- stored unchanged.
     taxa_order   -- optional list of taxon names; when empty or None the
                     leaf names of the tree are used.
     """
     if start_config is None:
         me = exponential_mixture(tree=tree)
         me.search(strategy=startmethod, reroot=reroot)
         me.count_species(print_log=False, pv=0.0)
         self.tree = me.tree
         self.current_setting = me.max_setting
     else:
         self.current_setting = start_config
         self.tree = Tree(tree, format=1)
     self.burning = burning
     self.last_setting = self.current_setting
     self.current_logl = self.current_setting.get_log_l()
     self.last_logl = self.last_setting.get_log_l()
     self.min_br = min_br
     self.rand_nr = random.Random()
     self.rand_nr.seed(seed)
     self.thinning = thinning
     self.sampling = sampling
     if taxa_order:
         self.taxaorder = taxa_order
     else:
         self.taxaorder = self.tree.get_leaf_names()
     self.numtaxa = len(self.taxaorder)
     self.partitions = []
     self.llhs = []
     self.nsplit = 0
     self.nmerge = 0
     # Remember the ML partition.
     self.maxllh = self.current_logl
     _, spe = self.current_setting.output_species(
         taxa_order=self.taxaorder)
     self.maxpar = spe
     self.max_setting = self.current_setting
     # Record all delimitation settings for plotting; this could consume
     # a lot of memory.
     self.settings = []
Пример #8
0
    def __init__(self,
                 filename,
                 ftype="nexus",
                 reroot=False,
                 method="H1",
                 firstktrees=0):
        """Load the trees from filename.

        filename    -- path of the tree file.
        ftype       -- "nexus" reads the file with NexusReader; any other
                       value hands the file to self.raxmlTreeParser.
        reroot, method -- stored unchanged.
        firstktrees -- when 0 < firstktrees <= number of trees, only the
                       first firstktrees trees are kept.
        """
        self.method = method
        self.firstktrees = firstktrees
        if ftype != "nexus":
            self.trees = self.raxmlTreeParser(filename)
        else:
            self.nexus = NexusReader(filename)
            self.nexus.blocks['trees'].detranslate()
            self.trees = self.nexus.trees.trees

        # Optionally keep only the leading firstktrees trees.
        if 0 < self.firstktrees <= len(self.trees):
            self.trees = self.trees[:self.firstktrees]

        # Taxon order is taken from the first tree.
        first_tree = Tree(self.trees[0])
        self.taxa_order = first_tree.get_leaf_names()
        self.numtaxa = len(self.taxa_order)
        self.numtrees = len(self.trees)
        self.reroot = reroot
Пример #9
0
    def __init__(self, filename, ftype="nexus", reroot=False, method="H1", firstktrees=0):
        """Read the trees stored in filename and derive the taxon order.

        filename    -- path of the tree file.
        ftype       -- "nexus" selects NexusReader; anything else selects
                       self.raxmlTreeParser.
        reroot, method -- stored unchanged.
        firstktrees -- number of leading trees to keep; ignored unless it is
                       positive and not larger than the number of trees.
        """
        self.method = method
        self.firstktrees = firstktrees

        use_nexus = ftype == "nexus"
        if use_nexus:
            self.nexus = NexusReader(filename)
            self.nexus.blocks["trees"].detranslate()
            self.trees = self.nexus.trees.trees
        else:
            self.trees = self.raxmlTreeParser(filename)

        if 0 < self.firstktrees <= len(self.trees):
            self.trees = self.trees[: self.firstktrees]

        leaf_names = Tree(self.trees[0]).get_leaf_names()
        self.taxa_order = leaf_names
        self.numtaxa = len(self.taxa_order)
        self.numtrees = len(self.trees)
        self.reroot = reroot
Пример #10
0
	def __init__(self, filename, ftype = "nexus", reroot = False, method = "H1", seed = 1234, thinning = 100, sampling = 10000, burnin = 0.1, firstktrees = 0, taxa_order = None):
		"""Load the trees from filename and record the run settings.

		filename    -- path of the tree file.
		ftype       -- "nexus" reads the file with NexusReader; any other
		               value hands the file to self.raxmlTreeParser.
		reroot, method, seed, thinning, sampling, burnin -- stored unchanged.
		firstktrees -- when 0 < firstktrees <= number of trees, only the
		               first firstktrees trees are kept.
		taxa_order  -- optional list of taxon names; when empty or None the
		               leaf names of the first tree are used.
		"""
		self.method = method
		self.seed = seed
		self.thinning = thinning
		self.sampling = sampling
		self.burnin = burnin
		self.firstktrees = firstktrees
		if ftype == "nexus":
			self.nexus = NexusReader(filename)
			self.nexus.blocks['trees'].detranslate()
			self.trees = self.nexus.trees.trees
		else:
			self.trees = self.raxmlTreeParser(filename)

		if 0 < self.firstktrees <= len(self.trees):
			self.trees = self.trees[:self.firstktrees]

		# Store a private copy so that in-place edits of self.taxa_order can
		# never leak into the caller's list (or into a shared default).
		if taxa_order:
			self.taxa_order = list(taxa_order)
		else:
			self.taxa_order = Tree(self.trees[0]).get_leaf_names()
		self.numtaxa = len(self.taxa_order)
		self.numtrees = len(self.trees)
		self.reroot = reroot
Пример #11
0
class exponential_mixture:
	"""ML search PTP, to use: __init__(), search() and count_species()"""
	def __init__(self, tree, sp_rate = 0, fix_sp_rate = False, max_iters = 20000, min_br = 0.0001):
		"""Build the working tree and reset the search state.

		tree        -- tree description handed to Tree(tree, format = 1).
		sp_rate     -- kept as self.fix_spe and passed as sp_rate to every
		               species_setting built by the searches.
		fix_sp_rate -- kept as self.fix_spe_rate.
		max_iters   -- kept as self.max_num_search (limit used by Brutal()).
		min_br      -- kept as self.min_brl (branch-length threshold).
		"""
		# Tree set-up: resolve polytomies recursively, zero the root branch.
		self.tree = Tree(tree, format = 1)
		self.tree.resolve_polytomy(recursive=True)
		self.tree.dist = 0.0

		# Model parameters.
		self.min_brl = min_br
		self.fix_spe = sp_rate
		self.fix_spe_rate = fix_sp_rate

		# Best result so far: nothing has been evaluated yet.
		self.max_setting = None
		self.max_logl = float("-inf")

		# null_model() replaces the 0.0 placeholder with the log-likelihood
		# of the single-distribution model.
		self.null_logl = 0.0
		self.null_model()

		# Search bookkeeping.
		self.max_num_search = max_iters
		self.setting_set = set()
		self.counter = 0
		self.species_list = None


	def null_model(self):
		"""Build one exp_distribution from all branches longer than min_brl.

		Stores the summed log-likelihood in self.null_logl and returns the
		rate of that distribution.
		"""
		long_branches = [node.dist for node in self.tree.get_descendants() if node.dist > self.min_brl]
		single_exp = exp_distribution(long_branches)
		self.null_logl = single_exp.sum_log_l()
		return single_exp.rate


	def __compare_node(self, node):
		return node.dist


	def re_rooting(self):
		"""Reroot the tree on the descendant with the longest branch."""
		by_length = sorted(self.tree.get_descendants(), key=self.__compare_node)
		# Ascending stable sort: the last entry has the longest branch (on
		# ties, the one that came last in the descendant list).
		longest = by_length[-1]
		self.tree.set_outgroup(longest)
		self.tree.dist = 0.0


	def comp_num_comb(self):
		"""Annotate every node with a "cnt" feature and return the root's value.

		Leaves get 1.0; an internal node gets the product of its children's
		counts plus 1.0. Nodes are visited in postorder so children are
		annotated before their parent.
		"""
		for node in self.tree.traverse(strategy='postorder'):
			count = 1.0
			if not node.is_leaf():
				for child in node.get_children():
					count = count * child.cnt
				count = count + 1.0
			node.add_feature("cnt", count)
		return self.tree.cnt


	def next(self, sp_setting):
		"""Recursively visit the settings reachable from sp_setting.

		The setting is recorded in self.setting_set (as a frozenset of its
		spe_nodes) and replaces self.max_setting when its log-likelihood is
		higher than self.max_logl. Every non-leaf node in
		sp_setting.active_nodes is then expanded into a new setting, which is
		visited unless its node set has already been recorded.
		"""
		self.setting_set.add(frozenset(sp_setting.spe_nodes))
		logl = sp_setting.get_log_l()
		if logl > self.max_logl:
			self.max_logl = logl
			self.max_setting = sp_setting
		for node in sp_setting.active_nodes:
			if node.is_leaf():
				continue
			# New node set: the children of the expanded node followed by
			# the current speciation nodes.
			expanded = list(node.get_children())
			expanded.extend(sp_setting.spe_nodes)
			candidate = species_setting(spe_nodes = expanded, root = sp_setting.root, sp_rate = sp_setting.spe_rate, fix_sp_rate = sp_setting.fix_spe_rate, minbr = self.min_brl)
			if frozenset(expanded) not in self.setting_set:
				self.next(candidate)

	def H0(self, reroot = True):
		self.H1(reroot)
		self.H2(reroot = False)
		self.H3(reroot = False)


	def H1(self, reroot = True):
		"""Heuristic search that adds nodes in order of decreasing branch length.

		Starting from the root and its children, the descendants are visited
		from the longest to the shortest branch. For each node that is not
		yet a speciation node, a new setting is built from the current
		speciation nodes plus the children of the node's parent and, when
		that parent is not a speciation node itself, the children of every
		further ancestor up to the first one that is a speciation node (or
		the root). Each new setting becomes the current one; the setting with
		the highest log-likelihood replaces self.max_setting when it beats
		self.max_logl.

		reroot -- when True, call re_rooting() first.
		"""
		if reroot:
			self.re_rooting()

		# Descendants ordered from the longest to the shortest branch.
		ordered_nodes = self.tree.get_descendants()
		ordered_nodes.sort(key=self.__compare_node)
		ordered_nodes.reverse()

		start_nodes = [self.tree]
		start_nodes.extend(self.tree.get_children())
		last_setting = species_setting(spe_nodes = start_nodes, root = self.tree, sp_rate = self.fix_spe, fix_sp_rate = self.fix_spe_rate, minbr = self.min_brl)
		best_setting = last_setting
		best_logl = last_setting.get_log_l()

		for node in ordered_nodes:
			if node in last_setting.spe_nodes:
				# Node already is a speciation node, do nothing.
				continue

			grown = list(last_setting.spe_nodes)
			ancestor = node.up  # the father of this new node
			parent_is_spe = ancestor in last_setting.spe_nodes
			for sibling in ancestor.get_children():
				if sibling not in grown:
					grown.append(sibling)
			if not parent_is_spe:
				# Climb towards the root, adding the children of each
				# ancestor, until an existing speciation node is reached.
				while not ancestor.is_root():
					ancestor = ancestor.up
					for sibling in ancestor.get_children():
						if sibling not in grown:
							grown.append(sibling)
					if ancestor in last_setting.spe_nodes:
						break

			candidate = species_setting(spe_nodes = grown, root = self.tree, sp_rate = self.fix_spe, fix_sp_rate = self.fix_spe_rate, minbr = self.min_brl)
			candidate_logl = candidate.get_log_l()
			if candidate_logl > best_logl:
				best_logl = candidate_logl
				best_setting = candidate
			last_setting = candidate

		if best_logl > self.max_logl:
			self.max_logl = best_logl
			self.max_setting = best_setting


	def H2(self, reroot = True):
		"""Greedy search: expand, at every step, the active node that scores best.

		Starting from the root and its children, every non-leaf node in
		last_setting.active_nodes is tried as the next expansion. The
		candidate with the highest log-likelihood becomes the next setting,
		whether or not it improves on the best seen so far. The search ends
		when no candidate is left. The best setting replaces
		self.max_setting when its log-likelihood beats self.max_logl.

		reroot -- when True, call re_rooting() first.
		"""
		if reroot:
			self.re_rooting()

		start_nodes = [self.tree]
		start_nodes.extend(self.tree.get_children())
		last_setting = species_setting(spe_nodes = start_nodes, root = self.tree, sp_rate = self.fix_spe, fix_sp_rate = self.fix_spe_rate, minbr = self.min_brl)
		max_logl = last_setting.get_log_l()
		max_setting = last_setting

		while True:
			curr_max_logl = float("-inf")
			curr_max_setting = None
			for node in last_setting.active_nodes:
				if node.is_leaf():
					continue
				# Candidate: the children of this node plus the current
				# speciation nodes.
				sp_nodes = list(node.get_children())
				sp_nodes.extend(last_setting.spe_nodes)
				new_sp_setting = species_setting(spe_nodes = sp_nodes, root = self.tree, sp_rate = self.fix_spe, fix_sp_rate = self.fix_spe_rate, minbr = self.min_brl)
				logl = new_sp_setting.get_log_l()
				if logl > curr_max_logl:
					curr_max_logl = logl
					curr_max_setting = new_sp_setting

			# Stop when nothing could be expanded, and also when no candidate
			# scored above -inf: continuing with a None setting would fail on
			# the next pass.
			if curr_max_setting is None:
				break

			if curr_max_logl > max_logl:
				max_setting = curr_max_setting
				max_logl = curr_max_logl

			last_setting = curr_max_setting

		if max_logl > self.max_logl:
			self.max_logl = max_logl
			self.max_setting = max_setting


	def H3(self, reroot = True):
		"""Greedy search guided by a two-distribution split of the branch lengths.

		The descendants are sorted by decreasing branch length and the split
		index that maximises the summed log-likelihood of two
		exp_distribution objects (longest branches / remaining branches) is
		determined. The nodes before that index are the "target" nodes. The
		greedy expansion then prefers active nodes that have a target node
		among their children; when no such node exists, every non-leaf
		active node is tried instead. The best setting replaces
		self.max_setting when its log-likelihood beats self.max_logl.

		reroot -- when True, call re_rooting() first.
		"""
		if reroot:
			self.re_rooting()
		# Descendants and their branch lengths, longest first.
		sorted_node_list = self.tree.get_descendants()
		sorted_node_list.sort(key=self.__compare_node)
		sorted_node_list.reverse()
		sorted_br = []
		for node in sorted_node_list:
			sorted_br.append(node.dist)
		# Try every split point i (both parts non-empty) and keep the best.
		maxlogl = float("-inf") 
		maxidx = -1
		for i in range(len(sorted_node_list))[1:]:
			l1 = sorted_br[0:i]
			l2 = sorted_br[i:]
			e1 = exp_distribution(l1)
			e2 = exp_distribution(l2)
			logl = e1.sum_log_l() + e2.sum_log_l()
			if logl > maxlogl:
				maxidx = i
				maxlogl = logl
		
		# NOTE(review): if no split index was accepted, maxidx is still -1
		# and this slice keeps everything except the last node - confirm
		# that this is intended.
		target_nodes = sorted_node_list[0:maxidx]
		
		# Initial setting: the root together with its children.
		first_node_list = []
		first_node_list.append(self.tree)
		first_childs = self.tree.get_children()
		for child in first_childs:
			first_node_list.append(child)
		first_setting = species_setting(spe_nodes = first_node_list, root = self.tree, sp_rate = self.fix_spe, fix_sp_rate = self.fix_spe_rate, minbr = self.min_brl)
		last_setting = first_setting
		max_logl = last_setting.get_log_l()
		max_setting = last_setting
		contin_flag = True 
		# Number of passes in which a target-guided expansion was made.
		target_node_cnt = 0
		while contin_flag:
			curr_max_logl = float("-inf") 
			curr_max_setting = None
			# contin_flag becomes True again if any active node is internal;
			# unchanged_flag stays True if no target-guided candidate exists.
			contin_flag = False
			unchanged_flag = True
			for node in last_setting.active_nodes:
				if node.is_leaf():
					pass
				else:
					contin_flag = True 
					childs = node.get_children()
					sp_nodes = []
					# flag: does this node have a target node among its children?
					flag = False
					for child in childs:
						if child in target_nodes:
							flag = True
							#target_nodes.remove(child)
					if flag:
						unchanged_flag = False
						# Candidate: children of this node plus the current
						# speciation nodes.
						for child in childs:
							sp_nodes.append(child)
						for nod in last_setting.spe_nodes:
							sp_nodes.append(nod)
						new_sp_setting = species_setting(spe_nodes = sp_nodes, root = self.tree, sp_rate = self.fix_spe, fix_sp_rate = self.fix_spe_rate, minbr = self.min_brl)
						logl = new_sp_setting.get_log_l()
						if logl > curr_max_logl:
							curr_max_logl = logl
							curr_max_setting = new_sp_setting
			if not unchanged_flag:
				# A target-guided expansion was found: adopt the best one.
				target_node_cnt = target_node_cnt + 1
				if curr_max_logl > max_logl:
					max_setting = curr_max_setting
					max_logl = curr_max_logl
				last_setting = curr_max_setting
			
			# Stop once as many guided passes were made as there are targets.
			if len(target_nodes) == target_node_cnt:
				contin_flag = False
			# Fallback pass: no guided candidate existed, so try every
			# non-leaf active node (curr_max_logl is still -inf here).
			if contin_flag and unchanged_flag and last_setting!= None:
				for node in last_setting.active_nodes:
					if node.is_leaf():
						pass
					else:
						childs = node.get_children()
						sp_nodes = []
						for child in childs:
							sp_nodes.append(child)
						for nod in last_setting.spe_nodes:
							sp_nodes.append(nod)
						new_sp_setting = species_setting(spe_nodes = sp_nodes, root = self.tree, sp_rate = self.fix_spe, fix_sp_rate = self.fix_spe_rate, minbr = self.min_brl)
						logl = new_sp_setting.get_log_l()
						if logl > curr_max_logl:
							curr_max_logl = logl
							curr_max_setting = new_sp_setting
				if curr_max_logl > max_logl:
					max_setting = curr_max_setting
					max_logl = curr_max_logl
				last_setting = curr_max_setting
				
		# Publish the result only if it beats the best found so far.
		if max_logl > self.max_logl:
			self.max_logl = max_logl
			self.max_setting = max_setting


	def Brutal(self, reroot = False):
		"""Exhaustive search through next(), with H0 as fallback.

		When the count returned by comp_num_comb() exceeds
		self.max_num_search, a message is printed and H0 (without rerooting)
		is run instead.

		reroot -- when True, call re_rooting() first.
		"""
		if reroot:
			self.re_rooting()
		start_nodes = [self.tree]
		start_nodes.extend(self.tree.get_children())
		num_s = self.comp_num_comb()
		too_many = num_s > self.max_num_search
		if not too_many:
			start_setting = species_setting(spe_nodes = start_nodes, root = self.tree, sp_rate = self.fix_spe, fix_sp_rate = self.fix_spe_rate, minbr = self.min_brl)
			self.next(start_setting)
		else:
			print("Too many search iterations: " + repr(num_s) + ", using H0 instead!!!")
			self.H0(reroot = False)


	def search(self, strategy = "H1", reroot = False):
		if strategy == "H1":
			self.H1(reroot)
		elif strategy == "H2":
			self.H2(reroot)
		elif strategy == "H3":
			self.H3(reroot)
		elif strategy == "Brutal":
			self.Brutal(reroot)
		else:
			self.H0(reroot)


	def count_species(self, print_log = True, pv = 0.001):
		"""Return the number of species and fill self.species_list.

		A likelihood-ratio test (lh_ratio_test) compares self.null_logl with
		self.max_logl. When its p-value is below pv, the species of
		self.max_setting are used; otherwise all leaves form one species and
		1 is returned.

		print_log -- when True, print the rates, log-likelihoods, p-value
		             and the Kolmogorov-Smirnov statistics of both
		             distributions.
		pv        -- p-value threshold.
		"""
		lhr = lh_ratio_test(self.null_logl, self.max_logl, 1)
		pvalue = lhr.get_p_value()
		if print_log:
			three_dec = "{0:.3f}".format
			best = self.max_setting
			print("Speciation rate: " + three_dec(best.rate2))
			print("Coalesecnt rate: " + three_dec(best.rate1))
			print("Null logl: " + three_dec(self.null_logl))
			print("MAX logl: " + three_dec(self.max_logl))
			print("P-value: " + three_dec(pvalue))
			spefit, speaw = best.e2.ks_statistic()
			coafit, coaaw = best.e1.ks_statistic()
			print("Kolmogorov-Smirnov test for model fitting:")
			print("Speciation: " + "Dtest = {0:.3f}".format(spefit) + " " + speaw)
			print("Coalescent: " + "Dtest = {0:.3f}".format(coafit) + " " + coaaw)
		if pvalue < pv:
			num_sp, self.species_list = self.max_setting.count_species()
			return num_sp
		# Not significant: the whole tree is reported as a single species.
		self.species_list = [self.tree.get_leaf_names()]
		return 1


	def whitening_search(self, strategy = "H1", reroot = False, pv = 0.001):
		"""Search, prune the tree to the taxa kept by whiten_species(), search again.

		strategy, reroot -- forwarded to search() for both passes.
		pv               -- accepted for backward compatibility; this method
		                    does not use it.
		"""
		# search() only accepts (strategy, reroot); the former third
		# positional argument (pv) made this call raise TypeError.
		self.search(strategy, reroot)
		_, self.species_list = self.max_setting.count_species()
		spekeep = self.max_setting.whiten_species()
		self.tree.prune(spekeep)
		# Reset the search state for the pruned tree, then search again.
		self.max_logl = float("-inf")
		self.max_setting = None
		self.null_logl = 0.0
		self.null_model()
		self.species_list = None
		self.counter = 0
		self.setting_set = set()
		self.search(strategy, reroot)


	def print_species(self):
		cnt = 1
		for sp in self.species_list:
			print("Species " + repr(cnt) + ":")
			for leaf in sp:
				print("          " + leaf)
			cnt = cnt + 1


	def output_species(self, taxa_order = []):
		"""taxa_order is a list of taxa names, the paritions will be output as the same order"""
		if len(taxa_order) == 0:
			taxa_order = self.tree.get_leaf_names()
		
		num_taxa = 0
		for sp in self.species_list:
			for leaf in sp:
				num_taxa = num_taxa + 1
		if not len(taxa_order) == num_taxa:
			print("error error, taxa_order != num_taxa!")
			return None, None
		else: 
			partion = [-1] * num_taxa
			cnt = 1
			for sp in self.species_list:
				for leaf in sp:
					idx = taxa_order.index(leaf)
					partion[idx] = cnt
				cnt = cnt + 1
			return taxa_order, partion
Пример #12
0
 def remove_outgroups(self, ognames, remove=False):
     """Reroot every tree on the outgroup and optionally remove the outgroup taxa.

     ognames -- list of outgroup taxon names (at least one name).
     remove  -- when True, the names are dropped from self.taxa_order and
                every tree is pruned to the remaining taxa.

     A ValueError raised while processing (for example by list.remove()
     for a name missing from self.taxa_order) is printed together with a
     hint and the program stops through sys.exit().
     """
     self.reroot = False
     try:
         if remove:
             for og in ognames:
                 self.taxa_order.remove(og)
             self.numtaxa = len(self.taxa_order)
         for i, newick in enumerate(self.trees):
             t = Tree(newick)
             if len(ognames) < 2:
                 t.set_outgroup(ognames[0])
             else:
                 # Several outgroup taxa: root on their common ancestor,
                 # unless that ancestor already is the root.
                 ancestor = t.get_common_ancestor(ognames)
                 if not t == ancestor:
                     t.set_outgroup(ancestor)
             if remove:
                 t.prune(self.taxa_order, preserve_branch_length=True)
             self.trees[i] = t.write()
     except ValueError as e:
         print(e)
         print("\n Somthing is wrong with the input outgroup names \n Quiting ...")
         sys.exit()
Пример #13
0
 def remove_outgroups(self, ognames, remove=False):
     """Reroot every tree on the outgroup and optionally remove the outgroup taxa.

     ognames -- list of outgroup taxon names (at least one name).
     remove  -- when True, the names are dropped from self.taxa_order and
                every tree is pruned to the remaining taxa.

     A ValueError raised while processing (for example by list.remove()
     for a name missing from self.taxa_order) is printed together with a
     hint and the program stops through sys.exit().
     """
     self.reroot = False
     try:
         if remove:
             for og in ognames:
                 self.taxa_order.remove(og)
             self.numtaxa = len(self.taxa_order)
         for i, newick in enumerate(self.trees):
             t = Tree(newick)
             if len(ognames) < 2:
                 t.set_outgroup(ognames[0])
             else:
                 # Several outgroup taxa: root on their common ancestor,
                 # unless that ancestor already is the root.
                 ancestor = t.get_common_ancestor(ognames)
                 if not t == ancestor:
                     t.set_outgroup(ancestor)
             if remove:
                 t.prune(self.taxa_order, preserve_branch_length=True)
             self.trees[i] = t.write()
     except ValueError as e:
         print(e)
         print(
             "\n Somthing is wrong with the input outgroup names \n Quiting ..."
         )
         sys.exit()
Пример #14
0
class bayesianptp:
    """Run MCMC on multiple trees

    The trees are read from a NEXUS file through NexusReader, or from any
    other file through self.raxmlTreeParser, and kept as a list in
    self.trees.
    """
    def __init__(self,
                 filename,
                 ftype="nexus",
                 reroot=False,
                 method="H1",
                 seed=1234,
                 thinning=100,
                 sampling=10000,
                 burnin=0.1,
                 firstktrees=0,
                 taxa_order=None):
        """Load the trees from filename and record the run settings.

        filename    -- path of the tree file.
        ftype       -- "nexus" reads the file with NexusReader; any other
                       value hands the file to self.raxmlTreeParser.
        reroot      -- stored on the instance; reset by remove_outgroups().
        method, seed, thinning, sampling, burnin -- stored unchanged.
        firstktrees -- when 0 < firstktrees <= number of trees, only the
                       first firstktrees trees are kept.
        taxa_order  -- optional list of taxon names; when empty or None the
                       leaf names of the first tree are used.
        """
        self.method = method
        self.seed = seed
        self.thinning = thinning
        self.sampling = sampling
        self.burnin = burnin
        self.firstktrees = firstktrees
        if ftype == "nexus":
            self.nexus = NexusReader(filename)
            self.nexus.blocks['trees'].detranslate()
            self.trees = self.nexus.trees.trees
        else:
            self.trees = self.raxmlTreeParser(filename)

        if 0 < self.firstktrees <= len(self.trees):
            self.trees = self.trees[:self.firstktrees]

        # Keep a private copy: remove_outgroups() edits self.taxa_order in
        # place and must never change the list owned by the caller.
        if taxa_order:
            self.taxa_order = list(taxa_order)
        else:
            self.taxa_order = Tree(self.trees[0]).get_leaf_names()
        self.numtaxa = len(self.taxa_order)
        self.numtrees = len(self.trees)
        self.reroot = reroot

    def remove_outgroups(self, ognames, remove=False, output=""):
        """Reroot every tree on the outgroup and optionally remove the outgroup taxa.

        ognames -- list of outgroup taxon names (at least one name).
        remove  -- when True, the names are dropped from self.taxa_order and
                   every tree is pruned to the remaining taxa.
        output  -- when remove is True and output is not "", the processed
                   trees are written to this path, one tree per line.

        A ValueError raised while processing (for example by list.remove()
        for a name missing from self.taxa_order) is printed together with a
        hint and the program stops through sys.exit().
        """
        self.reroot = False
        try:
            if remove:
                for og in ognames:
                    self.taxa_order.remove(og)
                self.numtaxa = len(self.taxa_order)
            for i, newick in enumerate(self.trees):
                t = Tree(newick)
                if len(ognames) < 2:
                    t.set_outgroup(ognames[0])
                else:
                    # Several outgroup taxa: root on their common ancestor,
                    # unless that ancestor already is the root.
                    ancestor = t.get_common_ancestor(ognames)
                    if not t == ancestor:
                        t.set_outgroup(ancestor)
                if remove:
                    t.prune(self.taxa_order, preserve_branch_length=True)
                self.trees[i] = t.write()
            if remove and output != "":
                with open(output, "w") as fout:
                    for t in self.trees:
                        fout.write(t + "\n")
        except ValueError as e:
            print(e)
            print("")
            print("")
            print("Somthing is wrong with the input outgroup names")
            print("")
            print("Quiting .....")
            sys.exit()
Пример #15
0
class ptpmcmc:
    """MCMC on a single tree using PTP model.

    Each generation proposes either a split or a merge of one node of the
    current species delimitation (a ``species_setting``) and accepts or
    rejects it based on the change in log-likelihood. After the burn-in,
    every ``thinning``-th generation is recorded in ``partitions``, ``llhs``
    and ``settings``. The best-scoring proposal seen is kept in ``maxllh``,
    ``maxpar`` and ``max_setting``.
    """

    def __init__(self,
                 tree,
                 start_config=None,
                 reroot=False,
                 startmethod="H0",
                 min_br=0.0001,
                 seed=1234,
                 thinning=100,
                 sampling=10000,
                 burning=0.1,
                 taxa_order=None):
        """Set up the chain state.

        Args:
            tree: Tree input; handed to ``exponential_mixture`` when no
                starting configuration is given, otherwise to
                ``Tree(tree, format=1)``.
            start_config: Starting delimitation. When ``None`` one is
                obtained from an ``exponential_mixture`` search.
            reroot: Forwarded to ``exponential_mixture.search`` (only used
                when ``start_config`` is ``None``).
            startmethod: Search strategy forwarded to
                ``exponential_mixture.search`` (same condition).
            min_br: Passed as ``minbr`` to every new ``species_setting``.
            seed: Seed for the chain's private ``random.Random`` instance.
            thinning: Record every ``thinning``-th generation; must be
                non-zero because it is used as a modulus.
            sampling: Total number of generations to run.
            burning: Fraction of ``sampling`` skipped before recording.
            taxa_order: Taxon names passed to ``output_species``. ``None``
                or an empty sequence means "use the tree's leaf names".
        """
        if start_config is None:
            me = exponential_mixture(tree=tree)
            me.search(strategy=startmethod, reroot=reroot)
            me.count_species(print_log=False, pv=0.0)
            self.tree = me.tree
            self.current_setting = me.max_setting
        else:
            self.current_setting = start_config
            self.tree = Tree(tree, format=1)
        self.burning = burning
        self.last_setting = self.current_setting
        self.current_logl = self.current_setting.get_log_l()
        self.last_logl = self.last_setting.get_log_l()
        self.min_br = min_br
        self.rand_nr = random.Random()
        self.rand_nr.seed(seed)
        self.thinning = thinning
        self.sampling = sampling
        if not taxa_order:
            self.taxaorder = self.tree.get_leaf_names()
        else:
            self.taxaorder = taxa_order
        self.numtaxa = len(self.taxaorder)
        self.partitions = []
        self.llhs = []
        self.nsplit = 0
        self.nmerge = 0
        # Remember the ML partition.
        self.maxllh = self.current_logl
        _, spe = self.current_setting.output_species(
            taxa_order=self.taxaorder)
        self.maxpar = spe
        self.max_setting = self.current_setting
        # Record all delimitation settings for plotting; this could consume
        # a lot of memory.
        self.settings = []

    def _new_setting(self, spe_nodes):
        """Install a fresh species_setting for spe_nodes and refresh the log-likelihood."""
        self.current_setting = species_setting(spe_nodes=spe_nodes,
                                               root=self.tree,
                                               sp_rate=0,
                                               fix_sp_rate=False,
                                               minbr=self.min_br)
        self.current_logl = self.current_setting.get_log_l()

    def split(self, chosen_anode):
        """Replace the current setting by one that also contains the children of chosen_anode."""
        self.nsplit = self.nsplit + 1
        newspenodes = list(self.current_setting.spe_nodes)
        newspenodes.extend(chosen_anode.get_children())
        self._new_setting(newspenodes)

    def merge(self, chosen_anode):
        """Replace the current setting by one without the children of chosen_anode."""
        self.nmerge = self.nmerge + 1
        mnodes = chosen_anode.get_children()
        newspenodes = [
            node for node in self.current_setting.spe_nodes
            if node not in mnodes
        ]
        self._new_setting(newspenodes)

    def _propose(self, do_split):
        """Apply one split (or merge) proposal and return its acceptance ratio.

        Returns 0.0 when no node is eligible for the move, or when the
        proposed setting offers no node for the reverse move.
        """
        setting = self.current_setting
        if do_split:
            xinverse = setting.get_nodes_can_split()
        else:
            xinverse = setting.get_nodes_can_merge()
        if not xinverse > 0:
            return 0.0
        rdidx = self.rand_nr.randint(0, xinverse - 1)
        if do_split:
            self.split(setting.node_can_split[rdidx])
            xpinverse = self.current_setting.get_nodes_can_merge()
        else:
            self.merge(setting.node_can_merge[rdidx])
            xpinverse = self.current_setting.get_nodes_can_split()
        if not xpinverse > 0:
            return 0.0
        newlogl = self.current_logl
        try:
            acceptance = math.exp(newlogl - self.last_logl) * float(
                xinverse) / float(xpinverse)
        except OverflowError:
            # math.exp raises for very large improvements; such a proposal
            # is certainly above the accept-always threshold of 1.0.
            acceptance = float("inf")
        if newlogl > self.maxllh:
            self.maxllh = newlogl
            _, spe = self.current_setting.output_species(
                taxa_order=self.taxaorder)
            self.maxpar = spe
            self.max_setting = self.current_setting
        return acceptance

    def _record_sample(self):
        """Append the current partition, log-likelihood and setting to the sample lists."""
        _, spe = self.current_setting.output_species(
            taxa_order=self.taxaorder)
        self.partitions.append(spe)
        self.llhs.append(self.current_logl)
        self.settings.append(self.current_setting)

    def mcmc(self):
        """Run the chain for ``self.sampling`` generations.

        Returns:
            Tuple ``(partitions, llhs, settings)`` holding the recorded
            samples. With ``sampling`` of 0 the lists are returned unchanged
            and an acceptance rate of 0.0 is reported.
        """
        cnt = 0
        accepted = 0
        sample_start = int(self.sampling * self.burning)
        printinterval = self.thinning * 100
        while cnt < self.sampling:
            cnt = cnt + 1
            if cnt % printinterval == 0:
                print("MCMC generation: " + repr(cnt))
            self.last_setting = self.current_setting
            self.last_logl = self.current_logl
            # First choose to split or merge, then apply the proposal.
            rdchoice = self.rand_nr.uniform(0.0, 1.0)
            acceptance = self._propose(rdchoice <= 0.5)

            # The second uniform draw only happens when the ratio does not
            # exceed 1.0, which keeps the random stream identical for a seed.
            if acceptance > 1.0:
                accepted = accepted + 1
            elif self.rand_nr.uniform(0.0, 1.0) < acceptance:
                accepted = accepted + 1
            else:
                # Rejected: restore the state saved at the top of the loop.
                self.current_setting = self.last_setting
                self.current_logl = self.last_logl

            if cnt % self.thinning == 0 and cnt >= sample_start:
                self._record_sample()

        # Guard against an empty chain (sampling == 0).
        rate = float(accepted) / float(cnt) if cnt else 0.0
        print("Accptance rate: " + repr(rate))
        print("Merge: " + repr(self.nmerge))
        print("Split: " + repr(self.nsplit))
        return self.partitions, self.llhs, self.settings
Пример #16
0
class ptpmcmc:
	"""MCMC on a single tree using PTP model.

	Each generation proposes either a split or a merge of one node of the
	current species delimitation (a ``species_setting``) and accepts or
	rejects it based on the change in log-likelihood. After the burn-in,
	every ``thinning``-th generation is recorded in ``partitions``, ``llhs``
	and ``settings``. The best-scoring proposal seen is kept in ``maxllh``,
	``maxpar`` and ``max_setting``.
	"""
	def __init__(self, tree, start_config = None, reroot = False, startmethod = "H0", min_br = 0.0001, seed = 1234, thinning = 100, sampling = 10000, burning = 0.1, taxa_order = None):
		"""Set up the chain state.

		Args:
			tree: Tree input; handed to ``exponential_mixture`` when no
				starting configuration is given, otherwise to
				``Tree(tree, format = 1)``.
			start_config: Starting delimitation. When ``None`` one is
				obtained from an ``exponential_mixture`` search.
			reroot: Forwarded to ``exponential_mixture.search`` (only used
				when ``start_config`` is ``None``).
			startmethod: Search strategy forwarded to
				``exponential_mixture.search`` (same condition).
			min_br: Passed as ``minbr`` to every new ``species_setting``.
			seed: Seed for the chain's private ``random.Random`` instance.
			thinning: Record every ``thinning``-th generation; must be
				non-zero because it is used as a modulus.
			sampling: Total number of generations to run.
			burning: Fraction of ``sampling`` skipped before recording.
			taxa_order: Taxon names passed to ``output_species``. ``None``
				or an empty sequence means "use the tree's leaf names".
		"""
		if start_config is None:
			me = exponential_mixture(tree = tree)
			me.search(strategy = startmethod, reroot = reroot)
			me.count_species(print_log = False, pv = 0.0)
			self.tree = me.tree
			self.current_setting = me.max_setting
		else:
			self.current_setting = start_config
			self.tree = Tree(tree, format = 1)
		self.burning = burning
		self.last_setting = self.current_setting
		self.current_logl = self.current_setting.get_log_l()
		self.last_logl = self.last_setting.get_log_l()
		self.min_br = min_br
		self.rand_nr = random.Random()
		self.rand_nr.seed(seed)
		self.thinning = thinning
		self.sampling = sampling
		if not taxa_order:
			self.taxaorder = self.tree.get_leaf_names()
		else:
			self.taxaorder = taxa_order
		self.numtaxa = len(self.taxaorder)
		self.partitions = []
		self.llhs = []
		self.nsplit = 0
		self.nmerge = 0
		# Remember the ML partition.
		self.maxllh = self.current_logl
		_, spe = self.current_setting.output_species(taxa_order = self.taxaorder)
		self.maxpar = spe
		self.max_setting = self.current_setting
		# Record all delimitation settings for plotting; this could consume
		# a lot of memory.
		self.settings = []


	def _new_setting(self, spe_nodes):
		"""Install a fresh species_setting for spe_nodes and refresh the log-likelihood."""
		self.current_setting = species_setting(spe_nodes = spe_nodes, root = self.tree, sp_rate = 0, fix_sp_rate = False, minbr = self.min_br)
		self.current_logl = self.current_setting.get_log_l()


	def split(self, chosen_anode):
		"""Replace the current setting by one that also contains the children of chosen_anode."""
		self.nsplit = self.nsplit + 1
		newspenodes = list(self.current_setting.spe_nodes)
		newspenodes.extend(chosen_anode.get_children())
		self._new_setting(newspenodes)


	def merge(self, chosen_anode):
		"""Replace the current setting by one without the children of chosen_anode."""
		self.nmerge = self.nmerge + 1
		mnodes = chosen_anode.get_children()
		newspenodes = [node for node in self.current_setting.spe_nodes if node not in mnodes]
		self._new_setting(newspenodes)


	def _propose(self, do_split):
		"""Apply one split (or merge) proposal and return its acceptance ratio.

		Returns 0.0 when no node is eligible for the move, or when the
		proposed setting offers no node for the reverse move.
		"""
		setting = self.current_setting
		if do_split:
			xinverse = setting.get_nodes_can_split()
		else:
			xinverse = setting.get_nodes_can_merge()
		if not xinverse > 0:
			return 0.0
		rdidx = self.rand_nr.randint(0, xinverse-1)
		if do_split:
			self.split(setting.node_can_split[rdidx])
			xpinverse = self.current_setting.get_nodes_can_merge()
		else:
			self.merge(setting.node_can_merge[rdidx])
			xpinverse = self.current_setting.get_nodes_can_split()
		if not xpinverse > 0:
			return 0.0
		newlogl = self.current_logl
		try:
			acceptance = math.exp(newlogl - self.last_logl) * float(xinverse)/float(xpinverse)
		except OverflowError:
			# math.exp raises for very large improvements; such a proposal
			# is certainly above the accept-always threshold of 1.0.
			acceptance = float("inf")
		if newlogl > self.maxllh:
			self.maxllh = newlogl
			_, spe = self.current_setting.output_species(taxa_order = self.taxaorder)
			self.maxpar = spe
			self.max_setting = self.current_setting
		return acceptance


	def _record_sample(self):
		"""Append the current partition, log-likelihood and setting to the sample lists."""
		_, spe = self.current_setting.output_species(taxa_order = self.taxaorder)
		self.partitions.append(spe)
		self.llhs.append(self.current_logl)
		self.settings.append(self.current_setting)


	def mcmc(self):
		"""Run the chain for ``self.sampling`` generations.

		Returns:
			Tuple ``(partitions, llhs, settings)`` holding the recorded
			samples. With ``sampling`` of 0 the lists are returned unchanged
			and an acceptance rate of 0.0 is reported.
		"""
		cnt = 0
		accepted = 0
		sample_start = int(self.sampling * self.burning)
		printinterval = self.thinning * 100
		while cnt < self.sampling:
			cnt = cnt + 1
			if cnt % printinterval == 0:
				print("MCMC generation: " + repr(cnt))
			self.last_setting = self.current_setting
			self.last_logl = self.current_logl
			# First choose to split or merge, then apply the proposal.
			rdchoice = self.rand_nr.uniform(0.0,1.0)
			acceptance = self._propose(rdchoice <= 0.5)

			# The second uniform draw only happens when the ratio does not
			# exceed 1.0, which keeps the random stream identical for a seed.
			if acceptance > 1.0:
				accepted = accepted + 1
			elif self.rand_nr.uniform(0.0,1.0) < acceptance:
				accepted = accepted + 1
			else:
				# Rejected: restore the state saved at the top of the loop.
				self.current_setting = self.last_setting
				self.current_logl = self.last_logl

			if cnt % self.thinning == 0 and cnt >= sample_start:
				self._record_sample()

		# Guard against an empty chain (sampling == 0).
		rate = float(accepted)/float(cnt) if cnt else 0.0
		print("Accptance rate: " + repr(rate))
		print("Merge: " + repr(self.nmerge))
		print("Split: " + repr(self.nsplit))
		return self.partitions, self.llhs, self.settings