Exemplo n.º 1
0
 def infer_tree(self, raxml_time_limit=1.0):
     '''
     Infer a tree with tree_infer.main and store it in self.tree.

     tree_infer.main is called with self.viruses, raxml_time_limit and
     config['outgroup'] and returns the name of a newick file. That file is
     run through delimit_newick into the scratch file "temp.newick", which
     is parsed with dendropy and then deleted.
     '''
     import tree_infer
     tree_fname = tree_infer.main(self.viruses, raxml_time_limit,
                                  config['outgroup'])
     delimit_newick(tree_fname, "temp.newick")
     try:
         self.tree = dendropy.Tree.get_from_path("temp.newick", "newick")
     finally:
         # remove the scratch file even when parsing fails
         os.remove('temp.newick')
Exemplo n.º 2
0
    def infer_tree(self, raxml_time_limit):
        '''
        builds a tree from the alignment using fasttree and RAxML. raxml runs
        for at most raxml_time_limit (in hours) and is terminated thereafter.
        raxml_time_limit can be 0, in which case the FastTree topology is
        passed straight to the RAxML branch length optimization.

        All intermediate files are written inside self.run_dir, which is
        created at the start and removed again on success. The final tree is
        stored in self.tree.
        '''
        self.make_run_dir()
        os.chdir(self.run_dir)
        try:
            AlignIO.write(self.viruses, 'temp.fasta', 'fasta')

            print("Building initial tree with FastTree")
            os.system(
                "/Users/yujiazhou/Documents/nextflu/fasttree -gtr -nt -gamma -nosupport -mlacc 2 -slownni temp.fasta > initial_tree.newick"
            )
            self.tree = dendropy.Tree.get_from_string(
                delimit_newick('initial_tree.newick'),
                'newick',
                rooting='force-rooted')
            self.tree.resolve_polytomies()
            self.tree.write_to_path("initial_tree.newick", "newick")

            AlignIO.write(self.viruses, "temp.phyx", "phylip-relaxed")
            if raxml_time_limit > 0:
                print("RAxML tree optimization with time limit " + str(
                    raxml_time_limit) + " hours")
                # using exec to be able to kill process
                end_time = time.time() + int(raxml_time_limit * 3600)
                process = subprocess.Popen(
                    "exec /Users/yujiazhou/Documents/nextflu/raxmlHPC-AVX-v8/raxml -f d -T 6 -j -s temp.phyx -n topology -c 25 -m GTRCAT -p 344312987 -t initial_tree.newick",
                    shell=True)
                while time.time() < end_time:
                    # stop waiting once the result exists or raxml has exited
                    # (e.g. crashed) instead of sleeping until the time limit
                    if (os.path.isfile('RAxML_result.topology')
                            or process.poll() is not None):
                        break
                    time.sleep(10)
                # only signal a process that has not been reaped yet, then
                # wait so it cannot overlap with the next raxml run
                if process.poll() is None:
                    process.terminate()
                process.wait()

                # glob returns files in arbitrary order; sort by modification
                # time so that the most recent checkpoint comes last
                checkpoint_files = sorted(glob.glob("RAxML_checkpoint*"),
                                          key=os.path.getmtime)
                if os.path.isfile('RAxML_result.topology'):
                    checkpoint_files.append('RAxML_result.topology')
                if checkpoint_files:
                    last_tree_file = checkpoint_files[-1]
                    shutil.copy(last_tree_file, 'raxml_tree.newick')
                else:
                    shutil.copy("initial_tree.newick", 'raxml_tree.newick')
            else:
                shutil.copy("initial_tree.newick", 'raxml_tree.newick')

            print("RAxML branch length optimization and rooting")
            os.system(
                "/Users/yujiazhou/Documents/nextflu/raxmlHPC-AVX-v8/raxml -f e -T 6 -s temp.phyx -n branches -c 25 -m GTRGAMMA -p 344312987 -t raxml_tree.newick -o "
                + self.outgroup['strain'])

            out_fname = "tree_infer.newick"
            shutil.copy('RAxML_result.branches', out_fname)
            Phylo.write(Phylo.read(out_fname, 'newick'), 'temp.newick',
                        'newick')
            self.tree = dendropy.Tree.get_from_string(
                delimit_newick(out_fname),
                'newick',
                rooting='force-rooted')
        finally:
            # leave the run directory even if one of the steps above failed
            os.chdir('..')
        self.remove_run_dir()
Exemplo n.º 3
0
def main(viruses, raxml_time_limit, outgroup):
    '''
    Build a tree with FastTree, optionally refine the topology with RAxML
    for at most raxml_time_limit hours, then optimize branch lengths and
    root the tree with RAxML.

    viruses is written to temp.fasta via write_fasta, outgroup is appended
    to the RAxML "-o" option. Returns the path of the resulting newick file
    ("data/tree_infer.newick"). Intermediate files are written to the
    current working directory; cleanup() is called before and after.
    '''

    print("--- Tree infer at " + time.strftime("%H:%M:%S") + " ---")

    cleanup()
    write_fasta(viruses, 'temp.fasta')
    print("Building initial tree with FastTree")
    os.system(
        "./fasttree -gtr -nt -gamma -nosupport -mlacc 2 -slownni temp.fasta > initial_tree.newick"
    )
    delimit_newick("initial_tree.newick", "temp.newick")
    tree = dendropy.Tree.get_from_path("temp.newick", "newick")
    tree.resolve_polytomies()
    tree.write_to_path("initial_tree.newick", "newick")

    os.system("seqmagick convert temp.fasta temp.phyx")
    if raxml_time_limit > 0:
        print("RAxML tree optimization with time limit " + str(
            raxml_time_limit) + " hours")
        # using exec to be able to kill process
        end_time = time.time() + int(raxml_time_limit * 3600)
        process = subprocess.Popen(
            "exec raxml -f d -T 6 -j -s temp.phyx -n topology -c 25 -m GTRCAT -p 344312987 -t initial_tree.newick",
            shell=True)
        while time.time() < end_time:
            # stop waiting once the result exists or raxml has exited
            # (e.g. crashed) instead of sleeping until the time limit
            if (os.path.isfile('RAxML_result.topology')
                    or process.poll() is not None):
                break
            time.sleep(10)
        # only signal a process that has not been reaped yet, then wait so
        # it cannot overlap with the next raxml run
        if process.poll() is None:
            process.terminate()
        process.wait()

        # glob returns files in arbitrary order; sort by modification time
        # so that the most recent checkpoint comes last
        checkpoint_files = sorted(glob.glob("RAxML_checkpoint*"),
                                  key=os.path.getmtime)
        if os.path.isfile('RAxML_result.topology'):
            checkpoint_files.append('RAxML_result.topology')
        if checkpoint_files:
            last_tree_file = checkpoint_files[-1]
            shutil.copy(last_tree_file, 'raxml_tree.newick')
        else:
            shutil.copy("initial_tree.newick", 'raxml_tree.newick')
    else:
        shutil.copy("initial_tree.newick", 'raxml_tree.newick')

    print("RAxML branch length optimization and rooting")
    os.system(
        "raxml -f e -T 6 -s temp.phyx -n branches -c 25 -m GTRGAMMA -p 344312987 -t raxml_tree.newick -o "
        + outgroup)

    out_fname = "data/tree_infer.newick"
    os.rename('RAxML_result.branches', out_fname)
    cleanup()
    return out_fname
Exemplo n.º 4
0
def main(viruses, raxml_time_limit, outgroup):
    """
    Build a tree with FastTree, optionally refine the topology with RAxML
    for at most raxml_time_limit hours, then optimize branch lengths and
    root the tree with RAxML.

    viruses is written to temp.fasta via write_fasta, outgroup is appended
    to the RAxML "-o" option. Returns the path of the resulting newick file
    ("data/tree_infer.newick"). Intermediate files are written to the
    current working directory; cleanup() is called before and after.
    """

    print("--- Tree infer at " + time.strftime("%H:%M:%S") + " ---")

    cleanup()
    write_fasta(viruses, "temp.fasta")
    print("Building initial tree with FastTree")
    os.system("fasttree -gtr -nt -gamma -nosupport -mlacc 2 -slownni temp.fasta > initial_tree.newick")
    delimit_newick("initial_tree.newick", "temp.newick")
    tree = dendropy.Tree.get_from_path("temp.newick", "newick")
    tree.resolve_polytomies()
    tree.write_to_path("initial_tree.newick", "newick")

    os.system("seqmagick convert temp.fasta temp.phyx")
    if raxml_time_limit > 0:
        print("RAxML tree optimization with time limit " + str(raxml_time_limit) + " hours")
        # using exec to be able to kill process
        end_time = time.time() + int(raxml_time_limit * 3600)
        process = subprocess.Popen(
            "exec raxml -f d -T 6 -j -s temp.phyx -n topology -c 25 -m GTRCAT -p 344312987 -t initial_tree.newick",
            shell=True,
        )
        while time.time() < end_time:
            # stop waiting once the result exists or raxml has exited
            # (e.g. crashed) instead of sleeping until the time limit
            if os.path.isfile("RAxML_result.topology") or process.poll() is not None:
                break
            time.sleep(10)
        # only signal a process that has not been reaped yet, then wait so
        # it cannot overlap with the next raxml run
        if process.poll() is None:
            process.terminate()
        process.wait()

        # glob returns files in arbitrary order; sort by modification time
        # so that the most recent checkpoint comes last
        checkpoint_files = sorted(glob.glob("RAxML_checkpoint*"), key=os.path.getmtime)
        if os.path.isfile("RAxML_result.topology"):
            checkpoint_files.append("RAxML_result.topology")
        if checkpoint_files:
            last_tree_file = checkpoint_files[-1]
            shutil.copy(last_tree_file, "raxml_tree.newick")
        else:
            shutil.copy("initial_tree.newick", "raxml_tree.newick")
    else:
        shutil.copy("initial_tree.newick", "raxml_tree.newick")

    print("RAxML branch length optimization and rooting")
    os.system(
        "raxml -f e -T 6 -s temp.phyx -n branches -c 25 -m GTRGAMMA -p 344312987 -t raxml_tree.newick -o " + outgroup
    )

    out_fname = "data/tree_infer.newick"
    os.rename("RAxML_result.branches", out_fname)
    cleanup()
    return out_fname
Exemplo n.º 5
0
	def infer_tree(self, raxml_time_limit):
		'''
		builds a tree from the alignment using fasttree and RAxML. raxml runs
		for at most raxml_time_limit (in hours) and is terminated thereafter.
		raxml_time_limit can be 0, in which case the FastTree topology is
		passed straight to the RAxML branch length optimization.

		All intermediate files are written inside self.run_dir, which is
		created at the start and removed again on success. The final tree is
		stored in self.tree.
		'''
		self.make_run_dir()
		os.chdir(self.run_dir)
		try:
			AlignIO.write(self.viruses, 'temp.fasta', 'fasta')

			print("Building initial tree with FastTree")
			os.system("fasttree -gtr -nt -gamma -nosupport -mlacc 2 -slownni temp.fasta > initial_tree.newick")
			self.tree = dendropy.Tree.get_from_string(delimit_newick('initial_tree.newick'), 'newick', as_rooted=True)
			self.tree.resolve_polytomies()
			self.tree.write_to_path("initial_tree.newick", "newick")

			AlignIO.write(self.viruses, "temp.phyx", "phylip-relaxed")
			if raxml_time_limit > 0:
				print("RAxML tree optimization with time limit " + str(raxml_time_limit) + " hours")
				# using exec to be able to kill process
				end_time = time.time() + int(raxml_time_limit*3600)
				process = subprocess.Popen("exec raxml -f d -T 6 -j -s temp.phyx -n topology -c 25 -m GTRCAT -p 344312987 -t initial_tree.newick", shell=True)
				while time.time() < end_time:
					# stop waiting once the result exists or raxml has exited
					# (e.g. crashed) instead of sleeping until the time limit
					if os.path.isfile('RAxML_result.topology') or process.poll() is not None:
						break
					time.sleep(10)
				# only signal a process that has not been reaped yet, then
				# wait so it cannot overlap with the next raxml run
				if process.poll() is None:
					process.terminate()
				process.wait()

				# glob returns files in arbitrary order; sort by modification
				# time so that the most recent checkpoint comes last
				checkpoint_files = sorted(glob.glob("RAxML_checkpoint*"), key=os.path.getmtime)
				if os.path.isfile('RAxML_result.topology'):
					checkpoint_files.append('RAxML_result.topology')
				if checkpoint_files:
					last_tree_file = checkpoint_files[-1]
					shutil.copy(last_tree_file, 'raxml_tree.newick')
				else:
					shutil.copy("initial_tree.newick", 'raxml_tree.newick')
			else:
				shutil.copy("initial_tree.newick", 'raxml_tree.newick')

			print("RAxML branch length optimization and rooting")
			os.system("raxml -f e -T 6 -s temp.phyx -n branches -c 25 -m GTRGAMMA -p 344312987 -t raxml_tree.newick -o " + self.outgroup['strain'])

			out_fname = "tree_infer.newick"
			shutil.copy('RAxML_result.branches', out_fname)
			Phylo.write(Phylo.read(out_fname, 'newick'), 'temp.newick', 'newick')
			self.tree = dendropy.Tree.get_from_string(delimit_newick(out_fname), 'newick', as_rooted=True)
		finally:
			# leave the run directory even if one of the steps above failed
			os.chdir('..')
		self.remove_run_dir()
Exemplo n.º 6
0
	def infer_tree(self, raxml_time_limit = 1.0):
		'''
		Infer a tree with tree_infer.main and store it in self.tree.

		tree_infer.main is called with self.viruses, raxml_time_limit and
		config['outgroup'] and returns the name of a newick file. That file is
		run through delimit_newick into the scratch file "temp.newick", which
		is parsed with dendropy and then deleted.
		'''
		import tree_infer
		tree_fname = tree_infer.main(self.viruses, raxml_time_limit, config['outgroup'])
		delimit_newick(tree_fname, "temp.newick")
		try:
			self.tree = dendropy.Tree.get_from_path("temp.newick", "newick")
		finally:
			# remove the scratch file even when parsing fails
			os.remove('temp.newick')
Exemplo n.º 7
0
	def infer_tree(self, raxml_time_limit):
		'''
		builds a tree from the alignment using fasttree and RAxML. raxml runs
		for at most raxml_time_limit (in hours) and is terminated thereafter.
		raxml_time_limit can be 0, in which case RAxML is skipped entirely and
		the FastTree tree is used.

		When RAxML is skipped, the outgroup (self.outgroup['strain']) is looked
		up among the tree tips; if it is missing, self.midpoint_rooting is set.
		If self.midpoint_rooting is true at the end, the tree is rerooted at
		its midpoint.

		All intermediate files are written inside self.run_dir, which is
		created at the start and removed again on success. The final tree is
		stored in self.tree.
		'''
		self.make_run_dir()
		os.chdir(self.run_dir)
		try:
			AlignIO.write(self.viruses, 'temp.fasta', 'fasta')

			print("Building initial tree with FastTree")
			os.system("fasttree -gtr -nt -gamma -nosupport temp.fasta 1> initial_tree.newick 2>fasttree.out")
			# use the same rooting keyword as the final parse below instead of
			# the legacy as_rooted=True spelling
			self.tree = dendropy.Tree.get_from_string(delimit_newick('initial_tree.newick'), 'newick', rooting="force-rooted")
			self.tree.resolve_polytomies()
			self.tree.write_to_path("initial_tree.newick", "newick")

			AlignIO.write(self.viruses, "temp.phyx", "phylip-relaxed")
			if raxml_time_limit > 0:
				print("RAxML tree optimization with time limit " + str(raxml_time_limit) + " hours")
				# using exec to be able to kill process
				end_time = time.time() + int(raxml_time_limit*3600)
				process = subprocess.Popen("exec raxmlHPC -f d -T "+str(self.nthreads) +  " -j -s temp.phyx -n topology -c 25 -m GTRCAT -p 344312987 -t initial_tree.newick >raxml.out", shell=True)
				while time.time() < end_time:
					# stop waiting once the result exists or raxml has exited
					# (e.g. crashed) instead of sleeping until the time limit
					if os.path.isfile('RAxML_result.topology') or process.poll() is not None:
						break
					time.sleep(10)
				# only signal a process that has not been reaped yet, then
				# wait so it cannot overlap with the next raxml run
				if process.poll() is None:
					process.terminate()
				process.wait()

				# glob returns files in arbitrary order; sort by modification
				# time so that the most recent checkpoint comes last
				checkpoint_files = sorted(glob.glob("RAxML_checkpoint*"), key=os.path.getmtime)
				if os.path.isfile('RAxML_result.topology'):
					checkpoint_files.append('RAxML_result.topology')
				if checkpoint_files:
					last_tree_file = checkpoint_files[-1]
					shutil.copy(last_tree_file, 'raxml_tree.newick')
				else:
					shutil.copy("initial_tree.newick", 'raxml_tree.newick')
			else:
				shutil.copy("initial_tree.newick", 'raxml_tree.newick')

			if raxml_time_limit > 0:
				print("RAxML branch length optimization and rooting")
				os.system("raxmlHPC -f e -T "+str(self.nthreads) +  " -s temp.phyx -n branches -c 25 -m GTRGAMMA -p 344312987 -t raxml_tree.newick -o " + self.outgroup['strain']+ ' >raxml2.out')
				raxml_rooted = True
			else:
				shutil.copy('raxml_tree.newick', 'RAxML_result.branches')
				raxml_rooted = False

			out_fname = "tree_infer.newick"
			shutil.copy('RAxML_result.branches', out_fname)
			if not raxml_rooted:
				with open(out_fname) as ofile:
					tstr = "".join([x.strip() for x in ofile])
				out_fname = out_fname+'_fixed'
				# drop a leading rooting annotation if present
				if tstr.startswith('[&R]'):
					tstr = tstr[4:]
				# always write the "_fixed" file: it is read right below, so
				# it must exist even when there was no prefix to strip
				with open(out_fname, 'w') as ofile:
					ofile.write(tstr+'\n')
				T = Phylo.read(out_fname, 'newick')

				# NOTE(review): only the presence of the outgroup is checked
				# here; the tree is not rerooted on it in this method.
				try:
					has_outgroup = any(c.name == self.outgroup['strain'] for c in T.get_terminals())
				except KeyError:
					has_outgroup = False
				if not has_outgroup:
					print("Can't find outgroup in tree -- midpoint_rooting")
					self.midpoint_rooting = True
			else:
				T = Phylo.read(out_fname, 'newick')

			Phylo.write(T, 'temp.newick', 'newick')
			self.tree = dendropy.Tree.get_from_string(delimit_newick(out_fname), 'newick', rooting="force-rooted")
		finally:
			# leave the run directory even if one of the steps above failed
			os.chdir('..')
		self.remove_run_dir()
		# midpoint_rooting is only assigned above when the outgroup is missing;
		# treat an unset attribute as False
		if getattr(self, 'midpoint_rooting', False):
			self.tree.reroot_at_midpoint()