def pxrr(tree_file, output_dir):
    """Unroot the tree returned by treeshrink.

    Runs ``pxrr --unroot`` from inside ``output_dir`` and removes the
    ``phyx.logfile`` left behind by the run.

    Args:
        tree_file: Path of the tree file handed to ``pxrr --treef``.
        output_dir: Directory the command is executed in.

    Returns:
        The output name produced by ``util.file_name(tree_file)``, which is
        also the value given to ``pxrr --outf``.

    Raises:
        subprocess.CalledProcessError: If the command exits non-zero.
    """
    # Local import: the module's import block is not visible from this block.
    import shlex

    unrooted = util.file_name(tree_file)

    # The command goes through the shell, so every path is quoted; otherwise
    # a space or shell metacharacter in a path would split or alter the
    # command line.
    cmd = ' '.join([
        'pxrr',
        '--unroot',
        '--treef {}'.format(shlex.quote(str(tree_file))),
        '--outf {}'.format(shlex.quote(str(unrooted))),
    ])

    with util.cd(output_dir):
        subprocess.check_call(cmd, shell=True)
        # The log file name is relative, so clean up while still in
        # output_dir.
        util.remove_files('phyx.logfile')

    return unrooted
def raxml(fasta_file, output_dir, output_ext, seq_type, cpus, seed):
    """Build a tree with raxml.

    Runs ``raxml`` from inside ``output_dir``, renames the resulting
    ``RAxML_bestTree.<name>`` file to ``<name>`` and removes whatever
    ``util.remove_files('RAxML_*')`` matches afterwards.

    Args:
        fasta_file: Alignment passed to ``raxml -s``.
        output_dir: Directory the command is executed in.
        output_ext: Extension handed to ``util.file_name`` to build the
            output tree name.
        seq_type: ``"aa"`` selects the ``PROTCATWAG`` model; any other value
            selects ``GTRCAT``.
        cpus: Value passed to ``raxml -T``.
        seed: Value passed to ``raxml -p``.

    Returns:
        The tree name produced by ``util.file_name(fasta_file, output_ext)``.

    Raises:
        subprocess.CalledProcessError: If the command exits non-zero.
    """
    # Local import: the module's import block is not visible from this block.
    import shlex

    model = "PROTCATWAG" if seq_type == "aa" else "GTRCAT"
    tree = util.file_name(fasta_file, output_ext)

    # Paths are quoted because the command is run through the shell; an
    # unquoted path with spaces or metacharacters would break the command.
    cmd = ' '.join([
        'raxml',
        '-T {}'.format(cpus),
        '-p {}'.format(seed),
        '-m {}'.format(model),
        '-s {}'.format(shlex.quote(str(fasta_file))),
        '-n {}'.format(shlex.quote(str(tree))),
    ])

    with util.cd(output_dir):
        subprocess.check_call(cmd, shell=True)

        # Keep only the best tree, under the requested output name, then
        # drop the remaining RAxML_* files.
        tree_src = 'RAxML_bestTree.' + tree
        move(tree_src, tree)
        util.remove_files('RAxML_*')

    return tree
def pasta(fasta_file, output_dir, output_ext, seq_type, cpus):
    """Align sequences.

    Runs ``run_pasta.py`` from inside ``output_dir``, renames the alignment
    it is expected to leave at ``pastajob.marker001.<base><EXT>`` to
    ``<base><output_ext>`` and removes whatever
    ``util.remove_files('pastajob*')`` matches afterwards.

    Args:
        fasta_file: Sequences to align. When ``seq_type`` is ``'aa'`` the
            file is first passed through ``bio.adjust_aa_seqs`` and the
            result of that call is aligned instead.
        output_dir: Directory the command is executed in; its absolute path
            is also given to ``--output-directory``.
        output_ext: Extension appended to the input's base name to form the
            name of the aligned file.
        seq_type: ``'aa'`` selects the ``Protein`` datatype; any other value
            selects ``DNA``.
        cpus: Value passed to ``--num-cpus``.

    Returns:
        The name of the aligned file (base name of ``fasta_file`` without
        its extension, plus ``output_ext``).

    Raises:
        FileNotFoundError: If ``run_pasta.py`` cannot be located.
        subprocess.CalledProcessError: If the command exits non-zero.
    """
    # Local import: the module's import block is not visible from this block.
    import shlex

    is_protein = seq_type == 'aa'

    in_path = fasta_file
    if is_protein:
        in_path = bio.adjust_aa_seqs(fasta_file, output_dir)

    # Fail with a clear message instead of the TypeError that str.join
    # raises when handed None.
    pasta_exe = which('run_pasta.py')
    if not pasta_exe:
        raise FileNotFoundError('Could not locate run_pasta.py')

    # shlex.quote also copes with paths containing single quotes, which the
    # previous hand-written '...' wrapping did not.
    cmd = ' '.join([
        shlex.quote(str(pasta_exe)),
        '--datatype {}'.format('Protein' if is_protein else 'DNA'),
        '--num-cpus {}'.format(cpus),
        '--input {}'.format(shlex.quote(str(in_path))),
        '--output-directory {}'.format(shlex.quote(abspath(output_dir))),
    ])

    with util.cd(output_dir):
        subprocess.check_call(cmd, shell=True)

        # Rename the alignment to the requested output name, then clean up
        # the remaining pastajob* files.
        base_name = splitext(basename(fasta_file))[0]
        temp_aligned = 'pastajob.marker001.' + base_name + EXT
        aligned = base_name + output_ext
        move(temp_aligned, aligned)
        util.remove_files('pastajob*')

    return aligned
def pxclsq(fasta_file, output_dir, output_ext, seq_type, min_occupancy,
           min_len):
    """Filter aligned sequences for occupancy and length.

    Runs ``pxclsq`` from inside ``output_dir`` and then copies to the final
    output only those records whose sequence, with ``-`` characters removed,
    is at least ``min_len`` long.

    Args:
        fasta_file: Alignment passed to ``pxclsq --seqf``.
        output_dir: Directory the command is executed in.
        output_ext: Extension handed to ``util.file_name`` to build the
            final output name; ``output_ext + EXT_PXCLSQ`` is used for the
            intermediate pxclsq output.
        seq_type: ``'aa'`` adds the ``--aminoacid`` flag.
        min_occupancy: Value passed to ``pxclsq --prop``.
        min_len: Minimum number of non-``-`` characters a sequence must have
            to be kept.

    Returns:
        The output name produced by
        ``util.file_name(fasta_file, output_ext)``.

    Raises:
        subprocess.CalledProcessError: If the command exits non-zero.
    """
    # Local import: the module's import block is not visible from this block.
    import shlex

    ext = output_ext + EXT_PXCLSQ
    temp_cleaned = util.file_name(fasta_file, ext)

    # Build the argument list without an empty placeholder for the optional
    # flag, and quote paths because the command is run through the shell.
    args = ['pxclsq']
    if seq_type == 'aa':
        args.append('--aminoacid')
    args.extend([
        '--prop {}'.format(min_occupancy),
        '--seqf {}'.format(shlex.quote(str(fasta_file))),
        '--outf {}'.format(shlex.quote(basename(temp_cleaned))),
    ])
    cmd = ' '.join(args)

    cleaned = util.file_name(fasta_file, output_ext)

    with util.cd(output_dir):
        subprocess.check_call(cmd, shell=True)

        # Second pass: apply the length filter to pxclsq's output.
        # NOTE(review): the intermediate file (temp_cleaned) is not removed
        # here; confirm that leaving it in output_dir is intended.
        with open(temp_cleaned) as in_file, open(cleaned, 'w') as out_file:
            for header, seq in SimpleFastaParser(in_file):
                if len(seq.replace('-', '')) >= min_len:
                    bio.write_fasta_record(out_file, header, seq)

        util.remove_files('phyx.logfile')

    return cleaned
def raxml_bs(fasta_file, output_dir, output_ext, seq_type, cpus, seed,
             replicates=100):
    """Build a bootstrapped tree with raxml.

    Runs ``raxml -f a`` from inside ``output_dir``, renames the resulting
    ``RAxML_bipartitions.<name>`` file to ``<name>`` and removes whatever
    ``util.remove_files('RAxML_*')`` matches afterwards.

    Args:
        fasta_file: Alignment passed to ``raxml -s``.
        output_dir: Directory the command is executed in.
        output_ext: Extension handed to ``util.file_name`` to build the
            output tree name.
        seq_type: ``"aa"`` selects the ``PROTCATWAG`` model; any other value
            selects ``GTRCAT``.
        cpus: Value passed to ``raxml -T``.
        seed: Value passed to both ``raxml -x`` and ``raxml -p``.
        replicates: Value passed to ``raxml -#``.

    Returns:
        The tree name produced by ``util.file_name(fasta_file, output_ext)``.

    Raises:
        subprocess.CalledProcessError: If the command exits non-zero.
    """
    # Local import: the module's import block is not visible from this block.
    import shlex

    model = "PROTCATWAG" if seq_type == "aa" else "GTRCAT"
    tree = util.file_name(fasta_file, output_ext)

    # Paths are quoted because the command is run through the shell; an
    # unquoted path with spaces or metacharacters would break the command.
    cmd = ' '.join([
        'raxml',
        '-T {}'.format(cpus),
        '-f a',
        '-x {}'.format(seed),
        '-p {}'.format(seed),
        '-m {}'.format(model),
        '-# {}'.format(replicates),
        '-s {}'.format(shlex.quote(str(fasta_file))),
        '-n {}'.format(shlex.quote(str(tree))),
    ])

    with util.cd(output_dir):
        subprocess.check_call(cmd, shell=True)

        # Keep only the bipartitions tree, under the requested output name,
        # then drop the remaining RAxML_* files.
        tree_src = 'RAxML_bipartitions.' + tree
        move(tree_src, tree)
        util.remove_files('RAxML_*')

    return tree