Exemplo n.º 1
0
    def test_split_ref_study_papara(self):
        '''Check that split_ref_study_papara writes the expected reference and
        study FASTA files when given a PaPaRa alignment.'''

        # Load the PaPaRa alignment that will be split.
        alignment = read_phylip(exp_papara_phylip)

        # Load the expected reference and study sequences.
        expected_ref = read_fasta(exp_ref_fasta)
        expected_study = read_fasta(exp_study_fasta)

        with TemporaryDirectory() as tmp:
            ref_path = path.join(tmp, "ref_test.fna")
            study_path = path.join(tmp, "study_test.fna")

            # Reference sequence ids are taken from the expected reference
            # FASTA.
            split_ref_study_papara(papara_out=alignment,
                                   ref_seqnames=set(expected_ref.keys()),
                                   ref_fasta=ref_path,
                                   study_fasta=study_path)

            # Read the files back before the temporary directory is removed.
            observed_ref = read_fasta(ref_path)
            observed_study = read_fasta(study_path)

        self.assertEqual([expected_ref, expected_study],
                         [observed_ref, observed_study])
Exemplo n.º 2
0
def run_papara(tree: str, ref_msa: dict, study_fasta: str, out_dir: str,
               threads: int = 1, print_cmds: bool = False) -> dict:
    '''Run PaPaRa to place study sequences into reference multiple-sequence
    alignment (MSA). Will return dictionary of the output MSA (sequence ids
    as keys). Expects path to tree and study FASTA as strings. Expects
    reference MSA as a dictionary output by read_fasta. This MSA will be
    converted to phylip format before running PaPaRa.

    The working directory is changed to out_dir while PaPaRa runs and is
    always restored afterwards, even if writing the reference phylip file
    or the PaPaRa call raises an exception.'''

    # Get absolute paths to input files, since the working directory is
    # changed below and relative paths would no longer resolve.
    tree = path.abspath(tree)
    study_fasta = path.abspath(study_fasta)

    # Change working directory to out directory (but keep track of original).
    # This is necessary because PaPaRa outputs into the current working
    # directory.
    orig_wd = getcwd()
    chdir(out_dir)

    try:
        # Convert ref sequences from MSA FASTA to phylip.
        write_phylip(ref_msa, "ref_seqs.phylip")

        # Make call to papara to place sequences (outputs phylip format).
        system_call_check("papara -t " + tree + " -s ref_seqs.phylip " +
                          "-q " + study_fasta + " -j " + str(threads) +
                          " -n out", print_command=print_cmds,
                          print_stdout=print_cmds, print_stderr=print_cmds)
    finally:
        # Change back to original working directory, whether or not the
        # steps above succeeded, so a failure does not leave the caller in
        # out_dir.
        chdir(orig_wd)

    # Read in papara phylip output and return. This happens after the
    # original working directory is restored so that a relative out_dir
    # still resolves correctly.
    return read_phylip(path.join(out_dir, "papara_alignment.out"),
                       check_input=True)
Exemplo n.º 3
0
    def test_run_papara(self):
        '''Check that run_papara returns the expected PaPaRa alignment.'''

        expected = read_phylip(exp_papara_phylip)
        ref_alignment = read_fasta(test_msa)

        # Use a temporary directory as the output directory.
        with TemporaryDirectory() as tmp:
            observed = run_papara(tree=test_tree,
                                  ref_msa=ref_alignment,
                                  study_fasta=test_study_seqs,
                                  out_dir=tmp)

        self.assertEqual(expected, observed)
Exemplo n.º 4
0
def place_seqs_pipeline(study_fasta, ref_msa, tree, out_tree, threads,
                        papara_output, out_dir, chunk_size, print_cmds):
    '''Full pipeline for running sequence placement.

    Steps, in order:
      1. Read the reference MSA FASTA (path given by ref_msa) with
         read_fasta.
      2. Obtain the PaPaRa alignment: read it from papara_output if that
         argument is truthy, otherwise run PaPaRa via run_papara.
      3. Split the alignment into study and reference FASTA files written
         under out_dir.
      4. Run EPA-NG via run_epa_ng, using "epa_out" under out_dir as its
         output directory.
      5. Pass the resulting "epa_result.jplace" file to
         gappa_jplace_to_newick with out_tree as the output file.'''

    # Read in ref seqs FASTA as a dict.
    ref_seqs = read_fasta(ref_msa)

    # Either read in PaPaRa output or run it.
    if papara_output:
        # Read in PaPaRa output if already done.
        papara_out = read_phylip(papara_output, check_input=True)

    else:
        # Run PaPaRa to place study sequences and read in Phylip file.
        papara_out = run_papara(tree=tree,
                                ref_msa=ref_seqs,
                                study_fasta=study_fasta,
                                out_dir=out_dir,
                                threads=threads,
                                print_cmds=print_cmds)

    # Specify split FASTA files to be created.
    study_msa_fastafile = path.join(out_dir, "study_seqs_papara.fasta")
    ref_msa_fastafile = path.join(out_dir, "ref_seqs_papara.fasta")

    # Split PaPaRa output into two FASTA files containing study and reference
    # sequences respectively. Iterating the dict yields its keys (the
    # sequence ids), so no intermediate list is needed.
    split_ref_study_papara(papara_out=papara_out,
                           ref_seqnames=set(ref_seqs),
                           study_fasta=study_msa_fastafile,
                           ref_fasta=ref_msa_fastafile)

    # Run EPA-NG to output .jplace file.
    epa_out_dir = path.join(out_dir, "epa_out")

    run_epa_ng(tree=tree,
               ref_msa_fastafile=ref_msa_fastafile,
               study_msa_fastafile=study_msa_fastafile,
               chunk_size=chunk_size,
               threads=threads,
               out_dir=epa_out_dir,
               print_cmds=print_cmds)

    jplace_outfile = path.join(epa_out_dir, "epa_result.jplace")

    # Hand the .jplace file to gappa_jplace_to_newick, writing to out_tree.
    gappa_jplace_to_newick(jplace_file=jplace_outfile,
                           outfile=out_tree,
                           print_cmds=print_cmds)
Exemplo n.º 5
0
    def test_read_write_phylip(self):
        '''Round-trip check: sequences written by write_phylip are read back
        unchanged by read_phylip.'''

        written = {"seq1": "GNATNGAC",
                   "seq2": "GTCGTGGC",
                   "seq3": "GNCTGAGA"}

        # Write to a file in a temporary directory, then parse it again
        # before the directory is cleaned up.
        with TemporaryDirectory() as tmp:
            phylip_path = path.join(tmp, "test.phylip")
            write_phylip(written, phylip_path)
            recovered = read_phylip(phylip_path)

        self.assertEqual(written, recovered)