示例#1
0
    def test_create_temporary_phylip(self, temp_mock):
        temp_file = StringIO()
        temp_mock.return_value = temp_file

        builder = TreeBuilder()

        fasta_file = StringIO("""\
>cluster_A
NNAACAAAANN
>cluster_B
CCAACAAAANN
""")

        expected_output = """\
 2 11
cluster_A  NNAACAAAAN N
cluster_B  CCAACAAAAN N
"""

        builder.load_fasta_sequences(fasta_file)
        output_file = builder._create_temporary_phylip(builder.sequences)

        self.assertEqual(output_file, temp_file)

        output_file.seek(0)
        self.assertEqual(output_file.read(), expected_output)
示例#2
0
    def test_run_fastml(self):
        # Calls _run_fastml with the 'fastml' command name on the animals
        # fixtures and checks that both 'all_nodes' files show up in the
        # output directory.
        builder = TreeBuilder()

        fasta_filename = os.path.join(test_data(), 'animals.mfa')
        tree_filename = os.path.join(test_data(),
                                     'animals.terminal_nodes.newick')

        output_directory = tempfile.mkdtemp()
        try:
            # fastml doesn't like comments in fasta files, guesses it is a
            # different format and gets confused when a sequence has a '>' in
            # it.  I'm therefore creating a temporary file without the
            # comments in it.
            fasta_file_without_comments = create_fasta_without_comments(
                fasta_filename)
            try:
                fastml_stdout, fastml_stderr = builder._run_fastml(
                    'fastml', {}, tree_filename, fasta_file_without_comments,
                    output_directory)

                self.assertTrue(
                    os.path.isfile(
                        os.path.join(output_directory, 'all_nodes.newick')))
                self.assertTrue(
                    os.path.isfile(
                        os.path.join(output_directory, 'all_nodes.mfa')))
            finally:
                # Remove the comment-free copy even when the run or an
                # assertion above fails.
                os.remove(fasta_file_without_comments)
        finally:
            # Always discard the scratch directory so failed runs do not
            # leave temporary data behind.
            shutil.rmtree(output_directory)
示例#3
0
  def test_build_tree(self, temp_mock):
    # Builds a tree from the animals fixture and checks its type, its
    # terminal node names, and that the temp file and folder handed out by
    # `temp_mock` no longer exist afterwards.
    builder = TreeBuilder()

    # I could mock _run_raxml but, because I'm using a regex to parse the
    # results, I want to know straight away if the output of RAxML changes in a
    # way that breaks this.

    # Real temp artefacts whose paths are known to the test, so their removal
    # can be asserted below.
    random_file = tempfile.NamedTemporaryFile('w', delete=False)
    random_filename = random_file.name
    random_folder = tempfile.mkdtemp()

    temp_mock.NamedTemporaryFile.return_value = random_file
    temp_mock.mkdtemp.return_value = random_folder

    # Context manager so the fixture handle is closed even if loading raises.
    with open(os.path.join(test_data(), 'animals.mfa'), 'r') as fasta_file:
      builder.load_fasta_sequences(fasta_file)

    tree = builder.build_tree()

    tree_nodes = sorted([node.name for node in tree.get_terminals()])
    expected_tree_nodes = ["Carp", "Chicken", "Cow", "Frog", "Human", "Loach",
                           "Mouse", "Rat", "Seal", "Whale"]

    self.assertIsInstance(tree, Bio.Phylo.Newick.Tree)
    self.assertEqual(tree_nodes, expected_tree_nodes)
    self.assertFalse(os.path.isdir(random_folder))
    self.assertFalse(os.path.isfile(random_filename))
示例#4
0
    def test_build_tree(self, temp_mock):
        # Builds a tree from the animals fixture and checks its type, its
        # terminal node names, and that the temp file and folder handed out
        # by `temp_mock` no longer exist afterwards.
        builder = TreeBuilder()

        # I could mock _run_raxml but, because I'm using a regex to parse the
        # results, I want to know straight away if the output of RAxML changes in a
        # way that breaks this.

        # Real temp artefacts whose paths are known to the test, so their
        # removal can be asserted below.
        random_file = tempfile.NamedTemporaryFile('w', delete=False)
        random_filename = random_file.name
        random_folder = tempfile.mkdtemp()

        temp_mock.NamedTemporaryFile.return_value = random_file
        temp_mock.mkdtemp.return_value = random_folder

        # Context manager so the fixture handle is closed even if loading
        # raises.
        fasta_path = os.path.join(test_data(), 'animals.mfa')
        with open(fasta_path, 'r') as fasta_file:
            builder.load_fasta_sequences(fasta_file)

        tree = builder.build_tree()

        tree_nodes = sorted([node.name for node in tree.get_terminals()])
        expected_tree_nodes = [
            "Carp", "Chicken", "Cow", "Frog", "Human", "Loach", "Mouse", "Rat",
            "Seal", "Whale"
        ]

        self.assertIsInstance(tree, Bio.Phylo.Newick.Tree)
        self.assertEqual(tree_nodes, expected_tree_nodes)
        self.assertFalse(os.path.isdir(random_folder))
        self.assertFalse(os.path.isfile(random_filename))
示例#5
0
  def test_create_temporary_phylip(self, temp_mock):
    temp_file = StringIO()
    temp_mock.return_value = temp_file

    builder = TreeBuilder()

    fasta_file = StringIO("""\
>cluster_A
NNAACAAAANN
>cluster_B
CCAACAAAANN
""")

    expected_output = """\
 2 11
cluster_A  NNAACAAAAN N
cluster_B  CCAACAAAAN N
"""

    builder.load_fasta_sequences(fasta_file)
    output_file = builder._create_temporary_phylip(builder.sequences)

    self.assertEqual(output_file, temp_file)

    output_file.seek(0)
    self.assertEqual(output_file.read(), expected_output)
示例#6
0
  def test_write_tree(self):
    # Round trip: a tree parsed from the newick fixture, written back through
    # _write_tree, must reproduce the fixture text exactly.
    newick_path = os.path.join(test_data(), 'animals.terminal_nodes.newick')

    tree_builder = TreeBuilder()
    parsed_tree = Bio.Phylo.read(newick_path, 'newick')

    sink = StringIO()
    tree_builder._write_tree(parsed_tree, sink)

    with open(newick_path, 'r') as newick_file:
      fixture_text = newick_file.read()

    # Rewind the in-memory sink before reading what was written.
    sink.seek(0)
    written_text = sink.read()

    self.assertEqual(written_text, fixture_text)
示例#7
0
    def test_write_output(self):
        # write_output() should hand the stored tree and the list of values
        # from `sequences` to the two private writers, each paired with its
        # own output file.
        stored_tree = "Tree"
        stored_sequences = {"seq1": "a sequence object"}

        builder = TreeBuilder()
        builder.sequences = stored_sequences
        builder.tree = stored_tree
        builder.sequences_output_file = StringIO()
        builder.tree_output_file = StringIO()

        # Swap both writers for mocks so only the delegation is observed.
        builder._write_tree = MagicMock()
        builder._write_sequences = MagicMock()

        builder.write_output()

        tree_call_args = (stored_tree, builder.tree_output_file)
        builder._write_tree.assert_called_with(*tree_call_args)

        sequences_call_args = (["a sequence object"],
                               builder.sequences_output_file)
        builder._write_sequences.assert_called_with(*sequences_call_args)
示例#8
0
    def test_write_tree(self):
        # Round trip: a tree parsed from the newick fixture, written back
        # through _write_tree, must reproduce the fixture text exactly.
        newick_path = os.path.join(test_data(),
                                   'animals.terminal_nodes.newick')

        tree_builder = TreeBuilder()
        parsed_tree = Bio.Phylo.read(newick_path, 'newick')

        sink = StringIO()
        tree_builder._write_tree(parsed_tree, sink)

        with open(newick_path, 'r') as newick_file:
            fixture_text = newick_file.read()

        # Rewind the in-memory sink before reading what was written.
        sink.seek(0)
        written_text = sink.read()

        self.assertEqual(written_text, fixture_text)
示例#9
0
  def test_run_raxml(self):
    # Calls _run_raxml with the 'raxmlHPC' command name on a PHYLIP file built
    # from the animals fixture and checks that stdout announces the
    # best-scoring tree.
    builder = TreeBuilder()

    # Context manager so the fixture handle is closed even if loading raises.
    with open(os.path.join(test_data(), 'animals.mfa'), 'r') as fasta_file:
      builder.load_fasta_sequences(fasta_file)

    phylip_file = builder._create_temporary_phylip(builder.sequences)
    try:
      # Close the handle before passing the file on by name.
      phylip_file.close()

      output_directory = tempfile.mkdtemp()
      try:
        raxml_stdout, raxml_stderr = builder._run_raxml(
            'raxmlHPC', {}, phylip_file.name, output_directory)

        # assertIn reports both operands on failure, unlike assertTrue.
        self.assertIn('Best-scoring ML tree written to', raxml_stdout)
      finally:
        # Discard the scratch directory even when the run or assertion fails.
        shutil.rmtree(output_directory)
    finally:
      os.remove(phylip_file.name)
示例#10
0
    def test_write_sequences(self):
        # Sequences parsed from the animals fixture and written through
        # _write_sequences must equal the fixture text minus its leading
        # comment lines.
        builder = TreeBuilder()

        fasta_filename = os.path.join(test_data(), 'animals.mfa')
        output = StringIO()

        # The parser is consumed inside the `with` block, so the fixture must
        # stay open until _write_sequences returns; the context manager also
        # closes it if the write raises.
        with open(fasta_filename, 'r') as fasta_file:
            sequences = Bio.SeqIO.parse(fasta_file, 'fasta')
            builder._write_sequences(sequences, output)

        with open(fasta_filename, 'r') as fasta_file:
            expected = fasta_file.readlines()
            expected = "".join(expected[5:])  # Remove comments from the top
        output.seek(0)

        self.assertEqual(output.read(), expected)
示例#11
0
  def test_write_sequences(self):
    # Sequences parsed from the animals fixture and written through
    # _write_sequences must equal the fixture text minus its leading comment
    # lines.
    builder = TreeBuilder()

    fasta_filename = os.path.join(test_data(), 'animals.mfa')
    output = StringIO()

    # The parser is consumed inside the `with` block, so the fixture must stay
    # open until _write_sequences returns; the context manager also closes it
    # if the write raises.
    with open(fasta_filename, 'r') as fasta_file:
      sequences = Bio.SeqIO.parse(fasta_file, 'fasta')
      builder._write_sequences(sequences, output)

    with open(fasta_filename, 'r') as fasta_file:
      expected = fasta_file.readlines()
      expected = "".join(expected[5:])  # Remove comments from the top
    output.seek(0)

    self.assertEqual(output.read(), expected)
示例#12
0
  def test_write_output(self):
    # write_output() should hand the stored tree and the list of values from
    # `sequences` to the two private writers, each paired with its own output
    # file.
    stored_tree = "Tree"
    stored_sequences = {"seq1": "a sequence object"}

    builder = TreeBuilder()
    builder.sequences = stored_sequences
    builder.tree = stored_tree
    builder.sequences_output_file = StringIO()
    builder.tree_output_file = StringIO()

    # Swap both writers for mocks so only the delegation is observed.
    builder._write_tree = MagicMock()
    builder._write_sequences = MagicMock()

    builder.write_output()

    tree_call_args = (stored_tree, builder.tree_output_file)
    builder._write_tree.assert_called_with(*tree_call_args)

    sequences_call_args = (["a sequence object"],
                           builder.sequences_output_file)
    builder._write_sequences.assert_called_with(*sequences_call_args)
示例#13
0
    def test_get_raxml_tree_file(self):
        builder = TreeBuilder()

        fake_stdout = """\
Inference[0] final GAMMA-based Likelihood: -385.279958 tree written to file/tmp/tmpaA9uJN/RAxML_result.raxml_output


Starting final GAMMA-basedthorough Optimization on tree 0 likelihood -385.279958 .... 

FinalGAMMA-based Score of best tree -385.279958

Program execution info written to/tmp/tmpaA9uJN/RAxML_info.raxml_output
Best-scoring ML tree written to:  /tmp/tmpaA9uJN/RAxML_bestTree.raxml_output

Overall execution time: 0.219131  secs or 0.000061 hours or 0.000003 days

"""

        output = builder._get_raxml_tree_file(fake_stdout)
        self.assertEqual(output, '/tmp/tmpaA9uJN/RAxML_bestTree.raxml_output')

        fake_stdout = """\
Final tree written to:      /tmp/tmpaA9uJN/raxml_output
"""

        output = builder._get_raxml_tree_file(fake_stdout)
        self.assertEqual(output, '/tmp/tmpaA9uJN/raxml_output')

        fake_stdout = """\
Oh dear

Something went wrong

I've written something to /tmp/wrong.log

But it is definitly not what you're looking for.

Sorry
"""

        output = builder._get_raxml_tree_file(fake_stdout)
        self.assertEqual(output, None)
示例#14
0
  def test_get_raxml_tree_file(self):
    builder = TreeBuilder()

    fake_stdout = """\
Inference[0] final GAMMA-based Likelihood: -385.279958 tree written to file/tmp/tmpaA9uJN/RAxML_result.raxml_output


Starting final GAMMA-basedthorough Optimization on tree 0 likelihood -385.279958 .... 

FinalGAMMA-based Score of best tree -385.279958

Program execution info written to/tmp/tmpaA9uJN/RAxML_info.raxml_output
Best-scoring ML tree written to:  /tmp/tmpaA9uJN/RAxML_bestTree.raxml_output

Overall execution time: 0.219131  secs or 0.000061 hours or 0.000003 days

"""

    output = builder._get_raxml_tree_file(fake_stdout)
    self.assertEqual(output, '/tmp/tmpaA9uJN/RAxML_bestTree.raxml_output')

    fake_stdout = """\
Final tree written to:      /tmp/tmpaA9uJN/raxml_output
"""

    output = builder._get_raxml_tree_file(fake_stdout)
    self.assertEqual(output, '/tmp/tmpaA9uJN/raxml_output')

    fake_stdout = """\
Oh dear

Something went wrong

I've written something to /tmp/wrong.log

But it is definitly not what you're looking for.

Sorry
"""

    output = builder._get_raxml_tree_file(fake_stdout)
    self.assertEqual(output, None)
示例#15
0
  def test_run_fastml(self):
    # Calls _run_fastml with the 'fastml' command name on the animals fixtures
    # and checks that both 'all_nodes' files show up in the output directory.
    builder = TreeBuilder()

    fasta_filename = os.path.join(test_data(), 'animals.mfa')
    tree_filename = os.path.join(test_data(), 'animals.terminal_nodes.newick')

    output_directory = tempfile.mkdtemp()
    try:
      # fastml doesn't like comments in fasta files, guesses it is a different
      # format and gets confused when a sequence has a '>' in it.  I'm
      # therefore creating a temporary file without the comments in it.
      fasta_file_without_comments = create_fasta_without_comments(
          fasta_filename)
      try:
        fastml_stdout, fastml_stderr = builder._run_fastml(
            'fastml', {}, tree_filename, fasta_file_without_comments,
            output_directory)

        self.assertTrue(os.path.isfile(os.path.join(output_directory,
                                                    'all_nodes.newick')))
        self.assertTrue(os.path.isfile(os.path.join(output_directory,
                                                    'all_nodes.mfa')))
      finally:
        # Remove the comment-free copy even when the run or an assertion
        # above fails.
        os.remove(fasta_file_without_comments)
    finally:
      # Always discard the scratch directory so failed runs do not leave
      # temporary data behind.
      shutil.rmtree(output_directory)
示例#16
0
    def test_run_raxml(self):
        # Calls _run_raxml with the 'raxmlHPC' command name on a PHYLIP file
        # built from the animals fixture and checks that stdout announces the
        # best-scoring tree.
        builder = TreeBuilder()

        # Context manager so the fixture handle is closed even if loading
        # raises.
        fasta_path = os.path.join(test_data(), 'animals.mfa')
        with open(fasta_path, 'r') as fasta_file:
            builder.load_fasta_sequences(fasta_file)

        phylip_file = builder._create_temporary_phylip(builder.sequences)
        try:
            # Close the handle before passing the file on by name.
            phylip_file.close()

            output_directory = tempfile.mkdtemp()
            try:
                raxml_stdout, raxml_stderr = builder._run_raxml(
                    'raxmlHPC', {}, phylip_file.name, output_directory)

                # assertIn reports both operands on failure, unlike
                # assertTrue.
                self.assertIn('Best-scoring ML tree written to', raxml_stdout)
            finally:
                # Discard the scratch directory even when the run or the
                # assertion fails.
                shutil.rmtree(output_directory)
        finally:
            os.remove(phylip_file.name)
示例#17
0
    def test_add_hereditary_nodes(self, temp_mock):
        # After add_hereditary_nodes() the tree's clade names should match the
        # builder's sequence keys (19 in total), and the directory handed out
        # by the mocked mkdtemp should no longer exist.
        builder = TreeBuilder()

        output_directory = tempfile.mkdtemp()

        def scratch_files():
            # Lazily yields up to ten real temp files inside the output
            # directory, one per mocked NamedTemporaryFile() call.
            for _ in xrange(10):
                yield tempfile.NamedTemporaryFile(dir=output_directory,
                                                  delete=False)

        temp_mock.mkdtemp.return_value = output_directory
        temp_mock.NamedTemporaryFile.side_effect = scratch_files()

        fixtures_dir = test_data()
        with open(os.path.join(fixtures_dir, 'animals.mfa')) as fasta_file:
            builder.load_fasta_sequences(fasta_file)

        newick_path = os.path.join(fixtures_dir,
                                   'animals.terminal_nodes.newick')
        builder.tree = Bio.Phylo.read(newick_path, 'newick')

        builder.add_hereditary_nodes()

        self.assertIsInstance(builder.tree, Bio.Phylo.Newick.Tree)

        clade_names = sorted(
            clade.name for clade in builder.tree.find_clades())
        self.assertItemsEqual(clade_names, builder.sequences.keys())
        self.assertEqual(len(clade_names), 19)

        self.assertFalse(os.path.isdir(output_directory))
示例#18
0
  def test_add_hereditary_nodes(self, temp_mock):
    # After add_hereditary_nodes() the tree's clade names should match the
    # builder's sequence keys (19 in total), and the directory handed out by
    # the mocked mkdtemp should no longer exist.
    builder = TreeBuilder()

    output_directory = tempfile.mkdtemp()

    def scratch_files():
      # Lazily yields up to ten real temp files inside the output directory,
      # one per mocked NamedTemporaryFile() call.
      for _ in xrange(10):
        yield tempfile.NamedTemporaryFile(dir=output_directory, delete=False)

    temp_mock.mkdtemp.return_value = output_directory
    temp_mock.NamedTemporaryFile.side_effect = scratch_files()

    fixtures_dir = test_data()
    with open(os.path.join(fixtures_dir, 'animals.mfa')) as fasta_file:
      builder.load_fasta_sequences(fasta_file)

    newick_path = os.path.join(fixtures_dir, 'animals.terminal_nodes.newick')
    builder.tree = Bio.Phylo.read(newick_path, 'newick')

    builder.add_hereditary_nodes()

    self.assertIsInstance(builder.tree, Bio.Phylo.Newick.Tree)

    clade_names = sorted(clade.name for clade in builder.tree.find_clades())
    self.assertItemsEqual(clade_names, builder.sequences.keys())
    self.assertEqual(len(clade_names), 19)

    self.assertFalse(os.path.isdir(output_directory))