Exemplo n.º 1
0
    def test_subpangraph_construction_full_graph(self):
        """Translate a five-node, one-sequence poagraph into PO input text.

        The only sequence (seq0) walks through every node and is also the
        only sequence handed to the translator.
        """
        graph_nodes = [
            pNode.Node(node_id=nid(index), base=b(symbol), aligned_to=None)
            for index, symbol in enumerate(['A', 'A', 'C', 'A', 'T'])
        ]
        graph_sequences = {
            pSeq.SequenceID('seq0'):
            pSeq.Sequence(pSeq.SequenceID('seq0'),
                          [pSeq.SequencePath([nid(i) for i in range(5)])],
                          pSeq.SequenceMetadata({'group': '1'}))
        }

        po_translator = poa.PoagraphPOTranslator(
            pPoagraph.Poagraph(graph_nodes, graph_sequences),
            [pSeq.SequenceID('seq0')])
        produced = po_translator.get_input_po_content()

        expected = ("VERSION=pangenome\n"
                    "NAME=pangenome\n"
                    "TITLE=pangenome\n"
                    "LENGTH=5\n"
                    "SOURCECOUNT=1\n"
                    "SOURCENAME=seq0\n"
                    "SOURCEINFO=5 0 100 -1 seq0\n"
                    "a:S0\n"
                    "a:L0S0\n"
                    "c:L1S0\n"
                    "a:L2S0\n"
                    "t:L3S0")
        self.assertEqual(expected, produced)
Exemplo n.º 2
0
    def test_subpangraph_should_omit_edges_2(self):
        """Translate only seq1 out of a graph that also contains seq2.

        seq1 visits nodes 0 and 2, seq2 visits 0, 1 and 2; the expected PO
        text lists two nodes and a single source.
        """
        graph_nodes = [
            pNode.Node(node_id=nid(index), base=b(symbol), aligned_to=None)
            for index, symbol in enumerate(['A', 'C', 'C'])
        ]
        node_ids_by_sequence = {'seq1': [0, 2], 'seq2': [0, 1, 2]}
        graph_sequences = {
            pSeq.SequenceID(name):
            pSeq.Sequence(pSeq.SequenceID(name),
                          [pSeq.SequencePath([nid(i) for i in node_ids])],
                          pSeq.SequenceMetadata({'group': '1'}))
            for name, node_ids in node_ids_by_sequence.items()
        }

        po_translator = poa.PoagraphPOTranslator(
            pPoagraph.Poagraph(graph_nodes, graph_sequences),
            [pSeq.SequenceID('seq1')])
        produced = po_translator.get_input_po_content()

        expected = ("VERSION=pangenome\n"
                    "NAME=pangenome\n"
                    "TITLE=pangenome\n"
                    "LENGTH=2\n"
                    "SOURCECOUNT=1\n"
                    "SOURCENAME=seq1\n"
                    "SOURCEINFO=2 0 100 -1 seq1\n"
                    "a:S0\n"
                    "c:L0S0")
        self.assertEqual(expected, produced)
Exemplo n.º 3
0
    def test_2_three_sequences(self):
        """Check all three records of the three-sequence FASTA file in turn."""
        provider = FromFile(
            Path(self.fasta_dir + "test_2_three_sequences.fasta"))

        # (sequence name, expected sequence), checked in this order.
        expectations = (
            ("seq1", "ACTGGGTGGGA"),
            ("seq2", "AA"),
            ("seq3", "GT"),
        )
        for name, expected_sequence in expectations:
            self.raise_error_if_unequal(pSeq.SequenceID(name),
                                        expected_sequence, provider)
Exemplo n.º 4
0
class FromNCBITests(unittest.TestCase):
    """Tests of the FromNCBI fasta provider: download, ID guessing, caching.

    NOTE(review): ``@data``/``@unpack`` on ``test_3_guess_entrez_id`` look
    like ddt decorators; those only expand into test cases when the class is
    decorated with ``@ddt`` - confirm that decorator is applied.
    """

    def setUp(self) -> None:
        """Create a non-caching provider shared by tests that use it."""
        self.fasta_provider = FromNCBI(EmailAddress('*****@*****.**'),
                                       use_cache=False)

    @unittest.skip("slow test - internet connection required")
    def test_0_get_10th_symbol_of_AB050936v1(self):
        """Compare the base at index 10 with the local reference file."""
        sequence_id = pSeq.SequenceID("AB050936.1", skip_part_before_dot=False)
        actual_base = self.fasta_provider.get_base(sequence_id, 10)
        path = Path('tests/data/fasta_providers/fasta_files/AB050936.1.fasta')
        expected_base = self.read_sequence(path)[10]
        self.assertEqual(expected_base, actual_base)

    @unittest.skip("slow test - internet connection required")
    def test_1_download_AB050936v1(self):
        """Compare the downloaded sequence with the local reference file."""
        fasta_provider = FromNCBI(EmailAddress('*****@*****.**'), use_cache=False)
        sequence_id = pSeq.SequenceID("AB050936.1", skip_part_before_dot=False)
        actual_sequence = fasta_provider._download_from_ncbi(sequence_id)
        p = Path('tests/data/fasta_providers/fasta_files/AB050936.1.fasta')
        expected_sequence = self.read_sequence(p)
        self.assertEqual(expected_sequence, actual_sequence)

    @unittest.skip("slow test - internet connection required")
    def test_2_failed_download(self):
        """Downloading with an empty ID must raise with the expected message."""
        fasta_provider = FromNCBI(EmailAddress('*****@*****.**'), use_cache=False)
        sequence_id = ""
        with self.assertRaises(Exception) as err:
            _ = fasta_provider._download_from_ncbi(sequence_id)
        # The message check lives outside the ``with`` block: a statement
        # placed after the raising call inside the block is never executed.
        # The raised exception is available as ``err.exception``.
        self.assertEqual(
            str(err.exception),
            f"Cannot download from Entrez sequence of ID: {sequence_id}")

    @data((pSeq.SequenceID("plain", False), "plain"),
          (pSeq.SequenceID("with.dot", False), "with.dot"),
          (pSeq.SequenceID("with.two.dots", False), "with.two.dots"),
          (pSeq.SequenceID("withv1", False), "with.1"))
    @unpack
    def test_3_guess_entrez_id(self, sequenceID: pSeq.SequenceID,
                               expected_guessed_entrez_id: str):
        """Check the NCBI ID guessed for each (sequence ID, expected) pair."""
        fasta_provider = FromNCBI(EmailAddress('*****@*****.**'), use_cache=False)
        actual_guessed_entrez_id = fasta_provider._guess_ncbi_sequence_id(
            sequenceID)

        self.assertEqual(expected_guessed_entrez_id, actual_guessed_entrez_id)

    def read_sequence(self, path: Path) -> str:
        """Return the file content after its first line, upper-cased and
        with newlines removed."""
        with open(path) as fasta_file_handler:
            # The first line is skipped and its content discarded.
            _ = fasta_file_handler.readline()
            return fasta_file_handler.read().upper().replace("\n", "")

    def test_1_download_sequence_and_save_to_cache(self):
        """Fetching a base with caching on must create the cache file.

        Checks that the ``.fastacache`` directory exists afterwards, that it
        contains ``<sequence_id>.fasta`` and that this file equals the
        control file.
        """
        fasta_provider = FromNCBI(EmailAddress('*****@*****.**'), use_cache=True)
        cache_dir_path = pathtools.get_child_path(Path.cwd(), ".fastacache")
        # Start from a clean state so the directory must be re-created.
        if cache_dir_path.exists():
            shutil.rmtree(cache_dir_path)

        sequence_id = pSeq.SequenceID("AB050936.1", skip_part_before_dot=False)
        # The returned base is not checked here; only the cache is inspected.
        _ = fasta_provider.get_base(sequence_id, 15)

        # cache directory creation
        cache_directory_created = cache_dir_path.exists()
        self.assertTrue(cache_directory_created)

        # file creation
        files_in_cache_directory = [*cache_dir_path.glob("*")]
        expected_filepath = pathtools.get_child_path(cache_dir_path,
                                                     f"{sequence_id}.fasta")
        file_created_in_cache = expected_filepath in files_in_cache_directory
        self.assertTrue(file_created_in_cache)

        # file content
        control_fasta_path = Path(
            'tests/data/fasta_providers/fasta_ncbi/AB050936.1.fasta')

        with open(control_fasta_path) as control_fasta_handler:
            expected_content = control_fasta_handler.read()
        with open(expected_filepath) as cached_fasta_handler:
            actual_content = cached_fasta_handler.read()
        self.assertEqual(expected_content, actual_content)
Exemplo n.º 6
0
 def test_1_download_AB050936v1(self):
     """Compare the downloaded AB050936.1 sequence with the reference file."""
     provider = FromNCBI(EmailAddress('*****@*****.**'), use_cache=False)
     target_id = pSeq.SequenceID("AB050936.1", skip_part_before_dot=False)
     downloaded = provider._download_from_ncbi(target_id)

     reference_path = Path(
         'tests/data/fasta_providers/fasta_files/AB050936.1.fasta')
     self.assertEqual(self.read_sequence(reference_path), downloaded)
Exemplo n.º 7
0
    def test_1_one_sequence(self):
        """Check seq1 from the one-sequence FASTA file against the file
        content read via ``read_sequence``."""
        path_to_fasta = self.fasta_dir + "test_1_one_sequence.fasta"
        provider = FromFile(Path(path_to_fasta))

        self.raise_error_if_unequal(pSeq.SequenceID("seq1"),
                                    self.read_sequence(path_to_fasta),
                                    provider)
Exemplo n.º 8
0
    def test_1_one_sequence_one_file_in_zip(self):
        """Check seq1 provided from a zip archive path."""
        archive_path = Path(
            self.fasta_dir + "test_1_one_sequence_one_file_in_zip.zip")
        provider = FromFile(archive_path)

        self.raise_error_if_unequal(pSeq.SequenceID("seq1"), "ACTGGGTGGGA",
                                    provider)
Exemplo n.º 9
0
    def test_1_typical_pangraph(self):
        """Serialize an 18-node, four-sequence poagraph and compare the
        result with the content of ``test_1.po``."""
        reference_po_path = Path(self.po_files_dir + "test_1.po")

        # One entry per node, in node-id order: (base symbol, id of the node
        # passed as ``aligned_to``, or None).
        node_table = [
            ('A', 1), ('G', 0),
            ('C', 3), ('G', 2),
            ('A', 5), ('T', 4),
            ('G', None),
            ('G', None),
            ('A', 9), ('C', 10), ('G', 11), ('T', 8),
            ('A', 13), ('C', 12),
            ('T', None),
            ('A', 16), ('C', 17), ('G', 15),
        ]
        graph_nodes = [
            pNode.Node(node_id=nid(index),
                       base=bid(symbol),
                       aligned_to=None if partner is None else nid(partner))
            for index, (symbol, partner) in enumerate(node_table)
        ]

        # Node ids visited by each sequence (a single path per sequence).
        node_ids_by_sequence = {
            'seq0': [0, 2, 4, 6, 7, 8, 12, 14, 16],
            'seq1': [1, 2, 5, 6, 7, 9],
            'seq2': [3, 4, 6, 7, 10, 12, 14, 17],
            'seq3': [11, 13, 14, 15],
        }
        graph_sequences = {
            pSeq.SequenceID(name):
            pSeq.Sequence(pSeq.SequenceID(name),
                          [pSeq.SequencePath([nid(i) for i in node_ids])],
                          pSeq.SequenceMetadata({'group': '1'}))
            for name, node_ids in node_ids_by_sequence.items()
        }

        graph = pPoagraph.Poagraph(graph_nodes, graph_sequences)

        produced = PangenomePO.poagraph_to_PangenomePO(graph)
        expected = pathtools.get_file_content(reference_po_path)
        self.assertEqual(expected, produced)
Exemplo n.º 10
0
 def test_subpangraph_construction_from_pangraph_keep_seq_0_1(self):
     """Translate the fixture poagraph restricted to seq0 and seq1."""
     kept_sequences = [pSeq.SequenceID('seq0'), pSeq.SequenceID('seq1')]
     po_translator = poa.PoagraphPOTranslator(self.poagraph, kept_sequences)
     produced = po_translator.get_input_po_content()

     expected = ("VERSION=pangenome\n"
                 "NAME=pangenome\n"
                 "TITLE=pangenome\n"
                 "LENGTH=9\n"
                 "SOURCECOUNT=2\n"
                 "SOURCENAME=seq0\n"
                 "SOURCEINFO=7 0 0 -1 seq0\n"
                 "SOURCENAME=seq1\n"
                 "SOURCEINFO=5 1 100 -1 seq1\n"
                 "t:S0\n"
                 "a:L0S0S1\n"
                 "a:L1S0S1\n"
                 "a:L2S0A4\n"
                 "c:L2S1A3\n"
                 "a:L3L4S0S1\n"
                 "c:L5S0A7\n"
                 "t:L5S1A6\n"
                 "a:L6S0")
     self.assertEqual(expected, produced)
Exemplo n.º 11
0
    def setUp(self):
        """Build the 15-node, four-sequence poagraph stored as
        ``self.poagraph``."""
        # One entry per node, in node-id order: (base symbol, id of the node
        # passed as ``aligned_to``, or None).
        node_table = [
            ('T', None),
            ('A', 2), ('G', 1),
            ('A', 4), ('C', 3),
            ('A', 6), ('C', 7), ('G', 8), ('T', 5),
            ('A', None),
            ('C', 11), ('T', 10),
            ('G', None),
            ('A', 14), ('C', 13),
        ]
        graph_nodes = [
            pNode.Node(node_id=nid(index),
                       base=b(symbol),
                       aligned_to=None if partner is None else nid(partner))
            for index, (symbol, partner) in enumerate(node_table)
        ]

        # Node ids visited by each sequence (a single path per sequence).
        node_ids_by_sequence = {
            'seq0': [0, 1, 3, 5, 9, 10, 13],
            'seq1': [1, 3, 6, 9, 11],
            'seq2': [2, 4, 7, 9, 11, 12],
            'seq3': [2, 4, 8, 9, 11, 12, 14],
        }
        graph_sequences = {
            pSeq.SequenceID(name):
            pSeq.Sequence(pSeq.SequenceID(name),
                          [pSeq.SequencePath([nid(i) for i in node_ids])],
                          pSeq.SequenceMetadata({'group': '1'}))
            for name, node_ids in node_ids_by_sequence.items()
        }

        self.poagraph = pPoagraph.Poagraph(graph_nodes, graph_sequences)
Exemplo n.º 12
0
    def setUp(self):
        """Set the FASTA output directory and build the fixture poagraph.

        The graph has 18 nodes and four sequences; seq1 has no paths while
        seq2 and seq3 are each split into two paths.
        """
        self.fasta_dir = 'tests/output/fasta_files/'

        # One entry per node, in node-id order: (base symbol, id of the node
        # passed as ``aligned_to``, or None).
        node_table = [
            ('A', 1), ('G', 0),
            ('C', 3), ('G', 2),
            ('A', 5), ('T', 4),
            ('G', None),
            ('G', None),
            ('A', 9), ('C', 10), ('G', 11), ('T', 8),
            ('A', 13), ('C', 12),
            ('T', None),
            ('A', 16), ('C', 17), ('G', 15),
        ]
        graph_nodes = [
            pNode.Node(node_id=nid(index),
                       base=bid(symbol),
                       aligned_to=None if partner is None else nid(partner))
            for index, (symbol, partner) in enumerate(node_table)
        ]

        # For each sequence, the list of its paths (each a list of node ids).
        paths_by_sequence = {
            'seq0': [[0, 2, 4, 6, 7, 8, 12, 14, 16]],
            'seq1': [],
            'seq2': [[3, 4, 6, 7, 10, 12], [14, 17]],
            'seq3': [[11], [13, 14, 15]],
        }
        graph_sequences = {
            pSeq.SequenceID(name):
            pSeq.Sequence(pSeq.SequenceID(name),
                          [pSeq.SequencePath([nid(i) for i in node_ids])
                           for node_ids in paths],
                          pSeq.SequenceMetadata({'group': '1'}))
            for name, paths in paths_by_sequence.items()
        }

        self.poagraph = pPoagraph.Poagraph(graph_nodes, graph_sequences)
Exemplo n.º 13
0
    def test_read_consensus_path_seq1_only_in_input(self):
        """Read consensus 0 from PO lines after translating seq1 only."""
        po_translator = poa.PoagraphPOTranslator(self.poagraph,
                                                 [pSeq.SequenceID('seq1')])
        # The generated input text itself is not inspected in this test.
        _ = po_translator.get_input_po_content()

        po_output_lines = [
            "VERSION=pangenome\n",
            "NAME=pangenome\n",
            "TITLE=pangenome\n",
            "LENGTH=5\n",
            "SOURCECOUNT=2\n",
            "SOURCENAME=seq1\n",
            "SOURCEINFO=5 0 100 0 seq1\n",
            "SOURCENAME=CONSENS0\n",
            "SOURCEINFO=5 0 100 0 CONSENS0\n",
            "a:S0S1\n",
            "a:L0S0S1\n",
            "c:L1S0S1\n",
            "a:L2S0S1\n",
            "t:L2S0S1",
        ]
        consensus_paths = po_translator.read_consensus_paths(
            po_output_lines, [0])

        self.assertEqual([1, 3, 6, 9, 11], consensus_paths[0].path)
Exemplo n.º 14
0
    def test_subpangraph_unfilled_nodes(self):
        """Translate a graph whose nodes 3 and 4 carry the unknown-base
        symbol and compare the PO input text with the expected content.

        Both sequences are passed to the translator; seq1 runs through the
        two unknown nodes while seq2 bypasses them via nodes 5 and 6.
        """
        symbol_for_unknown = '?'
        nodes = [
            pNode.Node(node_id=nid(0), base=b('A'), aligned_to=nid(1)),
            pNode.Node(node_id=nid(1), base=b('C'), aligned_to=nid(0)),
            pNode.Node(node_id=nid(2), base=b('G'), aligned_to=None),
            pNode.Node(node_id=nid(3),
                       base=b(symbol_for_unknown),
                       aligned_to=None),
            pNode.Node(node_id=nid(4),
                       base=b(symbol_for_unknown),
                       aligned_to=None),
            pNode.Node(node_id=nid(5), base=b('G'), aligned_to=None),
            pNode.Node(node_id=nid(6), base=b('C'), aligned_to=None),
            pNode.Node(node_id=nid(7), base=b('A'), aligned_to=None),
            # Ninth node: its id must be 8 (both paths below end at node 8);
            # it previously reused id 5, duplicating an existing node id.
            pNode.Node(node_id=nid(8), base=b('T'), aligned_to=None)
        ]

        sequences = {
            pSeq.SequenceID('seq1'):
            pSeq.Sequence(pSeq.SequenceID('seq1'),
                          [pSeq.SequencePath([*map(nid, [0, 2, 3, 4, 7, 8])])],
                          pSeq.SequenceMetadata({'group': '1'})),
            pSeq.SequenceID('seq2'):
            pSeq.Sequence(pSeq.SequenceID('seq2'),
                          [pSeq.SequencePath([*map(nid, [1, 2, 5, 6, 7, 8])])],
                          pSeq.SequenceMetadata({'group': '1'}))
        }
        poagraph = pPoagraph.Poagraph(nodes, sequences)

        translator = poa.PoagraphPOTranslator(
            poagraph, [pSeq.SequenceID('seq1'),
                       pSeq.SequenceID('seq2')])
        actual_po_content = translator.get_input_po_content()
        expected_po_content = "VERSION=pangenome\n" \
                              "NAME=pangenome\n" \
                              "TITLE=pangenome\n" \
                              "LENGTH=9\n" \
                              "SOURCECOUNT=2\n" \
                              "SOURCENAME=seq1\n" \
                              "SOURCEINFO=6 0 100 -1 seq1\n" \
                              "SOURCENAME=seq2\n" \
                              "SOURCEINFO=6 1 100 -1 seq2\n" \
                              "a:S0A1\n" \
                              "c:S1A0\n" \
                              "g:L0L1S0S1\n" \
                              f"{symbol_for_unknown}:L2S0\n" \
                              f"{symbol_for_unknown}:L3S0\n" \
                              "g:L2S1\n" \
                              "c:L5S1\n" \
                              "a:L4L6S0S1\n" \
                              "t:L7S0S1"
        self.assertEqual(expected_po_content, actual_po_content)
Exemplo n.º 15
0
    def test_2_read_seqeunce_from_cache_instead_downloading(self):
        """Pre-populate the cache with a fake record and read a base from it.

        A fresh ``.fastacache`` directory receives ``<sequence_id>.fasta``
        holding the sequence "foo"; the base at index 2 is expected to be "o".
        """
        provider = FromNCBI(EmailAddress('*****@*****.**'), use_cache=True)

        # Recreate the cache directory so it holds only the fake file.
        cache_dir = pathtools.get_child_path(Path.cwd(), ".fastacache")
        if cache_dir.exists():
            shutil.rmtree(cache_dir)
        cache_dir.mkdir()

        sequence_id = pSeq.SequenceID("seq1")
        fake_sequence = "foo"
        base_expected = pNode.Base("o")
        cached_file_path = pathtools.get_child_path(cache_dir,
                                                    f"{sequence_id}.fasta")
        with open(cached_file_path, 'w') as cached_file:
            cached_file.write(f">{sequence_id} cached\n{fake_sequence}")

        self.assertEqual(base_expected, provider.get_base(sequence_id, 2))
Exemplo n.º 16
0
 def test_subpangraph_construction_from_pangraph_keep_seq3(self):
     """Translate the fixture poagraph restricted to seq3."""
     po_translator = poa.PoagraphPOTranslator(self.poagraph,
                                              [pSeq.SequenceID('seq3')])
     produced = po_translator.get_input_po_content()

     expected = ("VERSION=pangenome\n"
                 "NAME=pangenome\n"
                 "TITLE=pangenome\n"
                 "LENGTH=7\n"
                 "SOURCECOUNT=1\n"
                 "SOURCENAME=seq3\n"
                 "SOURCEINFO=7 0 100 -1 seq3\n"
                 "g:S0\n"
                 "c:L0S0\n"
                 "t:L1S0\n"
                 "a:L2S0\n"
                 "t:L3S0\n"
                 "g:L4S0\n"
                 "c:L5S0")
     self.assertEqual(expected, produced)
Exemplo n.º 17
0
 def test_0_get_10th_symbol_of_AB050936v1(self):
     """Compare the base at index 10 of AB050936.1 with the reference file."""
     target_id = pSeq.SequenceID("AB050936.1", skip_part_before_dot=False)
     fetched_base = self.fasta_provider.get_base(target_id, 10)

     reference_path = Path(
         'tests/data/fasta_providers/fasta_files/AB050936.1.fasta')
     reference_sequence = self.read_sequence(reference_path)
     self.assertEqual(reference_sequence[10], fetched_base)
Exemplo n.º 18
0
    def test_2_consensuses_tree_fasta(self):
        """Render a two-node consensus tree as FASTA and compare it with
        the content of ``consensuses.fasta``."""
        reference_fasta_path = Path(self.fasta_dir + "consensuses.fasta")

        tree = CT.ConsensusTree()

        # Node 0: all members set.
        fully_described_node = CT.ConsensusNode(
            consensus_id=CT.ConsensusNodeID(0),
            parent_node_id=CT.ConsensusNodeID(-1),
            children_nodes_ids=[CT.ConsensusNodeID(i) for i in (1, 2)],
            sequences_ids=[
                pSeq.SequenceID(name)
                for name in ('seq0', 'seq1', 'seq2', 'seq3')
            ],
            mincomp=CT.CompatibilityToPath(0.5, P(1)),
            compatibilities_to_all={
                pSeq.SequenceID(name): CT.CompatibilityToPath(value, P(1))
                for name, value in (('seq0', 1.0), ('seq1', 0.9),
                                    ('seq2', 0.95), ('seq3', 0.6))
            },
            consensus_path=pSeq.SequencePath(
                [nid(i) for i in (0, 2, 5, 6, 10, 12, 13, 16)]))

        # Node 1: no compatibilities to all, no mincomp.
        sparsely_described_node = CT.ConsensusNode(
            consensus_id=CT.ConsensusNodeID(1),
            parent_node_id=CT.ConsensusNodeID(0),
            sequences_ids=[
                pSeq.SequenceID(name) for name in ('seq0', 'seq1', 'seq2')
            ],
            consensus_path=pSeq.SequencePath(
                [nid(i) for i in (0, 2, 3, 6, 10, 11, 13, 17)]))

        tree.nodes = [fully_described_node, sparsely_described_node]

        produced_fasta = PangenomeFASTA.consensuses_tree_to_fasta(
            self.poagraph, tree)
        expected_fasta = pathtools.get_file_content(reference_fasta_path)
        self.assertEqual(expected_fasta, produced_fasta)
Exemplo n.º 19
0
    def test_2_consensuses_and_empty_sequences(self):
        """Serialize a graph with two path-less sequences and two consensus
        sequences, then compare the result with ``test_2.po``."""
        reference_po_path = Path(self.po_files_dir + "test_2.po")

        # One entry per node, in node-id order: (base symbol, id of the node
        # passed as ``aligned_to``, or None).
        node_table = [
            ('C', 1), ('T', 0),
            ('A', 3), ('G', 2),
            ('C', None),
            ('T', None),
            ('A', 7), ('T', 6),
            ('G', None),
        ]
        graph_nodes = [
            pNode.Node(node_id=nid(index),
                       base=bid(symbol),
                       aligned_to=None if partner is None else nid(partner))
            for index, (symbol, partner) in enumerate(node_table)
        ]

        # (sequence name, list of paths as node ids, whether metadata is
        # attached); the CONSENS* entries get None as metadata.
        sequence_table = [
            ('seq0', [[0, 3, 4, 5, 6, 8]], True),
            ('seq1', [[1, 2, 4, 5, 7, 8]], True),
            ('seq2', [], True),
            ('seq3', [], True),
            ('CONSENS0', [[0, 3, 4, 5, 7, 8]], False),
            ('CONSENS1', [[1, 2, 4, 5, 6, 8]], False),
        ]
        graph_sequences = {
            pSeq.SequenceID(name):
            pSeq.Sequence(
                pSeq.SequenceID(name),
                [pSeq.SequencePath([nid(i) for i in node_ids])
                 for node_ids in paths],
                pSeq.SequenceMetadata({'group': '1'}) if with_metadata
                else None)
            for name, paths, with_metadata in sequence_table
        }

        graph = pPoagraph.Poagraph(graph_nodes, graph_sequences)

        produced = PangenomePO.poagraph_to_PangenomePO(graph)
        expected = pathtools.get_file_content(reference_po_path)
        self.assertEqual(expected, produced)