Пример #1
0
    def test_subpangraph_should_omit_edges_2(self):
        """PO input built for seq1 alone must match the two-node expectation.

        The poagraph holds three nodes and two sequences (seq1: 0 -> 2,
        seq2: 0 -> 1 -> 2), but only seq1 is handed to the translator; the
        expected content declares LENGTH=2 and a single source.
        """
        nodes = [
            pNode.Node(node_id=nid(position), base=b(letter), aligned_to=None)
            for position, letter in enumerate('ACC')
        ]

        def make_sequence(name, node_numbers):
            # One single-path sequence tagged with group '1'.
            sequence_id = pSeq.SequenceID(name)
            path = pSeq.SequencePath([nid(number) for number in node_numbers])
            return pSeq.Sequence(sequence_id, [path],
                                 pSeq.SequenceMetadata({'group': '1'}))

        sequences = {
            pSeq.SequenceID('seq1'): make_sequence('seq1', [0, 2]),
            pSeq.SequenceID('seq2'): make_sequence('seq2', [0, 1, 2]),
        }
        poagraph = pPoagraph.Poagraph(nodes, sequences)

        selected_ids = [pSeq.SequenceID('seq1')]
        translator = poa.PoagraphPOTranslator(poagraph, selected_ids)
        actual_po_content = translator.get_input_po_content()

        expected_po_content = "\n".join([
            "VERSION=pangenome",
            "NAME=pangenome",
            "TITLE=pangenome",
            "LENGTH=2",
            "SOURCECOUNT=1",
            "SOURCENAME=seq1",
            "SOURCEINFO=2 0 100 -1 seq1",
            "a:S0",
            "c:L0S0",
        ])

        self.assertEqual(expected_po_content, actual_po_content)
Пример #2
0
    def test_subpangraph_unfilled_nodes(self):
        """PO input must carry the placeholder base for unfilled nodes.

        Nodes 3 and 4 are created with the unknown-base symbol '?'. Both
        sequences are handed to the translator, and the expected content
        lists nine nodes (LENGTH=9) and two sources.
        """
        symbol_for_unknown = '?'
        nodes = [
            pNode.Node(node_id=nid(0), base=b('A'), aligned_to=nid(1)),
            pNode.Node(node_id=nid(1), base=b('C'), aligned_to=nid(0)),
            pNode.Node(node_id=nid(2), base=b('G'), aligned_to=None),
            pNode.Node(node_id=nid(3),
                       base=b(symbol_for_unknown),
                       aligned_to=None),
            pNode.Node(node_id=nid(4),
                       base=b(symbol_for_unknown),
                       aligned_to=None),
            pNode.Node(node_id=nid(5), base=b('G'), aligned_to=None),
            pNode.Node(node_id=nid(6), base=b('C'), aligned_to=None),
            pNode.Node(node_id=nid(7), base=b('A'), aligned_to=None),
            # Node ids must be unique: id 8 is the node both sequence paths
            # below end on, and the one the final "t:L7S0S1" line describes.
            pNode.Node(node_id=nid(8), base=b('T'), aligned_to=None)
        ]

        sequences = {
            pSeq.SequenceID('seq1'):
            pSeq.Sequence(pSeq.SequenceID('seq1'),
                          [pSeq.SequencePath([*map(nid, [0, 2, 3, 4, 7, 8])])],
                          pSeq.SequenceMetadata({'group': '1'})),
            pSeq.SequenceID('seq2'):
            pSeq.Sequence(pSeq.SequenceID('seq2'),
                          [pSeq.SequencePath([*map(nid, [1, 2, 5, 6, 7, 8])])],
                          pSeq.SequenceMetadata({'group': '1'}))
        }
        poagraph = pPoagraph.Poagraph(nodes, sequences)

        translator = poa.PoagraphPOTranslator(
            poagraph, [pSeq.SequenceID('seq1'),
                       pSeq.SequenceID('seq2')])
        actual_po_content = translator.get_input_po_content()
        expected_po_content = "VERSION=pangenome\n" \
                              "NAME=pangenome\n" \
                              "TITLE=pangenome\n" \
                              "LENGTH=9\n" \
                              "SOURCECOUNT=2\n" \
                              "SOURCENAME=seq1\n" \
                              "SOURCEINFO=6 0 100 -1 seq1\n" \
                              "SOURCENAME=seq2\n" \
                              "SOURCEINFO=6 1 100 -1 seq2\n" \
                              "a:S0A1\n" \
                              "c:S1A0\n" \
                              "g:L0L1S0S1\n" \
                              f"{symbol_for_unknown}:L2S0\n" \
                              f"{symbol_for_unknown}:L3S0\n" \
                              "g:L2S1\n" \
                              "c:L5S1\n" \
                              "a:L4L6S0S1\n" \
                              "t:L7S0S1"
        self.assertEqual(expected_po_content, actual_po_content)
Пример #3
0
    def test_1_typical_pangraph(self):
        """PO conversion of an 18-node, 4-sequence poagraph must equal test_1.po."""
        # (base, aligned_to) for every node; the list position is the node id.
        node_specs = [
            ('A', 1), ('G', 0),
            ('C', 3), ('G', 2),
            ('A', 5), ('T', 4),
            ('G', None),
            ('G', None),
            ('A', 9), ('C', 10), ('G', 11), ('T', 8),
            ('A', 13), ('C', 12),
            ('T', None),
            ('A', 16), ('C', 17), ('G', 15),
        ]
        poagraph_nodes = [
            pNode.Node(node_id=nid(position),
                       base=bid(letter),
                       aligned_to=None if partner is None else nid(partner))
            for position, (letter, partner) in enumerate(node_specs)
        ]

        # Each sequence is a single path through the node ids listed here.
        path_specs = [
            ('seq0', [0, 2, 4, 6, 7, 8, 12, 14, 16]),
            ('seq1', [1, 2, 5, 6, 7, 9]),
            ('seq2', [3, 4, 6, 7, 10, 12, 14, 17]),
            ('seq3', [11, 13, 14, 15]),
        ]
        poagraph_sequences = {}
        for name, node_numbers in path_specs:
            key = pSeq.SequenceID(name)
            path = pSeq.SequencePath([nid(number) for number in node_numbers])
            poagraph_sequences[key] = pSeq.Sequence(
                pSeq.SequenceID(name), [path],
                pSeq.SequenceMetadata({'group': '1'}))

        poagraph = pPoagraph.Poagraph(poagraph_nodes, poagraph_sequences)
        actual_po_content = PangenomePO.poagraph_to_PangenomePO(poagraph)

        expected_po_content_path = Path(self.po_files_dir + "test_1.po")
        expected_po_content = pathtools.get_file_content(
            expected_po_content_path)

        self.assertEqual(expected_po_content, actual_po_content)
Пример #4
0
    def test_subpangraph_construction_full_graph(self):
        """PO input for the only sequence must list all five nodes in a chain."""
        nodes = [
            pNode.Node(node_id=nid(position), base=b(letter), aligned_to=None)
            for position, letter in enumerate('AACAT')
        ]

        # seq0 visits every node in id order.
        seq0_path = pSeq.SequencePath([nid(number) for number in range(5)])
        sequences = {
            pSeq.SequenceID('seq0'):
            pSeq.Sequence(pSeq.SequenceID('seq0'), [seq0_path],
                          pSeq.SequenceMetadata({'group': '1'})),
        }
        poagraph = pPoagraph.Poagraph(nodes, sequences)

        selected_ids = [pSeq.SequenceID('seq0')]
        translator = poa.PoagraphPOTranslator(poagraph, selected_ids)
        actual_po_content = translator.get_input_po_content()

        expected_po_content = "\n".join([
            "VERSION=pangenome",
            "NAME=pangenome",
            "TITLE=pangenome",
            "LENGTH=5",
            "SOURCECOUNT=1",
            "SOURCENAME=seq0",
            "SOURCEINFO=5 0 100 -1 seq0",
            "a:S0",
            "a:L0S0",
            "c:L1S0",
            "a:L2S0",
            "t:L3S0",
        ])
        self.assertEqual(expected_po_content, actual_po_content)
Пример #5
0
    def test_2_consensuses_and_empty_sequences(self):
        """PO conversion with path-less sequences and consensuses must equal test_2.po.

        seq2 and seq3 are created with no paths; CONSENS0 and CONSENS1 are
        created with a path but with None instead of metadata.
        """
        # (base, aligned_to) for every node; the list position is the node id.
        node_specs = [
            ('C', 1), ('T', 0),
            ('A', 3), ('G', 2),
            ('C', None),
            ('T', None),
            ('A', 7), ('T', 6),
            ('G', None),
        ]
        poagraph_nodes = [
            pNode.Node(node_id=nid(position),
                       base=bid(letter),
                       aligned_to=None if partner is None else nid(partner))
            for position, (letter, partner) in enumerate(node_specs)
        ]

        # (name, node ids of the single path or None for no path, has metadata)
        sequence_specs = [
            ('seq0', [0, 3, 4, 5, 6, 8], True),
            ('seq1', [1, 2, 4, 5, 7, 8], True),
            ('seq2', None, True),
            ('seq3', None, True),
            ('CONSENS0', [0, 3, 4, 5, 7, 8], False),
            ('CONSENS1', [1, 2, 4, 5, 6, 8], False),
        ]
        poagraph_sequences = {}
        for name, node_numbers, has_metadata in sequence_specs:
            key = pSeq.SequenceID(name)
            sequence_id = pSeq.SequenceID(name)
            if node_numbers is None:
                paths = []
            else:
                paths = [
                    pSeq.SequencePath([nid(number) for number in node_numbers])
                ]
            if has_metadata:
                metadata = pSeq.SequenceMetadata({'group': '1'})
            else:
                metadata = None
            poagraph_sequences[key] = pSeq.Sequence(sequence_id, paths,
                                                    metadata)

        poagraph = pPoagraph.Poagraph(poagraph_nodes, poagraph_sequences)
        actual_po_content = PangenomePO.poagraph_to_PangenomePO(poagraph)

        expected_po_content_path = Path(self.po_files_dir + "test_2.po")
        expected_po_content = pathtools.get_file_content(
            expected_po_content_path)

        self.assertEqual(expected_po_content, actual_po_content)
Пример #6
0
    def setUp(self):
        """Create the 15-node, 4-sequence poagraph stored as self.poagraph."""
        # (base, aligned_to) for every node; the list position is the node id.
        node_specs = [
            ('T', None),
            ('A', 2), ('G', 1),
            ('A', 4), ('C', 3),
            ('A', 6), ('C', 7), ('G', 8), ('T', 5),
            ('A', None),
            ('C', 11), ('T', 10),
            ('G', None),
            ('A', 14), ('C', 13),
        ]
        nodes = [
            pNode.Node(node_id=nid(position),
                       base=b(letter),
                       aligned_to=None if partner is None else nid(partner))
            for position, (letter, partner) in enumerate(node_specs)
        ]

        # Each sequence is a single path through the node ids listed here.
        path_specs = [
            ('seq0', [0, 1, 3, 5, 9, 10, 13]),
            ('seq1', [1, 3, 6, 9, 11]),
            ('seq2', [2, 4, 7, 9, 11, 12]),
            ('seq3', [2, 4, 8, 9, 11, 12, 14]),
        ]
        sequences = {}
        for name, node_numbers in path_specs:
            key = pSeq.SequenceID(name)
            path = pSeq.SequencePath([nid(number) for number in node_numbers])
            sequences[key] = pSeq.Sequence(
                pSeq.SequenceID(name), [path],
                pSeq.SequenceMetadata({'group': '1'}))

        self.poagraph = pPoagraph.Poagraph(nodes, sequences)
Пример #7
0
    def setUp(self):
        """Set self.fasta_dir and build the 18-node poagraph kept in self.poagraph.

        seq1 is created without any path; seq2 and seq3 are each split into
        two separate paths.
        """
        self.fasta_dir = 'tests/output/fasta_files/'

        # (base, aligned_to) for every node; the list position is the node id.
        node_specs = [
            ('A', 1), ('G', 0),
            ('C', 3), ('G', 2),
            ('A', 5), ('T', 4),
            ('G', None),
            ('G', None),
            ('A', 9), ('C', 10), ('G', 11), ('T', 8),
            ('A', 13), ('C', 12),
            ('T', None),
            ('A', 16), ('C', 17), ('G', 15),
        ]
        poagraph_nodes = [
            pNode.Node(node_id=nid(position),
                       base=bid(letter),
                       aligned_to=None if partner is None else nid(partner))
            for position, (letter, partner) in enumerate(node_specs)
        ]

        # (name, list of paths); every inner list is one path of node ids.
        path_specs = [
            ('seq0', [[0, 2, 4, 6, 7, 8, 12, 14, 16]]),
            ('seq1', []),
            ('seq2', [[3, 4, 6, 7, 10, 12], [14, 17]]),
            ('seq3', [[11], [13, 14, 15]]),
        ]
        poagraph_sequences = {}
        for name, paths_numbers in path_specs:
            key = pSeq.SequenceID(name)
            sequence_id = pSeq.SequenceID(name)
            paths = [
                pSeq.SequencePath([nid(number) for number in node_numbers])
                for node_numbers in paths_numbers
            ]
            poagraph_sequences[key] = pSeq.Sequence(
                sequence_id, paths, pSeq.SequenceMetadata({'group': '1'}))

        self.poagraph = pPoagraph.Poagraph(poagraph_nodes, poagraph_sequences)
Пример #8
0
    def test_2_consensuses_tree_fasta(self):
        """Consensus tree FASTA output must equal the stored consensuses.fasta.

        A two-node tree is assembled by hand: node 0 is given every field,
        node 1 is created without children, mincomp and compatibilities.
        """
        def sequence_ids(*names):
            return [pSeq.SequenceID(name) for name in names]

        def path_through(*node_numbers):
            return pSeq.SequencePath([nid(number) for number in node_numbers])

        # all members set
        fully_specified_node = CT.ConsensusNode(
            consensus_id=CT.ConsensusNodeID(0),
            parent_node_id=CT.ConsensusNodeID(-1),
            children_nodes_ids=[CT.ConsensusNodeID(child) for child in (1, 2)],
            sequences_ids=sequence_ids('seq0', 'seq1', 'seq2', 'seq3'),
            mincomp=CT.CompatibilityToPath(0.5, P(1)),
            compatibilities_to_all={
                pSeq.SequenceID(name): CT.CompatibilityToPath(value, P(1))
                for name, value in (('seq0', 1.0), ('seq1', 0.9),
                                    ('seq2', 0.95), ('seq3', 0.6))
            },
            consensus_path=path_through(0, 2, 5, 6, 10, 12, 13, 16))

        # no compatibilities to all, no mincomp
        minimal_node = CT.ConsensusNode(
            consensus_id=CT.ConsensusNodeID(1),
            parent_node_id=CT.ConsensusNodeID(0),
            sequences_ids=sequence_ids('seq0', 'seq1', 'seq2'),
            consensus_path=path_through(0, 2, 3, 6, 10, 11, 13, 17))

        consensuses_tree = CT.ConsensusTree()
        consensuses_tree.nodes = [fully_specified_node, minimal_node]

        actual_consensuses_fasta_content = \
            PangenomeFASTA.consensuses_tree_to_fasta(self.poagraph,
                                                     consensuses_tree)

        expected_consensuses_fasta_path = Path(self.fasta_dir +
                                               "consensuses.fasta")
        expected_consensuses_fasta_content = pathtools.get_file_content(
            expected_consensuses_fasta_path)

        self.assertEqual(expected_consensuses_fasta_content,
                         actual_consensuses_fasta_content)