def test_subpoagraph_should_omit_edges_2(self):
        """Check the PO input produced for ``seq1`` alone.

        The graph holds three nodes; ``seq1`` visits nodes 0 and 2 while
        ``seq2`` visits 0, 1 and 2. Only ``seq1`` is handed to the
        translator, and the expected content lists two nodes and one source.
        """
        nodes = [
            graph.Node(node_id=nid(index), base=b(letter), aligned_to=None)
            for index, letter in enumerate('ACC')
        ]

        def make_sequence(name, node_indices):
            # Every sequence of this fixture has one path and group '1'.
            return graph.Sequence(
                msa.SequenceID(name),
                [graph.SeqPath([nid(i) for i in node_indices])],
                graph.SequenceMetadata({'group': '1'}))

        sequences = {
            msa.SequenceID(name): make_sequence(name, node_indices)
            for name, node_indices in (('seq1', [0, 2]),
                                       ('seq2', [0, 1, 2]))
        }
        poagraph = graph.Poagraph(nodes, sequences)

        requested_ids = [msa.SequenceID('seq1')]
        translator = poa._PoagraphPOTranslator(poagraph, requested_ids)
        actual_po_content = translator.get_input_po_content()
        expected_po_content = (
            "VERSION=pangenome\n"
            "NAME=pangenome\n"
            "TITLE=pangenome\n"
            "LENGTH=2\n"
            "SOURCECOUNT=1\n"
            "SOURCENAME=seq1\n"
            "SOURCEINFO=2 0 100 -1 seq1\n"
            "a:S0\n"
            "c:L0S0"
        )

        self.assertEqual(expected_po_content, actual_po_content)
    def test_05_single_block_single_nucletodide(self):
        """Build a poagraph from a MAF fixture and compare it to one node.

        All four expected sequences consist of the single node 0; seq0/seq1
        carry group '1' and seq2/seq3 carry group '2'.
        """
        maf_path = self.maf_files_dir.joinpath(
            "test_5_single_block_single_nucletodide.maf")

        only_node = graph.Node(node_id=nid(0),
                               base=graph.Base('A'),
                               aligned_to=None,
                               block_id=bid(0))
        expected_nodes = [only_node]

        group_by_name = (('seq0', '1'), ('seq1', '1'),
                         ('seq2', '2'), ('seq3', '2'))
        expected_sequences = {
            msa.SequenceID(name): graph.Sequence(
                msa.SequenceID(name),
                [graph.SeqPath([nid(0)])],
                graph.SequenceMetadata({'group': group}))
            for name, group in group_by_name
        }
        expected_poagraph = graph.Poagraph(expected_nodes, expected_sequences)

        maf = msa.Maf(pathtools.get_file_content_stringio(maf_path), maf_path)
        actual_poagraph, _ = builder.build_from_dagmaf(maf,
                                                       self.fasta_provider,
                                                       self.metadatacsv)

        self.assertEqual(expected_poagraph, actual_poagraph)
    def test_7_missing_one_reverted_sequence_middle_minus1_minus1(self):
        """Compare the poagraph built from the MAF fixture with 12 nodes.

        The missing bases are supplied by a ``ConstBaseProvider`` wrapping
        ``self.missing_n``, so nodes 4 and 5 are expected to carry that
        symbol.
        """
        maf_path = self.maf_files_dir.joinpath(
            "test_7_missing_one_reverted_sequence_middle_minus1_minus1.maf")

        missing_symbol = self.missing_n.value
        # (base, index of the aligned node or None); list position == node id
        node_specs = [
            # block 0
            ('A', None), ('C', None), ('T', None), ('A', None),
            # missing seq2
            (missing_symbol, None), (missing_symbol, None),
            # block 1
            ('A', 7), ('G', 6), ('C', 9), ('G', 8), ('C', 11), ('T', 10),
        ]
        expected_nodes = [
            graph.Node(node_id=nid(index),
                       base=graph.Base(symbol),
                       aligned_to=None if partner is None else nid(partner))
            for index, (symbol, partner) in enumerate(node_specs)
        ]

        def make_sequence(name, group, *paths):
            # Each positional ``paths`` entry becomes one SeqPath.
            return graph.Sequence(
                msa.SequenceID(name),
                [graph.SeqPath([*map(nid, path)]) for path in paths],
                graph.SequenceMetadata({'group': group}))

        expected_sequences = {
            msa.SequenceID('seq0'): make_sequence('seq0', '1'),
            msa.SequenceID('seq1'): make_sequence(
                'seq1', '1', [0, 1, 2, 3, 7, 9, 11]),
            msa.SequenceID('seq2'): make_sequence(
                'seq2', '2', [0, 1, 4, 5, 6, 8, 10]),
            msa.SequenceID('seq3'): make_sequence('seq3', '2'),
        }
        expected_poagraph = graph.Poagraph(expected_nodes, expected_sequences)

        maf = msa.Maf(pathtools.get_file_content_stringio(maf_path), maf_path)
        actual_poagraph, _ = builder.build_from_dagmaf(
            maf, missings.ConstBaseProvider(self.missing_n), self.metadatacsv)

        self.assertEqual(expected_poagraph, actual_poagraph)
Exemplo n.º 4
0
def _add_node_do_sequence(sequence: graph.Sequence, node_id: graph.NodeID) -> \
        graph.Sequence:
    """Return a new sequence with ``node_id`` appended to its last path.

    Args:
        sequence: Sequence to extend; it is not modified.
        node_id: Node to append at the end of the last path.

    Returns:
        A ``graph.Sequence`` with the same id and metadata. If ``sequence``
        has no paths, the result has a single one-node path; otherwise all
        earlier paths are kept and only the last one is extended.
    """
    if sequence.paths:
        appended = graph.SeqPath([node_id])
        updated_path = graph.SeqPath(sequence.paths[-1] + appended)
        # Keep the earlier paths as separate entries. The previous form,
        # ``[sequence.paths[:-1] + updated_path]``, concatenated them with the
        # node ids of the extended path and was only right for a single path.
        newpaths = [*sequence.paths[:-1], updated_path]
    else:
        newpaths = [graph.SeqPath([node_id])]
    return graph.Sequence(sequence.seqid, newpaths, sequence.seqmetadata)
    def test_09_inactive_edges_but_all_strands_plus(self):
        """Compare the poagraph built from the MAF fixture with 20 nodes.

        The expected nodes repeat the bases A, C, T, G, G four times without
        any alignment; seq1 is expected to be split into two paths.
        """
        maf_path = self.maf_files_dir.joinpath("test_9_inactive_edges_but_all_strands_plus.maf")

        # Four consecutive groups of five nodes, each spelling "ACTGG".
        expected_nodes = [
            graph.Node(node_id=nid(index), base=graph.Base(symbol), aligned_to=None)
            for index, symbol in enumerate('ACTGG' * 4)
        ]

        def make_sequence(name, group, *paths):
            # Each positional ``paths`` entry becomes one SeqPath.
            return graph.Sequence(
                msa.SequenceID(name),
                [graph.SeqPath([*map(nid, path)]) for path in paths],
                graph.SequenceMetadata({'group': group}))

        expected_sequences = {
            msa.SequenceID('seq0'): make_sequence('seq0', '1'),
            msa.SequenceID('seq1'): make_sequence(
                'seq1', '1',
                [0, 1, 2, 3, 4, 10, 11, 12, 13, 14],
                [5, 6, 7, 8, 9, 15, 16, 17, 18, 19]),
            msa.SequenceID('seq2'): make_sequence('seq2', '2', range(20)),
            msa.SequenceID('seq3'): make_sequence('seq3', '2'),
        }
        expected_poagraph = graph.Poagraph(expected_nodes, expected_sequences)

        maf = msa.Maf(pathtools.get_file_content_stringio(maf_path), maf_path)
        actual_poagraph, _ = builder.build_from_dagmaf(maf,
                                                       self.fasta_provider,
                                                       self.metadatacsv)

        self.assertEqual(expected_poagraph, actual_poagraph)
    def test_10_parallel_blocks_1st_and_2nd_merge_into_3rd(self):
        """Compare the poagraph built from the MAF fixture with 23 nodes.

        The expected nodes are listed in three groups (ids 0-6, 7-11 and
        12-22); each of the four sequences is a single ten-node path.
        """
        maf_path = self.maf_files_dir.joinpath("test_10_parallel_blocks_1st_and_2nd_merge_into_3rd.maf")

        # (base, index of the aligned node or None); list position == node id
        node_specs = [
            ('G', 1), ('T', 0), ('T', None), ('A', None),
            ('C', 5), ('G', 4), ('C', None),

            ('A', None), ('C', None), ('T', None), ('G', None), ('G', None),

            ('C', 13), ('G', 12), ('C', 15), ('G', 16), ('T', 14),
            ('A', 18), ('T', 17), ('A', 20), ('C', 19), ('C', 22), ('G', 21),
        ]
        expected_nodes = [
            graph.Node(node_id=nid(index),
                       base=graph.Base(symbol),
                       aligned_to=None if partner is None else nid(partner))
            for index, (symbol, partner) in enumerate(node_specs)
        ]

        sequence_specs = (
            ('seq0', '1', [7, 8, 9, 10, 11, 12, 15, 18, 19, 21]),
            ('seq1', '1', [7, 8, 9, 10, 11, 12, 15, 18, 19, 21]),
            ('seq2', '2', [0, 2, 3, 4, 6, 13, 16, 17, 20, 21]),
            ('seq3', '2', [1, 2, 3, 5, 6, 13, 14, 17, 20, 22]),
        )
        expected_sequences = {
            msa.SequenceID(name): graph.Sequence(
                msa.SequenceID(name),
                [graph.SeqPath([*map(nid, path)])],
                graph.SequenceMetadata({'group': group}))
            for name, group, path in sequence_specs
        }
        expected_poagraph = graph.Poagraph(expected_nodes, expected_sequences)

        maf = msa.Maf(pathtools.get_file_content_stringio(maf_path), maf_path)
        actual_poagraph, _ = builder.build_from_dagmaf(maf,
                                                       self.fasta_provider,
                                                       self.metadatacsv)

        self.assertEqual(expected_poagraph, actual_poagraph)
Exemplo n.º 7
0
    def test_6_missing_one_reverted_sequence_middle_minus1_1(self):
        """Compare the poagraph built from the MAF fixture with 12 nodes.

        seq1 is expected as two separate paths and seq2 as one path running
        through the nodes that fill its missing part; seq0 and seq3 have no
        paths.
        """
        maf_path = self.maf_files_dir.joinpath(
            "test_6_missing_one_reverted_sequence_middle_minus1_1.maf")

        # (base, index of the aligned node or None); list position == node id
        node_specs = [
            # block 1 because it is first in DAG and reverted
            ('A', None), ('C', None), ('C', 3), ('T', 2),
            # missing seq2, on edge (-1,1)
            ('T', None), ('C', None), ('A', 7), ('C', 6),
            ('C', None), ('T', None), ('A', 11), ('C', 10),
        ]
        expected_nodes = [
            graph.Node(node_id=nid(index),
                       base=graph.Base(symbol),
                       aligned_to=None if partner is None else nid(partner))
            for index, (symbol, partner) in enumerate(node_specs)
        ]

        def make_sequence(name, group, *paths):
            # Each positional ``paths`` entry becomes one SeqPath.
            return graph.Sequence(
                msa.SequenceID(name),
                [graph.SeqPath([*map(nid, path)]) for path in paths],
                graph.SequenceMetadata({'group': group}))

        expected_sequences = {
            msa.SequenceID('seq0'): make_sequence('seq0', '1'),
            msa.SequenceID('seq1'): make_sequence(
                'seq1', '1', [0, 1, 2], [6, 8, 9, 10]),
            msa.SequenceID('seq2'): make_sequence(
                'seq2', '2', [0, 1, 3, 4, 5, 7, 11]),
            msa.SequenceID('seq3'): make_sequence('seq3', '2'),
        }
        expected_poagraph = graph.Poagraph(expected_nodes, expected_sequences)

        maf = msa.Maf(pathtools.get_file_content_stringio(maf_path), maf_path)
        actual_poagraph, _ = builder.build_from_dagmaf(maf,
                                                       self.fasta_provider,
                                                       self.metadatacsv)

        self.assertEqual(expected_poagraph, actual_poagraph)
    def test_subpoagraph_unfilled_nodes(self):
        """Check the PO input for a graph containing placeholder nodes.

        Nodes 3 and 4 carry the placeholder symbol ``?``. Both sequences are
        passed to the translator, and the expected content lists all nine
        nodes with the placeholder symbol emitted unchanged.
        """
        symbol_for_unknown = '?'
        nodes = [
            graph.Node(node_id=nid(0), base=b('A'), aligned_to=nid(1)),
            graph.Node(node_id=nid(1), base=b('C'), aligned_to=nid(0)),
            graph.Node(node_id=nid(2), base=b('G'), aligned_to=None),
            graph.Node(node_id=nid(3),
                       base=b(symbol_for_unknown),
                       aligned_to=None),
            graph.Node(node_id=nid(4),
                       base=b(symbol_for_unknown),
                       aligned_to=None),
            graph.Node(node_id=nid(5), base=b('G'), aligned_to=None),
            graph.Node(node_id=nid(6), base=b('C'), aligned_to=None),
            graph.Node(node_id=nid(7), base=b('A'), aligned_to=None),
            # The ninth node is node 8: both sequence paths end at nid(8) and
            # the expected content has LENGTH=9. It used to be declared with
            # the duplicate id nid(5).
            graph.Node(node_id=nid(8), base=b('T'), aligned_to=None)
        ]

        sequences = {
            msa.SequenceID('seq1'):
            graph.Sequence(msa.SequenceID('seq1'),
                           [graph.SeqPath([*map(nid, [0, 2, 3, 4, 7, 8])])],
                           graph.SequenceMetadata({'group': '1'})),
            msa.SequenceID('seq2'):
            graph.Sequence(msa.SequenceID('seq2'),
                           [graph.SeqPath([*map(nid, [1, 2, 5, 6, 7, 8])])],
                           graph.SequenceMetadata({'group': '1'}))
        }
        poagraph = graph.Poagraph(nodes, sequences)

        translator = poa._PoagraphPOTranslator(
            poagraph, [msa.SequenceID('seq1'),
                       msa.SequenceID('seq2')])
        actual_po_content = translator.get_input_po_content()
        expected_po_content = "VERSION=pangenome\n" \
                              "NAME=pangenome\n" \
                              "TITLE=pangenome\n" \
                              "LENGTH=9\n" \
                              "SOURCECOUNT=2\n" \
                              "SOURCENAME=seq1\n" \
                              "SOURCEINFO=6 0 100 -1 seq1\n" \
                              "SOURCENAME=seq2\n" \
                              "SOURCEINFO=6 1 100 -1 seq2\n" \
                              "a:S0A1\n" \
                              "c:S1A0\n" \
                              "g:L0L1S0S1\n" \
                              f"{symbol_for_unknown}:L2S0\n" \
                              f"{symbol_for_unknown}:L3S0\n" \
                              "g:L2S1\n" \
                              "c:L5S1\n" \
                              "a:L4L6S0S1\n" \
                              "t:L7S0S1"
        self.assertEqual(expected_po_content, actual_po_content)
    def test_1_typical_poagraph(self):
        """Read ``test_1.po`` and compare the result with 18 expected nodes.

        The nodes and sequences returned by ``po2poagraph.get_poagraph`` are
        wrapped in a ``graph.Poagraph`` before the comparison.
        """
        po_path = self.po_files_dir.joinpath("test_1.po")

        # (base, index of the aligned node or None); list position == node id
        node_specs = [
            ('A', 1), ('G', 0), ('C', 3), ('G', 2), ('A', 5), ('T', 4),
            ('G', None), ('G', None),
            ('A', 9), ('C', 10), ('G', 11), ('T', 8),
            ('A', 13), ('C', 12), ('T', None),
            ('A', 16), ('C', 17), ('G', 15),
        ]
        expected_nodes = [
            graph.Node(node_id=nid(index),
                       base=bid(symbol),
                       aligned_to=None if partner is None else nid(partner))
            for index, (symbol, partner) in enumerate(node_specs)
        ]

        sequence_specs = (
            ('seq0', '1', [0, 2, 4, 6, 7, 8, 12, 14, 16]),
            ('seq1', '1', [1, 2, 5, 6, 7, 9]),
            ('seq2', '2', [3, 4, 6, 7, 10, 12, 14, 17]),
            ('seq3', '2', [11, 13, 14, 15]),
        )
        expected_sequences = {
            msa.SequenceID(name): graph.Sequence(
                msa.SequenceID(name),
                [graph.SeqPath([*map(nid, path)])],
                graph.SequenceMetadata({'group': group}))
            for name, group, path in sequence_specs
        }

        expected_poagraph = graph.Poagraph(expected_nodes, expected_sequences)
        po_input = msa.Po(pathtools.get_file_content_stringio(po_path),
                          po_path)
        nodes, sequences = po2poagraph.get_poagraph(po_input,
                                                    self.metadatacsv)
        actual_poagraph = graph.Poagraph(nodes, sequences)
        self.assertEqual(expected_poagraph, actual_poagraph)
Exemplo n.º 10
0
    def test_2_missing_sequence_end(self):
        """Compare the poagraph built from the MAF fixture with 13 nodes.

        seq1 and seq2 are each expected as a single seven-node path; seq0
        and seq3 have no paths.
        """
        maf_path = self.maf_files_dir.joinpath(
            "test_2_missing_sequence_end.maf")

        # (base, index of the aligned node or None); list position == node id
        node_specs = [
            ('A', 1), ('G', 0), ('C', 3), ('G', 2), ('T', None),
            ('A', 6), ('C', 5),
            ('A', None), ('G', None), ('G', None),
            ('T', None), ('G', None), ('T', None),
        ]
        expected_nodes = [
            graph.Node(node_id=nid(index),
                       base=graph.Base(symbol),
                       aligned_to=None if partner is None else nid(partner))
            for index, (symbol, partner) in enumerate(node_specs)
        ]

        def make_sequence(name, group, *paths):
            # Each positional ``paths`` entry becomes one SeqPath.
            return graph.Sequence(
                msa.SequenceID(name),
                [graph.SeqPath([*map(nid, path)]) for path in paths],
                graph.SequenceMetadata({'group': group}))

        expected_sequences = {
            msa.SequenceID('seq0'): make_sequence('seq0', '1'),
            msa.SequenceID('seq1'): make_sequence(
                'seq1', '1', [0, 2, 4, 5, 8, 9, 10]),
            msa.SequenceID('seq2'): make_sequence(
                'seq2', '2', [1, 3, 4, 6, 7, 11, 12]),
            msa.SequenceID('seq3'): make_sequence('seq3', '2'),
        }
        expected_poagraph = graph.Poagraph(expected_nodes, expected_sequences)

        maf = msa.Maf(pathtools.get_file_content_stringio(maf_path), maf_path)
        actual_poagraph, _ = builder.build_from_dagmaf(maf,
                                                       self.fasta_provider,
                                                       self.metadatacsv)

        self.assertEqual(expected_poagraph, actual_poagraph)
Exemplo n.º 11
0
def _get_children_nodes_looping(node: tree.AffinityNode,
                                poagraph: graph.Poagraph,
                                output_dir: Path,
                                blosum_path: Path,
                                p: parameters.P,
                                current_max_affinity_node_id: int) -> List[tree.AffinityNode]:
    """Generate the children of the given Affinity Tree node.

    Children are created one at a time until every sequence of ``node`` is
    assigned. For each child, a consensus is computed for the current
    candidate sequences and the candidates are narrowed to the qualified
    ones; this repeats until the qualified list equals the candidate list or
    the attempt counter reaches 10.

    Args:
        node: Affinity Tree node to split.
        poagraph: Poagraph used for consensus and compatibility calculation.
        output_dir: Directory passed to ``poa.get_consensuses``.
        blosum_path: Path passed to ``poa.get_consensuses``.
        p: Parameter passed to ``poagraph.get_compatibilities``.
        current_max_affinity_node_id: Offset for new node ids; children get
            ids ``current_max_affinity_node_id + 1``, ``+ 2``, ...

    Returns:
        The created child nodes, in creation order.
    """

    children: List[tree.AffinityNode] = []
    unassigned_ids: List[msa.SequenceID] = node.sequences
    detailed_logger.info(f"""Getting children nodes for
                             affinity node {node.id_}...""")

    max_attempts = 10
    created_children = 0
    so_far_cutoffs: List[poagraph.Compatibility] = []
    while unassigned_ids:
        detailed_logger.info(f"### Getting child {len(so_far_cutoffs)}...")
        attempt = 0
        candidates = unassigned_ids
        while True:
            consensuses = poa.get_consensuses(
                poagraph,
                candidates,
                output_dir,
                f"parent_{node.id_}_child_{len(so_far_cutoffs)}_attempt_{attempt}",
                blosum_path,
                parameters.Hbmin(0),
                specific_consensuses_id=[0])
            consensus_path = consensuses[0].path

            # Compatibilities cover all unassigned sequences, not only the
            # current candidates, plus a "parent" entry with node.mincomp.
            compatibilities = poagraph.get_compatibilities(
                sequences_ids=unassigned_ids,
                consensus_path=consensus_path,
                p=p)
            compatibilities[msa.SequenceID("parent")] = node.mincomp
            qualified_ids, _ = _get_qualified_sequences_ids_and_cutoff(
                compatibilities_to_max_c=compatibilities,
                so_far_cutoffs=so_far_cutoffs,
                splitted_node_id=node.id_)

            settled = qualified_ids == candidates or attempt == max_attempts
            if not settled:
                # Narrow the candidates and try again with a new consensus.
                candidates = qualified_ids
                attempt += 1
                continue

            if attempt == max_attempts:
                detailed_logger.info("Attempt treshold 10 exceeded!")
            created_children += 1

            child = tree.AffinityNode(
                id_=tree.AffinityNodeID(current_max_affinity_node_id + created_children),
                parent=node.id_,
                sequences=qualified_ids,
                mincomp=_get_min_comp(node_sequences_ids=qualified_ids,
                                      comps_to_consensus=compatibilities),
                consensus=graph.SeqPath(consensus_path))
            children.append(child)
            unassigned_ids = list(set(unassigned_ids) - set(qualified_ids))
            so_far_cutoffs.append(child.mincomp)
            break

    detailed_logger.info("Children nodes generated.")

    return children
Exemplo n.º 12
0
    def test_subpoagraph_construction_full_graph(self):
        """Check the PO input when the only sequence covers every node.

        The graph is a chain of five unaligned nodes (A, A, C, A, T) that
        ``seq0`` visits in order; the expected content lists all five.
        """
        nodes = [
            graph.Node(node_id=nid(index), base=b(letter), aligned_to=None)
            for index, letter in enumerate('AACAT')
        ]

        seq0_path = graph.SeqPath([nid(i) for i in [0, 1, 2, 3, 4]])
        sequences = {
            msa.SequenceID('seq0'): graph.Sequence(
                msa.SequenceID('seq0'),
                [seq0_path],
                graph.SequenceMetadata({'group': '1'}))
        }
        poagraph = graph.Poagraph(nodes, sequences)

        requested_ids = [msa.SequenceID('seq0')]
        translator = poa._PoagraphPOTranslator(poagraph, requested_ids)
        actual_po_content = translator.get_input_po_content()
        expected_po_content = (
            "VERSION=pangenome\n"
            "NAME=pangenome\n"
            "TITLE=pangenome\n"
            "LENGTH=5\n"
            "SOURCECOUNT=1\n"
            "SOURCENAME=seq0\n"
            "SOURCEINFO=5 0 100 -1 seq0\n"
            "a:S0\n"
            "a:L0S0\n"
            "c:L1S0\n"
            "a:L2S0\n"
            "t:L3S0"
        )
        self.assertEqual(expected_po_content, actual_po_content)
    def test_1_typical_poagraph(self):
        """Serialise an 18-node poagraph and compare it with ``test_1.po``.

        The graph has four single-path sequences, all in group '1'; the
        output of ``po.poagraph_to_PangenomePO`` must equal the file content.
        """
        expected_po_content_path = self.po_files_dir.joinpath("test_1.po")

        # (base, index of the aligned node or None); list position == node id
        node_specs = [
            ('A', 1), ('G', 0), ('C', 3), ('G', 2), ('A', 5), ('T', 4),
            ('G', None), ('G', None),
            ('A', 9), ('C', 10), ('G', 11), ('T', 8),
            ('A', 13), ('C', 12), ('T', None),
            ('A', 16), ('C', 17), ('G', 15),
        ]
        poagraph_nodes = [
            graph.Node(node_id=nid(index),
                       base=bid(symbol),
                       aligned_to=None if partner is None else nid(partner))
            for index, (symbol, partner) in enumerate(node_specs)
        ]

        path_by_name = (
            ('seq0', [0, 2, 4, 6, 7, 8, 12, 14, 16]),
            ('seq1', [1, 2, 5, 6, 7, 9]),
            ('seq2', [3, 4, 6, 7, 10, 12, 14, 17]),
            ('seq3', [11, 13, 14, 15]),
        )
        poagraph_sequences = {
            msa.SequenceID(name): graph.Sequence(
                msa.SequenceID(name),
                [graph.SeqPath([*map(nid, path)])],
                graph.SequenceMetadata({'group': '1'}))
            for name, path in path_by_name
        }

        poagraph = graph.Poagraph(poagraph_nodes, poagraph_sequences)

        actual_po_content = po.poagraph_to_PangenomePO(poagraph)
        expected_po_content = pathtools.get_file_content(expected_po_content_path)
        self.assertEqual(expected_po_content, actual_po_content)
    def test_2_consensuses_and_empty_sequences(self):
        """Serialize a poagraph with two consensuses and two empty sequences.

        The generated PO text must be equal to the content of the
        ``test_2.po`` fixture file.
        """
        expected_po_content_path = self.po_files_dir.joinpath("test_2.po")

        # (base, id of the aligned node or None); list position is the node id.
        node_specs = [
            ('C', 1), ('T', 0),
            ('A', 3), ('G', 2),
            ('C', None), ('T', None),
            ('A', 7), ('T', 6),
            ('G', None),
        ]
        poagraph_nodes = [
            graph.Node(node_id=nid(index),
                       base=bid(symbol),
                       aligned_to=None if partner is None else nid(partner))
            for index, (symbol, partner) in enumerate(node_specs)
        ]

        # (sequence name, node ids of its single path or None for no path,
        #  metadata dict or None for no metadata object)
        sequence_specs = [
            ('seq0', [0, 3, 4, 5, 6, 8], {'group': '1'}),
            ('seq1', [1, 2, 4, 5, 7, 8], {'group': '1'}),
            ('seq2', None, {'group': '1'}),
            ('seq3', None, {'group': '1'}),
            ('CONSENS0', [0, 3, 4, 5, 7, 8], None),
            ('CONSENS1', [1, 2, 4, 5, 6, 8], None),
        ]
        poagraph_sequences = {}
        for name, path_node_ids, meta in sequence_specs:
            if path_node_ids is None:
                seq_paths = []
            else:
                seq_paths = [graph.SeqPath([*map(nid, path_node_ids)])]
            seq_metadata = None if meta is None else graph.SequenceMetadata(meta)
            poagraph_sequences[msa.SequenceID(name)] = graph.Sequence(
                msa.SequenceID(name), seq_paths, seq_metadata)

        poagraph = graph.Poagraph(poagraph_nodes, poagraph_sequences)

        actual_po_content = po.poagraph_to_PangenomePO(poagraph)
        expected_po_content = pathtools.get_file_content(expected_po_content_path)
        self.assertEqual(expected_po_content, actual_po_content)
Exemplo n.º 15
0
    def test_2_consensuses_and_empty_sequences(self):
        """Read ``test_2.po`` and compare it with a hand-built poagraph.

        The expected poagraph has two sequences with a path, two sequences
        without any path and two consensus sequences with empty metadata.
        """
        po_path = self.po_files_dir.joinpath("test_2.po")

        # (base, id of the aligned node or None); list position is the node id.
        node_specs = [
            ('C', 1), ('T', 0),
            ('A', 3), ('G', 2),
            ('C', None), ('T', None),
            ('A', 7), ('T', 6),
            ('G', None),
        ]
        expected_nodes = [
            graph.Node(node_id=nid(index),
                       base=bid(symbol),
                       aligned_to=None if partner is None else nid(partner))
            for index, (symbol, partner) in enumerate(node_specs)
        ]

        # (sequence name, node ids of its single path or None for no path,
        #  metadata dict)
        sequence_specs = [
            ('seq0', [0, 3, 4, 5, 6, 8], {'group': '1'}),
            ('seq1', [1, 2, 4, 5, 7, 8], {'group': '1'}),
            ('seq2', None, {'group': '2'}),
            ('seq3', None, {'group': '2'}),
            ('CONSENS0', [0, 3, 4, 5, 7, 8], {}),
            ('CONSENS1', [1, 2, 4, 5, 6, 8], {}),
        ]
        expected_sequences = {}
        for name, path_node_ids, meta in sequence_specs:
            if path_node_ids is None:
                seq_paths = []
            else:
                seq_paths = [graph.SeqPath([*map(nid, path_node_ids)])]
            expected_sequences[msa.SequenceID(name)] = graph.Sequence(
                msa.SequenceID(name), seq_paths, graph.SequenceMetadata(meta))

        expected_poagraph = graph.Poagraph(expected_nodes, expected_sequences)

        po_input = msa.Po(pathtools.get_file_content_stringio(po_path), po_path)
        nodes, sequences = po2poagraph.get_poagraph(po_input, self.metadatacsv)
        actual_poagraph = graph.Poagraph(nodes, sequences)
        self.assertEqual(expected_poagraph, actual_poagraph)
Exemplo n.º 16
0
    def setUp(self):
        """Build the 15-node, 4-sequence poagraph stored in ``self.poagraph``."""
        # (base, id of the aligned node or None); list position is the node id.
        node_specs = [
            ('T', None),
            ('A', 2), ('G', 1),
            ('A', 4), ('C', 3),
            ('A', 6), ('C', 7), ('G', 8), ('T', 5),
            ('A', None),
            ('C', 11), ('T', 10),
            ('G', None),
            ('A', 14), ('C', 13),
        ]
        nodes = [
            graph.Node(node_id=nid(index),
                       base=b(symbol),
                       aligned_to=None if partner is None else nid(partner))
            for index, (symbol, partner) in enumerate(node_specs)
        ]

        # (sequence name, node ids of its single path); every sequence
        # belongs to group '1'.
        sequence_specs = [
            ('seq0', [0, 1, 3, 5, 9, 10, 13]),
            ('seq1', [1, 3, 6, 9, 11]),
            ('seq2', [2, 4, 7, 9, 11, 12]),
            ('seq3', [2, 4, 8, 9, 11, 12, 14]),
        ]
        sequences = {}
        for name, path_node_ids in sequence_specs:
            sequences[msa.SequenceID(name)] = graph.Sequence(
                msa.SequenceID(name),
                [graph.SeqPath([*map(nid, path_node_ids)])],
                graph.SequenceMetadata({'group': '1'}))

        self.poagraph = graph.Poagraph(nodes, sequences)
    def test_06_1st_block_separates_into_2_branches_which_connect_in_3rd_block(self):
        """Build a poagraph from the test_6 MAF fixture and compare it with
        a hand-built expected poagraph."""
        maf_path = self.maf_files_dir.joinpath(
            "test_6_1st_block_separates_into_2_branches_which_connect_in_3rd_block.maf")

        # (base, id of the aligned node or None); list position is the node id.
        node_specs = [
            ('A', 1), ('C', 2), ('G', 0),
            ('C', None),
            ('A', 5), ('T', 4),

            ('G', None),
            ('G', None),

            ('C', 9), ('G', 10), ('T', 8),
        ]
        expected_nodes = [
            graph.Node(node_id=nid(index),
                       base=graph.Base(symbol),
                       aligned_to=None if partner is None else nid(partner))
            for index, (symbol, partner) in enumerate(node_specs)
        ]

        # (sequence name, node ids of its single path or None for no path,
        #  group label)
        sequence_specs = [
            ('seq0', [0, 3, 4, 8], '1'),
            ('seq1', [1, 3, 5, 6, 7, 9], '1'),
            ('seq2', [2, 3, 5, 10], '2'),
            ('seq3', None, '2'),
        ]
        expected_sequences = {}
        for name, path_node_ids, group in sequence_specs:
            if path_node_ids is None:
                seq_paths = []
            else:
                seq_paths = [graph.SeqPath([*map(nid, path_node_ids)])]
            expected_sequences[msa.SequenceID(name)] = graph.Sequence(
                msa.SequenceID(name),
                seq_paths,
                graph.SequenceMetadata({'group': group}))

        expected_poagraph = graph.Poagraph(expected_nodes, expected_sequences)

        maf_input = msa.Maf(pathtools.get_file_content_stringio(maf_path),
                            maf_path)
        actual_poagraph, _ = builder.build_from_dagmaf(maf_input,
                                                       self.fasta_provider,
                                                       self.metadatacsv)

        self.assertEqual(expected_poagraph, actual_poagraph)
    def test_02_seq_starts_in_second_block(self):
        """Build a poagraph from the test_2 MAF fixture and compare it with
        a hand-built expected poagraph whose nodes carry block ids."""
        maf_path = self.maf_files_dir.joinpath(
            "test_2_seq_starts_in_second_block.maf")

        # (base, id of the aligned node or None, block id);
        # list position is the node id.
        node_specs = [
            ('C', None, 0), ('T', None, 0), ('G', None, 0),

            ('T', None, 1),

            ('G', 5, 2), ('T', 4, 2),
            ('A', None, 2), ('A', None, 2), ('C', None, 2),
        ]
        expected_nodes = [
            graph.Node(node_id=nid(index),
                       base=graph.Base(symbol),
                       aligned_to=None if partner is None else nid(partner),
                       block_id=bid(block))
            for index, (symbol, partner, block) in enumerate(node_specs)
        ]

        # (sequence name, node ids of its single path or None for no path,
        #  group label)
        sequence_specs = [
            ('seq0', [1, 2, 3], '1'),
            ('seq1', [0, 5, 7], '1'),
            ('seq2', [3, 4, 6, 8], '2'),
            ('seq3', None, '2'),
        ]
        expected_sequences = {}
        for name, path_node_ids, group in sequence_specs:
            if path_node_ids is None:
                seq_paths = []
            else:
                seq_paths = [graph.SeqPath([*map(nid, path_node_ids)])]
            expected_sequences[msa.SequenceID(name)] = graph.Sequence(
                msa.SequenceID(name),
                seq_paths,
                graph.SequenceMetadata({'group': group}))

        expected_poagraph = graph.Poagraph(expected_nodes, expected_sequences)

        maf_input = msa.Maf(pathtools.get_file_content_stringio(maf_path),
                            maf_path)
        actual_poagraph, _ = builder.build_from_dagmaf(maf_input,
                                                       self.fasta_provider,
                                                       self.metadatacsv)

        self.assertEqual(expected_poagraph, actual_poagraph)
    def test_08_reversed_block(self):
        """Build a poagraph from the test_8 MAF fixture and compare it with
        a hand-built expected poagraph."""
        maf_path = self.maf_files_dir.joinpath("test_8_reversed_block.maf")

        # (base, id of the aligned node or None); list position is the node id.
        node_specs = [
            ('C', None),
            ('A', None),
            ('T', None),
            # next block is reversed because it was converted to dag
            ('G', None),
            ('G', None),
            ('A', 6),
            ('G', 5),
            ('A', None),
            ('G', None),
            ('T', None),
        ]
        expected_nodes = [
            graph.Node(node_id=nid(index),
                       base=graph.Base(symbol),
                       aligned_to=None if partner is None else nid(partner))
            for index, (symbol, partner) in enumerate(node_specs)
        ]

        # (sequence name, node ids of its single path or None for no path,
        #  group label)
        sequence_specs = [
            ('seq0', None, '1'),
            ('seq1', [0, 1, 3, 4, 5, 7, 8, 9], '1'),
            ('seq2', [0, 1, 2, 3, 4, 6, 7, 8, 9], '2'),
            ('seq3', [0, 1, 2, 3, 4, 6, 7, 9], '2'),
        ]
        expected_sequences = {}
        for name, path_node_ids, group in sequence_specs:
            if path_node_ids is None:
                seq_paths = []
            else:
                seq_paths = [graph.SeqPath([*map(nid, path_node_ids)])]
            expected_sequences[msa.SequenceID(name)] = graph.Sequence(
                msa.SequenceID(name),
                seq_paths,
                graph.SequenceMetadata({'group': group}))

        expected_poagraph = graph.Poagraph(expected_nodes, expected_sequences)

        maf_input = msa.Maf(pathtools.get_file_content_stringio(maf_path),
                            maf_path)
        actual_poagraph, _ = builder.build_from_dagmaf(maf_input,
                                                       self.fasta_provider,
                                                       self.metadatacsv)

        self.assertEqual(expected_poagraph, actual_poagraph)
Exemplo n.º 20
0
def _add_node_to_sequence(build_state: _BuildState, seq_id: msa.SequenceID,
                          join_with: graph.NodeID,
                          node_id: graph.NodeID) -> None:
    """Attach ``node_id`` to one of the paths of the sequence ``seq_id``.

    A new single-node path is started when the sequence has no paths yet or
    when ``join_with`` is None. Otherwise the node is appended to the first
    path whose last node equals ``join_with``.

    Raises:
        PoagraphBuildException: If no existing path ends with ``join_with``.
    """
    seq_paths = build_state.sequences[seq_id].paths

    # Nothing to join with: open a fresh path holding only this node.
    if not seq_paths or join_with is None:
        seq_paths.append(graph.SeqPath([node_id]))
        return

    for candidate in seq_paths:
        if candidate[-1] != join_with:
            continue
        candidate.append(node_id)
        return

    raise PoagraphBuildException("No path with specified last node id.")
Exemplo n.º 21
0
    def test_1_p_parameter_influence(self, p: at_params.P,
                                     expected_cutoff: graph.Compatibility):
        """Check the node cutoff found for compatibilities computed with ``p``.

        Five sequences share a progressively shorter prefix with the
        consensus path; their compatibilities are passed to
        ``at_builders._find_node_cutoff`` and the resulting cutoff value is
        compared with ``expected_cutoff``.
        """
        nodes = [
            graph.Node(node_id=nid(index), base=b(symbol), aligned_to=None)
            for index, symbol in enumerate('TAGACACGTA')
        ]

        # NOTE(review): the paths below use node ids 10-19 although `nodes`
        # only holds ids 0-9 - presumably intentional because only the ids
        # are compared with the consensus path; confirm.
        sequence_specs = [
            ('seq0', [10, 11, 12, 13, 14, 15, 16, 17, 18, 9]),
            ('seq1', [10, 11, 12, 13, 14, 15, 16, 17, 8, 9]),
            ('seq2', [10, 11, 12, 13, 14, 15, 16, 7, 8, 9]),
            ('seq3', [10, 11, 12, 3, 4, 5, 6, 7, 8, 9]),
            ('seq4', [10, 11, 2, 3, 4, 5, 6, 7, 8, 9]),
        ]
        # Each Sequence carries the same id as its dictionary key (the 'seq4'
        # entry used to be constructed with the id 'seq3').
        sequences = {
            msa.SequenceID(name):
            graph.Sequence(msa.SequenceID(name),
                           [graph.SeqPath([*map(nid, path_node_ids)])],
                           graph.SequenceMetadata({}))
            for name, path_node_ids in sequence_specs
        }

        poagraph = graph.Poagraph(nodes, sequences)

        consensus_path = graph.SeqPath(
            [*map(nid, [10, 11, 12, 13, 14, 15, 16, 17, 18, 19])])
        compatibilities = poagraph.get_compatibilities(
            poagraph.get_sequences_ids(), consensus_path, p)

        actual_cutoff = at_builders._find_node_cutoff(
            list(compatibilities.values()), []).cutoff
        self.assertAlmostEqual(expected_cutoff.value, actual_cutoff.value)
Exemplo n.º 22
0
def _get_poagraph_paths_and_nodes(po_lines: List[str],
                                  sequences_info: Dict[int, POSequenceInfo],
                                  sequences: Dict[msa.SequenceID, graph.Sequence]) -> \
        Tuple[List[graph.Node], Dict[msa.SequenceID, graph.Sequence]]:
    """Create nodes from the node lines of a PO file and extend sequence paths.

    The node count is read from ``po_lines[3]`` and the path count from
    ``po_lines[4]``. Node lines start after the five leading lines plus two
    lines per path. Every node is appended to the path of each sequence
    listed on its line; ``sequences`` is modified in place.

    Returns:
        The list of created nodes and the (mutated) ``sequences`` mapping.
    """
    nodes_count = int(_extract_line_value(po_lines[3]))
    paths_count = int(_extract_line_value(po_lines[4]))
    first_node_line = 5 + paths_count * 2

    # Pre-sized so every node is stored at the index equal to its id.
    nodes: List[graph.Node] = [None] * nodes_count
    for node_index in range(nodes_count):
        node_line = po_lines[first_node_line + node_index]
        # The first character of a node line is the base letter.
        base = graph.Base(node_line[0].upper())
        in_nodes, po_sequences_ids, aligned_to = _extract_node_parameters(node_line)

        # Translate the ids used on the PO line into sequence names.
        sequences_ids = []
        for po_sequences_id in po_sequences_ids:
            sequences_ids.append(sequences_info[po_sequences_id].name)

        nodes[node_index] = graph.Node(graph.NodeID(node_index),
                                       base,
                                       graph.NodeID(aligned_to))

        for seq_id in sequences_ids:
            seq_paths = sequences[seq_id].paths
            if len(seq_paths) != 1:
                seq_paths.append(graph.SeqPath([graph.NodeID(node_index)]))
            else:
                seq_paths[0].append(graph.NodeID(node_index))
    return nodes, sequences
Exemplo n.º 23
0
    def test_1_messy_sequences(self):
        """Convert the test_1 MAF fixture with ``maf2poagraph.get_poagraph``
        and compare the resulting nodes and sequences with expected ones."""
        maf_path = self.maf_files_dir.joinpath("test_1_messy_sequences.maf")

        # (base, id of the aligned node or None, block id);
        # list position is the node id.
        node_specs = [
            ('A', None, 0),
            ('A', 2, 0),
            ('C', 1, 0),
            ('T', None, 0),
            ('C', 5, 0),
            ('G', 4, 0),
            ('A', None, 1),
            ('C', None, 1),
            ('G', None, 1),
            ('C', 10, 2),
            ('G', 9, 2),
            ('T', None, 2),
            ('C', None, 2),
            ('C', None, 2),
            ('A', None, 2),
        ]
        expected_nodes = [
            graph.Node(node_id=nid(index),
                       base=graph.Base(symbol),
                       aligned_to=None if partner is None else nid(partner),
                       block_id=bid(block))
            for index, (symbol, partner, block) in enumerate(node_specs)
        ]

        # (sequence name, node ids of its single path or None for no path,
        #  group label)
        sequence_specs = [
            ('seq0', [1, 3, 4, 6, 8, 9, 11, 12], '1'),
            ('seq1', [2, 3, 4, 10, 11, 12, 13, 14], '1'),
            ('seq2', [0, 2, 5, 6, 7, 10, 11, 12, 14], '2'),
            ('seq3', None, '2'),
        ]
        expected_sequences = {}
        for name, path_node_ids, group in sequence_specs:
            if path_node_ids is None:
                seq_paths = []
            else:
                seq_paths = [graph.SeqPath([*map(nid, path_node_ids)])]
            expected_sequences[msa.SequenceID(name)] = graph.Sequence(
                msa.SequenceID(name),
                seq_paths,
                graph.SequenceMetadata({'group': group}))

        maf_input = msa.Maf(pathtools.get_file_content_stringio(maf_path),
                            maf_path)
        actual_nodes, actual_sequences = maf2poagraph.get_poagraph(
            maf_input, self.metadatacsv)

        self.assertEqual(expected_nodes, actual_nodes)
        self.assertEqual(expected_sequences, actual_sequences)