def test_05_single_block_single_nucletodide(self):
        # Builds a poagraph with builder.build_from_dagmaf and compares it with
        # a hand-written graph made of a single 'A' node that all four
        # sequences pass through.
        maf_file = self.maf_files_dir.joinpath(
            "test_5_single_block_single_nucletodide.maf")

        only_node = graph.Node(node_id=nid(0),
                               base=graph.Base('A'),
                               aligned_to=None,
                               block_id=bid(0))

        # (sequence name, metadata group); each sequence has one path: node 0.
        seq_rows = (('seq0', '1'), ('seq1', '1'), ('seq2', '2'), ('seq3', '2'))
        sequences = {
            msa.SequenceID(name): graph.Sequence(
                msa.SequenceID(name),
                [graph.SeqPath([nid(0)])],
                graph.SequenceMetadata({'group': group}))
            for name, group in seq_rows
        }

        wanted = graph.Poagraph([only_node], sequences)
        built, _ = builder.build_from_dagmaf(
            msa.Maf(pathtools.get_file_content_stringio(maf_file), maf_file),
            self.fasta_provider,
            self.metadatacsv)

        self.assertEqual(wanted, built)
    def test_7_missing_one_reverted_sequence_middle_minus1_minus1(self):
        # Builds a poagraph with builder.build_from_dagmaf, using a constant
        # missing-base provider, and compares it with the hand-written graph.
        maf_file = self.maf_files_dir.joinpath(
            "test_7_missing_one_reverted_sequence_middle_minus1_minus1.maf")

        gap = self.missing_n.value
        # One row per node: (base, id of the aligned node or None).
        # The row position is the node id.
        node_rows = (
            # block 0
            ('A', None), ('C', None), ('T', None), ('A', None),
            # missing seq2
            (gap, None), (gap, None),
            # block 1
            ('A', 7), ('G', 6), ('C', 9), ('G', 8), ('C', 11), ('T', 10),
        )
        nodes = [
            graph.Node(node_id=nid(idx),
                       base=graph.Base(symbol),
                       aligned_to=None if partner is None else nid(partner))
            for idx, (symbol, partner) in enumerate(node_rows)
        ]

        # One row per sequence: (name, metadata group, paths as node ids).
        seq_rows = (
            ('seq0', '1', ()),
            ('seq1', '1', ([0, 1, 2, 3, 7, 9, 11],)),
            ('seq2', '2', ([0, 1, 4, 5, 6, 8, 10],)),
            ('seq3', '2', ()),
        )
        sequences = {
            msa.SequenceID(name): graph.Sequence(
                msa.SequenceID(name),
                [graph.SeqPath([nid(i) for i in path]) for path in paths],
                graph.SequenceMetadata({'group': group}))
            for name, group, paths in seq_rows
        }

        wanted = graph.Poagraph(nodes, sequences)
        built, _ = builder.build_from_dagmaf(
            msa.Maf(pathtools.get_file_content_stringio(maf_file), maf_file),
            missings.ConstBaseProvider(self.missing_n),
            self.metadatacsv)

        self.assertEqual(wanted, built)
    def test_04_single_block_no_nucleotides(self):
        # Builds a poagraph with builder.build_from_dagmaf and expects a graph
        # with no nodes, where every sequence has an empty list of paths.
        maf_file = self.maf_files_dir.joinpath(
            "test_4_single_block_no_nucleotides.maf")

        nodes = []

        # (sequence name, metadata group); no sequence has any path.
        seq_rows = (('seq0', '1'), ('seq1', '1'), ('seq2', '2'), ('seq3', '2'))
        sequences = {
            msa.SequenceID(name): graph.Sequence(
                msa.SequenceID(name),
                [],
                graph.SequenceMetadata({'group': group}))
            for name, group in seq_rows
        }

        wanted = graph.Poagraph(nodes, sequences)
        built, _ = builder.build_from_dagmaf(
            msa.Maf(pathtools.get_file_content_stringio(maf_file), maf_file),
            self.fasta_provider,
            self.metadatacsv)

        self.assertEqual(wanted, built)
    def test_10_parallel_blocks_1st_and_2nd_merge_into_3rd(self):
        # Builds a poagraph with builder.build_from_dagmaf and compares it with
        # the hand-written 23-node graph below.
        maf_file = self.maf_files_dir.joinpath(
            "test_10_parallel_blocks_1st_and_2nd_merge_into_3rd.maf")

        # One row per node: (base, id of the aligned node or None).
        # The row position is the node id; blank lines separate node groups.
        node_rows = (
            ('G', 1), ('T', 0), ('T', None), ('A', None),
            ('C', 5), ('G', 4), ('C', None),

            ('A', None), ('C', None), ('T', None), ('G', None), ('G', None),

            ('C', 13), ('G', 12), ('C', 15), ('G', 16), ('T', 14),
            ('A', 18), ('T', 17), ('A', 20), ('C', 19), ('C', 22), ('G', 21),
        )
        nodes = [
            graph.Node(node_id=nid(idx),
                       base=graph.Base(symbol),
                       aligned_to=None if partner is None else nid(partner))
            for idx, (symbol, partner) in enumerate(node_rows)
        ]

        # One row per sequence: (name, metadata group, single path as node ids).
        seq_rows = (
            ('seq0', '1', [7, 8, 9, 10, 11, 12, 15, 18, 19, 21]),
            ('seq1', '1', [7, 8, 9, 10, 11, 12, 15, 18, 19, 21]),
            ('seq2', '2', [0, 2, 3, 4, 6, 13, 16, 17, 20, 21]),
            ('seq3', '2', [1, 2, 3, 5, 6, 13, 14, 17, 20, 22]),
        )
        sequences = {
            msa.SequenceID(name): graph.Sequence(
                msa.SequenceID(name),
                [graph.SeqPath([nid(i) for i in path])],
                graph.SequenceMetadata({'group': group}))
            for name, group, path in seq_rows
        }

        wanted = graph.Poagraph(nodes, sequences)
        built, _ = builder.build_from_dagmaf(
            msa.Maf(pathtools.get_file_content_stringio(maf_file), maf_file),
            self.fasta_provider,
            self.metadatacsv)

        self.assertEqual(wanted, built)
    def test_09_inactive_edges_but_all_strands_plus(self):
        # Builds a poagraph with builder.build_from_dagmaf and compares it with
        # a hand-written graph of 20 unaligned nodes.
        maf_file = self.maf_files_dir.joinpath(
            "test_9_inactive_edges_but_all_strands_plus.maf")

        # Node bases are the five-letter pattern A, C, T, G, G repeated four
        # times (ids 0-19); no node is aligned to another.
        nodes = [
            graph.Node(node_id=nid(idx), base=graph.Base(symbol), aligned_to=None)
            for idx, symbol in enumerate('ACTGG' * 4)
        ]

        # One row per sequence: (name, metadata group, paths as node ids).
        seq_rows = (
            ('seq0', '1', ()),
            ('seq1', '1', ([0, 1, 2, 3, 4, 10, 11, 12, 13, 14],
                           [5, 6, 7, 8, 9, 15, 16, 17, 18, 19])),
            ('seq2', '2', (range(20),)),
            ('seq3', '2', ()),
        )
        sequences = {
            msa.SequenceID(name): graph.Sequence(
                msa.SequenceID(name),
                [graph.SeqPath([nid(i) for i in path]) for path in paths],
                graph.SequenceMetadata({'group': group}))
            for name, group, paths in seq_rows
        }

        wanted = graph.Poagraph(nodes, sequences)
        built, _ = builder.build_from_dagmaf(
            msa.Maf(pathtools.get_file_content_stringio(maf_file), maf_file),
            self.fasta_provider,
            self.metadatacsv)

        self.assertEqual(wanted, built)
# Example #6
# 0
    def test_6_missing_one_reverted_sequence_middle_minus1_1(self):
        # Builds a poagraph with builder.build_from_dagmaf and compares it with
        # the hand-written 12-node graph below.
        maf_file = self.maf_files_dir.joinpath(
            "test_6_missing_one_reverted_sequence_middle_minus1_1.maf")

        def node(idx, symbol, partner=None):
            # Expected node `idx` with base `symbol`, optionally aligned to
            # node `partner`.
            return graph.Node(
                node_id=nid(idx),
                base=graph.Base(symbol),
                aligned_to=None if partner is None else nid(partner))

        def sequence(name, group, *paths):
            # Expected sequence with one SeqPath per list of node ids.
            return graph.Sequence(
                msa.SequenceID(name),
                [graph.SeqPath([nid(i) for i in path]) for path in paths],
                graph.SequenceMetadata({'group': group}))

        nodes = [
            # block 1 because it is first in DAG and reverted
            node(0, 'A'),
            node(1, 'C'),
            node(2, 'C', 3),
            node(3, 'T', 2),

            # missing seq2, on edge (-1,1)
            node(4, 'T'),
            node(5, 'C'),
            node(6, 'A', 7),
            node(7, 'C', 6),
            node(8, 'C'),
            node(9, 'T'),
            node(10, 'A', 11),
            node(11, 'C', 10),
        ]

        sequences = {
            msa.SequenceID('seq0'): sequence('seq0', '1'),
            msa.SequenceID('seq1'): sequence('seq1', '1',
                                             [0, 1, 2], [6, 8, 9, 10]),
            msa.SequenceID('seq2'): sequence('seq2', '2',
                                             [0, 1, 3, 4, 5, 7, 11]),
            msa.SequenceID('seq3'): sequence('seq3', '2'),
        }

        wanted = graph.Poagraph(nodes, sequences)
        built, _ = builder.build_from_dagmaf(
            msa.Maf(pathtools.get_file_content_stringio(maf_file), maf_file),
            self.fasta_provider,
            self.metadatacsv)

        self.assertEqual(wanted, built)
# Example #7
# 0
def run_pangtree(maf_path: Path, fasta_path: Path, output_dir: Path,
                 po_output: bool) -> None:
    """Build a poagraph from a MAF file and save affinity-tree results per P.

    A child directory of ``output_dir`` named by
    ``pathtools.get_current_time()`` is created first. The poagraph is built
    once; then, for every value in ``p_values`` (defined outside this
    function), a sub-directory named after the value (with "." replaced by
    "_") receives ``affinity_tree.newick``, optionally ``poagraph.po``, and
    ``pangenome.json``.

    Args:
        maf_path: Path of the MAF multialignment file.
        fasta_path: Path handed to ``missings.FromFile`` as the FASTA source.
        output_dir: Parent directory for the results.
        po_output: When true, ``poagraph.po`` is also written for each P.
    """
    output_dir = pathtools.get_child_dir(output_dir,
                                         pathtools.get_current_time())
    print(f"Runing pangtree for maf: {maf_path} and fasta: {fasta_path} "
          f"Output in: {output_dir}, include po file: {po_output}.")

    fasta_provider = missings.FromFile(fasta_path)
    maf_content = pathtools.get_file_content_stringio(maf_path)
    poagraph, dagmaf = builder.build_from_dagmaf(msa.Maf(maf_content, maf_path),
                                                 fasta_provider)

    for p_value in p_values:
        # One result directory per P value, e.g. 0.5 -> "0_5".
        dir_name = str(p_value).replace(".", "_")
        p_dir = pathtools.get_child_dir(output_dir, dir_name)

        stop = at_params.Stop(0.99)
        affinity_tree = at_builders.build_affinity_tree(
            poagraph, None, p_dir, stop, at_params.P(p_value), True)

        newick_text = affinity_tree.as_newick(None, separate_leaves=True)
        newick_target = pathtools.get_child_path(p_dir, "affinity_tree.newick")
        pathtools.save_to_file(newick_text, newick_target)

        if po_output:
            po_text = po.poagraph_to_PangenomePO(poagraph)
            po_target = pathtools.get_child_path(p_dir, "poagraph.po")
            pathtools.save_to_file(po_text, po_target)

        # NOTE: `json` here must be a project module (it exposes
        # TaskParameters / to_PangenomeJSON), not the standard library one.
        task_params = json.TaskParameters(
            multialignment_file_path=str(maf_path),
            multialignment_format="maf",
            datatype="nucleotides",
            blosum_file_path="",
            output_path=p_dir,
            fasta_provider=fasta_provider,
            fasta_source_file=fasta_path,
            consensus_type="tree",
            stop=str(stop),
            p=str(p_value),
            output_with_nodes=False)
        pangenome = json.to_PangenomeJSON(task_parameters=task_params,
                                          poagraph=poagraph,
                                          dagmaf=dagmaf,
                                          affinity_tree=affinity_tree)

        json_text = json.to_json(pangenome)
        json_target = pathtools.get_child_path(p_dir, "pangenome.json")
        pathtools.save_to_file(json_text, json_target)
# Example #8
# 0
    def test_2_missing_sequence_end(self):
        # Builds a poagraph with builder.build_from_dagmaf and compares it with
        # the hand-written 13-node graph below.
        maf_file = self.maf_files_dir.joinpath(
            "test_2_missing_sequence_end.maf")

        # One row per node: (base, id of the aligned node or None).
        # The row position is the node id.
        node_rows = (
            ('A', 1), ('G', 0),
            ('C', 3), ('G', 2),
            ('T', None),
            ('A', 6), ('C', 5),
            ('A', None), ('G', None), ('G', None),
            ('T', None), ('G', None), ('T', None),
        )
        nodes = [
            graph.Node(node_id=nid(idx),
                       base=graph.Base(symbol),
                       aligned_to=None if partner is None else nid(partner))
            for idx, (symbol, partner) in enumerate(node_rows)
        ]

        # One row per sequence: (name, metadata group, paths as node ids).
        seq_rows = (
            ('seq0', '1', ()),
            ('seq1', '1', ([0, 2, 4, 5, 8, 9, 10],)),
            ('seq2', '2', ([1, 3, 4, 6, 7, 11, 12],)),
            ('seq3', '2', ()),
        )
        sequences = {
            msa.SequenceID(name): graph.Sequence(
                msa.SequenceID(name),
                [graph.SeqPath([nid(i) for i in path]) for path in paths],
                graph.SequenceMetadata({'group': group}))
            for name, group, paths in seq_rows
        }

        wanted = graph.Poagraph(nodes, sequences)
        built, _ = builder.build_from_dagmaf(
            msa.Maf(pathtools.get_file_content_stringio(maf_file), maf_file),
            self.fasta_provider,
            self.metadatacsv)

        self.assertEqual(wanted, built)
    def test_10_metadata_feed_to_alignment_from_csv(self, test_name, maf_name,
                                                    csv_name, po_name,
                                                    expected_metadata):
        # Checks that sequence metadata read from a CSV file ends up on the
        # poagraph sequences for each of the three builders: build_from_dagmaf,
        # build_from_maf and build_from_po.
        #
        # test_name is not used in the body; presumably it is consumed by a
        # parameterizing decorator outside this view (TODO confirm).
        # maf_name / po_name are file names inside self.alignment_files_dir,
        # csv_name is a file name inside self.csv_files_dir, and
        # expected_metadata maps sequence ids to the expected seqmetadata.
        maf_path = self.alignment_files_dir.joinpath(maf_name)
        csv_path = self.csv_files_dir.joinpath(csv_name)
        po_path = self.alignment_files_dir.joinpath(po_name)

        def fresh_maf():
            # A new Maf per builder, so each one reads from a fresh stream.
            return msa.Maf(pathtools.get_file_content_stringio(maf_path),
                           maf_path)

        def fresh_csv():
            # A new MetadataCSV per builder, for the same reason.
            return msa.MetadataCSV(
                pathtools.get_file_content_stringio(csv_path), csv_path)

        def metadata_of(poagraph):
            # Map every sequence id to the metadata stored on that sequence.
            return {
                seq_id: seq.seqmetadata
                for seq_id, seq in poagraph.sequences.items()
            }

        poagraph, _ = builder.build_from_dagmaf(fresh_maf(),
                                                self.fasta_provider,
                                                fresh_csv())
        self.assertEqual(expected_metadata, metadata_of(poagraph))

        poagraph = builder.build_from_maf(fresh_maf(), fresh_csv())
        self.assertEqual(expected_metadata, metadata_of(poagraph))

        # The PO input is tagged with its own path; the original passed
        # maf_path here, which looks like a copy-paste slip.
        poagraph = builder.build_from_po(
            msa.Po(pathtools.get_file_content_stringio(po_path), po_path),
            fresh_csv())
        self.assertEqual(expected_metadata, metadata_of(poagraph))
    def test_06_1st_block_separates_into_2_branches_which_connect_in_3rd_block(self):
        # Builds a poagraph with builder.build_from_dagmaf and compares it with
        # the hand-written 11-node graph below.
        maf_file = self.maf_files_dir.joinpath(
            "test_6_1st_block_separates_into_2_branches_which_connect_in_3rd_block.maf")

        # One row per node: (base, id of the aligned node or None).
        # The row position is the node id; blank lines separate node groups.
        node_rows = (
            ('A', 1), ('C', 2), ('G', 0),
            ('C', None),
            ('A', 5), ('T', 4),

            ('G', None), ('G', None),

            ('C', 9), ('G', 10), ('T', 8),
        )
        nodes = [
            graph.Node(node_id=nid(idx),
                       base=graph.Base(symbol),
                       aligned_to=None if partner is None else nid(partner))
            for idx, (symbol, partner) in enumerate(node_rows)
        ]

        # One row per sequence: (name, metadata group, paths as node ids).
        seq_rows = (
            ('seq0', '1', ([0, 3, 4, 8],)),
            ('seq1', '1', ([1, 3, 5, 6, 7, 9],)),
            ('seq2', '2', ([2, 3, 5, 10],)),
            ('seq3', '2', ()),
        )
        sequences = {
            msa.SequenceID(name): graph.Sequence(
                msa.SequenceID(name),
                [graph.SeqPath([nid(i) for i in path]) for path in paths],
                graph.SequenceMetadata({'group': group}))
            for name, group, paths in seq_rows
        }

        wanted = graph.Poagraph(nodes, sequences)
        built, _ = builder.build_from_dagmaf(
            msa.Maf(pathtools.get_file_content_stringio(maf_file), maf_file),
            self.fasta_provider,
            self.metadatacsv)

        self.assertEqual(wanted, built)
    def test_02_seq_starts_in_second_block(self):
        # Builds a poagraph with builder.build_from_dagmaf and compares it with
        # the hand-written 9-node graph below (nodes carry block ids 0-2).
        maf_file = self.maf_files_dir.joinpath(
            "test_2_seq_starts_in_second_block.maf")

        # One row per node: (base, id of the aligned node or None, block id).
        # The row position is the node id.
        node_rows = (
            ('C', None, 0), ('T', None, 0), ('G', None, 0),

            ('T', None, 1),

            ('G', 5, 2), ('T', 4, 2),
            ('A', None, 2), ('A', None, 2), ('C', None, 2),
        )
        nodes = [
            graph.Node(node_id=nid(idx),
                       base=graph.Base(symbol),
                       aligned_to=None if partner is None else nid(partner),
                       block_id=bid(block))
            for idx, (symbol, partner, block) in enumerate(node_rows)
        ]

        # One row per sequence: (name, metadata group, paths as node ids).
        seq_rows = (
            ('seq0', '1', ([1, 2, 3],)),
            ('seq1', '1', ([0, 5, 7],)),
            ('seq2', '2', ([3, 4, 6, 8],)),
            ('seq3', '2', ()),
        )
        sequences = {
            msa.SequenceID(name): graph.Sequence(
                msa.SequenceID(name),
                [graph.SeqPath([nid(i) for i in path]) for path in paths],
                graph.SequenceMetadata({'group': group}))
            for name, group, paths in seq_rows
        }

        wanted = graph.Poagraph(nodes, sequences)
        built, _ = builder.build_from_dagmaf(
            msa.Maf(pathtools.get_file_content_stringio(maf_file), maf_file),
            self.fasta_provider,
            self.metadatacsv)

        self.assertEqual(wanted, built)
    def test_08_reversed_block(self):
        # Builds a poagraph with builder.build_from_dagmaf and compares it with
        # the hand-written 10-node graph below.
        maf_file = self.maf_files_dir.joinpath("test_8_reversed_block.maf")

        def node(idx, symbol, partner=None):
            # Expected node `idx` with base `symbol`, optionally aligned to
            # node `partner`.
            return graph.Node(
                node_id=nid(idx),
                base=graph.Base(symbol),
                aligned_to=None if partner is None else nid(partner))

        def sequence(name, group, *paths):
            # Expected sequence with one SeqPath per list of node ids.
            return graph.Sequence(
                msa.SequenceID(name),
                [graph.SeqPath([nid(i) for i in path]) for path in paths],
                graph.SequenceMetadata({'group': group}))

        nodes = [
            node(0, 'C'),
            node(1, 'A'),
            node(2, 'T'),
            # next block is reversed because it was converted to dag
            node(3, 'G'),
            node(4, 'G'),
            node(5, 'A', 6),
            node(6, 'G', 5),
            node(7, 'A'),
            node(8, 'G'),
            node(9, 'T'),
        ]

        sequences = {
            msa.SequenceID('seq0'): sequence('seq0', '1'),
            msa.SequenceID('seq1'): sequence('seq1', '1',
                                             [0, 1, 3, 4, 5, 7, 8, 9]),
            msa.SequenceID('seq2'): sequence('seq2', '2',
                                             [0, 1, 2, 3, 4, 6, 7, 8, 9]),
            msa.SequenceID('seq3'): sequence('seq3', '2',
                                             [0, 1, 2, 3, 4, 6, 7, 9]),
        }

        wanted = graph.Poagraph(nodes, sequences)
        built, _ = builder.build_from_dagmaf(
            msa.Maf(pathtools.get_file_content_stringio(maf_file), maf_file),
            self.fasta_provider,
            self.metadatacsv)

        self.assertEqual(wanted, built)
    def test_1_messy_sequences(self):
        # Runs maf2poagraph.get_poagraph on the MAF file and compares the
        # returned nodes and sequences with the hand-written ones below.
        maf_file = self.maf_files_dir.joinpath("test_1_messy_sequences.maf")

        # One row per node: (base, id of the aligned node or None, block id).
        # The row position is the node id.
        node_rows = (
            # block 0
            ('A', None, 0), ('A', 2, 0), ('C', 1, 0),
            ('T', None, 0), ('C', 5, 0), ('G', 4, 0),
            # block 1
            ('A', None, 1), ('C', None, 1), ('G', None, 1),
            # block 2
            ('C', 10, 2), ('G', 9, 2), ('T', None, 2),
            ('C', None, 2), ('C', None, 2), ('A', None, 2),
        )
        nodes = [
            graph.Node(node_id=nid(idx),
                       base=graph.Base(symbol),
                       aligned_to=None if partner is None else nid(partner),
                       block_id=bid(block))
            for idx, (symbol, partner, block) in enumerate(node_rows)
        ]

        # One row per sequence: (name, metadata group, paths as node ids).
        seq_rows = (
            ('seq0', '1', ([1, 3, 4, 6, 8, 9, 11, 12],)),
            ('seq1', '1', ([2, 3, 4, 10, 11, 12, 13, 14],)),
            ('seq2', '2', ([0, 2, 5, 6, 7, 10, 11, 12, 14],)),
            ('seq3', '2', ()),
        )
        sequences = {
            msa.SequenceID(name): graph.Sequence(
                msa.SequenceID(name),
                [graph.SeqPath([nid(i) for i in path]) for path in paths],
                graph.SequenceMetadata({'group': group}))
            for name, group, paths in seq_rows
        }

        built_nodes, built_sequences = maf2poagraph.get_poagraph(
            msa.Maf(pathtools.get_file_content_stringio(maf_file), maf_file),
            self.metadatacsv)

        self.assertEqual(nodes, built_nodes)
        self.assertEqual(sequences, built_sequences)