def test_subpangraph_construction_full_graph(self):
    """Check the PO input built from a five-node graph with one sequence.

    The only sequence (``seq0``) walks all five unaligned nodes in order
    and is the single sequence handed to the translator.
    """
    graph_nodes = [
        pNode.Node(node_id=nid(position), base=b(symbol), aligned_to=None)
        for position, symbol in enumerate("AACAT")
    ]
    graph_sequences = {
        pSeq.SequenceID(name): pSeq.Sequence(
            pSeq.SequenceID(name),
            [pSeq.SequencePath([nid(i) for i in node_indices])],
            pSeq.SequenceMetadata({'group': '1'}))
        for name, node_indices in {'seq0': [0, 1, 2, 3, 4]}.items()
    }

    expected_po_content = "\n".join([
        "VERSION=pangenome",
        "NAME=pangenome",
        "TITLE=pangenome",
        "LENGTH=5",
        "SOURCECOUNT=1",
        "SOURCENAME=seq0",
        "SOURCEINFO=5 0 100 -1 seq0",
        "a:S0",
        "a:L0S0",
        "c:L1S0",
        "a:L2S0",
        "t:L3S0",
    ])

    translator = poa.PoagraphPOTranslator(
        pPoagraph.Poagraph(graph_nodes, graph_sequences),
        [pSeq.SequenceID('seq0')])
    actual_po_content = translator.get_input_po_content()

    self.assertEqual(expected_po_content, actual_po_content)
def test_subpangraph_should_omit_edges_2(self):
    """Check the PO input when only ``seq1`` of two sequences is kept.

    ``seq1`` visits nodes 0 and 2, ``seq2`` visits 0, 1 and 2; the expected
    PO text lists two nodes and the single source ``seq1``.
    """
    graph_nodes = [
        pNode.Node(node_id=nid(position), base=b(symbol), aligned_to=None)
        for position, symbol in enumerate("ACC")
    ]
    node_indices_by_name = {
        'seq1': [0, 2],
        'seq2': [0, 1, 2],
    }
    graph_sequences = {
        pSeq.SequenceID(name): pSeq.Sequence(
            pSeq.SequenceID(name),
            [pSeq.SequencePath([nid(i) for i in node_indices])],
            pSeq.SequenceMetadata({'group': '1'}))
        for name, node_indices in node_indices_by_name.items()
    }

    expected_po_content = "\n".join([
        "VERSION=pangenome",
        "NAME=pangenome",
        "TITLE=pangenome",
        "LENGTH=2",
        "SOURCECOUNT=1",
        "SOURCENAME=seq1",
        "SOURCEINFO=2 0 100 -1 seq1",
        "a:S0",
        "c:L0S0",
    ])

    translator = poa.PoagraphPOTranslator(
        pPoagraph.Poagraph(graph_nodes, graph_sequences),
        [pSeq.SequenceID('seq1')])
    actual_po_content = translator.get_input_po_content()

    self.assertEqual(expected_po_content, actual_po_content)
def test_2_three_sequences(self):
    """Check each of the three sequences stored in one FASTA file."""
    fasta_provider = FromFile(
        Path(self.fasta_dir + "test_2_three_sequences.fasta"))

    expected_by_name = (
        ("seq1", "ACTGGGTGGGA"),
        ("seq2", "AA"),
        ("seq3", "GT"),
    )
    for name, expected_sequence in expected_by_name:
        sequence_id = pSeq.SequenceID(name)
        self.raise_error_if_unequal(sequence_id, expected_sequence,
                                    fasta_provider)
class FromNCBITests(unittest.TestCase):
    """Tests of the ``FromNCBI`` FASTA provider created with ``use_cache=False``.

    The download tests are skipped by default because they need an internet
    connection.
    """

    def setUp(self) -> None:
        """Create the provider used by the tests that read ``self.fasta_provider``."""
        self.fasta_provider = FromNCBI(EmailAddress('*****@*****.**'),
                                       use_cache=False)

    @unittest.skip("slow test - internet connection required")
    def test_0_get_10th_symbol_of_AB050936v1(self):
        """Compare the base at index 10 with the locally stored FASTA file."""
        sequence_id = pSeq.SequenceID("AB050936.1", skip_part_before_dot=False)
        actual_base = self.fasta_provider.get_base(sequence_id, 10)
        path = Path('tests/data/fasta_providers/fasta_files/AB050936.1.fasta')
        expected_base = self.read_sequence(path)[10]
        self.assertEqual(expected_base, actual_base)

    @unittest.skip("slow test - internet connection required")
    def test_1_download_AB050936v1(self):
        """Compare a downloaded sequence with the locally stored FASTA file."""
        fasta_provider = FromNCBI(EmailAddress('*****@*****.**'),
                                  use_cache=False)
        sequence_id = pSeq.SequenceID("AB050936.1", skip_part_before_dot=False)
        actual_sequence = fasta_provider._download_from_ncbi(sequence_id)
        p = Path('tests/data/fasta_providers/fasta_files/AB050936.1.fasta')
        expected_sequence = self.read_sequence(p)
        self.assertEqual(expected_sequence, actual_sequence)

    @unittest.skip("slow test - internet connection required")
    def test_2_failed_download(self):
        """Expect an exception with a descriptive message for an empty ID."""
        fasta_provider = FromNCBI(EmailAddress('*****@*****.**'),
                                  use_cache=False)
        sequence_id = ""
        with self.assertRaises(Exception) as err:
            _ = fasta_provider._download_from_ncbi(sequence_id)
        # The message check must live outside the ``with`` block: statements
        # placed after the raising call inside it are never executed. The
        # raised exception is exposed as ``err.exception``; ``err`` itself is
        # only the context manager.
        self.assertEqual(
            str(err.exception),
            f"Cannot download from Entrez sequence of ID: {sequence_id}")

    # NOTE(review): ``data``/``unpack`` presumably come from the ``ddt``
    # package, which also needs a class-level ``@ddt`` decorator; none is
    # visible in this chunk - confirm it is applied to this class.
    @data((pSeq.SequenceID("plain", False), "plain"),
          (pSeq.SequenceID("with.dot", False), "with.dot"),
          (pSeq.SequenceID("with.two.dots", False), "with.two.dots"),
          (pSeq.SequenceID("withv1", False), "with.1"))
    @unpack
    def test_3_guess_entrez_id(self, sequenceID: pSeq.SequenceID,
                               expected_guessed_entrez_id: str):
        """Check the NCBI identifier guessed for a given sequence ID."""
        fasta_provider = FromNCBI(EmailAddress('*****@*****.**'),
                                  use_cache=False)
        actual_guessed_entrez_id = fasta_provider._guess_ncbi_sequence_id(
            sequenceID)
        self.assertEqual(expected_guessed_entrez_id, actual_guessed_entrez_id)

    def read_sequence(self, path: Path):
        """Return the file content after its first line, upper-cased and unwrapped.

        The first line is read and discarded; the remaining text is
        upper-cased and all newline characters are removed.
        """
        with open(path) as fasta_file_handler:
            _ = fasta_file_handler.readline()
            return fasta_file_handler.read().upper().replace("\n", "")
def test_1_download_sequence_and_save_to_cache(self):
    """Check that fetching a base with caching on writes a FASTA cache file.

    Any existing ``.fastacache`` directory under the current working
    directory is removed first. After one ``get_base`` call the test
    verifies that the directory exists, that it contains
    ``<sequence_id>.fasta`` and that this file equals the control file.
    """
    fasta_provider = FromNCBI(EmailAddress('*****@*****.**'), use_cache=True)
    cache_dir_path = pathtools.get_child_path(Path.cwd(), ".fastacache")
    if cache_dir_path.exists():
        shutil.rmtree(cache_dir_path)

    sequence_id = pSeq.SequenceID("AB050936.1", skip_part_before_dot=False)
    # Only the side effect matters here; the returned base is not inspected.
    fasta_provider.get_base(sequence_id, 15)

    # cache directory creation
    self.assertTrue(cache_dir_path.exists())

    # file creation
    cached_files = [*cache_dir_path.glob("*")]
    expected_filepath = pathtools.get_child_path(cache_dir_path,
                                                 f"{sequence_id}.fasta")
    self.assertTrue(expected_filepath in cached_files)

    # file content
    control_fasta_path = Path(
        'tests/data/fasta_providers/fasta_ncbi/AB050936.1.fasta')
    with open(control_fasta_path) as control_file:
        expected_content = control_file.read()
    with open(expected_filepath) as cached_file:
        actual_content = cached_file.read()
    self.assertEqual(expected_content, actual_content)
def test_1_download_AB050936v1(self):
    """Compare a sequence downloaded from NCBI with the stored FASTA file."""
    provider = FromNCBI(EmailAddress('*****@*****.**'), use_cache=False)
    downloaded = provider._download_from_ncbi(
        pSeq.SequenceID("AB050936.1", skip_part_before_dot=False))

    reference_path = Path(
        'tests/data/fasta_providers/fasta_files/AB050936.1.fasta')
    stored = self.read_sequence(reference_path)

    self.assertEqual(stored, downloaded)
def test_1_one_sequence(self):
    """Check ``seq1`` from a single-sequence FASTA file against the raw file."""
    fasta_path = self.fasta_dir + "test_1_one_sequence.fasta"
    provider = FromFile(Path(fasta_path))
    seq1_id = pSeq.SequenceID("seq1")
    self.raise_error_if_unequal(seq1_id,
                                self.read_sequence(fasta_path),
                                provider)
def test_1_one_sequence_one_file_in_zip(self):
    """Check ``seq1`` read through ``FromFile`` from a ``.zip`` path."""
    archive_path = Path(
        self.fasta_dir + "test_1_one_sequence_one_file_in_zip.zip")
    provider = FromFile(archive_path)
    self.raise_error_if_unequal(pSeq.SequenceID("seq1"),
                                "ACTGGGTGGGA",
                                provider)
def test_1_typical_pangraph(self):
    """Compare the PO rendering of an 18-node, 4-sequence graph with ``test_1.po``."""
    expected_po_content_path = Path(self.po_files_dir + "test_1.po")

    # One (base, aligned_to index or None) pair per node; the node id is the
    # position of the pair in the list.
    node_specs = [
        ('A', 1), ('G', 0),
        ('C', 3), ('G', 2),
        ('A', 5), ('T', 4),
        ('G', None),
        ('G', None),
        ('A', 9), ('C', 10), ('G', 11), ('T', 8),
        ('A', 13), ('C', 12),
        ('T', None),
        ('A', 16), ('C', 17), ('G', 15),
    ]
    poagraph_nodes = [
        pNode.Node(node_id=nid(position),
                   base=bid(symbol),
                   aligned_to=None if partner is None else nid(partner))
        for position, (symbol, partner) in enumerate(node_specs)
    ]

    node_indices_by_name = {
        'seq0': [0, 2, 4, 6, 7, 8, 12, 14, 16],
        'seq1': [1, 2, 5, 6, 7, 9],
        'seq2': [3, 4, 6, 7, 10, 12, 14, 17],
        'seq3': [11, 13, 14, 15],
    }
    poagraph_sequences = {
        pSeq.SequenceID(name): pSeq.Sequence(
            pSeq.SequenceID(name),
            [pSeq.SequencePath([nid(i) for i in node_indices])],
            pSeq.SequenceMetadata({'group': '1'}))
        for name, node_indices in node_indices_by_name.items()
    }

    poagraph = pPoagraph.Poagraph(poagraph_nodes, poagraph_sequences)
    actual_po_content = PangenomePO.poagraph_to_PangenomePO(poagraph)
    expected_po_content = pathtools.get_file_content(expected_po_content_path)

    self.assertEqual(expected_po_content, actual_po_content)
def test_subpangraph_construction_from_pangraph_keep_seq_0_1(self):
    """Check the PO input built from ``self.poagraph`` keeping ``seq0`` and ``seq1``."""
    kept_sequences = [pSeq.SequenceID(name) for name in ('seq0', 'seq1')]
    translator = poa.PoagraphPOTranslator(self.poagraph, kept_sequences)

    expected_po_content = "\n".join([
        "VERSION=pangenome",
        "NAME=pangenome",
        "TITLE=pangenome",
        "LENGTH=9",
        "SOURCECOUNT=2",
        "SOURCENAME=seq0",
        "SOURCEINFO=7 0 0 -1 seq0",
        "SOURCENAME=seq1",
        "SOURCEINFO=5 1 100 -1 seq1",
        "t:S0",
        "a:L0S0S1",
        "a:L1S0S1",
        "a:L2S0A4",
        "c:L2S1A3",
        "a:L3L4S0S1",
        "c:L5S0A7",
        "t:L5S1A6",
        "a:L6S0",
    ])

    self.assertEqual(expected_po_content, translator.get_input_po_content())
def setUp(self):
    """Build the 15-node, 4-sequence poagraph stored as ``self.poagraph``."""
    # One (base, aligned_to index or None) pair per node; the node id is the
    # position of the pair in the list.
    node_specs = [
        ('T', None),
        ('A', 2), ('G', 1),
        ('A', 4), ('C', 3),
        ('A', 6), ('C', 7), ('G', 8), ('T', 5),
        ('A', None),
        ('C', 11), ('T', 10),
        ('G', None),
        ('A', 14), ('C', 13),
    ]
    graph_nodes = [
        pNode.Node(node_id=nid(position),
                   base=b(symbol),
                   aligned_to=None if partner is None else nid(partner))
        for position, (symbol, partner) in enumerate(node_specs)
    ]

    node_indices_by_name = {
        'seq0': [0, 1, 3, 5, 9, 10, 13],
        'seq1': [1, 3, 6, 9, 11],
        'seq2': [2, 4, 7, 9, 11, 12],
        'seq3': [2, 4, 8, 9, 11, 12, 14],
    }
    graph_sequences = {
        pSeq.SequenceID(name): pSeq.Sequence(
            pSeq.SequenceID(name),
            [pSeq.SequencePath([nid(i) for i in node_indices])],
            pSeq.SequenceMetadata({'group': '1'}))
        for name, node_indices in node_indices_by_name.items()
    }

    self.poagraph = pPoagraph.Poagraph(graph_nodes, graph_sequences)
def setUp(self):
    """Set ``self.fasta_dir`` and build the 18-node poagraph in ``self.poagraph``.

    ``seq1`` has no paths, while ``seq2`` and ``seq3`` are each split into
    two separate paths.
    """
    self.fasta_dir = 'tests/output/fasta_files/'

    # One (base, aligned_to index or None) pair per node; the node id is the
    # position of the pair in the list.
    node_specs = [
        ('A', 1), ('G', 0),
        ('C', 3), ('G', 2),
        ('A', 5), ('T', 4),
        ('G', None),
        ('G', None),
        ('A', 9), ('C', 10), ('G', 11), ('T', 8),
        ('A', 13), ('C', 12),
        ('T', None),
        ('A', 16), ('C', 17), ('G', 15),
    ]
    poagraph_nodes = [
        pNode.Node(node_id=nid(position),
                   base=bid(symbol),
                   aligned_to=None if partner is None else nid(partner))
        for position, (symbol, partner) in enumerate(node_specs)
    ]

    # Sequence name -> list of paths, each path being a list of node indices.
    paths_by_name = {
        'seq0': [[0, 2, 4, 6, 7, 8, 12, 14, 16]],
        'seq1': [],
        'seq2': [[3, 4, 6, 7, 10, 12], [14, 17]],
        'seq3': [[11], [13, 14, 15]],
    }
    poagraph_sequences = {
        pSeq.SequenceID(name): pSeq.Sequence(
            pSeq.SequenceID(name),
            [pSeq.SequencePath([nid(i) for i in path]) for path in paths],
            pSeq.SequenceMetadata({'group': '1'}))
        for name, paths in paths_by_name.items()
    }

    self.poagraph = pPoagraph.Poagraph(poagraph_nodes, poagraph_sequences)
def test_read_consensus_path_seq1_only_in_input(self):
    """Check the consensus path read back from PO lines when only ``seq1`` was input.

    ``get_input_po_content`` is called first and its result discarded;
    the PO lines below are then passed to ``read_consensus_paths``.
    """
    translator = poa.PoagraphPOTranslator(self.poagraph,
                                          [pSeq.SequenceID('seq1')])
    translator.get_input_po_content()

    header_lines = [
        "VERSION=pangenome\n",
        "NAME=pangenome\n",
        "TITLE=pangenome\n",
        "LENGTH=5\n",
        "SOURCECOUNT=2\n",
        "SOURCENAME=seq1\n",
        "SOURCEINFO=5 0 100 0 seq1\n",
        "SOURCENAME=CONSENS0\n",
        "SOURCEINFO=5 0 100 0 CONSENS0\n",
    ]
    node_lines = [
        "a:S0S1\n",
        "a:L0S0S1\n",
        "c:L1S0S1\n",
        "a:L2S0S1\n",
        "t:L2S0S1",
    ]
    poa_lines = header_lines + node_lines

    consensus_paths = translator.read_consensus_paths(poa_lines, [0])

    self.assertEqual([1, 3, 6, 9, 11], consensus_paths[0].path)
def test_subpangraph_unfilled_nodes(self):
    """Check the PO input for a graph holding nodes with an unknown symbol.

    The graph has nine nodes; nodes 3 and 4 carry the ``?`` placeholder and
    are visited only by ``seq1``. Both sequences are passed to the
    translator.
    """
    symbol_for_unknown = '?'

    # One (base, aligned_to index or None) pair per node. Node ids are taken
    # from the list position so they cannot repeat: the ninth node was
    # previously created with id 5, duplicating the sixth node, although
    # both sequences refer to it as node 8.
    node_specs = [
        ('A', 1),
        ('C', 0),
        ('G', None),
        (symbol_for_unknown, None),
        (symbol_for_unknown, None),
        ('G', None),
        ('C', None),
        ('A', None),
        ('T', None),
    ]
    nodes = [
        pNode.Node(node_id=nid(position),
                   base=b(symbol),
                   aligned_to=None if partner is None else nid(partner))
        for position, (symbol, partner) in enumerate(node_specs)
    ]

    node_indices_by_name = {
        'seq1': [0, 2, 3, 4, 7, 8],
        'seq2': [1, 2, 5, 6, 7, 8],
    }
    sequences = {
        pSeq.SequenceID(name): pSeq.Sequence(
            pSeq.SequenceID(name),
            [pSeq.SequencePath([*map(nid, node_indices)])],
            pSeq.SequenceMetadata({'group': '1'}))
        for name, node_indices in node_indices_by_name.items()
    }

    poagraph = pPoagraph.Poagraph(nodes, sequences)
    translator = poa.PoagraphPOTranslator(
        poagraph,
        [pSeq.SequenceID('seq1'), pSeq.SequenceID('seq2')])
    actual_po_content = translator.get_input_po_content()

    expected_po_content = "\n".join([
        "VERSION=pangenome",
        "NAME=pangenome",
        "TITLE=pangenome",
        "LENGTH=9",
        "SOURCECOUNT=2",
        "SOURCENAME=seq1",
        "SOURCEINFO=6 0 100 -1 seq1",
        "SOURCENAME=seq2",
        "SOURCEINFO=6 1 100 -1 seq2",
        "a:S0A1",
        "c:S1A0",
        "g:L0L1S0S1",
        f"{symbol_for_unknown}:L2S0",
        f"{symbol_for_unknown}:L3S0",
        "g:L2S1",
        "c:L5S1",
        "a:L4L6S0S1",
        "t:L7S0S1",
    ])
    self.assertEqual(expected_po_content, actual_po_content)
def test_2_read_seqeunce_from_cache_instead_downloading(self):
    """Check that ``get_base`` returns a base from a hand-written cache file.

    A fresh ``.fastacache`` directory is created under the current working
    directory and a fake FASTA record for ``seq1`` with the sequence
    ``foo`` is written into it; the base at index 2 must then be ``o``.
    """
    provider = FromNCBI(EmailAddress('*****@*****.**'), use_cache=True)

    cache_dir_path = pathtools.get_child_path(Path.cwd(), ".fastacache")
    if cache_dir_path.exists():
        shutil.rmtree(cache_dir_path)
    cache_dir_path.mkdir()

    sequence_id = pSeq.SequenceID("seq1")
    fake_sequence = "foo"
    expected_base = pNode.Base("o")

    fake_fasta_path = pathtools.get_child_path(cache_dir_path,
                                               f"{sequence_id}.fasta")
    fake_record = f">{sequence_id} cached\n{fake_sequence}"
    with open(fake_fasta_path, 'w') as cache_file:
        cache_file.write(fake_record)

    self.assertEqual(expected_base, provider.get_base(sequence_id, 2))
def test_subpangraph_construction_from_pangraph_keep_seq3(self):
    """Check the PO input built from ``self.poagraph`` keeping only ``seq3``."""
    translator = poa.PoagraphPOTranslator(self.poagraph,
                                          [pSeq.SequenceID('seq3')])

    expected_po_content = "\n".join([
        "VERSION=pangenome",
        "NAME=pangenome",
        "TITLE=pangenome",
        "LENGTH=7",
        "SOURCECOUNT=1",
        "SOURCENAME=seq3",
        "SOURCEINFO=7 0 100 -1 seq3",
        "g:S0",
        "c:L0S0",
        "t:L1S0",
        "a:L2S0",
        "t:L3S0",
        "g:L4S0",
        "c:L5S0",
    ])

    self.assertEqual(expected_po_content, translator.get_input_po_content())
def test_0_get_10th_symbol_of_AB050936v1(self):
    """Compare the base at index 10 of AB050936.1 with the stored FASTA file."""
    fetched_base = self.fasta_provider.get_base(
        pSeq.SequenceID("AB050936.1", skip_part_before_dot=False), 10)

    reference_path = Path(
        'tests/data/fasta_providers/fasta_files/AB050936.1.fasta')
    stored_sequence = self.read_sequence(reference_path)

    self.assertEqual(stored_sequence[10], fetched_base)
def test_2_consensuses_tree_fasta(self):
    """Compare the FASTA rendering of a two-node consensus tree with the stored file."""
    expected_consensuses_fasta_path = Path(self.fasta_dir +
                                           "consensuses.fasta")

    # Compatibility value recorded on the first node for each sequence.
    compatibility_by_name = {
        'seq0': 1.0,
        'seq1': 0.9,
        'seq2': 0.95,
        'seq3': 0.6,
    }

    # all members set
    full_node = CT.ConsensusNode(
        consensus_id=CT.ConsensusNodeID(0),
        parent_node_id=CT.ConsensusNodeID(-1),
        children_nodes_ids=[CT.ConsensusNodeID(child) for child in (1, 2)],
        sequences_ids=[
            pSeq.SequenceID(name) for name in compatibility_by_name
        ],
        mincomp=CT.CompatibilityToPath(0.5, P(1)),
        compatibilities_to_all={
            pSeq.SequenceID(name): CT.CompatibilityToPath(value, P(1))
            for name, value in compatibility_by_name.items()
        },
        consensus_path=pSeq.SequencePath(
            [*map(nid, [0, 2, 5, 6, 10, 12, 13, 16])]))

    # no compatibilities to all, no mincomp
    partial_node = CT.ConsensusNode(
        consensus_id=CT.ConsensusNodeID(1),
        parent_node_id=CT.ConsensusNodeID(0),
        sequences_ids=[
            pSeq.SequenceID(name) for name in ('seq0', 'seq1', 'seq2')
        ],
        consensus_path=pSeq.SequencePath(
            [*map(nid, [0, 2, 3, 6, 10, 11, 13, 17])]))

    consensuses_tree = CT.ConsensusTree()
    consensuses_tree.nodes = [full_node, partial_node]

    actual_consensuses_fasta_content = PangenomeFASTA.consensuses_tree_to_fasta(
        self.poagraph, consensuses_tree)
    expected_consensuses_fasta_content = pathtools.get_file_content(
        expected_consensuses_fasta_path)

    self.assertEqual(expected_consensuses_fasta_content,
                     actual_consensuses_fasta_content)
def test_2_consensuses_and_empty_sequences(self):
    """Compare the PO rendering of a graph with empty and consensus sequences with ``test_2.po``.

    ``seq2`` and ``seq3`` have no paths; ``CONSENS0`` and ``CONSENS1`` are
    created with ``None`` in place of sequence metadata.
    """
    expected_po_content_path = Path(self.po_files_dir + "test_2.po")

    # One (base, aligned_to index or None) pair per node; the node id is the
    # position of the pair in the list.
    node_specs = [
        ('C', 1), ('T', 0),
        ('A', 3), ('G', 2),
        ('C', None),
        ('T', None),
        ('A', 7), ('T', 6),
        ('G', None),
    ]
    poagraph_nodes = [
        pNode.Node(node_id=nid(position),
                   base=bid(symbol),
                   aligned_to=None if partner is None else nid(partner))
        for position, (symbol, partner) in enumerate(node_specs)
    ]

    # (sequence name, list of paths, whether group metadata is attached)
    sequence_specs = [
        ('seq0', [[0, 3, 4, 5, 6, 8]], True),
        ('seq1', [[1, 2, 4, 5, 7, 8]], True),
        ('seq2', [], True),
        ('seq3', [], True),
        ('CONSENS0', [[0, 3, 4, 5, 7, 8]], False),
        ('CONSENS1', [[1, 2, 4, 5, 6, 8]], False),
    ]
    poagraph_sequences = {
        pSeq.SequenceID(name): pSeq.Sequence(
            pSeq.SequenceID(name),
            [pSeq.SequencePath([nid(i) for i in path]) for path in paths],
            pSeq.SequenceMetadata({'group': '1'}) if with_metadata else None)
        for name, paths, with_metadata in sequence_specs
    }

    poagraph = pPoagraph.Poagraph(poagraph_nodes, poagraph_sequences)
    actual_po_content = PangenomePO.poagraph_to_PangenomePO(poagraph)
    expected_po_content = pathtools.get_file_content(expected_po_content_path)

    self.assertEqual(expected_po_content, actual_po_content)