class TestFastaParser(unittest.TestCase):
    """Unit tests for ``FastaParser``.

    Covers entry iteration (``entries``), reading the header of a
    single-entry file (``single_entry_file_header``) and extracting the ID
    from a header line (``header_id``).
    """

    def setUp(self):
        # Every test gets its own parser and its own fixture container.
        self.fasta_parser = FastaParser()
        self.example_data = ExampleData()

    def test_parse_1(self):
        # The first example fixture must yield exactly these three
        # (header, sequence) pairs, in this order.
        handle = StringIO(self.example_data.fasta_seqs_1)
        parsed_entries = list(self.fasta_parser.entries(handle))
        expected_entries = [
            ('test_1 a random sequence', 'TTTAGAAATTACACA'),
            ('test_2 another random sequence', 'ACGAGAAATTAAATTAAATT'),
            ('test_3 another random sequence', 'TAGAGACATTGGATTTTATT'),
        ]
        self.assertEqual(parsed_entries, expected_entries)

    def test_parse_empty_file(self):
        # An empty input must produce no entries at all.
        empty_handle = StringIO("")
        parsed_entries = list(self.fasta_parser.entries(empty_handle))
        self.assertEqual(parsed_entries, [])

    def test_single_entry_file_header(self):
        # The second example fixture holds one record; its header is
        # returned without the leading '>'.
        handle = StringIO(self.example_data.fasta_seqs_2)
        header = self.fasta_parser.single_entry_file_header(handle)
        self.assertEqual(header, "test_4 a random sequence")

    def test_header_id_1(self):
        # ID and description separated by a space.
        seq_id = self.fasta_parser.header_id("seq_10101 An important protein")
        self.assertEqual(seq_id, "seq_10101")

    def test_header_id_2(self):
        # ID and description separated by a tab.
        seq_id = self.fasta_parser.header_id("seq_10101\tAn important protein")
        self.assertEqual(seq_id, "seq_10101")
def _ref_ids_to_file(self, ref_seq_paths):
    """Map each reference sequence ID to the base name of its file.

    Each path in ``ref_seq_paths`` is opened, its header is read with
    ``FastaParser.single_entry_file_header`` and reduced to an ID with
    ``FastaParser.header_id``.

    Returns a dict of ``{sequence ID: file base name}``. If several files
    produce the same ID, the last one in ``ref_seq_paths`` wins.
    """
    id_to_file_name = {}
    parser = FastaParser()
    for path in ref_seq_paths:
        # Only the file name (no directory part) is stored as the value.
        file_name = os.path.basename(path)
        with open(path) as handle:
            header = parser.single_entry_file_header(handle)
            seq_id = parser.header_id(header)
        id_to_file_name[seq_id] = file_name
    return id_to_file_name
def _ref_ids_to_file(self, ref_seq_paths):
    """Build a lookup from reference sequence ID to file base name.

    The ID of a file is obtained by applying ``FastaParser.header_id`` to
    the header returned by ``FastaParser.single_entry_file_header`` for
    that file. The value stored is ``os.path.basename`` of the path.

    Returns a dict; a later path overwrites an earlier one that yields the
    same ID.
    """
    lookup = {}
    fasta_parser = FastaParser()

    def read_seq_id(path):
        # Open the file just long enough to pull the ID out of its header.
        with open(path) as fasta_fh:
            return fasta_parser.header_id(
                fasta_parser.single_entry_file_header(fasta_fh))

    for seq_path in ref_seq_paths:
        base_name = os.path.basename(seq_path)
        lookup[read_seq_id(seq_path)] = base_name
    return lookup
def test_fasta_parser():
    """Check ``FastaParser`` against small in-memory FASTA inputs.

    Covers, in order: iterating a three-record input, iterating an empty
    input, reading the header of a single-record input, and extracting the
    ID from space- and tab-separated header strings.
    """
    fasta_parser = FastaParser()

    # The fixtures are assembled with explicit "\n" separators. The expected
    # values below (one header per record, sequence lines concatenated) only
    # hold when every header and every sequence chunk sits on its own line,
    # so the line breaks are spelled out rather than left to the physical
    # layout of a multi-line literal. The trailing "" gives the final newline.
    fasta_seqs_1 = "\n".join([
        ">test_1 a random sequence",
        "TTTAG",
        "AAATT",
        "ACACA",
        ">test_2 another random sequence",
        "ACGAG",
        "AAATT",
        "AAATT",
        "AAATT",
        ">test_3 another random sequence",
        "TAGAG",
        "ACATT",
        "GGATT",
        "TTATT",
        "",
    ])
    fasta_seqs_2 = "\n".join([
        ">test_4 a random sequence",
        "TTTAG",
        "AAATT",
        "ACACA",
        "",
    ])

    # Multi-record input: one (header, sequence) pair per record.
    fasta_fh = StringIO(fasta_seqs_1)
    assert list(fasta_parser.entries(fasta_fh)) == [
        ('test_1 a random sequence', 'TTTAGAAATTACACA'),
        ('test_2 another random sequence', 'ACGAGAAATTAAATTAAATT'),
        ('test_3 another random sequence', 'TAGAGACATTGGATTTTATT'),
    ]

    # Empty input: no entries.
    fasta_empty_fh = StringIO("")
    assert list(fasta_parser.entries(fasta_empty_fh)) == []

    # Single-record input: header comes back without the leading '>'.
    fasta_header_fh = StringIO(fasta_seqs_2)
    assert fasta_parser.single_entry_file_header(
        fasta_header_fh) == "test_4 a random sequence"

    # Header ID with a space between ID and description.
    assert fasta_parser.header_id(
        "seq_10101 An important protein") == "seq_10101"

    # Header ID with a tab between ID and description.
    assert fasta_parser.header_id(
        "seq_10101\tAn important protein") == "seq_10101"