def test_generate_training_files(self):
    """_generate_training_files() produces the expected RDP taxonomy file contents
    """
    training_params = {
        'id_to_taxonomy_fp': self.id_to_taxonomy_file.name,
        'reference_sequences_fp': self.reference_seqs_file.name,
    }
    assigner = RdpTaxonAssigner(training_params)

    # The call is expected to hand back exactly two file-like objects;
    # only the taxonomy file's contents are checked here.
    generated_files = assigner._generate_training_files()
    taxonomy_file, training_seqs_file = generated_files

    # The comparison is against the full text, so it is subject to the
    # line-ordering caveat described in test_build_tree().
    self.assertEqual(taxonomy_file.read(), rdp_expected_taxonomy)
def test_parse_lineage(self):
    """A semicolon-delimited lineage string is split into a list of taxa
    """
    # NOTE(review): another test_parse_lineage definition appears later in
    # the visible source; if both live in the same class, the later one
    # replaces this one -- confirm and de-duplicate.
    lineage = 'Archaea;Euryarchaeota;Methanomicrobiales;Methanomicrobium et rel.;a;b'
    parsed = RdpTaxonAssigner._parse_lineage(lineage)
    self.assertEqual(
        parsed,
        ['Archaea', 'Euryarchaeota', 'Methanomicrobiales',
         'Methanomicrobium et rel.', 'a', 'b'],
    )
def test_build_tree(self):
    """_build_tree() yields a tree whose rdp_taxonomy() text matches the expected RDP-format taxonomy
    """
    taxonomy_tree = RdpTaxonAssigner._build_tree(self.id_to_taxonomy_file)

    # Caveat: the order of the lines in the taxonomy text depends on
    # python's dict() implementation, so ideally both sides would be
    # turned into sets of lines and compared by content instead of
    # comparing the full text verbatim as done here.
    self.assertEqual(taxonomy_tree.rdp_taxonomy(), rdp_expected_taxonomy)
def test_parse_lineage(self):
    """A semicolon-delimited lineage string is split into a list of taxa
    """
    # NOTE(review): an earlier test_parse_lineage definition appears in the
    # visible source; if both live in the same class, this one overrides
    # it -- confirm and de-duplicate.
    taxa = ['Archaea', 'Euryarchaeota', 'Methanomicrobiales',
            'Methanomicrobium et rel.', 'a', 'b']
    lineage = 'Archaea;Euryarchaeota;Methanomicrobiales;Methanomicrobium et rel.;a;b'
    self.assertEqual(RdpTaxonAssigner._parse_lineage(lineage), taxa)
def test_train_on_the_fly(self):
    """An assigner trained on-the-fly gives the expected result for sequence X67228
    """
    # NOTE(review): this temporary FASTA file is populated but never passed
    # to the assigner below, which classifies self.tmp_seq_filepath
    # instead -- confirm whether it was meant to be the input or can go.
    scratch_fasta = NamedTemporaryFile(
        suffix='.fasta', prefix='RdpTaxonAssignerTest_')
    scratch_fasta.write(test_seq_coll.toFasta())
    scratch_fasta.seek(0)

    expected_assignments = rdp_trained_test1_expected_dict

    # Supplying both file paths is what requests on-the-fly training.
    training_params = {
        'id_to_taxonomy_fp': self.id_to_taxonomy_file.name,
        'reference_sequences_fp': self.reference_seqs_file.name,
    }
    assigner = RdpTaxonAssigner(training_params)
    observed_assignments = assigner(self.tmp_seq_filepath)

    # Only this one sequence's assignment is compared.
    seq_label = 'X67228 some description'
    self.assertEqual(
        observed_assignments[seq_label], expected_assignments[seq_label])
def setUp(self):
    """Create the temporary input, result, log and training files used by the tests
    """
    # Input sequences: written to a named path so tests can pass the path.
    seq_path = get_tmp_filename(
        prefix='RdpTaxonAssignerTest_', suffix='.fasta')
    self.tmp_seq_filepath = seq_path
    fasta_out = open(seq_path, 'w')
    fasta_out.write(rdp_test1_fasta)
    fasta_out.close()

    # Results path; the file is created empty up front so that it
    # exists even if a test never writes to it.
    res_path = get_tmp_filename(
        prefix='RdpTaxonAssignerTestResult_', suffix='.tsv')
    self.tmp_res_filepath = res_path
    open(res_path, 'w').close()

    # Log path; created empty for the same reason as the results file.
    log_path = get_tmp_filename(
        prefix='RdpTaxonAssignerTestLog_', suffix='.txt')
    self.tmp_log_filepath = log_path
    open(log_path, 'w').close()

    # Paths recorded for later removal (presumably by tearDown, which is
    # not visible here -- confirm).
    self._paths_to_clean_up = [
        self.tmp_seq_filepath,
        self.tmp_res_filepath,
        self.tmp_log_filepath,
    ]

    # Training inputs are kept as open temporary files, rewound to the
    # start so they can be read back immediately.
    taxonomy_tmp = NamedTemporaryFile(
        suffix='.txt', prefix='RdpTaxonAssignerTest_')
    self.id_to_taxonomy_file = taxonomy_tmp
    taxonomy_tmp.write(rdp_id_to_taxonomy)
    taxonomy_tmp.seek(0)

    reference_tmp = NamedTemporaryFile(
        suffix='.fasta', prefix='RdpTaxonAssignerTest_')
    self.reference_seqs_file = reference_tmp
    reference_tmp.write(rdp_reference_seqs)
    reference_tmp.seek(0)

    # Assigner constructed with an empty parameter dict.
    self.default_app = RdpTaxonAssigner({})
def test_generate_training_seqs(self):
    """_generate_training_seqs() output, rendered as FASTA, matches the expected training sequences
    """
    training_seqs = RdpTaxonAssigner._generate_training_seqs(
        self.reference_seqs_file, self.id_to_taxonomy_file)

    # Load as an unaligned collection so the result can be rendered to
    # FASTA text for comparison.
    seq_collection = LoadSeqs(data=training_seqs, aligned=False)
    self.assertEqual(seq_collection.toFasta(), rdp_expected_training_seqs)
def test_init(self):
    """An RdpTaxonAssigner built from empty params has Name 'RdpTaxonAssigner'
    """
    assigner = RdpTaxonAssigner({})
    # Only the Name attribute is checked here.
    self.assertEqual(assigner.Name, 'RdpTaxonAssigner')