def test_assign_taxonomy_file_output(self):
    """assign_taxonomy wrapper writes correct file output when requested

    Checks that assign_taxonomy completes when asked to write to a file,
    that it returns None in that mode, that the file holds one line per
    expected sequence, and that each line starts with the expected seq id.
    The taxonomy strings and confidences are variable and are tested
    elsewhere.
    """
    # local import: only needed to close the descriptor mkstemp returns
    import os

    fd, output_fp = tempfile.mkstemp(
        prefix='RDPAssignTaxonomyTests', suffix='.txt')
    # mkstemp hands back an already-open descriptor; only the path is
    # passed on below, so close the descriptor here to avoid leaking it
    os.close(fd)

    # the keys of the expected dict are the seq ids that the output lines
    # should start with; sort them to line up with the sorted file lines
    expected_file_headers = sorted(self.expected_assignments1.keys())

    try:
        actual_return_value = assign_taxonomy(
            self.test_input1, min_confidence=0.95, output_fp=output_fp)
        with open(output_fp) as output_file:
            actual_file_output = sorted(output_file)
    finally:
        # remove the output file before running the assertions (and also
        # when the classifier call itself fails) so it is always cleaned up
        remove(output_fp)

    # None return value on write to file
    self.assertEqual(actual_return_value, None)

    # zip() stops at the shorter input, so compare the line count
    # explicitly; otherwise missing output lines would go unnoticed
    self.assertEqual(len(actual_file_output), len(expected_file_headers))

    # check that each line starts with the correct seq_id -- not
    # checking the taxonomies or confidences here as these are variable
    # and tested elsewhere
    for actual_line, expected_header in zip(actual_file_output,
                                            expected_file_headers):
        self.assertTrue(actual_line.startswith(expected_header))
def test_assign_taxonomy(self):
    """assign_taxonomy wrapper functions as expected

    Every expected lineage only has to be observed once across
    self.num_trials classifier runs, so this test may fail periodically,
    but failure should be rare.
    """
    expected = self.expected_assignments1
    pending = set(expected.keys())

    for _ in range(self.num_trials):
        obs_assignments = assign_taxonomy(self.test_input1)
        # collect the ids confirmed in this trial, then drop them in one
        # go so the set is never modified while it is being iterated
        confirmed = set()
        for seq_id in pending:
            obs_lineage, obs_confidence = obs_assignments[seq_id]
            if obs_lineage == expected[seq_id]:
                confirmed.add(seq_id)
        pending -= confirmed
        if not pending:
            break

    # describe every sequence that never matched, using the assignments
    # observed in the last trial that was run
    messages = []
    for seq_id in pending:
        messages.extend([
            "Unable to verify %s trials" % self.num_trials,
            " Sequence ID: %s" % seq_id,
            " Expected: %s" % expected[seq_id],
            " Observed: %s" % obs_assignments[seq_id][0],
            " Confidence: %s" % obs_assignments[seq_id][1],
        ])

    # make sure all taxonomic results were correct at least once
    self.assertFalse(pending, msg='\n'.join(messages))
def test_assign_taxonomy_short_sequence(self):
    """assign_taxonomy should return Unassignable if sequence is too short

    A single short FASTA record is classified and the result is expected
    to map its label to ('Unassignable', 1.0).
    """
    short_seq_lines = [
        '>MySeq 1',
        'TTCCGGTTGATCCTGCCGGACCCGACTGCTATCCGGA',
    ]
    expected = {'MySeq 1': ('Unassignable', 1.0)}

    observed = assign_taxonomy(short_seq_lines)

    self.assertEqual(observed, expected)
def __call__(self, seq_path, result_path=None, log_path=None):
    """Returns dict mapping {seq_id:(taxonomy, confidence)} for each seq.

    Parameters:
    seq_path: path to file of sequences
    result_path: path to file of results. If specified, dumps the
        result to the desired path instead of returning it.
    log_path: path to log, which should include dump of params.

    When both 'reference_sequences_fp' and 'id_to_taxonomy_fp' are set in
    self.Params the classifier is trained on them before assigning;
    otherwise taxonomy is assigned directly, using
    'training_data_properties_fp' if one was given.

    The value handed back is whatever the rdp_classifier call returned,
    passed through self._training_set.fix_results() in the retrained,
    no-result_path case.
    """
    tmp_dir = get_qiime_temp_dir()
    params = self.Params
    min_conf = params['Confidence']
    training_data_properties_fp = params['training_data_properties_fp']
    reference_sequences_fp = params['reference_sequences_fp']
    id_to_taxonomy_fp = params['id_to_taxonomy_fp']
    max_memory = params['max_memory']

    # Open the sequences in a with-block so the handle is always closed,
    # including when the classifier raises.
    # NOTE(review): assumes the rdp_classifier helpers finish reading
    # seq_file before they return -- confirm.
    # NOTE(review): the 'U' (universal newlines) flag is kept to preserve
    # behaviour; Python 3.11 removed it, so use plain 'r' when porting.
    with open(seq_path, 'U') as seq_file:
        if reference_sequences_fp and id_to_taxonomy_fp:
            # Train and assign taxonomy
            taxonomy_file, training_seqs_file = \
                self._generate_training_files()
            results = \
                rdp_classifier.train_rdp_classifier_and_assign_taxonomy(
                    training_seqs_file, taxonomy_file, seq_file,
                    min_confidence=min_conf,
                    classification_output_fp=result_path,
                    max_memory=max_memory, tmp_dir=tmp_dir)

            # post-process through the training set: in memory when the
            # results were returned, in place when they went to a file
            if result_path is None:
                results = self._training_set.fix_results(results)
            else:
                self._training_set.fix_output_file(result_path)
        else:
            # Just assign taxonomy, using properties file if passed;
            # fixrank is only requested when no properties file is given
            fix_ranks = not training_data_properties_fp
            results = rdp_classifier.assign_taxonomy(
                seq_file, min_confidence=min_conf, output_fp=result_path,
                training_data_fp=training_data_properties_fp,
                max_memory=max_memory, fixrank=fix_ranks, tmp_dir=tmp_dir)

    if log_path:
        self.writeLog(log_path)
    return results
def test_assign_taxonomy_alt_confidence(self):
    """assign_taxonomy wrapper functions as expected with alt confidence

    Runs the classifier with min_confidence=0.95 and checks that every
    observed lineage is either "Unclassified" or a prefix of the expected
    lineage, and that every reported confidence is at least 0.95.
    """
    min_confidence = 0.95
    obs_assignments = assign_taxonomy(
        self.test_input1, min_confidence=min_confidence)

    for seq_id, assignment in obs_assignments.items():
        obs_lineage, obs_confidence = assignment
        exp_lineage = self.expected_assignments1[seq_id]
        message = "Sequence ID: %s, assignment: %s" % (seq_id, assignment)

        lineage_ok = (exp_lineage.startswith(obs_lineage) or
                      obs_lineage == "Unclassified")
        self.assertTrue(lineage_ok, msg=message)

        confidence_ok = obs_confidence >= min_confidence
        self.assertTrue(confidence_ok, msg=message)