def test_train_rdp_classifier_and_assign_taxonomy_no_model_output(self):
    """Train and classify using only the three positional inputs.

    Calls train_rdp_classifier_and_assign_taxonomy with the reference
    sequences, the taxonomy mapping and the query input held on the test
    fixture, and checks that the returned mapping equals the expected
    {seq_id: (lineage, confidence)} dict.
    """
    # Single expected assignment: full lineage string plus confidence.
    expected = {
        'X67228': (
            'Bacteria;Proteobacteria;Alphaproteobacteria;Rhizobiales;'
            'Rhizobiaceae;Rhizobium',
            1.0,
        ),
    }
    observed = train_rdp_classifier_and_assign_taxonomy(
        self.reference_file,
        self.taxonomy_file,
        self.test_trained_input,
    )
    self.assertEqual(observed, expected)
def __call__(self, seq_path, result_path=None, log_path=None):
    """Returns dict mapping {seq_id:(taxonomy, confidence)} for each seq.

    Parameters:
    seq_path: path to file of sequences
    result_path: path to file of results. If specified, dumps the
        result to the desired path instead of returning it.
    log_path: path to log, which should include dump of params.

    Behaviour is selected from self.Params:
    - If both 'reference_sequences_fp' and 'id_to_taxonomy_fp' are set,
      training files are generated and the classifier is trained before
      assigning taxonomy; the results (or the output file, when
      result_path is given) are then passed through self._training_set.
    - Otherwise taxonomy is assigned directly, using
      'training_data_properties_fp' if it is set. Rank fixing is enabled
      only when no properties file is given.

    The sequence file is always closed before this method returns, even
    if the classifier raises.
    """
    tmp_dir = get_qiime_temp_dir()
    min_conf = self.Params['Confidence']
    training_data_properties_fp = self.Params[
        'training_data_properties_fp']
    reference_sequences_fp = self.Params['reference_sequences_fp']
    id_to_taxonomy_fp = self.Params['id_to_taxonomy_fp']
    max_memory = self.Params['max_memory']

    # The handle is held in a context manager so it is released on every
    # exit path (previously it was opened and never closed).
    # NOTE(review): the 'U' (universal newlines) mode flag is kept for
    # Python 2 compatibility; it was removed in Python 3.11 -- confirm
    # the target interpreter before porting.
    with open(seq_path, 'U') as seq_file:
        if reference_sequences_fp and id_to_taxonomy_fp:
            # Train and assign taxonomy
            taxonomy_file, training_seqs_file = \
                self._generate_training_files()
            results = rdp_classifier.train_rdp_classifier_and_assign_taxonomy(
                training_seqs_file, taxonomy_file, seq_file,
                min_confidence=min_conf,
                classification_output_fp=result_path,
                max_memory=max_memory, tmp_dir=tmp_dir)

            # presumably self._training_set is populated by
            # _generate_training_files() -- verify against the class.
            if result_path is None:
                results = self._training_set.fix_results(results)
            else:
                self._training_set.fix_output_file(result_path)
        else:
            # Just assign taxonomy, using properties file if passed.
            # Ranks are fixed only when no properties file is supplied.
            fix_ranks = not training_data_properties_fp
            results = rdp_classifier.assign_taxonomy(
                seq_file, min_confidence=min_conf, output_fp=result_path,
                training_data_fp=training_data_properties_fp,
                max_memory=max_memory, fixrank=fix_ranks, tmp_dir=tmp_dir)

    if log_path:
        self.writeLog(log_path)
    return results