示例#1
0
 def test_train_rdp_classifier_and_assign_taxonomy_no_model_output(self):
     obs = train_rdp_classifier_and_assign_taxonomy(
         self.reference_file, self.taxonomy_file, self.test_trained_input)
     exp = {'X67228': (
         'Bacteria;Proteobacteria;Alphaproteobacteria;Rhizobiales;'
         'Rhizobiaceae;Rhizobium', 1.0
         )}
     self.assertEqual(obs, exp)
示例#2
0
    def __call__(self, seq_path, result_path=None, log_path=None):
        """Returns dict mapping {seq_id:(taxonomy, confidence)} for
        each seq.

        Parameters:
        seq_path: path to file of sequences
        result_path: path to file of results. If specified, dumps the
            result to the desired path instead of returning it.
        log_path: path to log, which should include dump of params.
        """
        tmp_dir = get_qiime_temp_dir()
        min_conf = self.Params['Confidence']
        training_data_properties_fp = self.Params[
            'training_data_properties_fp']
        reference_sequences_fp = self.Params['reference_sequences_fp']
        id_to_taxonomy_fp = self.Params['id_to_taxonomy_fp']
        max_memory = self.Params['max_memory']

        seq_file = open(seq_path, 'U')
        if reference_sequences_fp and id_to_taxonomy_fp:
            # Train and assign taxonomy
            taxonomy_file, training_seqs_file = self._generate_training_files()
            results = rdp_classifier.train_rdp_classifier_and_assign_taxonomy(
                training_seqs_file,
                taxonomy_file,
                seq_file,
                min_confidence=min_conf,
                classification_output_fp=result_path,
                max_memory=max_memory,
                tmp_dir=tmp_dir)

            if result_path is None:
                results = self._training_set.fix_results(results)
            else:
                self._training_set.fix_output_file(result_path)
        else:
            # Just assign taxonomy, using properties file if passed
            if training_data_properties_fp:
                fix_ranks = False
            else:
                fix_ranks = True
            results = rdp_classifier.assign_taxonomy(
                seq_file,
                min_confidence=min_conf,
                output_fp=result_path,
                training_data_fp=training_data_properties_fp,
                max_memory=max_memory,
                fixrank=fix_ranks,
                tmp_dir=tmp_dir)

        if log_path:
            self.writeLog(log_path)

        return results
示例#3
0
    def __call__(self, seq_path, result_path=None, log_path=None):
        """Returns dict mapping {seq_id:(taxonomy, confidence)} for
        each seq.

        Parameters:
        seq_path: path to file of sequences
        result_path: path to file of results. If specified, dumps the
            result to the desired path instead of returning it.
        log_path: path to log, which should include dump of params.
        """
        tmp_dir = get_qiime_temp_dir()
        min_conf = self.Params['Confidence']
        training_data_properties_fp = self.Params[
            'training_data_properties_fp']
        reference_sequences_fp = self.Params['reference_sequences_fp']
        id_to_taxonomy_fp = self.Params['id_to_taxonomy_fp']
        max_memory = self.Params['max_memory']

        seq_file = open(seq_path, 'U')
        if reference_sequences_fp and id_to_taxonomy_fp:
            # Train and assign taxonomy
            taxonomy_file, training_seqs_file = self._generate_training_files()
            results = rdp_classifier.train_rdp_classifier_and_assign_taxonomy(
                training_seqs_file, taxonomy_file, seq_file,
                min_confidence=min_conf,
                classification_output_fp=result_path,
                max_memory=max_memory, tmp_dir=tmp_dir)

            if result_path is None:
                results = self._training_set.fix_results(results)
            else:
                self._training_set.fix_output_file(result_path)
        else:
            # Just assign taxonomy, using properties file if passed
            if training_data_properties_fp:
                fix_ranks = False
            else:
                fix_ranks = True
            results = rdp_classifier.assign_taxonomy(
                seq_file, min_confidence=min_conf, output_fp=result_path,
                training_data_fp=training_data_properties_fp,
                max_memory=max_memory, fixrank=fix_ranks, tmp_dir=tmp_dir)

        if log_path:
            self.writeLog(log_path)

        return results