示例#1
0
    def test_generate_training_files(self):
        """The taxonomy file returned by _generate_training_files() has the expected contents
        """
        params = {
            'id_to_taxonomy_fp': self.id_to_taxonomy_file.name,
            'reference_sequences_fp': self.reference_seqs_file.name,
        }
        training_files = RdpTaxonAssigner(params)._generate_training_files()
        # Two objects come back; only the first (taxonomy) one is checked.
        taxonomy_file, _training_seqs_file = training_files

        # see note in test_build_tree()
        self.assertEqual(taxonomy_file.read(), rdp_expected_taxonomy)
示例#2
0
 def test_parse_lineage(self):
     """A semicolon-delimited lineage string is correctly parsed to a list
     """
     # Named 'lineage' rather than 'str' so the builtin is not shadowed.
     lineage = 'Archaea;Euryarchaeota;Methanomicrobiales;Methanomicrobium et rel.;a;b'
     expected = [
         'Archaea', 'Euryarchaeota', 'Methanomicrobiales',
         'Methanomicrobium et rel.', 'a', 'b',
     ]
     actual = RdpTaxonAssigner._parse_lineage(lineage)
     self.assertEqual(actual, expected)
示例#3
0
 def test_build_tree(self):
     """The tree from RdpTaxonAssigner._build_tree() yields the expected Rdp-format taxonomy
     """
     # NOTE: the order of the lines in this output depends on python's
     # dict() implementation, so ideally both sides would be turned into
     # sets of lines and compared by content instead of compared as-is.
     taxonomy_tree = RdpTaxonAssigner._build_tree(self.id_to_taxonomy_file)
     self.assertEqual(taxonomy_tree.rdp_taxonomy(), rdp_expected_taxonomy)
示例#4
0
 def test_parse_lineage(self):
     """A semicolon-delimited lineage string is correctly parsed to a list
     """
     # Named 'lineage' rather than 'str' so the builtin is not shadowed.
     lineage = 'Archaea;Euryarchaeota;Methanomicrobiales;Methanomicrobium et rel.;a;b'
     expected = [
         'Archaea', 'Euryarchaeota', 'Methanomicrobiales',
         'Methanomicrobium et rel.', 'a', 'b',
     ]
     actual = RdpTaxonAssigner._parse_lineage(lineage)
     self.assertEqual(actual, expected)
示例#5
0
 def test_build_tree(self):
     """The tree from RdpTaxonAssigner._build_tree() yields the expected Rdp-format taxonomy
     """
     # NOTE: the order of the lines in this output depends on python's
     # dict() implementation, so ideally both sides would be turned into
     # sets of lines and compared by content instead of compared as-is.
     taxonomy_tree = RdpTaxonAssigner._build_tree(self.id_to_taxonomy_file)
     self.assertEqual(taxonomy_tree.rdp_taxonomy(), rdp_expected_taxonomy)
示例#6
0
    def test_train_on_the_fly(self):
        """Training on-the-fly classifies reference sequence correctly with 100% certainty
        """
        # Supplying both a taxonomy map and reference sequences is what
        # this test exercises; both come from the setUp() fixtures.
        app = RdpTaxonAssigner({
            'id_to_taxonomy_fp': self.id_to_taxonomy_file.name,
            'reference_sequences_fp': self.reference_seqs_file.name,
        })
        # The query sequences are the ones setUp() wrote to
        # self.tmp_seq_filepath, so no extra temporary FASTA file is
        # created (and left unclosed) here.
        actual = app(self.tmp_seq_filepath)

        expected = rdp_trained_test1_expected_dict
        # Only the assignment for this one sequence is compared.
        key = 'X67228 some description'
        self.assertEqual(actual[key], expected[key])
示例#7
0
    def setUp(self):
        """Create the temporary input, result, log and training-data files used by the tests
        """
        # Temporary input file
        self.tmp_seq_filepath = get_tmp_filename(
            prefix='RdpTaxonAssignerTest_', suffix='.fasta')
        # 'with' guarantees the handle is closed even if the write fails.
        with open(self.tmp_seq_filepath, 'w') as seq_file:
            seq_file.write(rdp_test1_fasta)

        # Temporary results filename
        self.tmp_res_filepath = get_tmp_filename(
            prefix='RdpTaxonAssignerTestResult_',
            suffix='.tsv',
        )
        # touch the file so we don't get an error trying to close it
        open(self.tmp_res_filepath, 'w').close()

        # Temporary log filename
        self.tmp_log_filepath = get_tmp_filename(
            prefix='RdpTaxonAssignerTestLog_',
            suffix='.txt',
        )
        # touch the file so we don't get an error trying to close it
        open(self.tmp_log_filepath, 'w').close()

        # Paths created above by name; presumably removed again in
        # tearDown() (not visible from here -- confirm).
        self._paths_to_clean_up = [
            self.tmp_seq_filepath,
            self.tmp_res_filepath,
            self.tmp_log_filepath,
        ]

        # Training inputs are kept as open NamedTemporaryFile objects; the
        # tests pass either the object itself or its .name to the assigner.
        # seek(0) rewinds each file after writing so it can be read back
        # from the start.
        self.id_to_taxonomy_file = NamedTemporaryFile(
            prefix='RdpTaxonAssignerTest_', suffix='.txt')
        self.id_to_taxonomy_file.write(rdp_id_to_taxonomy)
        self.id_to_taxonomy_file.seek(0)

        self.reference_seqs_file = NamedTemporaryFile(
            prefix='RdpTaxonAssignerTest_', suffix='.fasta')
        self.reference_seqs_file.write(rdp_reference_seqs)
        self.reference_seqs_file.seek(0)

        # Assigner built with an empty params dict.
        self.default_app = RdpTaxonAssigner({})
示例#8
0
 def test_generate_training_seqs(self):
     """_generate_training_seqs() output, rendered as FASTA, matches the expected text
     """
     training_seqs = RdpTaxonAssigner._generate_training_seqs(
         self.reference_seqs_file, self.id_to_taxonomy_file)
     # Load as an unaligned collection so the comparison is on FASTA text.
     collection = LoadSeqs(data=training_seqs, aligned=False)
     self.assertEqual(collection.toFasta(), rdp_expected_training_seqs)
示例#9
0
 def test_init(self):
     """An RdpTaxonAssigner built from empty params reports the expected Name
     """
     assigner = RdpTaxonAssigner({})
     expected_name = 'RdpTaxonAssigner'
     self.assertEqual(assigner.Name, expected_name)
示例#10
0
 def test_generate_training_seqs(self):
     """_generate_training_seqs() output, rendered as FASTA, matches the expected text
     """
     training_seqs = RdpTaxonAssigner._generate_training_seqs(
         self.reference_seqs_file, self.id_to_taxonomy_file)
     # Load as an unaligned collection so the comparison is on FASTA text.
     collection = LoadSeqs(data=training_seqs, aligned=False)
     self.assertEqual(collection.toFasta(), rdp_expected_training_seqs)