Пример #1
0
 def test_computes_correct_tfidf_score(self):
     """
     Check the tf-idf value emitted by the mapper for the first result.

     The mapper is run with ``self.default_args``; the first emitted
     key/value pair is tokenized and the first element of its value is
     parsed as a float. That number must equal
     ``(7 / 12) * log10(corpus_size / 2)`` rounded to the configured
     ``precision``.
     """
     mapper_output = self.run_mapper(args=self.default_args)
     first_pair = mru.tokenize_key_value_pair(mapper_output[0])
     emitted_score = float(first_pair[1][0])

     # NOTE(review): 7/12 and the divisor 2 presumably mirror the test
     # fixture (term count / document length, and number of documents
     # containing the term) -- confirm against the fixture data.
     term_frequency = 7.0 / 12.0
     corpus_size = float(self.default_args['corpus_size'])
     inverse_doc_frequency = log(corpus_size / 2.0, 10)

     # Round to the same number of digits the mapper was asked to emit
     # so that the two floats can be compared exactly.
     wanted_score = round(term_frequency * inverse_doc_frequency,
                          self.default_args['precision'])
     self.assertEqual(wanted_score, emitted_score)
Пример #2
0
 def test_has_correcct_precision(self):
     """
     Check that the emitted tf-idf string honours the ``precision`` arg.

     The mapper is run with a precision of 8 and a corpus size of 9; the
     first element of the first emitted value must then carry exactly 8
     characters after its decimal point.
     """
     requested_precision = 8
     mapper_args = {'precision': requested_precision, 'corpus_size': 9}

     mapper_output = self.run_mapper(args=mapper_args)
     first_pair = mru.tokenize_key_value_pair(mapper_output[0])
     emitted_score = first_pair[1][0]

     # The score is inspected as text: drop surrounding whitespace, cut
     # at the '.', and measure the piece that follows it.
     pieces = emitted_score.strip().split('.')
     digits_after_point = len(pieces[1])
     self.assertEqual(requested_precision, digits_after_point)