def test_run_normalize_words(self):
    """Run TF-IDF scoring on a lemmatized corpus with base and plural words.

    The corpus is preprocessed with ``LemmagenLemmatizer`` and ``run`` is
    called once with ``["minor", "tree"]`` and once with
    ``["minors", "trees"]``; each call must yield exactly two score rows.
    """
    lemmatized = LemmagenLemmatizer()(self.corpus)
    for query in (["minor", "tree"], ["minors", "trees"]):
        outcome = run(
            lemmatized,
            query,
            {},
            {ScoringMethods.TF_IDF},
            {},
            AggregationMethods.MEAN,
            self.state,
        )
        self.assertEqual(len(outcome.scores), 2)
def test_run_with_cached_results(self):
    """Re-run with previously computed keywords and compare both results.

    The first call computes TF-IDF and YAKE from scratch. The second call
    receives ``all_keywords`` from the first one while ``tfidf_keywords`` is
    patched; the patched function must not be called, and scores, labels and
    keywords of both runs must be equal (NaN-aware comparison).
    """
    first = run(
        self.corpus,
        None,
        {},
        {ScoringMethods.TF_IDF, ScoringMethods.YAKE},
        {},
        AggregationMethods.MEAN,
        self.state,
    )
    with patch("orangecontrib.text.keywords.tfidf_keywords") as tfidf_mock:
        second = run(
            self.corpus,
            None,
            first.all_keywords,
            {ScoringMethods.TF_IDF, ScoringMethods.YAKE},
            {},
            AggregationMethods.MEAN,
            self.state,
        )
        tfidf_mock.assert_not_called()
        for attr in ("scores", "labels", "all_keywords"):
            self.assertNanEqual(getattr(first, attr), getattr(second, attr))
def test_run_no_methods(self):
    """Run with an empty set of scoring methods.

    Scores and labels must be empty lists, and the cached keywords object
    passed in must be returned as ``all_keywords`` (same object).
    """
    cache = Mock()
    state = Mock()
    outcome = run(
        self.corpus, None, cache, set(), {}, AggregationMethods.MEAN, state
    )
    self.assertEqual(outcome.scores, [])
    self.assertEqual(outcome.labels, [])
    self.assertIs(outcome.all_keywords, cache)
def test_run_multiple_methods(self):
    """Run TF-IDF and YAKE together and check the first score row and labels.

    The first row must start with the word "system" followed by a score of
    about 0.114 (two decimal places); labels must be ``["TF-IDF", "YAKE!"]``.
    """
    selected = {ScoringMethods.TF_IDF, ScoringMethods.YAKE}
    outcome = run(
        self.corpus, None, {}, selected, {}, AggregationMethods.MEAN, self.state
    )
    first_row = outcome.scores[0]
    self.assertEqual(first_row[0], "system")
    self.assertAlmostEqual(first_row[1], 0.114, 2)
    # NOTE(review): this assertion is always true and does not inspect the
    # results; presumably it was meant to check a value from
    # ``outcome.scores`` -- confirm the intended target.
    self.assertTrue(np.isnan(np.nan))
    self.assertEqual(outcome.labels, ["TF-IDF", "YAKE!"])
def test_run_with_words(self):
    """Check the number of score rows for different ``words`` arguments.

    Four listed words give four rows, ``["foo", "bar"]`` gives none, and
    both an empty list and ``None`` give 42 rows.
    """
    cases = (
        (["human", "graph", "minors", "trees"], 4),
        (["foo", "bar"], 0),
        ([], 42),
        (None, 42),
    )
    for words, expected_rows in cases:
        outcome = run(
            self.corpus,
            words,
            {},
            {ScoringMethods.TF_IDF},
            {},
            AggregationMethods.MEAN,
            self.state,
        )
        self.assertEqual(len(outcome.scores), expected_rows)
def test_run_no_data(self):
    """Run with ``None`` in place of the corpus.

    Scores and labels must be empty lists and ``all_keywords`` an empty dict.
    """
    state = Mock()
    outcome = run(
        None,
        None,
        {},
        {ScoringMethods.TF_IDF},
        {},
        AggregationMethods.MEAN,
        state,
    )
    self.assertEqual(outcome.scores, [])
    self.assertEqual(outcome.labels, [])
    self.assertEqual(outcome.all_keywords, {})
def test_run_default(self):
    """Run TF-IDF alone and check the first score row and the label.

    The first row must start with the word "system" followed by a score of
    about 0.114 (two decimal places); labels must be ``["TF-IDF"]``.
    """
    outcome = run(
        self.corpus,
        None,
        {},
        {ScoringMethods.TF_IDF},
        {},
        AggregationMethods.MEAN,
        self.state,
    )
    first_row = outcome.scores[0]
    self.assertEqual(first_row[0], "system")
    self.assertAlmostEqual(first_row[1], 0.114, 2)
    self.assertEqual(outcome.labels, ["TF-IDF"])