Example #1
 def test_tfidf_with_all_datasets(self):
     should_succeed = ["movies"]
     should_fail = ["irisArr", "irisDf", "digits", "housing", "creditG", "drugRev"]
     for name in should_succeed:
         dataset = getattr(self, f"_{name}")
         TfidfVectorizer.validate_schema(**dataset)
     for name in should_fail:
         dataset = getattr(self, f"_{name}")
         with self.assertRaises(ValueError):
             TfidfVectorizer.validate_schema(**dataset)
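
The test above iterates over dataset fixtures stored as attributes of the test class and splats each one into validate_schema. A minimal sketch of that scaffolding is below; the class name, the loaders, and the toy movies corpus are assumptions for illustration, not the original suite's fixture code.

import unittest

import sklearn.datasets


class TestDatasetSchemas(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        # Each fixture is a dict with "X" and "y", so it can be splatted into
        # TfidfVectorizer.validate_schema(**dataset) as in the test above.
        iris = sklearn.datasets.load_iris()
        # Numeric arrays: the vectorizer's input schema should reject these.
        cls._irisArr = {"X": iris.data, "y": iris.target}
        # Raw text documents: the kind of input the vectorizer accepts.
        cls._movies = {"X": ["a great film", "a dull film"], "y": [1, 0]}
        # (The remaining fixtures, _irisDf, _digits, _housing, _creditG,
        # _drugRev, would be built the same way from their own loaders.)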
Example #2
 def test_more_hyperparam_values(self):
     with self.assertRaises(jsonschema.ValidationError):
         tf_idf = TfidfVectorizer(max_df=2.5, min_df=2,
                                  max_features=1000,
                                  stop_words='english')
     with self.assertRaises(jsonschema.ValidationError):
         tf_idf = TfidfVectorizer(max_df=2, min_df=2,
                                  max_features=1000,
                                  stop_words=['I', 'we', 'not', 'this', 'that'],
                                  analyzer='char')
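
Both constructions are meant to fail hyperparameter validation: max_df=2.5 is neither an integer document count nor a proportion in [0.0, 1.0], and the second call presumably trips a cross-parameter constraint (a custom stop-word list only makes sense with the word analyzer, not analyzer='char'). The snippet below is a simplified, hypothetical schema for max_df alone, just to show the kind of check jsonschema performs; lale's real schema is richer.

import jsonschema

# Simplified, illustrative max_df schema: either an absolute document count
# (integer >= 1) or a proportion in [0.0, 1.0]. Not lale's actual definition.
max_df_schema = {
    "anyOf": [
        {"type": "integer", "minimum": 1},
        {"type": "number", "minimum": 0.0, "maximum": 1.0},
    ]
}

for value in (2, 0.5, 2.5):
    try:
        jsonschema.validate(value, max_df_schema)
        print(f"max_df={value!r}: accepted")
    except jsonschema.ValidationError:
        print(f"max_df={value!r}: rejected")  # only 2.5 ends up here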
Example #3
 def test_tfidf_with_all_datasets(self):
     should_succeed = ['movies']
     should_fail = ['irisArr', 'irisDf', 'digits', 'housing', 'creditG', 'drugRev']
     for name in should_succeed:
         dataset = getattr(self, f'_{name}')
         TfidfVectorizer.validate_schema(**dataset)
     for name in should_fail:
         dataset = getattr(self, f'_{name}')
         with self.assertRaises(ValueError):
             TfidfVectorizer.validate_schema(**dataset)
Example #4
 def test_more_hyperparam_values(self):
     with EnableSchemaValidation():
         with self.assertRaises(jsonschema.ValidationError):
             _ = TfidfVectorizer(
                 max_df=2.5, min_df=2, max_features=1000, stop_words="english"
             )
         with self.assertRaises(jsonschema.ValidationError):
             _ = TfidfVectorizer(
                 max_df=2,
                 min_df=2,
                 max_features=1000,
                 stop_words=["I", "we", "not", "this", "that"],
                 analyzer="char",
             )
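
Unlike Example #2, this version wraps the assertions in EnableSchemaValidation so that validation is switched on for the duration of the block regardless of the global setting. The context manager below is only a guess at how such a helper could be written; the lale.settings flag and setter names it uses are assumptions, not the test suite's actual implementation.

import lale.settings


class EnableSchemaValidation:
    # Hypothetical helper: force schema validation on inside the block and
    # restore the previous settings on exit (names assumed, not verified).
    def __enter__(self):
        self._data = lale.settings.disable_data_schema_validation
        self._hyperparams = lale.settings.disable_hyperparams_schema_validation
        lale.settings.set_disable_data_schema_validation(False)
        lale.settings.set_disable_hyperparams_schema_validation(False)
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        lale.settings.set_disable_data_schema_validation(self._data)
        lale.settings.set_disable_hyperparams_schema_validation(self._hyperparams)
        return False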
Example #5
 def test_non_null_tokenizer(self):
     # tokenize the doc and lemmatize its tokens
     def my_tokenizer():
         return 'abc'
     with self.assertRaises(jsonschema.ValidationError):
         tf_idf = TfidfVectorizer(max_df=2, min_df=2,
                                  max_features=1000,
                                  stop_words='english',
                                  tokenizer=my_tokenizer,
                                  analyzer='char')
Example #6
    def test_non_null_tokenizer(self):
        # tokenize the doc and lemmatize its tokens
        def my_tokenizer():
            return "abc"

        with EnableSchemaValidation():
            with self.assertRaises(jsonschema.ValidationError):
                _ = TfidfVectorizer(
                    max_df=2,
                    min_df=2,
                    max_features=1000,
                    stop_words="english",
                    tokenizer=my_tokenizer,
                    analyzer="char",
                )
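
For contrast, a configuration that satisfies the hyperparameter schema constructs and trains without raising; the toy corpus and parameter values below are illustrative only.

from lale.lib.sklearn import TfidfVectorizer

# Hyperparameters that satisfy the schema: max_df is a proportion in [0.0, 1.0],
# min_df is an integer count, and the built-in English stop-word list is used
# with the default word analyzer, so no jsonschema.ValidationError is raised.
docs = [
    "the quick brown fox",
    "jumped over the lazy dog",
    "the dog barked back",
]
tf_idf = TfidfVectorizer(max_df=0.9, min_df=1, max_features=1000, stop_words="english")
trained = tf_idf.fit(docs)          # training returns a trained operator
features = trained.transform(docs)  # sparse tf-idf matrix, as in plain scikit-learn
print(features.shape)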