Пример #1
0
    def test_cooccurrence_vectorizer_should_load(self):
        # Writes a hand-built vectorizer description to disk as JSON and
        # checks what CooccurrenceVectorizer.from_path exposes afterwards.

        # Given
        expected_config = CooccurrenceVectorizerConfig()
        expected_word_pairs = {("a", "b"): 0, ("a", 'c'): 12}

        # Same mapping turned inside out for the file: index -> [word, word]
        persisted_word_pairs = {
            pair_index: list(pair)
            for pair, pair_index in expected_word_pairs.items()
        }

        persisted_vectorizer = {
            "unit_name": "cooccurrence_vectorizer",
            "language_code": "en",
            "word_pairs": persisted_word_pairs,
            "builtin_entity_scope": ["snips/datetime"],
            "config": expected_config.to_dict(),
        }

        self.tmp_file_path.mkdir()
        vectorizer_json_path = self.tmp_file_path / "vectorizer.json"
        self.writeJsonContent(vectorizer_json_path, persisted_vectorizer)

        # When
        loaded = CooccurrenceVectorizer.from_path(self.tmp_file_path)

        # Then
        self.assertDictEqual(expected_config.to_dict(),
                             loaded.config.to_dict())
        self.assertEqual("en", loaded.language)
        self.assertDictEqual(loaded.word_pairs, expected_word_pairs)
        # The scope was written as a list but is compared against a set here
        self.assertEqual({"snips/datetime"}, loaded.builtin_entity_scope)
Пример #2
0
    def test_featurizer_config(self):
        # A featurizer config dict must come back unchanged after going
        # through FeaturizerConfig.from_dict followed by to_dict.

        # Given
        tfidf_config_dict = TfidfVectorizerConfig().to_dict()
        cooccurrence_config_dict = CooccurrenceVectorizerConfig().to_dict()
        expected_dict = {
            "unit_name": "featurizer",
            "pvalue_threshold": 0.2,
            "added_cooccurrence_feature_ratio": 0.2,
            "tfidf_vectorizer_config": tfidf_config_dict,
            "cooccurrence_vectorizer_config": cooccurrence_config_dict,
        }

        # When
        round_tripped_dict = FeaturizerConfig.from_dict(
            expected_dict).to_dict()

        # Then
        self.assertDictEqual(expected_dict, round_tripped_dict)