Exemplo n.º 1
0
    def test_intializing_embeds_from_config(self):
        """Verify that the embedding init strategy controls the embedding weights.

        A data handler is built once with ``EmbedInitStrategy.RANDOM`` and once
        with ``EmbedInitStrategy.ZERO``. After metadata initialization, row 11
        of the text field's ``pretrained_embeds_weight`` must be entirely
        non-zero in the first case and entirely zero in the second.
        """
        zeros = [0] * 5  # one entry per embedding dimension (embed_dim=5)

        def embedding_row(strategy):
            # Build a fresh handler for `strategy`, initialize its metadata and
            # return row 11 of the text field's embedding weights as numpy.
            config = FeatureConfig(
                word_feat=WordFeatConfig(
                    embedding_init_strategy=strategy,
                    embed_dim=5,
                    pretrained_embeddings_path=tests_module.TEST_BASE_DIR,
                )
            )
            handler = JointModelDataHandler.from_config(
                JointModelDataHandler.Config(),
                config,
                [DocLabelConfig(), WordLabelConfig()],
                featurizer=SimpleFeaturizer.from_config(
                    SimpleFeaturizer.Config(), config
                ),
            )
            handler.init_metadata_from_path(TRAIN_FILE, EVAL_FILE, TEST_FILE)
            text_meta = handler.metadata.features[DatasetFieldName.TEXT_FIELD]
            return text_meta.pretrained_embeds_weight[11].numpy()

        # Random initialization: every component must be strictly non-zero.
        random_row = embedding_row(EmbedInitStrategy.RANDOM)
        np.testing.assert_array_less(zeros, np.absolute(random_row))

        # Zero initialization: every component must be exactly zero.
        zero_row = embedding_row(EmbedInitStrategy.ZERO)
        np.testing.assert_array_equal(zeros, zero_row)
Exemplo n.º 2
0
 class Config(Task.Config):
     # Task configuration extending Task.Config. Each field pairs a
     # component's config type with a default-constructed instance of it.

     # Model configuration; defaults to SeqNNModel.Config().
     model: SeqNNModel.Config = SeqNNModel.Config()
     # Trainer configuration; defaults to Trainer.Config().
     trainer: Trainer.Config = Trainer.Config()
     # Label configuration; a single DocLabelConfig (not a list).
     labels: DocLabelConfig = DocLabelConfig()
     # Data handler configuration; defaults to SeqModelDataHandler.Config().
     data_handler: SeqModelDataHandler.Config = SeqModelDataHandler.Config()
     # Metric reporter configuration; defaults to
     # ClassificationMetricReporter.Config().
     metric_reporter: ClassificationMetricReporter.Config = (
         ClassificationMetricReporter.Config())
Exemplo n.º 3
0
 def example_config(cls):
     """Return a sample ``cls.Config`` for a bagging document ensemble.

     The config carries a doc label and a word label, and an ensemble model
     config wrapping a single default ``DocModel_Deprecated`` config.
     """
     label_configs = [DocLabelConfig(), WordLabelConfig()]
     ensemble_config = BaggingDocEnsemble_Deprecated.Config(
         models=[DocModel_Deprecated.Config()]
     )
     return cls.Config(labels=label_configs, model=ensemble_config)
 def setUp(self):
     """Create the ``JointModelDataHandler`` fixture stored on ``self.data_handler``.

     The handler is built from default configs, with a doc label and a word
     label, and a ``SimpleFeaturizer`` built from its own default configs.
     """
     handler_config = JointModelDataHandler.Config()
     handler_features = FeatureConfig()
     label_configs = [DocLabelConfig(), WordLabelConfig()]
     # The featurizer receives its own FeatureConfig instance, separate from
     # the one handed to the data handler.
     simple_featurizer = SimpleFeaturizer.from_config(
         SimpleFeaturizer.Config(), FeatureConfig()
     )
     self.data_handler = JointModelDataHandler.from_config(
         handler_config,
         handler_features,
         label_configs,
         featurizer=simple_featurizer,
     )
Exemplo n.º 5
0
 class Config(Task_Deprecated.Config):
     # Task configuration extending Task_Deprecated.Config. Each field pairs
     # a component's config type with its default value.

     # Model configuration; defaults to SeqNNModel_Deprecated.Config().
     model: SeqNNModel_Deprecated.Config = SeqNNModel_Deprecated.Config()
     # Trainer configuration; defaults to Trainer.Config().
     trainer: Trainer.Config = Trainer.Config()
     # Label configuration; a single DocLabelConfig (not a list).
     labels: DocLabelConfig = DocLabelConfig()
     # Data handler configuration; defaults to SeqModelDataHandler.Config().
     data_handler: SeqModelDataHandler.Config = SeqModelDataHandler.Config()
     # Metric reporter configuration; defaults to
     # ClassificationMetricReporter.Config().
     metric_reporter: ClassificationMetricReporter.Config = (
         ClassificationMetricReporter.Config())
     # Optional exporter configuration; None by default.
     exporter: Optional[DenseFeatureExporter.Config] = None
    def setUp(self):
        """Build the contextual intent-slot handler and read the tiny train file.

        Stores the handler on ``self.dh`` and the result of reading
        ``contextual_intent_slot_train_tiny.tsv`` with the handler's raw
        columns on ``self.data``.
        """
        train_path = tests_module.test_file("contextual_intent_slot_train_tiny.tsv")

        handler_cls = ContextualIntentSlotModelDataHandler
        self.dh = handler_cls.from_config(
            handler_cls.Config(),
            ModelInputConfig(),
            [DocLabelConfig(), WordLabelConfig()],
            # The featurizer is constructed directly, with its own
            # ModelInputConfig instance.
            featurizer=SimpleFeaturizer(
                SimpleFeaturizer.Config(), ModelInputConfig()
            ),
        )

        raw_columns = self.dh.raw_columns
        self.data = self.dh.read_from_file(train_path, raw_columns)
Exemplo n.º 7
0
    def test_read_file_with_dense_features(self):
        """Check that the dense-feature column is read when it is requested.

        Builds a handler whose config additionally lists ``ModelInput.DENSE``
        in ``columns_to_read``, reads the dense variant of the tiny train
        file, and asserts the first row's dense field is the raw string
        ``"[0,1,2,3,4]"``.
        """
        data_handler_config = ContextualIntentSlotModelDataHandler.Config()
        # Rebind a fresh list instead of appending in place: if
        # `columns_to_read` is a class-level default shared by every Config
        # instance, an in-place append would leak the extra column into other
        # tests that build the same Config.
        data_handler_config.columns_to_read = [
            *data_handler_config.columns_to_read,
            ModelInput.DENSE,
        ]
        dense_file_name = tests_module.test_file(
            "contextual_intent_slot_train_tiny_dense.tsv"
        )
        data_handler = ContextualIntentSlotModelDataHandler.from_config(
            data_handler_config,
            ModelInputConfig(),
            [DocLabelConfig(), WordLabelConfig()],
            featurizer=SimpleFeaturizer(
                SimpleFeaturizer.Config(), ModelInputConfig()
            ),
        )

        # Materialize the rows so the first one can be indexed.
        dense_data = list(
            data_handler.read_from_file(dense_file_name, data_handler.raw_columns)
        )
        self.assertEqual(dense_data[0][ModelInput.DENSE], "[0,1,2,3,4]")
Exemplo n.º 8
0
    def setUp(self):
        """Prepare in-memory train/eval/test rows and a ``SeqModelDataHandler``.

        Each row maps ``DFColumn.DOC_LABEL`` to a label string and
        ``DFColumn.UTTERANCE`` to a JSON-encoded list of utterances. The
        handler is stored on ``self.dh``.
        """

        def make_row(doc_label, utterances_json):
            # Key order matches the original literals: label first, then text.
            return {
                DFColumn.DOC_LABEL: doc_label,
                DFColumn.UTTERANCE: utterances_json,
            }

        self.train_data = [
            make_row("cu:discuss_where", '["where do you wanna meet?", "MPK"]'),
        ]

        self.eval_data = [
            make_row("cu:discuss_where", '["how about SF?", "sounds good"]'),
            make_row("cu:other", '["lol"]'),
        ]

        self.test_data = [
            make_row("cu:discuss_where", '["MPK sounds good to me"]'),
            make_row("cu:other", '["great", "awesome"]'),
        ]

        handler_config = SeqModelDataHandler.Config()
        handler_features = FeatureConfig()
        # A single DocLabelConfig is passed here, not a list of label configs.
        doc_label_config = DocLabelConfig()
        simple_featurizer = SimpleFeaturizer.from_config(
            SimpleFeaturizer.Config(), FeatureConfig()
        )
        self.dh = SeqModelDataHandler.from_config(
            handler_config,
            handler_features,
            doc_label_config,
            featurizer=simple_featurizer,
        )
Exemplo n.º 9
0
 def example_config(cls):
     """Return a sample ``cls.Config`` with a doc label and a word label."""
     label_configs = [DocLabelConfig(), WordLabelConfig()]
     return cls.Config(labels=label_configs)