Exemplo n.º 1
0
    def test_intializing_embeds_from_config(self):
        """Verify the embedding init strategy set in the feature config.

        A data handler is built once with ``EmbedInitStrategy.RANDOM`` and once
        with ``EmbedInitStrategy.ZERO``; in both cases row 11 of the text
        field's ``pretrained_embeds_weight`` is inspected.
        """

        def load_embeds(init_strategy):
            # Build a fresh handler for the requested strategy, initialise its
            # metadata from the fixture files and hand back the embedding
            # weights recorded for the text field.
            config = FeatureConfig(
                word_feat=WordFeatConfig(
                    embedding_init_strategy=init_strategy,
                    embed_dim=5,
                    pretrained_embeddings_path=tests_module.TEST_BASE_DIR,
                )
            )
            handler = JointModelDataHandler.from_config(
                JointModelDataHandler.Config(),
                config,
                [DocLabelConfig(), WordLabelConfig()],
                featurizer=SimpleFeaturizer.from_config(
                    SimpleFeaturizer.Config(), config
                ),
            )
            handler.init_metadata_from_path(TRAIN_FILE, EVAL_FILE, TEST_FILE)
            text_meta = handler.metadata.features[DatasetFieldName.TEXT_FIELD]
            return text_meta.pretrained_embeds_weight

        zeros = [0] * 5  # one entry per embedding dimension (embed_dim=5)

        # Random initialization: every component must be strictly non-zero.
        random_row = load_embeds(EmbedInitStrategy.RANDOM)[11].numpy()
        np.testing.assert_array_less(zeros, np.absolute(random_row))

        # Zero initialization: every component must be exactly zero.
        zero_row = load_embeds(EmbedInitStrategy.ZERO)[11].numpy()
        np.testing.assert_array_equal(zeros, zero_row)
Exemplo n.º 2
0
 def example_config(cls):
     """Build an example ``cls.Config``.

     The example uses a doc label plus a word label, and a deprecated
     bagging ensemble wrapping a single deprecated doc model config.
     """
     label_configs = [DocLabelConfig(), WordLabelConfig()]
     ensemble_config = BaggingDocEnsemble_Deprecated.Config(
         models=[DocModel_Deprecated.Config()]
     )
     return cls.Config(labels=label_configs, model=ensemble_config)
Exemplo n.º 3
0
 class Config(Task.Config):
     """Task configuration whose fields default to the word-tagging components.

     Each field is annotated with the config type it expects and defaults to
     a default-constructed instance of that type.
     """

     # Model to train.
     model: WordTaggingModel.Config = WordTaggingModel.Config()
     # Trainer settings.
     trainer: Trainer.Config = Trainer.Config()
     # Single word-level label.
     labels: WordLabelConfig = WordLabelConfig()
     # Data handler settings.
     data_handler: JointModelDataHandler.Config = JointModelDataHandler.Config()
     # Metric reporter settings.
     metric_reporter: WordTaggingMetricReporter.Config = (
         WordTaggingMetricReporter.Config()
     )
 def setUp(self):
     """Create the ``JointModelDataHandler`` fixture from default configs."""
     handler_config = JointModelDataHandler.Config()
     feature_config = FeatureConfig()
     label_configs = [DocLabelConfig(), WordLabelConfig()]
     # The featurizer gets its own default FeatureConfig instance, separate
     # from the one handed to the data handler.
     featurizer = SimpleFeaturizer.from_config(
         SimpleFeaturizer.Config(), FeatureConfig()
     )
     self.data_handler = JointModelDataHandler.from_config(
         handler_config, feature_config, label_configs, featurizer=featurizer
     )
Exemplo n.º 5
0
 def _init_data_handler(self):
     """Return a ``LanguageModelDataHandler`` with metadata initialised.

     The handler is built from default configs with ``shuffle=False`` and
     its metadata is initialised with ``FILE_NAME`` passed as all three
     path arguments.
     """
     handler_config = LanguageModelDataHandler.Config()
     feature_config = FeatureConfig()
     label_config = WordLabelConfig()
     featurizer = create_featurizer(SimpleFeaturizer.Config(), FeatureConfig())
     handler = LanguageModelDataHandler.from_config(
         handler_config,
         feature_config,
         label_config,
         featurizer=featurizer,
         shuffle=False,
     )
     # The same file is passed for all three path arguments.
     handler.init_metadata_from_path(*([FILE_NAME] * 3))
     return handler
    def setUp(self):
        """Build the contextual intent-slot data handler and read the fixture.

        Stores the handler on ``self.dh`` and the result of reading the tiny
        training TSV (using the handler's ``raw_columns``) on ``self.data``.
        """
        train_path = tests_module.test_file("contextual_intent_slot_train_tiny.tsv")

        handler_config = ContextualIntentSlotModelDataHandler.Config()
        input_config = ModelInputConfig()
        label_configs = [DocLabelConfig(), WordLabelConfig()]
        # The featurizer is constructed directly, with its own ModelInputConfig.
        featurizer = SimpleFeaturizer(SimpleFeaturizer.Config(), ModelInputConfig())

        self.dh = ContextualIntentSlotModelDataHandler.from_config(
            handler_config, input_config, label_configs, featurizer=featurizer
        )
        self.data = self.dh.read_from_file(train_path, self.dh.raw_columns)
Exemplo n.º 7
0
    def test_read_file_with_dense_features(self):
        """Reading a file with the dense column enabled yields the raw value.

        The dense column is added to ``columns_to_read`` and the first row
        read from the dense fixture must carry the string ``"[0,1,2,3,4]"``
        under ``ModelInput.DENSE``.
        """
        handler_config = ContextualIntentSlotModelDataHandler.Config()
        # NOTE(review): append mutates the list in place; confirm that
        # Config() hands out a fresh list rather than a shared default.
        handler_config.columns_to_read.append(ModelInput.DENSE)

        dense_path = tests_module.test_file(
            "contextual_intent_slot_train_tiny_dense.tsv"
        )
        input_config = ModelInputConfig()
        label_configs = [DocLabelConfig(), WordLabelConfig()]
        featurizer = SimpleFeaturizer(SimpleFeaturizer.Config(), ModelInputConfig())
        handler = ContextualIntentSlotModelDataHandler.from_config(
            handler_config, input_config, label_configs, featurizer=featurizer
        )

        # Materialise every row before inspecting the first one.
        rows = list(handler.read_from_file(dense_path, handler.raw_columns))
        first_row = rows[0]
        self.assertEqual(first_row[ModelInput.DENSE], "[0,1,2,3,4]")
Exemplo n.º 8
0
    def test_data_handler(self):
        """Check the batches produced by the BPTT data handler (``bptt_len=4``).

        Each batch is indexed as ``batch[0]`` = input and ``batch[1]`` =
        target. The input holds the sequences at index 0 and the sequence
        lengths at index 1; the target sequences sit at index 0 of the target.
        """
        handler = BPTTLanguageModelDataHandler.from_config(
            BPTTLanguageModelDataHandler.Config(bptt_len=4),
            FeatureConfig(),
            WordLabelConfig(),
            featurizer=SimpleFeaturizer.from_config(
                SimpleFeaturizer.Config(), FeatureConfig()
            ),
        )
        handler.init_metadata_from_path(FILE_NAME, FILE_NAME, FILE_NAME)

        batches = list(handler.get_train_iter_from_path(FILE_NAME, BATCH_SIZE))
        # There are two batches in the tiny dataset
        self.assertEqual(len(batches), 2)
        first_batch, second_batch = batches[0], batches[1]

        # First batch: input sequences of dim (bsize, max_seq_length)
        expected_first_inputs = [
            [15, 19, 12, 16],
            [3, 13, 21, 8],
            [20, 7, 23, 4],
            [6, 5, 7, 22],
        ]
        np.testing.assert_array_equal(first_batch[0][0], expected_first_inputs)
        # sequence lengths of dim (bsize)
        np.testing.assert_array_equal(first_batch[0][1], [4] * 4)
        # targets have the same dim as the input sequences
        expected_first_targets = [
            [19, 12, 16, 14],
            [13, 21, 8, 3],
            [7, 23, 4, 3],
            [5, 7, 22, 10],
        ]
        np.testing.assert_array_equal(first_batch[1][0], expected_first_targets)

        # Second batch: only three tokens per sequence.
        expected_second_inputs = [[14, 17, 11], [3, 5, 18], [3, 8, 4], [10, 4, 9]]
        np.testing.assert_array_equal(second_batch[0][0], expected_second_inputs)
        np.testing.assert_array_equal(second_batch[0][1], [3] * 4)
        expected_second_targets = [[17, 11, 4], [5, 18, 6], [8, 4, 3], [4, 9, 1]]
        np.testing.assert_array_equal(second_batch[1][0], expected_second_targets)
Exemplo n.º 9
0
 def example_config(cls):
     """Build an example ``cls.Config`` with a doc label and a word label."""
     label_configs = [DocLabelConfig(), WordLabelConfig()]
     return cls.Config(labels=label_configs)