Exemplo n.º 1
0
 class ModelInput(Model.Config.ModelInput):
     """Token tensorizer configs for three text fields.

     Each field is a ``TokenTensorizer.Config`` whose ``column`` argument
     equals the field's own name.
     """

     pos_response: TokenTensorizer.Config = TokenTensorizer.Config(
         column="pos_response"
     )
     neg_response: TokenTensorizer.Config = TokenTensorizer.Config(
         column="neg_response"
     )
     query: TokenTensorizer.Config = TokenTensorizer.Config(column="query")
Exemplo n.º 2
0
def get_tensorizers(add_dict_feat=False, add_contextual_feat=False):
    """Build and initialize the source/target token tensorizers.

    The tensorizers are initialized against the ``train`` split of a
    ``TSVDataSource`` reading ``TEST_FILE_NAME``.

    Args:
        add_dict_feat: when true, a ``GazetteerTensorizer`` is also created,
            initialized on the same data, and stored under ``"dict_feat"``.
        add_contextual_feat: accepted but not read anywhere in this body.

    Returns:
        A dict with the keys ``"src_seq_tokens"`` and ``"trg_seq_tokens"``,
        plus ``"dict_feat"`` when ``add_dict_feat`` is true.
    """
    column_types = {
        "source_sequence": str,
        "dict_feat": Gazetteer,
        "target_sequence": str,
    }
    source_config = TSVDataSource.Config(
        train_filename=TEST_FILE_NAME,
        field_names=["source_sequence", "dict_feat", "target_sequence"],
    )
    data_source = TSVDataSource.from_config(source_config, column_types)

    def _bos_eos_tokens(column):
        # Both sequence tensorizers differ only in the column they read.
        token_config = TokenTensorizer.Config(
            column=column, add_eos_token=True, add_bos_token=True
        )
        return TokenTensorizer.from_config(token_config)

    tensorizers = {
        "src_seq_tokens": _bos_eos_tokens("source_sequence"),
        "trg_seq_tokens": _bos_eos_tokens("target_sequence"),
    }
    initialize_tensorizers(tensorizers, data_source.train)

    if not add_dict_feat:
        return tensorizers

    # The gazetteer tensorizer is initialized on its own, in a separate pass
    # over the training data, after the token tensorizers are ready.
    gazetteer_config = GazetteerTensorizer.Config(
        text_column="source_sequence", dict_column="dict_feat"
    )
    tensorizers["dict_feat"] = GazetteerTensorizer.from_config(gazetteer_config)
    initialize_tensorizers({"dict_feat": tensorizers["dict_feat"]}, data_source.train)
    return tensorizers
Exemplo n.º 3
0
 class ModelInput(BasePairwiseModel.Config.ModelInput):
     """Tensorizer configs for a pair of text columns plus labels.

     ``tokens1`` and ``tokens2`` are configured with the columns ``"text1"``
     and ``"text2"``; ``raw_text`` joins those same two columns.
     """

     tokens1: TokenTensorizer.Config = TokenTensorizer.Config(
         column="text1"
     )
     tokens2: TokenTensorizer.Config = TokenTensorizer.Config(
         column="text2"
     )
     labels: LabelTensorizer.Config = LabelTensorizer.Config()
     # for metric reporter
     raw_text: JoinStringTensorizer.Config = JoinStringTensorizer.Config(
         columns=["text1", "text2"],
     )
Exemplo n.º 4
0
    def test_batch_predict_caffe2_model(self):
        """Check Caffe2 batch prediction against ``task.predict``.

        A document classification task is created from the tiny TSV fixtures,
        exported to a Caffe2 model file and saved as a snapshot. Then
        ``batch_predict_caffe2_model`` is called with its default arguments,
        with ``cache_size=2`` and with ``cache_size=-1``; every call must
        return 4 results whose scores are almost equal to the scores from
        ``task.predict`` on the test split.
        """
        with tempfile.NamedTemporaryFile() as snapshot_file, \
                tempfile.NamedTemporaryFile() as caffe2_model_file:
            train_data = tests_module.test_file("train_data_tiny.tsv")
            eval_data = tests_module.test_file("test_data_tiny.tsv")

            model_config = DocModel.Config(
                inputs=DocModel.Config.ModelInput(
                    tokens=TokenTensorizer.Config(),
                    dense=FloatListTensorizer.Config(
                        column="dense", dim=1, error_check=True
                    ),
                    labels=LabelTensorizer.Config(),
                )
            )
            # The same file serves as both the eval and the test split.
            data_config = Data.Config(
                source=TSVDataSource.Config(
                    train_filename=train_data,
                    eval_filename=eval_data,
                    test_filename=eval_data,
                    field_names=["label", "slots", "text", "dense"],
                )
            )
            config = PyTextConfig(
                task=DocumentClassificationTask.Config(
                    model=model_config, data=data_config
                ),
                version=21,
                save_snapshot_path=snapshot_file.name,
                export_caffe2_path=caffe2_model_file.name,
            )

            task = create_task(config.task)
            task.export(task.model, caffe2_model_file.name)
            save(config, task.model, meta=None, tensorizers=task.data.tensorizers)

            pt_results = task.predict(task.data.data_source.test)

            def assert_caffe2_results_correct(caffe2_results):
                # Pairwise comparison; zip stops at the shorter sequence, so
                # the length is asserted separately by the caller.
                for pt_res, res in zip(pt_results, caffe2_results):
                    expected_scores = pt_res["score"].tolist()[0]
                    caffe2_scores = [score[0] for score in res.values()]
                    np.testing.assert_array_almost_equal(
                        expected_scores, caffe2_scores
                    )

            # Default arguments first, then the two explicit cache sizes.
            for extra_kwargs in ({}, {"cache_size": 2}, {"cache_size": -1}):
                results = batch_predict_caffe2_model(
                    snapshot_file.name, caffe2_model_file.name, **extra_kwargs
                )
                self.assertEqual(4, len(results))
                assert_caffe2_results_correct(results)
Exemplo n.º 5
0
 class ModelInput(Model.Config.ModelInput):
     """Tensorizer configs for tokens, word-level and document-level labels.

     Both label tensorizers are configured with ``allow_unknown=True``.
     The two weight tensorizers are optional and default to ``None``.
     """

     tokens: TokenTensorizer.Config = TokenTensorizer.Config()
     word_labels: SlotLabelTensorizer.Config = SlotLabelTensorizer.Config(
         allow_unknown=True
     )
     doc_labels: LabelTensorizer.Config = LabelTensorizer.Config(
         allow_unknown=True
     )
     doc_weight: Optional[FloatTensorizer.Config] = None
     word_weight: Optional[FloatTensorizer.Config] = None
Exemplo n.º 6
0
 class ModelInput(Model.Config.ModelInput):
     """Tensorizer configs for tokens, labels and per-example weights.

     Both label tensorizers are configured with ``allow_unknown=True``.
     The weight tensorizers are configured with the columns ``"doc_weight"``
     and ``"word_weight"``.
     """

     tokens: TokenTensorizer.Config = TokenTensorizer.Config()
     word_labels: SlotLabelTensorizer.Config = SlotLabelTensorizer.Config(
         allow_unknown=True
     )
     doc_labels: LabelTensorizer.Config = LabelTensorizer.Config(
         allow_unknown=True
     )
     doc_weight: FloatTensorizer.Config = FloatTensorizer.Config(
         column="doc_weight"
     )
     word_weight: FloatTensorizer.Config = FloatTensorizer.Config(
         column="word_weight"
     )
 def _get_tensorizers(self):
     """Build and initialize the source/target token tensorizers.

     The tensorizers are initialized against the ``train`` split of a
     ``TSVDataSource`` reading the ``compositional_seq2seq_unit.tsv`` fixture.

     Returns:
         A dict with the keys ``"src_seq_tokens"`` and ``"trg_seq_tokens"``.
     """
     column_types = {"source_sequence": str, "target_sequence": str}
     source_config = TSVDataSource.Config(
         train_filename=tests_module.test_file("compositional_seq2seq_unit.tsv"),
         field_names=["source_sequence", "target_sequence"],
     )
     data_source = TSVDataSource.from_config(source_config, column_types)

     def _bos_eos_tokens(column):
         # Both tensorizers differ only in the column they read.
         token_config = TokenTensorizer.Config(
             column=column, add_eos_token=True, add_bos_token=True
         )
         return TokenTensorizer.from_config(token_config)

     tensorizers = {
         "src_seq_tokens": _bos_eos_tokens("source_sequence"),
         "trg_seq_tokens": _bos_eos_tokens("target_sequence"),
     }
     initialize_tensorizers(tensorizers, data_source.train)
     return tensorizers
Exemplo n.º 8
0
 class ModelInput(Model.Config.ModelInput):
     """Single token input configured to add both BOS and EOS tokens."""

     tokens: TokenTensorizer.Config = TokenTensorizer.Config(
         add_bos_token=True,
         add_eos_token=True,
     )
Exemplo n.º 9
0
 class ModelInput(Model.Config.ModelInput):
     """Token and label tensorizer configs, both with default arguments."""

     # Field order is kept as declared: tokens first, then labels.
     tokens: TokenTensorizer.Config = TokenTensorizer.Config()
     labels: LabelTensorizer.Config = LabelTensorizer.Config()
Exemplo n.º 10
0
 class RegressionModelInput(DocModel.Config.ModelInput):
     """Token input paired with a ``NumericLabelTensorizer`` for the labels.

     Both tensorizer configs are built with default arguments.
     """

     tokens: TokenTensorizer.Config = TokenTensorizer.Config()
     labels: NumericLabelTensorizer.Config = NumericLabelTensorizer.Config()
Exemplo n.º 11
0
 class ModelInput(Model.Config.ModelInput):
     """Token, label and raw-text tensorizer configs.

     ``labels`` is configured with ``allow_unknown=True``; ``raw_text`` is
     configured with the ``"text"`` column.
     """

     tokens: TokenTensorizer.Config = TokenTensorizer.Config()
     labels: LabelTensorizer.Config = LabelTensorizer.Config(
         allow_unknown=True
     )
     # for metric reporter
     raw_text: RawString.Config = RawString.Config(
         column="text"
     )
Exemplo n.º 12
0
 class ModelInput(Model.Config.ModelInput):
     """Token and label tensorizer configs with an optional dense input.

     ``dense`` defaults to ``None``; a ``FloatListTensorizer.Config`` may be
     supplied in its place.
     """

     tokens: TokenTensorizer.Config = TokenTensorizer.Config()
     # No dense tensorizer unless one is explicitly configured.
     dense: Optional[FloatListTensorizer.Config] = None
     labels: LabelTensorizer.Config = LabelTensorizer.Config()
Exemplo n.º 13
0
 class ModelInput(BaseModel.Config.ModelInput):
     """Token and action tensorizer configs.

     ``tokens`` is configured with the ``"tokenized_text"`` column;
     ``actions`` uses an ``AnnotationNumberizer.Config`` with default arguments.
     """

     tokens: TokenTensorizer.Config = TokenTensorizer.Config(
         column="tokenized_text"
     )
     actions: AnnotationNumberizer.Config = AnnotationNumberizer.Config()
Exemplo n.º 14
0
 class ModelInput(Model.Config.ModelInput):
     """Source/target token tensorizer configs with an optional ``dict_feat``.

     ``dict_feat`` defaults to ``None``; a ``GazetteerTensorizer.Config`` may
     be supplied in its place.
     """

     src_seq_tokens: TokenTensorizer.Config = TokenTensorizer.Config()
     trg_seq_tokens: TokenTensorizer.Config = TokenTensorizer.Config()
     # No gazetteer tensorizer unless one is explicitly configured.
     dict_feat: Optional[GazetteerTensorizer.Config] = None
Exemplo n.º 15
0
 class ModelInput(BasePairwiseModel.Config.ModelInput):
     """Tensorizer configs for a pair of text columns plus labels.

     ``tokens1`` and ``tokens2`` are configured with the columns ``"text1"``
     and ``"text2"`` respectively.
     """

     tokens1: TokenTensorizer.Config = TokenTensorizer.Config(column="text1")
     tokens2: TokenTensorizer.Config = TokenTensorizer.Config(column="text2")
     labels: LabelTensorizer.Config = LabelTensorizer.Config()
Exemplo n.º 16
0
 class ModelInput(Model.Config.ModelInput):
     """Single token input configured to add both BOS and EOS tokens.

     The field is annotated as ``Optional`` but its default is a concrete
     ``TokenTensorizer.Config``, not ``None``.
     """

     tokens: Optional[TokenTensorizer.Config] = TokenTensorizer.Config(
         add_bos_token=True,
         add_eos_token=True,
     )
Exemplo n.º 17
0
 class ModelInput(Model.Config.ModelInput):
     """Source/target token tensorizer configs with two optional extras.

     ``dict_feat`` and ``contextual_token_embedding`` both default to ``None``.
     """

     src_seq_tokens: TokenTensorizer.Config = TokenTensorizer.Config()
     trg_seq_tokens: TokenTensorizer.Config = TokenTensorizer.Config()
     # Optional inputs: absent unless explicitly configured.
     dict_feat: Optional[GazetteerTensorizer.Config] = None
     contextual_token_embedding: Optional[ByteTokenTensorizer.Config] = None
Exemplo n.º 18
0
 class ModelInput(Model.Config.ModelInput):
     """Token input with BOS/EOS tokens plus a raw-text tensorizer.

     ``raw_text`` is configured with the ``"text"`` column.
     """

     tokens: TokenTensorizer.Config = TokenTensorizer.Config(
         add_bos_token=True,
         add_eos_token=True,
     )
     # for metric reporter
     raw_text: RawString.Config = RawString.Config(
         column="text"
     )
Exemplo n.º 19
0
 class ModelInput(Model.Config.ModelInput):
     """Token, word-label and raw-text tensorizer configs.

     ``tokens`` and ``labels`` use default arguments; ``raw_text`` is
     configured with the ``"text"`` column.
     """

     tokens: TokenTensorizer.Config = TokenTensorizer.Config()
     labels: WordLabelTensorizer.Config = WordLabelTensorizer.Config()
     # for metric reporter
     raw_text: RawString.Config = RawString.Config(
         column="text"
     )
Exemplo n.º 20
0
 class ModelInput(Model.Config.ModelInput):
     """Two token tensorizer configs: ``tokens`` and ``slots``.

     ``tokens`` uses default arguments; ``slots`` is configured with the
     ``"slots"`` column.
     """

     tokens: TokenTensorizer.Config = TokenTensorizer.Config()
     slots: TokenTensorizer.Config = TokenTensorizer.Config(column="slots")