Пример #1
0
 class EncoderPairwiseModelInput(ModelInputBase):
     """Input tensorizer configs for a pairwise encoder model.

     Declares two token inputs (``tokens1`` and ``tokens2``), a label input,
     and a token-count input derived from the two token inputs.
     """

     tokens1: Tensorizer.Config = Tensorizer.Config()
     tokens2: Tensorizer.Config = Tensorizer.Config()
     labels: LabelTensorizer.Config = LabelTensorizer.Config()
     # Used by the metric reporter. Refers to the "tokens1" and "tokens2"
     # inputs by name; index 2 presumably selects the sequence-length entry
     # of each tensorizer's output -- TODO confirm against NtokensTensorizer.
     num_tokens: NtokensTensorizer.Config = NtokensTensorizer.Config(
         names=["tokens1", "tokens2"],
         indexes=[2, 2],
     )
Пример #2
0
 class EncoderModelInput(BaseModel.Config.ModelInput):
     """Input tensorizer configs for a single-text encoder model.

     Declares a token input, an optional dense-feature input (absent by
     default), a label input, and a token-count input.
     """

     tokens: Tensorizer.Config = Tensorizer.Config()
     dense: Optional[FloatListTensorizer.Config] = None
     labels: LabelTensorizer.Config = LabelTensorizer.Config()
     # Used by the metric reporter. Refers to the "tokens" input by name;
     # index 2 presumably selects the sequence-length entry of that
     # tensorizer's output -- TODO confirm against NtokensTensorizer.
     num_tokens: NtokensTensorizer.Config = NtokensTensorizer.Config(
         names=["tokens"],
         indexes=[2],
     )
        class InputConfig(ConfigBase):
            """Input tensorizer configs with separate "right" and "left" sides.

            Each side has its own RoBERTa token config and an optional
            dense-feature config; a single label config is shared.
            """
            right_tokens: RoBERTaTensorizer.Config = RoBERTaTensorizer.Config()
            left_tokens: RoBERTaTensorizer.Config = RoBERTaTensorizer.Config()
            # No dense-feature config is set unless one is supplied.
            right_dense: Optional[FloatListTensorizer.Config] = None
            left_dense: Optional[FloatListTensorizer.Config] = None

            labels: LabelTensorizer.Config = LabelTensorizer.Config()
Пример #4
0
    def test_batch_predict_caffe2_model(self):
        """Check batched Caffe2 predictions against the task's own predictions.

        Builds a document-classification task with token, dense and label
        inputs over the tiny TSV fixtures, exports the model and saves a
        snapshot to temporary files, then verifies that
        ``batch_predict_caffe2_model`` returns 4 results whose scores match
        ``task.predict`` for the default cache size, ``cache_size=2`` and
        ``cache_size=-1``.
        """
        with tempfile.NamedTemporaryFile() as snapshot_file, \
                tempfile.NamedTemporaryFile() as caffe2_model_file:
            train_path = tests_module.test_file("train_data_tiny.tsv")
            eval_path = tests_module.test_file("test_data_tiny.tsv")

            # Assemble the config bottom-up instead of as one nested literal.
            model_config = DocModel.Config(
                inputs=DocModel.Config.ModelInput(
                    tokens=TokenTensorizer.Config(),
                    dense=FloatListTensorizer.Config(
                        column="dense", dim=1, error_check=True
                    ),
                    labels=LabelTensorizer.Config(),
                )
            )
            data_config = Data.Config(
                source=TSVDataSource.Config(
                    train_filename=train_path,
                    eval_filename=eval_path,
                    # The eval file doubles as the test set.
                    test_filename=eval_path,
                    field_names=["label", "slots", "text", "dense"],
                )
            )
            config = PyTextConfig(
                task=DocumentClassificationTask.Config(
                    model=model_config,
                    data=data_config,
                ),
                version=21,
                save_snapshot_path=snapshot_file.name,
                export_caffe2_path=caffe2_model_file.name,
            )

            task = create_task(config.task)
            task.export(task.model, caffe2_model_file.name)
            save(config, task.model, meta=None, tensorizers=task.data.tensorizers)

            # Reference predictions from the in-memory task.
            expected_results = task.predict(task.data.data_source.test)

            def assert_caffe2_results_correct(caffe2_results):
                # Compare, row by row, the first score list of the reference
                # prediction with the first element of every Caffe2 output.
                for expected, actual in zip(expected_results, caffe2_results):
                    caffe2_scores = [score[0] for score in actual.values()]
                    np.testing.assert_array_almost_equal(
                        expected["score"].tolist()[0],
                        caffe2_scores,
                    )

            # Default cache size first, then an explicit size, then -1.
            for extra_kwargs in ({}, {"cache_size": 2}, {"cache_size": -1}):
                results = batch_predict_caffe2_model(
                    snapshot_file.name, caffe2_model_file.name, **extra_kwargs
                )
                self.assertEqual(4, len(results))
                assert_caffe2_results_correct(results)
Пример #5
0
 class ModelInput(Model.Config.ModelInput):
     """Input tensorizer configs with word-level and document-level labels.

     Both label configs are created with ``allow_unknown=True``. The two
     weight inputs are optional and absent by default.
     """

     tokens: TokenTensorizer.Config = TokenTensorizer.Config()
     word_labels: SlotLabelTensorizer.Config = SlotLabelTensorizer.Config(
         allow_unknown=True,
     )
     doc_labels: LabelTensorizer.Config = LabelTensorizer.Config(
         allow_unknown=True,
     )
     # No weight configs unless explicitly supplied.
     doc_weight: Optional[FloatTensorizer.Config] = None
     word_weight: Optional[FloatTensorizer.Config] = None
Пример #6
0
 class BertModelInput(BaseModel.Config.ModelInput):
     """Input tensorizer configs for a BERT-based model.

     Tokens use a BERT tensorizer config with ``max_seq_len=128``; the dense
     input is optional and absent by default.
     """

     tokens: BERTTensorizer.Config = BERTTensorizer.Config(
         max_seq_len=128,
     )
     dense: Optional[FloatListTensorizer.Config] = None
     labels: LabelTensorizer.Config = LabelTensorizer.Config()
     # Used by the metric reporter. Refers to the "tokens" input by name;
     # index 2 presumably selects the sequence-length entry of that
     # tensorizer's output -- TODO confirm against NtokensTensorizer.
     num_tokens: NtokensTensorizer.Config = NtokensTensorizer.Config(
         names=["tokens"],
         indexes=[2],
     )
Пример #7
0
 class ModelInput(BasePairwiseModel.Config.ModelInput):
     """Input tensorizer configs for a pairwise model.

     The two token inputs read the "text1" and "text2" columns respectively.
     """

     tokens1: TokenTensorizer.Config = TokenTensorizer.Config(
         column="text1",
     )
     tokens2: TokenTensorizer.Config = TokenTensorizer.Config(
         column="text2",
     )
     labels: LabelTensorizer.Config = LabelTensorizer.Config()
     # Used by the metric reporter; configured over both text columns.
     raw_text: JoinStringTensorizer.Config = JoinStringTensorizer.Config(
         columns=[
             "text1",
             "text2",
         ],
     )
Пример #8
0
 class ModelInput(BaseModel.Config.ModelInput):
     """Input tensorizer configs for a SQuAD-style model.

     ``squad_input`` accepts either the BERT or the RoBERTa SQuAD tensorizer
     config and defaults to the BERT one with ``max_seq_len=256``.
     """

     squad_input: Union[
         SquadForBERTTensorizer.Config,
         SquadForRoBERTaTensorizer.Config,
     ] = SquadForBERTTensorizer.Config(max_seq_len=256)
     # The is_impossible label, read from the "has_answer" column.
     has_answer: LabelTensorizer.Config = LabelTensorizer.Config(column="has_answer")
 class ModelInput(ModelInputBase):
     """Input tensorizer configs for a pairwise BERT model.

     Each token input is annotated with the base BERT tensorizer config type
     and defaults to a ``BERTTensorizer.Config`` over one text column with
     ``max_seq_len=128``.
     """

     tokens1: BERTTensorizerBase.Config = BERTTensorizer.Config(
         columns=["text1"],
         max_seq_len=128,
     )
     tokens2: BERTTensorizerBase.Config = BERTTensorizer.Config(
         columns=["text2"],
         max_seq_len=128,
     )
     labels: LabelTensorizer.Config = LabelTensorizer.Config()
     # Used by the metric reporter. Refers to the "tokens1" and "tokens2"
     # inputs by name; index 2 presumably selects the sequence-length entry
     # of each tensorizer's output -- TODO confirm against NtokensTensorizer.
     num_tokens: NtokensTensorizer.Config = NtokensTensorizer.Config(
         names=["tokens1", "tokens2"],
         indexes=[2, 2],
     )
Пример #10
0
 class ModelInput(Model.Config.ModelInput):
     """Input tensorizer configs with labels and weights at two levels.

     Word-level and document-level label configs both set
     ``allow_unknown=True``. Both weight inputs are always configured and
     read the "doc_weight" and "word_weight" columns.
     """

     tokens: TokenTensorizer.Config = TokenTensorizer.Config()
     word_labels: SlotLabelTensorizer.Config = SlotLabelTensorizer.Config(
         allow_unknown=True,
     )
     doc_labels: LabelTensorizer.Config = LabelTensorizer.Config(
         allow_unknown=True,
     )
     doc_weight: FloatTensorizer.Config = FloatTensorizer.Config(
         column="doc_weight",
     )
     word_weight: FloatTensorizer.Config = FloatTensorizer.Config(
         column="word_weight",
     )
Пример #11
0
 class InputConfig(ConfigBase):
     """Input tensorizer configs: RoBERTa tokens and labels, both with defaults."""
     tokens: RoBERTaTensorizer.Config = RoBERTaTensorizer.Config()
     labels: LabelTensorizer.Config = LabelTensorizer.Config()
Пример #12
0
 class ModelInput(Model.Config.ModelInput):
     """Input tensorizer configs: word tokens, labels and raw text.

     The label config is created with ``allow_unknown=True``.
     """

     tokens: WordTensorizer.Config = WordTensorizer.Config()
     labels: LabelTensorizer.Config = LabelTensorizer.Config(allow_unknown=True)
     # Used by the metric reporter; reads the "text" column.
     raw_text: MetaInput.Config = MetaInput.Config(
         column="text",
     )
Пример #13
0
 class ModelInput(Model.Config.ModelInput):
     """Input tensorizer configs: tokens and labels, both with default settings."""
     tokens: TokenTensorizer.Config = TokenTensorizer.Config()
     labels: LabelTensorizer.Config = LabelTensorizer.Config()
Пример #14
0
 class ModelInput(Model.Config.ModelInput):
     """Input tensorizer configs: tokens, labels and raw text.

     The label config is created with ``allow_unknown=True``.
     """
     tokens: TokenTensorizer.Config = TokenTensorizer.Config()
     labels: LabelTensorizer.Config = LabelTensorizer.Config(allow_unknown=True)
     # Used by the metric reporter; reads the "text" column.
     raw_text: RawString.Config = RawString.Config(column="text")
Пример #15
0
 class ModelInput(Model.Config.ModelInput):
     """Input tensorizer configs: tokens, optional dense features and labels."""
     tokens: TokenTensorizer.Config = TokenTensorizer.Config()
     # No dense-feature config is set unless one is supplied.
     dense: Optional[FloatListTensorizer.Config] = None
     labels: LabelTensorizer.Config = LabelTensorizer.Config()
        class InputConfig(ConfigBase):
            """Input tensorizer configs with one token input and two dense inputs.

            Declares a RoBERTa token config, optional "right" and "left"
            dense-feature configs (absent by default), and a label config.
            """

            tokens: RoBERTaTensorizer.Config = RoBERTaTensorizer.Config()
            # Both dense inputs default to None, so the annotations are spelled
            # as Optional to match the declared default.
            right_dense: Optional[FloatListTensorizer.Config] = None
            left_dense: Optional[FloatListTensorizer.Config] = None

            labels: LabelTensorizer.Config = LabelTensorizer.Config()
Пример #17
0
 class Config(Model.Config, doc_model.DocModel.Config):
     """Model config combining ``Model.Config`` and ``DocModel.Config``.

     ``inputs`` is a name-to-tensorizer-config mapping with "tokens" and
     "labels" entries; ``embedding`` is a word-feature config with defaults.
     """
     inputs: Dict[str, Tensorizer.Config] = {
         "tokens": WordTensorizer.Config(),
         "labels": LabelTensorizer.Config(),
     }
     embedding: WordFeatConfig = WordFeatConfig()
Пример #18
0
 class ModelInput(BasePairwiseModel.Config.ModelInput):
     """Input tensorizer configs for a pairwise model.

     The two token inputs read the "text1" and "text2" columns respectively.
     """

     tokens1: TokenTensorizer.Config = TokenTensorizer.Config(column="text1")
     tokens2: TokenTensorizer.Config = TokenTensorizer.Config(column="text2")
     labels: LabelTensorizer.Config = LabelTensorizer.Config()
Пример #19
0
 class ModelInput(BaseModel.Config.ModelInput):
     """Input tensorizer configs for a SQuAD-style model.

     Declares a SQuAD tensorizer config with defaults and a label config
     that reads the "has_answer" column.
     """

     squad_input: SquadTensorizer.Config = SquadTensorizer.Config()
     has_answer: LabelTensorizer.Config = LabelTensorizer.Config(
         column="has_answer",
     )
Пример #20
0
 class ModelInput(NewModel.Config.ModelInput):
     """Input tensorizer configs: word tokens and labels.

     The label config is created with ``allow_unknown=True``.
     """
     tokens: WordTensorizer.Config = WordTensorizer.Config()
     labels: LabelTensorizer.Config = LabelTensorizer.Config(allow_unknown=True)