class EncoderPairwiseModelInput(ModelInputBase):
    """Tensorizer configs for a model input made of two token fields.

    Declares ``tokens1`` and ``tokens2`` (both default ``Tensorizer.Config``),
    a ``labels`` field, and a ``num_tokens`` field built from both token
    fields.
    """

    tokens1: Tensorizer.Config = Tensorizer.Config()
    tokens2: Tensorizer.Config = Tensorizer.Config()
    labels: LabelTensorizer.Config = LabelTensorizer.Config()
    # Consumed by the metric reporter.
    # NOTE(review): `indexes` pairs one entry with each name in `names`;
    # the meaning of index 2 is not visible here -- confirm against
    # NtokensTensorizer.
    num_tokens: NtokensTensorizer.Config = NtokensTensorizer.Config(
        names=["tokens1", "tokens2"],
        indexes=[2, 2],
    )
class EncoderModelInput(BaseModel.Config.ModelInput):
    """Tensorizer configs for a single-text encoder model input.

    ``dense`` is off by default (``None``); the remaining fields each have a
    default config instance.
    """

    tokens: Tensorizer.Config = Tensorizer.Config()
    dense: Optional[FloatListTensorizer.Config] = None
    labels: LabelTensorizer.Config = LabelTensorizer.Config()
    # Consumed by the metric reporter.
    num_tokens: NtokensTensorizer.Config = NtokensTensorizer.Config(
        names=["tokens"],
        indexes=[2],
    )
class InputConfig(ConfigBase):
    """Tensorizer configs for a two-sided (right/left) model input.

    Each side has a RoBERTa token tensorizer config and an optional dense
    tensorizer config that defaults to ``None``; ``labels`` is shared.
    """

    right_tokens: RoBERTaTensorizer.Config = RoBERTaTensorizer.Config()
    left_tokens: RoBERTaTensorizer.Config = RoBERTaTensorizer.Config()

    right_dense: Optional[FloatListTensorizer.Config] = None
    left_dense: Optional[FloatListTensorizer.Config] = None

    labels: LabelTensorizer.Config = LabelTensorizer.Config()
def test_batch_predict_caffe2_model(self):
    """Check that Caffe2 batch prediction reproduces ``task.predict`` scores.

    Builds a document-classification task on the tiny TSV fixtures, exports
    it to a temporary Caffe2 file, saves a snapshot, and then runs
    ``batch_predict_caffe2_model`` three times: with the default cache
    size, with ``cache_size=2`` and with ``cache_size=-1``. Every run must
    return 4 results whose scores match the PyTorch predictions.
    """
    with tempfile.NamedTemporaryFile() as snapshot_file, \
            tempfile.NamedTemporaryFile() as caffe2_model_file:
        snapshot_path = snapshot_file.name
        caffe2_path = caffe2_model_file.name

        train_data = tests_module.test_file("train_data_tiny.tsv")
        eval_data = tests_module.test_file("test_data_tiny.tsv")

        model_inputs = DocModel.Config.ModelInput(
            tokens=TokenTensorizer.Config(),
            dense=FloatListTensorizer.Config(
                column="dense", dim=1, error_check=True
            ),
            labels=LabelTensorizer.Config(),
        )
        # The eval file doubles as the test file.
        data_source = TSVDataSource.Config(
            train_filename=train_data,
            eval_filename=eval_data,
            test_filename=eval_data,
            field_names=["label", "slots", "text", "dense"],
        )
        config = PyTextConfig(
            task=DocumentClassificationTask.Config(
                model=DocModel.Config(inputs=model_inputs),
                data=Data.Config(source=data_source),
            ),
            version=21,
            save_snapshot_path=snapshot_path,
            export_caffe2_path=caffe2_path,
        )

        task = create_task(config.task)
        task.export(task.model, caffe2_path)
        save(config, task.model, meta=None, tensorizers=task.data.tensorizers)

        # Reference predictions that every Caffe2 run is compared against.
        pt_results = task.predict(task.data.data_source.test)

        def assert_caffe2_results_correct(caffe2_results):
            for expected, actual in zip(pt_results, caffe2_results):
                expected_scores = expected["score"].tolist()[0]
                actual_scores = [score[0] for score in actual.values()]
                np.testing.assert_array_almost_equal(
                    expected_scores, actual_scores
                )

        # Default cache size first, then the two explicit cache sizes.
        for extra_kwargs in ({}, {"cache_size": 2}, {"cache_size": -1}):
            results = batch_predict_caffe2_model(
                snapshot_path, caffe2_path, **extra_kwargs
            )
            self.assertEqual(4, len(results))
            assert_caffe2_results_correct(results)
class ModelInput(Model.Config.ModelInput):
    """Tensorizer configs for a model with word-level and doc-level labels.

    Both label tensorizers are configured with ``allow_unknown=True``. The
    two weight fields are optional and default to ``None``.
    """

    tokens: TokenTensorizer.Config = TokenTensorizer.Config()

    word_labels: SlotLabelTensorizer.Config = SlotLabelTensorizer.Config(
        allow_unknown=True
    )
    doc_labels: LabelTensorizer.Config = LabelTensorizer.Config(
        allow_unknown=True
    )

    doc_weight: Optional[FloatTensorizer.Config] = None
    word_weight: Optional[FloatTensorizer.Config] = None
class BertModelInput(BaseModel.Config.ModelInput):
    """Tensorizer configs for a BERT model input.

    ``tokens`` defaults to a BERT tensorizer config with ``max_seq_len=128``;
    ``dense`` is optional and defaults to ``None``.
    """

    tokens: BERTTensorizer.Config = BERTTensorizer.Config(max_seq_len=128)
    dense: Optional[FloatListTensorizer.Config] = None
    labels: LabelTensorizer.Config = LabelTensorizer.Config()
    # Consumed by the metric reporter.
    num_tokens: NtokensTensorizer.Config = NtokensTensorizer.Config(
        names=["tokens"],
        indexes=[2],
    )
class ModelInput(BasePairwiseModel.Config.ModelInput):
    """Tensorizer configs for a pairwise model over ``text1`` and ``text2``.

    ``tokens1`` and ``tokens2`` read the ``text1`` and ``text2`` columns
    respectively; ``raw_text`` is configured over both columns.
    """

    tokens1: TokenTensorizer.Config = TokenTensorizer.Config(column="text1")
    tokens2: TokenTensorizer.Config = TokenTensorizer.Config(column="text2")
    labels: LabelTensorizer.Config = LabelTensorizer.Config()
    # Consumed by the metric reporter.
    raw_text: JoinStringTensorizer.Config = JoinStringTensorizer.Config(
        columns=["text1", "text2"],
    )
class ModelInput(BaseModel.Config.ModelInput):
    """Tensorizer configs for a SQuAD-style model input.

    ``squad_input`` accepts either the BERT or the RoBERTa SQuAD tensorizer
    config and defaults to the BERT one with ``max_seq_len=256``.
    """

    squad_input: Union[
        SquadForBERTTensorizer.Config,
        SquadForRoBERTaTensorizer.Config,
    ] = SquadForBERTTensorizer.Config(max_seq_len=256)

    # is_impossible label, read from the "has_answer" column.
    has_answer: LabelTensorizer.Config = LabelTensorizer.Config(
        column="has_answer",
    )
class ModelInput(ModelInputBase):
    """Tensorizer configs for a pairwise BERT model input.

    The token fields are annotated with the base config type
    (``BERTTensorizerBase.Config``) while their defaults are
    ``BERTTensorizer.Config`` instances over ``text1`` / ``text2`` with
    ``max_seq_len=128``.
    """

    tokens1: BERTTensorizerBase.Config = BERTTensorizer.Config(
        columns=["text1"],
        max_seq_len=128,
    )
    tokens2: BERTTensorizerBase.Config = BERTTensorizer.Config(
        columns=["text2"],
        max_seq_len=128,
    )
    labels: LabelTensorizer.Config = LabelTensorizer.Config()
    # Consumed by the metric reporter.
    num_tokens: NtokensTensorizer.Config = NtokensTensorizer.Config(
        names=["tokens1", "tokens2"],
        indexes=[2, 2],
    )
class ModelInput(Model.Config.ModelInput):
    """Tensorizer configs for a model with word/doc labels and weights.

    Both label tensorizers are configured with ``allow_unknown=True``. The
    weight tensorizers always have a config and read the ``doc_weight`` and
    ``word_weight`` columns.
    """

    tokens: TokenTensorizer.Config = TokenTensorizer.Config()

    word_labels: SlotLabelTensorizer.Config = SlotLabelTensorizer.Config(
        allow_unknown=True
    )
    doc_labels: LabelTensorizer.Config = LabelTensorizer.Config(
        allow_unknown=True
    )

    doc_weight: FloatTensorizer.Config = FloatTensorizer.Config(
        column="doc_weight"
    )
    word_weight: FloatTensorizer.Config = FloatTensorizer.Config(
        column="word_weight"
    )
class InputConfig(ConfigBase):
    """Tensorizer configs for a RoBERTa token input and its labels.

    Both fields default to a config instance built with no arguments.
    """

    tokens: RoBERTaTensorizer.Config = RoBERTaTensorizer.Config()
    labels: LabelTensorizer.Config = LabelTensorizer.Config()
class ModelInput(Model.Config.ModelInput):
    """Tensorizer configs for a word-token model input.

    ``labels`` is configured with ``allow_unknown=True``; ``raw_text`` reads
    the ``text`` column.
    """

    tokens: WordTensorizer.Config = WordTensorizer.Config()
    labels: LabelTensorizer.Config = LabelTensorizer.Config(
        allow_unknown=True
    )
    # Consumed by the metric reporter.
    raw_text: MetaInput.Config = MetaInput.Config(column="text")
class ModelInput(Model.Config.ModelInput):
    """Tensorizer configs for a token input and its labels.

    Both fields default to a config instance built with no arguments.
    """

    tokens: TokenTensorizer.Config = TokenTensorizer.Config()
    labels: LabelTensorizer.Config = LabelTensorizer.Config()
class ModelInput(Model.Config.ModelInput):
    """Tensorizer configs for a token input, labels and the raw text.

    ``labels`` is configured with ``allow_unknown=True``; ``raw_text`` reads
    the ``text`` column.
    """

    tokens: TokenTensorizer.Config = TokenTensorizer.Config()
    labels: LabelTensorizer.Config = LabelTensorizer.Config(
        allow_unknown=True
    )
    # Consumed by the metric reporter.
    raw_text: RawString.Config = RawString.Config(column="text")
class ModelInput(Model.Config.ModelInput):
    """Tensorizer configs for a token input with an optional dense input.

    ``dense`` defaults to ``None``; ``tokens`` and ``labels`` default to a
    config instance built with no arguments.
    """

    tokens: TokenTensorizer.Config = TokenTensorizer.Config()
    dense: Optional[FloatListTensorizer.Config] = None
    labels: LabelTensorizer.Config = LabelTensorizer.Config()
class InputConfig(ConfigBase):
    """Tensorizer configs for one token input and two optional dense inputs.

    ``right_dense`` and ``left_dense`` default to ``None``, so they are
    annotated ``Optional[...]``: the declared type then agrees with the
    default value instead of contradicting it.
    """

    tokens: RoBERTaTensorizer.Config = RoBERTaTensorizer.Config()

    # Dense inputs are off unless a config is supplied.
    right_dense: Optional[FloatListTensorizer.Config] = None
    left_dense: Optional[FloatListTensorizer.Config] = None

    labels: LabelTensorizer.Config = LabelTensorizer.Config()
class Config(Model.Config, doc_model.DocModel.Config):
    """Model config combining ``Model.Config`` and ``DocModel.Config``.

    ``inputs`` is declared as a mapping from input name to tensorizer config,
    with default entries for ``"tokens"`` and ``"labels"``. ``embedding``
    defaults to a ``WordFeatConfig`` built with no arguments.
    """

    inputs: Dict[str, Tensorizer.Config] = {
        "tokens": WordTensorizer.Config(),
        "labels": LabelTensorizer.Config(),
    }

    embedding: WordFeatConfig = WordFeatConfig()
class ModelInput(BasePairwiseModel.Config.ModelInput):
    """Tensorizer configs for a pairwise model over ``text1`` and ``text2``.

    ``tokens1`` reads the ``text1`` column and ``tokens2`` reads the
    ``text2`` column.
    """

    tokens1: TokenTensorizer.Config = TokenTensorizer.Config(column="text1")
    tokens2: TokenTensorizer.Config = TokenTensorizer.Config(column="text2")
    labels: LabelTensorizer.Config = LabelTensorizer.Config()
class ModelInput(BaseModel.Config.ModelInput):
    """Tensorizer configs for a SQuAD-style model input.

    ``squad_input`` defaults to a ``SquadTensorizer`` config built with no
    arguments; ``has_answer`` is a label tensorizer over the ``has_answer``
    column.
    """

    squad_input: SquadTensorizer.Config = SquadTensorizer.Config()
    has_answer: LabelTensorizer.Config = LabelTensorizer.Config(
        column="has_answer",
    )
class ModelInput(NewModel.Config.ModelInput):
    """Tensorizer configs for a word-token input and its labels.

    ``labels`` is configured with ``allow_unknown=True``.
    """

    tokens: WordTensorizer.Config = WordTensorizer.Config()
    labels: LabelTensorizer.Config = LabelTensorizer.Config(
        allow_unknown=True
    )