示例#1
0
    def test_create_float_list_tensor(self):
        """Check float-list numberization and FP16 tensorization.

        The first part feeds several textual spellings of a two-element
        float list (comma and/or whitespace separated) through
        ``load_float_list`` and ``numberize`` and compares the result with
        the expected Python list.

        The second part turns on ``precision.FP16_ENABLED``, tensorizes the
        whole batch and checks the resulting shape and dtype.
        """
        tensorizer = FloatListTensorizer(
            column="dense", dim=2, error_check=True, normalize=False
        )
        tests = [
            ("[0.1,0.2]", [0.1, 0.2]),  # comma
            ("[0.1, 0.2]", [0.1, 0.2]),  # comma with single space
            ("[0.1,  0.2]", [0.1, 0.2]),  # comma with multiple spaces
            ("[0.1 0.2]", [0.1, 0.2]),  # space
            ("[0.1  0.2]", [0.1, 0.2]),  # multiple spaces
            ("[ 0.1  0.2]", [0.1, 0.2]),  # space after [
            ("[0.1  0.2 ]", [0.1, 0.2]),  # space before ]
            ("[0.  1.]", [0.0, 1.0]),  # 0., 1.
        ]
        for raw, expected in tests:
            row = {"dense": load_float_list(raw)}
            numberized = tensorizer.numberize(row)
            self.assertEqual(expected, numberized)

        # FP16_ENABLED is module-level state shared with every other test.
        # Restore it in a ``finally`` so a failing assertion (or an exception
        # inside numberize/tensorize) cannot leak FP16 mode into later tests.
        previous_fp16 = precision.FP16_ENABLED
        precision.FP16_ENABLED = True
        try:
            batch = []
            for raw, _ in tests:
                row = {"dense": load_float_list(raw)}
                batch.append(tensorizer.numberize(row))
            tensor = tensorizer.tensorize(batch)
            # One row per test case, two features per row.
            self.assertEqual(list(tensor.size()), [len(tests), 2])
            self.assertEqual(tensor.dtype, torch.float16)
        finally:
            precision.FP16_ENABLED = previous_fp16
示例#2
0
 def test_float_list_tensor_prepare_input(self):
     """Check that ``prepare_input`` yields the parsed float list.

     Each case is a (raw text, expected list) pair; the raw text is parsed
     with ``load_float_list`` and stored under the ``dense`` column.
     """
     tensorizer = FloatListTensorizer(
         column="dense", dim=2, error_check=True, normalize=False
     )
     cases = [("[0.1,0.2]", [0.1, 0.2])]
     for text, wanted in cases:
         prepared = tensorizer.prepare_input({"dense": load_float_list(text)})
         self.assertEqual(wanted, prepared)
示例#3
0
    def test_create_float_list_tensor(self):
        """Check that ``numberize`` accepts several raw-string spellings.

        Every raw string below is passed, unparsed, as the ``dense`` column
        and must numberize to ``[0.1, 0.2]``.
        """
        tensorizer = FloatListTensorizer(column="dense", dim=2, error_check=True)
        raw_values = [
            "[0.1,0.2]",  # comma
            "[0.1, 0.2]",  # comma with single space
            "[0.1,  0.2]",  # comma with multiple spaces
            "[0.1 0.2]",  # space
            "[0.1  0.2]",  # multiple spaces
            "[ 0.1  0.2]",  # space after [
            "[0.1  0.2 ]",  # space before ]
        ]

        for raw in raw_values:
            numberized = tensorizer.numberize({"dense": raw})
            self.assertEqual([0.1, 0.2], numberized)
 def _get_config_with_export_list(
     self,
     task_class: Type[NewTask],
     model_class: Type[Model],
     test_file_metadata: TestFileMetadata,
 ) -> PyTextConfig:
     """Build a PyTextConfig with a TorchScript export section.

     The same file from ``test_file_metadata`` is used as the train, eval
     and test set. The model receives a single ``dense`` input backed by a
     ``FloatListTensorizer`` (with error checking on) whose column and
     dimension come from the metadata. Batches of size 1 are requested from
     ``PoolingBatcher`` for train and test, and training runs for one epoch.
     """
     data_file = test_file_metadata.filename
     source_config = TSVDataSource.Config(
         train_filename=data_file,
         eval_filename=data_file,
         test_filename=data_file,
         field_names=test_file_metadata.field_names,
     )
     batcher_config = PoolingBatcher.Config(train_batch_size=1, test_batch_size=1)
     data_config = Data.Config(source=source_config, batcher=batcher_config)

     dense_config = FloatListTensorizer.Config(
         column=test_file_metadata.dense_col_name,
         error_check=True,
         dim=test_file_metadata.dense_feat_dim,
     )
     # Instantiate whatever inputs type the model's Config declares.
     inputs_cls = type(model_class.Config.inputs)
     model_config = model_class.Config(inputs=inputs_cls(dense=dense_config))

     task_config = task_class.Config(
         data=data_config,
         trainer=TaskTrainer.Config(epochs=1),
         model=model_config,
     )
     export_config = ExportConfig(
         export_torchscript_path="/tmp/model_torchscript.pt"
     )
     return PyTextConfig(
         task=task_config,
         use_tensorboard=False,
         use_cuda_if_available=False,
         export=export_config,
         version=LATEST_VERSION,
     )
示例#5
0
 def _get_pytext_config(
     self,
     test_file_name: TestFileName,
     task_class: Type[NewTask],
     model_class: Type[Model],
 ) -> PyTextConfig:
     """Build a PyTextConfig for the named test file.

     Metadata for ``test_file_name`` is looked up with
     ``get_test_file_metadata``; its file serves as the train, eval and test
     set. The model receives a single ``dense`` input backed by a
     ``FloatListTensorizer`` whose column and dimension come from the
     metadata. Training runs for one epoch.
     """
     metadata = get_test_file_metadata(test_file_name)
     data_file = metadata.filename
     source_config = TSVDataSource.Config(
         train_filename=data_file,
         eval_filename=data_file,
         test_filename=data_file,
         field_names=metadata.field_names,
     )
     # Use Batcher to avoid shuffling.
     data_config = Data.Config(source=source_config, batcher=Batcher.Config())

     dense_config = FloatListTensorizer.Config(
         column=metadata.dense_col_name,
         dim=metadata.dense_feat_dim,
     )
     # Instantiate whatever inputs type the model's Config declares.
     inputs_cls = type(model_class.Config.inputs)
     model_config = model_class.Config(inputs=inputs_cls(dense=dense_config))

     task_config = task_class.Config(
         data=data_config,
         trainer=TaskTrainer.Config(epochs=1),
         model=model_config,
     )
     return PyTextConfig(
         task=task_config,
         use_tensorboard=False,
         use_cuda_if_available=False,
         version=LATEST_VERSION,
     )
示例#6
0
 def test_create_float_list_tensor(self):
     """Check ``numberize`` on float lists parsed from varied spellings.

     Each case pairs a raw string (comma and/or whitespace separated) with
     the list expected after ``load_float_list`` and ``numberize``.
     """
     tensorizer = FloatListTensorizer(column="dense", dim=2, error_check=True)
     cases = [
         ("[0.1,0.2]", [0.1, 0.2]),  # comma
         ("[0.1, 0.2]", [0.1, 0.2]),  # comma with single space
         ("[0.1,  0.2]", [0.1, 0.2]),  # comma with multiple spaces
         ("[0.1 0.2]", [0.1, 0.2]),  # space
         ("[0.1  0.2]", [0.1, 0.2]),  # multiple spaces
         ("[ 0.1  0.2]", [0.1, 0.2]),  # space after [
         ("[0.1  0.2 ]", [0.1, 0.2]),  # space before ]
         ("[0.  1.]", [0.0, 1.0]),  # 0., 1.
     ]
     for text, wanted in cases:
         parsed = load_float_list(text)
         self.assertEqual(wanted, tensorizer.numberize({"dense": parsed}))
示例#7
0
    def test_batch_predict_caffe2_model(self):
        """Compare batch Caffe2 predictions against the task's own predictions.

        A document classification task is created, exported to a temporary
        Caffe2 model file and saved as a snapshot. ``batch_predict_caffe2_model``
        is then run with the default cache size, ``cache_size=2`` and
        ``cache_size=-1``; each run must return 4 results whose scores match
        ``task.predict`` on the test data source.
        """
        with tempfile.NamedTemporaryFile() as snapshot_file, \
                tempfile.NamedTemporaryFile() as caffe2_model_file:
            train_data = tests_module.test_file("train_data_tiny.tsv")
            eval_data = tests_module.test_file("test_data_tiny.tsv")

            model_inputs = DocModel.Config.ModelInput(
                tokens=TokenTensorizer.Config(),
                dense=FloatListTensorizer.Config(
                    column="dense", dim=1, error_check=True
                ),
                labels=LabelTensorizer.Config(),
            )
            data_source = TSVDataSource.Config(
                train_filename=train_data,
                eval_filename=eval_data,
                test_filename=eval_data,
                field_names=["label", "slots", "text", "dense"],
            )
            config = PyTextConfig(
                task=DocumentClassificationTask.Config(
                    model=DocModel.Config(inputs=model_inputs),
                    data=Data.Config(source=data_source),
                ),
                version=21,
                save_snapshot_path=snapshot_file.name,
                export_caffe2_path=caffe2_model_file.name,
            )

            # Export first, then snapshot, then compute the reference scores.
            task = create_task(config.task)
            task.export(task.model, caffe2_model_file.name)
            model = task.model
            save(config, model, meta=None, tensorizers=task.data.tensorizers)

            pt_results = task.predict(task.data.data_source.test)

            def check_against_reference(caffe2_results):
                # Pair each reference prediction with its Caffe2 counterpart.
                for reference, candidate in zip(pt_results, caffe2_results):
                    wanted_scores = reference["score"].tolist()[0]
                    got_scores = [score[0] for score in candidate.values()]
                    np.testing.assert_array_almost_equal(wanted_scores, got_scores)

            # Default cache size, a small cache, and cache_size=-1.
            for extra_kwargs in ({}, {"cache_size": 2}, {"cache_size": -1}):
                results = batch_predict_caffe2_model(
                    snapshot_file.name, caffe2_model_file.name, **extra_kwargs
                )
                self.assertEqual(4, len(results))
                check_against_reference(results)
示例#8
0
    def test_create_normalized_float_list_tensor(self):
        """Check normalizer statistics and normalized ``numberize`` output.

        The tensorizer is initialized from ``train_dense_features_tiny.tsv``
        with ``normalize=True``. The test then compares the normalizer's row
        count, per-feature sums, squared sums, averages and standard
        deviations with reference values, and finally checks the normalized
        output for one row. All float comparisons are made after formatting
        to four decimal places.
        """

        def round_list(values):
            # Format to 4 decimals and parse back so comparisons are tolerant.
            return [float("%.4f" % value) for value in values]

        source_file = SafeFileWrapper(
            tests_module.test_file("train_dense_features_tiny.tsv")
        )
        data = TSVDataSource(
            source_file,
            eval_file=None,
            field_names=["label", "slots", "text", "dense_feat"],
            schema={"text": str, "label": str, "dense_feat": List[float]},
        )
        tensorizer = FloatListTensorizer(
            column="dense_feat", dim=10, error_check=True, normalize=True
        )
        self._initialize_tensorizer(tensorizer, data)
        self.assertEqual(10, tensorizer.normalizer.num_rows)

        # (normalizer attribute, reference values), checked in this order.
        expected_statistics = [
            (
                "feature_sums",
                [
                    7.56409,
                    8.2388,
                    0.5531,
                    0.2403,
                    1.03130,
                    6.2888,
                    3.1595,
                    0.1538,
                    0.2403,
                    5.3463,
                ],
            ),
            (
                "feature_squared_sums",
                [
                    5.80172,
                    7.57586,
                    0.30591,
                    0.05774,
                    0.52762,
                    5.22811,
                    2.51727,
                    0.02365,
                    0.05774,
                    4.48798,
                ],
            ),
            (
                "feature_avgs",
                [
                    0.75640,
                    0.82388,
                    0.05531,
                    0.02403,
                    0.10313,
                    0.62888,
                    0.31595,
                    0.01538,
                    0.02403,
                    0.53463,
                ],
            ),
            (
                "feature_stddevs",
                [
                    0.08953,
                    0.28072,
                    0.16593,
                    0.07209,
                    0.20524,
                    0.35682,
                    0.38974,
                    0.04614,
                    0.07209,
                    0.40369,
                ],
            ),
        ]
        for attribute, reference in expected_statistics:
            self.assertEqual(
                round_list(reference),
                round_list(getattr(tensorizer.normalizer, attribute)),
            )

        row = [0.64840776, 0.7575, 0.5531, 0.2403, 0, 0.9481, 0, 0.1538, 0.2403, 0.3564]
        output = tensorizer.numberize({"dense_feat": row})

        expected_output = [
            -1.20619,
            -0.23646,
            2.99999,
            3.0,
            -0.50246,
            0.89462,
            -0.81066,
            2.99999,
            3.0,
            -0.44149,
        ]
        self.assertEqual(round_list(expected_output), round_list(output))