Exemplo n.º 1
0
 def test_initialize_list_tensorizers(self):
     """Check vocab sizes of two list tensorizers built from the train split.

     An "intent" tensorizer (``pad_in_vocab`` and ``allow_unknown`` enabled)
     and a default "goal" tensorizer are initialized on ``self.data.train``;
     their vocabs are expected to hold 9 and 7 entries respectively.
     """
     intent_tensorizer = LabelListTensorizer(
         label_column="intent", pad_in_vocab=True, allow_unknown=True
     )
     goal_tensorizer = LabelListTensorizer(label_column="goal")
     tensorizers = {"intent": intent_tensorizer, "goal": goal_tensorizer}
     initialize_tensorizers(tensorizers, self.data.train)

     # Read the tensorizers back through the dict after initialization.
     expected_vocab_sizes = (("intent", 9), ("goal", 7))
     for column, expected_size in expected_vocab_sizes:
         self.assertEqual(expected_size, len(tensorizers[column].vocab))
Exemplo n.º 2
0
    def test_label_list_tensors_pad_missing(self):
        """Exercise ``pad_missing`` on a label list containing None and "".

        With ``pad_missing=True`` the labels
        ``["positive", "negative", None, ""]`` are expected to numberize to
        ``[0, 1, -1, -1]`` against a two-entry vocab. After switching
        ``pad_missing`` off, numberizing the same row is expected to raise.
        """
        # Four labels sharing one session id; None and "" are the missing cases.
        frame = pd.DataFrame(
            {
                "session_id": [1, 1, 1, 1],
                "label": ["positive", "negative", None, ""],
            }
        )
        ds = SessionPandasDataSource(
            test_df=frame,
            schema={"label": List[str]},
            id_col="session_id",
        )
        tensorizers = {
            "label": LabelListTensorizer(
                pad_missing=True,
                label_column="label",
                pad_in_vocab=False,
                allow_unknown=False,
            )
        }
        initialize_tensorizers(tensorizers, ds.test)
        label_tensorizer = tensorizers["label"]
        self.assertEqual(2, len(label_tensorizer.vocab))

        # The test split holds a single row, so take the first one.
        first_row = next(iter(ds.test))
        label_ids, _seq_lens = label_tensorizer.numberize(first_row)
        self.assertEqual([0, 1, -1, -1], label_ids)

        # With padding of missing labels disabled the same row must fail.
        label_tensorizer.pad_missing = False
        with self.assertRaises(Exception):
            label_tensorizer.numberize(next(iter(ds.test)))
Exemplo n.º 3
0
 def test_create_label_list_tensors(self):
     """Numberize and tensorize the "intent" label lists of the train split.

     Checks the per-row label indices and sequence lengths returned by
     ``numberize``, then the batched label and length tensors returned by
     ``tensorize``.
     """
     tensorizers = {
         "intent": LabelListTensorizer(
             label_column="intent", pad_in_vocab=True, allow_unknown=True
         )
     }
     initialize_tensorizers(tensorizers, self.data.train)
     intent_tensorizer = tensorizers["intent"]

     numberized = []
     for row in self.data.train:
         numberized.append(intent_tensorizer.numberize(row))

     # Element 0 of each numberized row holds the label indices.
     expected_label_ids = ([2, 3], [4, 5], [6, 7, 8])
     for position, label_ids in enumerate(expected_label_ids):
         self.assertEqual(label_ids, numberized[position][0])

     # Element 1 of each numberized row holds the sequence length.
     expected_seq_lens = (2, 2, 3)
     for position, seq_len in enumerate(expected_seq_lens):
         self.assertEqual(seq_len, numberized[position][1])

     self.assertEqual(3, len(numberized))

     label_tensor, len_tensor = intent_tensorizer.tensorize(numberized)
     # Shorter rows are expected to be filled with 1 (presumably the pad
     # index when pad_in_vocab=True -- confirm against the tensorizer).
     expected_labels = np.array([[2, 3, 1], [4, 5, 1], [6, 7, 8]])
     np.testing.assert_array_almost_equal(
         expected_labels, label_tensor.detach().numpy()
     )
     expected_lens = np.array([2, 2, 3])
     np.testing.assert_array_almost_equal(
         expected_lens, len_tensor.detach().numpy()
     )
Exemplo n.º 4
0
 def test_label_list_tensors_no_pad_in_vocab(self):
     """Tensorize "intent" label lists when padding is kept out of the vocab.

     The vocab is expected to hold 8 entries. Each train row gets the extra
     label "unknown" appended before being numberized, and the batched
     tensor is compared against a fixed expected matrix.
     """
     intent_tensorizer = LabelListTensorizer(
         label_column="intent", pad_in_vocab=False, allow_unknown=True
     )
     tensorizers = {"intent": intent_tensorizer}
     initialize_tensorizers(tensorizers, self.data.train)
     self.assertEqual(8, len(tensorizers["intent"].vocab))

     numberized = []
     for row in self.data.train:
         # Mutates the row's own label list in place before numberizing it.
         row["intent"].append("unknown")
         numberized.append(tensorizers["intent"].numberize(row))

     label_tensor, _len_tensor = tensorizers["intent"].tensorize(numberized)
     # The appended "unknown" label is expected at index 0 and shorter rows
     # are expected to be filled with -1 (presumably the unknown and pad
     # values respectively -- confirm against the tensorizer).
     expected_labels = np.array(
         [[1, 2, 0, -1], [3, 4, 0, -1], [5, 6, 7, 0]]
     )
     np.testing.assert_array_almost_equal(
         expected_labels,
         label_tensor.detach().numpy(),
     )