Пример #1
0
 def _invalid_check(data):
     """Assert that validating ``data`` raises the unsupported-type error.

     ``self`` is not a parameter; it is resolved from the enclosing scope.

     :param data: input expected to be rejected by
         ``BaseDataLabeler._check_and_return_valid_data_format``
     """
     expected_message = (
         "Data must be imported using the "
         "data_readers, pd.DataFrames, "
         "np.ndarrays, or lists."
     )
     with self.assertRaisesRegex(TypeError, expected_message):
         BaseDataLabeler._check_and_return_valid_data_format(data)
Пример #2
0
 def _valid_check(data):
     """Validate ``data`` in ``predict`` mode and return the converted value.

     ``self`` is not a parameter; it is resolved from the enclosing scope.
     If validation raises, the enclosing test is failed via ``self.fail``
     with a message that includes the underlying exception.

     :param data: input expected to be accepted by
         ``BaseDataLabeler._check_and_return_valid_data_format``
     :return: whatever the validation call returns for ``data``
     """
     print("\tChecking data format: {}".format(str(type(data))))
     try:
         # Only the call under test belongs in the try body.
         validated = BaseDataLabeler._check_and_return_valid_data_format(
             data, fit_or_predict='predict')
     except Exception as e:
         # Broad catch is deliberate: any exception on an accepted input
         # type is a failure. Report its cause instead of discarding it.
         self.fail(
             "Exception raised on input of accepted types. "
             "({}: {})".format(type(e).__name__, e))
     return validated
Пример #3
0
 def _invalid_check(data):
     """Assert that validating ``data`` raises the unsupported-type error.

     ``self`` is not a parameter; it is resolved from the enclosing scope.

     :param data: input expected to be rejected by
         ``BaseDataLabeler._check_and_return_valid_data_format``
     """
     expected_message = (
         "Data must either be imported using "
         "the data_readers or pd.DataFrame."
     )
     with self.assertRaisesRegex(TypeError, expected_message):
         BaseDataLabeler._check_and_return_valid_data_format(data)
Пример #4
0
    def test_check_and_return_valid_data_format(self):
        """Exercise ``BaseDataLabeler._check_and_return_valid_data_format``.

        Checks, in order: rejection of a bad ``fit_or_predict`` value,
        rejection of a bad data type, conversion of 2-D inputs, conversion
        of 1-D inputs, conversion of nested label tuples, and conversion of
        ``dp.Data`` objects.
        """
        validate = BaseDataLabeler._check_and_return_valid_data_format

        def passthrough(raw):
            # Lets the plain python list share the wrapper loops below.
            return raw

        # an unsupported fit_or_predict value is rejected
        mode_error = '`fit_or_predict` must equal `fit` or `predict`'
        with self.assertRaisesRegex(ValueError, mode_error):
            validate([], 'oops')

        # an unsupported data type is rejected
        type_error = ("Data must be imported using the data_readers, "
                      "pd.DataFrames, np.ndarrays, or lists.")
        with self.assertRaisesRegex(TypeError, type_error):
            validate('oops')

        # 2 dimensional structured data
        two_dim = [["this", "is"], ["two", "dimensions"]]
        two_dim_pred = np.array(["this", "is", "two", "dimensions"])
        # fit: result is compared against the 2-D array
        for wrap in (passthrough, pd.DataFrame, np.array):
            expected = np.array(two_dim)
            converted = validate(wrap(two_dim), fit_or_predict='fit')
            self.assertTrue(np.array_equal(expected, converted))
        # predict: result is compared against the flat array
        for wrap in (passthrough, pd.DataFrame, np.array):
            converted = validate(wrap(two_dim), fit_or_predict='predict')
            self.assertTrue(np.array_equal(two_dim_pred, converted))

        # 1 dimensional data
        one_dim = ["this", "is", "one", "dimension"]
        one_dim_pred = np.array(one_dim)
        # fit
        for wrap in (passthrough, pd.Series, np.array):
            expected = np.array(one_dim)
            converted = validate(wrap(one_dim), fit_or_predict='fit')
            self.assertTrue(np.array_equal(expected, converted))
        # predict
        for wrap in (passthrough, pd.DataFrame, np.array):
            converted = validate(wrap(one_dim), fit_or_predict='predict')
            self.assertTrue(np.array_equal(one_dim_pred, converted))

        # unstructured labels
        labels = [[(0, 4, "UNKNOWN"), (4, 10, "ADDRESS")],
                  [(0, 5, "SSN"), (5, 8, "UNKNOWN")]]
        validated_labels = validate(labels)
        self.assertIsInstance(validated_labels, np.ndarray)
        self.assertEqual(len(validated_labels), 2)
        first_row = validated_labels[0]
        self.assertEqual(len(first_row), 2)
        self.assertEqual(len(first_row[0]), 3)
        self.assertEqual(first_row[0][0], 0)
        self.assertEqual(first_row[1][1], 10)
        self.assertEqual(validated_labels[1][0][2], "SSN")

        # data reader objects
        for reader_type in ("csv", "json", "parquet"):
            reader = dp.Data(data=pd.DataFrame(two_dim),
                             data_type=reader_type)
            converted = validate(reader)
            self.assertTrue(np.array_equal(np.array(two_dim), converted))