def test_model_details(self, mock_stdout, *mocks):
    """Check that ``details()`` prints the expected layer names and summary line."""
    # Build the model from the fixture path and label mapping.
    model = CharLoadTFModel(self.model_path, self.label_mapping)

    model.details()

    # mock_stdout is presumably a patched sys.stdout (decorator not visible
    # here); read what was printed once and check each expected fragment.
    printed = mock_stdout.getvalue()
    for fragment in ("input", "dense", "softmax_output", "Total params"):
        self.assertIn(fragment, printed)
def test_predict(self, *mocks):
    """Check the keys and array shapes returned by ``predict``."""
    model = CharLoadTFModel(self.model_path, self.label_mapping)

    # A single batch holding two samples of two values each.
    batches = [np.array([[1, 3], [1, 2]])]

    # Default call: only predictions are checked.
    output = model.predict(batches)
    self.assertIn("pred", output)
    self.assertEqual((2, 2), np.array(output["pred"]).shape)

    # With confidences requested, both keys must be present and the
    # confidence array gains a trailing axis of size num_labels.
    output = model.predict(batches, show_confidences=True)
    for key in ("pred", "conf"):
        self.assertIn(key, output)
    expected_conf_shape = (2, 2, model.num_labels)
    self.assertEqual(expected_conf_shape, np.array(output["conf"]).shape)
def test_validation_evaluate_and_classification_report(self, *mocks):
    """Check that ``_validate_training`` returns an f1 score and a per-label report."""
    model = CharLoadTFModel(self.model_path, self.label_mapping)
    # The private validation routine needs a constructed model.
    model._construct_model()

    # One validation batch: 2 samples x 20 positions.
    features = np.ones((2, 20))
    targets = np.zeros((2, 20, model.num_labels))
    # Mark the first 11 positions of the first sample as ADDRESS, so the
    # report's support for ADDRESS is expected to be 11.
    address_index = self.label_mapping["ADDRESS"]
    targets[0, :11, address_index] = 1
    val_gen = [[features, targets]]

    # NOTE(review): positional args are presumably batch size plus two
    # reporting flags -- confirm against the _validate_training signature.
    f1, f1_report = model._validate_training(val_gen, 32, True, True)

    self.assertIsNotNone(f1)
    self.assertIsNotNone(f1_report)
    self.assertEqual(11, f1_report["ADDRESS"]["support"])
def test_init(self, *mocks):
    """Check label mapping, stored model path and label list after construction."""
    # Labels the model is expected to expose, in this exact order.
    expected_labels = [
        "PAD",
        "UNKNOWN",
        "ADDRESS",
        "BAN",
        "CREDIT_CARD",
        "EMAIL_ADDRESS",
        "UUID",
        "HASH_OR_KEY",
        "IPV4",
        "IPV6",
        "MAC_ADDRESS",
        "PERSON",
        "PHONE_NUMBER",
        "SSN",
        "URL",
        "DATETIME",
        "INTEGER_BIG",
    ]

    # Construct with the fixture path and label mapping.
    model = CharLoadTFModel(self.model_path, self.label_mapping)

    self.assertDictEqual(self.label_mapping, model.label_mapping)
    stored_path = model._parameters["model_path"]
    self.assertEqual(self.model_path, stored_path)
    self.assertListEqual(expected_labels, model.labels)
def test_reverse_label_mapping(self, *mocks):
    """Check that ``reverse_label_mapping`` maps each index back to one label."""
    model = CharLoadTFModel(self.model_path, self.label_mapping)

    # Labels listed by index (0..16). Note that CITY is expected to be
    # absent from the reverse mapping.
    labels_by_index = [
        "PAD",
        "UNKNOWN",
        "ADDRESS",
        "BAN",
        "CREDIT_CARD",
        "EMAIL_ADDRESS",
        "UUID",
        "HASH_OR_KEY",
        "IPV4",
        "IPV6",
        "MAC_ADDRESS",
        "PERSON",
        "PHONE_NUMBER",
        "SSN",
        "URL",
        "DATETIME",
        "INTEGER_BIG",
    ]
    expected_reverse_label_mapping = dict(enumerate(labels_by_index))

    self.assertDictEqual(
        expected_reverse_label_mapping, model.reverse_label_mapping
    )
def test_param_validation(self, *mocks):
    """Check that a known parameter is accepted and an unknown one raises."""
    accepted_params = {"default_label": "UNKNOWN"}
    rejected_params = {"fake_extra_param": "fails"}

    # A known parameter: construction succeeds and, after building the
    # model, the same dict compares equal to the model's parameters.
    model = CharLoadTFModel(
        self.model_path,
        label_mapping=self.label_mapping,
        parameters=accepted_params,
    )
    model._construct_model()
    self.assertDictEqual(accepted_params, model._parameters)

    # An unrecognised parameter name must raise a ValueError at construction.
    with self.assertRaises(ValueError):
        CharLoadTFModel(
            self.model_path,
            label_mapping=self.label_mapping,
            parameters=rejected_params,
        )
def test_save(self, mock_open, mock_tf_save, *mocks):
    """Check what ``save_to_disk`` writes and that the TF save hook is called.

    The text written through the patched ``open`` must be the serialized
    model parameters immediately followed by the serialized label mapping,
    and the patched TensorFlow save must be called with the save path.
    """
    # Route writes made through the patched open() into an in-memory buffer.
    mock_file = setup_save_mock_open(mock_open)
    # Register the real StringIO.close right away so the buffer is released
    # even when an assertion below fails (previously it was only closed on
    # the success path). The unbound call is kept from the original code;
    # presumably the helper stubs the instance's own close() -- verify.
    self.addCleanup(StringIO.close, mock_file)

    # Save a model built with an empty custom-parameter dict.
    parameters = {}
    label_mapping = mock_label_mapping
    model = CharLoadTFModel(self.model_path, label_mapping, parameters)

    # Save to a fake path and compare everything that was written.
    save_path = "./fake/path"
    model.save_to_disk(save_path)
    self.assertEqual(
        # model parameters
        '{"default_label": "UNKNOWN", "pad_label": "PAD"}'
        # label_mapping
        '{"PAD": 0, "CITY": 1, "UNKNOWN": 1, "ADDRESS": 2}',
        mock_file.getvalue(),
    )
    mock_tf_save.assert_called_with(save_path)
def test_fit_and_predict(self, *mocks):
    """Fit with validation data, predict, then refit with a new label mapping."""
    model = CharLoadTFModel(self.model_path, self.label_mapping)

    # One training batch: two samples of two values, with all-zero targets.
    samples = np.array([[1, 3], [1, 2]])
    data_gen = [[samples, np.zeros((2, 2, model.num_labels))]]
    cv_gen = data_gen  # the same batch doubles as validation data

    # Basic fit with validation data; fit() must return three values and
    # log at least one record at INFO level or above on the named logger.
    logger_name = "DataProfiler.labelers.char_load_tf_model"
    with self.assertLogs(logger_name, level="INFO") as logs:
        _history, _f1, _f1_report = model.fit(
            data_gen, cv_gen, reset_weights=True
        )
    self.assertTrue(logs.output)

    # Predict on feature-only batches.
    data_gen = [np.array([[1, 3], [1, 2]])]
    model.predict(data_gen)

    # Refit with a different label set that still contains the model's
    # default label.
    new_label_mapping = {
        "PAD": 0,
        "TEST": 1,
        "NEW": 2,
        "MAPPING": 3,
        model._parameters["default_label"]: 4,
    }
    data_gen = [[
        np.array([[1, 3], [1, 2]]),
        np.zeros((2, 2, len(new_label_mapping))),
    ]]
    _history, _f1, _f1_report = model.fit(
        data_gen, cv_gen, label_mapping=new_label_mapping
    )

    # Predict after refitting, passing only the feature array.
    features_only = data_gen[0][0]
    model.predict(features_only)
def test_load(self, *mocks):
    """Check that ``load_from_disk`` returns a ``CharLoadTFModel`` instance."""
    # The path is fake; disk access is presumably patched by decorators
    # outside this view.
    model_dir = "fake/path/"
    restored = CharLoadTFModel.load_from_disk(model_dir)
    self.assertIsInstance(restored, CharLoadTFModel)
def test_help(self, mock_stdout, *mocks):
    """Check that ``help()`` prints the class name and a parameters section."""
    CharLoadTFModel.help()

    # Read the captured output once and check both expected fragments.
    printed = mock_stdout.getvalue()
    for fragment in ("CharLoadTFModel", "Parameters"):
        self.assertIn(fragment, printed)
def test_set_label_mapping(self, *mocks):
    """Exercise ``set_label_mapping`` with invalid input, dicts and a list."""
    model = CharLoadTFModel(self.model_path, self.label_mapping)

    # None is neither a dict nor a list, so a TypeError is expected.
    expected_message = (
        "Labels must either be a non-empty encoding dict "
        "which maps labels to index encodings or a list."
    )
    with self.assertRaisesRegex(TypeError, expected_message):
        model.set_label_mapping(None)

    # Dict without PAD and with index 0 unused: PAD is expected at 0.
    mapping = {
        "CITY": 1,  # SAME AS UNKNOWN
        "UNKNOWN": 1,
        "ADDRESS": 2,
    }
    model.set_label_mapping(mapping)
    mapping.update(PAD=0)
    self.assertDictEqual(mapping, model.label_mapping)

    # List without PAD: the expected mapping has PAD first, at index 1,
    # with the listed labels following in order.
    expected = {
        "PAD": 1,
        "UNKNOWN": 2,
        "ADDRESS": 3,
    }
    model.set_label_mapping(["UNKNOWN", "ADDRESS"])
    self.assertDictEqual(expected, model.label_mapping)

    # Dict that already maps PAD to 0: expected to be kept unchanged.
    mapping = {
        "PAD": 0,
        "CITY": 1,  # SAME AS UNKNOWN
        "UNKNOWN": 1,
        "ADDRESS": 2,
    }
    model.set_label_mapping(mapping)
    self.assertDictEqual(mapping, model.label_mapping)

    # Dict without PAD where index 0 is already taken: PAD is expected
    # at the next index after the existing ones (3 here).
    mapping = {
        "CITY": 0,
        "UNKNOWN": 1,
        "ADDRESS": 2,
    }
    model.set_label_mapping(mapping)
    mapping.update(PAD=3)
    self.assertDictEqual(mapping, model.label_mapping)