def test_predict(self, mock_stdout):
    parameters = {
        'regex_patterns': {
            'PAD': [r'\W'],
            'BACKGROUND': [r'\w']
        },
        'ignore_case': True,
        'default_label': 'BACKGROUND',
    }
    model = RegexModel(label_mapping=self.label_mapping, parameters=parameters)

    # test only pad and background separate
    expected_output = {
        'pred': [
            np.array([[1, 0, 0], [1, 0, 0], [1, 0, 0]]),
            np.array([[0, 1, 0], [0, 1, 0], [0, 1, 0], [0, 1, 0], [0, 1, 0]])
        ]
    }
    model_output = model.predict([' ', 'hello'])
    self.assertIn('pred', model_output)
    for expected, output in zip(expected_output['pred'], model_output['pred']):
        self.assertTrue(np.array_equal(expected, output))

    # check verbose printing
    self.assertIn('Data Samples', mock_stdout.getvalue())

    # test pad with background
    expected_output = {
        'pred': [
            np.array([[1, 0, 0], [0, 1, 0], [1, 0, 0], [0, 1, 0], [1, 0, 0]])
        ]
    }
    model_output = model.predict([' h w.'])
    self.assertIn('pred', model_output)
    for expected, output in zip(expected_output['pred'], model_output['pred']):
        self.assertTrue(np.array_equal(expected, output))

    # test show confidences
    expected_output = {
        'pred': [
            np.array([[1, 0, 0], [0, 1, 0], [1, 0, 0], [0, 1, 0], [1, 0, 0]])
        ],
        'conf': [
            np.array([[1, 0, 0], [0, 1, 0], [1, 0, 0], [0, 1, 0], [1, 0, 0]])
        ]
    }
    model_output = model.predict([' h w.'], show_confidences=True)
    self.assertIn('pred', model_output)
    self.assertIn('conf', model_output)
    for expected, output in zip(expected_output['pred'], model_output['pred']):
        self.assertTrue(np.array_equal(expected, output))
    for expected, output in zip(expected_output['conf'], model_output['conf']):
        self.assertTrue(np.array_equal(expected, output))

    # test verbose = False
    # clear stdout
    mock_stdout.seek(0)
    mock_stdout.truncate(0)
    model_output = model.predict(['hello world.'], verbose=False)
    self.assertNotIn('Data Samples', mock_stdout.getvalue())
def test_predict(self, mock_stdout):
    parameters = {
        "regex_patterns": {
            "PAD": [r"\W"],
            "UNKNOWN": [r"\w"]
        },
        "ignore_case": True,
        "default_label": "UNKNOWN",
    }
    model = RegexModel(label_mapping=self.label_mapping, parameters=parameters)

    # test only pad and background separate
    expected_output = {
        "pred": [
            np.array([[1, 0, 0], [1, 0, 0], [1, 0, 0]]),
            np.array([[0, 1, 0], [0, 1, 0], [0, 1, 0], [0, 1, 0], [0, 1, 0]]),
        ]
    }
    with self.assertLogs("DataProfiler.labelers.regex_model", level="INFO") as logs:
        model_output = model.predict([" ", "hello"])
    self.assertIn("pred", model_output)
    for expected, output in zip(expected_output["pred"], model_output["pred"]):
        self.assertTrue(np.array_equal(expected, output))

    # check verbose printing
    self.assertIn("Data Samples", mock_stdout.getvalue())

    # check verbose logging
    self.assertTrue(len(logs.output))

    # test pad with background
    expected_output = {
        "pred": [
            np.array([[1, 0, 0], [0, 1, 0], [1, 0, 0], [0, 1, 0], [1, 0, 0]])
        ]
    }
    model_output = model.predict([" h w."])
    self.assertIn("pred", model_output)
    for expected, output in zip(expected_output["pred"], model_output["pred"]):
        self.assertTrue(np.array_equal(expected, output))

    # test show confidences
    expected_output = {
        "pred": [
            np.array([[1, 0, 0], [0, 1, 0], [1, 0, 0], [0, 1, 0], [1, 0, 0]])
        ],
        "conf": [
            np.array([[1, 0, 0], [0, 1, 0], [1, 0, 0], [0, 1, 0], [1, 0, 0]])
        ],
    }
    model_output = model.predict([" h w."], show_confidences=True)
    self.assertIn("pred", model_output)
    self.assertIn("conf", model_output)
    for expected, output in zip(expected_output["pred"], model_output["pred"]):
        self.assertTrue(np.array_equal(expected, output))
    for expected, output in zip(expected_output["conf"], model_output["conf"]):
        self.assertTrue(np.array_equal(expected, output))

    # clear stdout
    mock_stdout.seek(0)
    mock_stdout.truncate(0)

    # test verbose = False
    # Want to ensure no INFO logged
    with self.assertRaisesRegex(
        AssertionError,
        "no logs of level INFO or higher triggered "
        "on DataProfiler.labelers.regex_model",
    ):
        with self.assertLogs("DataProfiler.labelers.regex_model", level="INFO"):
            model.predict(["hello world."], verbose=False)

    # Not in stdout
    self.assertNotIn("Data Samples", mock_stdout.getvalue())
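# ---------------------------------------------------------------------------
# A minimal sketch of the fixture the tests above appear to assume; the names
# below are illustrative assumptions, not the library's actual test setup.
# The 3-column one-hot rows in the expected outputs imply 'PAD' maps to
# index 0 and the default label to index 1, with a third label at index 2.
# The import path mirrors the DataProfiler package layout but should be
# verified against the real repository.
# ---------------------------------------------------------------------------
import io
import unittest
from unittest import mock

import numpy as np

from dataprofiler.labelers.regex_model import RegexModel


@mock.patch("sys.stdout", new_callable=io.StringIO)
class TestRegexModel(unittest.TestCase):
    # Class-level patching passes the StringIO mock as the extra
    # `mock_stdout` argument seen in the test_predict signatures above.

    def setUp(self):
        # 'PAD' -> column 0 and the default label -> column 1 are required by
        # the expected one-hot arrays; 'OTHER' at index 2 is an assumed third
        # label added only so the arrays have three columns.
        self.label_mapping = {"PAD": 0, "UNKNOWN": 1, "OTHER": 2}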