def test_should_update_the_batch_with_outcomes_in_exec_mode(self):
    """EXEC-mode evaluation records the function result on the batch.

    The wrapped function returns ``[1, 2, 3]``. After ``evaluate`` the
    input batch must equal a batch whose ``outcomes`` maps the expression
    name ``"test"`` to that list.
    """
    outcome_values = [1, 2, 3]
    expr = FunctionExpression(lambda x: outcome_values,
                              mode=ExecutionMode.EXEC,
                              name="test")
    want = FrameBatch(frames=pd.DataFrame(),
                      outcomes={"test": [1, 2, 3]})
    batch = FrameBatch(frames=pd.DataFrame())

    # The return value is ignored: the check is on the batch passed in.
    expr.evaluate(batch)

    self.assertEqual(want, batch)
def test_should_use_the_same_function_if_not_gpu_compatible(self):
    """A mock created without a spec is itself called by ``evaluate``."""
    udf = MagicMock(return_value=pd.DataFrame())
    expr = FunctionExpression(udf,
                              mode=ExecutionMode.EXEC,
                              name="test",
                              is_temp=True)
    batch = Batch(frames=pd.DataFrame())

    expr.evaluate(batch)

    udf.assert_called()
def test_should_update_temp_outcomes_when_is_temp_set_exec_mode(self):
    """With ``is_temp=True`` the result lands in ``temp_outcomes``.

    After ``evaluate`` the input batch must equal a batch whose
    ``temp_outcomes`` maps the expression name ``"test"`` to ``[1, 2, 3]``.
    """
    outcome_values = [1, 2, 3]
    expr = FunctionExpression(lambda x: outcome_values,
                              mode=ExecutionMode.EXEC,
                              name="test",
                              is_temp=True)
    want = Batch(frames=pd.DataFrame(),
                 temp_outcomes={"test": [1, 2, 3]})
    batch = Batch(frames=pd.DataFrame())

    # The return value is ignored: the check is on the batch passed in.
    expr.evaluate(batch)

    self.assertEqual(want, batch)
def bind_function_expr(expr: FunctionExpression, column_mapping):
    """Bind a function expression to the UDF registered in the catalog.

    The UDF is looked up by ``expr.name``. When ``expr.output`` is set, the
    matching UDF IO object is looked up and stored on ``expr.output_obj``;
    a missing IO object is logged as an error and binding continues.
    Finally ``expr.function`` is set to the result of calling whatever
    ``path_to_class(impl_file_path, name)`` returns (presumably the UDF
    class -- confirm against ``path_to_class``).

    Args:
        expr: expression to bind; ``output_obj`` (only when ``output`` is
            truthy) and ``function`` are assigned in place.
        column_mapping: not used by this function; kept so the signature
            stays compatible with callers.
    """
    catalog = CatalogManager()
    udf_obj = catalog.get_udf_by_name(expr.name)

    if expr.output:
        expr.output_obj = catalog.get_udf_io_by_name(expr.output)
        if expr.output_obj is None:
            # Use the class attribute rather than instantiating the level
            # type: ``LoggingLevel()`` with no arguments can fail (it does
            # for Enum types) and would mask the error being reported.
            LoggingManager().log(
                'Invalid output {} selected for UDF {}'.format(
                    expr.output, expr.name),
                LoggingLevel.ERROR)

    expr.function = path_to_class(udf_obj.impl_file_path, udf_obj.name)()
def visitUdfFunction(self, ctx: evaql_parser.UdfFunctionContext):
    """Build a ``FunctionExpression`` from a UDF call parse node.

    The UDF name comes from visiting ``ctx.simpleId()``; when that node is
    absent an error is logged and the name stays ``None``. Every visited
    function argument is appended as a child of the new expression.

    Returns:
        The ``FunctionExpression`` (created with no callable) carrying the
        parsed name and argument children.
    """
    if not ctx.simpleId():
        LoggingManager().log('UDF function name missing.',
                             LoggingLevel.ERROR)
        udf_name = None
    else:
        udf_name = self.visit(ctx.simpleId())

    # Arguments are visited before the expression object is created.
    visited_args = self.visit(ctx.functionArgs())

    func_expr = FunctionExpression(None, name=udf_name)
    for child_expr in visited_args:
        func_expr.append_child(child_expr)
    return func_expr
def test_should_execute_same_function_if_no_gpu(self, context):
    """With ``gpu_device()`` reporting ``NO_GPU`` the mock itself is called.

    ``context`` is the patched context class supplied by the caller; its
    instance is configured so that ``gpu_device()`` returns ``NO_GPU``.
    """
    patched_context = context.return_value
    udf = MagicMock(spec=GPUCompatible, return_value=pd.DataFrame())
    patched_context.gpu_device.return_value = NO_GPU

    expr = FunctionExpression(udf,
                              mode=ExecutionMode.EXEC,
                              name="test",
                              is_temp=True)
    batch = Batch(frames=pd.DataFrame())
    expr.evaluate(batch)

    udf.assert_called()
def test_function_move_the_device_to_gpu_if_compatible(self, context):
    """A ``GPUCompatible`` function is moved to the reported GPU device.

    ``context`` is the patched context class supplied by the caller; its
    instance reports device id ``'2'``. The test checks that ``to_device``
    was called with that id and that the object it returned was called.
    """
    patched_context = context.return_value
    udf = MagicMock(spec=GPUCompatible)
    udf_on_gpu = Mock(return_value=pd.DataFrame())
    device_id = '2'

    udf.to_device.return_value = udf_on_gpu
    patched_context.gpu_device.return_value = device_id

    expr = FunctionExpression(udf,
                              mode=ExecutionMode.EXEC,
                              name="test",
                              is_temp=True)
    batch = Batch(frames=pd.DataFrame())
    expr.evaluate(batch)

    udf.to_device.assert_called_with(device_id)
    udf_on_gpu.assert_called()
def test_bind_function_value_expr(self, mock_str_path, mock_catalog):
    """``bind_function_expr`` loads the UDF via the catalog entry.

    The patched catalog returns an entry with ``name='name'`` and
    ``impl_file_path='path'``. The test checks the lookup used the
    expression name, that the patched loader received ``('path', 'name')``,
    and that ``function`` holds the result of calling what the loader
    returned.
    """
    udf_entry = MagicMock()
    udf_entry.name = 'name'
    udf_entry.impl_file_path = 'path'
    mock_catalog.return_value.get_udf_by_name.return_value = udf_entry

    expr = FunctionExpression(None, name='temp')
    bind_function_expr(expr, None)

    lookup = mock_catalog.return_value.get_udf_by_name
    lookup.assert_called_with('temp')
    mock_str_path.assert_called_with('path', 'name')
    loaded = mock_str_path.return_value
    self.assertEqual(expr.function, loaded.return_value)
def test_when_function_executor_with_a_child_should_allow_chaining(self):
    """A parent expression with an ``x + 1`` child yields incremented data.

    Evaluating on a batch of ``[1, 2, 3]`` must equal a batch of
    ``[2, 3, 4]``.
    """
    parent = FunctionExpression(lambda x: pd.DataFrame(x))
    increment = FunctionExpression(lambda x: x + 1)
    parent.append_child(increment)

    batch = Batch(pd.DataFrame([1, 2, 3]))
    got = parent.evaluate(batch)

    want = Batch(pd.DataFrame([2, 3, 4]))
    self.assertEqual(want, got)
def test_when_function_executor_with_a_child_should_allow_chaining(self):
    """An identity parent with an incrementing child yields ``[2, 3, 4]``.

    The child adds one to every element of the input list ``[1, 2, 3]``.
    """
    parent = FunctionExpression(lambda x: x)
    increment = FunctionExpression(lambda x: [item + 1 for item in x])
    parent.append_child(increment)

    data = [1, 2, 3]
    got = parent.evaluate(data)

    self.assertEqual([2, 3, 4], got)
def test_func_expr_with_cmpr_and_const_expr_should_work(self):
    """Equality comparison of predictions against ``"car"``.

    The function yields two predictions: the first has labels
    ``["car", "bus"]``, the second only ``["bus"]``. Comparing with the
    constant ``"car"`` must evaluate to ``[True, False]``.
    """
    first_frame = Frame(1, np.ones((1, 1)), None)
    second_frame = Frame(1, 2 * np.ones((1, 1)), None)

    # Both predictions are built on the first frame, as in the original.
    with_car = Prediction(first_frame, ["car", "bus"], [0.5, 0.6])
    without_car = Prediction(first_frame, ["bus"], [0.6])

    predictions = FunctionExpression(lambda x: [with_car, without_car])
    car_literal = ConstantValueExpression("car")
    comparison = ComparisonExpression(ExpressionType.COMPARE_EQUAL,
                                      predictions,
                                      car_literal)

    batch = FrameBatch(frames=[first_frame, second_frame])
    self.assertEqual([True, False], comparison.evaluate(batch))
def test_func_expr_with_cmpr_and_const_expr_should_work(self):
    """Equality comparison of outcomes against ``"car"``.

    The function yields two outcomes keyed on ``'labels'``: the first
    contains ``"car"`` and ``"bus"``, the second only ``"bus"``. Comparing
    with the constant ``"car"`` must evaluate to ``[True, False]``.
    """
    frames = create_dataframe(2)

    with_car = Outcome(
        pd.DataFrame({'labels': ["car", "bus"], 'scores': [0.5, 0.6]}),
        'labels')
    without_car = Outcome(
        pd.DataFrame({'labels': ["bus"], 'scores': [0.6]}),
        'labels')

    outcomes = FunctionExpression(lambda x: [with_car, without_car])
    car_literal = ConstantValueExpression("car")
    comparison = ComparisonExpression(ExpressionType.COMPARE_EQUAL,
                                      outcomes,
                                      car_literal)

    batch = Batch(frames=frames)
    self.assertEqual([True, False], comparison.evaluate(batch))
def test_should_return_false_for_unequal_expressions(self):
    """Expressions of different kinds or values must not compare equal."""
    zero = ConstantValueExpression(0)
    one = ConstantValueExpression(1)
    function = FunctionExpression(lambda x: x + 1)
    comparison = ComparisonExpression(ExpressionType.COMPARE_NEQ,
                                      zero,
                                      one)
    id_column = TupleValueExpression(col_name='id')
    max_aggregate = AggregationExpression(ExpressionType.AGGREGATION_MAX,
                                          None,
                                          id_column)
    either = LogicalExpression(ExpressionType.LOGICAL_OR,
                               comparison,
                               comparison)

    # Same kind, different value.
    self.assertNotEqual(zero, one)

    # Different kinds; one pair is checked in both operand orders.
    self.assertNotEqual(comparison, zero)
    self.assertNotEqual(function, comparison)
    self.assertNotEqual(id_column, max_aggregate)
    self.assertNotEqual(max_aggregate, id_column)
    self.assertNotEqual(id_column, comparison)
    self.assertNotEqual(either, comparison)
def bind_function_expr(expr: FunctionExpression, column_mapping):
    """Bind a function expression to the UDF registered in the catalog.

    The UDF is looked up by ``expr.name``; its ``impl_file_path`` and
    ``name`` are joined with a dot and passed to ``str_to_class``. The
    returned object is called with no arguments and the result is stored
    on ``expr.function``.

    Args:
        expr: expression whose ``function`` attribute is assigned in place.
        column_mapping: not used by this function.
    """
    udf_obj = CatalogManager().get_udf_by_name(expr.name)
    dotted_path = '.'.join((udf_obj.impl_file_path, udf_obj.name))
    udf_class = str_to_class(dotted_path)
    expr.function = udf_class()
def test_should_filter_function_output(self):
    """``output='id'`` restricts the evaluated result to the ``id`` column.

    The function adds one to its input; the expected batch is built from
    only the ``id`` column of the source frame, plus one.
    """
    source = pd.DataFrame({'id': [1, 2], 'data': [1, 2]})
    expr = FunctionExpression(lambda x: x + 1, output='id')

    got = expr.evaluate(Batch(source))

    # Built after evaluation, matching the original statement order.
    id_only = pd.DataFrame(source['id'])
    want = Batch(id_only + 1)
    self.assertEqual(want, got)
def test_should_throw_assert_error_when_name_not_provided_exec_mode(self):
    """Constructing an EXEC-mode expression without a name must assert."""
    with self.assertRaises(AssertionError):
        FunctionExpression(lambda x: [], mode=ExecutionMode.EXEC)
def test_should_work_for_function_without_children_eval_mode(self):
    """A childless expression wrapping ``pd.DataFrame`` returns equal data.

    Evaluating on a batch of ``[1, 2, 3]`` must compare equal to that
    same batch.
    """
    expr = FunctionExpression(lambda x: pd.DataFrame(x))
    batch = Batch(pd.DataFrame([1, 2, 3]))

    got = expr.evaluate(batch)

    self.assertEqual(batch, got)
def test_bind_columns_calls_bind_func_expr_if_type_functional(
        self, mock_bind):
    """``bind_columns_expr`` hands function expressions to the binder.

    ``mock_bind`` is the patched binder supplied by the caller; it must be
    called with the expression and the same (empty) column mapping.
    """
    expr = FunctionExpression(None, name='temp')

    bind_columns_expr([expr], {})

    mock_bind.assert_called_with(expr, {})
def test_should_work_for_function_without_children_eval_mode(self):
    """A childless identity expression returns its input unchanged."""
    expr = FunctionExpression(lambda x: x)
    data = [1, 2, 3]

    got = expr.evaluate(data)

    self.assertEqual(data, got)