def execute_inspections(op_id, caller_filename, lineno,
                        optional_code_reference, optional_source_code):
            """ Run the inspections around the wrapped add_constant call and record its DAG node

            ``args``, ``kwargs`` and ``original`` are free variables that are
            resolved from the enclosing scope (not visible in this block).

            Args:
                op_id: operator id stored on the new DAG node.
                caller_filename: file name used for the node's code location.
                lineno: line number used for the node's code location.
                optional_code_reference: forwarded to the input lookup and to
                    ``get_optional_code_info_or_none``.
                optional_source_code: forwarded to the input lookup and to
                    ``get_optional_code_info_or_none``.

            Returns:
                The ``result_data`` of the backend's annotated result object.
            """
            func_info = FunctionInfo('statsmodel.api', 'add_constant')
            # The first positional argument of the wrapped call is the data input.
            data_input = get_input_info(args[0], caller_filename, lineno, func_info,
                                        optional_code_reference, optional_source_code)

            op_context = OperatorContext(OperatorType.PROJECTION_MODIFY, func_info)
            prepared_inputs = SklearnBackend.before_call(
                op_context, [data_input.annotated_dfobject])
            # Call the unpatched function with the backend-prepared data in place
            # of the first positional argument; everything else is passed through.
            raw_result = original(prepared_inputs[0].result_data, *args[1:], **kwargs)
            backend_result = SklearnBackend.after_call(op_context, prepared_inputs,
                                                       raw_result)
            new_return_value = backend_result.annotated_dfobject.result_data

            location = BasicCodeLocation(caller_filename, lineno)
            details = DagNodeDetails("Adds const column", ["array"])
            code_info = get_optional_code_info_or_none(optional_code_reference,
                                                       optional_source_code)
            add_dag_node(DagNode(op_id, location, op_context, details, code_info),
                         [data_input.dag_node], backend_result)

            return new_return_value
Exemplo n.º 2
0
    def patched_fit_transform(self, *args, **kwargs):
        """ Patch for ('example_pipelines.healthcare.healthcare_utils.MyW2VTransformer', 'fit_transform')

        Runs the original ``fit_transform`` on the backend-prepared version of
        the first positional argument, lets the backend post-process the
        result and records a TRANSFORMER DAG node for the call.

        ``self.mlinspect_fit_transform_active`` is True for the duration of
        the call and is always reset to False afterwards, including when one
        of the wrapped calls raises.
        NOTE(review): the flag presumably lets other patched methods detect
        that they run nested inside fit_transform - confirm against them.

        Args:
            *args: positional arguments of the original call; ``args[0]`` is
                the data input, the rest is passed through unchanged.
            **kwargs: keyword arguments passed through unchanged.

        Returns:
            The ``result_data`` of the backend's annotated result object
            (asserted to be a ``MlinspectNdarray``).
        """
        # pylint: disable=no-method-argument
        self.mlinspect_fit_transform_active = True  # pylint: disable=attribute-defined-outside-init
        try:
            original = gorilla.get_original_attribute(
                healthcare_utils.MyW2VTransformer, 'fit_transform')
            function_info = FunctionInfo(
                'example_pipelines.healthcare.healthcare_utils',
                'MyW2VTransformer')
            input_info = get_input_info(args[0], self.mlinspect_caller_filename,
                                        self.mlinspect_lineno, function_info,
                                        self.mlinspect_optional_code_reference,
                                        self.mlinspect_optional_source_code)

            operator_context = OperatorContext(OperatorType.TRANSFORMER,
                                               function_info)
            input_infos = SklearnBackend.before_call(
                operator_context, [input_info.annotated_dfobject])
            result = original(self, input_infos[0].result_data, *args[1:],
                              **kwargs)
            backend_result = SklearnBackend.after_call(operator_context,
                                                       input_infos, result)
            new_return_value = backend_result.annotated_dfobject.result_data
            assert isinstance(new_return_value, MlinspectNdarray)
            dag_node = DagNode(
                singleton.get_next_op_id(),
                BasicCodeLocation(self.mlinspect_caller_filename,
                                  self.mlinspect_lineno), operator_context,
                DagNodeDetails("Word2Vec: fit_transform", ['array']),
                get_optional_code_info_or_none(
                    self.mlinspect_optional_code_reference,
                    self.mlinspect_optional_source_code))
            add_dag_node(dag_node, [input_info.dag_node], backend_result)
        finally:
            # Reset the marker on every exit path so a failed fit_transform
            # does not leave the transformer flagged as "inside fit_transform".
            self.mlinspect_fit_transform_active = False  # pylint: disable=attribute-defined-outside-init
        return new_return_value
def add_test_label_node(test_label_arg, caller_filename, function_info, lineno,
                        optional_code_reference, optional_source_code):
    """Register a TEST_LABELS DAG node for the labels of an estimator.score call

    Args:
        test_label_arg: the label object handed to the score call; it is used
            both for the input lookup and as the operator result passed to
            the backend.
        caller_filename: file name used for the node's code location.
        function_info: FunctionInfo describing the patched function.
        lineno: line number used for the node's code location.
        optional_code_reference: forwarded to the input lookup and to
            ``get_optional_code_info_or_none``.
        optional_source_code: forwarded to the input lookup and to
            ``get_optional_code_info_or_none``.

    Returns:
        A 3-tuple ``(backend_result, dag_node, result_data)`` where
        ``result_data`` is taken from the backend result's annotated object.
    """
    # pylint: disable=too-many-arguments
    op_context = OperatorContext(OperatorType.TEST_LABELS, function_info)
    labels_input = get_input_info(test_label_arg, caller_filename, lineno,
                                  function_info, optional_code_reference,
                                  optional_source_code)

    # Build the node first; it is only attached to the DAG once the backend
    # has produced its result below.
    op_id = _pipeline_executor.singleton.get_next_op_id()
    location = BasicCodeLocation(caller_filename, lineno)
    details = DagNodeDetails(None, get_column_names(test_label_arg))
    code_info = get_optional_code_info_or_none(optional_code_reference,
                                               optional_source_code)
    labels_node = DagNode(op_id, location, op_context, details, code_info)

    prepared_inputs = SklearnBackend.before_call(
        op_context, [labels_input.annotated_dfobject])
    backend_result = SklearnBackend.after_call(op_context, prepared_inputs,
                                               test_label_arg)
    add_dag_node(labels_node, [labels_input.dag_node], backend_result)

    labels_result = backend_result.annotated_dfobject.result_data
    return backend_result, labels_node, labels_result
def add_train_data_node(estimator, train_data_arg, function_info):
    """Register a TRAIN_DATA DAG node for the data of an estimator.fit call

    Code location and optional code info are read from the ``mlinspect_*``
    attributes of ``estimator``.

    Args:
        estimator: object carrying ``mlinspect_caller_filename``,
            ``mlinspect_lineno``, ``mlinspect_optional_code_reference`` and
            ``mlinspect_optional_source_code``.
        train_data_arg: the training data; it is used both for the input
            lookup and as the operator result passed to the backend.
        function_info: FunctionInfo describing the patched function.

    Returns:
        A 3-tuple ``(backend_result, dag_node, result_data)`` where
        ``result_data`` is taken from the backend result's annotated object.
    """
    filename = estimator.mlinspect_caller_filename
    line_number = estimator.mlinspect_lineno
    code_reference = estimator.mlinspect_optional_code_reference
    source_code = estimator.mlinspect_optional_source_code

    data_input = get_input_info(train_data_arg, filename, line_number,
                                function_info, code_reference, source_code)

    # Build the node first; it is only attached to the DAG once the backend
    # has produced its result below.
    op_id = _pipeline_executor.singleton.get_next_op_id()
    op_context = OperatorContext(OperatorType.TRAIN_DATA, function_info)
    location = BasicCodeLocation(filename, line_number)
    details = DagNodeDetails(None, ["array"])
    code_info = get_optional_code_info_or_none(code_reference, source_code)
    data_node = DagNode(op_id, location, op_context, details, code_info)

    prepared_inputs = SklearnBackend.before_call(
        op_context, [data_input.annotated_dfobject])
    backend_result = SklearnBackend.after_call(op_context, prepared_inputs,
                                               train_data_arg)
    add_dag_node(data_node, [data_input.dag_node], backend_result)

    data_result = backend_result.annotated_dfobject.result_data
    return backend_result, data_node, data_result
Exemplo n.º 5
0
        def execute_inspections(op_id, caller_filename, lineno,
                                optional_code_reference, optional_source_code):
            """ Run the inspections around the wrapped numpy.random call and record its DAG node

            The operator is treated as a DATA_SOURCE: no inputs are handed to
            the backend and the DAG node gets no parents. ``args``, ``kwargs``
            and ``original`` are free variables that are resolved from the
            enclosing scope (not visible in this block).

            Args:
                op_id: operator id stored on the new DAG node.
                caller_filename: file name used for the node's code location.
                lineno: line number used for the node's code location.
                optional_code_reference: forwarded to
                    ``get_optional_code_info_or_none``.
                optional_source_code: forwarded to
                    ``get_optional_code_info_or_none``.

            Returns:
                The ``result_data`` of the backend's annotated result object.
            """
            op_context = OperatorContext(OperatorType.DATA_SOURCE,
                                         FunctionInfo('numpy.random', 'random'))
            prepared_inputs = SklearnBackend.before_call(op_context, [])
            generated = original(*args, **kwargs)
            backend_result = SklearnBackend.after_call(op_context,
                                                       prepared_inputs,
                                                       generated)

            location = BasicCodeLocation(caller_filename, lineno)
            details = DagNodeDetails("random", ['array'])
            code_info = get_optional_code_info_or_none(optional_code_reference,
                                                       optional_source_code)
            source_node = DagNode(op_id, location, op_context, details,
                                  code_info)
            add_dag_node(source_node, [], backend_result)
            return backend_result.annotated_dfobject.result_data
    def patched_fit(self, *args, **kwargs):
        """ Patch for ('statsmodel.api.OLS', 'fit')

        Registers a train-data DAG node for ``self.data.exog`` and a
        train-label DAG node for ``self.data.endog``, replaces both attributes
        with the values returned by the helpers, runs the original ``fit`` and
        records an ESTIMATOR DAG node whose parents are those two nodes.

        Args:
            *args: positional arguments passed through to the original fit.
            **kwargs: keyword arguments passed through to the original fit.

        Returns:
            Whatever the original ``OLS.fit`` returns, unchanged.
        """
        # pylint: disable=no-method-argument, too-many-locals
        original = gorilla.get_original_attribute(api.OLS, 'fit')
        function_info = FunctionInfo('statsmodel.api.OLS', 'fit')

        # Train data
        # pylint: disable=no-member
        data_backend_result, train_data_node, train_data_result = add_train_data_node(
            self, self.data.exog, function_info)
        self.data.exog = train_data_result
        # Train labels
        # pylint: disable=no-member
        label_backend_result, train_labels_node, train_labels_result = add_train_label_node(
            self, self.data.endog, function_info)
        self.data.endog = train_labels_result

        # Estimator
        operator_context = OperatorContext(OperatorType.ESTIMATOR,
                                           function_info)
        input_dfs = [
            data_backend_result.annotated_dfobject,
            label_backend_result.annotated_dfobject
        ]
        input_infos = SklearnBackend.before_call(operator_context, input_dfs)
        result = original(self, *args, **kwargs)
        # The estimator node carries no data result, so None is handed to the
        # backend instead of the fit result.
        estimator_backend_result = SklearnBackend.after_call(
            operator_context, input_infos, None)

        dag_node = DagNode(
            singleton.get_next_op_id(),
            BasicCodeLocation(self.mlinspect_caller_filename,
                              self.mlinspect_lineno), operator_context,
            # The patched estimator is statsmodels' OLS, so the node must not
            # be described as a decision tree.
            DagNodeDetails("OLS", []),
            get_optional_code_info_or_none(
                self.mlinspect_optional_code_reference,
                self.mlinspect_optional_source_code))
        add_dag_node(dag_node, [train_data_node, train_labels_node],
                     estimator_backend_result)
        return result