Пример #1
0
    def _retrieve_NaiveBayes_Model(self, modelname, verbose=False):
        """
        Load the tables written by the Naive Bayes IDAX function and
        optionally print them.

        The model is read from two tables of the current schema:
            * <MODELNAME>_MODEL
            * <MODELNAME>_DISCRANGES

        Parameters
        ----------
        modelname : str
            Name of the model to retrieve. It is first passed through
            ``ibmdbpy.utils.check_tablename``.

        verbose : bool, default: False
            Verbosity mode. The tables are printed only when this is
            exactly ``True``.

        Raises
        ------
        IdaNaiveBayesError
            If ``self._idadb`` is None.

        Notes
        -----
        Needs better formatting instead of printing the tables.
        Nothing is returned; the fetched tables are only printed.
        """
        modelname = ibmdbpy.utils.check_tablename(modelname)

        if self._idadb is None:
            raise IdaNaiveBayesError(
                "The Naive Bayes model was not trained before.")

        def _load(suffix, labels):
            # Fetch "<current schema>"."<MODELNAME><suffix>" and give the
            # result fixed, upper-cased column labels.
            qualified = ('"' + self._idadb.current_schema + '"."' +
                         modelname + suffix + '"')
            frame = self._idadb.ida_query('SELECT * FROM ' + qualified)
            frame.columns = [label.upper() for label in labels]
            return frame

        model_main = _load('_MODEL', [
            'ATTRIBUTE', 'VAL', 'CLASS', 'CLASSVALCOUNT', 'ATTRCLASSCOUNT',
            'CLASSCOUNT', 'TOTALCOUNT'
        ])
        disc = _load('_DISCRANGES', ['COLNAME', 'BREAK'])

        # Only the literal True enables printing (identity check).
        if verbose is not True:
            return

        for title, table in (("MODEL", model_main), ("DISCRANGES", disc)):
            print(title)
            print(table)
Пример #2
0
    def predict(self,
                idadf,
                column_id=None,
                outtable=None,
                outtableProb=None,
                mestimation=False):
        """
        Use the Naive Bayes predict stored procedure to apply a Naive Bayes model
        to generate classification predictions for a data set.

        Parameters
        ----------
        idadf : IdaDataFrame
             IdaDataFrame to be used as input.

        column_id : str, optional
            The column of the input table that identifies a unique instance ID.
            By default, the same id column that is specified in the stored
            procedure to build the model.

        outtable : str, optional
            The name of the output table where the predictions are stored.
            It should contain only alphanumerical characters and underscores.
            All lower case characters will be converted to upper case characters.
            If this parameter is not specified, it is generated automatically. If
            the parameter corresponds to an existing table in the database, it
            will be replaced.

        outtableProb : str, optional
            The name of the output table where the probabilities for each of the classes are stored.
            It should contain only alphanumerical characters and underscores.
            All lower case characters will be converted to upper case characters.
            If this parameter is not specified, the table is not created.
            If the parameter corresponds to an existing table in the database, it
            will be replaced.

        mestimation : flag, default: False
            A flag that indicates the use of m-estimation for probabilities.
            This kind of estimation might be slower than other ones, but it
            might produce better results for small or unbalanced data sets.

        Returns
        -------
        IdaDataFrame
            IdaDataFrame containing the classification decision for each
            datapoints referenced by their ID.

        Raises
        ------
        TypeError
            If ``idadf`` is not an IdaDataFrame.
        ValueError
            If the ID column (given, or the one stored on the model) is not
            a column of ``idadf``.
        IdaNaiveBayesError
            If ``self._idadb`` is None.

        Notes
        -----
        As a side effect, ``self.outtable``, ``self.outtableProb``,
        ``self.mestimation`` and ``self.labels_`` are updated.
        """
        if not isinstance(idadf, ibmdbpy.IdaDataFrame):
            raise TypeError("Argument should be an IdaDataFrame")

        idadf._idadb._check_procedure("PREDICT_NAIVEBAYES",
                                      "Prediction for Naive Bayes")

        # Check the ID
        if column_id is None:
            column_id = self._column_id
        if column_id not in idadf.columns:
            # str() keeps the message buildable when no ID is known at all
            # (column_id is still None here); plain concatenation would
            # raise TypeError and hide the intended ValueError.
            raise ValueError(
                "No id columns is available in IdaDataFrame:" +
                str(column_id) +
                ". Either create a new ID column using add_column_id function"
                + " or give the name of a column that can be used as ID")

        if self._idadb is None:
            raise IdaNaiveBayesError(
                "The Naive Bayes model was not trained before.")

        # Check or create an outtable name, drop it if it already exists.
        if outtable is None:
            outtable = idadf._idadb._get_valid_tablename('PREDICT_NAIVEBAYES_')
        else:
            outtable = ibmdbpy.utils.check_tablename(outtable)
            if idadf._idadb.exists_table(outtable):
                idadf._idadb.drop_table(outtable)

        # The probability table is optional; only validate and replace it
        # when the caller asked for one.
        if outtableProb is not None:
            outtableProb = ibmdbpy.utils.check_tablename(outtableProb)
            if idadf._idadb.exists_table(outtableProb):
                idadf._idadb.drop_table(outtableProb)

        self.outtable = outtable
        self.outtableProb = outtableProb
        self.mestimation = mestimation

        # Create a temporary view
        idadf.internal_state._create_view()
        tmp_view_name = idadf.internal_state.current_state

        try:
            idadf._idadb._call_stored_procedure("IDAX.PREDICT_NAIVEBAYES ",
                                                model=self.modelname,
                                                intable=tmp_view_name,
                                                id=column_id,
                                                outtable=self.outtable,
                                                outtableProb=self.outtableProb,
                                                mestimation=self.mestimation)
        finally:
            # Always remove the temporary view, whether or not the procedure
            # succeeded; any exception still propagates to the caller.
            idadf.internal_state._delete_view()
            idadf._idadb._autocommit()

        self.labels_ = ibmdbpy.IdaDataFrame(idadf._idadb, self.outtable)
        return self.labels_