def _retrieve_NaiveBayes_Model(self, modelname, verbose=False):
    """
    Load the result tables of a trained Naive Bayes model and optionally
    print them.

    Two tables of the current schema are queried:

    * <MODELNAME>_MODEL
    * <MODELNAME>_DISCRANGES

    Their columns are relabelled with fixed upper-case names before being
    displayed.

    Parameters
    ----------
    modelname : str
        Name of the model whose tables are read. It is passed through
        ``ibmdbpy.utils.check_tablename`` first.
    verbose : bool, default: False
        The two tables are printed only when this is exactly ``True``.

    Raises
    ------
    IdaNaiveBayesError
        If no database connection is attached to this object
        (``self._idadb`` is None).

    Notes
    -----
    Nothing is returned; the tables are only printed. A nicer rendering
    than a raw table dump is still to be done.
    """
    model_labels = ('ATTRIBUTE', 'VAL', 'CLASS', 'CLASSVALCOUNT',
                    'ATTRCLASSCOUNT', 'CLASSCOUNT', 'TOTALCOUNT')
    discranges_labels = ('COLNAME', 'BREAK')

    modelname = ibmdbpy.utils.check_tablename(modelname)

    if self._idadb is None:
        raise IdaNaiveBayesError(
            "The Naive Bayes model was not trained before.")

    def fetch_table(suffix, labels):
        # Query one result table of the model and relabel its columns.
        frame = self._idadb.ida_query(
            'SELECT * FROM "' + self._idadb.current_schema +
            '"."' + modelname + suffix)
        frame.columns = [label.upper() for label in labels]
        return frame

    model_main = fetch_table('_MODEL"', model_labels)
    disc = fetch_table('_DISCRANGES"', discranges_labels)

    if verbose is not True:
        return

    print("MODEL")
    print(model_main)
    print("DISCRANGES")
    print(disc)
def predict(self, idadf, column_id=None, outtable=None,
            outtableProb=None, mestimation=False):
    """
    Apply the Naive Bayes model to a data set by calling the
    IDAX.PREDICT_NAIVEBAYES stored procedure.

    Parameters
    ----------
    idadf : IdaDataFrame
        IdaDataFrame to be used as input.
    column_id : str, optional
        The column of the input table that identifies a unique instance
        ID. By default, the ID column stored on this object
        (``self._column_id``) is used.
    outtable : str, optional
        The name of the output table where the predictions are stored.
        It should contain only alphanumerical characters and underscores.
        If this parameter is not specified, a name is generated
        automatically. If it corresponds to an existing table in the
        database, that table is dropped and replaced.
    outtableProb : str, optional
        The name of the output table where the probabilities for each of
        the classes are stored. It should contain only alphanumerical
        characters and underscores. If this parameter is not specified,
        no table name is passed to the stored procedure. If it
        corresponds to an existing table in the database, that table is
        dropped and replaced.
    mestimation : bool, default: False
        A flag that indicates the use of m-estimation for probabilities.
        This kind of estimation might be slower than other ones, but it
        might produce better results for small or unbalanced data sets.

    Returns
    -------
    IdaDataFrame
        IdaDataFrame opened on the prediction output table. It is also
        stored in ``self.labels_``.

    Raises
    ------
    TypeError
        If ``idadf`` is not an IdaDataFrame.
    ValueError
        If the ID column is not a column of ``idadf``.
    IdaNaiveBayesError
        If the model was not trained before (``self._idadb`` is None).

    Notes
    -----
    ``outtable``, ``outtableProb`` and ``mestimation`` are stored as
    attributes of this object before the stored procedure is called.
    """
    if not isinstance(idadf, ibmdbpy.IdaDataFrame):
        raise TypeError("Argument should be an IdaDataFrame")

    idadf._idadb._check_procedure("PREDICT_NAIVEBAYES",
                                  "Prediction for Naive Bayes")

    # Check the ID
    if column_id is None:
        column_id = self._column_id
    if column_id not in idadf.columns:
        # str() keeps this a ValueError even when no ID is known at all
        # (column_id is None) or when it is not a string; plain
        # concatenation would raise an unrelated TypeError instead.
        raise ValueError(
            "No id columns is available in IdaDataFrame:" + str(column_id) +
            ". Either create a new ID column using add_column_id function" +
            " or give the name of a column that can be used as ID")

    if self._idadb is None:
        raise IdaNaiveBayesError(
            "The Naive Bayes model was not trained before.")

    # Check or create an outtable name, drop it if it already exists.
    if outtable is None:
        outtable = idadf._idadb._get_valid_tablename('PREDICT_NAIVEBAYES_')
    else:
        outtable = ibmdbpy.utils.check_tablename(outtable)
        if idadf._idadb.exists_table(outtable):
            idadf._idadb.drop_table(outtable)

    if outtableProb is not None:
        outtableProb = ibmdbpy.utils.check_tablename(outtableProb)
        if idadf._idadb.exists_table(outtableProb):
            idadf._idadb.drop_table(outtableProb)

    self.outtable = outtable
    self.outtableProb = outtableProb
    self.mestimation = mestimation

    # Create a temporary view of the input for the stored procedure.
    idadf.internal_state._create_view()
    try:
        tmp_view_name = idadf.internal_state.current_state
        idadf._idadb._call_stored_procedure("IDAX.PREDICT_NAIVEBAYES ",
                                            model=self.modelname,
                                            intable=tmp_view_name,
                                            id=column_id,
                                            outtable=self.outtable,
                                            outtableProb=self.outtableProb,
                                            mestimation=self.mestimation)
    finally:
        # Always remove the temporary view, whether or not the call
        # succeeded; any exception still propagates to the caller.
        idadf.internal_state._delete_view()
        idadf._idadb._autocommit()

    self.labels_ = ibmdbpy.IdaDataFrame(idadf._idadb, self.outtable)
    return self.labels_