def get_validation_results(self):
    # Per-epoch train/test metrics from XGBoost's evals_result():
    # 'validation_0' is the first eval set (train), 'validation_1' the second (test).
    val_results = self.model.evals_result()
    train = gu.create_data_frame(val_results['validation_0'])
    train = train.add_prefix('train_')
    test = gu.create_data_frame(val_results['validation_1'])
    test = test.add_prefix('test_')
    train_test = pd.concat([train, test], axis=1)
    train_test.index.name = 'epoch'
    train_test.reset_index(level=0, inplace=True)
    train_test['model'] = self.atomic_metrics['model']
    train_test['ts'] = self.atomic_metrics['ts']
    return train_test
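# --- Hedged usage sketch (illustrative only, not part of the class): the
# 'validation_0'/'validation_1' keys read above are produced by the order of
# eval_set in XGBoost's sklearn fit(). All data and hyperparameters here are
# made-up assumptions for demonstration.
import numpy as np
import xgboost as xgb

rng = np.random.default_rng(0)
X_tr, y_tr = rng.normal(size=(200, 5)), rng.integers(0, 2, 200)
X_te, y_te = rng.normal(size=(50, 5)), rng.integers(0, 2, 50)

clf = xgb.XGBClassifier(n_estimators=20, eval_metric='logloss')
clf.fit(X_tr, y_tr, eval_set=[(X_tr, y_tr), (X_te, y_te)], verbose=False)

# evals_result() maps eval-set name -> {metric: [value per boosting round]},
# which is exactly what get_validation_results() reshapes into a frame.
print(clf.evals_result()['validation_1']['logloss'][:3])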
def get_roc_values(self):
    fpr, tpr, _ = metrics.roc_curve(self.y_actual, self.y_predicted_prob_one)
    roc_df = gu.create_data_frame({'fpr': fpr, 'tpr': tpr})
    roc_df.index.name = 'index'
    roc_df.reset_index(level=0, inplace=True)
    roc_df['model'] = self.atomic_metrics['model']
    roc_df['ts'] = self.atomic_metrics['ts']
    self.plots['roc'] = roc_df
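# --- Hedged sketch: plotting the fpr/tpr pairs that get_roc_values() stores
# in self.plots['roc']. Built standalone here with synthetic labels and
# scores; the variable names are assumptions.
import matplotlib.pyplot as plt
import numpy as np
from sklearn import metrics

rng = np.random.default_rng(0)
y_actual = rng.integers(0, 2, 100)
y_prob_one = np.clip(0.6 * y_actual + 0.5 * rng.random(100), 0, 1)

fpr, tpr, _ = metrics.roc_curve(y_actual, y_prob_one)
plt.plot(fpr, tpr, label=f'AUC = {metrics.auc(fpr, tpr):.3f}')
plt.plot([0, 1], [0, 1], linestyle='--')  # chance line
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.legend()
plt.show()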
def confusion_matrix(self):
    conf_df = gu.create_data_frame({
        'actual': self.y_actual,
        'predicted': self.y_predicted
    })
    # One row per (actual, predicted) pair with its occurrence count.
    conf_df = conf_df.groupby(['actual', 'predicted'], as_index=False).size()
    conf_df = conf_df.rename(columns={'size': 'count'})
    conf_df['model'] = self.atomic_metrics['model']
    conf_df['ts'] = self.atomic_metrics['ts']
    return conf_df
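# --- Hedged sketch: confusion_matrix() returns long-format counts (one row
# per (actual, predicted) pair); pivoting recovers the familiar square
# matrix. The toy labels below are made-up assumptions.
import pandas as pd

toy = pd.DataFrame({'actual':    [0, 0, 1, 1, 1],
                    'predicted': [0, 1, 1, 1, 0]})
counts = toy.groupby(['actual', 'predicted'], as_index=False).size()
counts = counts.rename(columns={'size': 'count'})
print(counts.pivot(index='actual', columns='predicted', values='count').fillna(0))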
def get_pr_values(self):
    precision, recall, _ = metrics.precision_recall_curve(
        self.y_actual, self.y_predicted_prob_one)
    pr_df = gu.create_data_frame({
        'precision': precision,
        'recall': recall
    })
    pr_df.index.name = 'index'
    pr_df.reset_index(level=0, inplace=True)
    pr_df['model'] = self.atomic_metrics['model']
    pr_df['ts'] = self.atomic_metrics['ts']
    self.plots['pr'] = pr_df
def get_prob_values(self):
    cols = ['predicted', 'actual', 'prob_class_1']
    data = np.column_stack(
        [self.y_predicted, self.y_actual, self.y_predicted_prob_one])
    prob_df = gu.create_data_frame(data=data, columns=cols)
    # Label correctly classified rows 'Positives' and misclassified rows
    # 'Negatives' so probability distributions can be compared per group.
    prob_df['classification'] = np.where(
        prob_df['predicted'] == prob_df['actual'], 'Positives', 'Negatives')
    prob_df.index.name = 'index'
    prob_df.reset_index(level=0, inplace=True)
    prob_df['model'] = self.atomic_metrics['model']
    prob_df['ts'] = self.atomic_metrics['ts']
    self.plots['prob'] = prob_df
def get_features_corr_matrix(self, matrix):
    x_pd = gu.create_data_frame(matrix)
    corr_mtx = x_pd.corr()
    ######################################################
    # should be removed when we have the feature names
    corr_mtx = corr_mtx.add_prefix('f')
    ######################################################
    corr_mtx = corr_mtx.melt(ignore_index=False)
    corr_mtx = corr_mtx.rename(columns={"variable": "feature_1"})
    corr_mtx.index.name = 'feature'
    corr_mtx.reset_index(level=0, inplace=True)
    corr_mtx['model'] = self.atomic_metrics['model']
    corr_mtx['ts'] = self.atomic_metrics['ts']
    self.feature_metrics['corr_matrix'] = corr_mtx
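# --- Hedged sketch: melt(ignore_index=False) (pandas >= 1.1) keeps the
# correlation matrix's row labels, so every (feature, feature_1, value) pair
# survives the reshape into long format. Synthetic data; the names mirror
# the method above but are assumptions.
import numpy as np
import pandas as pd

x_pd = pd.DataFrame(np.random.default_rng(0).normal(size=(50, 3)))
corr = x_pd.corr().add_prefix('f')
long_corr = (corr.melt(ignore_index=False)
                 .rename(columns={'variable': 'feature_1'}))
long_corr.index.name = 'feature'
print(long_corr.reset_index().head())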
def test_create_data_frame():
    """Test function for create_data_frame() function in generalutils"""
    test_data = {
        'col1': [1, 2, 3],
        'col2': ['abc', 'def', 'ghi'],
        'col3': [1.0, 2.0, 3.0]
    }
    df = gu.create_data_frame(test_data)
    assert isinstance(df, pd.DataFrame)
    assert df['col1'].dtypes.name == 'int64'
    assert df['col2'].dtypes.name == 'object'
    assert df['col3'].dtypes.name == 'float64'
def get_importance(self):
    importance_types = [
        'weight', 'gain', 'cover', 'total_gain', 'total_cover'
    ]
    importance = {}
    for imp_type in importance_types:
        try:
            importance[imp_type] = self.booster.get_score(
                importance_type=imp_type)
        except Exception as e:
            # Some importance types are unavailable for certain boosters
            # (e.g. gblinear only supports 'weight'); skip those.
            print(e)
    importance_df = gu.create_data_frame(importance)
    importance_df.index.name = 'feature'
    importance_df.reset_index(level=0, inplace=True)
    importance_df['model'] = self.atomic_metrics['model']
    importance_df['ts'] = self.atomic_metrics['ts']
    return importance_df
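# --- Hedged sketch: what get_importance() iterates over. Booster.get_score()
# returns {feature_name: value} for the requested importance_type. Training
# data and parameters here are made-up assumptions.
import numpy as np
import xgboost as xgb

rng = np.random.default_rng(0)
X, y = rng.normal(size=(200, 5)), rng.integers(0, 2, 200)
booster = xgb.train({'objective': 'binary:logistic'},
                    xgb.DMatrix(X, label=y), num_boost_round=10)

for imp_type in ['weight', 'gain', 'cover', 'total_gain', 'total_cover']:
    print(imp_type, booster.get_score(importance_type=imp_type))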