示例#1
0
 def _check_agreements(self, model):
     """Score the agreement sample with ``model`` and feed it to ``compute_ks``.

     Side effect: stores the predictor-only view of ``self._check_agreement``
     on ``self.check_agreement``.

     Args:
         model: Object exposing ``predict_proba``; column 1 of its output is
             used as the score (presumably the signal-class probability --
             confirm against the estimator in use).

     Returns:
         Whatever ``compute_ks`` returns for the score/weight pairs split on
         ``signal == 0`` and ``signal == 1``.
     """
     frame = self._check_agreement
     self.check_agreement = get_columns_in_df(frame, self.flist.predictors)
     scores = model.predict_proba(self.check_agreement.values)[:, 1]

     signal = frame['signal']

     # Scores are a plain array, so they are masked with the raw values;
     # the weights are pulled from the frame through the Series mask.
     scores_zero = scores[signal.values == 0]
     scores_one = scores[signal.values == 1]
     weights_zero = frame[signal == 0]['weight'].values
     weights_one = frame[signal == 1]['weight'].values

     return compute_ks(scores_zero, scores_one, weights_zero, weights_one)
示例#2
0
 def _check_correlations(self, model):
     """Score the correlation sample with ``model`` and feed it to ``compute_cvm``.

     Side effect: stores the predictor-only view of ``self._check_correlation``
     on ``self.check_correlation``.

     Args:
         model: Object exposing ``predict_proba``; column 1 of its output is
             used as the score (presumably the signal-class probability --
             confirm against the estimator in use).

     Returns:
         Whatever ``compute_cvm`` returns for the scores and the ``mass``
         column of the unfiltered correlation frame.
     """
     frame = self._check_correlation
     self.check_correlation = get_columns_in_df(frame, self.flist.predictors)

     feature_matrix = self.check_correlation.values
     scores = model.predict_proba(feature_matrix)[:, 1]

     # The mass column is handed over as a Series, not as a bare array.
     return compute_cvm(scores, frame['mass'])
示例#3
0
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from blue.featurelist import FeatureList
from blue.pandas_utils import get_columns_in_df
from blue.estimators import HyperoptEstimator

from evaluation import roc_auc_truncated

# Input locations and the feature specification used for column selection.
train_file = './data/training.csv'
test_file = './data/test.csv'
flist = FeatureList(train_file, spec='features.yml', derived_list=None)

# Training data is indexed by the 'id' column, then narrowed to flist.universe.
df_train = get_columns_in_df(
    pd.read_csv(train_file, index_col='id'),
    flist.universe,
)

# Test data keeps the default index and is narrowed to the predictors only.
df_test = get_columns_in_df(pd.read_csv(test_file), flist.predictors)


def _negated_truncated_auc(x, y):
    """Return ``roc_auc_truncated(x, y)`` with its sign flipped.

    NOTE(review): the sign flip presumably exists because the estimator
    minimizes its metric -- confirm against HyperoptEstimator.
    """
    return -roc_auc_truncated(x, y)


hpest = HyperoptEstimator(
    RandomForestClassifier,
    max_evals=5,
    n_jobs=3,
    metric=_negated_truncated_auc,
)
hpest.fit(df_train[flist.predictors].values, df_train[flist.target].values)