def __init__(self, calibrationTable, score_func=accuracy_score): """ calibrate a classifier @param calibrationTable: a pandas data frame """ print '--------- Calibrating Imputer -----------' X_cal, y_cal, _, _ = impute_risk_factors(calibrationTable) bestScore = -1 bestPipe = None bestParams = None for name, (pipe, params) in make_pipes().iteritems(): print '>'*10, name, '<'*10 _, cur_bestParams, cur_bestScore = fitClfWithGridSearch( name + '_risk', pipe, params, DatasetPair(np.array(X_cal), y_cal), saveToDir='/home/jj/code/Kaggle/allstate/output/gridSearchOutput', useJJ=True, score_func=score_func, n_jobs=N_JOBS, verbosity=0, minimize=False, cvSplitNum=5, maxLearningSteps=10, numConvergenceSteps=4, convergenceTolerance=0, eliteProportion=0.1, parentsProportion=0.4, mutationProportion=0.1, mutationProbability=0.1, mutationStdDev=None, populationSize=6) if cur_bestScore > bestScore: bestScore = cur_bestScore bestPipe = clone(pipe) bestPipe.set_params(**cur_bestParams) bestParams = cur_bestParams print '----> best score:', bestScore pprint(bestParams) self._imputer = bestPipe
# plot_feature_importances(X_train, outputTable, inputTable.columns) print '----------- individual accuracy score' indivClfs = [] for col in outputTable_cal.columns: print '>'*20, col, '<'*20 cur_y = np.array(outputTable_cal[col]) bestScore = -1 bestPipe = None bestParams = None for name, (pipe, params) in make_pipes().iteritems(): print '>'*10, name, '<'*10 _, cur_bestParams, cur_bestScore = fitClfWithGridSearch( '_'.join([name, col, calibrationName]), pipe, params, DatasetPair(X_cal, cur_y), saveToDir='/home/jj/code/Kaggle/allstate/output/gridSearchOutput', useJJ=True, score_func=accuracy_score, n_jobs=N_JOBS, verbosity=0, minimize=False, cvSplitNum=5, maxLearningSteps=10, numConvergenceSteps=4, convergenceTolerance=0, eliteProportion=0.1, parentsProportion=0.4, mutationProportion=0.1, mutationProbability=0.1, mutationStdDev=None, populationSize=6) if cur_bestScore > bestScore: bestScore = cur_bestScore