def model_train_cv_parallel(model, X, y, cv=None, proba=True, n_jobs=8):
    """Run ``model_train_cv`` once per fold in parallel and stack the results.

    Args:
        model: Forwarded unchanged to ``model_train_cv``.
        X: Forwarded unchanged to ``model_train_cv``.
        y: Forwarded unchanged to ``model_train_cv``; ``len(y)`` sizes the
            default splitter.
        cv: Iterable of folds. When ``None``, a shuffled 16-fold ``KFold``
            with ``random_state=1234`` is built over ``len(y)`` samples.
        proba: Forwarded unchanged to ``model_train_cv``.
        n_jobs: Worker count handed to ``Parallel``.

    Returns:
        The ``pd.concat`` of the per-fold results.
    """
    folds = cv
    if folds is None:
        # NOTE(review): assuming ``skl`` is scikit-learn, ``cross_validation``
        # only exists in releases before 0.20 -- confirm the pinned version.
        folds = skl.cross_validation.KFold(
            len(y), n_folds=16, shuffle=True, random_state=1234)

    executor = Parallel(n_jobs=n_jobs, verbose=True)
    run_fold = delayed(model_train_cv)
    # One deferred call per fold; the generator is consumed by the executor.
    pending = (run_fold(fold, model, X, y, proba) for fold in folds)
    return pd.concat(executor(pending))
def tune_gamma(self):
    """Tune ``gamma`` in two passes and return every score collected.

    The first pass calls ``score_gamma()`` with its defaults; the second
    re-scores the range produced by ``detail_range(best, 0.05, min=0)``.
    ``self.best_gamma`` is updated after each pass from row 0 of the
    returned frame.

    Returns:
        Both score tables concatenated, de-duplicated and sorted by the
        ``'score'`` column with ``ascending=self.metric_ascending``.
    """
    coarse = self.score_gamma()
    # Row 0 is taken as the winner; presumably score_gamma returns rows
    # ordered best-first -- TODO confirm.
    self.best_gamma = coarse.iloc[0]['gamma']

    fine_grid = self.detail_range(self.best_gamma, 0.05, min=0)
    fine = self.score_gamma(fine_grid)
    self.best_gamma = fine.iloc[0]['gamma']

    combined = pd.concat([coarse, fine], ignore_index=True)
    unique_rows = combined.drop_duplicates()
    return unique_rows.sort_values('score', ascending=self.metric_ascending)
def tune_gamma(self):
    """Search for the best ``gamma``: a default pass, then a refined pass.

    Side effects:
        Sets ``self.best_gamma`` twice -- first from the default
        ``score_gamma()`` call, then from the call made with the range
        returned by ``detail_range(self.best_gamma, 0.05, min=0)``.

    Returns:
        The rows of both passes merged with ``pd.concat``, duplicates
        dropped, ordered by ``'score'`` using ``self.metric_ascending``.
    """
    first_pass = self.score_gamma()
    # Assumes the scorer hands back its best row first -- verify against
    # score_gamma.
    leader = first_pass.iloc[0]
    self.best_gamma = leader['gamma']

    refined_range = self.detail_range(self.best_gamma, 0.05, min=0)
    second_pass = self.score_gamma(refined_range)
    leader = second_pass.iloc[0]
    self.best_gamma = leader['gamma']

    merged = pd.concat([first_pass, second_pass], ignore_index=True)
    merged = merged.drop_duplicates()
    return merged.sort_values('score', ascending=self.metric_ascending)
def tune_sample(self):
    """Tune ``subsample`` and ``colsample_bytree`` together in two passes.

    The first pass calls ``score_samples()`` with its defaults. The second
    pass scores the ranges that ``detail_range(best, 0.05, max=1)`` returns
    for each of the two parameters. After each pass
    ``self.best_subsample`` and ``self.best_colsample_bytree`` are taken
    from row 0 of the returned frame.

    Returns:
        Both score tables concatenated, de-duplicated and sorted by the
        ``'score'`` column with ``ascending=self.metric_ascending``.
    """
    coarse = self.score_samples()
    # Row 0 is treated as the best candidate; presumably score_samples
    # returns rows best-first -- TODO confirm.
    top = coarse.iloc[0]
    self.best_subsample = top['subsample']
    self.best_colsample_bytree = top['colsample_bytree']

    subsample_grid = self.detail_range(self.best_subsample, 0.05, max=1)
    colsample_grid = self.detail_range(
        self.best_colsample_bytree, 0.05, max=1)

    fine = self.score_samples(subsample_grid, colsample_grid)
    top = fine.iloc[0]
    self.best_subsample = top['subsample']
    self.best_colsample_bytree = top['colsample_bytree']

    combined = pd.concat([coarse, fine], ignore_index=True)
    unique_rows = combined.drop_duplicates()
    return unique_rows.sort_values('score', ascending=self.metric_ascending)
def tune_depth(self):
    """Tune ``max_depth`` and ``min_child_weight`` together in two passes.

    The first pass calls ``score_depth()`` with its defaults. The second
    pass scores the ranges that ``detail_range(best, 1, min=1)`` returns
    for each parameter. After each pass ``self.best_max_depth`` and
    ``self.best_min_child_weight`` are read from row 0 of the returned
    frame and coerced with ``int``.

    Returns:
        Both score tables concatenated, de-duplicated and sorted by the
        ``'score'`` column with ``ascending=self.metric_ascending``.
    """
    coarse = self.score_depth()
    # Row 0 is treated as the best candidate; presumably score_depth
    # returns rows best-first -- TODO confirm.
    top = coarse.iloc[0]
    self.best_max_depth = int(top['max_depth'])
    self.best_min_child_weight = int(top['min_child_weight'])

    depth_grid = self.detail_range(self.best_max_depth, 1, min=1)
    weight_grid = self.detail_range(self.best_min_child_weight, 1, min=1)

    fine = self.score_depth(depth_grid, weight_grid)
    top = fine.iloc[0]
    self.best_max_depth = int(top['max_depth'])
    self.best_min_child_weight = int(top['min_child_weight'])

    combined = pd.concat([coarse, fine], ignore_index=True)
    unique_rows = combined.drop_duplicates()
    return unique_rows.sort_values('score', ascending=self.metric_ascending)
def tune_sample(self):
    """Search the row/column sampling ratios: default pass, then refined.

    Side effects:
        Sets ``self.best_subsample`` and ``self.best_colsample_bytree``
        twice -- first from the default ``score_samples()`` call, then from
        the call made with the two ranges that
        ``detail_range(value, 0.05, max=1)`` returns.

    Returns:
        The rows of both passes merged with ``pd.concat``, duplicates
        dropped, ordered by ``'score'`` using ``self.metric_ascending``.
    """
    first_pass = self.score_samples()
    # Assumes the scorer hands back its best row first -- verify against
    # score_samples.
    leader = first_pass.iloc[0]
    self.best_subsample = leader['subsample']
    self.best_colsample_bytree = leader['colsample_bytree']

    row_ratio_range = self.detail_range(self.best_subsample, 0.05, max=1)
    col_ratio_range = self.detail_range(
        self.best_colsample_bytree, 0.05, max=1)
    second_pass = self.score_samples(row_ratio_range, col_ratio_range)

    leader = second_pass.iloc[0]
    self.best_subsample = leader['subsample']
    self.best_colsample_bytree = leader['colsample_bytree']

    merged = pd.concat([first_pass, second_pass], ignore_index=True)
    merged = merged.drop_duplicates()
    return merged.sort_values('score', ascending=self.metric_ascending)
def tune_depth(self):
    """Search tree depth and child weight: default pass, then refined.

    Side effects:
        Sets ``self.best_max_depth`` and ``self.best_min_child_weight``
        (both coerced with ``int``) twice -- first from the default
        ``score_depth()`` call, then from the call made with the two ranges
        that ``detail_range(value, 1, min=1)`` returns.

    Returns:
        The rows of both passes merged with ``pd.concat``, duplicates
        dropped, ordered by ``'score'`` using ``self.metric_ascending``.
    """
    first_pass = self.score_depth()
    # Assumes the scorer hands back its best row first -- verify against
    # score_depth.
    leader = first_pass.iloc[0]
    self.best_max_depth = int(leader['max_depth'])
    self.best_min_child_weight = int(leader['min_child_weight'])

    depth_range = self.detail_range(self.best_max_depth, 1, min=1)
    weight_range = self.detail_range(self.best_min_child_weight, 1, min=1)
    second_pass = self.score_depth(depth_range, weight_range)

    leader = second_pass.iloc[0]
    self.best_max_depth = int(leader['max_depth'])
    self.best_min_child_weight = int(leader['min_child_weight'])

    merged = pd.concat([first_pass, second_pass], ignore_index=True)
    merged = merged.drop_duplicates()
    return merged.sort_values('score', ascending=self.metric_ascending)
def model_train_cv_parallel(model, X, y, cv=None, proba=True, n_jobs=8):
    """Fan ``model_train_cv`` out over the folds and concatenate the output.

    Args:
        model: Passed straight through to ``model_train_cv``.
        X: Passed straight through to ``model_train_cv``.
        y: Passed straight through to ``model_train_cv``; its length sizes
            the default fold generator.
        cv: Iterable yielding one fold per iteration. If ``None``, a
            shuffled 16-fold ``KFold`` seeded with 1234 is created from
            ``len(y)``.
        proba: Passed straight through to ``model_train_cv``.
        n_jobs: Number of workers given to ``Parallel``.

    Returns:
        Whatever ``pd.concat`` produces from the list of per-fold results.
    """
    if cv is None:
        # NOTE(review): if ``skl`` is scikit-learn, the ``cross_validation``
        # module is only present before release 0.20 -- confirm.
        cv = skl.cross_validation.KFold(
            len(y),
            n_folds=16,
            shuffle=True,
            random_state=1234,
        )

    worker_pool = Parallel(n_jobs=n_jobs, verbose=True)
    tasks = (
        delayed(model_train_cv)(fold, model, X, y, proba)
        for fold in cv
    )
    per_fold_results = worker_pool(tasks)
    return pd.concat(per_fold_results)
def synergy_calc_cv_parallel(heroes, y, cv, n_jobs=8):
    """Run ``synergy_calc_cv`` once per fold in parallel and stack the results.

    Args:
        heroes: Forwarded unchanged to ``synergy_calc_cv``.
        y: Forwarded unchanged to ``synergy_calc_cv``.
        cv: Iterable of folds; each item becomes the first argument of one
            ``synergy_calc_cv`` call.
        n_jobs: Worker count handed to ``Parallel``. Defaults to 8, the
            value that used to be hard-coded, so existing callers behave
            the same; exposed to match ``model_train_cv_parallel``.

    Returns:
        The ``pd.concat`` of the per-fold results.
    """
    executor = Parallel(n_jobs=n_jobs, verbose=True)
    # One deferred call per fold; the generator is consumed by the executor.
    pending = (delayed(synergy_calc_cv)(fold, heroes, y) for fold in cv)
    return pd.concat(executor(pending))