示例#1
0
def model_train_cv_parallel(model, X, y, cv=None, proba=True, n_jobs=8):
    """Run ``model_train_cv`` once per CV fold in parallel and stack the results.

    Args:
        model: Passed through unchanged to ``model_train_cv``.
        X: Passed through unchanged to ``model_train_cv``.
        y: Passed through unchanged to ``model_train_cv``; ``len(y)`` sizes
            the default splitter.
        cv: Iterable of folds. When ``None``, a 16-fold shuffled ``KFold``
            with ``random_state=1234`` is built over ``len(y)`` samples.
        proba: Passed through unchanged to ``model_train_cv``.
        n_jobs: Worker count handed to ``Parallel``.

    Returns:
        The per-fold results combined with ``pd.concat``.
    """
    if cv is None:
        # NOTE(review): presumably `skl` is scikit-learn; `cross_validation`
        # is its legacy splitter module -- confirm the pinned version.
        n_samples = len(y)
        cv = skl.cross_validation.KFold(
            n_samples, n_folds=16, shuffle=True, random_state=1234)

    pool = Parallel(n_jobs=n_jobs, verbose=True)
    fold_jobs = (
        delayed(model_train_cv)(fold, model, X, y, proba) for fold in cv
    )
    return pd.concat(pool(fold_jobs))
示例#2
0
文件: model.py 项目: tyz910/kaggle
    def tune_gamma(self):
        """Tune ``gamma`` in two passes and return every scored candidate.

        The first pass calls ``score_gamma()`` with its defaults; the second
        re-scores the range ``detail_range`` builds around the first winner
        (step 0.05, ``min=0``). After each pass the ``gamma`` of the first
        row is stored in ``self.best_gamma``.

        Returns:
            Both score tables concatenated, de-duplicated and sorted by
            ``'score'`` in the direction given by ``self.metric_ascending``.
        """
        coarse = self.score_gamma()
        # Row 0 is taken as the winner -- presumably score_gamma returns
        # its rows best-first; verify against its implementation.
        coarse_top = coarse.iloc[0]
        self.best_gamma = coarse_top['gamma']

        refined_grid = self.detail_range(self.best_gamma, 0.05, min=0)
        fine = self.score_gamma(refined_grid)
        fine_top = fine.iloc[0]
        self.best_gamma = fine_top['gamma']

        combined = pd.concat([coarse, fine], ignore_index=True)
        unique_rows = combined.drop_duplicates()
        return unique_rows.sort_values(
            'score', ascending=self.metric_ascending)
示例#3
0
    def tune_gamma(self):
        """Search for the best ``gamma``: a default scoring run, then a refinement.

        ``self.best_gamma`` is set from the first row of the default
        ``score_gamma()`` table, then overwritten from the first row of a
        second table scored over ``detail_range(best_gamma, 0.05, min=0)``.

        Returns:
            The union of both tables (duplicates dropped), ordered by the
            ``'score'`` column using ``self.metric_ascending``.
        """
        initial_scores = self.score_gamma()
        self.best_gamma = initial_scores.iloc[0]['gamma']

        # Second pass around the first winner; the exact grid is whatever
        # detail_range produces for step 0.05 with min=0.
        nearby = self.detail_range(self.best_gamma, 0.05, min=0)
        refined_scores = self.score_gamma(nearby)
        self.best_gamma = refined_scores.iloc[0]['gamma']

        all_scores = pd.concat(
            [initial_scores, refined_scores], ignore_index=True)
        all_scores = all_scores.drop_duplicates()
        return all_scores.sort_values('score', ascending=self.metric_ascending)
示例#4
0
文件: model.py 项目: tyz910/kaggle
    def tune_sample(self):
        """Tune ``subsample`` and ``colsample_bytree`` in two passes.

        The first pass calls ``score_samples()`` with its defaults. The
        second re-scores the ranges ``detail_range`` builds around each
        first-pass winner (step 0.05, ``max=1``). After each pass the first
        row's values are stored in ``self.best_subsample`` and
        ``self.best_colsample_bytree``.

        Returns:
            Both score tables concatenated, de-duplicated and sorted by
            ``'score'`` in the direction given by ``self.metric_ascending``.
        """
        coarse = self.score_samples()
        # Row 0 is taken as the winner -- presumably score_samples returns
        # its rows best-first; verify against its implementation.
        top = coarse.iloc[0]
        self.best_subsample = top['subsample']
        self.best_colsample_bytree = top['colsample_bytree']

        row_grid = self.detail_range(self.best_subsample, 0.05, max=1)
        col_grid = self.detail_range(
            self.best_colsample_bytree, 0.05, max=1)
        fine = self.score_samples(row_grid, col_grid)
        top = fine.iloc[0]
        self.best_subsample = top['subsample']
        self.best_colsample_bytree = top['colsample_bytree']

        combined = pd.concat([coarse, fine], ignore_index=True)
        unique_rows = combined.drop_duplicates()
        return unique_rows.sort_values(
            'score', ascending=self.metric_ascending)
示例#5
0
文件: model.py 项目: tyz910/kaggle
    def tune_depth(self):
        """Tune ``max_depth`` and ``min_child_weight`` in two passes.

        The first pass calls ``score_depth()`` with its defaults. The second
        re-scores the ranges ``detail_range`` builds around each first-pass
        winner (step 1, ``min=1``). After each pass the first row's values
        are cast to ``int`` and stored in ``self.best_max_depth`` and
        ``self.best_min_child_weight``.

        Returns:
            Both score tables concatenated, de-duplicated and sorted by
            ``'score'`` in the direction given by ``self.metric_ascending``.
        """
        coarse = self.score_depth()
        # Row 0 is taken as the winner -- presumably score_depth returns
        # its rows best-first; verify against its implementation.
        top = coarse.iloc[0]
        self.best_max_depth = int(top['max_depth'])
        self.best_min_child_weight = int(top['min_child_weight'])

        depth_grid = self.detail_range(self.best_max_depth, 1, min=1)
        weight_grid = self.detail_range(
            self.best_min_child_weight, 1, min=1)
        fine = self.score_depth(depth_grid, weight_grid)
        top = fine.iloc[0]
        self.best_max_depth = int(top['max_depth'])
        self.best_min_child_weight = int(top['min_child_weight'])

        combined = pd.concat([coarse, fine], ignore_index=True)
        unique_rows = combined.drop_duplicates()
        return unique_rows.sort_values(
            'score', ascending=self.metric_ascending)
示例#6
0
    def tune_sample(self):
        """Search for the best ``subsample`` / ``colsample_bytree`` pair.

        ``self.best_subsample`` and ``self.best_colsample_bytree`` are set
        from the first row of the default ``score_samples()`` table, then
        overwritten from the first row of a second table scored over the
        ``detail_range(..., 0.05, max=1)`` range of each value.

        Returns:
            The union of both tables (duplicates dropped), ordered by the
            ``'score'`` column using ``self.metric_ascending``.
        """
        initial_scores = self.score_samples()
        leader = initial_scores.iloc[0]
        self.best_subsample = leader['subsample']
        self.best_colsample_bytree = leader['colsample_bytree']

        # Second pass around the first winners; the exact grids are whatever
        # detail_range produces for step 0.05 with max=1.
        near_subsample = self.detail_range(self.best_subsample, 0.05, max=1)
        near_colsample = self.detail_range(self.best_colsample_bytree, 0.05, max=1)
        refined_scores = self.score_samples(near_subsample, near_colsample)
        leader = refined_scores.iloc[0]
        self.best_subsample = leader['subsample']
        self.best_colsample_bytree = leader['colsample_bytree']

        all_scores = pd.concat(
            [initial_scores, refined_scores], ignore_index=True)
        all_scores = all_scores.drop_duplicates()
        return all_scores.sort_values('score', ascending=self.metric_ascending)
示例#7
0
    def tune_depth(self):
        """Search for the best ``max_depth`` / ``min_child_weight`` pair.

        ``self.best_max_depth`` and ``self.best_min_child_weight`` are set
        (as ``int``) from the first row of the default ``score_depth()``
        table, then overwritten from the first row of a second table scored
        over the ``detail_range(..., 1, min=1)`` range of each value.

        Returns:
            The union of both tables (duplicates dropped), ordered by the
            ``'score'`` column using ``self.metric_ascending``.
        """
        initial_scores = self.score_depth()
        leader = initial_scores.iloc[0]
        self.best_max_depth = int(leader['max_depth'])
        self.best_min_child_weight = int(leader['min_child_weight'])

        # Second pass around the first winners; the exact grids are whatever
        # detail_range produces for step 1 with min=1.
        near_depth = self.detail_range(self.best_max_depth, 1, min=1)
        near_weight = self.detail_range(self.best_min_child_weight, 1, min=1)
        refined_scores = self.score_depth(near_depth, near_weight)
        leader = refined_scores.iloc[0]
        self.best_max_depth = int(leader['max_depth'])
        self.best_min_child_weight = int(leader['min_child_weight'])

        all_scores = pd.concat(
            [initial_scores, refined_scores], ignore_index=True)
        all_scores = all_scores.drop_duplicates()
        return all_scores.sort_values('score', ascending=self.metric_ascending)
示例#8
0
文件: model.py 项目: tyz910/kaggle
def model_train_cv_parallel(model, X, y, cv=None, proba=True, n_jobs=8):
    """Fan ``model_train_cv`` out over the CV folds and concatenate the outputs.

    Args:
        model: Forwarded as-is to ``model_train_cv``.
        X: Forwarded as-is to ``model_train_cv``.
        y: Forwarded as-is to ``model_train_cv``; ``len(y)`` sizes the
            default splitter.
        cv: Iterable of folds; if ``None``, defaults to a shuffled 16-fold
            ``KFold`` over ``len(y)`` samples with ``random_state=1234``.
        proba: Forwarded as-is to ``model_train_cv``.
        n_jobs: Number of jobs given to ``Parallel``.

    Returns:
        The result of ``pd.concat`` over the per-fold outputs.
    """
    if cv is None:
        cv = skl.cross_validation.KFold(
            len(y),
            n_folds=16,
            shuffle=True,
            random_state=1234,
        )

    executor = Parallel(n_jobs=n_jobs, verbose=True)
    # One delayed call per fold; every other argument is the same for all.
    per_fold = executor(
        delayed(model_train_cv)(fold, model, X, y, proba) for fold in cv
    )
    return pd.concat(per_fold)
示例#9
0
def synergy_calc_cv_parallel(heroes, y, cv):
    """Run ``synergy_calc_cv`` for every fold in ``cv`` in parallel.

    Args:
        heroes: Forwarded as-is to ``synergy_calc_cv``.
        y: Forwarded as-is to ``synergy_calc_cv``.
        cv: Iterable of folds; one ``synergy_calc_cv`` call is scheduled
            per element.

    Returns:
        The per-fold results combined with ``pd.concat``.
    """
    # The worker count is fixed at 8 for this helper.
    executor = Parallel(n_jobs=8, verbose=True)
    per_fold = executor(
        delayed(synergy_calc_cv)(fold, heroes, y) for fold in cv
    )
    return pd.concat(per_fold)