Python stratified_split示例，otto_utils.utils.stratified_split Python示例

示例#1

0

显示文件

文件： xgboost.py 项目： shreyas12joshi/ML_GRAD_PROJECT

    clf = XGBoost(max_iterations=4800, max_depth=12, min_child_weight=4.9208250938262745,
                  row_subsample=.9134478530382129, min_loss_reduction=.5132278416508804,
                  column_subsample=.730128689911957, step_size=.009)

    if MODE == 'cv':
        scores, predictions = utils.make_blender_cv(clf, train, labels, calibrate=False)
        print ('CV:', scores, 'Mean log loss:', np.mean(scores))
        utils.write_blender_data(consts.BLEND_PATH, MODEL_NAME + '.csv', predictions)
    elif MODE == 'submission':
        clf.fit(train, labels)
        predictions = clf.predict_proba(test)
        utils.save_submission(consts.DATA_SAMPLE_SUBMISSION_PATH,
                              os.path.join(consts.ENSEMBLE_PATH, MODEL_NAME + '.csv'),
                              predictions)
    elif MODE == 'holdout':
        train, labels, _, _ = utils.stratified_split(train, labels, test_size=.7)
        score = utils.hold_out_evaluation(clf, train, labels, calibrate=False)
        print ('Log loss:', score)
    elif MODE == 'tune':
        # Objective function
        def objective(args):
            max_depth, min_child_weight, row_subsample, min_loss_reduction, column_subsample = args
            clf = XGBoost(max_depth=max_depth, min_child_weight=min_child_weight,
                          row_subsample=row_subsample, min_loss_reduction=min_loss_reduction,
                          column_subsample=column_subsample, verbose=False)
            score = utils.hold_out_evaluation(clf, train, labels, calibrate=False)
            print ('max_depth, min_child_weight, row_subsample, min_loss_reduction, column_subsample, logloss')
            print (args, score)
            return score
        # Searching space
        space = (

示例#2

0

显示文件

                                                 train,
                                                 labels,
                                                 calibrate=False)
     print('CV:', scores, 'Mean log loss:', np.mean(scores))
     utils.write_blender_data(consts.BLEND_PATH, MODEL_NAME + '.csv',
                              predictions)
 elif MODE == 'submission':
     clf.fit(train, labels)
     predictions = clf.predict_proba(test)
     utils.save_submission(
         consts.DATA_SAMPLE_SUBMISSION_PATH,
         os.path.join(consts.ENSEMBLE_PATH, MODEL_NAME + '.csv'),
         predictions)
 elif MODE == 'holdout':
     train, labels, _, _ = utils.stratified_split(train,
                                                  labels,
                                                  test_size=.7)
     score = utils.hold_out_evaluation(clf, train, labels, calibrate=False)
     print('Log loss:', score)
 elif MODE == 'tune':
     # Objective function
     def objective(args):
         max_depth, min_child_weight, row_subsample, min_loss_reduction, column_subsample = args
         clf = XGBoost(max_depth=max_depth,
                       min_child_weight=min_child_weight,
                       row_subsample=row_subsample,
                       min_loss_reduction=min_loss_reduction,
                       column_subsample=column_subsample,
                       verbose=False)
         score = utils.hold_out_evaluation(clf,
                                           train,

示例#3

0

显示文件

文件： svm.py 项目： wuzhongdehua/kaggle_otto

                                                   cv=utils.get_cv(labels))
    fitted_classifier = calibrated_classifier.fit(train, labels)
    predictions = fitted_classifier.predict_proba(test)
    utils.save_submission(
        consts.DATA_SAMPLE_SUBMISSION_PATH,
        os.path.join(consts.ENSEMBLE_PATH, MODEL_NAME + '.csv'), predictions)
elif MODE == 'holdout':
    score = utils.hold_out_evaluation(clf,
                                      train,
                                      labels,
                                      calibrate=False,
                                      test_size=0.9)
    print 'Log loss:', score
elif MODE == 'tune':
    train, labels, valid, valid_labels = utils.stratified_split(train,
                                                                labels,
                                                                test_size=.8)
    from sklearn.metrics import log_loss

    # Objective function
    def objective(args):
        c, gamma = args
        clf = OneVsRestClassifier(
            svm.SVC(C=c,
                    kernel='rbf',
                    tol=.001,
                    gamma=gamma,
                    probability=True,
                    random_state=23))
        score1 = 0
        score2 = utils.hold_out_evaluation(clf, train, labels, calibrate=False)

示例#4

0

显示文件

文件： svm.py 项目： ShrikanthRamanathan/kaggle_otto

if MODE == 'cv':
    scores, predictions = utils.make_blender_cv(clf, train, labels, calibrate=True)
    print 'CV:', scores, 'Mean log loss:', np.mean(scores)
    utils.write_blender_data(consts.BLEND_PATH, MODEL_NAME + '.csv', predictions)
elif MODE == 'submission':
    calibrated_classifier = CalibratedClassifierCV(clf, method='isotonic', cv=utils.get_cv(labels))
    fitted_classifier = calibrated_classifier.fit(train, labels)
    predictions = fitted_classifier.predict_proba(test)
    utils.save_submission(consts.DATA_SAMPLE_SUBMISSION_PATH,
                          os.path.join(consts.ENSEMBLE_PATH, MODEL_NAME + '.csv'),
                          predictions)
elif MODE == 'holdout':
    score = utils.hold_out_evaluation(clf, train, labels, calibrate=False, test_size=0.9)
    print 'Log loss:', score
elif MODE == 'tune':
    train, labels, valid, valid_labels = utils.stratified_split(train, labels, test_size=.8)
    from sklearn.metrics import log_loss
    # Objective function
    def objective(args):
        c, gamma = args
        clf = OneVsRestClassifier(svm.SVC(C=c, kernel='rbf', tol=.001, gamma=gamma,
                                  probability=True, random_state=23))
        score1 = 0
        score2 = utils.hold_out_evaluation(clf, train, labels, calibrate=False)
        score = log_loss(valid_labels, clf.predict_proba(valid))
        print 'C=%f, gamma=%f, score1=%f, score2=%f, score=%f' % (c, gamma, score1, score2, score)
        return score
    # Searching space
    space = (
        hp.uniform('c', 4, 10),
        hp.uniform('gamma', 0.3, 3)