Пример #1
0
def get_regressor(x,
                  y,
                  n_estimators=1500,
                  pCut=0.05,
                  n_tries=5,
                  verbose=False):
    """Fit several ExtraTreesRegressor models and keep the best-scoring one.

    One regressor is fitted per try, seeded with ``random_state=i`` for each
    ``i`` in ``range(n_tries)``. The regressor with the highest out-of-bag
    score is returned.

    Parameters
    ----------
    x : pandas.DataFrame or array-like
        Predictor matrix. When it has a ``columns`` attribute, those labels
        index the returned feature importances.
    y : array-like
        Target values.
    n_estimators : int, optional
        Number of estimators in each fitted ensemble.
    pCut : float, optional
        Not used by this function; kept so existing callers keep working.
    n_tries : int, optional
        Number of regressors to fit. Must be at least 1.
    verbose : bool, optional
        If True, write progress messages to stderr.

    Returns
    -------
    clf : ExtraTreesRegressor
        The fitted regressor with the highest out-of-bag score. It carries an
        extra ``feature_importances`` attribute: a pandas.Series built from
        its ``feature_importances_``.
    oob_scores : list
        Out-of-bag score of every try, in seed order.

    Raises
    ------
    ValueError
        If ``n_tries`` is smaller than 1.
    """
    if n_tries < 1:
        # With zero fits there is nothing to select from; fail with a clear
        # message instead of an opaque argmax-on-empty-sequence error.
        raise ValueError('n_tries must be at least 1, got %r' % (n_tries,))

    if verbose:
        sys.stderr.write('getting regressor\n')
    clfs = []
    oob_scores = []

    for i in range(n_tries):
        if verbose:
            sys.stderr.write('%d.' % i)

        # bootstrap=True is what makes the out-of-bag score available.
        clf = ExtraTreesRegressor(n_estimators=n_estimators,
                                  oob_score=True,
                                  bootstrap=True,
                                  max_features='sqrt',
                                  n_jobs=1,
                                  random_state=i).fit(x, y)
        clfs.append(clf)
        oob_scores.append(clf.oob_score_)

    clf = clfs[np.argmax(oob_scores)]
    # Label importances by column when x is a DataFrame; inputs without
    # column labels fall back to a default positional index.
    clf.feature_importances = pd.Series(clf.feature_importances_,
                                        index=getattr(x, 'columns', None))

    return clf, oob_scores
Пример #2
0
def get_regressor(x, y, n_estimators=1500, n_tries=5, verbose=False):
    """Calculate an ExtraTreesRegressor on predictor and target variables

    The regressor is fitted ``n_tries`` times, using ``random_state=i`` for
    each ``i`` in ``range(n_tries)``, and the fit with the highest
    out-of-bag score is kept.

    Parameters
    ----------
    x : pandas.DataFrame or array-like
        Predictor matrix. Column labels, when present, are used to index
        the feature importances attached to the returned regressor.
    y : array-like
        Target vector
    n_estimators : int, optional
        Number of estimators to use
    n_tries : int, optional
        Number of attempts to calculate regression; must be at least 1
    verbose : bool, optional
        If True, output progress statements to stderr

    Returns
    -------
    regressor : sklearn.ensemble.ExtraTreesRegressor
        The regressor with the highest out of bag score of all the
        attempted "tries". An extra ``feature_importances`` attribute
        (pandas.Series of ``feature_importances_``) is set on it.
    oob_scores : list
        Out of bag scores of every attempted regressor, in try order

    Raises
    ------
    ValueError
        If ``n_tries`` is less than 1
    """
    if n_tries < 1:
        # No fit means no best regressor; report the real cause rather than
        # letting argmax fail on an empty list further down.
        raise ValueError('n_tries must be at least 1, got %r' % (n_tries,))

    if verbose:
        sys.stderr.write('Getting regressor\n')
    clfs = []
    oob_scores = []

    for i in range(n_tries):
        if verbose:
            sys.stderr.write('%d.' % i)

        # Out-of-bag scoring requires bootstrap sampling to be enabled.
        clf = ExtraTreesRegressor(n_estimators=n_estimators,
                                  oob_score=True,
                                  bootstrap=True,
                                  max_features='sqrt',
                                  n_jobs=1,
                                  random_state=i).fit(x, y)
        clfs.append(clf)
        oob_scores.append(clf.oob_score_)

    clf = clfs[np.argmax(oob_scores)]
    # Plain arrays carry no column labels, so fall back to the default
    # positional index instead of failing on a missing attribute.
    clf.feature_importances = pd.Series(clf.feature_importances_,
                                        index=getattr(x, 'columns', None))

    return clf, oob_scores
Пример #3
0
def get_regressor(x, y, n_estimators=1500, n_tries=5,
                  verbose=False):
    """Calculate an ExtraTreesRegressor on predictor and target variables

    Fits one regressor per try (try ``i`` uses ``random_state=i``) and
    returns the one whose out-of-bag score is highest.

    Parameters
    ----------
    x : pandas.DataFrame or array-like
        Predictor matrix. If it exposes ``columns``, they label the feature
        importances stored on the returned regressor.
    y : array-like
        Target vector
    n_estimators : int, optional
        Number of estimators to use
    n_tries : int, optional
        Number of attempts to calculate regression; must be at least 1
    verbose : bool, optional
        If True, output progress statements to stderr

    Returns
    -------
    regressor : sklearn.ensemble.ExtraTreesRegressor
        The regressor with the highest out of bag score of all the
        attempted "tries", with an added ``feature_importances`` attribute
        holding its ``feature_importances_`` as a pandas.Series.
    oob_scores : list
        Out of bag scores of every attempted regressor, in try order

    Raises
    ------
    ValueError
        If ``n_tries`` is less than 1
    """
    if n_tries < 1:
        # Selecting the best of zero fits is impossible; say so explicitly
        # instead of surfacing an argmax error on an empty list.
        raise ValueError('n_tries must be at least 1, got %r' % (n_tries,))

    if verbose:
        sys.stderr.write('Getting regressor\n')
    clfs = []
    oob_scores = []

    for i in range(n_tries):
        if verbose:
            sys.stderr.write('%d.' % i)

        # oob_score=True only works together with bootstrap=True.
        clf = ExtraTreesRegressor(n_estimators=n_estimators, oob_score=True,
                                  bootstrap=True, max_features='sqrt',
                                  n_jobs=1, random_state=i).fit(x, y)
        clfs.append(clf)
        oob_scores.append(clf.oob_score_)

    clf = clfs[np.argmax(oob_scores)]
    # Use column labels when x provides them; otherwise let pandas assign
    # a default positional index.
    clf.feature_importances = pd.Series(clf.feature_importances_,
                                        index=getattr(x, 'columns', None))

    return clf, oob_scores
Пример #4
0
def get_regressor(x, y, n_estimators=1500, pCut=0.05, n_tries=5,
                  verbose=False):
    """Fit ExtraTreesRegressor ``n_tries`` times and return the best fit.

    Each try ``i`` is fitted with ``random_state=i``; the winner is the fit
    with the highest out-of-bag score.

    Parameters
    ----------
    x : pandas.DataFrame or array-like
        Predictor matrix. Its ``columns``, when available, label the
        feature importances attached to the returned regressor.
    y : array-like
        Target values.
    n_estimators : int, optional
        Number of estimators in each fitted ensemble.
    pCut : float, optional
        Ignored by this function; retained for backward compatibility.
    n_tries : int, optional
        Number of regressors to fit. Must be at least 1.
    verbose : bool, optional
        If True, write progress messages to stderr.

    Returns
    -------
    clf : ExtraTreesRegressor
        The fitted regressor with the highest out-of-bag score, with an
        added ``feature_importances`` attribute (pandas.Series of its
        ``feature_importances_``).
    oob_scores : list
        Out-of-bag score of every try, in seed order.

    Raises
    ------
    ValueError
        If ``n_tries`` is smaller than 1.
    """
    if n_tries < 1:
        # Zero tries leaves nothing to choose from; raise a descriptive
        # error rather than failing later inside argmax.
        raise ValueError('n_tries must be at least 1, got %r' % (n_tries,))

    if verbose:
        sys.stderr.write('getting regressor\n')
    clfs = []
    oob_scores = []

    for i in range(n_tries):
        if verbose:
            sys.stderr.write('%d.' % i)

        # Bootstrap sampling is required for the out-of-bag score.
        clf = ExtraTreesRegressor(n_estimators=n_estimators, oob_score=True,
                                  bootstrap=True, max_features='sqrt',
                                  n_jobs=1, random_state=i).fit(x, y)
        clfs.append(clf)
        oob_scores.append(clf.oob_score_)

    clf = clfs[np.argmax(oob_scores)]
    # DataFrame inputs label the importances by column; anything without
    # a ``columns`` attribute gets a default positional index.
    clf.feature_importances = pd.Series(clf.feature_importances_,
                                        index=getattr(x, 'columns', None))

    return clf, oob_scores