Пример #1
0
def test_randomized_logistic_sparse():
    # Check randomized sparse logistic regression on sparse data
    iris = load_iris()
    X = iris.data[:, [0, 2]]
    y = iris.target
    X = X[y != 2]
    y = y[y != 2]

    # center here because sparse matrices are usually not centered
    # labels should not be centered
    X, _, _, _, _ = _preprocess_data(X, y, True, True)

    X_sp = sparse.csr_matrix(X)

    F, _ = f_classif(X, y)

    scaling = 0.3
    clf = RandomizedLogisticRegression(verbose=False, C=1., random_state=42,
                                       scaling=scaling, n_resampling=50,
                                       tol=1e-3)
    feature_scores = clf.fit(X, y).scores_
    clf = RandomizedLogisticRegression(verbose=False, C=1., random_state=42,
                                       scaling=scaling, n_resampling=50,
                                       tol=1e-3)
    feature_scores_sp = clf.fit(X_sp, y).scores_
    assert_array_equal(feature_scores, feature_scores_sp)
Пример #2
0
def test_randomized_logistic():
    # Check randomized sparse logistic regression
    iris = load_iris()
    X = iris.data[:, [0, 2]]
    y = iris.target
    X = X[y != 2]
    y = y[y != 2]

    F, _ = f_classif(X, y)

    scaling = 0.3
    clf = RandomizedLogisticRegression(verbose=False, C=1., random_state=42,
                                       scaling=scaling, n_resampling=50,
                                       tol=1e-3)
    X_orig = X.copy()
    feature_scores = clf.fit(X, y).scores_
    assert_array_equal(X, X_orig)   # fit does not modify X
    assert_array_equal(np.argsort(F), np.argsort(feature_scores))

    clf = RandomizedLogisticRegression(verbose=False, C=[1., 0.5],
                                       random_state=42, scaling=scaling,
                                       n_resampling=50, tol=1e-3)
    feature_scores = clf.fit(X, y).scores_
    assert_array_equal(np.argsort(F), np.argsort(feature_scores))

    clf = RandomizedLogisticRegression(verbose=False, C=[[1., 0.5]])
    assert_raises(ValueError, clf.fit, X, y)
Пример #3
0
def test_randomized_logistic():
    # Check randomized sparse logistic regression
    iris = load_iris()
    X = iris.data[:, [0, 2]]
    y = iris.target
    X = X[y != 2]
    y = y[y != 2]

    F, _ = f_classif(X, y)

    scaling = 0.3
    clf = RandomizedLogisticRegression(verbose=False,
                                       C=1.,
                                       random_state=42,
                                       scaling=scaling,
                                       n_resampling=50,
                                       tol=1e-3)
    X_orig = X.copy()
    feature_scores = clf.fit(X, y).scores_
    assert_array_equal(X, X_orig)  # fit does not modify X
    assert_array_equal(np.argsort(F), np.argsort(feature_scores))

    clf = RandomizedLogisticRegression(verbose=False,
                                       C=[1., 0.5],
                                       random_state=42,
                                       scaling=scaling,
                                       n_resampling=50,
                                       tol=1e-3)
    feature_scores = clf.fit(X, y).scores_
    assert_array_equal(np.argsort(F), np.argsort(feature_scores))

    clf = RandomizedLogisticRegression(verbose=False, C=[[1., 0.5]])
    assert_raises(ValueError, clf.fit, X, y)
Пример #4
0
def test_randomized_logistic():
    """Check randomized sparse logistic regression"""
    iris = load_iris()
    X = iris.data[:, [0, 2]]
    y = iris.target
    X = X[y != 2]
    y = y[y != 2]

    F, _ = f_classif(X, y)

    scaling = 0.3
    clf = RandomizedLogisticRegression(verbose=False, C=1., random_state=42,
                                scaling=scaling, n_resampling=50, tol=1e-3)
    feature_scores = clf.fit(X, y).scores_
    assert_equal(np.argsort(F), np.argsort(feature_scores))

    clf = RandomizedLogisticRegression(verbose=False, C=[1., 0.5],
            random_state=42, scaling=scaling, n_resampling=50, tol=1e-3)
    feature_scores = clf.fit(X, y).scores_
    assert_equal(np.argsort(F), np.argsort(feature_scores))
Пример #5
0
def test_randomized_logistic_sparse():
    # Check randomized sparse logistic regression on sparse data
    iris = load_iris()
    X = iris.data[:, [0, 2]]
    y = iris.target
    X = X[y != 2]
    y = y[y != 2]

    # center here because sparse matrices are usually not centered
    # labels should not be centered
    X, _, _, _, _ = _preprocess_data(X, y, True, True)

    X_sp = sparse.csr_matrix(X)

    F, _ = f_classif(X, y)

    scaling = 0.3
    clf = RandomizedLogisticRegression(verbose=False,
                                       C=1.,
                                       random_state=42,
                                       scaling=scaling,
                                       n_resampling=50,
                                       tol=1e-3)
    feature_scores = clf.fit(X, y).scores_
    clf = RandomizedLogisticRegression(verbose=False,
                                       C=1.,
                                       random_state=42,
                                       scaling=scaling,
                                       n_resampling=50,
                                       tol=1e-3)
    feature_scores_sp = clf.fit(X_sp, y).scores_
    assert_array_equal(feature_scores, feature_scores_sp)
Пример #6
0
def test_randomized_logistic():
    """Check randomized sparse logistic regression"""
    iris = load_iris()
    X = iris.data[:, [0, 2]]
    y = iris.target
    X = X[y != 2]
    y = y[y != 2]

    F, _ = f_classif(X, y)

    scaling = 0.3
    clf = RandomizedLogisticRegression(verbose=False,
                                       C=1.,
                                       random_state=42,
                                       scaling=scaling,
                                       n_resampling=50,
                                       tol=1e-3)
    feature_scores = clf.fit(X, y).scores_
    assert_array_equal(np.argsort(F), np.argsort(feature_scores))

    clf = RandomizedLogisticRegression(verbose=False,
                                       C=[1., 0.5],
                                       random_state=42,
                                       scaling=scaling,
                                       n_resampling=50,
                                       tol=1e-3)
    feature_scores = clf.fit(X, y).scores_
    assert_array_equal(np.argsort(F), np.argsort(feature_scores))
def get_lasso_feature_scores(x, y, mode=CLASSIFICATION, scaling=0.5, 
                             sample_fraction=0.75, n_resampling=200,
                             random_state=None):
    '''
    Calculate features scores using a randomized lasso (regression) or 
    randomized logistic regression (classification). This is also known as 
    stability selection.
    
    see http://scikit-learn.org/stable/modules/feature_selection.html for 
    details. 
    
    Parameters
    ----------   
    x : structured array
    y : 1D nd.array
    mode : {CLASSIFICATION, REGRESSION}
    scaling : float, optional
              scaling parameter, should be between 0 and 1
    sample_fraction : float, optional
                      the fraction of samples to used in each randomized 
                      dataset
    n_resmpling : int, optional
                  the number of times the model is trained on a random subset 
                  of the data
    random_state : int, optional
                   if it is an int, it specifies the seed to use, defaults to 
                   None.
                         
    Returns
    -------
    pandas DataFrame
        sorted in descending order of tuples with uncertainty and feature 
        scores         
         
    '''
    
    uncs = recfunctions.get_names(x.dtype)
    
    x = _prepare_experiments(x)
    
    if mode==CLASSIFICATION:

        lfs = RandomizedLogisticRegression(scaling=scaling, 
                                           sample_fraction=sample_fraction,
                                           n_resampling=n_resampling, 
                                           random_state=random_state)
        lfs.fit(x,y)
    elif  mode==REGRESSION:
        # we use LassoLarsCV to determine alpha see
        # http://scikit-learn.org/stable/auto_examples/linear_model/plot_sparse_recovery.html
        lars_cv = LassoLarsCV(cv=6).fit(x, y,)
        alphas = np.linspace(lars_cv.alphas_[0], .1 * lars_cv.alphas_[0], 6)
        
        # fit the randomized lasso        
        lfs = RandomizedLasso(alpha=alphas,scaling=scaling, 
                              sample_fraction=sample_fraction,
                              n_resampling=n_resampling,
                              random_state=random_state)
        lfs.fit(x, y)
    else:
        raise ValueError('{} invalid value for mode'.format(mode))

    importances = lfs.scores_
    importances = zip(uncs, importances)
    importances = list(importances)
    importances.sort(key=itemgetter(1), reverse=True)
    importances = pd.DataFrame(importances)


    return importances
Пример #8
0
def get_lasso_feature_scores(x, y, mode=CLASSIFICATION, scaling=0.5, 
                             sample_fraction=0.75, n_resampling=200,
                             random_state=None):
    '''
    Calculate features scores using a randomized lasso (regression) or 
    randomized logistic regression (classification). This is also known as 
    stability selection.
    
    see http://scikit-learn.org/stable/modules/feature_selection.html for 
    details. 
    
    Parameters
    ----------   
    x : structured array
    y : 1D nd.array
    mode : {CLASSIFICATION, REGRESSION}
    scaling : float, optional
              scaling parameter, should be between 0 and 1
    sample_fraction : float, optional
                      the fraction of samples to used in each randomized 
                      dataset
    n_resmpling : int, optional
                  the number of times the model is trained on a random subset 
                  of the data
    random_state : int, optional
                   if it is an int, it specifies the seed to use, defaults to 
                   None.
                         
    Returns
    -------
    pandas DataFrame
        sorted in descending order of tuples with uncertainty and feature 
        scores         
         
    '''
    
    uncs = recfunctions.get_names(x.dtype)
    
    x = _prepare_experiments(x)
    
    if mode==CLASSIFICATION:

        lfs = RandomizedLogisticRegression(scaling=scaling, 
                                           sample_fraction=sample_fraction,
                                           n_resampling=n_resampling, 
                                           random_state=random_state)
        lfs.fit(x,y)
    elif  mode==REGRESSION:
        # we use LassoLarsCV to determine alpha see
        # http://scikit-learn.org/stable/auto_examples/linear_model/plot_sparse_recovery.html
        lars_cv = LassoLarsCV(cv=6).fit(x, y,)
        alphas = np.linspace(lars_cv.alphas_[0], .1 * lars_cv.alphas_[0], 6)
        
        # fit the randomized lasso        
        lfs = RandomizedLasso(alpha=alphas,scaling=scaling, 
                              sample_fraction=sample_fraction,
                              n_resampling=n_resampling,
                              random_state=random_state)
        lfs.fit(x, y)
    else:
        raise ValueError('{} invalid value for mode'.format(mode))

    importances = lfs.scores_
    importances = zip(uncs, importances)
    importances = list(importances)
    importances.sort(key=itemgetter(1), reverse=True)
    importances = pd.DataFrame(importances)


    return importances
Пример #9
0
			'PLSCanonical':PLSCanonical(),
			'PLSRegression':PLSRegression(),
			'PLSSVD':PLSSVD(),
			'PassiveAggressiveClassifier':PassiveAggressiveClassifier(),
			'PassiveAggressiveRegressor':PassiveAggressiveRegressor(),
			'Perceptron':Perceptron(),
			'ProjectedGradientNMF':ProjectedGradientNMF(),
			'QuadraticDiscriminantAnalysis':QuadraticDiscriminantAnalysis(),
			'RANSACRegressor':RANSACRegressor(),
			'RBFSampler':RBFSampler(),
			'RadiusNeighborsClassifier':RadiusNeighborsClassifier(),
			'RadiusNeighborsRegressor':RadiusNeighborsRegressor(),
			'RandomForestClassifier':RandomForestClassifier(),
			'RandomForestRegressor':RandomForestRegressor(),
			'RandomizedLasso':RandomizedLasso(),
			'RandomizedLogisticRegression':RandomizedLogisticRegression(),
			'RandomizedPCA':RandomizedPCA(),
			'Ridge':Ridge(),
			'RidgeCV':RidgeCV(),
			'RidgeClassifier':RidgeClassifier(),
			'RidgeClassifierCV':RidgeClassifierCV(),
			'RobustScaler':RobustScaler(),
			'SGDClassifier':SGDClassifier(),
			'SGDRegressor':SGDRegressor(),
			'SVC':SVC(),
			'SVR':SVR(),
			'SelectFdr':SelectFdr(),
			'SelectFpr':SelectFpr(),
			'SelectFwe':SelectFwe(),
			'SelectKBest':SelectKBest(),
			'SelectPercentile':SelectPercentile(),
def get_lasso_feature_scores(results,
                             classify,
                             scaling=0.5,
                             sample_fraction=0.75,
                             n_resampling=200,
                             random_state=None):
    '''
    Calculate features scores using a randomized lasso (regression) or 
    randomized logistic regression (classification). This is also known as 
    stability selection.
    
    see http://scikit-learn.org/stable/modules/feature_selection.html for 
    details. 
    
    Parameters
    ----------   
    results : tuple
    classify : callable or str
               a classify function or variable analogous to PRIM
    scaling : float, optional
              scaling parameter, should be between 0 and 1
    sample_fraction : float, optional
                      the fraction of samples to used in each randomized 
                      dataset
    n_resmpling : int, optional
                  the number of times the model is trained on a random subset 
                  of the data
    random_state : int, optional
                   if it is an int, it specifies the seed to use, defaults to 
                   None.
                         
    Returns
    -------
    list of tuples 
        sorted in descending order of tuples with uncertainty and feature 
        scores         
         
    '''

    experiments, outcomes = results
    uncs = recfunctions.get_names(experiments.dtype)

    x = _prepare_experiments(experiments)
    y, categorical = _prepare_outcomes(outcomes, classify)

    if categorical:

        lfs = RandomizedLogisticRegression(scaling=scaling,
                                           sample_fraction=sample_fraction,
                                           n_resampling=n_resampling,
                                           random_state=random_state)
        lfs.fit(x, y)
    else:
        # we use LassoLarsCV to determine alpha see
        # http://scikit-learn.org/stable/auto_examples/linear_model/plot_sparse_recovery.html
        lars_cv = LassoLarsCV(cv=6).fit(
            x,
            y,
        )
        alphas = np.linspace(lars_cv.alphas_[0], .1 * lars_cv.alphas_[0], 6)

        # fit the randomized lasso
        lfs = RandomizedLasso(alpha=alphas,
                              scaling=scaling,
                              sample_fraction=sample_fraction,
                              n_resampling=n_resampling,
                              random_state=random_state)
        lfs.fit(x, y)

    importances = lfs.scores_
    importances = zip(uncs, importances)
    importances = list(importances)
    importances.sort(key=itemgetter(1), reverse=True)

    return importances