Пример #1
0
class LassoLarsCVImpl():
    """Adapter that forwards ``fit`` and ``predict`` to an ``SKLModel``.

    Every constructor argument is recorded in ``self._hyperparams`` and the
    same mapping is passed, as keyword arguments, to ``SKLModel``.
    """

    def __init__(
            self,
            fit_intercept=True,
            verbose=False,
            max_iter=500,
            normalize=True,
            precompute='auto',
            cv=3,
            max_n_alphas=1000,
            n_jobs=None,
            eps=2.220446049250313e-16,
            copy_X=True,
            positive=False):
        """Store the hyperparameters and build the wrapped model from them."""
        settings = dict(
            fit_intercept=fit_intercept,
            verbose=verbose,
            max_iter=max_iter,
            normalize=normalize,
            precompute=precompute,
            cv=cv,
            max_n_alphas=max_n_alphas,
            n_jobs=n_jobs,
            eps=eps,
            copy_X=copy_X,
            positive=positive,
        )
        self._hyperparams = settings
        self._wrapped_model = SKLModel(**settings)

    def fit(self, X, y=None):
        """Fit the wrapped model and return ``self``.

        ``y`` is forwarded only when it is not ``None``; otherwise the
        wrapped model's ``fit`` is called with ``X`` alone.
        """
        fit_args = (X,) if y is None else (X, y)
        self._wrapped_model.fit(*fit_args)
        return self

    def predict(self, X):
        """Return the wrapped model's ``predict`` result for ``X``."""
        estimator = self._wrapped_model
        return estimator.predict(X)
Пример #2
0
 def __init__(self, fit_intercept=True, verbose=False, max_iter=500,
              normalize=True, precompute='auto', cv=3, max_n_alphas=1000,
              n_jobs=None, eps=2.220446049250313e-16, copy_X=True,
              positive=False):
     """Record the constructor arguments and build the wrapped ``Op``.

     The arguments are kept in ``self._hyperparams`` (keyed by parameter
     name) and that mapping is expanded into keyword arguments for ``Op``.
     """
     hyperparams = dict([
         ('fit_intercept', fit_intercept),
         ('verbose', verbose),
         ('max_iter', max_iter),
         ('normalize', normalize),
         ('precompute', precompute),
         ('cv', cv),
         ('max_n_alphas', max_n_alphas),
         ('n_jobs', n_jobs),
         ('eps', eps),
         ('copy_X', copy_X),
         ('positive', positive),
     ])
     self._hyperparams = hyperparams
     self._wrapped_model = Op(**hyperparams)
Пример #3
0
 def fit(self, X, y=None):
     """Build a fresh ``SKLModel`` from the stored hyperparameters and fit it.

     The new model is kept on ``self._sklearn_model``. ``y`` is passed to
     the model's ``fit`` only when it is not ``None``. Returns ``self``.
     """
     estimator = SKLModel(**self._hyperparams)
     self._sklearn_model = estimator
     training_args = (X,) if y is None else (X, y)
     estimator.fit(*training_args)
     return self
Пример #4
0
    def connectWidgets(self):
        """Initialise the parameter widgets from the estimators' defaults.

        Default-constructed ``LassoLars``, ``LassoLarsCV`` and ``LassoLarsIC``
        instances supply the initial values; the ``cv`` spin box is the one
        exception and is set to a hard-coded 3.
        """
        # LassoLARS
        ll = LassoLars()
        self.alpha_text.setText(str(ll.alpha))
        self.fit_interceptCheckBox.setChecked(ll.fit_intercept)
        self.verboseCheckBox.setChecked(ll.verbose)
        self.normalizeCheckBox.setChecked(ll.normalize)
        self.setComboBox(self.precomputeComboBox,
                         ['True', 'False', 'auto', 'array-like'])
        self.defaultComboItem(self.precomputeComboBox, ll.precompute)
        self.max_iterSpinBox.setValue(ll.max_iter)
        self.copy_XCheckBox.setChecked(ll.copy_X)
        self.fit_pathCheckBox.setChecked(ll.fit_path)
        self.positiveCheckBox.setChecked(ll.positive)

        # LassoLarsCV
        llcv = LassoLarsCV()
        self.max_n_alphasSpinBox.setValue(llcv.max_n_alphas)
        # scikit-learn releases that default ``n_jobs`` to None would hand
        # None to the spin box (presumably a Qt spin box, whose setValue
        # needs an int). None means "one job" there, so show it as 1.
        default_n_jobs = llcv.n_jobs
        self.n_jobsSpinBox.setValue(
            1 if default_n_jobs is None else default_n_jobs)

        # LassoLarsIC
        llic = LassoLarsIC()
        # cv is not read from an estimator: the initial value is fixed at 3.
        self.cvSpinBox.setValue(3)
        self.setComboBox(self.criterionComboBox, ['aic', 'bic'])
        self.defaultComboItem(self.criterionComboBox, llic.criterion)
Пример #5
0
    def function(self):
        """Collect the estimator parameters currently selected in the widgets.

        The index of ``modelComboBox`` picks the estimator whose parameters
        are gathered: 0 -> ``LassoLars``, 1 -> ``LassoLarsCV``,
        2 -> ``LassoLarsIC``.

        Returns
        -------
        tuple or None
            ``(params, changed)`` for a known index. ``params`` holds the
            widget values plus a ``'model'`` entry with the combo-box index;
            ``changed`` is whatever ``self.getChangedValues`` returns for the
            same values without ``'model'`` and a default-constructed
            estimator. For any other index ``"Error"`` is printed and
            ``None`` is returned.
        """
        model = self.modelComboBox.currentIndex()
        if model == 0:
            estimator_cls = LassoLars
            params = {
                'alpha': self.alpha_text.text(),
                'fit_intercept': self.fit_interceptCheckBox.isChecked(),
                # Read the verbose box itself; this entry used to be copied
                # from the fit_intercept box.
                'verbose': self.verboseCheckBox.isChecked(),
                'normalize': self.normalizeCheckBox.isChecked(),
                'precompute': self.precomputeComboBox.currentText(),
                'max_iter': self.max_iterSpinBox.value(),
                'copy_X': self.copy_XCheckBox.isChecked(),
                'fit_path': self.fit_pathCheckBox.isChecked(),
                'positive': self.positiveCheckBox.isChecked(),
                'model': model
            }
        elif model == 1:
            estimator_cls = LassoLarsCV
            params = {
                'fit_intercept': self.fit_interceptCheckBox.isChecked(),
                'verbose': self.verboseCheckBox.isChecked(),
                'max_iter': self.max_iterSpinBox.value(),
                'normalize': self.normalizeCheckBox.isChecked(),
                'precompute': self.precomputeComboBox.currentText(),
                'cv': self.cvSpinBox.value(),
                'max_n_alphas': self.max_n_alphasSpinBox.value(),
                'n_jobs': self.n_jobsSpinBox.value(),
                'copy_X': self.copy_XCheckBox.isChecked(),
                'positive': self.positiveCheckBox.isChecked(),
                'model': model
            }
        elif model == 2:
            estimator_cls = LassoLarsIC
            params = {
                'criterion': self.criterionComboBox.currentText(),
                'fit_intercept': self.fit_interceptCheckBox.isChecked(),
                'verbose': self.verboseCheckBox.isChecked(),
                'normalize': self.normalizeCheckBox.isChecked(),
                'precompute': self.precomputeComboBox.currentText(),
                'max_iter': self.max_iterSpinBox.value(),
                'copy_X': self.copy_XCheckBox.isChecked(),
                'positive': self.positiveCheckBox.isChecked(),
                'model': model
            }
        else:
            print("Error")
            return None

        # 'model' is bookkeeping, not an estimator argument, so it is left
        # out of the comparison against the estimator's defaults.
        params_check = dict(params)
        params_check.pop('model')
        return params, self.getChangedValues(params_check, estimator_cls())
Пример #6
0
def set_learning_method(config, X_train, y_train):
    """
    Instantiates the sklearn's class corresponding to the value set in the
    configuration file for running the learning method.

    TODO: use reflection to instantiate the classes

    @param config: configuration object; its "learning" entry may hold the
        keys "method", "parameters", "optimize" and "scorer"
    @param X_train: training inputs, handed to optimize_model() when the
        configuration has an "optimize" section
    @param y_train: training targets, handed to optimize_model() likewise
    @return: tuple (estimator, scorers). estimator is None when there is no
        "learning" section or the method name is not handled here; scorers
        is the result of set_scorer_functions(), or None when there is no
        "learning" section.
    """
    estimator = None
    # Bound up front so the final return cannot hit an unbound local when
    # the configuration has no (or an empty) "learning" section.
    scorers = None

    learning_cfg = config.get("learning", None)
    if learning_cfg:
        p = learning_cfg.get("parameters", None)
        o = learning_cfg.get("optimize", None)
        scorers = set_scorer_functions(
            learning_cfg.get("scorer", ['mae', 'rmse']))

        method_name = learning_cfg.get("method", None)
        if method_name == "SVR":
            if o:
                tune_params = set_optimization_params(o)
                estimator = optimize_model(SVR(), X_train,
                                           y_train, tune_params, scorers,
                                           o.get("cv", 5),
                                           o.get("verbose", True),
                                           o.get("n_jobs", 1))

            elif p:
                estimator = SVR(C=p.get("C", 10),
                                epsilon=p.get('epsilon', 0.01),
                                kernel=p.get('kernel', 'rbf'),
                                degree=p.get('degree', 3),
                                gamma=p.get('gamma', 0.0034),
                                tol=p.get('tol', 1e-3),
                                verbose=False)
            else:
                estimator = SVR()

        elif method_name == "RandomForestRegressor":
            if o:
                tune_params = set_optimization_params(o)
                # Parenthesised so the line parses under Python 2 and 3.
                print(tune_params)
                estimator = optimize_model(RandomForestRegressor(), X_train,
                                           y_train, tune_params, scorers,
                                           o.get("cv", 5),
                                           o.get("verbose", True),
                                           o.get("n_jobs", 1))

            elif p:
                estimator = RandomForestRegressor(
                    n_estimators=p.get("n_estimators", 100),
                    criterion=p.get("criterion", 'mse'),
                    n_jobs=p.get("n_jobs", -1),
                    random_state=p.get("random_state", 0),
                    max_features=p.get("max_features", 'auto'))
            else:
                # Same fallback as every other method: library defaults.
                estimator = RandomForestRegressor()

        elif method_name == "SVC":
            if o:
                tune_params = set_optimization_params(o)
                estimator = optimize_model(SVC(), X_train,
                                           y_train, tune_params, scorers,
                                           o.get('cv', 5),
                                           o.get('verbose', True),
                                           o.get('n_jobs', 1))

            elif p:
                estimator = SVC(C=p.get('C', 1.0),
                                kernel=p.get('kernel', 'rbf'),
                                degree=p.get('degree', 3),
                                gamma=p.get('gamma', 0.0),
                                coef0=p.get('coef0', 0.0),
                                tol=p.get('tol', 1e-3),
                                verbose=p.get('verbose', False))
            else:
                estimator = SVC()

        elif method_name == "LassoCV":
            if p:
                estimator = LassoCV(eps=p.get('eps', 1e-3),
                                    n_alphas=p.get('n_alphas', 100),
                                    normalize=p.get('normalize', False),
                                    precompute=p.get('precompute', 'auto'),
                                    max_iter=p.get('max_iter', 1000),
                                    tol=p.get('tol', 1e-4),
                                    cv=p.get('cv', 10),
                                    verbose=False)
            else:
                estimator = LassoCV()

        elif method_name == "LassoLars":
            if o:
                tune_params = set_optimization_params(o)
                estimator = optimize_model(LassoLars(), X_train,
                                           y_train, tune_params, scorers,
                                           o.get("cv", 5),
                                           o.get("verbose", True),
                                           o.get("n_jobs", 1))

            # elif, not if: a separate "if p / else" here would always
            # replace the tuned estimator built just above.
            elif p:
                estimator = LassoLars(alpha=p.get('alpha', 1.0),
                                      fit_intercept=p.get(
                                          'fit_intercept', True),
                                      verbose=p.get('verbose', False),
                                      normalize=p.get('normalize', True),
                                      max_iter=p.get('max_iter', 500),
                                      fit_path=p.get('fit_path', True))
            else:
                estimator = LassoLars()

        elif method_name == "LassoLarsCV":
            if p:
                estimator = LassoLarsCV(max_iter=p.get('max_iter', 500),
                                        normalize=p.get('normalize', True),
                                        max_n_alphas=p.get(
                                            'max_n_alphas', 1000),
                                        n_jobs=p.get('n_jobs', 1),
                                        cv=p.get('cv', 10),
                                        verbose=False)
            else:
                estimator = LassoLarsCV()

    return estimator, scorers
Пример #7
0
def get_lasso_feature_scores(x, y, mode=CLASSIFICATION, scaling=0.5,
                             sample_fraction=0.75, n_resampling=200,
                             random_state=None):
    '''
    Score features with stability selection: a randomized logistic
    regression for classification, a randomized lasso for regression.

    see http://scikit-learn.org/stable/modules/feature_selection.html for
    details.

    Parameters
    ----------
    x : structured array
        its dtype field names are used as the feature names
    y : 1D nd.array
    mode : {CLASSIFICATION, REGRESSION}
    scaling : float, optional
              scaling parameter, should be between 0 and 1
    sample_fraction : float, optional
                      the fraction of samples to use in each randomized
                      dataset
    n_resampling : int, optional
                   the number of times the model is trained on a random
                   subset of the data
    random_state : int, optional
                   if it is an int, it specifies the seed to use, defaults to
                   None.

    Returns
    -------
    pandas DataFrame
        one row per feature (name, score), sorted by score in descending
        order

    Raises
    ------
    ValueError
        if mode is neither CLASSIFICATION nor REGRESSION

    '''
    feature_names = recfunctions.get_names(x.dtype)
    x = _prepare_experiments(x)

    if mode == CLASSIFICATION:
        selector = RandomizedLogisticRegression(
            scaling=scaling,
            sample_fraction=sample_fraction,
            n_resampling=n_resampling,
            random_state=random_state)
    elif mode == REGRESSION:
        # LassoLarsCV supplies the starting alpha, see
        # http://scikit-learn.org/stable/auto_examples/linear_model/plot_sparse_recovery.html
        cv_fit = LassoLarsCV(cv=6).fit(x, y)
        top_alpha = cv_fit.alphas_[0]
        alpha_grid = np.linspace(top_alpha, .1 * top_alpha, 6)

        selector = RandomizedLasso(
            alpha=alpha_grid,
            scaling=scaling,
            sample_fraction=sample_fraction,
            n_resampling=n_resampling,
            random_state=random_state)
    else:
        raise ValueError('{} invalid value for mode'.format(mode))

    # both selectors are fitted the same way
    selector.fit(x, y)

    ranked = sorted(zip(feature_names, selector.scores_),
                    key=itemgetter(1), reverse=True)
    return pd.DataFrame(ranked)
Пример #8
0
			'KMeans':KMeans(),
			'KNeighborsClassifier':KNeighborsClassifier(),
			'KNeighborsRegressor':KNeighborsRegressor(),
			'KernelCenterer':KernelCenterer(),
			'KernelDensity':KernelDensity(),
			'KernelPCA':KernelPCA(),
			'KernelRidge':KernelRidge(),
			'LSHForest':LSHForest(),
			'LabelPropagation':LabelPropagation(),
			'LabelSpreading':LabelSpreading(),
			'Lars':Lars(),
			'LarsCV':LarsCV(),
			'Lasso':Lasso(),
			'LassoCV':LassoCV(),
			'LassoLars':LassoLars(),
			'LassoLarsCV':LassoLarsCV(),
			'LassoLarsIC':LassoLarsIC(),
			'LatentDirichletAllocation':LatentDirichletAllocation(),
			'LedoitWolf':LedoitWolf(),
			'LinearDiscriminantAnalysis':LinearDiscriminantAnalysis(),
			'LinearRegression':LinearRegression(),
			'LinearSVC':LinearSVC(),
			'LinearSVR':LinearSVR(),
			'LocallyLinearEmbedding':LocallyLinearEmbedding(),
			'LogisticRegression':LogisticRegression(),
			'LogisticRegressionCV':LogisticRegressionCV(),
			'MDS':MDS(),
			'MLPClassifier':MLPClassifier(),
			'MLPRegressor':MLPRegressor(),
			'MaxAbsScaler':MaxAbsScaler(),
			'MeanShift':MeanShift(),
Пример #9
0
# Script section: expand the features, fit a LassoLarsCV model on the
# training split and report the mean absolute error on the test split.

# Apply Some Featuring
poly_reg = PolynomialFeatures(degree=1)

# Transform into numpy object
x_train = poly_reg.fit_transform(X_train)
# Reuse the transformer fitted on the training data instead of re-fitting
# it on the test split.
X_test = poly_reg.transform(X_test)
# ``.ix`` has been removed from pandas; ``.iloc`` takes the first column by
# position.
y_test = np.array(y_test.iloc[:, 0])
y_train = np.array(y_train.iloc[:, 0])

# Build model with good params
model = LassoLarsCV(copy_X=True,
                    cv=None,
                    eps=2.2204460492503131e-16,
                    fit_intercept=True,
                    max_iter=500,
                    max_n_alphas=1000,
                    n_jobs=1,
                    normalize=True,
                    positive=False,
                    precompute='auto',
                    verbose=False)

# Fit the model
model.fit(x_train, y_train)

# Predict
y_pred = model.predict(X_test)

# Scoring
if regression:
    print('Score on test set:', mean_absolute_error(y_test, y_pred))
def get_lasso_feature_scores(results, classify, scaling=0.5,
                             sample_fraction=0.75, n_resampling=200,
                             random_state=None):
    '''
    Score features with stability selection: a randomized logistic
    regression when the prepared outcome is categorical, a randomized lasso
    otherwise.

    see http://scikit-learn.org/stable/modules/feature_selection.html for
    details.

    Parameters
    ----------
    results : tuple
              (experiments, outcomes); the field names of the experiments'
              dtype are used as the feature names
    classify : callable or str
               a classify function or variable analogous to PRIM, passed on
               to _prepare_outcomes
    scaling : float, optional
              scaling parameter, should be between 0 and 1
    sample_fraction : float, optional
                      the fraction of samples to use in each randomized
                      dataset
    n_resampling : int, optional
                   the number of times the model is trained on a random
                   subset of the data
    random_state : int, optional
                   if it is an int, it specifies the seed to use, defaults to
                   None.

    Returns
    -------
    list of tuples
        (feature name, score) pairs sorted by score in descending order

    '''
    experiments, outcomes = results
    feature_names = recfunctions.get_names(experiments.dtype)

    x = _prepare_experiments(experiments)
    y, categorical = _prepare_outcomes(outcomes, classify)

    if categorical:
        selector = RandomizedLogisticRegression(
            scaling=scaling,
            sample_fraction=sample_fraction,
            n_resampling=n_resampling,
            random_state=random_state)
    else:
        # LassoLarsCV supplies the starting alpha, see
        # http://scikit-learn.org/stable/auto_examples/linear_model/plot_sparse_recovery.html
        cv_fit = LassoLarsCV(cv=6).fit(x, y)
        top_alpha = cv_fit.alphas_[0]
        alpha_grid = np.linspace(top_alpha, .1 * top_alpha, 6)

        selector = RandomizedLasso(
            alpha=alpha_grid,
            scaling=scaling,
            sample_fraction=sample_fraction,
            n_resampling=n_resampling,
            random_state=random_state)

    # both selectors are fitted the same way
    selector.fit(x, y)

    return sorted(zip(feature_names, selector.scores_),
                  key=itemgetter(1), reverse=True)