示例#1
0
def test_multitask_enet_and_lasso_cv():
    """Check selected alphas and fitted attribute shapes of the multi-task CV models."""
    n_targets = 3

    # Default alpha grid: pin the alpha each estimator selects.
    X, y, _, _ = build_dataset(n_features=50, n_targets=n_targets)
    enet_cv = MultiTaskElasticNetCV(cv=3).fit(X, y)
    assert_almost_equal(enet_cv.alpha_, 0.00556, 3)
    lasso_cv = MultiTaskLassoCV(cv=3).fit(X, y)
    assert_almost_equal(lasso_cv.alpha_, 0.00278, 3)

    # Elastic net with two candidate l1 ratios and ten alphas per ratio.
    X, y, _, _ = build_dataset(n_targets=n_targets)
    n_features = X.shape[1]
    enet_cv = MultiTaskElasticNetCV(
        n_alphas=10, eps=1e-3, max_iter=100, l1_ratio=[0.3, 0.5], tol=1e-3, cv=3
    )
    enet_cv.fit(X, y)
    assert enet_cv.l1_ratio_ == 0.5
    assert enet_cv.coef_.shape == (n_targets, n_features)
    assert enet_cv.intercept_.shape == (n_targets, )
    assert enet_cv.mse_path_.shape == (2, 10, 3)
    assert enet_cv.alphas_.shape == (2, 10)

    # Lasso with ten alphas.
    X, y, _, _ = build_dataset(n_targets=n_targets)
    n_features = X.shape[1]
    lasso_cv = MultiTaskLassoCV(n_alphas=10, eps=1e-3, max_iter=100, tol=1e-3, cv=3)
    lasso_cv.fit(X, y)
    assert lasso_cv.coef_.shape == (n_targets, n_features)
    assert lasso_cv.intercept_.shape == (n_targets, )
    assert lasso_cv.mse_path_.shape == (10, 3)
    assert len(lasso_cv.alphas_) == 10
示例#2
0
class MultiLasso_model(Lasso_model):
    """Lasso_model variant backed by a single MultiTaskLassoCV estimator."""

    def __init__(self, train_path, test_path, pred_path):
        """Set up the parent model and a MultiTaskLassoCV over a fixed alpha grid."""
        super().__init__(train_path, test_path, pred_path)
        # Candidate alphas: 0.05, 0.10, ... (99 values, step 0.05).
        alpha_grid = [float(step) * 0.05 for step in range(1, 100)]
        self.multiLasso_model = MultiTaskLassoCV(alphas=alpha_grid,
                                                 cv=8,
                                                 max_iter=1000000)

    def train(self, X_train, Y_train):
        """Fit the wrapped estimator on the given features and targets."""
        self.multiLasso_model.fit(X_train, Y_train)

    def pred(self, X_test):
        """Return the wrapped estimator's predictions for ``X_test``."""
        return self.multiLasso_model.predict(X_test)

    def run(self):
        """Train, predict, and write training rows plus predicted test rows to ``pred_path``."""
        X_train_PMNF, X_test_PMNF, y_trains, y_tests = super().get_train_test()
        # The targets are transposed before fitting and the predictions
        # transposed back, so y_preds is indexed [group][row] below.
        targets = np.asarray(y_trains).T
        self.train(X_train_PMNF, targets)
        y_preds = self.pred(X_test_PMNF).T

        print(y_preds.shape, np.asarray(y_tests).shape)

        with open(self.pred_path, "w", newline='') as f:
            writer = csv.writer(f)
            for i in range(len(y_trains)):
                # Training rows are copied through unchanged.
                writer.writerows(self.data_train_split[i])

                # Test rows past the training prefix get the prediction appended.
                group = self.data_test_split[i][self.split_train_len:, :]
                for j, features in enumerate(group):
                    writer.writerow(np.append(features, y_preds[i][j]))
示例#3
0
    def _informativeness(self, z_p, z):
        """Fit a regressor that predicts ``z`` from ``z_p`` and return its score.

        When ``self.regressor`` is a ``LassoCV``, a ``MultiTaskLassoCV`` with
        the same ``cv`` setting is fitted instead; otherwise ``self.regressor``
        itself is fitted.

        Parameters
        ----------
        z_p : input features passed to ``fit`` / ``score``.
        z : regression targets passed to ``fit`` / ``score``.

        Returns
        -------
        The value of the fitted regressor's ``score(z_p, z)``.
        """
        if isinstance(self.regressor, LassoCV):
            regressor = MultiTaskLassoCV(cv=self.regressor.cv,
                                         max_iter=2000,
                                         selection='random')
        else:
            # Previously `regressor` was left unbound on this path.
            regressor = self.regressor

        regressor.fit(z_p, z)
        # Score the estimator that was actually fitted, and pass the targets:
        # `score` needs both X and y.
        return regressor.score(z_p, z)
示例#4
0
def test_1d_multioutput_lasso_and_multitask_lasso_cv():
    """LassoCV on a 1-D target must agree with MultiTaskLassoCV on that target as one column."""
    X, y, _, _ = build_dataset(n_features=10)
    y_column = y[:, np.newaxis]
    cv_params = dict(n_alphas=5, eps=2e-3)

    single_task = LassoCV(**cv_params)
    single_task.fit(X, y_column[:, 0])
    multi_task = MultiTaskLassoCV(**cv_params)
    multi_task.fit(X, y_column)

    assert_almost_equal(single_task.alpha_, multi_task.alpha_)
    assert_almost_equal(single_task.coef_, multi_task.coef_[0])
    assert_almost_equal(single_task.intercept_, multi_task.intercept_[0])
示例#5
0
class MultiTaskLassoCVImpl:
    """Thin wrapper that builds an ``Op`` from hyperparameters and forwards fit/predict to it."""

    def __init__(self, **hyperparams):
        """Store the hyperparameters and construct the wrapped ``Op`` with them."""
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        """Fit the wrapped model, passing ``y`` only when it is given; return ``self``."""
        fit_args = (X,) if y is None else (X, y)
        self._wrapped_model.fit(*fit_args)
        return self

    def predict(self, X):
        """Return the wrapped model's predictions for ``X``."""
        return self._wrapped_model.predict(X)
示例#6
0
def select_features(x, y, methods=('variance', 'correlation', 'l1', 'forest')):
    ''' Set up feature selectors for the requested methods and intersect index sets.

        methods = ('variance', 'correlation', 'l1', 'forest')
        - variance: fits a variance-threshold selector (VT) on the
                    'Feature_1':'Feature_2' column slice of x
        - correlation: constructs an SP selector with f_regression, keeping
                    the top 80 percent (constructed only, never fitted here)
        - l1: wraps a MultiTaskLassoCV in SFM (constructed only, never
                    fitted here)
        - forest: fits a tree-ensemble regressor on (x, y) and an SFM
                    selector on top of it

        Returns list(x_indices) after intersecting it with every entry of
        idx_list.

        NOTE(review): as written this function cannot run to completion --
        see the notes in the body. Python 2 syntax (print statement).
    '''
    # Label-based, inclusive column slice.
    features = x.loc[:,'Feature_1':'Feature_2']

    if 'variance' in methods:
        # 0.99 * (1 - 0.99) is the variance of a Bernoulli(0.99) variable.
        vt = VT(threshold=(0.99*(1-0.99)))
        vt.fit(features)
        # NOTE(review): the fitted selector's result is never recorded.
        

    if 'correlation' in methods:
        # NOTE(review): `cr` is created but never fitted or used.
        cr = SP(f_regression, percentile=80)

    if 'l1' in methods:
        rgr = MultiTaskLassoCV(cv=5, n_jobs=-1)
        # NOTE(review): this `m` is never fitted and is overwritten by the
        # 'forest' branch below when both methods are requested.
        m = SFM(rgr)
        

    if 'forest' in methods:
        # NOTE(review): `RandomRorestRegressor` looks like a typo of
        # RandomForestRegressor -- confirm against the file's imports.
        clf = RandomRorestRegressor(n_estimators=300, max_features=0.7,n_jobs=-1).fit(x,y)
        m = SFM(clf)
        m.fit(x.values, y.values)

    # NOTE(review): `idx_list` and `x_indices` are not defined anywhere in
    # this function; unless they exist at module level this raises NameError.
    for indices in idx_list:
        x_indices = x_indices & indices
    print 'All: %s' % len(x_indices)

    return list(x_indices)
示例#7
0
def _MTLassoCV_MatchSpace(X,
                          Y,
                          v_pens=None,
                          n_v_cv=5,
                          sample_frac=1,
                          Y_col_block_size=None,
                          se_factor=None,
                          normalize=True,
                          **kwargs):  # pylint: disable=missing-param-doc, unused-argument
    """Select features with a cross-validated multi-task lasso.

    Optionally subsamples rows (``sample_frac < 1``) and summarises the
    columns of ``Y`` in blocks (``Y_col_block_size``) before fitting. When
    ``se_factor`` is given, the penalty is re-chosen by ``_neg_se_rule`` and a
    plain ``MultiTaskLasso`` is refitted at that penalty.

    Returns ``(transformer, V[m_sel], best_v_pen, (V, fit))`` where ``V`` is
    the per-feature L2 norm of the coefficients across tasks and ``m_sel``
    marks the features with a non-zero norm.
    """
    # A fake MT would do Lasso on y_mean = Y.mean(axis=1)
    if sample_frac < 1:
        n_rows = X.shape[0]
        keep = np.random.choice(n_rows, int(sample_frac * n_rows), replace=False)
        X, Y = X[keep, :], Y[keep, :]

    if Y_col_block_size is not None:
        Y = _block_summ_cols(Y, Y_col_block_size)

    cv_selector = MultiTaskLassoCV(normalize=normalize, cv=n_v_cv, alphas=v_pens)
    fitted = cv_selector.fit(X, Y)

    if se_factor is None:
        best_v_pen = fitted.alpha_
    else:
        best_v_pen = _neg_se_rule(fitted, factor=se_factor)
        fitted = MultiTaskLasso(alpha=best_v_pen, normalize=normalize).fit(X, Y)

    # coef_ is n_tasks x n_features; collapse the task axis -> n_features
    squared = np.square(fitted.coef_)
    V = np.sqrt(np.sum(squared, axis=0))
    m_sel = V != 0
    return SelMatchSpace(m_sel), V[m_sel], best_v_pen, (V, fitted)
示例#8
0
    def fit(self, df_X, df_y):
        """Fit a scaled MultiTaskLassoCV pipeline and store coefficients in ``self.act_``.

        Parameters
        ----------
        df_X : DataFrame of features (rows are matched to ``df_y`` by index).
            When ``self.scale`` is true it is scaled column-wise IN PLACE.
        df_y : DataFrame of targets, one column per task.

        Raises
        ------
        ValueError
            If ``df_X`` and ``df_y`` do not have the same number of rows.

        Side effects
        ------------
        Sets ``self.act_`` to a DataFrame of coefficients indexed by the
        columns of ``df_X`` with one column per column of ``df_y``.
        """
        logger.info("Fitting MultiTaskLasso")

        if not df_y.shape[0] == df_X.shape[0]:
            raise ValueError("number of regions is not equal")

        if self.scale:
            logger.debug("Scaling motif scores")
            # Scale motif scores (mutates the caller's frame)
            df_X.loc[:, :] = scale(df_X, axis=0)

        y = df_y

        # Put the rows of X in the same order as y.
        X = df_X.loc[y.index]

        model = Pipeline([
            ("scale", StandardScaler()),
            (
                "reg",
                MultiTaskLassoCV(fit_intercept=False,
                                 n_alphas=20,
                                 n_jobs=self.ncpus),
            ),
        ])
        logger.debug("Fitting model")
        # Fit on the index-aligned X; fitting on df_X directly would pair
        # rows with the wrong targets whenever the two indexes are ordered
        # differently.
        model.fit(X, y)
        logger.info("Done")

        self.act_ = pd.DataFrame(model.named_steps["reg"].coef_,
                                 index=y.columns,
                                 columns=X.columns).T
示例#9
0
def _MTLassoMixed_MatchSpace(X, Y, fit_model_wrapper, v_pens=None, n_v_cv = 5, **kwargs): #pylint: disable=missing-param-doc, unused-argument
    """Pick the multi-task lasso penalty whose selected features score lowest.

    A ``MultiTaskLassoCV`` fit supplies the candidate penalties
    (``alphas_``). For each candidate a ``MultiTaskLasso`` is fitted, the
    per-feature coefficient norms are computed, and ``fit_model_wrapper`` is
    called with the resulting selector and weights; its ``score`` attribute is
    recorded. The candidate with the smallest score wins.

    Returns ``(SelMatchSpace(mask), V_best[mask], best_v_pen, V_best)``.
    """
    # Note that MultiTaskLasso(CV).path with the same alpha doesn't produce
    # same results as MultiTaskLasso(CV)
    cv_fit = MultiTaskLassoCV(normalize=True, cv=n_v_cv, alphas=v_pens).fit(X, Y)
    v_pens = cv_fit.alphas_

    norms_per_pen = []
    scores = np.zeros((len(v_pens)))
    for idx, pen in enumerate(v_pens):
        lasso_fit = MultiTaskLasso(alpha=pen, normalize=True).fit(X, Y)
        # Collapse the task axis of coef_ into one norm per feature.
        norms = np.sqrt(np.sum(np.square(lasso_fit.coef_), axis=0))
        selected = (norms != 0)
        wrapped_fit = fit_model_wrapper(SelMatchSpace(selected), norms[selected])
        norms_per_pen.append(norms)
        scores[idx] = wrapped_fit.score

    i_best = np.argmin(scores)
    best_v_pen = v_pens[i_best]
    V_best = norms_per_pen[i_best]
    m_sel_best = (V_best != 0)
    return SelMatchSpace(m_sel_best), V_best[m_sel_best], best_v_pen, V_best
示例#10
0
    def initialize(self, experiences=()):
        """Build the scaler + estimator pipeline and prime it with ``experiences``.

        The estimator is chosen by ``self.estimate_func``. The scaler is
        fitted on the stacked ``e.s`` values of all experiences, then
        ``self.update`` is called once on the first experience so that the
        estimator is fitted before any prediction.

        Parameters
        ----------
        experiences : non-empty sequence of objects exposing an ``s`` attribute.

        Raises
        ------
        ValueError
            If ``self.estimate_func`` is not a known name, or if
            ``experiences`` is empty.
        """
        # Define the value function. Factories defer construction so that
        # only the requested estimator is instantiated.
        estimator_factories = {
            "Linear": lambda: LinearRegression(),
            "NN": lambda: MLPRegressor(hidden_layer_sizes=(10, 10), max_iter=1),
            "Ridge": lambda: Ridge(alpha=self.alpha),
            "Ridge(withoutIntercept)": lambda: Ridge(alpha=self.alpha,
                                                     fit_intercept=False),
            "Lasso": lambda: Lasso(alpha=self.alpha),
            "RidgeCV": lambda: RidgeCV(alphas=10**np.arange(-6, 1, 0.1), cv=5),
            "LassoCV": lambda: MultiTaskLassoCV(alphas=10**np.arange(-6, 1, 0.1),
                                                cv=5),
        }
        if self.estimate_func not in estimator_factories:
            # Previously an unknown name surfaced as an unbound-local error.
            raise ValueError(
                "unknown estimate_func: {!r}".format(self.estimate_func))
        if len(experiences) == 0:
            raise ValueError("initialize() needs at least one experience")

        scaler = StandardScaler()
        estimator = estimator_factories[self.estimate_func]()
        self.model = Pipeline([("scaler", scaler), ("estimator", estimator)])

        states = np.vstack([e.s for e in experiences])
        # self.my_logger.write(states)
        self.model.named_steps["scaler"].fit(states)

        # Avoid the predict before fit.
        self.update([experiences[0]], gamma=0)
        self.initialized = True
示例#11
0
def test_uniform_targets():
    """Constant targets must be predicted exactly, with alphas at float resolution."""
    single_task_models = (ElasticNetCV(n_alphas=3), LassoCV(n_alphas=3))
    multi_task_models = (MultiTaskElasticNetCV(n_alphas=3),
                         MultiTaskLassoCV(n_alphas=3))

    rng = np.random.RandomState(0)
    X_train = rng.random_sample(size=(10, 3))
    X_test = rng.random_sample(size=(10, 3))

    # Target buffers are refilled in place for every constant value.
    y_single = np.empty(10)
    y_multi = np.empty((10, 2))
    expected_alphas = [np.finfo(float).resolution] * 3
    constants = (0, 5)

    for model in single_task_models:
        for constant in constants:
            y_single.fill(constant)
            predicted = model.fit(X_train, y_single).predict(X_test)
            assert_array_equal(predicted, y_single)
            assert_array_equal(model.alphas_, expected_alphas)

    for model in multi_task_models:
        for constant in constants:
            # The second task is twice the first.
            y_multi[:, 0].fill(constant)
            y_multi[:, 1].fill(2 * constant)
            predicted = model.fit(X_train, y_multi).predict(X_test)
            assert_array_equal(predicted, y_multi)
            assert_array_equal(model.alphas_, expected_alphas)
示例#12
0
def Lasso(train, test):  #Selecting features using Lasso (L1) regularisation
    """Fit a MultiTaskLassoCV-based selector and print feature counts before/after.

    Prints the length of the first training event, fits the selector on the
    first 270010 training events with ``test.Events`` as the target, transforms
    both sets, and prints the length of the first transformed test event.
    Returns nothing.
    """
    print(len(train.Events[0]))
    selector = SelectFromModel(MultiTaskLassoCV())
    train_events = train.Events[:270010]
    # NOTE(review): the selector is fitted with test.Events as `y` -- confirm
    # this is intended.
    selector.fit(train_events, test.Events)
    trainE = selector.transform(train_events)
    testE = selector.transform(test.Events)
    print(len(testE[0]))
示例#13
0
def test_multi_task_lasso_cv_dtype():
    """MultiTaskLassoCV must handle integer X and recover the duplicated first column."""
    n_samples, n_features = 10, 3
    rng = np.random.RandomState(42)
    # Cast explicitly so that X is unambiguously an int array.
    X = rng.binomial(1, .5, size=(n_samples, n_features)).astype(int)
    # Both targets are copies of the first feature column.
    y = X[:, [0, 0]].copy()

    est = MultiTaskLassoCV(n_alphas=5, fit_intercept=True)
    est.fit(X, y)

    expected_coef = [[1, 0, 0]] * 2
    assert_array_almost_equal(est.coef_, expected_coef, decimal=3)
示例#14
0
def _MTLassoCV_MatchSpace(X, Y, v_pens=None, n_v_cv = 5, **kwargs): #pylint: disable=missing-param-doc, unused-argument
    """Select features with a cross-validated multi-task lasso.

    Returns ``(transformer, V[m_sel], best_v_pen, V)`` where ``V`` is the
    per-feature L2 norm of the fitted coefficients across tasks and ``m_sel``
    marks the features whose norm is non-zero.
    """
    # A fake MT would do Lasso on y_mean = Y.mean(axis=1)
    cv_selector = MultiTaskLassoCV(normalize=True, cv=n_v_cv, alphas=v_pens)
    fitted = cv_selector.fit(X, Y)
    # coef_ is n_tasks x n_features; collapse the task axis -> n_features
    squared = np.square(fitted.coef_)
    V = np.sqrt(np.sum(squared, axis=0))
    m_sel = (V != 0)
    return SelMatchSpace(m_sel), V[m_sel], fitted.alpha_, V
示例#15
0
    def select_mtlasso(self, X, y):
        """Cross-validate MultiTaskLassoCV over a fixed alpha grid and print the chosen alpha."""
        # Hand-picked candidate penalties, kept exactly as given.
        candidate_alphas = [
            0.00001, .0001, .001, .002, .003, .004, .005, .006, .007, .008,
            .009, .099, .01, .011, .012, .013, .014, .015, .016, .017, .018,
            .019, .02, .025, .03, .035, .036, .037, .038, .039, .04, .041,
            .042, .043, .044, .045, .05, .06, .075, .1, .2, .225, .23, .24,
            .245, .246, .247, .248, .249, .25, .251, .252, .253, .254, .255,
            .26, .27, .275, .3, .35, .4, .45, .46, .47, .48, .481, .482, .483,
            .484, .485, .486, .487, .488, .489, .49, .491, .492, .493, .494,
            .495, .496, .497, .498, .499, .5, .51, .511, .512, .513, .514,
            .515, .516, .517, .518, .519, .52, .525, .53, .54, .55, .6, .75,
            .752, .7527, .7528, .7529, .753, .7531, .754, .7545, .755, .756,
            .76, .765, .77, .78, .79, .8, .9, 1.0, 1.2, 1.25, 1.5, 1.75, 2.0
        ]
        cv_model = MultiTaskLassoCV(alphas=candidate_alphas)

        fitted = cv_model.fit(X, y)
        print(fitted.alpha_)
示例#16
0
 def test_model_multi_task_lasso_cv(self):
     """Convert a fitted two-target MultiTaskLassoCV to ONNX and dump model plus data."""
     model, X = fit_regression_model(MultiTaskLassoCV(), n_targets=2)
     n_features = X.shape[1]
     initial_types = [("input", FloatTensorType([None, n_features]))]
     model_onnx = convert_sklearn(model, "mutli-task lasso cv", initial_types)
     self.assertIsNotNone(model_onnx)
     dump_data_and_model(
         X,
         model,
         model_onnx,
         verbose=False,
         basename="SklearnMultiTaskLassoCV-Dec4",
     )
def compare_to_lasso_analysis(adata, ccdtranscript):
    '''Perform a comparison of pseudotime analysis to LASSO analysis for finding CCD proteins.

    Fits (or loads from ``output/pickles/fucci_rna_imputed_lasso.pkl``) a
    MultiTaskLassoCV that predicts the ``Red585`` / ``Green530`` observations
    from the KNN-imputed expression matrix, prints summary statistics about
    the non-zero coefficients, and writes two UMAP figures: one for the
    variables with non-zero summed coefficients and one for the rest.

    Side effects: prints to stdout, may write the pickle cache, moves
    ``figures/umap.pdf`` twice, and temporarily changes
    ``plt.rcParams['figure.figsize']`` and the global warning filter (both are
    reset on exit, including when an error occurs).
    '''
    prevPlotSize = plt.rcParams['figure.figsize']
    plt.rcParams['figure.figsize'] = (6, 5)

    print("ANALYZING SC-RNA-SEQ WITH LASSO")
    warnings.filterwarnings("ignore")
    try:
        fucci_rna_data = [(adata.obs["Red585"][ii], adata.obs["Green530"][ii])
                          for ii in np.arange(len(adata.obs))]
        # Zeros in the expression matrix are treated as missing and imputed.
        imputer = KNNImputer(missing_values=0)
        expression = imputer.fit_transform(adata.X)
        fucci_rna_path = "output/pickles/fucci_rna_imputed_lasso.pkl"
        if os.path.exists(fucci_rna_path):
            # NOTE: unpickling executes arbitrary code; only load cache
            # files produced by this pipeline.
            with open(fucci_rna_path, 'rb') as cache_file:
                fucci_rna = np.load(cache_file, allow_pickle=True)
        else:
            fucci_rna = MultiTaskLassoCV()
            fucci_rna.fit(expression, fucci_rna_data)
            with open(fucci_rna_path, 'wb') as cache_file:
                pickle.dump(fucci_rna, cache_file)

        # Sum the coefficients over the two targets, one value per variable.
        coef_sum = np.sum(fucci_rna.coef_, axis=0)
        nz_coef = coef_sum != 0
        print(f"{sum(nz_coef)}: number of nonzero lasso coefficients")
        print(f"{adata.var_names[nz_coef]}: genes with nonzero lasso coeff")
        print(
            f"{sum(ccdtranscript[nz_coef]) / sum(nz_coef)}: % nonzero lasso found as CCD transcripts"
        )
        print(
            f"{coef_sum[nz_coef]}: coefficients for nonzero lasso coeff"
        )

        # Generate UMAP for CCD and nonCCD for the LASSO model
        adataCCd = adata[:, nz_coef]
        sc.pp.neighbors(adataCCd, n_neighbors=10, n_pcs=40)
        sc.tl.umap(adataCCd)
        sc.pl.umap(adataCCd, color="fucci_time", show=False, save=True)
        shutil.move("figures/umap.pdf", "figures/umapRNALassoCCD.pdf")
        adataNonCCd = adata[:, ~nz_coef]
        sc.pp.neighbors(adataNonCCd, n_neighbors=10, n_pcs=40)
        sc.tl.umap(adataNonCCd)
        sc.pl.umap(adataNonCCd, color="fucci_time", show=False, save=True)
        shutil.move("figures/umap.pdf", "figures/umapRNALassoNonCCD.pdf")
    finally:
        # Reset global state even if the analysis fails part-way.
        plt.rcParams['figure.figsize'] = prevPlotSize
        warnings.filterwarnings("default")
示例#18
0
def fit_lasso(X, flavors):
    """Derive the flavor profiles by fitting a multi-task LASSO on logit targets.

    Exact 0 and 1 entries of ``flavors`` are replaced by 0.01 and 0.99 (IN
    PLACE, so the caller's array is modified) because the logit is not finite
    there. Rows whose logit is not finite in every column are dropped before
    fitting.

    Parameters
    ----------
    X : feature matrix, indexable by a boolean row mask.
    flavors : array of values to be logit-transformed; modified in place.

    Returns
    -------
    The transposed fitted coefficients mapped through ``inv_logit``.
    """
    flavors[flavors == 0] = 0.01  # logit(0) and logit(1) are not finite
    flavors[flavors == 1] = 0.99
    y = logit(flavors)
    idx = np.all(np.isfinite(y), axis=1)

    # Call form of print: valid on Python 3, same output on Python 2.
    print('Performing multi-task LASSO...')
    lasso = MultiTaskLassoCV(cv=7, n_jobs=7, fit_intercept=False, verbose=1).fit(X[idx], y[idx])
    weights = inv_logit(lasso.coef_.T)  # transform to 0 to 1 scale

    return weights
示例#19
0
 def fit(self, X, Y):
     """Fit LassoCV for a single target or MultiTaskLassoCV for several targets.

     A 2-D ``Y`` with exactly one column is flattened before fitting and
     ``self.needs_unravel`` is set so the flattening can be undone later.
     Returns ``self``.
     """
     assert shape(X)[0] == shape(Y)[0]
     assert ndim(Y) <= 2
     self.needs_unravel = False

     is_matrix = ndim(Y) == 2
     if is_matrix and shape(Y)[1] > 1:
         estimator_cls = MultiTaskLassoCV
     else:
         estimator_cls = LassoCV
         if is_matrix and shape(Y)[1] == 1:
             Y = np.ravel(Y)
             self.needs_unravel = True

     self.model = estimator_cls(*self.args, **self.kwargs)
     self.model.fit(X, Y)
     return self
示例#20
0
def pls_screen_as726x(x, y, n_comps=8):
    """Plot a learning curve for a polynomial-features + PLS regression pipeline.

    The curve is drawn on the right-hand axis of a 1x2 figure, which is then
    shown. A train/test split and a MultiTaskLassoCV are also created but are
    not used further in this function.
    """
    figure, (left_ax, right_ax) = plt.subplots(nrows=1, ncols=2, figsize=(8, 4))
    pls = PLSRegression(n_components=n_comps)
    lasso = MultiTaskLassoCV(max_iter=40000)

    # Swap `pls` for `lasso` here to screen with the lasso instead.
    pipeline = make_pipeline(PolynomialFeatures(), pls)
    plot_learning_curve(pipeline, "Learning Curve", x, y, ax=right_ax)

    split = train_test_split(x, y, test_size=0.33, random_state=42)
    x_train, x_test, y_train, y_test = split
    print('1')
    plt.show()
 def fit_force_params(self, alpha=None):
     """
     Fit the sparse linear regression stored in ``self.force_model``
     (per the original note: on the remaining n_variables-q variables).

     alpha is the penalization parameter; None triggers cross validation.
     """
     if alpha is not None:
         # Fixed penalty supplied by the caller.
         model = MultiTaskLasso(alpha=alpha, fit_intercept=False,
                                normalize=False)
     else:
         # No penalty given: pick it by 10-fold cross validation.
         model = MultiTaskLassoCV(eps=1e-3, n_alphas=50, cv=10, n_jobs=-1,
                                  fit_intercept=False, normalize=False)
     self.force_model = model
     masked_features = self.features_forcing[self.mask_f]
     self.force_model.fit(masked_features, self.eps)
 def fit_lin_model(self, alpha=None):
     """
     Fit the sparse linear regression stored in ``self.lin_model``
     (per the original note: on the first q variables).

     alpha is the penalization parameter; None triggers cross validation.
     """
     if alpha is not None:
         # Fixed penalty supplied by the caller.
         model = MultiTaskLasso(alpha=alpha, fit_intercept=False,
                                normalize=False)
     else:
         # No penalty given: pick it by 10-fold cross validation.
         model = MultiTaskLassoCV(eps=1e-3, n_alphas=50, cv=10, n_jobs=-1,
                                  fit_intercept=False, normalize=False,
                                  max_iter=3500)
     self.lin_model = model
     # Features and targets are restricted by the same mask.
     masked_features = self.features_lin_model[self.mask_l_m]
     masked_targets = self.delta_v[self.mask_l_m]
     self.lin_model.fit(masked_features, masked_targets)
示例#23
0
def _D_LassoCV_MatchSpace(X,
                          Y,
                          X_full,
                          D_full,
                          v_pens=None,
                          n_v_cv=5,
                          sample_frac=1,
                          y_V_share=0.5,
                          **kwargs):  # pylint: disable=missing-param-doc, unused-argument
    """Select features using two lassos: one on ``(X, Y)`` and one on ``(X_full, D_full)``.

    A MultiTaskLassoCV on ``(X, Y)`` gives per-feature coefficient norms
    ``y_V``; a LassoCV on ``(X_full, D_full)`` gives absolute coefficients
    ``d_V``. A feature is selected when either is non-zero. The returned
    weights ``V`` are ``d_V`` or ``y_V`` alone when the other sums to zero,
    otherwise a blend controlled by ``y_V_share``.

    With ``sample_frac < 1`` both data sets are independently row-subsampled
    first.

    Returns ``(transformer, V[m_sel], (best_y_v_pen, best_d_v_pen), V)``.
    """
    if sample_frac < 1:
        n_y_rows = X.shape[0]
        y_rows = np.random.choice(n_y_rows, int(sample_frac * n_y_rows), replace=False)
        X, Y = X[y_rows, :], Y[y_rows, :]
        n_d_rows = D_full.shape[0]
        d_rows = np.random.choice(n_d_rows, int(sample_frac * n_d_rows), replace=False)
        X_full, D_full = X_full[d_rows, :], D_full[d_rows]

    y_fit = MultiTaskLassoCV(normalize=True, cv=n_v_cv, alphas=v_pens).fit(X, Y)
    # coef_ is n_tasks x n_features; collapse the task axis -> n_features
    y_V = np.sqrt(np.sum(np.square(y_fit.coef_), axis=0))

    d_fit = LassoCV(normalize=True, cv=n_v_cv, alphas=v_pens).fit(X_full, D_full)
    d_V = np.abs(d_fit.coef_)

    m_sel = (y_V + d_V) != 0
    transformer = SelMatchSpace(m_sel)

    if y_V.sum() == 0:
        V = d_V
    elif d_V.sum() == 0:
        V = y_V
    else:
        y_part = y_V_share * y_V / (y_V.sum())
        d_part = (1 - y_V_share) * d_V / (2 * d_V.sum())
        V = y_part + d_part

    best_pens = (y_fit.alpha_, d_fit.alpha_)
    return transformer, V[m_sel], best_pens, V
示例#24
0
 def _compare_with_lasso_cv(self,
                            lasso_X,
                            lasso_y,
                            wlasso_X,
                            wlasso_y,
                            sample_weight,
                            alphas,
                            lasso_cv=3,
                            wlasso_cv=3,
                            params=None,
                            tol=1e-8):
     """Assert that the weighted CV lasso matches the reference CV lasso.

     Fits ``LassoCV`` / ``WeightedLassoCV`` (or their multi-task variants when
     ``lasso_y`` has more than one dimension) and checks that both select the
     same ``alpha_`` and produce matching coefficients and intercepts.

     Parameters
     ----------
     lasso_X, lasso_y : data for the reference estimator.
     wlasso_X, wlasso_y, sample_weight : data and weights for the weighted estimator.
     alphas : alpha grid passed to both estimators.
     lasso_cv, wlasso_cv : ``cv`` argument for each estimator.
     params : optional dict of extra parameters applied to both estimators
         via ``set_params``; ``None`` means no extra parameters.
     tol : absolute tolerance for the coefficient comparison.
     """
     # Use None instead of a shared mutable default dict.
     params = {} if params is None else params
     is_multitask = np.ndim(lasso_y) > 1
     if is_multitask:
         lassoCV = MultiTaskLassoCV(alphas=alphas, cv=lasso_cv)
         wlassoCV = WeightedMultiTaskLassoCV(alphas=alphas, cv=wlasso_cv)
     else:
         lassoCV = LassoCV(alphas=alphas, cv=lasso_cv)
         wlassoCV = WeightedLassoCV(alphas=alphas, cv=wlasso_cv)
     lassoCV.set_params(**params)
     lassoCV.fit(lasso_X, lasso_y)
     wlassoCV.set_params(**params)
     wlassoCV.fit(wlasso_X, wlasso_y, sample_weight)
     # Check that same alpha is chosen
     self.assertEqual(lassoCV.alpha_, wlassoCV.alpha_)
     # Check that the coefficients are similar
     if is_multitask:
         for i in range(lasso_y.shape[1]):
             np.testing.assert_allclose(lassoCV.coef_[i],
                                        wlassoCV.coef_[i],
                                        atol=tol)
             if lassoCV.get_params()["fit_intercept"]:
                 self.assertAlmostEqual(lassoCV.intercept_[i],
                                        wlassoCV.intercept_[i])
     else:
         np.testing.assert_allclose(lassoCV.coef_, wlassoCV.coef_, atol=tol)
         self.assertAlmostEqual(lassoCV.intercept_, wlassoCV.intercept_)
示例#25
0
 def lassoCV(self, name):
     '''
     Lasso Regression

     Fits a 12-fold MultiTaskLassoCV on the training data, prints the test
     MSE, the coefficients and the indices of the non-zero coefficients, then
     writes predictions for ``self.X_final`` through ``genCSV`` under a name
     built from ``name`` and the MSE.
     '''
     sciLasso = MultiTaskLassoCV(
         fit_intercept=True,
         normalize=False,
         cv=12,
         tol=0.001)
     sciLasso.fit(self.X_train, self.Y_train)
     predict_test = sciLasso.predict(self.X_test)
     MSE = mean_squared_error(predict_test, self.Y_test)
     s = "Sci LassoCV            (MSE: %f)" % (MSE)
     # Call form of print: valid on Python 3, same output on Python 2 for a
     # single argument.
     print(s)
     # print(sciLasso.score(self.X_test, self.Y_test))
     print(sciLasso.coef_)
     print(np.nonzero(sciLasso.coef_))
     predict_final = sciLasso.predict(self.X_final)
     genCSV(name + '_MSE' + str(MSE), self.index_final, predict_final)
# Scale both splits with the already-fitted scaler.
X_train_scaled = scaler.transform(X_train_raw)
X_test_scaled = scaler.transform(X_test_raw)

## PCA and Feature Selection
# The union concatenates 800 PCA components with the 850 best univariate features.
pca = PCA(n_components=800)
selection = SelectKBest(k=850)
combined_features = FeatureUnion([("pca", pca), ("univ_select", selection)])
combined_features.fit(X_train_scaled, train_labels.ravel())
#print(pca.explained_variance_ratio_)
X_train_reduced = combined_features.transform(X_train_scaled)
X_test_reduced = combined_features.transform(X_test_scaled)

## Lasso CV for parameter optimization
t1 = time.time()
alps = np.linspace(.1,.625,15)
model = MultiTaskLassoCV(cv=5, alphas=alps).fit(X_train_reduced, Y_train_raw)
t_lasso_cv = time.time() - t1
# Call form of print so the script parses on Python 3.
print('time to train', t_lasso_cv)

# Display results: per-fold and mean CV error against -log10(alpha)
m_log_alphas = -np.log10(model.alphas_)

plt.figure()
plt.plot(m_log_alphas, model.mse_path_, ':')
plt.plot(m_log_alphas, model.mse_path_.mean(axis=-1), 'k',
         label='Average across the folds', linewidth=2)
plt.axvline(-np.log10(model.alpha_), linestyle='--', color='k',
            label='alpha: CV estimate')

plt.legend()
示例#27
0
# Train a MultiTaskLasso at the alpha chosen by MultiTaskLassoCV and report
# hold-out error metrics.
print("Getting data")
path_train = 'data_train.txt'
path_test = 'data_test.txt'

X, Y = get_data_own(path_train)

print(X.shape)
print(Y.shape)

print("Split data for CV")
X_train, X_test , y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=1)

lasso = MultiTaskLasso(max_iter = max_iter, normalize = True)

print("Init train with multitasklassocv")
lassocv = MultiTaskLassoCV(alphas=None, cv=10, max_iter=max_iter, verbose=True, normalize=True)
lassocv.fit(X_train, y_train)

print("Fit multitasklasso with alpha from cv lasso")
lasso.set_params(alpha=lassocv.alpha_)
lasso.fit(X_train, y_train)

print("get mean square error")
# Predict once and reuse the result for every metric.
y_pred = lasso.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
print("mae: {}".format(mae))
# mean_squared_log_error returns the mean squared log error; take the square
# root so the reported value really is the RMSLE the label promises.
rmsle = mean_squared_log_error(y_test, y_pred) ** 0.5
print("rmsle: {}".format(rmsle))
mape = mean_absolute_percentage_error(y_test, y_pred)
print("mape: {}".format(mape))

示例#28
0
 def __init__(self, train_path, test_path, pred_path):
     """Initialise the parent and create a MultiTaskLassoCV over a fixed alpha grid."""
     super().__init__(train_path, test_path, pred_path)
     # Candidate alphas: 0.05, 0.10, ... (99 values, step 0.05).
     alpha_grid = [float(step) * 0.05 for step in range(1, 100)]
     self.multiLasso_model = MultiTaskLassoCV(alphas=alpha_grid,
                                              cv=8,
                                              max_iter=1000000)
示例#29
0
def GetAllModelsForComparison(X_train, Y_train):
    """Return a dict mapping class names to default-constructed instances.

    Every value is built by calling the class with no arguments. The
    ``X_train`` and ``Y_train`` parameters are accepted but not used here.

    Each name appears exactly once: the original literal repeated several
    keys (e.g. ``'BaseEstimator'``, ``'ClassifierMixin'``, ``'SGDClassifier'``),
    which only built extra objects that were overwritten straight away. The
    resulting keys and their order are unchanged.
    """
    models = {
        'ARDRegression': ARDRegression(),
        'BayesianRidge': BayesianRidge(),
        'ElasticNet': ElasticNet(),
        'ElasticNetCV': ElasticNetCV(),
        'Hinge': Hinge(),
        #'Huber': Huber(),
        'HuberRegressor': HuberRegressor(),
        'Lars': Lars(),
        'LarsCV': LarsCV(),
        'Lasso': Lasso(),
        'LassoCV': LassoCV(),
        'LassoLars': LassoLars(),
        'LassoLarsCV': LassoLarsCV(),
        'LinearRegression': LinearRegression(),
        'Log': Log(),
        'LogisticRegression': LogisticRegression(),
        'LogisticRegressionCV': LogisticRegressionCV(),
        'ModifiedHuber': ModifiedHuber(),
        'MultiTaskElasticNet': MultiTaskElasticNet(),
        'MultiTaskElasticNetCV': MultiTaskElasticNetCV(),
        'MultiTaskLasso': MultiTaskLasso(),
        'MultiTaskLassoCV': MultiTaskLassoCV(),
        'OrthogonalMatchingPursuit': OrthogonalMatchingPursuit(),
        'OrthogonalMatchingPursuitCV': OrthogonalMatchingPursuitCV(),
        'PassiveAggressiveClassifier': PassiveAggressiveClassifier(),
        'PassiveAggressiveRegressor': PassiveAggressiveRegressor(),
        'Perceptron': Perceptron(),
        'RANSACRegressor': RANSACRegressor(),
        #'RandomizedLasso': RandomizedLasso(),
        #'RandomizedLogisticRegression': RandomizedLogisticRegression(),
        'Ridge': Ridge(),
        'RidgeCV': RidgeCV(),
        'RidgeClassifier': RidgeClassifier(),
        'SGDClassifier': SGDClassifier(),
        'SGDRegressor': SGDRegressor(),
        'SquaredLoss': SquaredLoss(),
        'TheilSenRegressor': TheilSenRegressor(),
        'BaseEstimator': BaseEstimator(),
        'ClassifierMixin': ClassifierMixin(),
        'LinearClassifierMixin': LinearClassifierMixin(),
        'LinearDiscriminantAnalysis': LinearDiscriminantAnalysis(),
        'QuadraticDiscriminantAnalysis': QuadraticDiscriminantAnalysis(),
        'StandardScaler': StandardScaler(),
        'TransformerMixin': TransformerMixin(),
        'KernelRidge': KernelRidge(),
        'RegressorMixin': RegressorMixin(),
        'LinearSVC': LinearSVC(),
        'LinearSVR': LinearSVR(),
        'NuSVC': NuSVC(),
        'NuSVR': NuSVR(),
        'OneClassSVM': OneClassSVM(),
        'SVC': SVC(),
        'SVR': SVR(),
        #'BallTree': BallTree(),
        #'DistanceMetric': DistanceMetric(),
        #'KDTree': KDTree(),
        'KNeighborsClassifier': KNeighborsClassifier(),
        'KNeighborsRegressor': KNeighborsRegressor(),
        'KernelDensity': KernelDensity(),
        #'LSHForest': LSHForest(),
        'LocalOutlierFactor': LocalOutlierFactor(),
        'NearestCentroid': NearestCentroid(),
        'NearestNeighbors': NearestNeighbors(),
        'RadiusNeighborsClassifier': RadiusNeighborsClassifier(),
        'RadiusNeighborsRegressor': RadiusNeighborsRegressor(),
        #'GaussianProcess': GaussianProcess(),
        'GaussianProcessRegressor': GaussianProcessRegressor(),
        'GaussianProcessClassifier': GaussianProcessClassifier(),
        'CCA': CCA(),
        'PLSCanonical': PLSCanonical(),
        'PLSRegression': PLSRegression(),
        'PLSSVD': PLSSVD(),
        #'ABCMeta': ABCMeta(),
        #'BaseDiscreteNB': BaseDiscreteNB(),
        #'BaseNB': BaseNB(),
        'BernoulliNB': BernoulliNB(),
        'GaussianNB': GaussianNB(),
        'LabelBinarizer': LabelBinarizer(),
        'MultinomialNB': MultinomialNB(),
        'DecisionTreeClassifier': DecisionTreeClassifier(),
        'DecisionTreeRegressor': DecisionTreeRegressor(),
        'ExtraTreeClassifier': ExtraTreeClassifier(),
        'AdaBoostClassifier': AdaBoostClassifier(),
        'AdaBoostRegressor': AdaBoostRegressor(),
        'BaggingClassifier': BaggingClassifier(),
        'BaggingRegressor': BaggingRegressor(),
        #'BaseEnsemble': BaseEnsemble(),
        'ExtraTreesClassifier': ExtraTreesClassifier(),
        'ExtraTreesRegressor': ExtraTreesRegressor(),
        'GradientBoostingClassifier': GradientBoostingClassifier(),
        'GradientBoostingRegressor': GradientBoostingRegressor(),
        'IsolationForest': IsolationForest(),
        'RandomForestClassifier': RandomForestClassifier(),
        'RandomForestRegressor': RandomForestRegressor(),
        'RandomTreesEmbedding': RandomTreesEmbedding(),
        #'VotingClassifier': VotingClassifier(),
        'MetaEstimatorMixin': MetaEstimatorMixin(),
        #'OneVsOneClassifier': OneVsOneClassifier(),
        #'OneVsRestClassifier': OneVsRestClassifier(),
        #'OutputCodeClassifier': OutputCodeClassifier(),
        'Parallel': Parallel(),
        #'ClassifierChain': ClassifierChain(),
        #'MultiOutputClassifier': MultiOutputClassifier(),
        #'MultiOutputEstimator': MultiOutputEstimator(),
        #'MultiOutputRegressor': MultiOutputRegressor(),
        'LabelPropagation': LabelPropagation(),
        'LabelSpreading': LabelSpreading(),
        'IsotonicRegression': IsotonicRegression(),
        'BernoulliRBM': BernoulliRBM(),
        'MLPClassifier': MLPClassifier(),
        'MLPRegressor': MLPRegressor(),
    }
    return models
示例#30
0
    SVC(kernel='poly', probability=True, degree=4),
    SVC(kernel='poly', probability=True, degree=5),
    DecisionTreeClassifier(),
    KNeighborsClassifier(),
    GaussianNB(),
    RandomForestClassifier(),
    AdaBoostClassifier(),
    QuadraticDiscriminantAnalysis(),
    LinearDiscriminantAnalysis(),
    ElasticNetCV(max_iter=10000),
    LarsCV(),
    LassoCV(max_iter=10000),
    LassoLarsCV(),
    LogisticRegressionCV(scoring=multi_class_log_loss),
    MultiTaskElasticNetCV(),
    MultiTaskLassoCV(),
    OrthogonalMatchingPursuitCV(),
    RidgeClassifierCV()
]
# Index of the classifier to run; defaults to 17 and can be overridden by the
# first command-line argument.
algorithm = 17
if len(sys.argv) > 1:
    algorithm = int(sys.argv[1])

# Look up the name, estimator and output file that belong to that index.
name = names[algorithm]
clf = classifiers[algorithm]
output_file_name = output_file_names[algorithm] + file_identifier

t = time.time()
random_state = np.random.RandomState(0)
# Call form of print so the script parses on Python 3.
print("Fitting classifier " + name)
classifier = OneVsRestClassifier(clf, n_jobs=2)
# print('## Logistic Regression Results ##')
# logreg = LogisticRegression(penalty='l2')
# logreg.fit(X_train, y_train)
# y_pred_logreg = logreg.predict(X_test)
# print('R2:    ', r2_score(y_test, y_pred_logreg))
# print('MAE:   ', metrics.mean_absolute_error(y_test, y_pred_logreg))
# print('MSE:   ', metrics.mean_squared_error(y_test, y_pred_logreg))
# print('RMSE:  ', np.sqrt(np.absolute(metrics.mean_squared_error(y_test, y_pred_logreg))))
# print('variance score:', explained_variance_score(y_test, y_pred_logreg, multioutput='uniform_average'))

# -----------------------------------------------------------------------------
# MultiTaskLassoCV regression with 10-fold CV: fit, time the fit, and report
# hold-out metrics.
# -----------------------------------------------------------------------------
print(' ')
print('## 2. Lasso Regression Results ##')
lasso = MultiTaskLassoCV(cv=10, eps=0.01, max_iter=1000)

# Time only the fit itself.
t = time.time()
lasso.fit(X_train, y_train)
t_lasso = time.time() - t

y_pred_lasso = lasso.predict(X_test)
mse_lasso = metrics.mean_squared_error(y_test, y_pred_lasso)
variance_score_lasso = explained_variance_score(y_test,
                                                y_pred_lasso,
                                                multioutput='uniform_average')

print('R2:    ', r2_score(y_test, y_pred_lasso))
print('MAE:   ', metrics.mean_absolute_error(y_test, y_pred_lasso))
print('MSE:   ', mse_lasso)
print('RMSE:  ', np.sqrt(np.absolute(mse_lasso)))
print('variance score: ', variance_score_lasso)
print('training time:   ', t_lasso)
    Xl_tr[:,k] = (Xl_tr[:,k]-mea_l[k])/sig_l[k] 
    
# Standardise each column of Xh_tr in place, keeping the per-column mean and
# standard deviation for later use.
mea_h = np.zeros(Dh)
sig_h = np.zeros(Dh)
for k in range(Dh):
    column = Xh_tr[:, k]
    mea_h[k] = np.mean(column)
    sig_h[k] = np.std(column)
    Xh_tr[:, k] = (column - mea_h[k]) / sig_h[k]


############## LassoCV ########################################################
from sklearn.linear_model import MultiTaskLassoCV

# Log-spaced penalty grid from 1e-10 to 1.
n_alphas = 5
alphas = np.logspace(-10, 0, n_alphas)

lasso = MultiTaskLassoCV(alphas=alphas,
                         cv=5,
                         fit_intercept=False,
                         normalize=False,
                         n_jobs=3)
lasso.fit(Xl_tr, Xh_tr)

# Penalty selected by cross validation.
Lasso_lambda_opt = lasso.alpha_

print('\n Optimal lambda:', Lasso_lambda_opt)
############ Validation curve #################################################
"""
# validation curve
from sklearn.linear_model import Lasso
from sklearn.learning_curve import validation_curve

lambdas_range= np.append(0, np.logspace(0, 6, 28))
train_MSE, test_MSE = validation_curve(Lasso(),Xl_tr, Xh_tr, param_name="alpha", param_range=lambdas_range, 
                                             scoring = "mean_squared_error", cv=10)